Browse Source

first commit

master
zhangshu 9 months ago
parent
commit
882aca5045
  1. 1
      .gitignore
  2. BIN
      db/youtube_prod.db
  3. 7
      download/ChannelService.py
  4. 2
      download/Contant.py
  5. 28
      download/DownloadInfoService.py
  6. 173
      download/DownloadUtil.py
  7. 6
      download/LoggerUtils.py
  8. 65
      download/Orm.py
  9. 26
      download/VideoService.py
  10. BIN
      download/download.zip
  11. 49
      download/main_download.py
  12. 2
      init/Contant.py
  13. 6
      init/LoggerUtils.py
  14. 65
      init/Orm.py
  15. 62
      init/init.py
  16. 2
      init/urlList.txt
  17. 3
      init/urlList_en.txt
  18. 13
      init/urlList_india.txt
  19. 7
      init/urlList_ja.txt
  20. 1
      sftp/Contant.py
  21. 6
      sftp/LoggerUtils.py
  22. 97
      sftp/sftp.py
  23. 5
      sftp/sftp_config.ini
  24. 15
      src/ChannelService.py
  25. 4
      src/Contant.py
  26. 16
      src/DownloadInfoService.py
  27. 6
      src/LoggerUtils.py
  28. 67
      src/Orm.py
  29. 0
      src/SrcTest.py
  30. 31
      src/VideoService.py
  31. 164
      src/YouTubeUtils.py
  32. 4
      src/api_key.txt
  33. 1
      src/channelList.txt
  34. 70
      src/main.py
  35. 10
      start_download.sh
  36. 11
      start_sftp.sh
  37. 12
      start_src.sh
  38. 4
      stop_download.sh
  39. 2
      test.sh
  40. 42
      test/test.py
  41. 8
      test/test2.py
  42. 15
      view_count/ChannelService.py
  43. 4
      view_count/Contant.py
  44. 6
      view_count/LoggerUtils.py
  45. 75
      view_count/Orm.py
  46. 33
      view_count/VideoCountService.py
  47. 34
      view_count/VideoService.py
  48. 79
      view_count/view_count_main.py

1
.gitignore

@ -138,3 +138,4 @@ dmypy.json
# Cython debug symbols # Cython debug symbols
cython_debug/ cython_debug/
*/logs

BIN
db/youtube_prod.db

Binary file not shown.

7
download/ChannelService.py

@ -0,0 +1,7 @@
import json
from Orm import Channel
from playhouse.shortcuts import model_to_dict, dict_to_model
class ChannelService:
    """Lookups against the ``Channel`` table."""

    @staticmethod
    def getOneByChannelId(channelId):
        """Return the ``Channel`` row whose ``channelId`` matches, or ``None``.

        Declared static because the method takes no ``self``; without the
        decorator it could only be called on the class, never on an instance.
        """
        return Channel.get_or_none(Channel.channelId == channelId)

2
download/Contant.py

@ -0,0 +1,2 @@
db=""
logDir=""

28
download/DownloadInfoService.py

@ -0,0 +1,28 @@
from Orm import DownloadInfo
class DownloadService:
    """Queries and updates for the ``DownloadInfo`` table."""

    @staticmethod
    def getOneByVideoId(videoId, downloadType):
        """Return the row for ``videoId``/``downloadType``, or ``None``.

        Uses ``get_or_none`` because every caller tests the result against
        ``None``; plain ``get`` raises ``DoesNotExist`` instead of returning
        ``None`` when no row matches.
        """
        return DownloadInfo.get_or_none(
            DownloadInfo.videoId == videoId,
            DownloadInfo.downloadType == downloadType)

    @staticmethod
    def createOne(videoId, downloadType, tryTime, isFinished):
        """Insert a new ``DownloadInfo`` row with the given values."""
        DownloadInfo.create(
            videoId=videoId,
            downloadType=downloadType,
            tryTime=tryTime,
            isFinished=isFinished
        )

    @staticmethod
    def updateInfoByVideoId(videoId, tryTime, isFinished, downloadType):
        """Set ``tryTime`` and ``isFinished`` on the matching row(s)."""
        DownloadInfo.update(tryTime=tryTime, isFinished=isFinished).where(
            DownloadInfo.videoId == videoId,
            DownloadInfo.downloadType == downloadType).execute()

    @staticmethod
    def changeDownloadType(videoId, tryTime, isFinished, downloadType, changeType):
        """Move the matching row(s) from ``downloadType`` to ``changeType``.

        ``tryTime`` and ``isFinished`` are overwritten in the same statement.
        """
        DownloadInfo.update(tryTime=tryTime, isFinished=isFinished, downloadType=changeType).where(
            DownloadInfo.videoId == videoId,
            DownloadInfo.downloadType == downloadType).execute()

    @staticmethod
    def _findUnfinished(downloadType):
        """Return up to 10 unfinished rows of ``downloadType`` with tryTime <= 5."""
        return DownloadInfo.select().where(
            DownloadInfo.isFinished == 0,
            DownloadInfo.tryTime <= 5,
            DownloadInfo.downloadType == downloadType).limit(10).execute()

    @staticmethod
    def findNotFinishList():
        """Return the next batch of unfinished rows with ``downloadType == 1``."""
        return DownloadService._findUnfinished(1)

    @staticmethod
    def findNotFinishListTwo():
        """Return the next batch of unfinished rows with ``downloadType == 2``."""
        return DownloadService._findUnfinished(2)

173
download/DownloadUtil.py

@ -0,0 +1,173 @@
from shutil import copyfile
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import SRTFormatter
from VideoService import VideoService
from ChannelService import ChannelService
from DownloadInfoService import DownloadService
from LoggerUtils import Logger
import time
import os
from func_timeout import func_set_timeout
import operator
class DownLoadUtil:
    """Download YouTube transcripts as ``.srt`` files and record the outcome.

    ``downloadOne`` handles ``DownloadInfo`` rows of type 1 and
    ``downloadTwo`` rows of type 2. Both are limited to 60 seconds by
    ``func_set_timeout``; on timeout ``FunctionTimedOut`` reaches the caller.
    """

    formatter = SRTFormatter()
    proxies = {"http": "http://127.0.0.1:7890",
               "https": "https://127.0.0.1:7890"}
    # Root folders used by downloadOne: the permanent copy and the staging copy.
    storeRoot = "E:/code/python/srt_file/"
    copyRoot = "E:/code/python/tmp_srt_file/"
    # Fragments removed from a video title before it is used in a file name.
    _strippedTitleChars = ("?", "\\", "|", "<", ">", ":")

    @staticmethod
    def _cleanTitle(rawTitle, extraRemovals=()):
        """Return ``rawTitle`` with "/" replaced by U+2215 and unsafe fragments removed."""
        title = str(rawTitle).replace("/", u"\u2215")
        for fragment in DownLoadUtil._strippedTitleChars + tuple(extraRemovals):
            title = title.replace(fragment, "")
        return title

    @staticmethod
    def _ensureDir(path):
        """Create ``path`` (and parents) when it does not exist yet."""
        if not os.path.exists(path):
            Logger.info("开始创建文件夹:" + path)
            os.makedirs(path)

    @staticmethod
    def _capLength(storePath, cpPath):
        """Drop the last 20 characters of both paths when ``cpPath`` exceeds 120."""
        if len(cpPath) > 120:
            return storePath[:-20] + ".srt", cpPath[:-20] + ".srt"
        return storePath, cpPath

    @staticmethod
    def _saveTranscript(videoId, languages, storePath, cpPath, logTemplate):
        """Fetch the transcript, write it to ``storePath`` and copy it to ``cpPath``.

        ``cpPath`` may be ``None`` to skip the copy. ``logTemplate`` is the
        message used to log the destination path.
        """
        videoSrt = YouTubeTranscriptApi.get_transcript(
            videoId, languages=[languages])
        srt_formatted = DownLoadUtil.formatter.format_transcript(videoSrt)
        Logger.info(logTemplate.format(storePath))
        with open(storePath, 'w', encoding='utf-8') as srt_file:
            srt_file.write(srt_formatted)
        Logger.info("下载完成...{}".format(videoId))
        if cpPath is not None:
            copyfile(storePath, cpPath)

    @staticmethod
    def _markFinished(videoId, downloadType):
        """Flag the video as downloaded and close its ``DownloadInfo`` row."""
        VideoService.updateIsDownloadByVideoId(videoId, 1)
        downloadInfo = DownloadService.getOneByVideoId(videoId, downloadType)
        if downloadInfo is not None:
            DownloadService.updateInfoByVideoId(
                videoId, downloadInfo.tryTime + 1, 1, downloadType)

    @staticmethod
    def _recordRetry(videoId, downloadType):
        """Count one failed attempt on the ``DownloadInfo`` row, if it exists."""
        downloadInfo = DownloadService.getOneByVideoId(videoId, downloadType)
        if downloadInfo is not None:
            Logger.info("VideoId:{}开始重试第{}".format(
                videoId, downloadInfo.tryTime + 1))
            DownloadService.updateInfoByVideoId(
                videoId, downloadInfo.tryTime + 1, 0, downloadType)

    @staticmethod
    def _loadVideoAndChannel(videoId, downloadType):
        """Return ``(video, channel)``, or ``None`` when either row is missing.

        A missing row is logged and counted as a failed attempt so the caller's
        polling loop does not pick the same video up forever.
        """
        video = VideoService.getOneByVideoId(videoId)
        channel = None
        if video is not None:
            channel = ChannelService.getOneByChannelId(str(video.channelId))
        if video is None or channel is None:
            Logger.error("VideoId:{} has no matching video/channel row".format(videoId))
            DownLoadUtil._recordRetry(videoId, downloadType)
            return None
        return video, channel

    @staticmethod
    @func_set_timeout(60)
    def downloadOne(videoId):
        """Download the transcript for a type-1 entry into the store and staging folders.

        On success the video is flagged as downloaded and the ``DownloadInfo``
        row is closed. When no transcript exists the row is moved to type 2.
        When the file name is too long the download is retried with the video
        id as file name. Any other failure counts one more attempt.
        """
        # Load the rows this download needs.
        loaded = DownLoadUtil._loadVideoAndChannel(videoId, 1)
        if loaded is None:
            return
        video, channel = loaded
        # Build a file-system safe title.
        videoTitle = DownLoadUtil._cleanTitle(video.videoTitle, ("में",))
        # Keep only the date part of the publish timestamp.
        videoPublishTime = str(video.videoPublishTime).split("T")[0]
        languages = str(video.videoLanguage)
        channelDir = str(channel.channelTitle).rstrip()
        storeDir = DownLoadUtil.storeRoot + channelDir
        cpDir = DownLoadUtil.copyRoot + channelDir
        Logger.info("开始下载...{}".format(videoId))
        try:
            DownLoadUtil._ensureDir(storeDir)
            DownLoadUtil._ensureDir(cpDir)
            fileName = videoPublishTime + "-" + languages + "-" + videoTitle + ".srt"
            storePath, cpPath = DownLoadUtil._capLength(
                storeDir + "/" + fileName, cpDir + "/" + fileName)
            DownLoadUtil._saveTranscript(
                videoId, languages, storePath, cpPath, "文件地址...{}")
            DownLoadUtil._markFinished(videoId, 1)
        except Exception as e:
            Logger.error("下载失败...{}".format(videoId))
            logStr = "Exception...{}".format(e)
            Logger.error(logStr)
            if "No transcripts" in logStr:
                Logger.error("VideoId:{},不存在字幕文件".format(videoId))
                if DownloadService.getOneByVideoId(videoId, 1) is not None:
                    DownloadService.changeDownloadType(
                        videoId, 0, 0, 1, 2)
            elif "File name too long" in logStr:
                # Retry with the video id as file name, in the same channel
                # folders that were created above.
                try:
                    fileName = videoPublishTime + "-" + languages + "-" + videoId + ".srt"
                    storePath, cpPath = DownLoadUtil._capLength(
                        storeDir + "/" + fileName, cpDir + "/" + fileName)
                    DownLoadUtil._saveTranscript(
                        videoId, languages, storePath, cpPath,
                        "文件名过长,文件地址...{}")
                    DownLoadUtil._markFinished(videoId, 1)
                except Exception as retryError:
                    # Do not let the fallback crash the caller's loop.
                    Logger.error("Exception...{}".format(retryError))
                    DownLoadUtil._recordRetry(videoId, 1)
            else:
                DownLoadUtil._recordRetry(videoId, 1)

    @staticmethod
    @func_set_timeout(60)
    def downloadTwo(videoId):
        """Download the transcript for a type-2 entry into ``./download/<channel>``.

        On success the video is flagged as downloaded and the ``DownloadInfo``
        row is closed. When no transcript exists the row is moved to type 3
        with ``tryTime`` 6. Any other failure counts one more attempt.
        """
        # Load the rows this download needs.
        loaded = DownLoadUtil._loadVideoAndChannel(videoId, 2)
        if loaded is None:
            return
        video, channel = loaded
        # Build a file-system safe title.
        videoTitle = DownLoadUtil._cleanTitle(video.videoTitle)
        # Keep only the date part of the publish timestamp.
        videoPublishTime = str(video.videoPublishTime).split("T")[0]
        Logger.info("开始下载...{}".format(videoId))
        try:
            languages = str(video.videoLanguage)
            storeDir = "./download/" + str(channel.channelTitle)
            DownLoadUtil._ensureDir(storeDir)
            # "/" keeps the file inside the channel folder on every platform.
            storePath = storeDir + "/" + videoPublishTime + \
                "-" + languages + "-" + videoTitle + ".srt"
            DownLoadUtil._saveTranscript(
                videoId, languages, storePath, None, "文件地址...{}")
            DownLoadUtil._markFinished(videoId, 2)
        except Exception as e:
            Logger.error("下载失败...{}".format(videoId))
            logStr = "Exception...{}".format(e)
            Logger.error(logStr)
            if "No transcripts" in logStr:
                Logger.error("VideoId:{},不存在字幕文件".format(videoId))
                if DownloadService.getOneByVideoId(videoId, 2) is not None:
                    DownloadService.changeDownloadType(
                        videoId, 6, 0, 2, 3)
            else:
                DownLoadUtil._recordRetry(videoId, 2)

6
download/LoggerUtils.py

@ -0,0 +1,6 @@
from loguru import logger
import Contant
Logger = logger
def initLogger():
    """Attach a file sink under ``Contant.logDir`` to the shared loguru logger."""
    sink_path = Contant.logDir + "/download_{time}.log"
    sink_options = {
        "rotation": "500MB",
        "encoding": "utf-8",
        "enqueue": True,
        "compression": "zip",
        "retention": "10 days",
    }
    logger.add(sink_path, **sink_options)

65
download/Orm.py

@ -0,0 +1,65 @@
from peewee import *
import Contant
import argparse
from LoggerUtils import Logger
# The database path is read from the command line at import time so that `db`
# exists before the model classes below are defined.
# parse_known_args() is used instead of parse_args(): this module is imported
# by an entry script that owns the real CLI, and a flag this module does not
# declare must not abort the process during import.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
args, _unknown_args = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
def ormInit():
    """Call ``create_table()`` on Channel, Video and DownloadInfo, in that order."""
    for model in (Channel, Video, DownloadInfo):
        model.create_table()
class BaseModel(Model):
    """Common base class that binds every model to the module-level ``db``."""
    class Meta:
        database = db
# Channel information
class Channel(BaseModel):
    """A channel row, stored in the ``Channel`` table."""
    id = PrimaryKeyField()
    channelId = CharField(null=False)
    channelTitle = CharField(null=False)
    channelLanguage = CharField()
    # The only nullable column of this model.
    channelReptileTime = CharField(null=True)
    class Meta:
        db_table = 'Channel'
# Video information
class Video(BaseModel):
    """A video row, stored in the ``Vidoes`` table."""
    id = PrimaryKeyField()
    videoId = CharField(null=False)
    channelId = CharField(null=False)
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    # Kept as text; DownloadUtil splits it on "T" to get the date part.
    videoPublishTime = CharField()
    videoLanguage = CharField()
    # Set to 1 by VideoService.updateIsDownloadByVideoId after a download.
    isDownload = IntegerField()
    class Meta:
        # NOTE(review): 'Vidoes' is spelled this way in every Orm.py of the
        # commit; presumably it matches the shipped database - confirm before renaming.
        db_table = 'Vidoes'
# Download information
class DownloadInfo(BaseModel):
    """Download bookkeeping for a video, stored in the ``Download_info`` table."""
    id = PrimaryKeyField()
    videoId = CharField()
    # DownloadInfoService queries for the values 1 and 2.
    downloadType = IntegerField()
    # Attempt counter; DownloadInfoService only selects rows with tryTime <= 5.
    tryTime = IntegerField()
    # 0 while pending; DownloadUtil writes 1 after a successful download.
    isFinished = IntegerField()
    class Meta:
        db_table = 'Download_info'

26
download/VideoService.py

@ -0,0 +1,26 @@
import json
from Orm import Video
from playhouse.shortcuts import model_to_dict, dict_to_model
class VideoService:
    """Lookups and updates against the ``Video`` model.

    The methods take no ``self`` and are therefore declared static, so they
    work both on the class and on an instance.
    """

    @staticmethod
    def getOneByVideoId(videoId):
        """Return the ``Video`` row whose ``videoId`` matches, or ``None``."""
        return Video.get_or_none(Video.videoId == videoId)

    @staticmethod
    def createOne(videoId, channelId, videoTitle, videoLen, videoType, videoPublishTime, videoLanguage, isDownload):
        """Insert a new ``Video`` row with the given column values."""
        Video.create(videoId=videoId,
                     channelId=channelId,
                     videoTitle=videoTitle,
                     videoLen=videoLen,
                     videoType=videoType,
                     videoPublishTime=videoPublishTime,
                     videoLanguage=videoLanguage,
                     isDownload=isDownload)

    @staticmethod
    def updateLenByVideoId(videoId, len):
        """Set ``videoLen`` on the row(s) matching ``videoId``.

        The parameter name ``len`` shadows the builtin; it is kept so existing
        keyword callers continue to work.
        """
        Video.update(videoLen=len).where(Video.videoId == videoId).execute()

    @staticmethod
    def updateIsDownloadByVideoId(videoId, isDownload):
        """Set ``isDownload`` on the row(s) matching ``videoId``."""
        Video.update(isDownload=isDownload).where(
            Video.videoId == videoId).execute()

BIN
download/download.zip

Binary file not shown.

49
download/main_download.py

@ -0,0 +1,49 @@
import argparse
import random
import time
import Contant
from LoggerUtils import Logger, initLogger
import Orm
from VideoService import VideoService
from ChannelService import ChannelService
from DownloadInfoService import DownloadService
from DownloadUtil import DownLoadUtil
from func_timeout import func_set_timeout
import func_timeout
import requests
# py ./main_download.py --db="../db/youtube_prod.db" --logDir="./logs"
if __name__ == "__main__":
    # Entry point: download transcripts for unfinished type-1 entries, one
    # batch at a time, until the query returns nothing.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    Orm.ormInit()
    pending = DownloadService.findNotFinishList()
    Logger.info("list size:{}".format(len(pending)))
    while len(pending) > 0:
        for info in pending:
            try:
                DownLoadUtil.downloadOne(info.videoId)
                restTime = random.randint(1, 3)
                Logger.info("间隔{}秒后继续...".format(restTime))
                time.sleep(restTime)
            except func_timeout.exceptions.FunctionTimedOut as e:
                Logger.error("执行下载方法超时错误:{}".format(e))
                # A timed-out download never reaches its own bookkeeping, so
                # count the attempt here. Otherwise the row keeps matching
                # findNotFinishList() and this loop never terminates.
                DownloadService.updateInfoByVideoId(
                    info.videoId, info.tryTime + 1, 0, 1)
        # Pause between batches.
        loopRestTime = random.randint(1, 3)
        Logger.info("循环间隔{}秒后继续...".format(loopRestTime))
        time.sleep(loopRestTime)
        pending = DownloadService.findNotFinishList()
    # The DingTalk completion notification is intentionally disabled here.

2
init/Contant.py

@ -0,0 +1,2 @@
db=""
logDir=""

6
init/LoggerUtils.py

@ -0,0 +1,6 @@
from loguru import logger
import Contant
Logger = logger
def initLogger():
    """Attach a file sink under ``Contant.logDir`` to the shared loguru logger."""
    sink_path = Contant.logDir + "/init_{time}.log"
    sink_options = {
        "rotation": "500MB",
        "encoding": "utf-8",
        "enqueue": True,
        "compression": "zip",
        "retention": "10 days",
    }
    logger.add(sink_path, **sink_options)

65
init/Orm.py

@ -0,0 +1,65 @@
from peewee import *
import Contant
import argparse
from LoggerUtils import Logger
# The database path is read from the command line at import time so that `db`
# exists before the model classes below are defined.
# parse_known_args() is used instead of parse_args(): this module is imported
# by an entry script that owns the real CLI, and a flag this module does not
# declare must not abort the process during import.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
args, _unknown_args = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
def ormInit():
    """Call ``create_table()`` on Channel, Vidoe and DownloadInfo, in that order."""
    for model in (Channel, Vidoe, DownloadInfo):
        model.create_table()
class BaseModel(Model):
    """Common base class that binds every model to the module-level ``db``."""
    class Meta:
        database = db
# Channel information
class Channel(BaseModel):
    """A channel row, stored in the ``Channel`` table."""
    id = PrimaryKeyField()
    channelId = CharField(null=False)
    channelTitle = CharField(null=False)
    channelLanguage = CharField()
    # The only nullable column; init.py does not set it when creating a row.
    channelReptileTime = CharField(null=True)
    class Meta:
        db_table = 'Channel'
# Video information
class Vidoe(BaseModel):
    """A video row, stored in the ``Vidoes`` table.

    NOTE(review): the class is spelled ``Vidoe`` here, while the other Orm.py
    copies in this commit call it ``Video``; ormInit() in this module uses
    this spelling.
    """
    id = PrimaryKeyField()
    videoId = CharField(null=False)
    channelId = CharField(null=False)
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()
    class Meta:
        # NOTE(review): 'Vidoes' is spelled this way in every Orm.py of the
        # commit; presumably it matches the shipped database - confirm before renaming.
        db_table = 'Vidoes'
# Download information
class DownloadInfo(BaseModel):
    """Download bookkeeping for a video, stored in the ``Download_info`` table."""
    id = PrimaryKeyField()
    videoId = CharField()
    downloadType = IntegerField()
    tryTime = IntegerField()
    isFinished = IntegerField()
    class Meta:
        db_table = 'Download_info'

62
init/init.py

@ -0,0 +1,62 @@
import time
from LoggerUtils import Logger, initLogger
import argparse
import Contant
from Orm import ormInit, Channel
import operator
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen, Request
# py .\init.py --db=../db/youtube_prod.db --logDir=./logs
def saveChannel(channelUrl, language):
    """Fetch a channel page and store the channel unless it already exists.

    The channel id is taken from the ``href`` of the ``<link>`` tag whose
    markup contains "canonical" (the part after "/channel/"). The channel
    name is taken from the ``content`` attribute of a ``<link>`` tag whose
    markup contains "content=". When several tags match, the last one wins.

    Args:
        channelUrl: URL of the channel page to fetch.
        language: value stored in ``Channel.channelLanguage``.

    Nothing is stored when no channel id can be extracted or when a row with
    that id already exists.
    """
    Logger.info("频道链接:"+channelUrl)
    channelId = ""
    channelName = ""
    pageRequest = Request(channelUrl, headers={'User-Agent': 'Mozilla'})
    # Parse inside the with-block so the HTTP response is always closed.
    with urlopen(pageRequest) as url_opener:
        videoInfo = bs(url_opener, features="html.parser")
    for link in videoInfo.find_all("link"):
        markup = str(link)
        if "canonical" in markup:
            # Tolerate a canonical link without "/channel/" instead of
            # failing with IndexError.
            pieces = str(link.get('href', '')).split("/channel/")
            if len(pieces) > 1:
                channelId = pieces[1]
        if "content=" in markup:
            # "content=" may also occur inside another attribute value.
            content = link.get('content')
            if content is not None:
                channelName = str(content)
    Logger.info("channelId:"+channelId)
    Logger.info("channelName:"+channelName)
    if not channelId:
        # Never insert a row with an empty id.
        Logger.error("channelId not found for url:" + channelUrl)
        return
    channel = Channel.get_or_none(Channel.channelId == channelId)
    if channel is not None:
        Logger.info("频道已存在:" + channelId)
        return
    Channel.create(channelTitle=channelName,
                   channelId=channelId, channelLanguage=language)
if __name__ == "__main__":
    # Entry point: create the tables, then register every channel listed in
    # urlList.txt (one "<url> <language>" pair per line).
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    ormInit()
    Logger.info("SqlLite存放地址:"+Contant.db)
    Logger.info("日志文件存放地址:"+Contant.logDir)
    Logger.info("开始初始化...")
    # Read the channel URLs that should be added.
    Logger.info("开始读取需要新增的频道地址...")
    # The with-block closes the file once all lines are read.
    with open("urlList.txt") as urlFile:
        urlList = [line.strip('\n') for line in urlFile]
    for url_str in urlList:
        if len(url_str) > 10:
            # split() tolerates repeated blanks and a trailing "\r".
            fields = url_str.split()
            if len(fields) < 2:
                # A line without a language is skipped instead of raising IndexError.
                Logger.error("skip line without language: {}", url_str)
                continue
            url, language = fields[0], fields[1]
            Logger.info("url:{} ,language:{}", url, language)
            saveChannel(url, language)

2
init/urlList.txt

@ -0,0 +1,2 @@
https://www.youtube.com/@easymoney380 en
https://www.youtube.com/@Groww en

3
init/urlList_en.txt

@ -0,0 +1,3 @@
en
https://www.youtube.com/@easymoney380
https://www.youtube.com/@Groww

13
init/urlList_india.txt

@ -0,0 +1,13 @@
hi
https://www.youtube.com/@goela
https://www.youtube.com/@GoelaSchoolofFinanceShorts/featured
https://www.youtube.com/@InvestYadnya
https://www.youtube.com/@NDTVProfitIndia
https://www.youtube.com/@Neerajjoshi/featured
https://www.youtube.com/@thehimanichaudhary
https://www.youtube.com/@ADigitalBlogger
https://www.youtube.com/@stockburnerofficial
https://www.youtube.com/@nehanagar
https://www.youtube.com/@easymoney380
https://www.youtube.com/@madhurokade
https://www.youtube.com/@stockmartpro

7
init/urlList_ja.txt

@ -0,0 +1,7 @@
ja
https://www.youtube.com/@ryogakucho
https://www.youtube.com/@DanTakahashi1
https://www.youtube.com/@buffett_taro
https://www.youtube.com/@Tsubame104
https://www.youtube.com/@inc_academy
https://www.youtube.com/@kamioka01

1
sftp/Contant.py

@ -0,0 +1 @@
logDir=""

6
sftp/LoggerUtils.py

@ -0,0 +1,6 @@
from loguru import logger
import Contant
Logger = logger
def initLogger():
    """Attach a file sink under ``Contant.logDir`` to the shared loguru logger."""
    sink_path = Contant.logDir + "/sftp_{time}.log"
    sink_options = {
        "rotation": "500MB",
        "encoding": "utf-8",
        "enqueue": True,
        "compression": "zip",
        "retention": "10 days",
    }
    logger.add(sink_path, **sink_options)

97
sftp/sftp.py

@ -0,0 +1,97 @@
import os
import shutil
import paramiko
import argparse
import Contant
from LoggerUtils import Logger, initLogger
import configparser
import requests
import time
# python3 sftp.py --local="/mnt/tmp_srt_file" --logDir="./logs"
# python3 sftp.py --local="/mnt/test_file" --logDir="./logs"
if __name__ == "__main__":
    # Entry point: upload every file below --local to the SFTP server, delete
    # each local file after its upload, then send a DingTalk notification.
    # Read the command-line arguments.
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--local", type=str, default="")
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.logDir = args.logDir
    initLogger()
    # Read the configuration file.
    config = configparser.ConfigParser()
    config.read('sftp_config.ini')
    # SFTP connection settings.
    hostname = config.get('sftp_config', 'hostname')
    port = config.getint('sftp_config', 'port')
    username = config.get('sftp_config', 'username')
    password = config.get('sftp_config', 'password')
    # The password is deliberately left out of the log.
    Logger.info("host:{},port:{},username:{}".format(
        hostname, port, username))
    ssh_client = paramiko.SSHClient()
    # NOTE(security): AutoAddPolicy accepts any host key without verification.
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    sftp_client = None  # stays None until the SFTP session is open
    try:
        ssh_client.connect(hostname, port, username, password)
        # Open the SFTP session.
        sftp_client = ssh_client.open_sftp()
        Logger.info("SFTP客户端已经建立:{}".format(sftp_client))
        remote_root = "/Inbound/YouTube Captions"
        local_root = args.local
        Logger.info("remote_root:{},local_root:{}".format(remote_root, local_root))
        for name in os.listdir(local_root):
            remoteDir = remote_root + "/" + name
            localDir = local_root + "/" + name
            # Make sure the remote folder exists.
            try:
                sftp_client.chdir(remoteDir)
            except (IOError, paramiko.SFTPError):
                sftp_client.mkdir(remoteDir)
                sftp_client.chdir(remoteDir)
            # Walk the local staging folder.
            for srt in os.listdir(localDir):
                remotePath = remoteDir + "/" + srt
                localPath = localDir + "/" + srt
                # Remove a remote file of the same name, if there is one.
                try:
                    sftp_client.stat(remotePath)
                    sftp_client.remove(remotePath)
                    Logger.info("Remote file '{}' deleted.".format(remotePath))
                except FileNotFoundError:
                    Logger.info("Remote file '{}' not found.".format(remotePath))
                # Upload the local file.
                try:
                    # Overlong remote paths lose their last 20 characters.
                    if len(remotePath) > 120:
                        remotePath = remotePath[:-20] + ".srt"
                    if os.path.exists(localPath):
                        Logger.info("本地文件 '{}' 存在,开始上传.".format(localPath))
                        sftp_client.put(localPath, remotePath, confirm=False)
                        os.remove(localPath)
                    else:
                        Logger.info("本地文件 '{}' 不存在,无法上传.".format(localPath))
                except Exception as e:
                    Logger.info("上传失败 '{}' 文件名长度{}".format(
                        remotePath, len(remotePath)))
                    Logger.error(e)
                    # Start a fresh SFTP session after a failed upload.
                    sftp_client.close()
                    sftp_client = ssh_client.open_sftp()
    finally:
        # Always release the connection, also when an error ends the run.
        if sftp_client is not None:
            sftp_client.close()
        ssh_client.close()
    # Send the DingTalk notification.
    # NOTE(security): the access token is hard-coded in the source.
    webhook = "https://oapi.dingtalk.com/robot/send?access_token=c8c8d7d42c4eecd449dd303025ef968f647d1d8e8694e3fabc0ab5770d646dcb"
    jsonData = {
        "msgtype": "text",
        "text": {
            "content": "[Youtube]sftp finished"
        }
    }
    # The timeout keeps a hanging webhook from blocking the script forever.
    requests.post(webhook, json=jsonData, timeout=10)
    Logger.info("sftp发送钉钉消息成功...")

5
sftp/sftp_config.ini

@ -0,0 +1,5 @@
[sftp_config]
hostname = filetransfer.blackrock.com
port = 22
username = ftp_yunbo
password = s8v{8SJr

15
src/ChannelService.py

@ -0,0 +1,15 @@
import json
from Orm import Channel
from playhouse.shortcuts import model_to_dict, dict_to_model
class ChannelService:
    """Lookups and updates against the ``Channel`` table.

    The methods take no ``self`` and are therefore declared static, so they
    work both on the class and on an instance.
    """

    @staticmethod
    def getOneByChannelId(channelId):
        """Return the ``Channel`` row whose ``channelId`` matches, or ``None``."""
        return Channel.get_or_none(Channel.channelId == channelId)

    @staticmethod
    def updateTimeByChannelId(channelId, chageTime):
        """Set ``channelReptileTime`` to ``chageTime`` on the matching row(s)."""
        Channel.update(channelReptileTime=chageTime).where(
            Channel.channelId == channelId).execute()

    @staticmethod
    def getChannelList():
        """Return the executed query over all ``Channel`` rows."""
        return Channel.select().execute()

4
src/Contant.py

@ -0,0 +1,4 @@
db=""
logDir=""
startTime=""
endTime=""

16
src/DownloadInfoService.py

@ -0,0 +1,16 @@
from Orm import DownloadInfo
class DownloadService:
    """Inserts and updates for the ``DownloadInfo`` table.

    The methods take no ``self`` and are therefore declared static, so they
    work both on the class and on an instance.
    """

    @staticmethod
    def createOne(videoId, downloadType, tryTime, isFinished):
        """Insert a new ``DownloadInfo`` row with the given values."""
        DownloadInfo.create(
            videoId=videoId,
            downloadType=downloadType,
            tryTime=tryTime,
            isFinished=isFinished
        )

    @staticmethod
    def updateInfoByVideoId(videoId, tryTime, isFinished):
        """Set ``tryTime`` and ``isFinished`` on every row matching ``videoId``.

        The filter does not include ``downloadType``.
        """
        DownloadInfo.update(tryTime=tryTime, isFinished=isFinished).where(
            DownloadInfo.videoId == videoId).execute()

6
src/LoggerUtils.py

@ -0,0 +1,6 @@
from loguru import logger
import Contant
Logger = logger
def initLogger():
    """Attach a file sink under ``Contant.logDir`` to the shared loguru logger."""
    sink_path = Contant.logDir + "/main_{time}.log"
    sink_options = {
        "rotation": "500MB",
        "encoding": "utf-8",
        "enqueue": True,
        "compression": "zip",
        "retention": "10 days",
    }
    logger.add(sink_path, **sink_options)

67
src/Orm.py

@ -0,0 +1,67 @@
from peewee import *
import Contant
import argparse
from LoggerUtils import Logger
# The database path is read from the command line at import time so that `db`
# exists before the model classes below are defined.
# parse_known_args() is used instead of parse_args(): this module is imported
# by an entry script that owns the real CLI, and a flag this module does not
# declare must not abort the process during import.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
parser.add_argument("--start", type=str, default="")
parser.add_argument("--end", type=str, default="")
args, _unknown_args = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
def ormInit():
    """Call ``create_table()`` on Channel, Video and DownloadInfo, in that order."""
    for model in (Channel, Video, DownloadInfo):
        model.create_table()
class BaseModel(Model):
    """Common base class that binds every model to the module-level ``db``."""
    class Meta:
        database = db
# Channel information
class Channel(BaseModel):
    """A channel row, stored in the ``Channel`` table."""
    id = PrimaryKeyField()
    channelId = CharField(null=False)
    channelTitle = CharField(null=False)
    # YouTubeUtil copies this value into Video.videoLanguage for new videos.
    channelLanguage = CharField()
    # Nullable; YouTubeUtil writes the newest stored publish time here.
    channelReptileTime = CharField(null=True)
    class Meta:
        db_table = 'Channel'
# Video information
class Video(BaseModel):
    """A video row, stored in the ``Vidoes`` table."""
    id = PrimaryKeyField()
    videoId = CharField(null=False)
    channelId = CharField(null=False)
    videoTitle = CharField()
    # Created as 0; YouTubeUtil later stores the duration in seconds.
    videoLen = IntegerField()
    videoType = CharField()
    # Kept as text; VideoService orders by this column.
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()
    class Meta:
        # NOTE(review): 'Vidoes' is spelled this way in every Orm.py of the
        # commit; presumably it matches the shipped database - confirm before renaming.
        db_table = 'Vidoes'
# Download information
class DownloadInfo(BaseModel):
    """Download bookkeeping for a video, stored in the ``Download_info`` table."""
    id = PrimaryKeyField()
    videoId = CharField()
    # YouTubeUtil creates new rows with downloadType 1.
    downloadType = IntegerField()
    tryTime = IntegerField()
    isFinished = IntegerField()
    class Meta:
        db_table = 'Download_info'

0
src/SrcTest.py

31
src/VideoService.py

@ -0,0 +1,31 @@
import json
from Orm import Video
from playhouse.shortcuts import model_to_dict, dict_to_model
class VideoService:
    """Lookups and updates against the ``Video`` model.

    The methods take no ``self`` and are therefore declared static, so they
    work both on the class and on an instance.
    """

    @staticmethod
    def getOneByVideoId(videoId):
        """Return the ``Video`` row whose ``videoId`` matches, or ``None``."""
        return Video.get_or_none(Video.videoId == videoId)

    @staticmethod
    def createOne(videoId, channelId, videoTitle, videoLen, videoType, videoPublishTime, videoLanguage, isDownload):
        """Insert a new ``Video`` row with the given column values."""
        Video.create(videoId=videoId,
                     channelId=channelId,
                     videoTitle=videoTitle,
                     videoLen=videoLen,
                     videoType=videoType,
                     videoPublishTime=videoPublishTime,
                     videoLanguage=videoLanguage,
                     isDownload=isDownload)

    @staticmethod
    def updateLenByVideoId(videoId, len):
        """Set ``videoLen`` on the row(s) matching ``videoId``.

        The parameter name ``len`` shadows the builtin; it is kept so existing
        keyword callers continue to work.
        """
        Video.update(videoLen=len).where(Video.videoId == videoId).execute()

    @staticmethod
    def getLastVideoByChannelId(channelId):
        """Return the channel's video with the greatest ``videoPublishTime``.

        Uses ``.get()``, so it raises when the channel has no stored video;
        call ``checkExist`` first when that can happen.
        """
        return Video.select().where(Video.channelId == channelId).order_by(Video.videoPublishTime.desc()).get()

    @staticmethod
    def getFirstVideoByChannelId(channelId):
        """Return the channel's video with the smallest ``videoPublishTime``.

        Uses ``.get()``, so it raises when the channel has no stored video.
        """
        return Video.select().where(Video.channelId == channelId).order_by(Video.videoPublishTime).get()

    @staticmethod
    def checkExist(channelId):
        """Return whether at least one video is stored for ``channelId``."""
        query = Video.select().where(Video.channelId == channelId)
        return query.exists()

164
src/YouTubeUtils.py

@ -0,0 +1,164 @@
import httplib2
import googleapiclient.discovery
import googleapiclient.errors
from VideoService import VideoService
from ChannelService import ChannelService
from DownloadInfoService import DownloadService
from LoggerUtils import Logger
import operator
import time
import random
class YouTubeUtil:
    """Helpers around the YouTube Data API v3 client.

    API keys are read from ``api_key.txt`` (one key per line) each time a
    client is built; they are not kept in the source.
    """

    apiKeys = []
    apiIndex = 0

    @staticmethod
    def getYoutube():
        """Build a YouTube API client using a randomly chosen key.

        Requests go through the HTTP proxy at 127.0.0.1:7890 with a 10 second
        timeout.

        Raises:
            ValueError: if ``api_key.txt`` contains no non-blank line.
        """
        proxy_info = httplib2.ProxyInfo(
            proxy_type=httplib2.socks.PROXY_TYPE_HTTP, proxy_host="127.0.0.1", proxy_port=7890)
        http = httplib2.Http(timeout=10, proxy_info=proxy_info,
                             disable_ssl_certificate_validation=False)
        api_service_name = "youtube"
        api_version = "v3"
        # Load the keys, ignoring blank lines and surrounding whitespace
        # (including a trailing "\r").
        with open("api_key.txt", 'r') as file:
            YouTubeUtil.apiKeys = [key.strip() for key in file if key.strip()]
        if not YouTubeUtil.apiKeys:
            raise ValueError("api_key.txt contains no API key")
        YouTubeUtil.apiIndex = random.randint(0, len(YouTubeUtil.apiKeys)-1)
        apiKey = YouTubeUtil.apiKeys[YouTubeUtil.apiIndex]
        # Only a prefix of the key is logged, to keep the secret out of log files.
        Logger.info("当前APIKey:{},当前apiIndex:{}", apiKey[:6] + "...", YouTubeUtil.apiIndex)
        # Build the client object.
        youtube = googleapiclient.discovery.build(
            api_service_name, api_version, developerKey=apiKey, http=http
        )
        return youtube

    @staticmethod
    def getVidoeLen(videoIds):
        """Return the ``videos().list`` response (part ``contentDetails``).

        Args:
            videoIds: comma-separated video ids.
        """
        youtube = YouTubeUtil.getYoutube()
        request = youtube.videos().list(part="contentDetails", id=videoIds)
        return request.execute()

    @staticmethod
    def getVideoLenByStr(str):
        """Convert a duration such as ``PT1H2M3S`` or ``P1DT2H`` to seconds.

        The parameter name ``str`` shadows the builtin; it is kept so existing
        keyword callers continue to work.

        Raises:
            ValueError: if the text is not a recognised duration.
        """
        import re
        match = re.fullmatch(
            r"P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?", str)
        if match is None:
            raise ValueError("unsupported duration: {!r}".format(str))
        weeks, days, hours, minutes, seconds = (
            int(part) if part else 0 for part in match.groups())
        return (((weeks * 7 + days) * 24 + hours) * 60 + minutes) * 60 + seconds

    @staticmethod
    def _updateVideoLengths(videoIds):
        """Fetch the durations of ``videoIds`` and store them as seconds."""
        if not videoIds:
            return
        lenRes = YouTubeUtil.getVidoeLen(",".join(videoIds))
        for item in lenRes["items"]:
            tmpId = item["id"]
            videoLen = YouTubeUtil.getVideoLenByStr(
                item["contentDetails"]["duration"])
            VideoService.updateLenByVideoId(tmpId, videoLen)
            Logger.info(
                "更新时长,videoId:{},len:{}".format(tmpId, videoLen)
            )

    @staticmethod
    def getByChannelId(channelId, startTime, endTime):
        """Store every video the channel published between the two timestamps.

        Each video that is not yet stored gets a ``Video`` row and a type-1
        ``DownloadInfo`` row. Durations are fetched in batches of 10 ids, and
        the remainder of each result page is fetched at the end of the page.
        After each page the channel's ``channelReptileTime`` is set to the
        newest stored publish time. Returns ``None``; does nothing when the
        channel is unknown.
        """
        channel = ChannelService.getOneByChannelId(channelId)
        if channel is None:
            return
        videoLanguage = str(channel.channelLanguage)
        youtube = YouTubeUtil.getYoutube()
        request = youtube.search().list(
            part="snippet",
            channelId=channelId,
            maxResults=50,
            order="date",
            publishedAfter=startTime,
            publishedBefore=endTime,
            type="video",
        )
        response = request.execute()
        while True:
            # Ids stored on this page whose duration is still unknown.
            newIds = []
            for item in response["items"]:
                try:
                    videoId = item["id"]["videoId"]
                    publisTime = item["snippet"]["publishedAt"]
                    videoTitle = item["snippet"]["title"]
                    videoType = "video"
                    videoEntity = VideoService.getOneByVideoId(str(videoId))
                    if videoEntity is None:
                        VideoService.createOne(
                            videoId,
                            channelId,
                            videoTitle,
                            0,
                            videoType,
                            publisTime,
                            videoLanguage,
                            0,
                        )
                        DownloadService.createOne(videoId, 1, 0, 0)
                        newIds.append(str(videoId))
                        Logger.info(
                            "存储VideoUrl:https://www.youtube.com/watch?v=" + videoId
                        )
                    else:
                        Logger.info("已存在VideoId:{}".format(videoId))
                    if len(newIds) >= 10:
                        YouTubeUtil._updateVideoLengths(newIds)
                        # Cleared only after success, so a failed batch is
                        # retried together with the next video.
                        newIds = []
                except Exception as e:
                    # Keep going with the next video, but leave a trace.
                    Logger.error("处理视频失败:{}".format(e))
            # Update the batch that never reached 10 ids on this page.
            try:
                YouTubeUtil._updateVideoLengths(newIds)
            except Exception as e:
                Logger.error("更新时长失败:{}".format(e))
            # Remember the newest stored video; skipped while nothing is stored.
            if VideoService.checkExist(channelId):
                vidoeo = VideoService.getLastVideoByChannelId(channelId)
                ChannelService.updateTimeByChannelId(channelId, vidoeo.videoPublishTime)
            nextPageToken = response.get("nextPageToken")
            if not nextPageToken:
                Logger.info("no nextPageToken")
                break
            time.sleep(5)
            try:
                request = youtube.search().list(
                    part="snippet",
                    channelId=channelId,
                    maxResults=50,
                    order="date",
                    publishedAfter=startTime,
                    publishedBefore=endTime,
                    type="video",
                    pageToken=nextPageToken,
                )
                response = request.execute()
            except Exception as e:
                Logger.error(e)
                break

4
src/api_key.txt

@ -0,0 +1,4 @@
AIzaSyDJIKVldjWVeRSt3IBPAgredZsvldUDPhA
AIzaSyChPXesnVx6fweon_BckhR6UiJWvi5Ma4s
AIzaSyBI5i5vFZpQErMnEXKMf0VUS2Bel8jGrTk
AIzaSyAnmA0Ggy1yXsZZACfItmeZAa7wcmh6SbM

1
src/channelList.txt

@ -0,0 +1 @@
UCCLu5B_Ctsw4N20DJvDykOA 1

70
src/main.py

@ -0,0 +1,70 @@
import argparse
import time
import random
import Contant
import LoggerUtils
import Orm
from VideoService import VideoService
from YouTubeUtils import YouTubeUtil
from ChannelService import ChannelService
import requests
# py .\main.py --db=../db/youtube_prod.db --logDir=./logs --start="2021-03-14T00:00:01Z" --end="2024-03-14T00:00:01Z"
# py .\main.py --db=../db/youtube_prod.db --logDir=./logs --start="2024-03-14T00:00:01Z" --end="2024-04-25T00:00:01Z"
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="2023-08-10T00:00:01Z" --end="2023-09-12T00:00:01Z"
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="111" --end="222"
if __name__ == "__main__":
    # Entry point: for every enabled channel in channelList.txt, store the
    # videos published between --start and --end.
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--db", type=str, default="")
    parser.add_argument("--logDir", type=str, default="")
    parser.add_argument("--start", type=str, default="")
    parser.add_argument("--end", type=str, default="")
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    Contant.startTime = args.start
    Contant.endTime = args.end
    LoggerUtils.initLogger()
    Orm.ormInit()
    LoggerUtils.Logger.info("db:{},logDir:{}".format(Contant.db, Contant.logDir))
    LoggerUtils.Logger.info("starTime:{},endTime:{}".format(Contant.startTime, Contant.endTime))
    # Read the channels to fetch: one "<channelId> <enabled flag>" per line.
    # The with-block closes the file once all lines are read.
    with open("channelList.txt") as channelFile:
        channelList = [line.strip('\n') for line in channelFile]
    for channel_str in channelList:
        # split() tolerates repeated blanks and a trailing "\r"; blank or
        # incomplete lines are skipped instead of raising IndexError.
        fields = channel_str.split()
        if len(fields) < 2:
            continue
        channelId, is_enable = fields[0], fields[1]
        if is_enable == "1":
            LoggerUtils.Logger.info("channelId:{},startTime:{},endTime:{}".format(channelId, Contant.startTime, Contant.endTime))
            YouTubeUtil.getByChannelId(channelId, Contant.startTime, Contant.endTime)
            sleep_time = random.randint(3, 10)
            LoggerUtils.Logger.info("{}获取完毕,暂停{}", channelId, sleep_time)
            time.sleep(sleep_time)
    # The DingTalk completion notification is intentionally disabled here.

10
start_download.sh

@ -0,0 +1,10 @@
#!/bin/bash
# Append one timestamped "[download] [info]" line to the shared run log.
#   $1 - message text
log() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '%s [download] [info] %s\n' "$stamp" "$1" >> /mnt/youtube_prod/running.log
}

cd /mnt/youtube_prod/download
# /mnt/youtube_prod/start_download.sh
log "开始执行download..."
# Start the downloader in the background, detached from the terminal:
# stdout is discarded and stderr overwrites err.log on every start.
nohup python3 ./main_download.py \
    --db="../db/youtube_prod.db" \
    --logDir="./logs" \
    > /dev/null 2> /mnt/youtube_prod/err.log &

11
start_sftp.sh

@ -0,0 +1,11 @@
#!/bin/bash
# Append one timestamped "[sftp] [info]" line to the shared run log.
#   $1 - message text
log() {
    local time_now
    time_now=$(date '+%Y-%m-%d %H:%M:%S')
    # Tag the line with this script's own stage name so entries in the shared
    # log can be told apart from the download stage.
    echo "$time_now [sftp] [info] $1" >> /mnt/youtube_prod/running.log
}

# Stop if the working directory is missing: continuing would run the wrong
# (or no) sftp.py and then still delete the staging directory below.
cd /mnt/youtube_prod/sftp || exit 1
# /mnt/youtube_prod/start_sftp.sh
log "开始执行sftp..."
# Only delete the local staging directory after sftp.py reports success, so a
# failed transfer does not throw away files that were never uploaded.
# NOTE(review): assumes sftp.py exits non-zero on failure - confirm.
if python3 ./sftp.py --local="/mnt/tmp_srt_file" --logDir="./logs"; then
    rm -rf /mnt/tmp_srt_file
else
    log "sftp.py failed, keeping /mnt/tmp_srt_file"
fi

12
start_src.sh

@ -0,0 +1,12 @@
#!/bin/bash
# Append one timestamped "[src] [info]" line to the shared run log.
#   $1 - message text
log() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '%s [src] [info] %s\n' "$stamp" "$1" >> /mnt/youtube_prod/running.log
}

cd /mnt/youtube_prod/src
# Crawl window: from one day ago until now.
# NOTE(review): date is called without -u, so these are local-time values with
# a literal "Z" suffix - confirm the host clock is set to UTC.
start=$(date '+%Y-%m-%dT%H:%M:%SZ' -d'-1 day')
end=$(date '+%Y-%m-%dT%H:%M:%SZ')
log "开始执行src...startTime:${start},endTime:${end}"
# /mnt/youtube_prod/start_src.sh
# Start the crawler in the background; stdout is discarded and stderr
# overwrites err.log on every start.
nohup python3 ./main.py \
    --db="../db/youtube_prod.db" \
    --logDir="./logs" \
    --start="$start" \
    --end="$end" \
    > /dev/null 2> /mnt/youtube_prod/err.log &

4
stop_download.sh

@ -0,0 +1,4 @@
#!/bin/bash
# Look up the downloader by command line. pgrep never reports itself, whereas
# "ps -ef | grep main_download" also lists the grep process and could hand
# kill a PID that has nothing to do with the downloader.
pids=$(pgrep -f main_download)
if [ -z "$pids" ]; then
    # Nothing to stop; never call kill with an empty argument list.
    echo "main_download is not running" >&2
    exit 1
fi
echo "$pids"
# Intentionally unquoted: pgrep prints one PID per line and every matching
# process should be killed.
kill -9 $pids

2
test.sh

@ -0,0 +1,2 @@
#!/bin/bash
# Smoke test: print a fixed marker line.
printf '%s\n' "test"

42
test/test.py

@ -0,0 +1,42 @@
# import httplib2
# import googleapiclient.discovery
# import googleapiclient.errors
# def getYoutube():
# proxy_info = httplib2.ProxyInfo(
# proxy_type=httplib2.socks.PROXY_TYPE_HTTP, proxy_host="127.0.0.1", proxy_port=7890)
# http = httplib2.Http(timeout=10, proxy_info=proxy_info,
# disable_ssl_certificate_validation=False)
# # http = httplib2.Http(timeout=10, disable_ssl_certificate_validation=False)
# api_service_name = "youtube"
# api_version = "v3"
# # 获取apiKey
# apiKey = "AIzaSyARaW3mqO9szQiHgWZR4el0HWvdyheSHBc"
# # 获取对象
# youtube = googleapiclient.discovery.build(
# api_service_name, api_version, developerKey=apiKey, http=http
# )
# return youtube
# youtube = getYoutube()
# request = youtube.videos().list(part="statistics", id="9l7O_2KNomQ")
# response = request.execute()
# print(response)
# response = {'kind': 'youtube#videoListResponse', 'etag': 'I41mEoQqqiB5sxwKKu8X3wNWkB8', 'items': [{'kind': 'youtube#video', 'etag': 'mncS6_AC9-Y6HUjjt_A4ocpWVY4', 'id': '9l7O_2KNomQ', 'statistics': {'viewCount': '47212', 'likeCount': '2126', 'favoriteCount': '0', 'commentCount': '172'}}], 'pageInfo': {'totalResults': 1, 'resultsPerPage': 1}}
# print(response['items'][0]['statistics']['viewCount'])
# Scratch check of the comma-separated counter format: 30 slots, all "0".
countStr = ",".join("0" for _ in range(30))
print(countStr.split(","))

# Overwrite the first slot with an int and show the mixed list.
slots = countStr.split(",")
slots[0] = 1
print(slots)

# Serialise the slots back into a single comma-separated string.
countStr = ",".join(str(slots[position]) for position in range(30))
print(countStr)

8
test/test2.py

@ -0,0 +1,8 @@
from youtube_transcript_api import YouTubeTranscriptApi
# Scratch script: fetch transcript metadata and a Hindi transcript.
# Language code kept for reference: zh-Hant
url = "https://www.youtube.com/watch?v=rhj42pLWa5s"

# Both lookups run before anything is printed.
available = YouTubeTranscriptApi.list_transcripts("jtr9VBwwJ7M")
requestedLanguages = ['hi']
videoSrt = YouTubeTranscriptApi.get_transcript("KWlTphpCpcI", languages=requestedLanguages)

for result in (available, videoSrt):
    print(result)

15
view_count/ChannelService.py

@ -0,0 +1,15 @@
import json
from Orm import Channel
from playhouse.shortcuts import model_to_dict, dict_to_model
class ChannelService:
    """Query helpers for the ``Channel`` table.

    Every method is a static method: none of them takes ``self``, and they
    are meant to be called on the class (``ChannelService.getChannelList()``).
    Declaring them with ``@staticmethod`` also keeps them working when called
    on an instance.
    """

    @staticmethod
    def getOneByChannelId(channelId):
        """Return the ``Channel`` row with this ``channelId``, or ``None``."""
        return Channel.get_or_none(Channel.channelId == channelId)

    @staticmethod
    def updateTimeByChannelId(channelId, chageTime):
        """Set ``channelReptileTime`` to ``chageTime`` for the given channel."""
        Channel.update(channelReptileTime=chageTime).where(
            Channel.channelId == channelId).execute()

    @staticmethod
    def getChannelList():
        """Execute an unfiltered select on ``Channel`` and return the result."""
        return Channel.select().execute()

4
view_count/Contant.py

@ -0,0 +1,4 @@
# Process-wide settings shared between modules. Every value starts empty and
# is overwritten at start-up by whichever script imports this module.

# Database path handed to the ORM.
db = ""
# Directory that receives the log files.
logDir = ""
# Time-window bounds.
# NOTE(review): nothing visible in view_count/ assigns these - confirm they
# are still needed here.
startTime = ""
endTime = ""

6
view_count/LoggerUtils.py

@ -0,0 +1,6 @@
from loguru import logger
import Contant
# Alias under which the other modules import the shared loguru logger.
Logger = logger


def initLogger():
    """Register a log-file sink located under ``Contant.logDir``.

    Must run after ``Contant.logDir`` has been assigned, because the sink
    path is built from its value at call time.
    """
    sinkPath = Contant.logDir + "/main_{time}.log"
    logger.add(
        sinkPath,
        encoding="utf-8",
        rotation="500MB",
        retention="10 days",
        compression="zip",
        enqueue=True,
    )

75
view_count/Orm.py

@ -0,0 +1,75 @@
from peewee import *
import Contant
import argparse
from LoggerUtils import Logger
# The database path has to be known at import time because the model classes
# below bind to ``db`` when they are defined, so the command line is read here.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
# parse_known_args() instead of parse_args(): this module is imported by
# scripts that may define further options of their own, and parse_args()
# would terminate the whole process on the first flag it does not know.
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
def ormInit():
    """Call ``create_table()`` on every model, in declaration order."""
    for model in (Channel, Video, DownloadInfo, ViewCountInfo):
        model.create_table()
class BaseModel(Model):
    """Common base class that binds every model to the module-level ``db``."""

    class Meta:
        # All subclasses share the SQLite database opened at import time.
        database = db
# Channel information
class Channel(BaseModel):
    """One row per channel, stored in the ``Channel`` table."""

    # NOTE(review): PrimaryKeyField and Meta.db_table look like the older
    # peewee spellings (AutoField / table_name in peewee 3) - confirm against
    # the installed peewee version before changing them.
    id = PrimaryKeyField()
    # Channel identifier and title; both are required.
    channelId = CharField(null=False)
    channelTitle = CharField(null=False)
    channelLanguage = CharField()
    # Optional; written by ChannelService.updateTimeByChannelId.
    channelReptileTime = CharField(null=True)

    class Meta:
        db_table = 'Channel'
# Video information
class Video(BaseModel):
    """One row per video, stored in the ``Vidoes`` table."""

    id = PrimaryKeyField()
    # Identifier of the video and of the channel it belongs to; both required.
    videoId = CharField(null=False)
    channelId = CharField(null=False)
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    # Stored as text; VideoService orders and range-filters on this column.
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()

    class Meta:
        # NOTE(review): 'Vidoes' is presumably a typo for 'Videos'. It is left
        # unchanged because renaming would point the model at another table.
        db_table = 'Vidoes'
# Download information
class DownloadInfo(BaseModel):
    """Download bookkeeping for a video, stored in ``Download_info``."""

    id = PrimaryKeyField()
    videoId = CharField()
    # Integer-coded columns; the meaning of the codes is not defined in this
    # module - TODO confirm against the download service.
    downloadType = IntegerField()
    tryTime = IntegerField()
    isFinished = IntegerField()

    class Meta:
        db_table = 'Download_info'
# View-count information
class ViewCountInfo(BaseModel):
    """View counts of a video, stored in ``ViewCount_info``."""

    id = PrimaryKeyField()
    videoId = CharField()
    # Text column; ViewCountService writes a comma-separated string of 30
    # per-day slots into it.
    viewCount = CharField()

    class Meta:
        db_table = 'ViewCount_info'

33
view_count/VideoCountService.py

@ -0,0 +1,33 @@
import json
from Orm import ViewCountInfo
from playhouse.shortcuts import model_to_dict, dict_to_model
class ViewCountService:
    """Persistence helpers for the per-video ``ViewCountInfo`` rows.

    ``ViewCountInfo.viewCount`` is a comma-separated string with one slot per
    day; slots that were never written hold ``"0"``.
    """

    # Number of slots kept in the comma-separated ``viewCount`` string.
    SLOT_COUNT = 30

    @staticmethod
    def _withDayCount(countStr, day, count):
        """Return ``countStr`` with the 1-based slot ``day`` set to ``count``.

        Args:
            countStr: Existing comma-separated slots; ``""`` for a new row.
            day: Slot number, from 1 to ``SLOT_COUNT`` inclusive.
            count: New value for the slot; stored via ``str(count)``.

        Returns:
            A string with exactly ``SLOT_COUNT`` comma-separated slots.

        Raises:
            ValueError: If ``day`` is outside ``1..SLOT_COUNT``. Without this
                check ``day=0`` or a negative day would silently overwrite a
                slot counted from the end.
        """
        slotCount = ViewCountService.SLOT_COUNT
        if not 1 <= day <= slotCount:
            raise ValueError(
                "day must be between 1 and {}, got {!r}".format(slotCount, day))
        slots = countStr.split(",") if countStr else []
        # Pad short strings with "0" and drop surplus slots so the result
        # always has exactly slotCount entries.
        slots = (slots + ["0"] * slotCount)[:slotCount]
        slots[day - 1] = str(count)
        return ",".join(slots)

    @staticmethod
    def createOrUpdateOne(videoId, day, count):
        """Store ``count`` in slot ``day`` of the row for ``videoId``.

        A row with all other slots set to ``"0"`` is created when the video
        has none yet; otherwise the existing slots are kept and only slot
        ``day`` is replaced.

        Args:
            videoId: Video identifier used to look up the row.
            day: 1-based slot number, from 1 to ``SLOT_COUNT``.
            count: View count to store.

        Raises:
            ValueError: If ``day`` is outside ``1..SLOT_COUNT``.
        """
        # One lookup decides between insert and update.
        existing = ViewCountInfo.get_or_none(ViewCountInfo.videoId == videoId)
        if existing is None:
            ViewCountInfo.create(
                videoId=videoId,
                viewCount=ViewCountService._withDayCount("", day, count))
        else:
            updated = ViewCountService._withDayCount(
                existing.viewCount, day, count)
            ViewCountInfo.update(viewCount=updated).where(
                ViewCountInfo.videoId == videoId).execute()

34
view_count/VideoService.py

@ -0,0 +1,34 @@
import json
from Orm import Video
from playhouse.shortcuts import model_to_dict, dict_to_model
class VideoService:
    """Query helpers for the ``Video`` table.

    Every method is a static method: none of them takes ``self``, and they
    are meant to be called on the class (``VideoService.checkExist(...)``).
    Declaring them with ``@staticmethod`` also keeps them working when called
    on an instance.
    """

    @staticmethod
    def getOneByVideoId(videoId):
        """Return the ``Video`` row with this ``videoId``, or ``None``."""
        return Video.get_or_none(Video.videoId == videoId)

    @staticmethod
    def createOne(videoId, channelId, videoTitle, videoLen, videoType, videoPublishTime, videoLanguage, isDownload):
        """Insert a new ``Video`` row built from the given column values."""
        Video.create(videoId=videoId,
                     channelId=channelId,
                     videoTitle=videoTitle,
                     videoLen=videoLen,
                     videoType=videoType,
                     videoPublishTime=videoPublishTime,
                     videoLanguage=videoLanguage,
                     isDownload=isDownload)

    @staticmethod
    def updateLenByVideoId(videoId, len):
        """Set ``videoLen`` to ``len`` for the video with this ``videoId``."""
        # ``len`` shadows the builtin inside this method; the name is kept
        # because it is part of the signature callers may use by keyword.
        Video.update(videoLen=len).where(Video.videoId == videoId).execute()

    @staticmethod
    def getLastVideoByChannelId(channelId):
        """Return the channel's video with the greatest ``videoPublishTime``.

        ``.get()`` raises ``Video.DoesNotExist`` when the channel has no
        videos; use ``checkExist`` first if that can happen.
        """
        return Video.select().where(Video.channelId == channelId).order_by(Video.videoPublishTime.desc()).get()

    @staticmethod
    def getFirstVideoByChannelId(channelId):
        """Return the channel's video with the smallest ``videoPublishTime``.

        ``.get()`` raises ``Video.DoesNotExist`` when the channel has no
        videos; use ``checkExist`` first if that can happen.
        """
        return Video.select().where(Video.channelId == channelId).order_by(Video.videoPublishTime).get()

    @staticmethod
    def checkExist(channelId):
        """Return whether at least one video is stored for ``channelId``."""
        query = Video.select().where(Video.channelId == channelId)
        return query.exists()

    @staticmethod
    def getVideosByTime(startTime, endTime):
        """Return the videos with ``startTime <= videoPublishTime <= endTime``.

        ``videoPublishTime`` is a text column, so the bounds are compared as
        strings and must use the same timestamp format as the stored values.
        """
        return Video.select().where(Video.videoPublishTime >= startTime, Video.videoPublishTime <= endTime).execute()

79
view_count/view_count_main.py

@ -0,0 +1,79 @@
import argparse
import random
import time
import Contant
from LoggerUtils import Logger, initLogger
import Orm
from VideoService import VideoService
from ChannelService import ChannelService
from VideoCountService import ViewCountService
from func_timeout import func_set_timeout
import func_timeout
import requests
import httplib2
import googleapiclient.discovery
import googleapiclient.errors
import datetime
def getYoutube():
    """Build and return a YouTube Data API v3 client.

    Requests go through an HTTP proxy on 127.0.0.1:7890 with a 10 second
    timeout and certificate validation enabled.
    """
    # NOTE(review): the API key and the proxy address are hard-coded in
    # source - consider loading them from configuration and rotating the key.
    apiKey = "AIzaSyARaW3mqO9szQiHgWZR4el0HWvdyheSHBc"

    proxy = httplib2.ProxyInfo(
        proxy_type=httplib2.socks.PROXY_TYPE_HTTP,
        proxy_host="127.0.0.1",
        proxy_port=7890,
    )
    # Direct access without the proxy would be:
    # httplib2.Http(timeout=10, disable_ssl_certificate_validation=False)
    transport = httplib2.Http(
        timeout=10,
        proxy_info=proxy,
        disable_ssl_certificate_validation=False,
    )
    return googleapiclient.discovery.build(
        "youtube", "v3", developerKey=apiKey, http=transport
    )
def updateVideoViewCount(startTime, endTime):
list = VideoService.getVideosByTime(startTime, endTime)
videoCount = 0
videosRequest = ""
youtube = getYoutube()
for video in list:
videoCount = videoCount + 1
Logger.info(video.videoId)
videosRequest = videosRequest + "," + video.videoId
if videoCount == 30 or videoCount == len(list):
request = youtube.videos().list(part="statistics", id=videosRequest)
response = request.execute()
for item in response['items']:
Logger.info(item)
ViewCountService.createOrUpdateOne(
item['id'], 1, item['statistics']['viewCount'])
videosRequest = ""
videoCount = 0
# python ./view_count_main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="2024-01-03T00:00:00Z" --end="2024-01-04T00:00:00Z"
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
args = parser.parse_args()
Contant.db = args.db
Contant.logDir = args.logDir
initLogger()
Orm.ormInit()
# 查询30天内的所有视屏
now = datetime.datetime.now()
zero_today = now.replace(hour=0, minute=0, second=0, microsecond=0)
end_today = now.replace(hour=23, minute=59, second=59, microsecond=0)
for i in range(1, 31):
startTime = zero_today+datetime.timedelta(days=-i)
endTime = end_today+datetime.timedelta(days=-i)
startTime = startTime.strftime("%y-%m-%dT%H:%S:%MZ")
endTime = endTime.strftime("%y-%m-%dT%H:%S:%MZ")
Logger.info("startTime:%s, endTime:%s" % (startTime, endTime))
updateVideoViewCount(startTime, endTime)
# zero_today = zero_today.strftime("%y-%m-%dT%H:%S:%MZ")
# print(zero_today)
Loading…
Cancel
Save