zhangshu
7 months ago
48 changed files with 1511 additions and 1 deletions
@ -1,2 +1,3 @@ |
|||||
# youtube_prod |
# youtube_srt |
||||
|
|
||||
|
Youtube字幕项目 |
Binary file not shown.
Binary file not shown.
@ -0,0 +1,7 @@ |
|||||
|
import json |
||||
|
from Orm import Channel |
||||
|
from playhouse.shortcuts import model_to_dict, dict_to_model |
||||
|
|
||||
|
class ChannelService:
    """Read access to rows of the Channel model.

    Methods take no ``self`` and are called on the class itself.
    """

    def getOneByChannelId(channelId):
        """Return the Channel whose ``channelId`` equals the argument, or None."""
        matchesId = Channel.channelId == channelId
        return Channel.get_or_none(matchesId)
@ -0,0 +1,2 @@ |
|||||
|
# Database location shared between modules; empty until a script assigns it.
db = ""

# Log directory shared between modules; empty until a script assigns it.
logDir = ""
@ -0,0 +1,28 @@ |
|||||
|
from Orm import DownloadInfo |
||||
|
|
||||
|
|
||||
|
class DownloadService:
    """Queries and updates for rows of the DownloadInfo model.

    Methods take no ``self`` and are called on the class itself.
    """

    def getOneByVideoId(videoId, downloadType):
        """Return the row for (videoId, downloadType), or None when absent.

        Callers test the result with ``is not None``, so the lookup must not
        raise ``DoesNotExist`` for a missing row; ``get_or_none`` guarantees
        that.
        """
        return DownloadInfo.get_or_none(
            DownloadInfo.videoId == videoId,
            DownloadInfo.downloadType == downloadType)

    def createOne(videoId, downloadType, tryTime, isFinished):
        """Insert a new DownloadInfo row with the given column values."""
        DownloadInfo.create(
            videoId=videoId,
            downloadType=downloadType,
            tryTime=tryTime,
            isFinished=isFinished
        )

    def updateInfoByVideoId(videoId, tryTime, isFinished, downloadType):
        """Set tryTime and isFinished on the row matching (videoId, downloadType)."""
        DownloadInfo.update(tryTime=tryTime, isFinished=isFinished).where(
            DownloadInfo.videoId == videoId,
            DownloadInfo.downloadType == downloadType).execute()

    def findNotFinishList():
        """Return up to 10 unfinished rows of downloadType 1 with tryTime <= 5."""
        return DownloadInfo.select().where(
            DownloadInfo.isFinished == 0,
            DownloadInfo.tryTime <= 5,
            DownloadInfo.downloadType == 1).limit(10).execute()

    def changeDownloadType(videoId, tryTime, isFinished, downloadType, changeType):
        """Move the row matching (videoId, downloadType) to ``changeType``.

        tryTime and isFinished are overwritten in the same statement.
        """
        DownloadInfo.update(
            tryTime=tryTime, isFinished=isFinished, downloadType=changeType).where(
            DownloadInfo.videoId == videoId,
            DownloadInfo.downloadType == downloadType).execute()

    def findNotFinishListTwo():
        """Return up to 10 unfinished rows of downloadType 2 with tryTime <= 5."""
        return DownloadInfo.select().where(
            DownloadInfo.isFinished == 0,
            DownloadInfo.tryTime <= 5,
            DownloadInfo.downloadType == 2).limit(10).execute()
@ -0,0 +1,172 @@ |
|||||
|
from shutil import copyfile |
||||
|
from youtube_transcript_api import YouTubeTranscriptApi |
||||
|
from youtube_transcript_api.formatters import SRTFormatter |
||||
|
from VideoService import VideoService |
||||
|
from ChannelService import ChannelService |
||||
|
from DownloadInfoService import DownloadService |
||||
|
from LoggerUtils import Logger |
||||
|
import time |
||||
|
import os |
||||
|
from func_timeout import func_set_timeout |
||||
|
import operator |
||||
|
|
||||
|
|
||||
|
class DownLoadUtil:
    """Fetch YouTube transcripts and store them as .srt files.

    ``downloadOne`` and ``downloadTwo`` take no ``self`` and are called on the
    class itself; each call is aborted by ``func_set_timeout`` after 60
    seconds. Progress is recorded through VideoService and DownloadService.
    """

    formatter = SRTFormatter()

    # NOTE(review): not referenced by any method of this class.
    proxies = {"http": "http://127.0.0.1:7890",
               "https": "https://127.0.0.1:7890"}

    # (character, replacement) pairs applied to a title before it becomes
    # part of a file name.
    # NOTE(review): the "?" pair maps the character to itself as written;
    # presumably a full-width question mark was intended -- confirm.
    _TITLE_REPLACEMENTS = (
        ("/", u"\u2215"),
        ("?", "?"),
        ("\\", ""),
        ("|", ""),
        ("<", ""),
        (">", ""),
        (":", ""),
    )

    @staticmethod
    def _loadVideoAndChannel(videoId):
        """Return (video, channel); either element is None when its row is missing."""
        video = VideoService.getOneByVideoId(videoId)
        if video is None:
            return None, None
        return video, ChannelService.getOneByChannelId(str(video.channelId))

    @staticmethod
    def _describe(video):
        """Return (publishDate, language, sanitizedTitle) taken from a Video row."""
        title = str(video.videoTitle)
        for old, new in DownLoadUtil._TITLE_REPLACEMENTS:
            title = title.replace(old, new)
        # Keep only the part of the publish time before the "T".
        publishDate = str(video.videoPublishTime).split("T")[0]
        return publishDate, str(video.videoLanguage), title

    @staticmethod
    def _ensureDir(path):
        """Create ``path`` (and parents) when it does not exist yet."""
        if not os.path.exists(path):
            Logger.info("开始创建文件夹:" + path)
            os.makedirs(path)

    @staticmethod
    def _shorten(storePath, cpPath):
        """Drop the last 20 characters of both paths when cpPath exceeds 120 chars."""
        if len(cpPath) > 120:
            return storePath[:-20] + ".srt", cpPath[:-20] + ".srt"
        return storePath, cpPath

    @staticmethod
    def _fetchSrt(videoId, languages):
        """Download the transcript in ``languages`` and return it as SRT text."""
        transcript = YouTubeTranscriptApi.get_transcript(
            videoId, languages=[languages])
        return DownLoadUtil.formatter.format_transcript(transcript)

    @staticmethod
    def _writeSrt(path, srtText):
        """Write ``srtText`` to ``path`` as UTF-8."""
        with open(path, 'w', encoding='utf-8') as srt_file:
            srt_file.write(srtText)

    @staticmethod
    def _markFinished(videoId, downloadType):
        """Flag the video as downloaded and close its DownloadInfo row."""
        VideoService.updateIsDownloadByVideoId(videoId, 1)
        downloadInfo = DownloadService.getOneByVideoId(videoId, downloadType)
        if downloadInfo is not None:
            DownloadService.updateInfoByVideoId(
                videoId, downloadInfo.tryTime + 1, 1, downloadType)

    @staticmethod
    def _recordRetry(videoId, downloadType, downloadInfo):
        """Increment tryTime on an existing DownloadInfo row, leaving it unfinished."""
        if downloadInfo is not None:
            Logger.info("VideoId:{}开始重试第{}次".format(
                videoId, downloadInfo.tryTime + 1))
            DownloadService.updateInfoByVideoId(
                videoId, downloadInfo.tryTime + 1, 0, downloadType)

    @staticmethod
    def _storeUnderVideoId(videoId, channelTitle, publishDate, languages):
        """Fallback for over-long names: store the file under the video id instead of the title."""
        srtText = DownLoadUtil._fetchSrt(videoId, languages)
        fileName = publishDate + "-" + languages + "-" + videoId + ".srt"
        storePath, cpPath = DownLoadUtil._shorten(
            "/mnt/srt_file/" + channelTitle + "/" + fileName,
            "/mnt/tmp_srt_file/" + channelTitle + "/" + fileName)
        Logger.info("文件名过长,文件地址...{}".format(storePath))
        DownLoadUtil._writeSrt(storePath, srtText)
        Logger.info("下载完成...{}".format(videoId))
        copyfile(storePath, cpPath)
        DownLoadUtil._markFinished(videoId, 1)

    @func_set_timeout(60)
    def downloadOne(videoId):
        """Download the transcript of ``videoId`` for download type 1.

        The .srt file is written below /mnt/srt_file/<channel title>/ and
        copied to /mnt/tmp_srt_file/<channel title>/. Failures are logged and
        recorded on the DownloadInfo row: "No transcripts" moves the row to
        download type 2, "File name too long" retries with the video id as
        file name, anything else counts as one more attempt.
        """
        video, channel = DownLoadUtil._loadVideoAndChannel(videoId)
        if video is None or channel is None:
            # Without both rows no path can be built; count it as a failed
            # attempt instead of raising AttributeError.
            Logger.error("VideoId:{},缺少视频或频道数据".format(videoId))
            DownLoadUtil._recordRetry(
                videoId, 1, DownloadService.getOneByVideoId(videoId, 1))
            return

        publishDate, languages, videoTitle = DownLoadUtil._describe(video)
        channelTitle = str(channel.channelTitle)
        Logger.info("开始下载...{}".format(videoId))
        try:
            storeDir = "/mnt/srt_file/" + channelTitle
            cpDir = "/mnt/tmp_srt_file/" + channelTitle
            DownLoadUtil._ensureDir(storeDir)
            DownLoadUtil._ensureDir(cpDir)
            fileName = publishDate + "-" + languages + "-" + videoTitle + ".srt"
            storePath, cpPath = DownLoadUtil._shorten(
                storeDir + "/" + fileName, cpDir + "/" + fileName)
            srtText = DownLoadUtil._fetchSrt(videoId, languages)
            Logger.info("文件地址...{}".format(storePath))
            DownLoadUtil._writeSrt(storePath, srtText)
            Logger.info("下载完成...{}".format(videoId))
            copyfile(storePath, cpPath)
            DownLoadUtil._markFinished(videoId, 1)
        except Exception as e:
            Logger.error("下载失败...{}".format(videoId))
            logStr = "Exception...{}".format(e)
            Logger.error(logStr)
            downloadInfo = DownloadService.getOneByVideoId(videoId, 1)
            if "No transcripts" in logStr:
                Logger.error("VideoId:{},不存在字幕文件".format(videoId))
                if downloadInfo is not None:
                    DownloadService.changeDownloadType(videoId, 0, 0, 1, 2)
            elif "File name too long" in logStr:
                try:
                    DownLoadUtil._storeUnderVideoId(
                        videoId, channelTitle, publishDate, languages)
                except Exception as retryError:
                    # The fallback must not escape the handler; record the
                    # failure as an ordinary retry.
                    Logger.error("Exception...{}".format(retryError))
                    DownLoadUtil._recordRetry(videoId, 1, downloadInfo)
            else:
                DownLoadUtil._recordRetry(videoId, 1, downloadInfo)

    @func_set_timeout(60)
    def downloadTwo(videoId):
        """Download the transcript of ``videoId`` for download type 2.

        The .srt file is written below ./download/<channel title>/. On
        "No transcripts" the DownloadInfo row is moved to download type 3
        with tryTime 6; any other failure counts as one more attempt.
        """
        # VideoService.getOneByVideoId accepts only the video id.
        video, channel = DownLoadUtil._loadVideoAndChannel(videoId)
        if video is None or channel is None:
            Logger.error("VideoId:{},缺少视频或频道数据".format(videoId))
            DownLoadUtil._recordRetry(
                videoId, 2, DownloadService.getOneByVideoId(videoId, 2))
            return

        publishDate, languages, videoTitle = DownLoadUtil._describe(video)
        Logger.info("开始下载...{}".format(videoId))
        try:
            storeDir = "./download/" + str(channel.channelTitle)
            DownLoadUtil._ensureDir(storeDir)
            # os.path.join places the file inside storeDir on every platform;
            # a literal backslash is only a separator on Windows.
            storePath = os.path.join(
                storeDir,
                publishDate + "-" + languages + "-" + videoTitle + ".srt")
            srtText = DownLoadUtil._fetchSrt(videoId, languages)
            Logger.info("文件地址...{}".format(storePath))
            DownLoadUtil._writeSrt(storePath, srtText)
            Logger.info("下载完成...{}".format(videoId))
            DownLoadUtil._markFinished(videoId, 2)
        except Exception as e:
            Logger.error("下载失败...{}".format(videoId))
            logStr = "Exception...{}".format(e)
            Logger.error(logStr)
            downloadInfo = DownloadService.getOneByVideoId(videoId, 2)
            if "No transcripts" in logStr:
                Logger.error("VideoId:{},不存在字幕文件".format(videoId))
                if downloadInfo is not None:
                    DownloadService.changeDownloadType(videoId, 6, 0, 2, 3)
            else:
                DownLoadUtil._recordRetry(videoId, 2, downloadInfo)
@ -0,0 +1,6 @@ |
|||||
|
from loguru import logger |
||||
|
import Contant |
||||
|
# Project-wide alias for the loguru logger object.
Logger = logger


def initLogger():
    """Register a file sink named download_{time}.log under Contant.logDir.

    The sink rotates at 500MB, zips rotated files, keeps them for 10 days
    and is written through a queue (enqueue=True).
    """
    sinkPath = Contant.logDir+"/download_{time}.log"
    logger.add(
        sinkPath,
        rotation="500MB",
        encoding="utf-8",
        enqueue=True,
        compression="zip",
        retention="10 days",
    )
@ -0,0 +1,65 @@ |
|||||
|
from peewee import * |
||||
|
import Contant |
||||
|
import argparse |
||||
|
from LoggerUtils import Logger |
||||
|
|
||||
|
# The command line is read at import time because the database handle must
# exist before the model classes below are declared.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
# parse_known_args() ignores options that belong to the importing script;
# parse_args() would end the process with a usage error on any of them.
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
||||
|
|
||||
|
|
||||
|
def ormInit():
    """Call create_table() on Channel, Video and DownloadInfo, in that order."""
    for model in (Channel, Video, DownloadInfo):
        model.create_table()
||||
|
|
||||
|
|
||||
|
class BaseModel(Model):
    """Common base that binds every model to the module-level ``db``."""

    class Meta:
        # Database handle created above from Contant.db.
        database = db
||||
|
|
||||
|
# Channel information
class Channel(BaseModel):
    """Channel record stored in table 'Channel'."""

    id = PrimaryKeyField()
    # Fields are NOT NULL unless null=True is given (peewee default).
    channelId = CharField()
    channelTitle = CharField()
    channelLanguage = CharField()
    channelReptileTime = CharField(null=True)

    class Meta:
        db_table = 'Channel'
||||
|
|
||||
|
# Video information
class Video(BaseModel):
    """Video record stored in table 'Vidoes' (spelling as used by the database)."""

    id = PrimaryKeyField()
    # Fields are NOT NULL unless null=True is given (peewee default).
    videoId = CharField()
    channelId = CharField()
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()

    class Meta:
        db_table = 'Vidoes'
||||
|
|
||||
|
# Download information
class DownloadInfo(BaseModel):
    """Download bookkeeping record stored in table 'Download_info'."""

    id = PrimaryKeyField()
    videoId = CharField()
    # Integer columns; the services compare downloadType and isFinished
    # against small integer codes.
    downloadType = IntegerField()
    tryTime = IntegerField()
    isFinished = IntegerField()

    class Meta:
        db_table = 'Download_info'
@ -0,0 +1,26 @@ |
|||||
|
import json |
||||
|
from Orm import Video |
||||
|
from playhouse.shortcuts import model_to_dict, dict_to_model |
||||
|
|
||||
|
|
||||
|
class VideoService:
    """Create, read and update helpers for the Video model.

    Methods take no ``self`` and are called on the class itself.
    """

    def getOneByVideoId(videoId):
        """Return the Video whose ``videoId`` matches, or None."""
        matchesId = Video.videoId == videoId
        return Video.get_or_none(matchesId)

    def createOne(videoId, channelId, videoTitle, videoLen, videoType, videoPublishTime, videoLanguage, isDownload):
        """Insert one Video row built from the arguments."""
        columns = dict(
            videoId=videoId,
            channelId=channelId,
            videoTitle=videoTitle,
            videoLen=videoLen,
            videoType=videoType,
            videoPublishTime=videoPublishTime,
            videoLanguage=videoLanguage,
            isDownload=isDownload,
        )
        Video.create(**columns)

    def updateLenByVideoId(videoId, len):
        """Store ``len`` in videoLen for the rows matching ``videoId``."""
        statement = Video.update(videoLen=len).where(Video.videoId == videoId)
        statement.execute()

    def updateIsDownloadByVideoId(videoId, isDownload):
        """Store ``isDownload`` for the rows matching ``videoId``."""
        statement = Video.update(isDownload=isDownload).where(
            Video.videoId == videoId)
        statement.execute()
||||
|
|
Binary file not shown.
@ -0,0 +1,49 @@ |
|||||
|
import argparse |
||||
|
import random |
||||
|
import time |
||||
|
import Contant |
||||
|
from LoggerUtils import Logger, initLogger |
||||
|
import Orm |
||||
|
from VideoService import VideoService |
||||
|
from ChannelService import ChannelService |
||||
|
from DownloadInfoService import DownloadService |
||||
|
from DownloadUtil import DownLoadUtil |
||||
|
from func_timeout import func_set_timeout |
||||
|
import func_timeout |
||||
|
import requests |
||||
|
|
||||
|
# python3 ./main_download.py --db="../db/youtube_prod.db" --logDir="./logs" |
||||
|
if __name__ == "__main__":
    # Command-line options: SQLite database path and log directory.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    Orm.ormInit()

    # Work through batches of unfinished downloads until none are returned.
    pending = DownloadService.findNotFinishList()
    Logger.info("list size:{}".format(len(pending)))
    while len(pending) > 0:
        for info in pending:
            try:
                DownLoadUtil.downloadOne(info.videoId)
                restTime = random.randint(1, 3)
                Logger.info("间隔{}秒后继续...".format(restTime))
                time.sleep(restTime)
            except func_timeout.exceptions.FunctionTimedOut as e:
                Logger.error("执行下载方法超时错误:{}".format(e))
                # Count the timeout as an attempt; otherwise a video that
                # always times out is selected again forever.
                DownloadService.updateInfoByVideoId(
                    info.videoId, info.tryTime + 1, 0, 1)
        loopRestTime = random.randint(1, 3)
        Logger.info("循环间隔{}秒后继续...".format(loopRestTime))
        time.sleep(loopRestTime)
        pending = DownloadService.findNotFinishList()

    # Send the DingTalk notification.
    # NOTE(review): the access token is committed in source; consider loading
    # it from configuration and rotating it.
    webhook = "https://oapi.dingtalk.com/robot/send?access_token=c8c8d7d42c4eecd449dd303025ef968f647d1d8e8694e3fabc0ab5770d646dcb"
    jsonData = {
        "msgtype": "text",
        "text": {
            "content": "[Youtube]download finished"
        }
    }
    try:
        # A timeout keeps a stalled connection from hanging the job.
        requests.post(webhook, json=jsonData, timeout=10)
        Logger.info("download发送钉钉消息成功...")
    except requests.RequestException as e:
        Logger.error("download发送钉钉消息失败:{}".format(e))
@ -0,0 +1,2 @@ |
|||||
|
# Database location shared between modules; empty until a script assigns it.
db = ""

# Log directory shared between modules; empty until a script assigns it.
logDir = ""
@ -0,0 +1,6 @@ |
|||||
|
from loguru import logger |
||||
|
import Contant |
||||
|
# Project-wide alias for the loguru logger object.
Logger = logger


def initLogger():
    """Register a file sink named init_{time}.log under Contant.logDir.

    The sink rotates at 500MB, zips rotated files, keeps them for 10 days
    and is written through a queue (enqueue=True).
    """
    sinkPath = Contant.logDir+"/init_{time}.log"
    logger.add(
        sinkPath,
        rotation="500MB",
        encoding="utf-8",
        enqueue=True,
        compression="zip",
        retention="10 days",
    )
@ -0,0 +1,65 @@ |
|||||
|
from peewee import * |
||||
|
import Contant |
||||
|
import argparse |
||||
|
from LoggerUtils import Logger |
||||
|
|
||||
|
# The command line is read at import time because the database handle must
# exist before the model classes below are declared.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
# parse_known_args() ignores options that belong to the importing script;
# parse_args() would end the process with a usage error on any of them.
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
||||
|
|
||||
|
|
||||
|
def ormInit():
    """Call create_table() on Channel, Vidoe and DownloadInfo, in that order."""
    for model in (Channel, Vidoe, DownloadInfo):
        model.create_table()
||||
|
|
||||
|
|
||||
|
class BaseModel(Model):
    """Common base that binds every model to the module-level ``db``."""

    class Meta:
        # Database handle created above from Contant.db.
        database = db
||||
|
|
||||
|
# Channel information
class Channel(BaseModel):
    """Channel record stored in table 'Channel'."""

    id = PrimaryKeyField()
    # Fields are NOT NULL unless null=True is given (peewee default).
    channelId = CharField()
    channelTitle = CharField()
    channelLanguage = CharField()
    channelReptileTime = CharField(null=True)

    class Meta:
        db_table = 'Channel'
||||
|
|
||||
|
# Video information
class Vidoe(BaseModel):
    """Video record stored in table 'Vidoes'.

    NOTE(review): the class name is spelled "Vidoe" here; ormInit in this
    module refers to it by that name.
    """

    id = PrimaryKeyField()
    # Fields are NOT NULL unless null=True is given (peewee default).
    videoId = CharField()
    channelId = CharField()
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()

    class Meta:
        db_table = 'Vidoes'
||||
|
|
||||
|
# Download information
class DownloadInfo(BaseModel):
    """Download bookkeeping record stored in table 'Download_info'."""

    id = PrimaryKeyField()
    videoId = CharField()
    # All three status columns are plain integers.
    downloadType = IntegerField()
    tryTime = IntegerField()
    isFinished = IntegerField()

    class Meta:
        db_table = 'Download_info'
@ -0,0 +1,56 @@ |
|||||
|
from LoggerUtils import Logger, initLogger |
||||
|
import argparse |
||||
|
import Contant |
||||
|
from Orm import ormInit, Channel |
||||
|
import operator |
||||
|
from bs4 import BeautifulSoup as bs |
||||
|
from urllib.request import urlopen, Request |
||||
|
|
||||
|
# py .\init.py --db=../db/youtube_prod.db --logDir=./logs |
||||
|
def saveChannel(channelUrl, language):
    """Fetch a channel page and insert a Channel row for it if none exists.

    The channel id is taken from the part of the canonical <link> href that
    follows "/channel/"; the channel name from a <link> tag carrying a
    ``content`` attribute.

    Args:
        channelUrl: URL of the channel page to fetch.
        language: value stored in the row's channelLanguage column.
    """
    Logger.info("频道链接:"+channelUrl)
    channelId = ""
    channelName = ""
    request = Request(channelUrl, headers={'User-Agent': 'Mozilla'})
    # Close the HTTP response as soon as the page has been parsed.
    with urlopen(request) as response:
        videoInfo = bs(response, features="html.parser")
    for link in videoInfo.find_all("link"):
        linkMarkup = str(link)
        if "canonical" in linkMarkup:
            # partition() yields "" when "/channel/" is absent, where
            # split(...)[1] would raise IndexError.
            channelId = str(link.get('href', '')).partition("/channel/")[2]
        if "content=" in linkMarkup:
            channelName = str(link['content'])
    Logger.info("channelId:"+channelId)
    Logger.info("channelName:"+channelName)
    if not channelId:
        # Never insert a row with an empty channel id.
        Logger.error("未能解析频道ID:" + channelUrl)
        return
    channel = Channel.get_or_none(Channel.channelId == channelId)
    if channel is not None:
        Logger.info("频道已存在:" + channelId)
        return
    Channel.create(channelTitle=channelName,
                   channelId=channelId, channelLanguage=language)
||||
|
|
||||
|
|
||||
|
if __name__ == "__main__":
    # Command-line options: SQLite database path and log directory.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    ormInit()
    Logger.info("SqlLite存放地址:"+Contant.db)
    Logger.info("日志文件存放地址:"+Contant.logDir)
    Logger.info("开始初始化...")
    # checkInit()
    # Read the channel URLs to add from the text file.
    Logger.info("开始读取需要新增的频道地址...")
    # The context manager closes the file once every line has been read.
    with open("urlList.txt") as urlFile:
        urlList = [line.strip('\n') for line in urlFile]
    if not urlList:
        # An empty file has no language line and no URLs to process.
        Logger.error("urlList.txt为空,没有需要新增的频道")
    else:
        # The first line holds the language code applied to every URL.
        language = urlList[0]
        for url in urlList:
            # Lines of 10 characters or fewer (language code, blanks) are
            # skipped.
            if len(url) > 10:
                saveChannel(url, language)
@ -0,0 +1,14 @@ |
|||||
|
zh-TW |
||||
|
https://www.youtube.com/@TheStormMedia |
||||
|
https://www.youtube.com/@57ETFN |
||||
|
https://www.youtube.com/@MoneyNewWorld |
||||
|
https://www.youtube.com/@tvbsmoney |
||||
|
https://www.youtube.com/@TheMasterhsiao |
||||
|
https://www.youtube.com/@mvp5888 |
||||
|
https://www.youtube.com/@HUNG64 |
||||
|
https://www.youtube.com/@user-vc2vr6tw4h |
||||
|
https://www.youtube.com/ustv |
||||
|
https://www.youtube.com/@leon888 |
||||
|
https://www.youtube.com/@smartmonthly-BW |
||||
|
https://www.youtube.com/@ustvstockonline |
||||
|
https://www.youtube.com/@AASTOCKS_AATV |
@ -0,0 +1,90 @@ |
|||||
|
hi |
||||
|
https://www.youtube.com/@procapitalacademy |
||||
|
https://www.youtube.com/@TEACHERANISH |
||||
|
https://www.youtube.com/@MarketGurukul1 |
||||
|
|
||||
|
|
||||
|
|
||||
|
en |
||||
|
https://www.youtube.com/@VishalKhandelwalshow |
||||
|
https://www.youtube.com/@Elearnmarkets |
||||
|
https://www.youtube.com/@MarketsMojo |
||||
|
https://www.youtube.com/@TradeWithTrend |
||||
|
https://www.youtube.com/@SHAREKHAN |
||||
|
https://www.youtube.com/@AvadhutSatheTradingAcademy |
||||
|
|
||||
|
|
||||
|
|
||||
|
ko |
||||
|
https://www.youtube.com/@E_TREND |
||||
|
https://www.youtube.com/@hkwownet |
||||
|
https://www.youtube.com/@giant_tv |
||||
|
https://www.youtube.com/@StrongStock |
||||
|
https://www.youtube.com/@stockwar999 |
||||
|
https://www.youtube.com/@user-sp1du8pm6q |
||||
|
https://www.youtube.com/@talentinvestment |
||||
|
https://www.youtube.com/@future_economy |
||||
|
https://www.youtube.com/@user-sf7hm6xj8d |
||||
|
https://www.youtube.com/@user-xv9xi6pi9o |
||||
|
https://www.youtube.com/@user-rd8fd1xj9b |
||||
|
https://www.youtube.com/@lucky_tv |
||||
|
https://www.youtube.com/@Min_woo |
||||
|
https://www.youtube.com/@taver1123 |
||||
|
https://www.youtube.com/@Super0Min |
||||
|
https://www.youtube.com/@ap5798 |
||||
|
https://www.youtube.com/@drematree100 |
||||
|
https://www.youtube.com/@MKeconomy_TV |
||||
|
https://www.youtube.com/@grit |
||||
|
https://www.youtube.com/@user-zn9js9fg5i |
||||
|
https://www.youtube.com/@youngikkim |
||||
|
https://www.youtube.com/@DonNawa |
||||
|
https://www.youtube.com/@woong-dal |
||||
|
https://www.youtube.com/@johnleeschool |
||||
|
https://www.youtube.com/@syukaworld-comics |
||||
|
https://www.youtube.com/@channelA-news |
||||
|
https://www.youtube.com/@user-bh7lr7pe9g |
||||
|
https://www.youtube.com/@singlefire |
||||
|
https://www.youtube.com/@moneyhi |
||||
|
https://www.youtube.com/@top.trader |
||||
|
https://www.youtube.com/@jusikdante |
||||
|
|
||||
|
|
||||
|
|
||||
|
zh-TW |
||||
|
https://www.youtube.com/@kukantieh |
||||
|
|
||||
|
ja |
||||
|
https://www.youtube.com/@DanTakahashi1 |
||||
|
https://www.youtube.com/@tvtokyobiz |
||||
|
https://www.youtube.com/@SHO1112 |
||||
|
https://www.youtube.com/@pivot8935 |
||||
|
https://www.youtube.com/@nikkei |
||||
|
https://www.youtube.com/@toushikomon |
||||
|
https://www.youtube.com/@pivot8935 |
||||
|
https://www.youtube.com/@NewsPicks/featured |
||||
|
https://www.youtube.com/@higedura24 |
||||
|
https://www.youtube.com/@tvtokyobiz |
||||
|
https://www.youtube.com/@omaegaowattendayo |
||||
|
https://www.youtube.com/@info_ask1 |
||||
|
https://www.youtube.com/@takaisanno/videos |
||||
|
https://www.youtube.com/@takaponjp |
||||
|
https://www.youtube.com/@tbsnewsdig |
||||
|
https://www.youtube.com/@rehacq |
||||
|
https://www.youtube.com/@mabuchi-mariko |
||||
|
https://www.youtube.com/@fp_nigu |
||||
|
https://www.youtube.com/@yukkuri-money |
||||
|
https://www.youtube.com/@SHO1112 |
||||
|
https://www.youtube.com/@yohei-chokin |
||||
|
https://www.youtube.com/@user-yu9sj9gq7z/videos |
||||
|
https://www.youtube.com/@tesuta-clipping |
||||
|
https://www.youtube.com/@tradelabo2222 |
||||
|
https://www.youtube.com/@jin115xx |
||||
|
https://www.youtube.com/@higedura24 |
||||
|
https://www.youtube.com/@nobujuku |
||||
|
https://www.youtube.com/@tokyosoken |
||||
|
https://www.youtube.com/@user-hx7bn7hp9v |
||||
|
https://www.youtube.com/@SLokRE |
||||
|
https://www.youtube.com/@rehacq |
||||
|
https://www.youtube.com/@moha-p |
||||
|
https://www.youtube.com/results?search_query=Buffett+Taro%27s |
||||
|
https://www.youtube.com/@Gorikoro |
@ -0,0 +1,7 @@ |
|||||
|
ja |
||||
|
https://www.youtube.com/@ryogakucho |
||||
|
https://www.youtube.com/@DanTakahashi1 |
||||
|
https://www.youtube.com/@buffett_taro |
||||
|
https://www.youtube.com/@Tsubame104 |
||||
|
https://www.youtube.com/@inc_academy |
||||
|
https://www.youtube.com/@kamioka01 |
@ -0,0 +1 @@ |
|||||
|
# Log directory shared between modules; empty until a script assigns it.
logDir = ""
@ -0,0 +1,6 @@ |
|||||
|
from loguru import logger |
||||
|
import Contant |
||||
|
# Project-wide alias for the loguru logger object.
Logger = logger


def initLogger():
    """Register a file sink named sftp_{time}.log under Contant.logDir.

    The sink rotates at 500MB, zips rotated files, keeps them for 10 days
    and is written through a queue (enqueue=True).
    """
    sinkPath = Contant.logDir+"/sftp_{time}.log"
    logger.add(
        sinkPath,
        rotation="500MB",
        encoding="utf-8",
        enqueue=True,
        compression="zip",
        retention="10 days",
    )
@ -0,0 +1,97 @@ |
|||||
|
import os |
||||
|
import shutil |
||||
|
import paramiko |
||||
|
import argparse |
||||
|
import Contant |
||||
|
from LoggerUtils import Logger, initLogger |
||||
|
import configparser |
||||
|
import requests |
||||
|
import time |
||||
|
|
||||
|
# python3 sftp.py --local="/mnt/tmp_srt_file" --logDir="./logs" |
||||
|
# python3 sftp.py --local="/mnt/test_file" --logDir="./logs" |
||||
|
if __name__ == "__main__":
    # Read the command-line options.
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--local", type=str, default="")
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.logDir = args.logDir
    initLogger()

    # Read the configuration file.
    config = configparser.ConfigParser()
    config.read('sftp_config.ini')

    # SFTP connection settings.
    hostname = config.get('sftp_config', 'hostname')
    port = config.getint('sftp_config', 'port')
    username = config.get('sftp_config', 'username')
    password = config.get('sftp_config', 'password')

    # The password is deliberately kept out of the log.
    Logger.info("host:{},port:{},username:{}".format(
        hostname, port, username))

    ssh_client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without
    # verification -- confirm this is acceptable for this server.
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    sftp_client = None  # stays None until the SFTP session is open
    try:
        ssh_client.connect(hostname, port, username, password)
        # Create the SFTP client.
        sftp_client = ssh_client.open_sftp()
        Logger.info("SFTP客户端已经建立:{}".format(sftp_client))

        remote_root = "/Inbound/YouTube Captions"
        local_root = args.local
        Logger.info("remote_root:{},local_root:{}".format(remote_root, local_root))

        for name in os.listdir(local_root):
            remoteDir = remote_root + "/" + name
            # Create the remote folder when changing into it fails.
            try:
                sftp_client.chdir(remoteDir)
            except IOError:
                sftp_client.mkdir(remoteDir)
                sftp_client.chdir(remoteDir)

            # Walk the local temporary folder.
            for srt in os.listdir(local_root + "/" + name):
                remotePath = remoteDir + "/" + srt
                localPath = local_root + "/" + name + "/" + srt
                # Shorten over-long remote paths before anything touches the
                # server, so the delete below and the upload use the same path.
                if len(remotePath) > 120:
                    remotePath = remotePath[:-20] + ".srt"
                # Delete the remote file when it already exists.
                try:
                    sftp_client.stat(remotePath)
                    sftp_client.remove(remotePath)
                    Logger.info("Remote file '{}' deleted.".format(remotePath))
                except FileNotFoundError:
                    Logger.info("Remote file '{}' not found.".format(remotePath))
                # Upload the local file and remove it afterwards.
                try:
                    if os.path.exists(localPath):
                        Logger.info("本地文件 '{}' 存在,开始上传.".format(localPath))
                        sftp_client.put(localPath, remotePath, confirm=False)
                        os.remove(localPath)
                    else:
                        Logger.info("本地文件 '{}' 不存在,无法上传.".format(localPath))
                except Exception as e:
                    Logger.info("上传失败 '{}' 文件名长度{}".format(
                        remotePath, len(remotePath)))
                    Logger.error(e)
                    # Start a fresh SFTP session after a failed transfer.
                    sftp_client.close()
                    sftp_client = ssh_client.open_sftp()
    finally:
        # Release the SFTP session and the SSH connection on every exit path.
        if sftp_client is not None:
            sftp_client.close()
        ssh_client.close()

    # Send the DingTalk notification.
    # NOTE(review): the access token is committed in source; consider loading
    # it from configuration and rotating it.
    webhook = "https://oapi.dingtalk.com/robot/send?access_token=c8c8d7d42c4eecd449dd303025ef968f647d1d8e8694e3fabc0ab5770d646dcb"
    jsonData = {
        "msgtype": "text",
        "text": {
            "content": "[Youtube]sftp finished"
        }
    }
    try:
        # A timeout keeps a stalled connection from hanging the job.
        requests.post(webhook, json=jsonData, timeout=10)
        Logger.info("sftp发送钉钉消息成功...")
    except requests.RequestException as e:
        Logger.error("sftp发送钉钉消息失败:{}".format(e))
@ -0,0 +1,5 @@ |
|||||
|
[sftp_config] |
||||
|
hostname = filetransfer.blackrock.com |
||||
|
port = 22 |
||||
|
username = ftp_yunbo |
||||
|
password = s8v{8SJr |
@ -0,0 +1,15 @@ |
|||||
|
import json |
||||
|
from Orm import Channel |
||||
|
from playhouse.shortcuts import model_to_dict, dict_to_model |
||||
|
|
||||
|
|
||||
|
class ChannelService:
    """Read and update helpers for the Channel model.

    Methods take no ``self`` and are called on the class itself.
    """

    def getOneByChannelId(channelId):
        """Return the Channel whose ``channelId`` matches, or None."""
        matchesId = Channel.channelId == channelId
        return Channel.get_or_none(matchesId)

    def updateTimeByChannelId(channelId, chageTime):
        """Store ``chageTime`` in channelReptileTime for the matching rows."""
        statement = Channel.update(channelReptileTime=chageTime).where(
            Channel.channelId == channelId)
        statement.execute()

    def getChannelList():
        """Execute an unfiltered select over Channel and return its result."""
        allChannels = Channel.select()
        return allChannels.execute()
@ -0,0 +1,4 @@ |
|||||
|
# Database location shared between modules; empty until a script assigns it.
db = ""

# Log directory shared between modules; empty until a script assigns it.
logDir = ""

# Start/end bounds, empty by default.
# NOTE(review): presumably filled from the --start/--end options -- confirm.
startTime = ""
endTime = ""
@ -0,0 +1,16 @@ |
|||||
|
from Orm import DownloadInfo |
||||
|
|
||||
|
|
||||
|
class DownloadService:
    """Insert and update helpers for the DownloadInfo model.

    Methods take no ``self`` and are called on the class itself.
    """

    def createOne(videoId, downloadType, tryTime, isFinished):
        """Insert one DownloadInfo row built from the arguments."""
        columns = dict(
            videoId=videoId,
            downloadType=downloadType,
            tryTime=tryTime,
            isFinished=isFinished,
        )
        DownloadInfo.create(**columns)

    def updateInfoByVideoId(videoId, tryTime, isFinished):
        """Set tryTime and isFinished on every row matching ``videoId``."""
        statement = DownloadInfo.update(tryTime=tryTime, isFinished=isFinished).where(
            DownloadInfo.videoId == videoId)
        statement.execute()
@ -0,0 +1,6 @@ |
|||||
|
from loguru import logger |
||||
|
import Contant |
||||
|
# Project-wide alias for the loguru logger object.
Logger = logger


def initLogger():
    """Register a file sink named main_{time}.log under Contant.logDir.

    The sink rotates at 500MB, zips rotated files, keeps them for 10 days
    and is written through a queue (enqueue=True).
    """
    sinkPath = Contant.logDir+"/main_{time}.log"
    logger.add(
        sinkPath,
        rotation="500MB",
        encoding="utf-8",
        enqueue=True,
        compression="zip",
        retention="10 days",
    )
@ -0,0 +1,68 @@ |
|||||
|
from peewee import * |
||||
|
import Contant |
||||
|
import argparse |
||||
|
from LoggerUtils import Logger |
||||
|
|
||||
|
# The command line is read at import time because the database handle must
# exist before the model classes below are declared.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
parser.add_argument("--start", type=str, default="")
parser.add_argument("--end", type=str, default="")
parser.add_argument("--channelId", type=str, default="")
# parse_known_args() ignores options that belong to the importing script;
# parse_args() would end the process with a usage error on any of them.
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
||||
|
|
||||
|
|
||||
|
def ormInit():
    """Call create_table() on Channel, Video and DownloadInfo, in that order."""
    for model in (Channel, Video, DownloadInfo):
        model.create_table()
||||
|
|
||||
|
|
||||
|
class BaseModel(Model):
    """Common base that binds every model to the module-level ``db``."""

    class Meta:
        # Database handle created above from Contant.db.
        database = db
||||
|
|
||||
|
# Channel information
class Channel(BaseModel):
    """Channel record stored in table 'Channel'."""

    id = PrimaryKeyField()
    # Fields are NOT NULL unless null=True is given (peewee default).
    channelId = CharField()
    channelTitle = CharField()
    channelLanguage = CharField()
    channelReptileTime = CharField(null=True)

    class Meta:
        db_table = 'Channel'
||||
|
|
||||
|
# Video information
class Video(BaseModel):
    """Video record stored in table 'Vidoes' (spelling as used by the database)."""

    id = PrimaryKeyField()
    # Fields are NOT NULL unless null=True is given (peewee default).
    videoId = CharField()
    channelId = CharField()
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()

    class Meta:
        db_table = 'Vidoes'
||||
|
|
||||
|
# Download information
class DownloadInfo(BaseModel):
    """Download bookkeeping record stored in table 'Download_info'."""

    id = PrimaryKeyField()
    videoId = CharField()
    # All three status columns are plain integers.
    downloadType = IntegerField()
    tryTime = IntegerField()
    isFinished = IntegerField()

    class Meta:
        db_table = 'Download_info'
@ -0,0 +1,31 @@ |
|||||
|
import json |
||||
|
from Orm import Video |
||||
|
from playhouse.shortcuts import model_to_dict, dict_to_model |
||||
|
|
||||
|
|
||||
|
class VideoService:
    """Create, read and update helpers for the Video model.

    Methods take no ``self`` and are called on the class itself.
    """

    def getOneByVideoId(videoId):
        """Return the Video whose ``videoId`` matches, or None."""
        matchesId = Video.videoId == videoId
        return Video.get_or_none(matchesId)

    def createOne(videoId, channelId, videoTitle, videoLen, videoType, videoPublishTime, videoLanguage, isDownload):
        """Insert one Video row built from the arguments."""
        columns = dict(
            videoId=videoId,
            channelId=channelId,
            videoTitle=videoTitle,
            videoLen=videoLen,
            videoType=videoType,
            videoPublishTime=videoPublishTime,
            videoLanguage=videoLanguage,
            isDownload=isDownload,
        )
        Video.create(**columns)

    def updateLenByVideoId(videoId, len):
        """Store ``len`` in videoLen for the rows matching ``videoId``."""
        statement = Video.update(videoLen=len).where(Video.videoId == videoId)
        statement.execute()

    def getLastVideoByChannelId(channelId):
        """Return the channel's video with the greatest videoPublishTime.

        Uses ``.get()``, so a channel without videos raises instead of
        returning None.
        """
        ofChannel = Video.select().where(Video.channelId == channelId)
        newestFirst = ofChannel.order_by(Video.videoPublishTime.desc())
        return newestFirst.get()

    def getFirstVideoByChannelId(channelId):
        """Return the channel's video with the smallest videoPublishTime.

        Uses ``.get()``, so a channel without videos raises instead of
        returning None.
        """
        ofChannel = Video.select().where(Video.channelId == channelId)
        oldestFirst = ofChannel.order_by(Video.videoPublishTime)
        return oldestFirst.get()

    def checkExist(channelId):
        """Return whether at least one Video row exists for ``channelId``."""
        return Video.select().where(Video.channelId == channelId).exists()
@ -0,0 +1,169 @@ |
|||||
|
import httplib2 |
||||
|
import googleapiclient.discovery |
||||
|
import googleapiclient.errors |
||||
|
from VideoService import VideoService |
||||
|
from ChannelService import ChannelService |
||||
|
from DownloadInfoService import DownloadService |
||||
|
from LoggerUtils import Logger |
||||
|
import operator |
||||
|
import time |
||||
|
|
||||
|
|
||||
|
class YouTubeUtil: |
||||
|
# apiKeys = ["AIzaSyDlRgmPXVQEjF2gbmomI5FUZX_uAOBmEGI", "AIzaSyBI5i5vFZpQErMnEXKMf0VUS2Bel8jGrTk", |
||||
|
# "AIzaSyAnmA0Ggy1yXsZZACfItmeZAa7wcmh6SbM", "AIzaSyC4O8tBoAfkupmBybxDah2JUxgj4ct5uk0", |
||||
|
# "AIzaSyDJ2S9Ijhw_hULx3nHvPUoGUpMENbZOIl8", "AIzaSyA87Ckpna3hOQ31nISs8V8rp--OLw0m6Aw", |
||||
|
# "AIzaSyDIWbV0EOLHkOr9tWpANose6ggd2r9vcLg", "AIzaSyBKE3lYwWFIYc9Vx4YKMbRpkOXigZlY52U"] |
||||
|
|
||||
|
# AIzaSyCTBSbq0YjyxTtjmNsnDyKAwHamlv_ST-s |
||||
|
# AIzaSyAESnwtbTIBtU707iZowtQkmAo-qKuEOcY |
||||
|
# AIzaSyCsYUC5vN0pB6y9xsCj0B1ehAoqOJ3WMf0 |
||||
|
# AIzaSyDjPkCgDQ9Tv_xcChjY2E6GpJ6IzngnD5I |
||||
|
# AIzaSyAxIycOdQYGB5kWhwe3B-kJAYRo7wOnp8o |
||||
|
apiKeys = [ |
||||
|
"AIzaSyARaW3mqO9szQiHgWZR4el0HWvdyheSHBc", |
||||
|
"AIzaSyChPXesnVx6fweon_BckhR6UiJWvi5Ma4s" |
||||
|
|
||||
|
# "AIzaSyCTBSbq0YjyxTtjmNsnDyKAwHamlv_ST-s", |
||||
|
# "AIzaSyAESnwtbTIBtU707iZowtQkmAo-qKuEOcY" |
||||
|
|
||||
|
|
||||
|
# "AIzaSyDjPkCgDQ9Tv_xcChjY2E6GpJ6IzngnD5I", |
||||
|
# "AIzaSyAxIycOdQYGB5kWhwe3B-kJAYRo7wOnp8o", |
||||
|
# "AIzaSyCsYUC5vN0pB6y9xsCj0B1ehAoqOJ3WMf0" |
||||
|
] |
||||
|
|
||||
|
apiIndex = 0 |
||||
|
|
||||
|
def getYoutube():
    """Build a YouTube Data API v3 client with the next API key in rotation.

    Each call uses ``YouTubeUtil.apiKeys[YouTubeUtil.apiIndex]`` and then
    advances ``apiIndex``, wrapping back to 0 after the last key.

    Returns:
        The client object returned by ``googleapiclient.discovery.build``.
    """
    # To go through a local HTTP proxy, pass
    # proxy_info=httplib2.ProxyInfo(proxy_type=httplib2.socks.PROXY_TYPE_HTTP,
    #                               proxy_host="127.0.0.1", proxy_port=7890).
    http = httplib2.Http(timeout=10, disable_ssl_certificate_validation=False)

    keyCount = len(YouTubeUtil.apiKeys)
    apiKey = YouTubeUtil.apiKeys[YouTubeUtil.apiIndex]
    # Log only the key's last characters: the key is a credential and must
    # not be written to log files in full.
    Logger.info(
        "当前APIKey:***{},当前apiIndex:{},totalIndex:{}".format(
            apiKey[-4:], YouTubeUtil.apiIndex, keyCount - 1
        )
    )
    # Advance to the next key for the following call.
    YouTubeUtil.apiIndex = (YouTubeUtil.apiIndex + 1) % keyCount

    return googleapiclient.discovery.build(
        "youtube", "v3", developerKey=apiKey, http=http
    )
||||
|
|
||||
|
def getVidoeLen(videoIds):
    """Fetch the ``contentDetails`` part for one or more videos.

    Args:
        videoIds: Comma-separated video IDs. Callers build the string by
            prefixing each ID with ",", so stray leading/trailing commas are
            stripped before the request is made.

    Returns:
        The raw ``videos().list`` response. ``response["items"]`` may be
        empty when none of the IDs is found; that is no longer an error.
    """
    youtube = YouTubeUtil.getYoutube()
    request = youtube.videos().list(
        part="contentDetails", id=videoIds.strip(",")
    )
    return request.execute()
||||
|
|
||||
|
def getVideoLenByStr(str):
    """Convert an ISO 8601 duration such as ``PT1H2M3S`` to whole seconds.

    Besides the ``PT#H#M#S`` form, durations with a day (or week) component
    such as ``P1DT2H`` and the zero duration ``P0D`` are accepted; the
    previous implementation raised ``IndexError`` on those.

    Args:
        str: Duration string (the name shadows the builtin but is part of
            the public signature).

    Returns:
        The duration in seconds as an int.

    Raises:
        ValueError: if ``str`` is not a supported duration.
    """
    import re

    match = re.fullmatch(
        r"P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?",
        str,
    )
    if match is None:
        raise ValueError("unsupported duration: {!r}".format(str))
    # Missing components are treated as 0.
    weeks, days, hours, minutes, seconds = (
        int(part) if part else 0 for part in match.groups()
    )
    return ((weeks * 7 + days) * 24 + hours) * 3600 + minutes * 60 + seconds
||||
|
|
||||
|
def getByChannelId(channelId, startTime, endTime):
    """Crawl one channel's videos published in a time window and store them.

    For every search result that is not yet in the Video table, a Video row
    (length 0) and a DownloadInfo row (downloadType 1, tryTime 0,
    isFinished 0) are created. Durations of the new videos are then fetched
    in batches and written back. After each result page the channel's
    ``channelReptileTime`` is set to the publish time of its newest stored
    video.

    Args:
        channelId: YouTube channel ID. If it is not in the Channel table the
            call returns without doing anything.
        startTime: Passed to the search API as ``publishedAfter``.
        endTime: Passed to the search API as ``publishedBefore``.

    Raises:
        Whatever the first search request raises. A failure while fetching a
        later page is logged and ends the crawl instead.
    """
    batchSize = 10  # look durations up once this many new videos are pending

    channel = ChannelService.getOneByChannelId(channelId)
    if channel is None:
        return
    videoLanguage = str(channel.channelLanguage)
    youtube = YouTubeUtil.getYoutube()

    def searchPage(pageToken=None):
        """Run the channel search, optionally continuing from ``pageToken``."""
        params = {
            "part": "snippet",
            "channelId": channelId,
            "maxResults": 50,
            "order": "date",
            "publishedAfter": startTime,
            "publishedBefore": endTime,
            "type": "video",
        }
        if pageToken is not None:
            params["pageToken"] = pageToken
        return youtube.search().list(**params).execute()

    def storeLengths(videoIds):
        """Fetch the durations of ``videoIds`` and write them to the Video table."""
        if not videoIds:
            return
        try:
            lenRes = YouTubeUtil.getVidoeLen(",".join(videoIds))
        except Exception:
            Logger.exception("failed to fetch durations for {}".format(videoIds))
            return
        for item in lenRes["items"]:
            try:
                tmpId = item["id"]
                videoLen = YouTubeUtil.getVideoLenByStr(
                    item["contentDetails"]["duration"]
                )
                VideoService.updateLenByVideoId(tmpId, videoLen)
            except Exception:
                # One unparsable item must not cost the rest of the batch.
                Logger.exception("failed to store duration: {}".format(item))
                continue
            Logger.info("更新时长,videoId:{},len:{}".format(tmpId, videoLen))

    response = searchPage()
    while True:
        pendingIds = []
        for result in response["items"]:
            try:
                videoId = result["id"]["videoId"]
                publishTime = result["snippet"]["publishedAt"]
                videoTitle = result["snippet"]["title"]
                if VideoService.getOneByVideoId(str(videoId)) is not None:
                    Logger.info("已存在VideoId:{}".format(videoId))
                    continue
                VideoService.createOne(
                    videoId, channelId, videoTitle, 0, "video",
                    publishTime, videoLanguage, 0,
                )
                DownloadService.createOne(videoId, 1, 0, 0)
                Logger.info(
                    "存储VideoUrl:https://www.youtube.com/watch?v=" + videoId
                )
            except Exception:
                # Best effort per item: log the failure and keep crawling.
                Logger.exception("failed to store search result: {}".format(result))
                continue
            pendingIds.append(str(videoId))
            if len(pendingIds) >= batchSize:
                storeLengths(pendingIds)
                pendingIds = []
        # Flush the remainder; previously a final batch smaller than
        # batchSize was dropped and those videos kept a length of 0.
        storeLengths(pendingIds)

        # Record the crawl position, unless nothing is stored for the channel.
        if VideoService.checkExist(channelId):
            newest = VideoService.getLastVideoByChannelId(channelId)
            ChannelService.updateTimeByChannelId(
                channelId, newest.videoPublishTime
            )

        nextPageToken = response.get("nextPageToken")
        if nextPageToken is None:
            break  # last page reached
        time.sleep(5)  # pause between page requests
        try:
            response = searchPage(nextPageToken)
        except Exception as e:
            Logger.error(e)
            break
@ -0,0 +1,49 @@ |
|||||
|
import argparse |
||||
|
import Contant |
||||
|
import LoggerUtils |
||||
|
import Orm |
||||
|
from VideoService import VideoService |
||||
|
from YouTubeUtils import YouTubeUtil |
||||
|
from ChannelService import ChannelService |
||||
|
import requests |
||||
|
|
||||
|
# py .\main.py --db=../db/youtube_prod.db --logDir=./logs --start="2023-09-10T00:00:01Z" --end="2023-09-11T00:00:01Z" |
||||
|
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="2024-03-25T00:10:01Z" --end="2024-03-26T00:10:01Z" |
||||
|
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="111" --end="222" |
||||
|
if __name__ == "__main__":
    import os  # only needed for the optional webhook override below

    # Command line: database path, log directory and the publish-time window.
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--db", type=str, default="")
    parser.add_argument("--logDir", type=str, default="")
    parser.add_argument("--start", type=str, default="")
    parser.add_argument("--end", type=str, default="")
    args = parser.parse_args()

    Contant.db = args.db
    Contant.logDir = args.logDir
    Contant.startTime = args.start
    Contant.endTime = args.end

    LoggerUtils.initLogger()
    Orm.ormInit()
    LoggerUtils.Logger.info("db:{},logDir:{}".format(Contant.db, Contant.logDir))
    LoggerUtils.Logger.info(
        "starTime:{},endTime:{}".format(Contant.startTime, Contant.endTime)
    )

    # Crawl every configured channel.
    channelList = ChannelService.getChannelList()
    LoggerUtils.Logger.info("list size:{}".format(len(channelList)))
    for channel in channelList:
        channelId = channel.channelId
        LoggerUtils.Logger.info(
            "channelId:{},startTime:{},endTime:{}".format(
                channelId, Contant.startTime, Contant.endTime
            )
        )
        try:
            YouTubeUtil.getByChannelId(channelId, Contant.startTime, Contant.endTime)
        except Exception:
            # One failing channel must not stop the remaining ones.
            LoggerUtils.Logger.exception("channelId:{} failed".format(channelId))

    # Send the DingTalk "finished" notification.
    # NOTE(review): the default URL embeds an access token committed to the
    # repository; set DINGTALK_WEBHOOK to override it and consider rotating it.
    webhook = os.environ.get(
        "DINGTALK_WEBHOOK",
        "https://oapi.dingtalk.com/robot/send?access_token=c8c8d7d42c4eecd449dd303025ef968f647d1d8e8694e3fabc0ab5770d646dcb",
    )
    jsonData = {
        "msgtype": "text",
        "text": {
            "content": "[Youtube]src finished"
        }
    }
    try:
        # The timeout keeps a hung connection from blocking the process forever.
        reply = requests.post(webhook, json=jsonData, timeout=10)
        reply.raise_for_status()
    except requests.RequestException:
        LoggerUtils.Logger.exception("failed to send the DingTalk message")
    else:
        LoggerUtils.Logger.info("src发送钉钉消息成功...")
@ -0,0 +1,32 @@ |
|||||
|
import argparse |
||||
|
import Contant |
||||
|
import LoggerUtils |
||||
|
import Orm |
||||
|
from VideoService import VideoService |
||||
|
from YouTubeUtils import YouTubeUtil |
||||
|
from ChannelService import ChannelService |
||||
|
import requests |
||||
|
|
||||
|
# py .\main.py --db=../db/youtube_prod.db --logDir=./logs --start="2023-09-10T00:00:01Z" --end="2023-09-11T00:00:01Z" |
||||
|
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="2023-08-10T00:00:01Z" --end="2023-09-12T00:00:01Z" |
||||
|
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="111" --end="222" |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCzoF2M_RG3Qz10hP16vQOng" |
||||
|
if __name__ == "__main__":
    # Every option is an optional string that defaults to "".
    cli = argparse.ArgumentParser(description="")
    for flag in ("--db", "--logDir", "--start", "--end", "--channelId"):
        cli.add_argument(flag, type=str, default="")
    options = cli.parse_args()

    # Publish the settings through the shared Contant module.
    Contant.db, Contant.logDir = options.db, options.logDir
    Contant.startTime, Contant.endTime = options.start, options.end
    channelId = options.channelId

    LoggerUtils.initLogger()
    Orm.ormInit()

    log = LoggerUtils.Logger
    log.info("db:{},logDir:{}".format(Contant.db, Contant.logDir))
    log.info(
        "channleId:{},starTime:{},endTime:{}".format(
            channelId, Contant.startTime, Contant.endTime
        )
    )

    # Crawl just the requested channel.
    YouTubeUtil.getByChannelId(channelId, Contant.startTime, Contant.endTime)
@ -0,0 +1,87 @@ |
|||||
|
#!/bin/bash
# Runs one_channel.py for a hand-maintained list of channels, one at a time.
# Abort if the working directory is missing: every path below is relative to it.
cd /mnt/youtube_prod/src || exit 1
||||
|
|
||||
|
|
||||
|
|
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCpsfkRRT7L2nBnizBn_u9YA" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCRbT3P-2tmr-9l8D7jNoZMQ" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCPTy0BNqiv-0SdAvFgrXvXg" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCMlDu8Vuowmqz03kByFcUhw" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UC5mn3VEg_9GY52G6eumKJRg" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UClhhyZ0xyeOAEVdcr0N9KDA" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCBM86JVoHLqg9irpR2XKvGw" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCzp9CmDIFVNtzhyOjptIi4g" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCv-spDeZBGYVUI9eGXGaLSg" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCF08I8KEKTsBo22RIXFwTAA" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UC5Mjj4LKlMtP_PXlIVYGxIQ" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCvil4OAt-zShzkKHsg9EQAw" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCI6C5V4J8FWRcLcOdh1yElw" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCOio3vyYLWiKlHSYRKW-9UA" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCaWi2foADm_lKAKnmeQwLSA" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCUFUOdQwKTWda7kKqxQwMxw" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCoZdXdFowKP0heWRkQ9RABQ" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCnfwIKyFYRuqZzzKBDt6JOA" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCnZJqzwt6LuRymM0jbqiD9A" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCHpGooMnVgnILywqrpqvZcQ" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCQIyAcoLsO3L0RMFQk7YMYA" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCYdHxiRAUUJhuE1DZsnWqXg" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCbOIEn95Rvnk97KRtSFqvbQ" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCXWOlSe2GHTev8QZhY_gMPg" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCJo6G1u0e_-wS-JQn3T-zEw" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCfq4V1DAuaojnr2ryvWNysw" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCFznPlqnBtRKQhtkm6GGoRQ" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UC5CyCSvCdoEP-VgQmFq3iww" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UC6mp159KMtzjhP65DmldR0A" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UC7YLvjJf3lDJUQ-TsbWyBjg" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UC6ij59Gy_HnqO4pFu9A_zgQ" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCpyjRAERLqcD_wI3qQnIY3A" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCSU_iBWoCnXe1VnAbQhO3Ug" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UC6ZkHcW5QQubZ-Q6XYINE3Q" |
||||
|
# sleep 600 |
||||
|
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCDpRrAXMYlxFz3a5-z8pE7w" |
||||
|
# sleep 600 |
||||
|
# Channels to crawl in this run, in order.
channel_ids=(
    "UCMec1m9iUC3agiEK-nsndSg"
    "UCOmXyHRWpDFPYgs2VpoQEIw"
    "UCPgT-N-DQ0K0H88skjaDgkA"
    "UC40nk9kM2Ue8XQ9LsHQlKPA"
    "UCaiV1-PUXDu2Nmx8iOZkofQ"
    "UCDDneQi63kJAdr3i5VCPzHg"
)

# Crawl each channel, then wait 10 minutes before the next one.
for channel_id in "${channel_ids[@]}"; do
    python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" \
        --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="$channel_id"
    sleep 600
done
@ -0,0 +1,10 @@ |
|||||
|
#!/bin/bash
# Start the download job in the background.
# Usage: /mnt/youtube_prod/start_download.sh

# Append a timestamped info line to the shared run log.
log() {
    local time_now
    time_now=$(date '+%Y-%m-%d %H:%M:%S')
    echo "$time_now [download] [info] $1" >> /mnt/youtube_prod/running.log
}

# The relative paths below only make sense inside the download directory.
cd /mnt/youtube_prod/download || { log "cannot cd to /mnt/youtube_prod/download"; exit 1; }

log "开始执行download..."
# stdout is discarded; stderr overwrites err.log on every start.
nohup python3 ./main_download.py --db="../db/youtube_prod.db" --logDir="./logs" >/dev/null 2>/mnt/youtube_prod/err.log &
@ -0,0 +1,11 @@ |
|||||
|
#!/bin/bash
# Run sftp.py on the staged files, then remove the staging directory.

# Append a timestamped info line to the shared run log.
# NOTE(review): the tag reads [download] although this is the sftp job; it
# looks like a copy-paste leftover. Left unchanged in case log filters rely on it.
log() {
    local time_now
    time_now=$(date '+%Y-%m-%d %H:%M:%S')
    echo "$time_now [download] [info] $1" >> /mnt/youtube_prod/running.log
}

cd /mnt/youtube_prod/sftp || { log "cannot cd to /mnt/youtube_prod/sftp"; exit 1; }

log "开始执行sftp..."
# Delete the staged files only when sftp.py exited successfully; removing
# them after a failed run would lose whatever had not been transferred.
if python3 ./sftp.py --local="/mnt/tmp_srt_file" --logDir="./logs"; then
    rm -rf /mnt/tmp_srt_file
else
    log "sftp.py failed; keeping /mnt/tmp_srt_file"
    exit 1
fi
@ -0,0 +1,12 @@ |
|||||
|
#!/bin/bash
# Start the crawler in the background for the last 24 hours.
# Usage: /mnt/youtube_prod/start_src.sh

# Append a timestamped info line to the shared run log.
log() {
    local time_now
    time_now=$(date '+%Y-%m-%d %H:%M:%S')
    echo "$time_now [src] [info] $1" >> /mnt/youtube_prod/running.log
}

cd /mnt/youtube_prod/src || { log "cannot cd to /mnt/youtube_prod/src"; exit 1; }

# -u: the trailing "Z" declares UTC, so the timestamps must be rendered in
# UTC. Without it, local time was labelled as UTC and the window was shifted
# by the server's UTC offset.
start=$(date -u '+%Y-%m-%dT%H:%M:%SZ' -d '-1 day')
end=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
log "开始执行src...startTime:$start,endTime:$end"

# stdout is discarded; stderr overwrites err.log on every start.
nohup python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="$start" --end="$end" >/dev/null 2>/mnt/youtube_prod/err.log &
@ -0,0 +1,4 @@ |
|||||
|
#!/bin/bash
# Force-stop the running main_download.py process(es).

# pgrep never reports itself, unlike `ps | grep`, whose first match could be
# the grep process.
pids=$(pgrep -f 'main_download\.py')
if [ -z "$pids" ]; then
    echo "main_download is not running"
    exit 0
fi

echo "$pids"
# Unquoted on purpose: pgrep prints one pid per line and kill takes them as
# separate arguments.
kill -9 $pids
@ -0,0 +1,2 @@ |
|||||
|
#!/bin/bash
# Smoke test: print a fixed marker line.
printf '%s\n' "test"
@ -0,0 +1,9 @@ |
|||||
|
import requests |
||||
|
# Manual check: post a fixed text message to the DingTalk robot webhook.
# NOTE(review): the URL embeds an access token committed to the repository.
webhook = "https://oapi.dingtalk.com/robot/send?access_token=c8c8d7d42c4eecd449dd303025ef968f647d1d8e8694e3fabc0ab5770d646dcb"
jsonData = {
    "msgtype": "text",
    "text": {
        "content": "[Youtube]aaaa"
    }
}
# The timeout keeps the script from hanging; raise_for_status turns an HTTP
# error into a visible failure instead of a silent one.
response = requests.post(webhook, json=jsonData, timeout=10)
response.raise_for_status()
@ -0,0 +1,8 @@ |
|||||
|
from youtube_transcript_api import YouTubeTranscriptApi |
||||
|
#zh-Hant |
||||
|
# Manual check: print the transcripts available for one video.
# NOTE: ``url`` is not used below; the call queries a different video ID.
url = "https://www.youtube.com/watch?v=YbVger_nh-s"
# Named ``transcripts`` rather than ``list`` so the builtin is not shadowed.
transcripts = YouTubeTranscriptApi.list_transcripts("_i5CoY_LMYs")
# videoSrt = YouTubeTranscriptApi.get_transcript(
#     "gXeNXJrD-gw", languages=['zh-TW'])
print(transcripts)
# print(videoSrt)
@ -0,0 +1,15 @@ |
|||||
|
import json |
||||
|
from Orm import Channel |
||||
|
from playhouse.shortcuts import model_to_dict, dict_to_model |
||||
|
|
||||
|
|
||||
|
class ChannelService:
    """Query and update helpers for the ``Channel`` table.

    The methods take no ``self``; they are declared ``staticmethod`` so they
    work both as ``ChannelService.f(...)`` and on an instance.
    """

    @staticmethod
    def getOneByChannelId(channelId):
        """Return the Channel row with this ``channelId``, or ``None`` if absent."""
        return Channel.get_or_none(Channel.channelId == channelId)

    @staticmethod
    def updateTimeByChannelId(channelId, chageTime):
        """Set ``channelReptileTime`` to ``chageTime`` for the matching channel."""
        Channel.update(channelReptileTime=chageTime).where(
            Channel.channelId == channelId).execute()

    @staticmethod
    def getChannelList():
        """Execute a select over the whole Channel table and return the result."""
        return Channel.select().execute()
@ -0,0 +1,10 @@ |
|||||
|
# Shared run configuration. Other modules overwrite these values at start-up
# and read them afterwards.

# Path of the SQLite database file (set from the --db option).
db=""
# Directory the log files are written to (set from the --logDir option).
logDir=""
# Time window bounds.
startTime=""
endTime=""
# Index into apiKeys of the key to use next.
apiIndex = 0
# YouTube Data API keys.
# NOTE(review): these credentials are committed to the repository; consider
# loading them from the environment and rotating them.
apiKeys = [
    "AIzaSyDjPkCgDQ9Tv_xcChjY2E6GpJ6IzngnD5I",
    "AIzaSyAxIycOdQYGB5kWhwe3B-kJAYRo7wOnp8o",
    "AIzaSyCsYUC5vN0pB6y9xsCj0B1ehAoqOJ3WMf0"
]
@ -0,0 +1,6 @@ |
|||||
|
from loguru import logger |
||||
|
import Contant |
||||
|
Logger = logger |
||||
|
def initLogger():
    """Add a file sink under ``Contant.logDir`` to the shared loguru logger.

    Uses 500MB rotation, zip compression of rotated files, 10 days retention,
    UTF-8 encoding and queued writes.
    """
    sinkPath = Contant.logDir + "/main_{time}.log"
    sinkOptions = {
        "rotation": "500MB",
        "encoding": "utf-8",
        "enqueue": True,
        "compression": "zip",
        "retention": "10 days",
    }
    logger.add(sinkPath, **sinkOptions)
@ -0,0 +1,75 @@ |
|||||
|
from peewee import * |
||||
|
import Contant |
||||
|
import argparse |
||||
|
from LoggerUtils import Logger |
||||
|
|
||||
|
# The database path is read from the command line at import time, because
# ``db`` must exist before the model classes below are defined.
# add_help=False leaves -h/--help to the importing script's own parser.
parser = argparse.ArgumentParser(description='', add_help=False)
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
# parse_known_args instead of parse_args: the importing script may define
# flags this module does not know, and parse_args() would exit on them.
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
||||
|
|
||||
|
|
||||
|
def ormInit():
    """Call ``create_table()`` on each model, in declaration order."""
    for model in (Channel, Video, DownloadInfo, ViewCountInfo):
        model.create_table()
||||
|
|
||||
|
|
||||
|
class BaseModel(Model):
    """Shared base class for the table models declared in this module."""

    class Meta:
        # Module-level database handle used by the models deriving from BaseModel.
        database = db
||||
|
|
||||
|
# 频道信息 |
||||
|
|
||||
|
|
||||
|
class Channel(BaseModel):
    """A YouTube channel known to the project."""

    # Auto-incrementing surrogate key. ``AutoField`` is the peewee 3 name of
    # the deprecated ``PrimaryKeyField``.
    id = AutoField()
    # YouTube channel ID; the lookup key used by ChannelService.
    channelId = CharField(null=False)
    channelTitle = CharField(null=False)
    channelLanguage = CharField()
    # Written by ChannelService.updateTimeByChannelId; NULL until then.
    channelReptileTime = CharField(null=True)

    class Meta:
        # ``table_name`` replaces the deprecated ``db_table`` option.
        table_name = 'Channel'
||||
|
|
||||
|
# 视频信息 |
||||
|
|
||||
|
|
||||
|
class Video(BaseModel):
    """Metadata of one YouTube video."""

    # ``AutoField`` is the peewee 3 name of the deprecated ``PrimaryKeyField``.
    id = AutoField()
    videoId = CharField(null=False)
    channelId = CharField(null=False)
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    # Stored as text, so range filters and ORDER BY compare strings.
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()

    class Meta:
        # NOTE: 'Vidoes' is misspelled, but it is the name tables are created
        # with; renaming it needs a data migration.
        # ``table_name`` replaces the deprecated ``db_table`` option.
        table_name = 'Vidoes'
||||
|
|
||||
|
# 下载信息 |
||||
|
|
||||
|
|
||||
|
class DownloadInfo(BaseModel):
    """Download bookkeeping for one video."""

    # ``AutoField`` is the peewee 3 name of the deprecated ``PrimaryKeyField``.
    id = AutoField()
    videoId = CharField()
    # Integer code of the download kind.
    downloadType = IntegerField()
    # Number of attempts so far.
    tryTime = IntegerField()
    # 0 while the download is pending.
    isFinished = IntegerField()

    class Meta:
        # ``table_name`` replaces the deprecated ``db_table`` option.
        table_name = 'Download_info'
||||
|
|
||||
|
# 播放量信息 |
||||
|
class ViewCountInfo(BaseModel):
    """View-count history of one video."""

    # ``AutoField`` is the peewee 3 name of the deprecated ``PrimaryKeyField``.
    id = AutoField()
    videoId = CharField()
    # Comma-separated string of 30 counts, one slot per day, as written by
    # ViewCountService.createOrUpdateOne.
    viewCount = CharField()

    class Meta:
        # ``table_name`` replaces the deprecated ``db_table`` option.
        table_name = 'ViewCount_info'
@ -0,0 +1,33 @@ |
|||||
|
import json |
||||
|
from Orm import ViewCountInfo |
||||
|
from playhouse.shortcuts import model_to_dict, dict_to_model |
||||
|
|
||||
|
|
||||
|
class ViewCountService:
    """Stores per-video view counts as a 30-slot, comma-separated string."""

    @staticmethod
    def createOrUpdateOne(videoId, day, count):
        """Record ``count`` in slot ``day`` of the video's view-count string.

        A missing row is created with all 30 slots set to "0" except the one
        being written. An existing row keeps its other slots.

        Args:
            videoId: ID of the video the count belongs to.
            day: 1-based slot number, 1..30.
            count: Value to store; it is converted with ``str()``.

        Raises:
            ValueError: if ``day`` is outside 1..30. Previously ``day=0``
                silently overwrote the last slot and larger values raised
                ``IndexError``.
        """
        slotCount = 30
        if not 1 <= day <= slotCount:
            raise ValueError("day must be in 1..{}, got {!r}".format(slotCount, day))

        record = ViewCountInfo.get_or_none(ViewCountInfo.videoId == videoId)
        if record is None:
            counts = ["0"] * slotCount
        else:
            # Pad or truncate to exactly 30 slots so a malformed stored value
            # cannot raise IndexError.
            stored = record.viewCount.split(",")
            counts = (stored + ["0"] * slotCount)[:slotCount]
        counts[day - 1] = str(count)
        countStr = ",".join(counts)

        if record is None:
            ViewCountInfo.create(videoId=videoId, viewCount=countStr)
        else:
            ViewCountInfo.update(viewCount=countStr).where(
                ViewCountInfo.videoId == videoId).execute()
@ -0,0 +1,34 @@ |
|||||
|
import json |
||||
|
from Orm import Video |
||||
|
from playhouse.shortcuts import model_to_dict, dict_to_model |
||||
|
|
||||
|
|
||||
|
class VideoService:
    """Query and update helpers for the ``Video`` table.

    The methods take no ``self``; they are declared ``staticmethod`` so they
    work both as ``VideoService.f(...)`` and on an instance.
    """

    @staticmethod
    def getOneByVideoId(videoId):
        """Return the Video row with this ``videoId``, or ``None`` if absent."""
        return Video.get_or_none(Video.videoId == videoId)

    @staticmethod
    def createOne(videoId, channelId, videoTitle, videoLen, videoType,
                  videoPublishTime, videoLanguage, isDownload):
        """Insert a new Video row built from the given column values."""
        Video.create(
            videoId=videoId,
            channelId=channelId,
            videoTitle=videoTitle,
            videoLen=videoLen,
            videoType=videoType,
            videoPublishTime=videoPublishTime,
            videoLanguage=videoLanguage,
            isDownload=isDownload,
        )

    @staticmethod
    def updateLenByVideoId(videoId, len):
        """Set ``videoLen`` on every row whose ``videoId`` matches."""
        # ``len`` shadows the builtin but is part of the public signature.
        Video.update(videoLen=len).where(Video.videoId == videoId).execute()

    @staticmethod
    def getLastVideoByChannelId(channelId):
        """Return the channel's video with the greatest ``videoPublishTime``.

        Raises:
            Video.DoesNotExist: if the channel has no stored video.
        """
        query = Video.select().where(Video.channelId == channelId)
        return query.order_by(Video.videoPublishTime.desc()).get()

    @staticmethod
    def getFirstVideoByChannelId(channelId):
        """Return the channel's video with the smallest ``videoPublishTime``.

        Raises:
            Video.DoesNotExist: if the channel has no stored video.
        """
        query = Video.select().where(Video.channelId == channelId)
        return query.order_by(Video.videoPublishTime).get()

    @staticmethod
    def checkExist(channelId):
        """Return True if at least one video is stored for ``channelId``."""
        return Video.select().where(Video.channelId == channelId).exists()

    @staticmethod
    def getVideosByTime(startTime, endTime):
        """Return the videos with ``startTime <= videoPublishTime <= endTime``.

        Both bounds are inclusive. The column is a CharField, so the
        comparison is done on its stored text value.
        """
        return Video.select().where(
            Video.videoPublishTime >= startTime,
            Video.videoPublishTime <= endTime,
        ).execute()
@ -0,0 +1,99 @@ |
|||||
|
import argparse |
||||
|
import random |
||||
|
import time |
||||
|
import Contant |
||||
|
from LoggerUtils import Logger, initLogger |
||||
|
import Orm |
||||
|
from VideoService import VideoService |
||||
|
from ChannelService import ChannelService |
||||
|
from VideoCountService import ViewCountService |
||||
|
from func_timeout import func_set_timeout |
||||
|
import func_timeout |
||||
|
import requests |
||||
|
import httplib2 |
||||
|
import googleapiclient.discovery |
||||
|
import googleapiclient.errors |
||||
|
import datetime |
||||
|
|
||||
|
# NOTE(review): getYoutube() and updateVideoViewCount() below read
# Contant.apiKeys / Contant.apiIndex, not these module-level copies; they look
# unused -- confirm before removing. The keys are credentials committed to the
# repository.
apiIndex = 0
apiKeys = [
    "AIzaSyDjPkCgDQ9Tv_xcChjY2E6GpJ6IzngnD5I",
    "AIzaSyAxIycOdQYGB5kWhwe3B-kJAYRo7wOnp8o",
    "AIzaSyCsYUC5vN0pB6y9xsCj0B1ehAoqOJ3WMf0"
]
||||
|
|
||||
|
|
||||
|
def getYoutube():
    """Build a YouTube Data API v3 client for the currently selected API key.

    The key is ``Contant.apiKeys[Contant.apiIndex]``; this function does not
    advance the index itself.

    Returns:
        The client object returned by ``googleapiclient.discovery.build``.
    """
    # The previous version also built an httplib2.ProxyInfo and held a
    # hard-coded key, but used neither; both were removed. To go through a
    # proxy, pass proxy_info=httplib2.ProxyInfo(...) to httplib2.Http.
    http = httplib2.Http(timeout=10, disable_ssl_certificate_validation=False)
    return googleapiclient.discovery.build(
        "youtube",
        "v3",
        developerKey=Contant.apiKeys[Contant.apiIndex],
        http=http,
    )
||||
|
|
||||
|
|
||||
|
def updateVideoViewCount(day, startTime, endTime):
    """Fetch and store current view counts for videos published in a window.

    Videos are loaded with ``VideoService.getVideosByTime`` and their
    statistics are requested from the YouTube ``videos.list`` endpoint in
    batches of at most 50 ids. Each returned item is passed to
    ``ViewCountService.createOrUpdateOne`` together with ``day``.

    Args:
        day: Value forwarded unchanged to ``ViewCountService.createOrUpdateOne``
            for every stored view count.
        startTime: Lower bound forwarded to ``VideoService.getVideosByTime``.
        endTime: Upper bound forwarded to ``VideoService.getVideosByTime``.

    Raises:
        Whatever ``request.execute()`` raises; API failures are not caught
        here. Failures while storing a single item are logged and skipped.
    """
    batchSize = 50
    videos = VideoService.getVideosByTime(startTime, endTime)
    total = len(videos)
    Logger.info(total)
    batchIds = []
    for position, video in enumerate(videos, start=1):
        Logger.info(video.videoId)
        batchIds.append(video.videoId)
        # Keep collecting until the batch is full or this is the last video.
        if len(batchIds) < batchSize and position != total:
            continue
        # Build the client per batch: the developer key is bound when the
        # client is built, so advancing Contant.apiIndex only takes effect
        # if the next batch gets a freshly built client.
        youtube = getYoutube()
        Contant.apiIndex = (Contant.apiIndex + 1) % len(Contant.apiKeys)
        # Join the ids so the list carries no leading or trailing comma.
        request = youtube.videos().list(part="statistics", id=",".join(batchIds))
        batchIds = []
        response = request.execute()
        for item in response['items']:
            try:
                Logger.info(item)
                ViewCountService.createOrUpdateOne(
                    item['id'], day, item['statistics']['viewCount'])
            except Exception as e:
                # Best effort: one bad item must not abort the rest of the
                # batch, but record why it failed.
                Logger.error("存储失败{}, error: {}".format(item, e))
||||
|
|
||||
|
# Usage: python ./view_count_main.py --db="../db/youtube_prod.db" --logDir="./logs"
# (Only --db and --logDir are parsed; the script always processes the previous 30 days, so --start/--end are not accepted.)
||||
|
|
||||
|
|
||||
|
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()

    # Publish the command-line settings on the shared Contant module before
    # the logger and ORM are initialised (presumably they read them -- confirm).
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    Orm.ormInit()

    # Refresh view counts for the videos published on each of the last 30 days.
    # NOTE(review): datetime.now() is naive local time, yet the formatted
    # strings end in "Z" -- confirm the stored publish times use the same zone.
    now = datetime.datetime.now()
    zero_today = now.replace(hour=0, minute=0, second=0, microsecond=0)
    end_today = now.replace(hour=23, minute=59, second=59, microsecond=0)
    # Hours, minutes, seconds in that order; the previous "%H:%S:%M" had
    # seconds and minutes swapped.
    timeFormat = "%Y-%m-%dT%H:%M:%SZ"
    for i in range(1, 31):
        dayOffset = datetime.timedelta(days=i)
        startTime = (zero_today - dayOffset).strftime(timeFormat)
        endTime = (end_today - dayOffset).strftime(timeFormat)
        Logger.info("day:%d, startTime:%s, endTime:%s" %
                    (i, startTime, endTime))
        updateVideoViewCount(i, startTime, endTime)
Loading…
Reference in new issue