zhangshu
7 months ago
48 changed files with 1511 additions and 1 deletions
@ -1,2 +1,3 @@ |
|||
# youtube_prod |
|||
# youtube_srt |
|||
|
|||
Youtube字幕项目 |
Binary file not shown.
Binary file not shown.
@ -0,0 +1,7 @@ |
|||
import json |
|||
from Orm import Channel |
|||
from playhouse.shortcuts import model_to_dict, dict_to_model |
|||
|
|||
class ChannelService:
    """Read access to Channel rows."""

    @staticmethod
    def getOneByChannelId(channelId):
        """Return the Channel whose channelId equals `channelId`, or None if no row matches."""
        return Channel.get_or_none(Channel.channelId == channelId)
@ -0,0 +1,2 @@ |
|||
# Process-wide settings; both start empty and are assigned by the entry
# script from its --db / --logDir command-line options.
db = ""
logDir = ""
@ -0,0 +1,28 @@ |
|||
from Orm import DownloadInfo |
|||
|
|||
|
|||
class DownloadService:
    """Queries and updates for DownloadInfo rows.

    The methods take no ``self`` and are meant to be called on the class,
    e.g. ``DownloadService.getOneByVideoId(videoId, 1)``.
    """

    @staticmethod
    def getOneByVideoId(videoId, downloadType):
        """Return the DownloadInfo row for (videoId, downloadType), or None.

        Uses get_or_none instead of get: callers test the result with
        ``is not None``, which only works if a missing row does not raise.
        """
        return DownloadInfo.get_or_none(
            DownloadInfo.videoId == videoId,
            DownloadInfo.downloadType == downloadType)

    @staticmethod
    def createOne(videoId, downloadType, tryTime, isFinished):
        """Insert a new DownloadInfo row with the given values."""
        DownloadInfo.create(
            videoId=videoId,
            downloadType=downloadType,
            tryTime=tryTime,
            isFinished=isFinished
        )

    @staticmethod
    def updateInfoByVideoId(videoId, tryTime, isFinished, downloadType):
        """Set tryTime and isFinished on the rows matching (videoId, downloadType)."""
        DownloadInfo.update(tryTime=tryTime, isFinished=isFinished).where(
            DownloadInfo.videoId == videoId,
            DownloadInfo.downloadType == downloadType).execute()

    @staticmethod
    def _findUnfinished(downloadType):
        """Return up to 10 unfinished rows of `downloadType` with tryTime <= 5."""
        return DownloadInfo.select().where(
            DownloadInfo.isFinished == 0,
            DownloadInfo.tryTime <= 5,
            DownloadInfo.downloadType == downloadType).limit(10).execute()

    @staticmethod
    def findNotFinishList():
        """Return up to 10 unfinished rows with downloadType 1 and tryTime <= 5."""
        return DownloadService._findUnfinished(1)

    @staticmethod
    def changeDownloadType(videoId, tryTime, isFinished, downloadType, changeType):
        """Move the rows matching (videoId, downloadType) to `changeType`.

        tryTime and isFinished are overwritten with the given values in the
        same UPDATE.
        """
        DownloadInfo.update(tryTime=tryTime, isFinished=isFinished, downloadType=changeType).where(
            DownloadInfo.videoId == videoId,
            DownloadInfo.downloadType == downloadType).execute()

    @staticmethod
    def findNotFinishListTwo():
        """Return up to 10 unfinished rows with downloadType 2 and tryTime <= 5."""
        return DownloadService._findUnfinished(2)
@ -0,0 +1,172 @@ |
|||
from shutil import copyfile |
|||
from youtube_transcript_api import YouTubeTranscriptApi |
|||
from youtube_transcript_api.formatters import SRTFormatter |
|||
from VideoService import VideoService |
|||
from ChannelService import ChannelService |
|||
from DownloadInfoService import DownloadService |
|||
from LoggerUtils import Logger |
|||
import time |
|||
import os |
|||
from func_timeout import func_set_timeout |
|||
import operator |
|||
|
|||
|
|||
class DownLoadUtil:
    """Fetch YouTube transcripts, save them as .srt files and record the outcome.

    The public entry points take no ``self`` and are called on the class,
    e.g. ``DownLoadUtil.downloadOne(videoId)``. Each entry point is limited to
    60 seconds by ``func_set_timeout``.
    """

    formatter = SRTFormatter()
    # NOTE(review): `proxies` is not referenced anywhere in this class; confirm
    # whether it is still needed.
    proxies = {"http": "http://127.0.0.1:7890",
               "https": "https://127.0.0.1:7890"}

    @staticmethod
    def _cleanTitle(rawTitle):
        """Return the title with path-hostile characters replaced or removed."""
        title = str(rawTitle).replace("/", u"\u2215")
        # NOTE(review): the original also ran .replace("?", "?"), which is a
        # no-op; the replacement was possibly a full-width question mark lost
        # in an encoding round-trip - confirm before changing.
        for forbidden in ("\\", "|", "<", ">", ":"):
            title = title.replace(forbidden, "")
        return title

    @staticmethod
    def _ensureDir(path):
        """Create `path` (and parents) when it does not exist yet."""
        if not os.path.exists(path):
            Logger.info("开始创建文件夹:" + path)
            os.makedirs(path)

    @staticmethod
    def _trimLongPaths(storePath, cpPath):
        """Drop the last 20 characters of both paths when cpPath exceeds 120 characters.

        The ".srt" suffix is part of the dropped tail, so it is re-appended.
        """
        if len(cpPath) > 120:
            return storePath[:-20] + ".srt", cpPath[:-20] + ".srt"
        return storePath, cpPath

    @staticmethod
    def _fetchSrt(videoId, languages):
        """Download the transcript in `languages` and return it as SRT text."""
        videoSrt = YouTubeTranscriptApi.get_transcript(
            videoId, languages=[languages])
        return DownLoadUtil.formatter.format_transcript(videoSrt)

    @staticmethod
    def _saveSrt(path, srtText):
        """Write `srtText` to `path` as UTF-8."""
        with open(path, 'w', encoding='utf-8') as srt_file:
            srt_file.write(srtText)

    @staticmethod
    def _findDownloadInfo(videoId, downloadType):
        """Return the DownloadInfo row, or None when it is missing or the lookup fails.

        The lookup is wrapped so that a "row not found" exception raised by
        the service cannot escape from the failure handlers below.
        """
        try:
            return DownloadService.getOneByVideoId(videoId, downloadType)
        except Exception as e:
            Logger.error("DownloadInfo lookup failed for {}: {}".format(videoId, e))
            return None

    @staticmethod
    def _recordSuccess(videoId, downloadType):
        """Flag the video as downloaded and close its DownloadInfo row."""
        VideoService.updateIsDownloadByVideoId(videoId, 1)
        downloadInfo = DownLoadUtil._findDownloadInfo(videoId, downloadType)
        if downloadInfo is not None:
            DownloadService.updateInfoByVideoId(
                videoId, downloadInfo.tryTime + 1, 1, downloadType)

    @staticmethod
    def _recordRetry(videoId, downloadType, downloadInfo):
        """Increase the try counter of an unfinished DownloadInfo row."""
        if downloadInfo is not None:
            Logger.info("VideoId:{}开始重试第{}次".format(
                videoId, downloadInfo.tryTime + 1))
            DownloadService.updateInfoByVideoId(
                videoId, downloadInfo.tryTime + 1, 0, downloadType)

    @staticmethod
    def _loadVideoAndChannel(videoId, downloadType):
        """Return (video, channel), or (None, None) when either row is missing.

        A missing row is counted as a failed try so the queue entry is
        eventually dropped instead of crashing on attribute access.
        """
        video = VideoService.getOneByVideoId(videoId)
        channel = None
        if video is not None:
            channel = ChannelService.getOneByChannelId(str(video.channelId))
        if video is None or channel is None:
            Logger.error(
                "VideoId:{} has no matching video/channel row, skipping".format(videoId))
            DownLoadUtil._recordRetry(
                videoId, downloadType,
                DownLoadUtil._findDownloadInfo(videoId, downloadType))
            return None, None
        return video, channel

    @staticmethod
    def _saveUnderVideoId(videoId, languages, channelTitle, videoPublishTime):
        """Fallback for over-long file names: use the video id instead of the title."""
        srt_formatted = DownLoadUtil._fetchSrt(videoId, languages)
        fileName = videoPublishTime + "-" + languages + "-" + videoId + ".srt"
        storePath, cpPath = DownLoadUtil._trimLongPaths(
            "/mnt/srt_file/" + channelTitle + "/" + fileName,
            "/mnt/tmp_srt_file/" + channelTitle + "/" + fileName)
        Logger.info("文件名过长,文件地址...{}".format(storePath))
        DownLoadUtil._saveSrt(storePath, srt_formatted)
        Logger.info("下载完成...{}".format(videoId))
        copyfile(storePath, cpPath)
        DownLoadUtil._recordSuccess(videoId, 1)

    @staticmethod
    @func_set_timeout(60)
    def downloadOne(videoId):
        """Download the transcript of `videoId` (download type 1).

        The .srt file is written below /mnt/srt_file/<channel title>/ and
        copied to /mnt/tmp_srt_file/<channel title>/. On success the video and
        its DownloadInfo row are marked finished. On failure the row is moved
        to download type 2 ("No transcripts"), retried under a shorter file
        name ("File name too long"), or has its try counter increased.
        """
        video, channel = DownLoadUtil._loadVideoAndChannel(videoId, 1)
        if video is None:
            return
        videoTitle = DownLoadUtil._cleanTitle(video.videoTitle)
        # Keep only the part of the publish time before "T".
        videoPublishTime = str(video.videoPublishTime).split("T")[0]
        languages = str(video.videoLanguage)
        channelTitle = str(channel.channelTitle)
        Logger.info("开始下载...{}".format(videoId))
        try:
            storeDir = "/mnt/srt_file/" + channelTitle
            cpDir = "/mnt/tmp_srt_file/" + channelTitle
            DownLoadUtil._ensureDir(storeDir)
            DownLoadUtil._ensureDir(cpDir)
            fileName = videoPublishTime + "-" + languages + "-" + videoTitle + ".srt"
            storePath, cpPath = DownLoadUtil._trimLongPaths(
                storeDir + "/" + fileName, cpDir + "/" + fileName)
            srt_formatted = DownLoadUtil._fetchSrt(videoId, languages)
            Logger.info("文件地址...{}".format(storePath))
            DownLoadUtil._saveSrt(storePath, srt_formatted)
            Logger.info("下载完成...{}".format(videoId))
            copyfile(storePath, cpPath)
            DownLoadUtil._recordSuccess(videoId, 1)
        except Exception as e:
            Logger.error("下载失败...{}".format(videoId))
            logStr = "Exception...{}".format(e)
            Logger.error(logStr)
            downloadInfo = DownLoadUtil._findDownloadInfo(videoId, 1)
            if "No transcripts" in logStr:
                Logger.error("VideoId:{},不存在字幕文件".format(videoId))
                if downloadInfo is not None:
                    DownloadService.changeDownloadType(
                        videoId, 0, 0, 1, 2)
            elif "File name too long" in logStr:
                try:
                    DownLoadUtil._saveUnderVideoId(
                        videoId, languages, channelTitle, videoPublishTime)
                except Exception as retryError:
                    # Do not let the fallback escape the handler; count it as a try.
                    Logger.error("Exception...{}".format(retryError))
                    DownLoadUtil._recordRetry(videoId, 1, downloadInfo)
            else:
                DownLoadUtil._recordRetry(videoId, 1, downloadInfo)

    @staticmethod
    @func_set_timeout(60)
    def downloadTwo(videoId):
        """Download the transcript of `videoId` (download type 2).

        The .srt file is written below ./download/<channel title>/. On success
        the video and its DownloadInfo row are marked finished. On
        "No transcripts" the row is moved to download type 3 with tryTime 6;
        on any other failure its try counter is increased.
        """
        # getOneByVideoId takes the video id only; the original passed an
        # extra argument here, which raised TypeError.
        video, channel = DownLoadUtil._loadVideoAndChannel(videoId, 2)
        if video is None:
            return
        videoTitle = DownLoadUtil._cleanTitle(video.videoTitle)
        videoPublishTime = str(video.videoPublishTime).split("T")[0]
        languages = str(video.videoLanguage)
        Logger.info("开始下载...{}".format(videoId))
        try:
            storeDir = "./download/" + str(channel.channelTitle)
            DownLoadUtil._ensureDir(storeDir)
            # os.path.join picks the platform's separator instead of a
            # hard-coded backslash.
            storePath = os.path.join(
                storeDir,
                videoPublishTime + "-" + languages + "-" + videoTitle + ".srt")
            srt_formatted = DownLoadUtil._fetchSrt(videoId, languages)
            Logger.info("文件地址...{}".format(storePath))
            DownLoadUtil._saveSrt(storePath, srt_formatted)
            Logger.info("下载完成...{}".format(videoId))
            DownLoadUtil._recordSuccess(videoId, 2)
        except Exception as e:
            Logger.error("下载失败...{}".format(videoId))
            logStr = "Exception...{}".format(e)
            Logger.error(logStr)
            downloadInfo = DownLoadUtil._findDownloadInfo(videoId, 2)
            if "No transcripts" in logStr:
                Logger.error("VideoId:{},不存在字幕文件".format(videoId))
                if downloadInfo is not None:
                    DownloadService.changeDownloadType(
                        videoId, 6, 0, 2, 3)
            else:
                DownLoadUtil._recordRetry(videoId, 2, downloadInfo)
@ -0,0 +1,6 @@ |
|||
from loguru import logger |
|||
import Contant |
|||
Logger = logger |
|||
def initLogger():
    """Add a file sink below Contant.logDir to the shared loguru logger.

    The sink rotates at 500MB, zips rotated files and keeps them for 10 days.
    """
    sinkPath = Contant.logDir + "/download_{time}.log"
    logger.add(
        sinkPath,
        rotation="500MB",
        encoding="utf-8",
        enqueue=True,
        compression="zip",
        retention="10 days",
    )
@ -0,0 +1,65 @@ |
|||
from peewee import * |
|||
import Contant |
|||
import argparse |
|||
from LoggerUtils import Logger |
|||
|
|||
# The database path is read from the command line at import time because the
# model classes below bind to `db` when they are defined.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
# parse_known_args ignores options this module does not declare, so importing
# it from a script with additional options no longer aborts with a usage error.
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
|||
|
|||
|
|||
def ormInit():
    """Call create_table() on Channel, Video and DownloadInfo, in that order."""
    for model in (Channel, Video, DownloadInfo):
        model.create_table()
|||
|
|||
|
|||
class BaseModel(Model):
    """Shared base class that binds its subclasses to the module-level `db`."""

    class Meta:
        database = db
|||
|
|||
# 频道信息 |
|||
|
|||
|
|||
class Channel(BaseModel):
    """Channel record, stored in the 'Channel' table."""

    id = PrimaryKeyField()
    channelId = CharField(null=False)
    channelTitle = CharField(null=False)
    channelLanguage = CharField()
    # The only column of this model that accepts NULL.
    channelReptileTime = CharField(null=True)

    class Meta:
        db_table = 'Channel'
|||
|
|||
# 视频信息 |
|||
|
|||
|
|||
class Video(BaseModel):
    """Video record, stored in the 'Vidoes' table."""

    id = PrimaryKeyField()
    videoId = CharField(null=False)
    channelId = CharField(null=False)
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()

    class Meta:
        # NOTE(review): 'Vidoes' looks like a misspelling of 'Videos', but
        # changing it would point the model at a different table.
        db_table = 'Vidoes'
|||
|
|||
# 下载信息 |
|||
|
|||
|
|||
class DownloadInfo(BaseModel):
    """Download bookkeeping record, stored in the 'Download_info' table."""

    id = PrimaryKeyField()
    videoId = CharField()
    downloadType = IntegerField()
    tryTime = IntegerField()
    isFinished = IntegerField()

    class Meta:
        db_table = 'Download_info'
@ -0,0 +1,26 @@ |
|||
import json |
|||
from Orm import Video |
|||
from playhouse.shortcuts import model_to_dict, dict_to_model |
|||
|
|||
|
|||
class VideoService:
    """Queries and updates for Video rows.

    The methods take no ``self`` and are meant to be called on the class.
    """

    @staticmethod
    def getOneByVideoId(videoId):
        """Return the Video whose videoId equals `videoId`, or None if no row matches."""
        return Video.get_or_none(Video.videoId == videoId)

    @staticmethod
    def createOne(videoId, channelId, videoTitle, videoLen, videoType, videoPublishTime, videoLanguage, isDownload):
        """Insert a new Video row with the given column values."""
        Video.create(videoId=videoId,
                     channelId=channelId,
                     videoTitle=videoTitle,
                     videoLen=videoLen,
                     videoType=videoType,
                     videoPublishTime=videoPublishTime,
                     videoLanguage=videoLanguage,
                     isDownload=isDownload)

    @staticmethod
    def updateLenByVideoId(videoId, len):
        """Set videoLen on the rows whose videoId equals `videoId`.

        The parameter name `len` shadows the builtin; it is kept so existing
        keyword callers keep working.
        """
        Video.update(videoLen=len).where(Video.videoId == videoId).execute()

    @staticmethod
    def updateIsDownloadByVideoId(videoId, isDownload):
        """Set isDownload on the rows whose videoId equals `videoId`."""
        Video.update(isDownload=isDownload).where(
            Video.videoId == videoId).execute()
|||
|
Binary file not shown.
@ -0,0 +1,49 @@ |
|||
import argparse |
|||
import random |
|||
import time |
|||
import Contant |
|||
from LoggerUtils import Logger, initLogger |
|||
import Orm |
|||
from VideoService import VideoService |
|||
from ChannelService import ChannelService |
|||
from DownloadInfoService import DownloadService |
|||
from DownloadUtil import DownLoadUtil |
|||
from func_timeout import func_set_timeout |
|||
import func_timeout |
|||
import requests |
|||
|
|||
# python3 ./main_download.py --db="../db/youtube_prod.db" --logDir="./logs" |
|||
if __name__ == "__main__":
    # Read --db / --logDir, then set up logging and the tables.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    Orm.ormInit()

    # Work through the unfinished rows in batches until the query returns none.
    pending = DownloadService.findNotFinishList()
    Logger.info("list size:{}".format(len(pending)))
    while len(pending) > 0:
        for info in pending:
            try:
                DownLoadUtil.downloadOne(info.videoId)
            except func_timeout.exceptions.FunctionTimedOut as e:
                Logger.error("执行下载方法超时错误:{}".format(e))
                # Count the timeout as a try. Otherwise the row never changes
                # and the same batch would be returned forever.
                DownloadService.updateInfoByVideoId(
                    info.videoId, info.tryTime + 1, 0, 1)
            except Exception as e:
                # One failing video must not abort the whole run.
                Logger.error("Unexpected download error for {}: {}".format(
                    info.videoId, e))
                DownloadService.updateInfoByVideoId(
                    info.videoId, info.tryTime + 1, 0, 1)
            else:
                restTime = random.randint(1, 3)
                Logger.info("间隔{}秒后继续...".format(restTime))
                time.sleep(restTime)
        loopRestTime = random.randint(1, 3)
        Logger.info("循环间隔{}秒后继续...".format(loopRestTime))
        time.sleep(loopRestTime)
        pending = DownloadService.findNotFinishList()

    # Send the DingTalk notification.
    # NOTE(review): the access token is hard-coded in source control; it
    # should be rotated and loaded from configuration instead.
    webhook = "https://oapi.dingtalk.com/robot/send?access_token=c8c8d7d42c4eecd449dd303025ef968f647d1d8e8694e3fabc0ab5770d646dcb"
    jsonData = {
        "msgtype": "text",
        "text": {
            "content": "[Youtube]download finished"
        }
    }
    try:
        # The timeout keeps the script from hanging when the webhook is unreachable.
        requests.post(webhook, json=jsonData, timeout=10)
        Logger.info("download发送钉钉消息成功...")
    except requests.RequestException as e:
        Logger.error("download发送钉钉消息失败:{}".format(e))
@ -0,0 +1,2 @@ |
|||
# Process-wide settings; both start empty and are assigned by the entry
# script from its --db / --logDir command-line options.
db = ""
logDir = ""
@ -0,0 +1,6 @@ |
|||
from loguru import logger |
|||
import Contant |
|||
Logger = logger |
|||
def initLogger():
    """Add a file sink below Contant.logDir to the shared loguru logger.

    The sink rotates at 500MB, zips rotated files and keeps them for 10 days.
    """
    sinkOptions = {
        "rotation": "500MB",
        "encoding": "utf-8",
        "enqueue": True,
        "compression": "zip",
        "retention": "10 days",
    }
    logger.add(Contant.logDir + "/init_{time}.log", **sinkOptions)
@ -0,0 +1,65 @@ |
|||
from peewee import * |
|||
import Contant |
|||
import argparse |
|||
from LoggerUtils import Logger |
|||
|
|||
# The database path is read from the command line at import time because the
# model classes below bind to `db` when they are defined.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
# parse_known_args ignores options this module does not declare, so importing
# it from a script with additional options no longer aborts with a usage error.
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
|||
|
|||
|
|||
def ormInit():
    """Call create_table() on Channel, Vidoe and DownloadInfo, in that order."""
    for model in (Channel, Vidoe, DownloadInfo):
        model.create_table()
|||
|
|||
|
|||
class BaseModel(Model):
    """Shared base class that binds its subclasses to the module-level `db`."""

    class Meta:
        database = db
|||
|
|||
# 频道信息 |
|||
|
|||
|
|||
class Channel(BaseModel):
    """Channel record, stored in the 'Channel' table."""

    id = PrimaryKeyField()
    channelId = CharField(null=False)
    channelTitle = CharField(null=False)
    channelLanguage = CharField()
    # The only column of this model that accepts NULL.
    channelReptileTime = CharField(null=True)

    class Meta:
        db_table = 'Channel'
|||
|
|||
# 视频信息 |
|||
|
|||
|
|||
class Vidoe(BaseModel):
    """Video record, stored in the 'Vidoes' table.

    NOTE(review): the class name looks like a misspelling of 'Video'; it is
    kept because ormInit in this module refers to it by this name.
    """

    id = PrimaryKeyField()
    videoId = CharField(null=False)
    channelId = CharField(null=False)
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()

    class Meta:
        # NOTE(review): 'Vidoes' looks like a misspelling of 'Videos', but
        # changing it would point the model at a different table.
        db_table = 'Vidoes'
|||
|
|||
# 下载信息 |
|||
|
|||
|
|||
class DownloadInfo(BaseModel):
    """Download bookkeeping record, stored in the 'Download_info' table."""

    id = PrimaryKeyField()
    videoId = CharField()
    downloadType = IntegerField()
    tryTime = IntegerField()
    isFinished = IntegerField()

    class Meta:
        db_table = 'Download_info'
@ -0,0 +1,56 @@ |
|||
from LoggerUtils import Logger, initLogger |
|||
import argparse |
|||
import Contant |
|||
from Orm import ormInit, Channel |
|||
import operator |
|||
from bs4 import BeautifulSoup as bs |
|||
from urllib.request import urlopen, Request |
|||
|
|||
# py .\init.py --db=../db/youtube_prod.db --logDir=./logs |
|||
def saveChannel(channelUrl, language):
    """Fetch a channel page and store the channel unless it is already known.

    The channel id is taken from the part after "/channel/" in the href of
    the <link> tag whose markup contains "canonical"; the channel name from
    the `content` attribute of a <link> tag whose markup contains "content=".
    Nothing is stored when no channel id can be extracted or when a Channel
    row with that id already exists.

    Args:
        channelUrl: URL of the channel page to fetch.
        language: value stored in the new row's channelLanguage column.
    """
    Logger.info("频道链接:"+channelUrl)
    channelId = ""
    channelName = ""
    pageRequest = Request(channelUrl, headers={'User-Agent': 'Mozilla'})
    # The context manager closes the HTTP response once the page is parsed.
    with urlopen(pageRequest) as url_opener:
        videoInfo = bs(url_opener, features="html.parser")
    for link in videoInfo.find_all("link"):
        markup = str(link)
        if "canonical" in markup:
            # A canonical URL without "/channel/" (e.g. a search page) yields
            # a single part; leave channelId empty instead of raising.
            hrefParts = str(link.get('href', '')).split("/channel/")
            if len(hrefParts) > 1:
                channelId = hrefParts[1]
        if "content=" in markup:
            channelName = str(link['content'])
    Logger.info("channelId:"+channelId)
    Logger.info("channelName:"+channelName)
    if not channelId:
        # Without an id the row could never be matched again; skip it.
        Logger.error("No channel id found, skipped: " + channelUrl)
        return
    channel = Channel.get_or_none(Channel.channelId == channelId)
    if channel is not None:
        Logger.info("频道已存在:" + channelId)
        return
    Channel.create(channelTitle=channelName,
                   channelId=channelId, channelLanguage=language)
|||
|
|||
|
|||
if __name__ == "__main__":
    # Read --db / --logDir, then set up logging and the tables.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    ormInit()
    Logger.info("SqlLite存放地址:"+Contant.db)
    Logger.info("日志文件存放地址:"+Contant.logDir)
    Logger.info("开始初始化...")
    # checkInit()
    # Read the channel URLs to add from urlList.txt.
    Logger.info("开始读取需要新增的频道地址...")
    # `with` closes the file; strip() also removes stray spaces and "\r".
    with open("urlList.txt") as urlFile:
        urlList = [line.strip() for line in urlFile]
    if not urlList:
        Logger.error("urlList.txt is empty, nothing to import")
    else:
        # The first line is the language; it is applied to every URL.
        language = urlList[0]
        for url in urlList:
            # Lines of 10 characters or fewer (the language line, blank
            # lines) are not treated as URLs.
            if len(url) > 10:
                try:
                    saveChannel(url, language)
                except Exception as e:
                    # Keep importing the remaining channels.
                    Logger.error("Failed to save channel {}: {}".format(url, e))
@ -0,0 +1,14 @@ |
|||
zh-TW |
|||
https://www.youtube.com/@TheStormMedia |
|||
https://www.youtube.com/@57ETFN |
|||
https://www.youtube.com/@MoneyNewWorld |
|||
https://www.youtube.com/@tvbsmoney |
|||
https://www.youtube.com/@TheMasterhsiao |
|||
https://www.youtube.com/@mvp5888 |
|||
https://www.youtube.com/@HUNG64 |
|||
https://www.youtube.com/@user-vc2vr6tw4h |
|||
https://www.youtube.com/ustv |
|||
https://www.youtube.com/@leon888 |
|||
https://www.youtube.com/@smartmonthly-BW |
|||
https://www.youtube.com/@ustvstockonline |
|||
https://www.youtube.com/@AASTOCKS_AATV |
@ -0,0 +1,90 @@ |
|||
hi |
|||
https://www.youtube.com/@procapitalacademy |
|||
https://www.youtube.com/@TEACHERANISH |
|||
https://www.youtube.com/@MarketGurukul1 |
|||
|
|||
|
|||
|
|||
en |
|||
https://www.youtube.com/@VishalKhandelwalshow |
|||
https://www.youtube.com/@Elearnmarkets |
|||
https://www.youtube.com/@MarketsMojo |
|||
https://www.youtube.com/@TradeWithTrend |
|||
https://www.youtube.com/@SHAREKHAN |
|||
https://www.youtube.com/@AvadhutSatheTradingAcademy |
|||
|
|||
|
|||
|
|||
ko |
|||
https://www.youtube.com/@E_TREND |
|||
https://www.youtube.com/@hkwownet |
|||
https://www.youtube.com/@giant_tv |
|||
https://www.youtube.com/@StrongStock |
|||
https://www.youtube.com/@stockwar999 |
|||
https://www.youtube.com/@user-sp1du8pm6q |
|||
https://www.youtube.com/@talentinvestment |
|||
https://www.youtube.com/@future_economy |
|||
https://www.youtube.com/@user-sf7hm6xj8d |
|||
https://www.youtube.com/@user-xv9xi6pi9o |
|||
https://www.youtube.com/@user-rd8fd1xj9b |
|||
https://www.youtube.com/@lucky_tv |
|||
https://www.youtube.com/@Min_woo |
|||
https://www.youtube.com/@taver1123 |
|||
https://www.youtube.com/@Super0Min |
|||
https://www.youtube.com/@ap5798 |
|||
https://www.youtube.com/@drematree100 |
|||
https://www.youtube.com/@MKeconomy_TV |
|||
https://www.youtube.com/@grit |
|||
https://www.youtube.com/@user-zn9js9fg5i |
|||
https://www.youtube.com/@youngikkim |
|||
https://www.youtube.com/@DonNawa |
|||
https://www.youtube.com/@woong-dal |
|||
https://www.youtube.com/@johnleeschool |
|||
https://www.youtube.com/@syukaworld-comics |
|||
https://www.youtube.com/@channelA-news |
|||
https://www.youtube.com/@user-bh7lr7pe9g |
|||
https://www.youtube.com/@singlefire |
|||
https://www.youtube.com/@moneyhi |
|||
https://www.youtube.com/@top.trader |
|||
https://www.youtube.com/@jusikdante |
|||
|
|||
|
|||
|
|||
zh-TW |
|||
https://www.youtube.com/@kukantieh |
|||
|
|||
ja |
|||
https://www.youtube.com/@DanTakahashi1 |
|||
https://www.youtube.com/@tvtokyobiz |
|||
https://www.youtube.com/@SHO1112 |
|||
https://www.youtube.com/@pivot8935 |
|||
https://www.youtube.com/@nikkei |
|||
https://www.youtube.com/@toushikomon |
|||
https://www.youtube.com/@pivot8935 |
|||
https://www.youtube.com/@NewsPicks/featured |
|||
https://www.youtube.com/@higedura24 |
|||
https://www.youtube.com/@tvtokyobiz |
|||
https://www.youtube.com/@omaegaowattendayo |
|||
https://www.youtube.com/@info_ask1 |
|||
https://www.youtube.com/@takaisanno/videos |
|||
https://www.youtube.com/@takaponjp |
|||
https://www.youtube.com/@tbsnewsdig |
|||
https://www.youtube.com/@rehacq |
|||
https://www.youtube.com/@mabuchi-mariko |
|||
https://www.youtube.com/@fp_nigu |
|||
https://www.youtube.com/@yukkuri-money |
|||
https://www.youtube.com/@SHO1112 |
|||
https://www.youtube.com/@yohei-chokin |
|||
https://www.youtube.com/@user-yu9sj9gq7z/videos |
|||
https://www.youtube.com/@tesuta-clipping |
|||
https://www.youtube.com/@tradelabo2222 |
|||
https://www.youtube.com/@jin115xx |
|||
https://www.youtube.com/@higedura24 |
|||
https://www.youtube.com/@nobujuku |
|||
https://www.youtube.com/@tokyosoken |
|||
https://www.youtube.com/@user-hx7bn7hp9v |
|||
https://www.youtube.com/@SLokRE |
|||
https://www.youtube.com/@rehacq |
|||
https://www.youtube.com/@moha-p |
|||
https://www.youtube.com/results?search_query=Buffett+Taro%27s |
|||
https://www.youtube.com/@Gorikoro |
@ -0,0 +1,7 @@ |
|||
ja |
|||
https://www.youtube.com/@ryogakucho |
|||
https://www.youtube.com/@DanTakahashi1 |
|||
https://www.youtube.com/@buffett_taro |
|||
https://www.youtube.com/@Tsubame104 |
|||
https://www.youtube.com/@inc_academy |
|||
https://www.youtube.com/@kamioka01 |
@ -0,0 +1 @@ |
|||
# Log directory; starts empty and is assigned by the entry script from its
# --logDir command-line option.
logDir = ""
@ -0,0 +1,6 @@ |
|||
from loguru import logger |
|||
import Contant |
|||
Logger = logger |
|||
def initLogger():
    """Add a file sink below Contant.logDir to the shared loguru logger.

    The sink rotates at 500MB, zips rotated files and keeps them for 10 days.
    """
    logFile = "{}/sftp_{{time}}.log".format(Contant.logDir)
    logger.add(
        logFile,
        rotation="500MB",
        encoding="utf-8",
        enqueue=True,
        compression="zip",
        retention="10 days",
    )
@ -0,0 +1,97 @@ |
|||
import os |
|||
import shutil |
|||
import paramiko |
|||
import argparse |
|||
import Contant |
|||
from LoggerUtils import Logger, initLogger |
|||
import configparser |
|||
import requests |
|||
import time |
|||
|
|||
# python3 sftp.py --local="/mnt/tmp_srt_file" --logDir="./logs" |
|||
# python3 sftp.py --local="/mnt/test_file" --logDir="./logs" |
|||
def main():
    """Upload every file below --local to the SFTP server, then notify DingTalk.

    Each sub-directory of the local root is mirrored to a directory of the
    same name below the remote root. A local file is deleted after its upload
    call returns.
    """
    # Read the command-line options.
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--local", type=str, default="")
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.logDir = args.logDir
    initLogger()

    # Read the SFTP settings from the configuration file.
    config = configparser.ConfigParser()
    config.read('sftp_config.ini')
    hostname = config.get('sftp_config', 'hostname')
    port = config.getint('sftp_config', 'port')
    username = config.get('sftp_config', 'username')
    password = config.get('sftp_config', 'password')

    # The password is deliberately not logged.
    Logger.info("host:{},port:{},username:{}".format(
        hostname, port, username))

    ssh_client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without
    # verification; consider loading known host keys instead.
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    sftp_client = None
    try:
        ssh_client.connect(hostname, port, username, password)
        sftp_client = ssh_client.open_sftp()
        Logger.info("SFTP客户端已经建立:{}".format(sftp_client))

        remote_root = "/Inbound/YouTube Captions"
        local_root = args.local
        Logger.info("remote_root:{},local_root:{}".format(remote_root, local_root))

        for name in os.listdir(local_root):
            localDir = local_root + "/" + name
            remoteDir = remote_root + "/" + name
            if not os.path.isdir(localDir):
                # Only directories are mirrored; a stray file would make
                # os.listdir below fail.
                Logger.info("Skipping non-directory entry '{}'".format(localDir))
                continue
            # Create the remote directory when entering it fails.
            try:
                sftp_client.chdir(remoteDir)
            except IOError:
                sftp_client.mkdir(remoteDir)
                sftp_client.chdir(remoteDir)

            # Walk the local directory.
            for srt in os.listdir(localDir):
                remotePath = remoteDir + "/" + srt
                localPath = localDir + "/" + srt
                # Shorten over-long remote paths first, so the existence
                # check and the upload use the same path.
                if len(remotePath) > 120:
                    remotePath = remotePath[:-20] + ".srt"
                # Delete the remote file when it already exists.
                try:
                    sftp_client.stat(remotePath)
                    sftp_client.remove(remotePath)
                    Logger.info("Remote file '{}' deleted.".format(remotePath))
                except FileNotFoundError:
                    Logger.info("Remote file '{}' not found.".format(remotePath))
                # Upload the local file.
                try:
                    if os.path.exists(localPath):
                        Logger.info("本地文件 '{}' 存在,开始上传.".format(localPath))
                        # NOTE(review): confirm=False skips the post-upload
                        # size check, yet the local copy is removed right after.
                        sftp_client.put(localPath, remotePath, confirm=False)
                        os.remove(localPath)
                    else:
                        Logger.info("本地文件 '{}' 不存在,无法上传.".format(localPath))
                except Exception as e:
                    Logger.info("上传失败 '{}' 文件名长度{}".format(
                        remotePath, len(remotePath)))
                    Logger.error(e)
                    # Re-open the SFTP channel after a failed upload.
                    sftp_client.close()
                    sftp_client = ssh_client.open_sftp()
    finally:
        # Always release the SFTP channel and the SSH connection.
        if sftp_client is not None:
            sftp_client.close()
        ssh_client.close()

    # Send the DingTalk notification.
    # NOTE(review): the access token is hard-coded in source control; it
    # should be rotated and loaded from configuration instead.
    webhook = "https://oapi.dingtalk.com/robot/send?access_token=c8c8d7d42c4eecd449dd303025ef968f647d1d8e8694e3fabc0ab5770d646dcb"
    jsonData = {
        "msgtype": "text",
        "text": {
            "content": "[Youtube]sftp finished"
        }
    }
    try:
        # The timeout keeps the script from hanging when the webhook is unreachable.
        requests.post(webhook, json=jsonData, timeout=10)
        Logger.info("sftp发送钉钉消息成功...")
    except requests.RequestException as e:
        Logger.error("sftp发送钉钉消息失败:{}".format(e))


if __name__ == "__main__":
    main()
@ -0,0 +1,5 @@ |
|||
[sftp_config] |
|||
hostname = filetransfer.blackrock.com |
|||
port = 22 |
|||
username = ftp_yunbo |
|||
password = s8v{8SJr |
@ -0,0 +1,15 @@ |
|||
import json |
|||
from Orm import Channel |
|||
from playhouse.shortcuts import model_to_dict, dict_to_model |
|||
|
|||
|
|||
class ChannelService:
    """Queries and updates for Channel rows.

    The methods take no ``self`` and are meant to be called on the class.
    """

    @staticmethod
    def getOneByChannelId(channelId):
        """Return the Channel whose channelId equals `channelId`, or None if no row matches."""
        return Channel.get_or_none(Channel.channelId == channelId)

    @staticmethod
    def updateTimeByChannelId(channelId, chageTime):
        """Set channelReptileTime to `chageTime` on the rows matching `channelId`."""
        Channel.update(channelReptileTime=chageTime).where(
            Channel.channelId == channelId).execute()

    @staticmethod
    def getChannelList():
        """Return the executed SELECT over all Channel rows."""
        return Channel.select().execute()
@ -0,0 +1,4 @@ |
|||
# Process-wide settings; every value starts as an empty string.
db = ""
logDir = ""
startTime = ""
endTime = ""
@ -0,0 +1,16 @@ |
|||
from Orm import DownloadInfo |
|||
|
|||
|
|||
class DownloadService:
    """Inserts and updates for DownloadInfo rows.

    The methods take no ``self`` and are meant to be called on the class.
    """

    @staticmethod
    def createOne(videoId, downloadType, tryTime, isFinished):
        """Insert a new DownloadInfo row with the given values."""
        DownloadInfo.create(
            videoId=videoId,
            downloadType=downloadType,
            tryTime=tryTime,
            isFinished=isFinished
        )

    @staticmethod
    def updateInfoByVideoId(videoId, tryTime, isFinished):
        """Set tryTime and isFinished on every row whose videoId equals `videoId`.

        The filter is on videoId only, so rows of all download types are updated.
        """
        DownloadInfo.update(tryTime=tryTime, isFinished=isFinished).where(
            DownloadInfo.videoId == videoId).execute()
@ -0,0 +1,6 @@ |
|||
from loguru import logger |
|||
import Contant |
|||
Logger = logger |
|||
def initLogger():
    """Add a file sink below Contant.logDir to the shared loguru logger.

    The sink rotates at 500MB, zips rotated files and keeps them for 10 days.
    """
    target = "".join([Contant.logDir, "/main_{time}.log"])
    logger.add(
        target,
        rotation="500MB",
        encoding="utf-8",
        enqueue=True,
        compression="zip",
        retention="10 days",
    )
@ -0,0 +1,68 @@ |
|||
from peewee import * |
|||
import Contant |
|||
import argparse |
|||
from LoggerUtils import Logger |
|||
|
|||
# The database path is read from the command line at import time because the
# model classes below bind to `db` when they are defined.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
parser.add_argument("--start", type=str, default="")
parser.add_argument("--end", type=str, default="")
parser.add_argument("--channelId", type=str, default="")
# parse_known_args ignores options this module does not declare, so a new
# option on the entry script no longer has to be duplicated here to avoid a
# usage error at import time.
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
|||
|
|||
|
|||
def ormInit():
    """Call create_table() on Channel, Video and DownloadInfo, in that order."""
    for model in (Channel, Video, DownloadInfo):
        model.create_table()
|||
|
|||
|
|||
class BaseModel(Model):
    """Shared base class that binds its subclasses to the module-level `db`."""

    class Meta:
        database = db
|||
|
|||
# 频道信息 |
|||
|
|||
|
|||
class Channel(BaseModel):
    """Channel record, stored in the 'Channel' table."""

    id = PrimaryKeyField()
    channelId = CharField(null=False)
    channelTitle = CharField(null=False)
    channelLanguage = CharField()
    # The only column of this model that accepts NULL.
    channelReptileTime = CharField(null=True)

    class Meta:
        db_table = 'Channel'
|||
|
|||
# 视频信息 |
|||
|
|||
|
|||
class Video(BaseModel):
    """Video record, stored in the 'Vidoes' table."""

    id = PrimaryKeyField()
    videoId = CharField(null=False)
    channelId = CharField(null=False)
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()

    class Meta:
        # NOTE(review): 'Vidoes' looks like a misspelling of 'Videos', but
        # changing it would point the model at a different table.
        db_table = 'Vidoes'
|||
|
|||
# 下载信息 |
|||
|
|||
|
|||
class DownloadInfo(BaseModel):
    """Download bookkeeping record, stored in the 'Download_info' table."""

    id = PrimaryKeyField()
    videoId = CharField()
    downloadType = IntegerField()
    tryTime = IntegerField()
    isFinished = IntegerField()

    class Meta:
        db_table = 'Download_info'
@ -0,0 +1,31 @@ |
|||
import json |
|||
from Orm import Video |
|||
from playhouse.shortcuts import model_to_dict, dict_to_model |
|||
|
|||
|
|||
class VideoService:
    """Queries and updates for Video rows.

    The methods take no ``self`` and are meant to be called on the class.
    """

    @staticmethod
    def getOneByVideoId(videoId):
        """Return the Video whose videoId equals `videoId`, or None if no row matches."""
        return Video.get_or_none(Video.videoId == videoId)

    @staticmethod
    def createOne(videoId, channelId, videoTitle, videoLen, videoType, videoPublishTime, videoLanguage, isDownload):
        """Insert a new Video row with the given column values."""
        Video.create(videoId=videoId,
                     channelId=channelId,
                     videoTitle=videoTitle,
                     videoLen=videoLen,
                     videoType=videoType,
                     videoPublishTime=videoPublishTime,
                     videoLanguage=videoLanguage,
                     isDownload=isDownload)

    @staticmethod
    def updateLenByVideoId(videoId, len):
        """Set videoLen on the rows whose videoId equals `videoId`.

        The parameter name `len` shadows the builtin; it is kept so existing
        keyword callers keep working.
        """
        Video.update(videoLen=len).where(Video.videoId == videoId).execute()

    @staticmethod
    def getLastVideoByChannelId(channelId):
        """Return the channel's video with the greatest videoPublishTime.

        Raises the model's DoesNotExist when the channel has no videos; use
        checkExist first to avoid that.
        """
        return Video.select().where(Video.channelId == channelId).order_by(Video.videoPublishTime.desc()).get()

    @staticmethod
    def getFirstVideoByChannelId(channelId):
        """Return the channel's video with the smallest videoPublishTime.

        Raises the model's DoesNotExist when the channel has no videos; use
        checkExist first to avoid that.
        """
        return Video.select().where(Video.channelId == channelId).order_by(Video.videoPublishTime).get()

    @staticmethod
    def checkExist(channelId):
        """Return True when at least one Video row belongs to `channelId`."""
        query = Video.select().where(Video.channelId == channelId)
        return query.exists()
@ -0,0 +1,169 @@ |
|||
import httplib2 |
|||
import googleapiclient.discovery |
|||
import googleapiclient.errors |
|||
from VideoService import VideoService |
|||
from ChannelService import ChannelService |
|||
from DownloadInfoService import DownloadService |
|||
from LoggerUtils import Logger |
|||
import operator |
|||
import time |
|||
|
|||
|
|||
class YouTubeUtil:
    """YouTube Data API v3 helpers used to discover and store channel videos.

    Every member is used through the class itself
    (``YouTubeUtil.getByChannelId(...)``), never through an instance.
    """

    # NOTE(security): API keys are committed in source. They are kept so the
    # behaviour is unchanged, but they should be rotated and loaded from
    # configuration or the environment instead.
    apiKeys = [
        "AIzaSyARaW3mqO9szQiHgWZR4el0HWvdyheSHBc",
        "AIzaSyChPXesnVx6fweon_BckhR6UiJWvi5Ma4s",
    ]

    # Index into apiKeys of the key the next getYoutube() call will use.
    apiIndex = 0

    # Number of newly stored videos that triggers a duration lookup.
    _LEN_BATCH_SIZE = 10

    @staticmethod
    def getYoutube():
        """Build an API client with the current key and advance the rotation.

        Returns:
            The object returned by ``googleapiclient.discovery.build`` for the
            ``youtube`` / ``v3`` service.
        """
        http = httplib2.Http(timeout=10, disable_ssl_certificate_validation=False)
        keyIndex = YouTubeUtil.apiIndex
        apiKey = YouTubeUtil.apiKeys[keyIndex]
        # Only the last four characters are logged so the key does not leak
        # into the log files.
        Logger.info(
            "当前APIKey:{},当前apiIndex:{},totalIndex:{}".format(
                "***" + apiKey[-4:], keyIndex, len(YouTubeUtil.apiKeys) - 1
            )
        )
        # Round-robin: wrap back to the first key after the last one.
        YouTubeUtil.apiIndex = (keyIndex + 1) % len(YouTubeUtil.apiKeys)
        return googleapiclient.discovery.build(
            "youtube", "v3", developerKey=apiKey, http=http
        )

    @staticmethod
    def getVidoeLen(videoIds):
        """Request ``contentDetails`` for a comma-separated string of video ids.

        Returns:
            The raw ``videos().list`` response.
        """
        youtube = YouTubeUtil.getYoutube()
        request = youtube.videos().list(part="contentDetails", id=videoIds)
        return request.execute()

    @staticmethod
    def getVideoLenByStr(str):
        """Convert an ISO 8601 duration such as ``PT1H2M3S`` to whole seconds.

        Week and day designators before the ``T`` are supported as well
        (``P1DT2H``, ``P0D``); the previous implementation raised IndexError
        on any value that did not contain the literal ``PT``.

        Args:
            str: the duration text. The name shadows the builtin but is kept
                so keyword callers keep working.

        Raises:
            ValueError: the text does not start with ``P``, contains an
                unsupported designator, or has a designator without digits.
        """
        if not str.startswith("P"):
            raise ValueError("unsupported ISO 8601 duration: " + str)
        unitSeconds = {"W": 604800, "D": 86400, "H": 3600, "M": 60, "S": 1}
        total = 0
        digits = ""
        inTime = False
        for ch in str[1:]:
            if ch == "T":
                inTime = True
            elif ch.isdigit():
                digits += ch
            elif (inTime and ch in "HMS") or (not inTime and ch in "WD"):
                total += int(digits) * unitSeconds[ch]
                digits = ""
            else:
                # e.g. a month "M" before the "T" has no fixed length in seconds.
                raise ValueError("unsupported ISO 8601 duration: " + str)
        return total

    @staticmethod
    def _updateVideoLens(videoIds):
        """Look up the durations of ``videoIds`` (list of str) and store them."""
        lenRes = YouTubeUtil.getVidoeLen(",".join(videoIds))
        for item in lenRes["items"]:
            tmpId = item["id"]
            videoLen = YouTubeUtil.getVideoLenByStr(item["contentDetails"]["duration"])
            VideoService.updateLenByVideoId(tmpId, videoLen)
            Logger.info("更新时长,videoId:{},len:{}".format(tmpId, videoLen))

    @staticmethod
    def _storePage(items, channelId, videoLanguage):
        """Store the unseen videos of one search page and fetch their durations."""
        pendingIds = []
        for item in items:
            try:
                videoId = item["id"]["videoId"]
                publisTime = item["snippet"]["publishedAt"]
                videoTitle = item["snippet"]["title"]
                if VideoService.getOneByVideoId(str(videoId)) is None:
                    VideoService.createOne(
                        videoId, channelId, videoTitle, 0, "video",
                        publisTime, videoLanguage, 0,
                    )
                    DownloadService.createOne(videoId, 1, 0, 0)
                    pendingIds.append(str(videoId))
                    Logger.info(
                        "存储VideoUrl:https://www.youtube.com/watch?v=" + videoId
                    )
                else:
                    Logger.info("已存在VideoId:{}".format(videoId))
                if len(pendingIds) >= YouTubeUtil._LEN_BATCH_SIZE:
                    YouTubeUtil._updateVideoLens(pendingIds)
                    # Cleared only after success so a failed lookup is retried
                    # together with the next item.
                    pendingIds = []
            except Exception as e:
                # One bad item must not stop the page, but it is no longer silent.
                Logger.error(e)
        if pendingIds:
            # Flush the remainder; previously fewer than 10 leftover videos
            # never had their duration fetched.
            try:
                YouTubeUtil._updateVideoLens(pendingIds)
            except Exception as e:
                Logger.error(e)

    @staticmethod
    def getByChannelId(channelId, startTime, endTime):
        """Store every video the channel published inside the time window.

        Returns without doing anything when the channel is not found. After
        each result page the channel's crawl time is set to the publish time
        of its newest stored video.

        Args:
            channelId: YouTube channel id to search.
            startTime: passed as ``publishedAfter`` to ``search().list``.
            endTime: passed as ``publishedBefore`` to ``search().list``.
        """
        channel = ChannelService.getOneByChannelId(channelId)
        if channel is None:
            return
        videoLanguage = str(channel.channelLanguage)
        youtube = YouTubeUtil.getYoutube()
        searchArgs = dict(
            part="snippet",
            channelId=channelId,
            maxResults=50,
            order="date",
            publishedAfter=startTime,
            publishedBefore=endTime,
            type="video",
        )
        response = youtube.search().list(**searchArgs).execute()
        while True:
            YouTubeUtil._storePage(response["items"], channelId, videoLanguage)
            # Guarded: getLastVideoByChannelId raises when no row exists yet.
            if VideoService.checkExist(channelId):
                vidoeo = VideoService.getLastVideoByChannelId(channelId)
                ChannelService.updateTimeByChannelId(channelId, vidoeo.videoPublishTime)
            pageToken = response.get("nextPageToken")
            if pageToken is None:
                # Normal end of pagination; no longer reported as an error.
                print("no nextPageToken")
                break
            time.sleep(5)
            try:
                response = youtube.search().list(
                    pageToken=pageToken, **searchArgs
                ).execute()
            except Exception as e:
                Logger.error(e)
                break
@ -0,0 +1,49 @@ |
|||
import argparse |
|||
import Contant |
|||
import LoggerUtils |
|||
import Orm |
|||
from VideoService import VideoService |
|||
from YouTubeUtils import YouTubeUtil |
|||
from ChannelService import ChannelService |
|||
import requests |
|||
|
|||
# py .\main.py --db=../db/youtube_prod.db --logDir=./logs --start="2023-09-10T00:00:01Z" --end="2023-09-11T00:00:01Z" |
|||
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="2024-03-25T00:10:01Z" --end="2024-03-26T00:10:01Z" |
|||
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="111" --end="222" |
|||
if __name__ == "__main__":
    # Crawl every configured channel for the --start/--end publish window,
    # then post a completion notice to DingTalk.
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--db", type=str, default="")
    parser.add_argument("--logDir", type=str, default="")
    parser.add_argument("--start", type=str, default="")
    parser.add_argument("--end", type=str, default="")
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    Contant.startTime = args.start
    Contant.endTime = args.end
    LoggerUtils.initLogger()
    Orm.ormInit()
    LoggerUtils.Logger.info("db:{},logDir:{}".format(Contant.db, Contant.logDir))
    LoggerUtils.Logger.info("starTime:{},endTime:{}".format(Contant.startTime, Contant.endTime))

    # Run the search for each channel.
    channelList = ChannelService.getChannelList()
    LoggerUtils.Logger.info("list size:{}".format(len(channelList)))
    for channel in channelList:
        channelId = channel.channelId
        LoggerUtils.Logger.info(
            "channelId:{},startTime:{},endTime:{}".format(
                channelId, Contant.startTime, Contant.endTime
            )
        )
        YouTubeUtil.getByChannelId(channelId, Contant.startTime, Contant.endTime)

    # Send the DingTalk message.
    # NOTE(security): the access token is committed in source; it should be
    # rotated and moved to configuration.
    webhook = "https://oapi.dingtalk.com/robot/send?access_token=c8c8d7d42c4eecd449dd303025ef968f647d1d8e8694e3fabc0ab5770d646dcb"
    jsonData = {
        "msgtype": "text",
        "text": {
            "content": "[Youtube]src finished"
        }
    }
    try:
        # A timeout keeps a stalled webhook from hanging the job forever.
        reply = requests.post(webhook, json=jsonData, timeout=10)
        reply.raise_for_status()
    except requests.RequestException as e:
        LoggerUtils.Logger.error(e)
    else:
        # Success is logged only when the HTTP request actually succeeded.
        LoggerUtils.Logger.info("src发送钉钉消息成功...")
@ -0,0 +1,32 @@ |
|||
import argparse |
|||
import Contant |
|||
import LoggerUtils |
|||
import Orm |
|||
from VideoService import VideoService |
|||
from YouTubeUtils import YouTubeUtil |
|||
from ChannelService import ChannelService |
|||
import requests |
|||
|
|||
# py .\main.py --db=../db/youtube_prod.db --logDir=./logs --start="2023-09-10T00:00:01Z" --end="2023-09-11T00:00:01Z" |
|||
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="2023-08-10T00:00:01Z" --end="2023-09-12T00:00:01Z" |
|||
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="111" --end="222" |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCzoF2M_RG3Qz10hP16vQOng" |
|||
if __name__ == "__main__":
    # Crawl a single channel for the --start/--end publish window.
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--db", type=str, default="")
    parser.add_argument("--logDir", type=str, default="")
    parser.add_argument("--start", type=str, default="")
    parser.add_argument("--end", type=str, default="")
    # Required: this script has nothing to do without a channel id, and an
    # omitted id previously ran to completion without reporting a problem.
    parser.add_argument("--channelId", type=str, required=True)
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    Contant.startTime = args.start
    Contant.endTime = args.end
    channelId = args.channelId
    LoggerUtils.initLogger()
    Orm.ormInit()
    LoggerUtils.Logger.info("db:{},logDir:{}".format(Contant.db, Contant.logDir))
    LoggerUtils.Logger.info("channleId:{},starTime:{},endTime:{}".format(channelId, Contant.startTime, Contant.endTime))

    YouTubeUtil.getByChannelId(channelId, Contant.startTime, Contant.endTime)
@ -0,0 +1,87 @@ |
|||
#!/bin/bash
# The commands below use paths relative to the source directory; stop if it
# cannot be entered so they never run against the wrong working directory.
cd /mnt/youtube_prod/src || exit 1
|||
|
|||
|
|||
|
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCpsfkRRT7L2nBnizBn_u9YA" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCRbT3P-2tmr-9l8D7jNoZMQ" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCPTy0BNqiv-0SdAvFgrXvXg" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCMlDu8Vuowmqz03kByFcUhw" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UC5mn3VEg_9GY52G6eumKJRg" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UClhhyZ0xyeOAEVdcr0N9KDA" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCBM86JVoHLqg9irpR2XKvGw" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCzp9CmDIFVNtzhyOjptIi4g" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCv-spDeZBGYVUI9eGXGaLSg" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCF08I8KEKTsBo22RIXFwTAA" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UC5Mjj4LKlMtP_PXlIVYGxIQ" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCvil4OAt-zShzkKHsg9EQAw" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCI6C5V4J8FWRcLcOdh1yElw" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCOio3vyYLWiKlHSYRKW-9UA" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCaWi2foADm_lKAKnmeQwLSA" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCUFUOdQwKTWda7kKqxQwMxw" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCoZdXdFowKP0heWRkQ9RABQ" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCnfwIKyFYRuqZzzKBDt6JOA" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCnZJqzwt6LuRymM0jbqiD9A" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCHpGooMnVgnILywqrpqvZcQ" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCQIyAcoLsO3L0RMFQk7YMYA" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCYdHxiRAUUJhuE1DZsnWqXg" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCbOIEn95Rvnk97KRtSFqvbQ" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCXWOlSe2GHTev8QZhY_gMPg" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCJo6G1u0e_-wS-JQn3T-zEw" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCfq4V1DAuaojnr2ryvWNysw" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCFznPlqnBtRKQhtkm6GGoRQ" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UC5CyCSvCdoEP-VgQmFq3iww" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UC6mp159KMtzjhP65DmldR0A" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UC7YLvjJf3lDJUQ-TsbWyBjg" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UC6ij59Gy_HnqO4pFu9A_zgQ" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCpyjRAERLqcD_wI3qQnIY3A" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCSU_iBWoCnXe1VnAbQhO3Ug" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UC6ZkHcW5QQubZ-Q6XYINE3Q" |
|||
# sleep 600 |
|||
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCDpRrAXMYlxFz3a5-z8pE7w" |
|||
# sleep 600 |
|||
# Channels to crawl in this run, processed in order.
channel_ids=(
    "UCMec1m9iUC3agiEK-nsndSg"
    "UCOmXyHRWpDFPYgs2VpoQEIw"
    "UCPgT-N-DQ0K0H88skjaDgkA"
    "UC40nk9kM2Ue8XQ9LsHQlKPA"
    "UCaiV1-PUXDu2Nmx8iOZkofQ"
    "UCDDneQi63kJAdr3i5VCPzHg"
)

for channel_id in "${channel_ids[@]}"; do
    python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="$channel_id"
    # Same 600-second pause after every channel, including the last one.
    sleep 600
done
@ -0,0 +1,10 @@ |
|||
#!/bin/bash
# Start the download worker in the background.

# Append a timestamped "[download] [info]" line to the shared running log.
log() {
    # Declared and assigned separately so the exit status of date is not masked.
    local time_now
    time_now=$(date '+%Y-%m-%d %H:%M:%S')
    echo "$time_now [download] [info] $1" >> /mnt/youtube_prod/running.log
}

# The --db and --logDir paths below are relative to this directory.
cd /mnt/youtube_prod/download || exit 1
log "开始执行download..."
# stdout is discarded; stderr is written to err.log (overwritten on each start).
nohup python3 ./main_download.py --db="../db/youtube_prod.db" --logDir="./logs" >/dev/null 2>/mnt/youtube_prod/err.log &
@ -0,0 +1,11 @@ |
|||
#!/bin/bash
# Upload the staged subtitle files over SFTP, then remove the staging directory.

# Append a timestamped "[sftp] [info]" line to the shared running log.
# (The tag used to read "[download]", copied from the download script.)
log() {
    local time_now
    time_now=$(date '+%Y-%m-%d %H:%M:%S')
    echo "$time_now [sftp] [info] $1" >> /mnt/youtube_prod/running.log
}

cd /mnt/youtube_prod/sftp || exit 1
log "开始执行sftp..."
# Delete the staged files only when the upload exits successfully; previously
# they were removed even after a failed run.
# NOTE(review): relies on sftp.py exiting non-zero on failure - confirm.
if python3 ./sftp.py --local="/mnt/tmp_srt_file" --logDir="./logs"; then
    rm -rf /mnt/tmp_srt_file
else
    log "sftp failed, keeping /mnt/tmp_srt_file"
fi
@ -0,0 +1,12 @@ |
|||
#!/bin/bash
# Start the crawler in the background for the last 24 hours.

# Append a timestamped "[src] [info]" line to the shared running log.
log() {
    local time_now
    time_now=$(date '+%Y-%m-%d %H:%M:%S')
    echo "$time_now [src] [info] $1" >> /mnt/youtube_prod/running.log
}

cd /mnt/youtube_prod/src || exit 1
# The trailing "Z" marks UTC, so the timestamps are generated with -u;
# without it the host's local time was labelled as UTC.
start=$(date -u -d '-1 day' '+%Y-%m-%dT%H:%M:%SZ')
end=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
log "开始执行src...startTime:${start},endTime:${end}"
# stdout is discarded; stderr is written to err.log (overwritten on each start).
nohup python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="$start" --end="$end" >/dev/null 2>/mnt/youtube_prod/err.log &
@ -0,0 +1,4 @@ |
|||
#!/bin/bash
# Force-kill the first running process whose command line mentions main_download.
# pgrep never reports itself, unlike the previous "ps | grep" pipeline, which
# could pick up the grep process instead of the worker.
pid=$(pgrep -f main_download | head -n 1)
echo "$pid"
# Skip the kill when nothing is running instead of calling kill with no pid.
if [ -n "$pid" ]; then
    kill -9 "$pid"
fi
@ -0,0 +1,2 @@ |
|||
#!/bin/bash
# Smoke test: print the word "test" followed by a newline.
printf '%s\n' "test"
@ -0,0 +1,9 @@ |
|||
import requests

# Manual check that the DingTalk robot webhook accepts a text message.
# NOTE(security): the access token is committed in source; it should be
# rotated and moved to configuration.
webhook = "https://oapi.dingtalk.com/robot/send?access_token=c8c8d7d42c4eecd449dd303025ef968f647d1d8e8694e3fabc0ab5770d646dcb"
jsonData = {
    "msgtype": "text",
    "text": {
        "content": "[Youtube]aaaa"
    }
}
# A timeout keeps the script from hanging; the status and body are printed so
# the result of the check is actually visible.
reply = requests.post(webhook, json=jsonData, timeout=10)
print(reply.status_code, reply.text)
@ -0,0 +1,8 @@ |
|||
from youtube_transcript_api import YouTubeTranscriptApi

# Manual check: print the transcripts available for one video.
# Renamed from `list` so the builtin is not shadowed.
transcripts = YouTubeTranscriptApi.list_transcripts("_i5CoY_LMYs")
print(transcripts)
@ -0,0 +1,15 @@ |
|||
import json |
|||
from Orm import Channel |
|||
from playhouse.shortcuts import model_to_dict, dict_to_model |
|||
|
|||
|
|||
class ChannelService:
    """Queries on the Channel table; all methods are called on the class."""

    @staticmethod
    def getOneByChannelId(channelId):
        """Return the Channel row matching ``channelId``, or None if absent."""
        return Channel.get_or_none(Channel.channelId == channelId)

    @staticmethod
    def updateTimeByChannelId(channelId, chageTime):
        """Store ``chageTime`` as ``channelReptileTime`` of the matching channel."""
        Channel.update(channelReptileTime=chageTime).where(
            Channel.channelId == channelId).execute()

    @staticmethod
    def getChannelList():
        """Return the executed result of selecting every Channel row."""
        return Channel.select().execute()
@ -0,0 +1,10 @@ |
|||
import os

# Runtime settings; the entry-point scripts overwrite these after parsing argv.
db = ""
logDir = ""
startTime = ""
endTime = ""

# Index into apiKeys of the key used for the next API client.
apiIndex = 0

# Keys can be supplied as a comma-separated YOUTUBE_API_KEYS environment
# variable. NOTE(security): the committed keys below remain as the fallback so
# behaviour is unchanged when the variable is unset; they should be rotated
# and removed from source.
apiKeys = [
    key.strip()
    for key in os.environ.get("YOUTUBE_API_KEYS", "").split(",")
    if key.strip()
] or [
    "AIzaSyDjPkCgDQ9Tv_xcChjY2E6GpJ6IzngnD5I",
    "AIzaSyAxIycOdQYGB5kWhwe3B-kJAYRo7wOnp8o",
    "AIzaSyCsYUC5vN0pB6y9xsCj0B1ehAoqOJ3WMf0",
]
@ -0,0 +1,6 @@ |
|||
from loguru import logger |
|||
import Contant |
|||
Logger = logger |
|||
def initLogger():
    """Add a file sink under ``Contant.logDir`` to the shared loguru logger.

    The sink rotates at 500MB, zips rotated files, keeps them for 10 days,
    writes UTF-8 and is added with ``enqueue=True``.
    """
    sinkPath = Contant.logDir + "/main_{time}.log"
    sinkOptions = {
        "rotation": "500MB",
        "encoding": "utf-8",
        "enqueue": True,
        "compression": "zip",
        "retention": "10 days",
    }
    logger.add(sinkPath, **sinkOptions)
@ -0,0 +1,75 @@ |
|||
from peewee import * |
|||
import Contant |
|||
import argparse |
|||
from LoggerUtils import Logger |
|||
|
|||
# The database handle has to exist before the model classes below are defined,
# so --db is read from the command line while this module is imported.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
# parse_known_args: the importing script may define options this parser does
# not know; parse_args() would terminate the process on them during import.
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
|||
|
|||
|
|||
def ormInit():
    """Call ``create_table()`` with default arguments on every model, in order."""
    for model in (Channel, Video, DownloadInfo, ViewCountInfo):
        model.create_table()
|||
|
|||
|
|||
class BaseModel(Model):
    """Common base class that binds its subclasses to the module-level ``db``."""

    class Meta:
        database = db
|||
|
|||
# 频道信息 |
|||
|
|||
|
|||
class Channel(BaseModel):
    """Channel information, stored in the ``Channel`` table."""

    # AutoField replaces the deprecated PrimaryKeyField alias.
    id = AutoField()
    channelId = CharField(null=False)
    channelTitle = CharField(null=False)
    channelLanguage = CharField()
    # Updated by ChannelService.updateTimeByChannelId; nullable.
    channelReptileTime = CharField(null=True)

    class Meta:
        # table_name replaces the deprecated db_table option.
        table_name = 'Channel'
|||
|
|||
# 视频信息 |
|||
|
|||
|
|||
class Video(BaseModel):
    """Video information, stored in the ``Vidoes`` table."""

    # AutoField replaces the deprecated PrimaryKeyField alias.
    id = AutoField()
    videoId = CharField(null=False)
    channelId = CharField(null=False)
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()

    class Meta:
        # table_name replaces the deprecated db_table option. The misspelled
        # name is kept on purpose: it is the name of the existing table.
        table_name = 'Vidoes'
|||
|
|||
# 下载信息 |
|||
|
|||
|
|||
class DownloadInfo(BaseModel):
    """Download information, stored in the ``Download_info`` table."""

    # AutoField replaces the deprecated PrimaryKeyField alias.
    id = AutoField()
    videoId = CharField()
    downloadType = IntegerField()
    tryTime = IntegerField()
    isFinished = IntegerField()

    class Meta:
        # table_name replaces the deprecated db_table option.
        table_name = 'Download_info'
|||
|
|||
# 播放量信息 |
|||
class ViewCountInfo(BaseModel):
    """View-count information, stored in the ``ViewCount_info`` table."""

    # AutoField replaces the deprecated PrimaryKeyField alias.
    id = AutoField()
    videoId = CharField()
    # Comma-separated counts, one slot per day (written by ViewCountService).
    viewCount = CharField()

    class Meta:
        # table_name replaces the deprecated db_table option.
        table_name = 'ViewCount_info'
@ -0,0 +1,33 @@ |
|||
import json |
|||
from Orm import ViewCountInfo |
|||
from playhouse.shortcuts import model_to_dict, dict_to_model |
|||
|
|||
|
|||
class ViewCountService:
    """Stores a per-video history of view counts as a 30-slot CSV string."""

    # Number of comma-separated day slots kept in ViewCountInfo.viewCount.
    _DAYS = 30

    @staticmethod
    def _mergeDayCount(viewCount, day, count):
        """Return the 30-slot CSV string with slot ``day`` set to ``count``.

        Args:
            viewCount: the stored CSV string, or None/"" to start from zeros.
            day: 1-based slot number, 1..30.
            count: value written into the slot (converted with ``str``).

        Raises:
            IndexError: ``day`` is outside 1..30. Previously ``day=0``
                silently overwrote the last slot.
        """
        if not 1 <= day <= ViewCountService._DAYS:
            raise IndexError("day must be between 1 and 30, got {}".format(day))
        slots = viewCount.split(",") if viewCount else []
        # Pad short histories with zeros and drop anything past 30 slots.
        slots = (slots + ["0"] * ViewCountService._DAYS)[:ViewCountService._DAYS]
        slots[day - 1] = str(count)
        return ",".join(slots)

    @staticmethod
    def createOrUpdateOne(videoId, day, count):
        """Record ``count`` as the view count of ``videoId`` for slot ``day``.

        Creates the row with zeros in every other slot when the video has no
        history yet; otherwise rewrites the stored string of every row with
        that video id.
        """
        existing = ViewCountInfo.get_or_none(ViewCountInfo.videoId == videoId)
        if existing is None:
            ViewCountInfo.create(
                videoId=videoId,
                viewCount=ViewCountService._mergeDayCount(None, day, count),
            )
        else:
            merged = ViewCountService._mergeDayCount(existing.viewCount, day, count)
            ViewCountInfo.update(viewCount=merged).where(
                ViewCountInfo.videoId == videoId).execute()
@ -0,0 +1,34 @@ |
|||
import json |
|||
from Orm import Video |
|||
from playhouse.shortcuts import model_to_dict, dict_to_model |
|||
|
|||
|
|||
class VideoService:
    """Queries on the Video table; all methods are called on the class."""

    @staticmethod
    def getOneByVideoId(videoId):
        """Return the Video row matching ``videoId``, or None if absent."""
        return Video.get_or_none(Video.videoId == videoId)

    @staticmethod
    def createOne(videoId, channelId, videoTitle, videoLen, videoType, videoPublishTime, videoLanguage, isDownload):
        """Insert one Video row; each argument is stored in the column of the same name."""
        Video.create(videoId=videoId,
                     channelId=channelId,
                     videoTitle=videoTitle,
                     videoLen=videoLen,
                     videoType=videoType,
                     videoPublishTime=videoPublishTime,
                     videoLanguage=videoLanguage,
                     isDownload=isDownload)

    @staticmethod
    def updateLenByVideoId(videoId, len):
        """Store ``len`` as ``videoLen`` of every row with the given video id.

        The parameter name shadows the builtin but is kept for keyword callers.
        """
        Video.update(videoLen=len).where(Video.videoId == videoId).execute()

    @staticmethod
    def getLastVideoByChannelId(channelId):
        """Return the channel's row with the greatest ``videoPublishTime``.

        Raises:
            Video.DoesNotExist: raised by ``.get()`` when the channel has no rows.
        """
        return Video.select().where(Video.channelId == channelId).order_by(Video.videoPublishTime.desc()).get()

    @staticmethod
    def getFirstVideoByChannelId(channelId):
        """Return the channel's row with the smallest ``videoPublishTime``.

        Raises:
            Video.DoesNotExist: raised by ``.get()`` when the channel has no rows.
        """
        return Video.select().where(Video.channelId == channelId).order_by(Video.videoPublishTime).get()

    @staticmethod
    def checkExist(channelId):
        """Return True when at least one row belongs to ``channelId``."""
        return Video.select().where(Video.channelId == channelId).exists()

    @staticmethod
    def getVideosByTime(startTime, endTime):
        """Return the rows whose ``videoPublishTime`` lies in [startTime, endTime]."""
        return Video.select().where(Video.videoPublishTime >= startTime, Video.videoPublishTime <= endTime).execute()
@ -0,0 +1,99 @@ |
|||
import argparse |
|||
import random |
|||
import time |
|||
import Contant |
|||
from LoggerUtils import Logger, initLogger |
|||
import Orm |
|||
from VideoService import VideoService |
|||
from ChannelService import ChannelService |
|||
from VideoCountService import ViewCountService |
|||
from func_timeout import func_set_timeout |
|||
import func_timeout |
|||
import requests |
|||
import httplib2 |
|||
import googleapiclient.discovery |
|||
import googleapiclient.errors |
|||
import datetime |
|||
|
|||
# NOTE(review): getYoutube() and updateVideoViewCount() in this file read
# Contant.apiIndex / Contant.apiKeys, not these module-level copies - confirm
# whether anything still uses them.
# NOTE(security): API keys are committed in source and should be rotated.
apiIndex = 0
apiKeys = [
    "AIzaSyDjPkCgDQ9Tv_xcChjY2E6GpJ6IzngnD5I",
    "AIzaSyAxIycOdQYGB5kWhwe3B-kJAYRo7wOnp8o",
    "AIzaSyCsYUC5vN0pB6y9xsCj0B1ehAoqOJ3WMf0"
]
|||
|
|||
|
|||
def getYoutube():
    """Build a YouTube Data API v3 client for the currently selected key.

    The key is ``Contant.apiKeys[Contant.apiIndex]``; this function does not
    advance the index. The unused proxy object and the unused hard-coded key
    of the previous version were removed.

    Returns:
        The object returned by ``googleapiclient.discovery.build``.
    """
    http = httplib2.Http(timeout=10, disable_ssl_certificate_validation=False)
    return googleapiclient.discovery.build(
        "youtube", "v3", developerKey=Contant.apiKeys[Contant.apiIndex], http=http
    )
|||
|
|||
|
|||
def updateVideoViewCount(day, startTime, endTime):
    """Fetch and store the view counts of videos published in a time window.

    Videos are requested in batches of up to 50 ids. A client is built for
    every batch and the key index is advanced afterwards, so consecutive
    batches really use different keys; previously one client was built up
    front and the rotation had no effect.

    Args:
        day: slot number passed on to ``ViewCountService.createOrUpdateOne``.
        startTime: lower bound passed to ``VideoService.getVideosByTime``.
        endTime: upper bound passed to ``VideoService.getVideosByTime``.
    """
    batchSize = 50
    videos = list(VideoService.getVideosByTime(startTime, endTime))
    Logger.info(len(videos))
    for offset in range(0, len(videos), batchSize):
        batch = videos[offset:offset + batchSize]
        for video in batch:
            Logger.info(video.videoId)
        youtube = getYoutube()
        # Round-robin to the next key for the following batch.
        Contant.apiIndex = (Contant.apiIndex + 1) % len(Contant.apiKeys)
        # join() avoids the leading comma the old string concatenation produced.
        request = youtube.videos().list(
            part="statistics", id=",".join(video.videoId for video in batch))
        response = request.execute()
        for item in response['items']:
            try:
                Logger.info(item)
                ViewCountService.createOrUpdateOne(
                    item['id'], day, item['statistics']['viewCount'])
            except Exception as e:
                Logger.error("存储失败{}".format(item))
                # Also record why the item could not be stored.
                Logger.error(e)
|||
|
|||
# python ./view_count_main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="2024-01-03T00:00:00Z" --end="2024-01-04T00:00:00Z" |
|||
|
|||
|
|||
def dayWindow(now, daysAgo):
    """Return the (start, end) timestamps of the day ``daysAgo`` days before ``now``.

    Both values are formatted as ``YYYY-MM-DDTHH:MM:SSZ``; start is 00:00:00
    and end is 23:59:59 of that day. The previous format string had the
    minute and second directives swapped (``%H:%S:%M``).
    """
    dayStart = now.replace(hour=0, minute=0, second=0, microsecond=0) - datetime.timedelta(days=daysAgo)
    dayEnd = dayStart.replace(hour=23, minute=59, second=59)
    stampFormat = "%Y-%m-%dT%H:%M:%SZ"
    return dayStart.strftime(stampFormat), dayEnd.strftime(stampFormat)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    Orm.ormInit()
    # Refresh the view counts of videos published on each of the last 30 days.
    now = datetime.datetime.now()
    for i in range(1, 31):
        startTime, endTime = dayWindow(now, i)
        Logger.info("day:%d, startTime:%s, endTime:%s" %
                    (i, startTime, endTime))
        updateVideoViewCount(i, startTime, endTime)
|||
# zero_today = zero_today.strftime("%y-%m-%dT%H:%S:%MZ") |
|||
# print(zero_today) |
Loading…
Reference in new issue