zhangshu
9 months ago
48 changed files with 1339 additions and 0 deletions
Binary file not shown.
@ -0,0 +1,7 @@ |
|||
import json |
|||
from Orm import Channel |
|||
from playhouse.shortcuts import model_to_dict, dict_to_model |
|||
|
|||
class ChannelService:
    """Read-only lookups against the ``Channel`` table."""

    @staticmethod
    def getOneByChannelId(channelId):
        """Return the ``Channel`` row whose ``channelId`` matches, or ``None``.

        Declared static so the call works both on the class and on an
        instance; without the decorator an instance call would bind the
        instance to ``channelId``.
        """
        return Channel.get_or_none(Channel.channelId == channelId)
@ -0,0 +1,2 @@ |
|||
# Location of the SQLite database file; the entry script overwrites it
# with the value of ``--db``.
db = ""

# Directory that receives the log files; the entry script overwrites it
# with the value of ``--logDir``.
logDir = ""
@ -0,0 +1,28 @@ |
|||
from Orm import DownloadInfo |
|||
|
|||
|
|||
class DownloadService:
    """Queries and updates for the ``DownloadInfo`` bookkeeping table."""

    # A row is handed out for another attempt while tryTime <= this value.
    _MAX_TRY_TIME = 5
    # Maximum number of rows returned per polling round.
    _BATCH_SIZE = 10

    @staticmethod
    def getOneByVideoId(videoId, downloadType):
        """Return the row for ``videoId``/``downloadType``, or ``None``.

        ``get_or_none`` is used because every caller tests the result
        against ``None``; ``Model.get`` would raise ``DoesNotExist`` instead.
        """
        return DownloadInfo.get_or_none(
            DownloadInfo.videoId == videoId,
            DownloadInfo.downloadType == downloadType)

    @staticmethod
    def createOne(videoId, downloadType, tryTime, isFinished):
        """Insert a new bookkeeping row with the given values."""
        DownloadInfo.create(
            videoId=videoId,
            downloadType=downloadType,
            tryTime=tryTime,
            isFinished=isFinished,
        )

    @staticmethod
    def updateInfoByVideoId(videoId, tryTime, isFinished, downloadType):
        """Set ``tryTime`` and ``isFinished`` on the matching row(s)."""
        DownloadInfo.update(tryTime=tryTime, isFinished=isFinished).where(
            DownloadInfo.videoId == videoId,
            DownloadInfo.downloadType == downloadType).execute()

    @staticmethod
    def _findNotFinished(downloadType):
        """Return up to ``_BATCH_SIZE`` unfinished rows of ``downloadType``."""
        return DownloadInfo.select().where(
            DownloadInfo.isFinished == 0,
            DownloadInfo.tryTime <= DownloadService._MAX_TRY_TIME,
            DownloadInfo.downloadType == downloadType,
        ).limit(DownloadService._BATCH_SIZE).execute()

    @staticmethod
    def findNotFinishList():
        """Return the next batch of unfinished rows with download type 1."""
        return DownloadService._findNotFinished(1)

    @staticmethod
    def changeDownloadType(videoId, tryTime, isFinished, downloadType, changeType):
        """Move the matching row(s) from ``downloadType`` to ``changeType``.

        ``tryTime`` and ``isFinished`` are overwritten in the same statement.
        """
        DownloadInfo.update(
            tryTime=tryTime, isFinished=isFinished, downloadType=changeType,
        ).where(
            DownloadInfo.videoId == videoId,
            DownloadInfo.downloadType == downloadType).execute()

    @staticmethod
    def findNotFinishListTwo():
        """Return the next batch of unfinished rows with download type 2."""
        return DownloadService._findNotFinished(2)
@ -0,0 +1,173 @@ |
|||
from shutil import copyfile |
|||
from youtube_transcript_api import YouTubeTranscriptApi |
|||
from youtube_transcript_api.formatters import SRTFormatter |
|||
from VideoService import VideoService |
|||
from ChannelService import ChannelService |
|||
from DownloadInfoService import DownloadService |
|||
from LoggerUtils import Logger |
|||
import time |
|||
import os |
|||
from func_timeout import func_set_timeout |
|||
import operator |
|||
|
|||
|
|||
class DownLoadUtil:
    """Fetch YouTube transcripts and persist them as ``.srt`` files."""

    formatter = SRTFormatter()
    proxies = {"http": "http://127.0.0.1:7890",
               "https": "https://127.0.0.1:7890"}

    # Characters that must not end up in a file name: "/" and "?" are swapped
    # for look-alike code points, the rest are dropped.
    # NOTE(review): the original replaced "?" with itself (a no-op); the
    # full-width mark is assumed to be what was intended - confirm.
    _TITLE_TABLE = str.maketrans({
        "/": "\u2215",
        "?": "\uff1f",
        "\\": None,
        "|": None,
        "<": None,
        ">": None,
        ":": None,
    })
    _STORE_ROOT = "E:/code/python/srt_file/"
    _COPY_ROOT = "E:/code/python/tmp_srt_file/"
    # Paths longer than this lose their last 20 characters.
    _MAX_PATH_LEN = 120

    @staticmethod
    def _cleanTitle(rawTitle, dropWords=()):
        """Return ``rawTitle`` made safe for a file name, minus ``dropWords``."""
        title = str(rawTitle).translate(DownLoadUtil._TITLE_TABLE)
        for word in dropWords:
            title = title.replace(word, "")
        return title

    @staticmethod
    def _ensureDir(path):
        """Create ``path`` (and parents) when it does not exist yet."""
        if not os.path.exists(path):
            Logger.info("开始创建文件夹:" + path)
            os.makedirs(path)

    @staticmethod
    def _fitLength(storePath, cpPath):
        """Shorten both paths when the copy path exceeds the length limit."""
        if len(cpPath) > DownLoadUtil._MAX_PATH_LEN:
            storePath = storePath[:-20] + ".srt"
            cpPath = cpPath[:-20] + ".srt"
        return storePath, cpPath

    @staticmethod
    def _writeTranscript(videoId, languages, storePath, pathMessage="文件地址...{}"):
        """Download the transcript of ``videoId`` and write it to ``storePath``."""
        videoSrt = YouTubeTranscriptApi.get_transcript(
            videoId, languages=[languages])
        srt_formatted = DownLoadUtil.formatter.format_transcript(videoSrt)
        Logger.info(pathMessage.format(storePath))
        with open(storePath, 'w', encoding='utf-8') as srt_file:
            srt_file.write(srt_formatted)
        Logger.info("下载完成...{}".format(videoId))

    @staticmethod
    def _markFinished(videoId, downloadType):
        """Flag the video as downloaded and close its bookkeeping row."""
        VideoService.updateIsDownloadByVideoId(videoId, 1)
        downloadInfo = DownloadService.getOneByVideoId(videoId, downloadType)
        if downloadInfo is not None:
            DownloadService.updateInfoByVideoId(
                videoId, downloadInfo.tryTime + 1, 1, downloadType)

    @staticmethod
    def _markRetry(videoId, downloadType, downloadInfo):
        """Count one failed attempt on ``downloadInfo`` (no-op when ``None``)."""
        if downloadInfo is not None:
            Logger.info("VideoId:{}开始重试第{}次".format(
                videoId, downloadInfo.tryTime + 1))
            DownloadService.updateInfoByVideoId(
                videoId, downloadInfo.tryTime + 1, 0, downloadType)

    @staticmethod
    def _loadRecords(videoId, downloadType):
        """Return ``(video, channel)`` or ``None`` when either row is missing.

        A missing row is logged and counted as a failed attempt so the
        polling loop eventually gives up instead of crashing.
        """
        video = VideoService.getOneByVideoId(videoId)
        channel = None
        if video is not None:
            channel = ChannelService.getOneByChannelId(str(video.channelId))
        if video is None or channel is None:
            Logger.error("下载失败...{}".format(videoId))
            Logger.error(
                "Video or channel record not found for videoId:{}".format(videoId))
            DownLoadUtil._markRetry(
                videoId, downloadType,
                DownloadService.getOneByVideoId(videoId, downloadType))
            return None
        return video, channel

    @staticmethod
    @func_set_timeout(60)
    def downloadOne(videoId):
        """Download the transcript for ``videoId`` (download type 1).

        The file is written below the store root and copied to the temporary
        root. When no transcript exists the bookkeeping row is moved to
        download type 2; when the file name is too long the download is
        repeated with the video id as file name; any other failure counts as
        one more attempt. The call is aborted after 60 seconds.
        """
        records = DownLoadUtil._loadRecords(videoId, 1)
        if records is None:
            return
        video, channel = records
        # Format the title
        videoTitle = DownLoadUtil._cleanTitle(video.videoTitle, dropWords=("में",))
        # Publish date (the part before "T")
        videoPublishTime = str(video.videoPublishTime).split("T")[0]
        languages = str(video.videoLanguage)
        channelDir = str(channel.channelTitle).rstrip()
        storeDir = DownLoadUtil._STORE_ROOT + channelDir
        cpDir = DownLoadUtil._COPY_ROOT + channelDir
        # Start the download
        Logger.info("开始下载...{}".format(videoId))
        try:
            DownLoadUtil._ensureDir(storeDir)
            DownLoadUtil._ensureDir(cpDir)
            fileName = videoPublishTime + "-" + languages + "-" + videoTitle + ".srt"
            storePath, cpPath = DownLoadUtil._fitLength(
                storeDir + "/" + fileName, cpDir + "/" + fileName)
            DownLoadUtil._writeTranscript(videoId, languages, storePath)
            copyfile(storePath, cpPath)
            DownLoadUtil._markFinished(videoId, 1)
        except Exception as e:
            Logger.error("下载失败...{}".format(videoId))
            logStr = "Exception...{}".format(e)
            Logger.error(logStr)
            downloadInfo = DownloadService.getOneByVideoId(videoId, 1)
            if "No transcripts" in logStr:
                Logger.error("VideoId:{},不存在字幕文件".format(videoId))
                if downloadInfo is not None:
                    DownloadService.changeDownloadType(videoId, 0, 0, 1, 2)
            elif "File name too long" in logStr:
                # File name too long: fall back to the video id as file name,
                # inside the same directories that were created above.
                fileName = videoPublishTime + "-" + languages + "-" + videoId + ".srt"
                storePath, cpPath = DownLoadUtil._fitLength(
                    storeDir + "/" + fileName, cpDir + "/" + fileName)
                try:
                    DownLoadUtil._writeTranscript(
                        videoId, languages, storePath,
                        pathMessage="文件名过长,文件地址...{}")
                    copyfile(storePath, cpPath)
                    DownLoadUtil._markFinished(videoId, 1)
                except Exception as fallbackError:
                    # A failing fallback counts as a normal failed attempt.
                    Logger.error("Exception...{}".format(fallbackError))
                    DownLoadUtil._markRetry(videoId, 1, downloadInfo)
            else:
                DownLoadUtil._markRetry(videoId, 1, downloadInfo)

    @staticmethod
    @func_set_timeout(60)
    def downloadTwo(videoId):
        """Download the transcript for ``videoId`` (download type 2).

        The file is written below ``./download/<channel title>``. When no
        transcript exists the bookkeeping row is moved to download type 3
        with ``tryTime`` 6; any other failure counts as one more attempt.
        The call is aborted after 60 seconds.
        """
        records = DownLoadUtil._loadRecords(videoId, 2)
        if records is None:
            return
        video, channel = records
        # Format the title
        videoTitle = DownLoadUtil._cleanTitle(video.videoTitle)
        # Publish date (the part before "T")
        videoPublishTime = str(video.videoPublishTime).split("T")[0]
        languages = str(video.videoLanguage)
        storeDir = "./download/" + str(channel.channelTitle)
        # Start the download
        Logger.info("开始下载...{}".format(videoId))
        try:
            DownLoadUtil._ensureDir(storeDir)
            # os.path.join picks the separator of the running platform.
            storePath = os.path.join(
                storeDir,
                videoPublishTime + "-" + languages + "-" + videoTitle + ".srt")
            DownLoadUtil._writeTranscript(videoId, languages, storePath)
            DownLoadUtil._markFinished(videoId, 2)
        except Exception as e:
            Logger.error("下载失败...{}".format(videoId))
            logStr = "Exception...{}".format(e)
            Logger.error(logStr)
            downloadInfo = DownloadService.getOneByVideoId(videoId, 2)
            if "No transcripts" in logStr:
                Logger.error("VideoId:{},不存在字幕文件".format(videoId))
                if downloadInfo is not None:
                    DownloadService.changeDownloadType(videoId, 6, 0, 2, 3)
            else:
                DownLoadUtil._markRetry(videoId, 2, downloadInfo)
@ -0,0 +1,6 @@ |
|||
from loguru import logger |
|||
import Contant |
|||
# Shared alias so other modules can ``from LoggerUtils import Logger``.
Logger = logger


def initLogger():
    """Attach the download log file sink below ``Contant.logDir``."""
    sinkPath = Contant.logDir + "/download_{time}.log"
    sinkOptions = {
        "rotation": "500MB",
        "encoding": "utf-8",
        "enqueue": True,
        "compression": "zip",
        "retention": "10 days",
    }
    logger.add(sinkPath, **sinkOptions)
@ -0,0 +1,65 @@ |
|||
from peewee import * |
|||
import Contant |
|||
import argparse |
|||
from LoggerUtils import Logger |
|||
|
|||
# The command line is read at import time so that the database handle exists
# before the model classes below are declared.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
# parse_known_args() ignores flags that only the importing script knows;
# parse_args() would terminate the process on any of them.
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
|||
|
|||
|
|||
def ormInit():
    """Create the Channel, Video and DownloadInfo tables, in that order."""
    for model in (Channel, Video, DownloadInfo):
        model.create_table()
|||
|
|||
|
|||
class BaseModel(Model):
    """Common base that binds every model to the module-level ``db``."""

    class Meta:
        database = db
|||
|
|||
# 频道信息 |
|||
|
|||
|
|||
class Channel(BaseModel):
    """Channel information, stored in table ``Channel``."""

    id = PrimaryKeyField()
    # Required columns.
    channelId = CharField(null=False)
    channelTitle = CharField(null=False)
    channelLanguage = CharField()
    # The only nullable column of this model.
    channelReptileTime = CharField(null=True)

    class Meta:
        db_table = 'Channel'
|||
|
|||
# 视频信息 |
|||
|
|||
|
|||
class Video(BaseModel):
    """Video information, stored in table ``Vidoes``."""

    id = PrimaryKeyField()
    videoId = CharField(null=False)
    # Matches ``Channel.channelId``; kept as plain text, not a foreign key.
    channelId = CharField(null=False)
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()

    class Meta:
        # The spelling is the real table name; changing it would point the
        # model at a different table.
        db_table = 'Vidoes'
|||
|
|||
# 下载信息 |
|||
|
|||
|
|||
class DownloadInfo(BaseModel):
    """Download bookkeeping, stored in table ``Download_info``."""

    id = PrimaryKeyField()
    # Matches ``Video.videoId``; kept as plain text, not a foreign key.
    videoId = CharField()
    downloadType = IntegerField()
    # Attempt counter.
    tryTime = IntegerField()
    # Integer flag; unfinished rows are queried with the value 0.
    isFinished = IntegerField()

    class Meta:
        db_table = 'Download_info'
@ -0,0 +1,26 @@ |
|||
import json |
|||
from Orm import Video |
|||
from playhouse.shortcuts import model_to_dict, dict_to_model |
|||
|
|||
|
|||
class VideoService:
    """Persistence helpers for the ``Video`` table.

    All helpers are static so they can be called on the class or on an
    instance alike.
    """

    @staticmethod
    def getOneByVideoId(videoId):
        """Return the ``Video`` row with this ``videoId``, or ``None``."""
        return Video.get_or_none(Video.videoId == videoId)

    @staticmethod
    def createOne(videoId, channelId, videoTitle, videoLen, videoType,
                  videoPublishTime, videoLanguage, isDownload):
        """Insert a new ``Video`` row with the given column values."""
        Video.create(
            videoId=videoId,
            channelId=channelId,
            videoTitle=videoTitle,
            videoLen=videoLen,
            videoType=videoType,
            videoPublishTime=videoPublishTime,
            videoLanguage=videoLanguage,
            isDownload=isDownload,
        )

    @staticmethod
    def updateLenByVideoId(videoId, len):
        """Store ``len`` as ``videoLen`` of the matching row(s).

        The parameter name shadows the builtin but is kept so existing
        keyword callers keep working.
        """
        Video.update(videoLen=len).where(Video.videoId == videoId).execute()

    @staticmethod
    def updateIsDownloadByVideoId(videoId, isDownload):
        """Store ``isDownload`` on the matching row(s)."""
        Video.update(isDownload=isDownload).where(
            Video.videoId == videoId).execute()
|||
|
Binary file not shown.
@ -0,0 +1,49 @@ |
|||
import argparse |
|||
import random |
|||
import time |
|||
import Contant |
|||
from LoggerUtils import Logger, initLogger |
|||
import Orm |
|||
from VideoService import VideoService |
|||
from ChannelService import ChannelService |
|||
from DownloadInfoService import DownloadService |
|||
from DownloadUtil import DownLoadUtil |
|||
from func_timeout import func_set_timeout |
|||
import func_timeout |
|||
import requests |
|||
|
|||
# py ./main_download.py --db="../db/youtube_prod.db" --logDir="./logs" |
|||
if __name__ == "__main__":
    # Entry point: download transcripts for every unfinished type-1 row.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    Orm.ormInit()
    # Named ``pending`` so the builtin ``list`` is not shadowed.
    pending = DownloadService.findNotFinishList()
    Logger.info("list size:{}".format(len(pending)))
    while len(pending) > 0:
        for info in pending:
            try:
                DownLoadUtil.downloadOne(info.videoId)
                restTime = random.randint(1, 3)
                Logger.info("间隔{}秒后继续...".format(restTime))
                time.sleep(restTime)
            except func_timeout.exceptions.FunctionTimedOut as e:
                Logger.error("执行下载方法超时错误:{}".format(e))
                # Count the timed-out attempt; otherwise the row never leaves
                # the unfinished list and the loop would not terminate.
                DownloadService.updateInfoByVideoId(
                    info.videoId, info.tryTime + 1, 0, 1)
        loopRestTime = random.randint(1, 3)
        Logger.info("循环间隔{}秒后继续...".format(loopRestTime))
        time.sleep(loopRestTime)
        pending = DownloadService.findNotFinishList()
    # NOTE: a DingTalk "download finished" notification used to be sketched
    # here as commented-out code; it was removed because it embedded the
    # webhook access token in source.
@ -0,0 +1,2 @@ |
|||
# Location of the SQLite database file; the entry script overwrites it
# with the value of ``--db``.
db = ""

# Directory that receives the log files; the entry script overwrites it
# with the value of ``--logDir``.
logDir = ""
@ -0,0 +1,6 @@ |
|||
from loguru import logger |
|||
import Contant |
|||
# Shared alias so other modules can ``from LoggerUtils import Logger``.
Logger = logger


def initLogger():
    """Attach the init log file sink below ``Contant.logDir``."""
    sinkPath = Contant.logDir + "/init_{time}.log"
    sinkOptions = {
        "rotation": "500MB",
        "encoding": "utf-8",
        "enqueue": True,
        "compression": "zip",
        "retention": "10 days",
    }
    logger.add(sinkPath, **sinkOptions)
@ -0,0 +1,65 @@ |
|||
from peewee import * |
|||
import Contant |
|||
import argparse |
|||
from LoggerUtils import Logger |
|||
|
|||
# The command line is read at import time so that the database handle exists
# before the model classes below are declared.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
# parse_known_args() ignores flags that only the importing script knows;
# parse_args() would terminate the process on any of them.
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
|||
|
|||
|
|||
def ormInit():
    """Create the Channel, Vidoe and DownloadInfo tables, in that order."""
    for model in (Channel, Vidoe, DownloadInfo):
        model.create_table()
|||
|
|||
|
|||
class BaseModel(Model):
    """Common base that binds every model to the module-level ``db``."""

    class Meta:
        database = db
|||
|
|||
# 频道信息 |
|||
|
|||
|
|||
class Channel(BaseModel):
    """Channel information, stored in table ``Channel``."""

    id = PrimaryKeyField()
    # Required columns.
    channelId = CharField(null=False)
    channelTitle = CharField(null=False)
    channelLanguage = CharField()
    # The only nullable column of this model.
    channelReptileTime = CharField(null=True)

    class Meta:
        db_table = 'Channel'
|||
|
|||
# 视频信息 |
|||
|
|||
|
|||
class Vidoe(BaseModel):
    """Video information, stored in table ``Vidoes``.

    The class name is spelled this way on purpose: ``ormInit`` refers to it
    under exactly this name.
    """

    id = PrimaryKeyField()
    videoId = CharField(null=False)
    # Matches ``Channel.channelId``; kept as plain text, not a foreign key.
    channelId = CharField(null=False)
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()

    class Meta:
        # The spelling is the real table name; changing it would point the
        # model at a different table.
        db_table = 'Vidoes'
|||
|
|||
# 下载信息 |
|||
|
|||
|
|||
class DownloadInfo(BaseModel):
    """Download bookkeeping, stored in table ``Download_info``."""

    id = PrimaryKeyField()
    videoId = CharField()
    downloadType = IntegerField()
    # Attempt counter.
    tryTime = IntegerField()
    # Integer flag.
    isFinished = IntegerField()

    class Meta:
        db_table = 'Download_info'
@ -0,0 +1,62 @@ |
|||
import time |
|||
from LoggerUtils import Logger, initLogger |
|||
import argparse |
|||
import Contant |
|||
from Orm import ormInit, Channel |
|||
import operator |
|||
from bs4 import BeautifulSoup as bs |
|||
from urllib.request import urlopen, Request |
|||
|
|||
# py .\init.py --db=../db/youtube_prod.db --logDir=./logs |
|||
|
|||
|
|||
def saveChannel(channelUrl, language):
    """Resolve a channel page to its id and name and store it once.

    The page at ``channelUrl`` is fetched and its ``<link>`` tags are
    scanned: a tag mentioning ``canonical`` supplies the channel id (the
    part of its ``href`` after ``/channel/``) and a tag carrying a
    ``content`` attribute supplies the channel name. Nothing is stored when
    no id can be found or when the channel already exists.

    Args:
        channelUrl: URL of the channel page.
        language: Value stored as ``channelLanguage``.
    """
    Logger.info("频道链接:"+channelUrl)
    channelId = ""
    channelName = ""
    pageRequest = Request(channelUrl, headers={'User-Agent': 'Mozilla'})
    # The context manager closes the HTTP response once the page is parsed.
    with urlopen(pageRequest) as url_opener:
        videoInfo = bs(url_opener, features="html.parser")
    for link in videoInfo.find_all("link"):
        markup = str(link)
        if "canonical" in markup:
            href = str(link.get('href', ''))
            # Only canonical URLs of the form ".../channel/<id>" carry an id.
            if "/channel/" in href:
                channelId = href.split("/channel/")[1]
        if "content=" in markup:
            # The text may also occur inside another attribute value, so the
            # attribute itself is looked up defensively.
            content = link.get('content')
            if content is not None:
                channelName = str(content)
    Logger.info("channelId:"+channelId)
    Logger.info("channelName:"+channelName)
    if not channelId:
        # Never store a row with an empty id.
        Logger.error("Could not determine the channel id for: " + channelUrl)
        return
    channel = Channel.get_or_none(Channel.channelId == channelId)
    if channel is not None:
        Logger.info("频道已存在:" + channelId)
        return
    Channel.create(channelTitle=channelName,
                   channelId=channelId, channelLanguage=language)
|||
|
|||
|
|||
if __name__ == "__main__":
    # Entry point: register every channel listed in urlList.txt.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    ormInit()
    Logger.info("SqlLite存放地址:"+Contant.db)
    Logger.info("日志文件存放地址:"+Contant.logDir)
    Logger.info("开始初始化...")
    # Read the channel addresses to add from the text file
    Logger.info("开始读取需要新增的频道地址...")
    # The context manager closes the file once all lines are read.
    with open("urlList.txt") as urlFile:
        urlList = [line.strip('\n') for line in urlFile]
    for url_str in urlList:
        # Short lines (blank lines, stray tokens) are ignored.
        if len(url_str) > 10:
            # Expected line format: "<url> <language>".
            parts = url_str.split()
            if len(parts) < 2:
                Logger.warning(
                    "Skipping line without a language code: {}", url_str)
                continue
            url, language = parts[0], parts[1]
            Logger.info("url:{} ,language:{}", url, language)
            saveChannel(url, language)
@ -0,0 +1,2 @@ |
|||
https://www.youtube.com/@easymoney380 en |
|||
https://www.youtube.com/@Groww en |
@ -0,0 +1,3 @@ |
|||
en |
|||
https://www.youtube.com/@easymoney380 |
|||
https://www.youtube.com/@Groww |
@ -0,0 +1,13 @@ |
|||
hi |
|||
https://www.youtube.com/@goela |
|||
https://www.youtube.com/@GoelaSchoolofFinanceShorts/featured |
|||
https://www.youtube.com/@InvestYadnya |
|||
https://www.youtube.com/@NDTVProfitIndia |
|||
https://www.youtube.com/@Neerajjoshi/featured |
|||
https://www.youtube.com/@thehimanichaudhary |
|||
https://www.youtube.com/@ADigitalBlogger |
|||
https://www.youtube.com/@stockburnerofficial |
|||
https://www.youtube.com/@nehanagar |
|||
https://www.youtube.com/@easymoney380 |
|||
https://www.youtube.com/@madhurokade |
|||
https://www.youtube.com/@stockmartpro |
@ -0,0 +1,7 @@ |
|||
ja |
|||
https://www.youtube.com/@ryogakucho |
|||
https://www.youtube.com/@DanTakahashi1 |
|||
https://www.youtube.com/@buffett_taro |
|||
https://www.youtube.com/@Tsubame104 |
|||
https://www.youtube.com/@inc_academy |
|||
https://www.youtube.com/@kamioka01 |
@ -0,0 +1 @@ |
|||
# Directory that receives the log files; the entry script overwrites it
# with the value of ``--logDir``.
logDir = ""
@ -0,0 +1,6 @@ |
|||
from loguru import logger |
|||
import Contant |
|||
# Shared alias so other modules can ``from LoggerUtils import Logger``.
Logger = logger


def initLogger():
    """Attach the sftp log file sink below ``Contant.logDir``."""
    sinkPath = Contant.logDir + "/sftp_{time}.log"
    sinkOptions = {
        "rotation": "500MB",
        "encoding": "utf-8",
        "enqueue": True,
        "compression": "zip",
        "retention": "10 days",
    }
    logger.add(sinkPath, **sinkOptions)
@ -0,0 +1,97 @@ |
|||
import os |
|||
import shutil |
|||
import paramiko |
|||
import argparse |
|||
import Contant |
|||
from LoggerUtils import Logger, initLogger |
|||
import configparser |
|||
import requests |
|||
import time |
|||
|
|||
# python3 sftp.py --local="/mnt/tmp_srt_file" --logDir="./logs" |
|||
# python3 sftp.py --local="/mnt/test_file" --logDir="./logs" |
|||
if __name__ == "__main__":
    # Entry point: upload every file below --local to the SFTP server, one
    # remote folder per local sub-folder, deleting local files once sent.

    # Read the arguments
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--local", type=str, default="")
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.logDir = args.logDir
    initLogger()

    # Read the configuration file
    config = configparser.ConfigParser()
    config.read('sftp_config.ini')

    # SFTP connection settings
    hostname = config.get('sftp_config', 'hostname')
    port = config.getint('sftp_config', 'port')
    username = config.get('sftp_config', 'username')
    password = config.get('sftp_config', 'password')

    # The password is deliberately kept out of the log.
    Logger.info("host:{},port:{},username:{}".format(
        hostname, port, username))

    ssh_client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts any host key without verification.
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    sftp_client = None  # default until the channel is open
    try:
        ssh_client.connect(hostname, port, username, password)
        # Create the SFTP client
        sftp_client = ssh_client.open_sftp()
        Logger.info("SFTP客户端已经建立:{}".format(sftp_client))

        remote_root = "/Inbound/YouTube Captions"
        local_root = args.local
        Logger.info("remote_root:{},local_root:{}".format(remote_root, local_root))

        for name in os.listdir(local_root):
            localDir = local_root + "/" + name
            # Only sub-folders hold subtitle files; plain files are skipped.
            if not os.path.isdir(localDir):
                continue
            remoteDir = remote_root + "/" + name
            # Make sure the remote folder exists
            try:
                sftp_client.chdir(remoteDir)
            except IOError:
                sftp_client.mkdir(remoteDir)
                sftp_client.chdir(remoteDir)

            # Walk the local temporary folder
            for srt in os.listdir(localDir):
                remotePath = remoteDir + "/" + srt
                localPath = localDir + "/" + srt
                # Over-long remote paths are shortened first, so the
                # existence check and the upload use the same path.
                if len(remotePath) > 120:
                    remotePath = remotePath[:-20] + ".srt"
                # Delete the remote file when it already exists
                try:
                    sftp_client.stat(remotePath)
                    sftp_client.remove(remotePath)
                    Logger.info("Remote file '{}' deleted.".format(remotePath))
                except FileNotFoundError:
                    Logger.info("Remote file '{}' not found.".format(remotePath))
                # Upload the local file
                try:
                    if os.path.exists(localPath):
                        Logger.info("本地文件 '{}' 存在,开始上传.".format(localPath))
                        sftp_client.put(localPath, remotePath, confirm=False)
                        os.remove(localPath)
                    else:
                        Logger.info("本地文件 '{}' 不存在,无法上传.".format(localPath))
                except Exception as e:
                    Logger.info("上传失败 '{}' 文件名长度{}".format(
                        remotePath, len(remotePath)))
                    Logger.error(e)
            # NOTE(review): the original nesting of this close/reopen pair is
            # not recoverable from the source; a fresh channel per folder is
            # assumed - confirm.
            sftp_client.close()
            sftp_client = ssh_client.open_sftp()
    finally:
        # Always release the channel and the connection.
        if sftp_client is not None:
            sftp_client.close()
        ssh_client.close()

    # Send the DingTalk notification
    # NOTE(review): the access token is hard-coded; consider moving it into
    # the configuration file.
    webhook = "https://oapi.dingtalk.com/robot/send?access_token=c8c8d7d42c4eecd449dd303025ef968f647d1d8e8694e3fabc0ab5770d646dcb"
    jsonData = {
        "msgtype": "text",
        "text": {
            "content": "[Youtube]sftp finished"
        }
    }
    try:
        # The timeout keeps a stalled webhook from hanging the job forever.
        requests.post(webhook, json=jsonData, timeout=10)
        Logger.info("sftp发送钉钉消息成功...")
    except requests.RequestException as e:
        Logger.error(e)
@ -0,0 +1,5 @@ |
|||
[sftp_config] |
|||
hostname = filetransfer.blackrock.com |
|||
port = 22 |
|||
username = ftp_yunbo |
|||
password = s8v{8SJr |
@ -0,0 +1,15 @@ |
|||
import json |
|||
from Orm import Channel |
|||
from playhouse.shortcuts import model_to_dict, dict_to_model |
|||
|
|||
|
|||
class ChannelService:
    """Lookups and updates for the ``Channel`` table.

    All helpers are static so they can be called on the class or on an
    instance alike.
    """

    @staticmethod
    def getOneByChannelId(channelId):
        """Return the ``Channel`` row whose ``channelId`` matches, or ``None``."""
        return Channel.get_or_none(Channel.channelId == channelId)

    @staticmethod
    def updateTimeByChannelId(channelId, chageTime):
        """Store ``chageTime`` as ``channelReptileTime`` of the channel.

        The parameter spelling is kept so keyword callers keep working.
        """
        Channel.update(channelReptileTime=chageTime).where(
            Channel.channelId == channelId).execute()

    @staticmethod
    def getChannelList():
        """Return the executed query over all ``Channel`` rows."""
        return Channel.select().execute()
@ -0,0 +1,4 @@ |
|||
# Location of the SQLite database file; the entry script overwrites it
# with the value of ``--db``.
db = ""

# Directory that receives the log files; the entry script overwrites it
# with the value of ``--logDir``.
logDir = ""

# Bounds of the publish-time window; the entry script overwrites them with
# the values of ``--start`` and ``--end``.
startTime = ""
endTime = ""
@ -0,0 +1,16 @@ |
|||
from Orm import DownloadInfo |
|||
|
|||
|
|||
class DownloadService:
    """Inserts and updates for the ``DownloadInfo`` bookkeeping table.

    All helpers are static so they can be called on the class or on an
    instance alike.
    """

    @staticmethod
    def createOne(videoId, downloadType, tryTime, isFinished):
        """Insert a new bookkeeping row with the given values."""
        DownloadInfo.create(
            videoId=videoId,
            downloadType=downloadType,
            tryTime=tryTime,
            isFinished=isFinished,
        )

    @staticmethod
    def updateInfoByVideoId(videoId, tryTime, isFinished):
        """Set ``tryTime`` and ``isFinished`` on every row of ``videoId``.

        The update is not restricted by download type.
        """
        DownloadInfo.update(tryTime=tryTime, isFinished=isFinished).where(
            DownloadInfo.videoId == videoId).execute()
@ -0,0 +1,6 @@ |
|||
from loguru import logger |
|||
import Contant |
|||
# Shared alias so other modules can ``from LoggerUtils import Logger``.
Logger = logger


def initLogger():
    """Attach the main log file sink below ``Contant.logDir``."""
    sinkPath = Contant.logDir + "/main_{time}.log"
    sinkOptions = {
        "rotation": "500MB",
        "encoding": "utf-8",
        "enqueue": True,
        "compression": "zip",
        "retention": "10 days",
    }
    logger.add(sinkPath, **sinkOptions)
@ -0,0 +1,67 @@ |
|||
from peewee import * |
|||
import Contant |
|||
import argparse |
|||
from LoggerUtils import Logger |
|||
|
|||
# The command line is read at import time so that the database handle exists
# before the model classes below are declared.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
parser.add_argument("--start", type=str, default="")
parser.add_argument("--end", type=str, default="")
# parse_known_args() ignores flags that only the importing script knows;
# parse_args() would terminate the process on any of them.
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
|||
|
|||
|
|||
def ormInit():
    """Create the Channel, Video and DownloadInfo tables, in that order."""
    for model in (Channel, Video, DownloadInfo):
        model.create_table()
|||
|
|||
|
|||
class BaseModel(Model):
    """Common base that binds every model to the module-level ``db``."""

    class Meta:
        database = db
|||
|
|||
# 频道信息 |
|||
|
|||
|
|||
class Channel(BaseModel):
    """Channel information, stored in table ``Channel``."""

    id = PrimaryKeyField()
    # Required columns.
    channelId = CharField(null=False)
    channelTitle = CharField(null=False)
    channelLanguage = CharField()
    # The only nullable column of this model.
    channelReptileTime = CharField(null=True)

    class Meta:
        db_table = 'Channel'
|||
|
|||
# 视频信息 |
|||
|
|||
|
|||
class Video(BaseModel):
    """Video information, stored in table ``Vidoes``."""

    id = PrimaryKeyField()
    videoId = CharField(null=False)
    # Matches ``Channel.channelId``; kept as plain text, not a foreign key.
    channelId = CharField(null=False)
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()

    class Meta:
        # The spelling is the real table name; changing it would point the
        # model at a different table.
        db_table = 'Vidoes'
|||
|
|||
# 下载信息 |
|||
|
|||
|
|||
class DownloadInfo(BaseModel):
    """Download bookkeeping, stored in table ``Download_info``."""

    id = PrimaryKeyField()
    # Matches ``Video.videoId``; kept as plain text, not a foreign key.
    videoId = CharField()
    downloadType = IntegerField()
    # Attempt counter.
    tryTime = IntegerField()
    # Integer flag.
    isFinished = IntegerField()

    class Meta:
        db_table = 'Download_info'
@ -0,0 +1,31 @@ |
|||
import json |
|||
from Orm import Video |
|||
from playhouse.shortcuts import model_to_dict, dict_to_model |
|||
|
|||
|
|||
class VideoService:
    """Persistence helpers for the ``Video`` table.

    All helpers are static so they can be called on the class or on an
    instance alike.
    """

    @staticmethod
    def getOneByVideoId(videoId):
        """Return the ``Video`` row with this ``videoId``, or ``None``."""
        return Video.get_or_none(Video.videoId == videoId)

    @staticmethod
    def createOne(videoId, channelId, videoTitle, videoLen, videoType,
                  videoPublishTime, videoLanguage, isDownload):
        """Insert a new ``Video`` row with the given column values."""
        Video.create(
            videoId=videoId,
            channelId=channelId,
            videoTitle=videoTitle,
            videoLen=videoLen,
            videoType=videoType,
            videoPublishTime=videoPublishTime,
            videoLanguage=videoLanguage,
            isDownload=isDownload,
        )

    @staticmethod
    def updateLenByVideoId(videoId, len):
        """Store ``len`` as ``videoLen`` of the matching row(s).

        The parameter name shadows the builtin but is kept so existing
        keyword callers keep working.
        """
        Video.update(videoLen=len).where(Video.videoId == videoId).execute()

    @staticmethod
    def getLastVideoByChannelId(channelId):
        """Return the channel's video with the greatest ``videoPublishTime``.

        Raises:
            Video.DoesNotExist: when the channel has no stored video; call
                ``checkExist`` first to avoid it.
        """
        newestFirst = Video.select().where(
            Video.channelId == channelId).order_by(Video.videoPublishTime.desc())
        return newestFirst.get()

    @staticmethod
    def getFirstVideoByChannelId(channelId):
        """Return the channel's video with the smallest ``videoPublishTime``.

        Raises:
            Video.DoesNotExist: when the channel has no stored video.
        """
        oldestFirst = Video.select().where(
            Video.channelId == channelId).order_by(Video.videoPublishTime)
        return oldestFirst.get()

    @staticmethod
    def checkExist(channelId):
        """Return whether at least one video is stored for ``channelId``."""
        query = Video.select().where(Video.channelId == channelId)
        return query.exists()
@ -0,0 +1,164 @@ |
|||
import httplib2 |
|||
import googleapiclient.discovery |
|||
import googleapiclient.errors |
|||
from VideoService import VideoService |
|||
from ChannelService import ChannelService |
|||
from DownloadInfoService import DownloadService |
|||
from LoggerUtils import Logger |
|||
import operator |
|||
import time |
|||
import random |
|||
|
|||
|
|||
class YouTubeUtil:
    """Helpers around the YouTube Data API v3 for collecting channel videos."""

    # API keys; refreshed from ``api_key.txt`` on every ``getYoutube`` call.
    apiKeys = []
    # Index into ``apiKeys`` of the key picked most recently.
    apiIndex = 0

    # Number of newly stored videos after which their durations are fetched.
    _LENGTH_BATCH_SIZE = 10

    @staticmethod
    def getYoutube():
        """Build an API client that uses a randomly chosen key and the proxy.

        Returns:
            The ``googleapiclient`` resource for the ``youtube`` v3 service.

        Raises:
            ValueError: when ``api_key.txt`` holds no key.
        """
        proxy_info = httplib2.ProxyInfo(
            proxy_type=httplib2.socks.PROXY_TYPE_HTTP,
            proxy_host="127.0.0.1", proxy_port=7890)
        http = httplib2.Http(timeout=10, proxy_info=proxy_info,
                             disable_ssl_certificate_validation=False)
        api_service_name = "youtube"
        api_version = "v3"
        # Load the keys; blank lines are skipped so no empty key is picked.
        with open("api_key.txt", 'r') as file:
            keys = [line.strip() for line in file if line.strip()]
        if not keys:
            raise ValueError("api_key.txt does not contain any API key")
        YouTubeUtil.apiKeys = keys
        YouTubeUtil.apiIndex = random.randint(0, len(keys) - 1)
        apiKey = keys[YouTubeUtil.apiIndex]
        # Only the tail of the key is logged to keep the secret out of logs.
        Logger.info("当前APIKey:{},当前apiIndex:{}",
                    "***" + apiKey[-4:], YouTubeUtil.apiIndex)
        # Build the client object
        youtube = googleapiclient.discovery.build(
            api_service_name, api_version, developerKey=apiKey, http=http
        )
        return youtube

    @staticmethod
    def getVidoeLen(videoIds):
        """Return the ``videos.list`` response (``contentDetails`` part).

        Args:
            videoIds: Comma-separated video ids.
        """
        youtube = YouTubeUtil.getYoutube()
        request = youtube.videos().list(part="contentDetails", id=videoIds)
        return request.execute()

    @staticmethod
    def getVideoLenByStr(str):
        """Convert an ISO 8601 duration such as ``PT1H2M3S`` to seconds.

        Day, hour, minute and second components are supported, each
        optional (``P1DT2H``, ``PT45S``, ``P0D``). The parameter name
        shadows the builtin but is kept for existing callers.

        Raises:
            ValueError: when the text is not a supported duration.
        """
        import re  # local import: the file-level import block is not edited

        match = re.fullmatch(
            r"P(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?", str)
        if match is None:
            raise ValueError("Unsupported ISO 8601 duration: {!r}".format(str))
        days, hours, minutes, seconds = (
            int(part) if part else 0 for part in match.groups())
        return ((days * 24 + hours) * 60 + minutes) * 60 + seconds

    @staticmethod
    def _updateVideoLengths(videoIds):
        """Fetch the durations of ``videoIds`` and store them as seconds."""
        lenRes = YouTubeUtil.getVidoeLen(",".join(videoIds))
        for entry in lenRes.get("items", []):
            tmpId = entry["id"]
            videoLen = YouTubeUtil.getVideoLenByStr(
                entry["contentDetails"]["duration"])
            VideoService.updateLenByVideoId(tmpId, videoLen)
            Logger.info("更新时长,videoId:{},len:{}".format(tmpId, videoLen))

    @staticmethod
    def getByChannelId(channelId, startTime, endTime):
        """Store every video of a channel published inside a time window.

        Search results are walked page by page. Each unknown video gets a
        ``Video`` row and a type-1 ``DownloadInfo`` row; durations are
        fetched in batches. After each page the channel's
        ``channelReptileTime`` is set to the publish time of its newest
        stored video. Nothing happens when the channel is unknown.

        Args:
            channelId: Channel to scan.
            startTime: Passed to the search as ``publishedAfter``.
            endTime: Passed to the search as ``publishedBefore``.
        """
        channel = ChannelService.getOneByChannelId(channelId)
        if channel is None:
            return
        videoLanguage = str(channel.channelLanguage)
        youtube = YouTubeUtil.getYoutube()
        searchArgs = dict(
            part="snippet",
            channelId=channelId,
            maxResults=50,
            order="date",
            publishedAfter=startTime,
            publishedBefore=endTime,
            type="video",
        )
        response = youtube.search().list(**searchArgs).execute()
        while True:
            # Ids stored on this page whose duration is still unknown.
            newIds = []
            for item in response["items"]:
                try:
                    videoId = item["id"]["videoId"]
                    publisTime = item["snippet"]["publishedAt"]
                    videoTitle = item["snippet"]["title"]
                    videoType = "video"
                    if VideoService.getOneByVideoId(str(videoId)) is None:
                        VideoService.createOne(
                            videoId,
                            channelId,
                            videoTitle,
                            0,
                            videoType,
                            publisTime,
                            videoLanguage,
                            0,
                        )
                        DownloadService.createOne(videoId, 1, 0, 0)
                        newIds.append(str(videoId))
                        Logger.info(
                            "存储VideoUrl:https://www.youtube.com/watch?v=" + videoId
                        )
                    else:
                        Logger.info("已存在VideoId:{}".format(videoId))
                    if len(newIds) >= YouTubeUtil._LENGTH_BATCH_SIZE:
                        YouTubeUtil._updateVideoLengths(newIds)
                        # Cleared only after success, so a failed lookup is
                        # retried together with the next video.
                        newIds = []
                except Exception as e:
                    # One bad item must not abort the page, but is logged.
                    Logger.error(e)
            # Look up the durations of the videos left over on this page.
            if newIds:
                try:
                    YouTubeUtil._updateVideoLengths(newIds)
                except Exception as e:
                    Logger.error(e)
            # Remember the newest stored video; skipped while none exists.
            if VideoService.checkExist(channelId):
                vidoeo = VideoService.getLastVideoByChannelId(channelId)
                ChannelService.updateTimeByChannelId(
                    channelId, vidoeo.videoPublishTime)
            nextPageToken = response.get("nextPageToken")
            if not nextPageToken:
                print("no nextPageToken")
                break
            time.sleep(5)
            try:
                response = youtube.search().list(
                    pageToken=nextPageToken, **searchArgs).execute()
            except Exception as e:
                Logger.error(e)
                break
@ -0,0 +1,4 @@ |
|||
AIzaSyDJIKVldjWVeRSt3IBPAgredZsvldUDPhA |
|||
AIzaSyChPXesnVx6fweon_BckhR6UiJWvi5Ma4s |
|||
AIzaSyBI5i5vFZpQErMnEXKMf0VUS2Bel8jGrTk |
|||
AIzaSyAnmA0Ggy1yXsZZACfItmeZAa7wcmh6SbM |
@ -0,0 +1 @@ |
|||
UCCLu5B_Ctsw4N20DJvDykOA 1 |
@ -0,0 +1,70 @@ |
|||
import argparse |
|||
import time |
|||
import random |
|||
import Contant |
|||
import LoggerUtils |
|||
import Orm |
|||
from VideoService import VideoService |
|||
from YouTubeUtils import YouTubeUtil |
|||
from ChannelService import ChannelService |
|||
import requests |
|||
|
|||
# py .\main.py --db=../db/youtube_prod.db --logDir=./logs --start="2021-03-14T00:00:01Z" --end="2024-03-14T00:00:01Z" |
|||
# py .\main.py --db=../db/youtube_prod.db --logDir=./logs --start="2024-03-14T00:00:01Z" --end="2024-04-25T00:00:01Z" |
|||
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="2023-08-10T00:00:01Z" --end="2023-09-12T00:00:01Z" |
|||
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="111" --end="222" |
|||
if __name__ == "__main__":
    # Command-line entry point: crawl every enabled channel listed in
    # channelList.txt for videos published between --start and --end.
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--db", type=str, default="")
    parser.add_argument("--logDir", type=str, default="")
    parser.add_argument("--start", type=str, default="")
    parser.add_argument("--end", type=str, default="")
    args = parser.parse_args()

    # Publish the settings through the shared Contant module before the
    # logger and the ORM are initialised.
    Contant.db = args.db
    Contant.logDir = args.logDir
    Contant.startTime = args.start
    Contant.endTime = args.end

    LoggerUtils.initLogger()
    Orm.ormInit()
    LoggerUtils.Logger.info("db:{},logDir:{}".format(Contant.db, Contant.logDir))
    LoggerUtils.Logger.info("starTime:{},endTime:{}".format(Contant.startTime, Contant.endTime))

    # Read the API keys from a file (currently disabled)
    # for line in open("api_key.txt"):
    #     line = line.strip('\n')
    #     YouTubeUtil.apiKeys.append(line)
    # LoggerUtils.Logger.info("YouTubeUtil.apiKeys:{}",YouTubeUtil.apiKeys)

    # Read the channels to crawl; each line is "<channelId> <enabled flag>".
    # The context manager closes the file handle once the list is built.
    with open("channelList.txt") as channelFile:
        channelList = [line.strip("\n") for line in channelFile]

    for channel_str in channelList:
        fields = channel_str.split()
        if not fields:
            # Blank line (for example a trailing newline at the end of the file).
            continue
        if len(fields) < 2:
            # A line without the enabled flag used to crash with IndexError.
            LoggerUtils.Logger.warning("skip malformed channel line:{}".format(channel_str))
            continue
        channelId, is_enable = fields[0], fields[1]
        if is_enable == "1":
            LoggerUtils.Logger.info("channelId:{},startTime:{},endTime:{}".format(channelId, Contant.startTime, Contant.endTime))
            YouTubeUtil.getByChannelId(channelId, Contant.startTime, Contant.endTime)
            # Random pause between channels.
            sleep_time = random.randint(3, 10)
            LoggerUtils.Logger.info("{}获取完毕,暂停{}秒", channelId, sleep_time)
            time.sleep(sleep_time)
|||
# 执行查询 |
|||
# channelList = ChannelService.getChannelList() |
|||
# LoggerUtils.Logger.info("list size:{}".format(len(channelList))) |
|||
# for channel in channelList: |
|||
# channelId = channel.channelId |
|||
# LoggerUtils.Logger.info( |
|||
# "channelId:{},startTime:{},endTime:{}".format( |
|||
# channelId, Contant.startTime, Contant.endTime |
|||
# ) |
|||
# ) |
|||
# YouTubeUtil.getByChannelId(channelId, Contant.startTime, Contant.endTime) |
|||
# 发送钉钉消息 |
|||
# webhook = "https://oapi.dingtalk.com/robot/send?access_token=c8c8d7d42c4eecd449dd303025ef968f647d1d8e8694e3fabc0ab5770d646dcb" |
|||
# jsonData = { |
|||
# "msgtype": "text", |
|||
# "text": { |
|||
# "content": "[Youtube]src finished" |
|||
# } |
|||
# } |
|||
# requests.post(webhook, json=jsonData) |
|||
# LoggerUtils.Logger.info("src发送钉钉消息成功...") |
@ -0,0 +1,10 @@ |
|||
#!/bin/bash
# Start the downloader in the background and record the launch in running.log.

# Append one timestamped info line to the shared running log.
log() {
    local time_now
    time_now=$(date '+%Y-%m-%d %H:%M:%S')
    echo "$time_now [download] [info] $1" >> /mnt/youtube_prod/running.log
}

cd /mnt/youtube_prod/download
# /mnt/youtube_prod/start_download.sh
log "开始执行download..."
# stdout is discarded; stderr is written to err.log.
nohup python3 ./main_download.py --db="../db/youtube_prod.db" --logDir="./logs" > /dev/null 2> /mnt/youtube_prod/err.log &
@ -0,0 +1,11 @@ |
|||
#!/bin/bash
# Upload the staged subtitle files with sftp.py, then clear the staging
# directory once the upload has succeeded.

# Append one timestamped info line to the shared running log.
# The tag is "sftp": the previous "download" tag was copied from the download
# start script and made the two jobs indistinguishable in running.log.
log() {
    local time_now
    time_now=$(date '+%Y-%m-%d %H:%M:%S')
    echo "$time_now [sftp] [info] $1" >> /mnt/youtube_prod/running.log
}

# Do not run the uploader from an unexpected working directory.
cd /mnt/youtube_prod/sftp || exit 1
log "开始执行sftp..."
if python3 ./sftp.py --local="/mnt/tmp_srt_file" --logDir="./logs"; then
    rm -rf /mnt/tmp_srt_file
else
    # Keep the staged files so a failed upload can be retried instead of
    # silently losing them.
    log "sftp failed, keeping /mnt/tmp_srt_file"
    exit 1
fi
@ -0,0 +1,12 @@ |
|||
#!/bin/bash
# Start the crawler in the background for the window from one day ago to now.

# Append one timestamped info line to the shared running log.
log() {
    local time_now
    time_now=$(date '+%Y-%m-%d %H:%M:%S')
    echo "$time_now [src] [info] $1" >> /mnt/youtube_prod/running.log
}

cd /mnt/youtube_prod/src
# NOTE(review): date prints local time while the format appends a literal "Z";
# confirm the host runs in UTC.
start=$(date '+%Y-%m-%dT%H:%M:%SZ' -d'-1 day')
end=$(date '+%Y-%m-%dT%H:%M:%SZ')
log "开始执行src...startTime:${start},endTime:${end}"
# /mnt/youtube_prod/start_src.sh
# stdout is discarded; stderr is written to err.log.
nohup python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="$start" --end="$end" > /dev/null 2> /mnt/youtube_prod/err.log &
@ -0,0 +1,4 @@ |
|||
#!/bin/bash
# Force-stop the first running process whose command line contains
# "main_download".
#
# pgrep never reports itself, whereas the first row of `ps -ef | grep ...`
# could be the grep process, so the wrong (or an already gone) pid was killed.
pid=$(pgrep -f main_download | head -n 1)
echo "$pid"
# Only signal when a process was actually found; `kill -9` with no pid is an error.
if [ -n "$pid" ]; then
    kill -9 "$pid"
fi
@ -0,0 +1,2 @@ |
|||
#!/bin/bash
# Smoke-test script: prints a fixed marker line.
printf '%s\n' "test"
@ -0,0 +1,42 @@ |
|||
# import httplib2 |
|||
# import googleapiclient.discovery |
|||
# import googleapiclient.errors |
|||
|
|||
# def getYoutube(): |
|||
# proxy_info = httplib2.ProxyInfo( |
|||
# proxy_type=httplib2.socks.PROXY_TYPE_HTTP, proxy_host="127.0.0.1", proxy_port=7890) |
|||
# http = httplib2.Http(timeout=10, proxy_info=proxy_info, |
|||
# disable_ssl_certificate_validation=False) |
|||
# # http = httplib2.Http(timeout=10, disable_ssl_certificate_validation=False) |
|||
# api_service_name = "youtube" |
|||
# api_version = "v3" |
|||
# # 获取apiKey |
|||
# apiKey = "AIzaSyARaW3mqO9szQiHgWZR4el0HWvdyheSHBc" |
|||
# # 获取对象 |
|||
# youtube = googleapiclient.discovery.build( |
|||
# api_service_name, api_version, developerKey=apiKey, http=http |
|||
# ) |
|||
# return youtube |
|||
|
|||
|
|||
# youtube = getYoutube() |
|||
# request = youtube.videos().list(part="statistics", id="9l7O_2KNomQ") |
|||
# response = request.execute() |
|||
# print(response) |
|||
# response = {'kind': 'youtube#videoListResponse', 'etag': 'I41mEoQqqiB5sxwKKu8X3wNWkB8', 'items': [{'kind': 'youtube#video', 'etag': 'mncS6_AC9-Y6HUjjt_A4ocpWVY4', 'id': '9l7O_2KNomQ', 'statistics': {'viewCount': '47212', 'likeCount': '2126', 'favoriteCount': '0', 'commentCount': '172'}}], 'pageInfo': {'totalResults': 1, 'resultsPerPage': 1}} |
|||
# print(response['items'][0]['statistics']['viewCount']) |
|||
# Scratch check of the 30-slot, comma-separated counter string: build thirty
# "0" slots, overwrite the first one and serialise the slots again.
# The slot list is no longer called `list`, which shadowed the builtin.
slots = ["0"] * 30
countStr = ",".join(slots)
print(countStr.split(","))

slots[0] = 1
print(slots)

# str() is needed because the overwritten slot holds an int.
countStr = ",".join(str(slot) for slot in slots)
print(countStr)
@ -0,0 +1,8 @@ |
|||
from youtube_transcript_api import YouTubeTranscriptApi |
|||
#zh-Hant |
|||
# Scratch probe of youtube_transcript_api: list the transcripts available for
# one video and fetch the Hindi transcript of another.
url = "https://www.youtube.com/watch?v=rhj42pLWa5s"
# Renamed from `list` so the builtin is not shadowed.
transcripts = YouTubeTranscriptApi.list_transcripts("jtr9VBwwJ7M")
videoSrt = YouTubeTranscriptApi.get_transcript("KWlTphpCpcI", languages=['hi'])
print(transcripts)
print(videoSrt)
@ -0,0 +1,15 @@ |
|||
import json |
|||
from Orm import Channel |
|||
from playhouse.shortcuts import model_to_dict, dict_to_model |
|||
|
|||
|
|||
class ChannelService:
    """Query and update helpers for the Channel table.

    The helpers take no instance state, so they are declared as static
    methods: ChannelService.method(...) keeps working, and a call through an
    instance no longer binds the instance as the first argument.
    """

    @staticmethod
    def getOneByChannelId(channelId):
        """Return the Channel row whose channelId matches, or None if absent."""
        return Channel.get_or_none(Channel.channelId == channelId)

    @staticmethod
    def updateTimeByChannelId(channelId, chageTime):
        """Store chageTime as channelReptileTime on the matching channel rows."""
        Channel.update(channelReptileTime=chageTime).where(
            Channel.channelId == channelId).execute()

    @staticmethod
    def getChannelList():
        """Execute a select over all Channel rows and return its result."""
        return Channel.select().execute()
@ -0,0 +1,4 @@ |
|||
# Process-wide settings. Every value starts as an empty string and is
# overwritten at start-up by the script that parses the command line.
db = logDir = startTime = endTime = ""
@ -0,0 +1,6 @@ |
|||
from loguru import logger |
|||
import Contant |
|||
Logger = logger |
|||
def initLogger():
    """Add a file sink under Contant.logDir to the shared loguru logger."""
    # "{time}" is left in the path as-is for loguru to handle.
    sinkPath = "".join((Contant.logDir, "/main_{time}.log"))
    sinkOptions = {
        "rotation": "500MB",
        "encoding": "utf-8",
        "enqueue": True,
        "compression": "zip",
        "retention": "10 days",
    }
    logger.add(sinkPath, **sinkOptions)
@ -0,0 +1,75 @@ |
|||
from peewee import * |
|||
import Contant |
|||
import argparse |
|||
from LoggerUtils import Logger |
|||
|
|||
# The database path is resolved while this module is imported, because the
# model classes below bind to `db` at class-creation time.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
# parse_known_args() ignores options that belong to the importing script
# (for example --start / --end); parse_args() would abort the whole process
# with "unrecognized arguments" as soon as this module is imported.
args = parser.parse_known_args()[0]
Contant.db = args.db
db = SqliteDatabase(Contant.db)
|||
|
|||
|
|||
def ormInit():
    """Call create_table() on every model declared in this module."""
    # Same order as before: Channel, Video, DownloadInfo, ViewCountInfo.
    for model in (Channel, Video, DownloadInfo, ViewCountInfo):
        model.create_table()
|||
|
|||
|
|||
class BaseModel(Model):
    """Shared base class that binds each model to the module-level database."""

    class Meta:
        # `db` is the SqliteDatabase created when this module is imported.
        database = db
|||
|
|||
# 频道信息 |
|||
|
|||
|
|||
class Channel(BaseModel):
    """Channel information, one row per channel."""

    id = PrimaryKeyField()
    # Fields are NOT NULL unless null=True is given (peewee default).
    channelId = CharField()
    channelTitle = CharField()
    channelLanguage = CharField()
    # Written by ChannelService.updateTimeByChannelId; may be NULL.
    channelReptileTime = CharField(null=True)

    class Meta:
        db_table = "Channel"
|||
|
|||
# 视频信息 |
|||
|
|||
|
|||
class Video(BaseModel):
    """Video information, one row per video."""

    id = PrimaryKeyField()
    # Fields are NOT NULL unless null=True is given (peewee default).
    videoId = CharField()
    channelId = CharField()
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    # Kept as text; VideoService orders and filters on this column as a string.
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()

    class Meta:
        # The table name is spelled "Vidoes"; it must stay as-is so that
        # existing databases keep resolving to the same table.
        db_table = "Vidoes"
|||
|
|||
# 下载信息 |
|||
|
|||
|
|||
class DownloadInfo(BaseModel):
    """Download information for a video and download type."""

    id = PrimaryKeyField()
    videoId = CharField()
    downloadType = IntegerField()
    tryTime = IntegerField()
    isFinished = IntegerField()

    class Meta:
        db_table = "Download_info"
|||
|
|||
# 播放量信息 |
|||
class ViewCountInfo(BaseModel):
    """View-count information for a video."""

    id = PrimaryKeyField()
    videoId = CharField()
    # Comma-separated counter slots, as written by ViewCountService.
    viewCount = CharField()

    class Meta:
        db_table = "ViewCount_info"
@ -0,0 +1,33 @@ |
|||
import json |
|||
from Orm import ViewCountInfo |
|||
from playhouse.shortcuts import model_to_dict, dict_to_model |
|||
|
|||
|
|||
class ViewCountService:
    """Persistence helper for the per-video view-count history."""

    # Number of comma-separated slots kept in ViewCountInfo.viewCount.
    _SLOT_COUNT = 30

    @staticmethod
    def createOrUpdateOne(videoId, day, count):
        """Write `count` into slot `day` of the video's view-count string.

        A ViewCountInfo row is created with thirty "0" slots when the video
        has none yet; otherwise the stored string is updated in place.

        Args:
            videoId: video identifier used to look up the row.
            day: 1-based slot number (slot ``day - 1`` is overwritten).
            count: value stored in the slot; converted with str().

        Raises:
            IndexError: if ``day`` is greater than the number of slots.
        """
        slotCount = ViewCountService._SLOT_COUNT
        # A single lookup replaces the former "select, then select again".
        row = ViewCountInfo.get_or_none(ViewCountInfo.videoId == videoId)
        stored = row.viewCount.split(",") if row is not None and row.viewCount else []
        # Pad short values and drop extra ones so exactly slotCount slots are
        # written; a short stored value used to raise IndexError.
        slots = (stored + ["0"] * slotCount)[:slotCount]
        slots[day - 1] = str(count)
        countStr = ",".join(slots)
        if row is None:
            ViewCountInfo.create(videoId=videoId, viewCount=countStr)
        else:
            ViewCountInfo.update(viewCount=countStr).where(
                ViewCountInfo.videoId == videoId).execute()
@ -0,0 +1,34 @@ |
|||
import json |
|||
from Orm import Video |
|||
from playhouse.shortcuts import model_to_dict, dict_to_model |
|||
|
|||
|
|||
class VideoService:
    """Query and update helpers for the Video table.

    The helpers take no instance state, so they are declared as static
    methods: VideoService.method(...) keeps working, and a call through an
    instance no longer binds the instance as the first argument.
    """

    @staticmethod
    def getOneByVideoId(videoId):
        """Return the Video row whose videoId matches, or None if absent."""
        return Video.get_or_none(Video.videoId == videoId)

    @staticmethod
    def createOne(videoId, channelId, videoTitle, videoLen, videoType, videoPublishTime, videoLanguage, isDownload):
        """Insert one Video row built from the given column values."""
        Video.create(
            videoId=videoId,
            channelId=channelId,
            videoTitle=videoTitle,
            videoLen=videoLen,
            videoType=videoType,
            videoPublishTime=videoPublishTime,
            videoLanguage=videoLanguage,
            isDownload=isDownload,
        )

    @staticmethod
    def updateLenByVideoId(videoId, len):
        """Set videoLen on the rows with this videoId.

        The parameter name `len` shadows the builtin but is kept because
        callers may pass it by keyword.
        """
        Video.update(videoLen=len).where(Video.videoId == videoId).execute()

    @staticmethod
    def getLastVideoByChannelId(channelId):
        """Return the channel's row with the greatest videoPublishTime.

        Uses .get(), which raises when the channel has no rows.
        """
        return Video.select().where(Video.channelId == channelId).order_by(Video.videoPublishTime.desc()).get()

    @staticmethod
    def getFirstVideoByChannelId(channelId):
        """Return the channel's row with the smallest videoPublishTime.

        Uses .get(), which raises when the channel has no rows.
        """
        return Video.select().where(Video.channelId == channelId).order_by(Video.videoPublishTime).get()

    @staticmethod
    def checkExist(channelId):
        """Return whether any Video row exists for the channel."""
        query = Video.select().where(Video.channelId == channelId)
        return query.exists()

    @staticmethod
    def getVideosByTime(startTime, endTime):
        """Return the executed select of rows with startTime <= videoPublishTime <= endTime.

        videoPublishTime is a text column, so the bounds are compared as
        strings and must use the same format as the stored values.
        """
        return Video.select().where(Video.videoPublishTime >= startTime, Video.videoPublishTime <= endTime).execute()
@ -0,0 +1,79 @@ |
|||
import argparse |
|||
import random |
|||
import time |
|||
import Contant |
|||
from LoggerUtils import Logger, initLogger |
|||
import Orm |
|||
from VideoService import VideoService |
|||
from ChannelService import ChannelService |
|||
from VideoCountService import ViewCountService |
|||
from func_timeout import func_set_timeout |
|||
import func_timeout |
|||
import requests |
|||
import httplib2 |
|||
import googleapiclient.discovery |
|||
import googleapiclient.errors |
|||
import datetime |
|||
|
|||
|
|||
def getYoutube():
    """Build a YouTube Data API v3 client that goes through the local HTTP proxy.

    The developer key is taken from the YOUTUBE_API_KEY environment variable
    when it is set and non-empty; otherwise the key embedded below is used,
    exactly as before.

    Returns:
        The service object returned by googleapiclient.discovery.build().
    """
    import os  # local import: only this function needs it

    proxy_info = httplib2.ProxyInfo(
        proxy_type=httplib2.socks.PROXY_TYPE_HTTP, proxy_host="127.0.0.1", proxy_port=7890)
    http = httplib2.Http(timeout=10, proxy_info=proxy_info,
                         disable_ssl_certificate_validation=False)
    # http = httplib2.Http(timeout=10, disable_ssl_certificate_validation=False)
    api_service_name = "youtube"
    api_version = "v3"
    # Resolve the API key.
    # NOTE(review): this fallback key is committed to source control; it should
    # be rotated and supplied only through the environment.
    apiKey = os.environ.get("YOUTUBE_API_KEY") or "AIzaSyARaW3mqO9szQiHgWZR4el0HWvdyheSHBc"
    # Build the client object.
    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, developerKey=apiKey, http=http
    )
    return youtube
|||
|
|||
|
|||
def updateVideoViewCount(startTime, endTime):
    """Fetch view counts for the videos published in a time window and store them.

    Videos are read with VideoService.getVideosByTime, their statistics are
    requested in batches of up to 30 ids, and each returned viewCount is
    passed to ViewCountService.createOrUpdateOne.

    Args:
        startTime: inclusive lower bound passed to getVideosByTime.
        endTime: inclusive upper bound passed to getVideosByTime.
    """
    batchSize = 30
    videoIds = [video.videoId for video in VideoService.getVideosByTime(startTime, endTime)]
    youtube = getYoutube()
    # Slicing by offset always sends the trailing partial batch. The previous
    # counter was reset after every full batch and then compared with the
    # total, so with more than 30 videos the remainder was never requested.
    for offset in range(0, len(videoIds), batchSize):
        batch = videoIds[offset:offset + batchSize]
        for videoId in batch:
            Logger.info(videoId)
        # join() produces "a,b,c" without the leading comma of the old string.
        request = youtube.videos().list(part="statistics", id=",".join(batch))
        response = request.execute()
        for item in response['items']:
            Logger.info(item)
            # NOTE(review): the slot is always 1 even though the caller walks
            # 30 different days; confirm that this is intended.
            ViewCountService.createOrUpdateOne(
                item['id'], 1, item['statistics']['viewCount'])
|||
|
|||
# python ./view_count_main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="2024-01-03T00:00:00Z" --end="2024-01-04T00:00:00Z" |
|||
|
|||
|
|||
if __name__ == "__main__":
    # Command-line entry point: refresh the stored view counts of the videos
    # published on each of the previous 30 days.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    Orm.ormInit()

    # Query all videos of the last 30 days, one calendar day at a time.
    # NOTE(review): now() is local time while the bounds end in "Z"; confirm
    # the host runs in UTC.
    now = datetime.datetime.now()
    zero_today = now.replace(hour=0, minute=0, second=0, microsecond=0)
    end_today = now.replace(hour=23, minute=59, second=59, microsecond=0)
    # Four-digit year and hour:minute:second order, matching the
    # "2024-01-03T00:00:00Z" bounds shown in this script's usage example.
    # The old "%y-%m-%dT%H:%S:%MZ" produced a two-digit year, so the string
    # comparison against stored publish times could never line up.
    timeFormat = "%Y-%m-%dT%H:%M:%SZ"
    for i in range(1, 31):
        dayOffset = datetime.timedelta(days=-i)
        startTime = (zero_today + dayOffset).strftime(timeFormat)
        endTime = (end_today + dayOffset).strftime(timeFormat)
        Logger.info("startTime:%s, endTime:%s" % (startTime, endTime))
        updateVideoViewCount(startTime, endTime)
|||
# zero_today = zero_today.strftime("%y-%m-%dT%H:%S:%MZ") |
|||
# print(zero_today) |
Loading…
Reference in new issue