Browse Source

commit

master
zhangshu 7 months ago
parent
commit
beae493df0
  1. 3
      README.md
  2. BIN
      db/youtube_prod.db
  3. BIN
      db/youtube_prod_bak.db
  4. 7
      download/ChannelService.py
  5. 2
      download/Contant.py
  6. 28
      download/DownloadInfoService.py
  7. 172
      download/DownloadUtil.py
  8. 6
      download/LoggerUtils.py
  9. 65
      download/Orm.py
  10. 26
      download/VideoService.py
  11. BIN
      download/download.zip
  12. 49
      download/main_download.py
  13. 2
      init/Contant.py
  14. 6
      init/LoggerUtils.py
  15. 65
      init/Orm.py
  16. 56
      init/init.py
  17. 14
      init/urlList.txt
  18. 90
      init/urlList_hi.txt
  19. 7
      init/urlList_ja.txt
  20. 1
      sftp/Contant.py
  21. 6
      sftp/LoggerUtils.py
  22. 97
      sftp/sftp.py
  23. 5
      sftp/sftp_config.ini
  24. 15
      src/ChannelService.py
  25. 4
      src/Contant.py
  26. 16
      src/DownloadInfoService.py
  27. 6
      src/LoggerUtils.py
  28. 68
      src/Orm.py
  29. 0
      src/SrcTest.py
  30. 31
      src/VideoService.py
  31. 169
      src/YouTubeUtils.py
  32. 49
      src/main.py
  33. 32
      src/one_channel.py
  34. 87
      src_tmp.sh
  35. 10
      start_download.sh
  36. 11
      start_sftp.sh
  37. 12
      start_src.sh
  38. 4
      stop_download.sh
  39. 2
      test.sh
  40. 9
      test/test.py
  41. 8
      test/test2.py
  42. 15
      view_count/ChannelService.py
  43. 10
      view_count/Contant.py
  44. 6
      view_count/LoggerUtils.py
  45. 75
      view_count/Orm.py
  46. 33
      view_count/VideoCountService.py
  47. 34
      view_count/VideoService.py
  48. 99
      view_count/view_count_main.py

3
README.md

@ -1,2 +1,3 @@
# youtube_prod # youtube_srt
Youtube字幕项目

BIN
db/youtube_prod.db

Binary file not shown.

BIN
db/youtube_prod_bak.db

Binary file not shown.

7
download/ChannelService.py

@ -0,0 +1,7 @@
import json
from Orm import Channel
from playhouse.shortcuts import model_to_dict, dict_to_model
class ChannelService:
    """Lookups against the ``Channel`` table."""

    @staticmethod
    def getOneByChannelId(channelId):
        """Return the ``Channel`` row whose ``channelId`` matches, or ``None``.

        Declared static because the method takes no ``self`` and is invoked
        on the class itself (``ChannelService.getOneByChannelId(...)``).
        """
        return Channel.get_or_none(Channel.channelId == channelId)

2
download/Contant.py

@ -0,0 +1,2 @@
db=""
logDir=""

28
download/DownloadInfoService.py

@ -0,0 +1,28 @@
from Orm import DownloadInfo
class DownloadService:
    """Persistence helpers for ``DownloadInfo`` rows (per-video download state)."""

    @staticmethod
    def getOneByVideoId(videoId, downloadType):
        """Return the row for ``videoId``/``downloadType``, or ``None`` if absent.

        ``get_or_none`` is used instead of ``get`` because callers test the
        result against ``None``; ``get`` would raise ``DoesNotExist`` instead
        of returning ``None`` when no row matches.
        """
        return DownloadInfo.get_or_none(
            DownloadInfo.videoId == videoId,
            DownloadInfo.downloadType == downloadType)

    @staticmethod
    def createOne(videoId, downloadType, tryTime, isFinished):
        """Insert a new ``DownloadInfo`` row with the given values."""
        DownloadInfo.create(
            videoId=videoId,
            downloadType=downloadType,
            tryTime=tryTime,
            isFinished=isFinished
        )

    @staticmethod
    def updateInfoByVideoId(videoId, tryTime, isFinished, downloadType):
        """Set ``tryTime`` and ``isFinished`` on the row matching ``videoId``/``downloadType``."""
        DownloadInfo.update(tryTime=tryTime, isFinished=isFinished).where(
            DownloadInfo.videoId == videoId,
            DownloadInfo.downloadType == downloadType).execute()

    @staticmethod
    def changeDownloadType(videoId, tryTime, isFinished, downloadType, changeType):
        """Move the row matching ``videoId``/``downloadType`` to ``changeType``.

        ``tryTime`` and ``isFinished`` are overwritten in the same statement.
        """
        DownloadInfo.update(tryTime=tryTime, isFinished=isFinished, downloadType=changeType).where(
            DownloadInfo.videoId == videoId,
            DownloadInfo.downloadType == downloadType).execute()

    @staticmethod
    def _findNotFinishByType(downloadType):
        """Return up to 10 unfinished rows of ``downloadType`` with ``tryTime <= 5``."""
        return DownloadInfo.select().where(
            DownloadInfo.isFinished == 0,
            DownloadInfo.tryTime <= 5,
            DownloadInfo.downloadType == downloadType).limit(10).execute()

    @staticmethod
    def findNotFinishList():
        """Return the next batch of unfinished rows with download type 1."""
        return DownloadService._findNotFinishByType(1)

    @staticmethod
    def findNotFinishListTwo():
        """Return the next batch of unfinished rows with download type 2."""
        return DownloadService._findNotFinishByType(2)

172
download/DownloadUtil.py

@ -0,0 +1,172 @@
from shutil import copyfile
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import SRTFormatter
from VideoService import VideoService
from ChannelService import ChannelService
from DownloadInfoService import DownloadService
from LoggerUtils import Logger
import time
import os
from func_timeout import func_set_timeout
import operator
class DownLoadUtil:
    """Download YouTube transcripts as ``.srt`` files and record the outcome."""

    formatter = SRTFormatter()
    # Local proxy endpoints; no method of this class references them.
    proxies = {"http": "http://127.0.0.1:7890",
               "https": "https://127.0.0.1:7890"}

    @staticmethod
    def _sanitizeTitle(rawTitle):
        """Return the title with characters that are unsafe in file names handled.

        ``/`` becomes the look-alike U+2215; ``? \\ | < > :`` are removed.
        """
        title = str(rawTitle).replace("/", u"\u2215")
        for unsafeChar in ("?", "\\", "|", "<", ">", ":"):
            title = title.replace(unsafeChar, "")
        return title

    @staticmethod
    def _shorten(path):
        """Drop the last 20 characters of ``path`` and re-append ``.srt``."""
        return path[:-20] + ".srt"

    @staticmethod
    def _fetchAndWrite(videoId, languages, storePath, pathLogFormat):
        """Fetch the transcript, format it as SRT and write it to ``storePath``."""
        videoSrt = YouTubeTranscriptApi.get_transcript(
            videoId, languages=[languages])
        srt_formatted = DownLoadUtil.formatter.format_transcript(videoSrt)
        Logger.info(pathLogFormat.format(storePath))
        with open(storePath, 'w', encoding='utf-8') as srt_file:
            srt_file.write(srt_formatted)
        Logger.info("下载完成...{}".format(videoId))

    @staticmethod
    def _markFinished(videoId, downloadType):
        """Flag the video as downloaded and close its ``DownloadInfo`` row, if any."""
        VideoService.updateIsDownloadByVideoId(videoId, 1)
        downloadInfo = DownloadService.getOneByVideoId(videoId, downloadType)
        if downloadInfo is not None:
            DownloadService.updateInfoByVideoId(
                videoId, downloadInfo.tryTime + 1, 1, downloadType)

    @staticmethod
    def _scheduleRetry(videoId, downloadInfo, downloadType):
        """Increment ``tryTime`` on the row so the video is retried later."""
        if downloadInfo is not None:
            Logger.info("VideoId:{}开始重试第{}".format(
                videoId, downloadInfo.tryTime + 1))
            DownloadService.updateInfoByVideoId(
                videoId, downloadInfo.tryTime + 1, 0, downloadType)

    @func_set_timeout(60)
    def downloadOne(videoId):
        """Download the transcript of ``videoId`` (download type 1).

        The file is written under ``/mnt/srt_file/<channel>/`` and copied to
        ``/mnt/tmp_srt_file/<channel>/``. On failure the ``DownloadInfo`` row
        is moved to type 2 (no transcript), retried with the video id as the
        file name (name too long), or has its try counter incremented.
        The call is aborted by ``func_set_timeout`` after 60 seconds.
        """
        # Load the video and its channel.
        video = VideoService.getOneByVideoId(videoId)
        channel = ChannelService.getOneByChannelId(str(video.channelId))
        videoTitle = DownLoadUtil._sanitizeTitle(video.videoTitle)
        # Keep only the date part of the publish timestamp.
        videoPublishTime = str(video.videoPublishTime).split("T")[0]
        Logger.info("开始下载...{}".format(videoId))
        try:
            languages = str(video.videoLanguage)
            storeDir = "/mnt/srt_file/" + str(channel.channelTitle)
            cpDir = "/mnt/tmp_srt_file/" + str(channel.channelTitle)
            for folder in (storeDir, cpDir):
                if not os.path.exists(folder):
                    Logger.info("开始创建文件夹:" + folder)
                    os.makedirs(folder)
            fileName = videoPublishTime + "-" + languages + "-" + videoTitle + ".srt"
            storePath = storeDir + "/" + fileName
            cpPath = cpDir + "/" + fileName
            if len(cpPath) > 120:
                storePath = DownLoadUtil._shorten(storePath)
                cpPath = DownLoadUtil._shorten(cpPath)
            DownLoadUtil._fetchAndWrite(
                videoId, languages, storePath, "文件地址...{}")
            copyfile(storePath, cpPath)
            DownLoadUtil._markFinished(videoId, 1)
        except Exception as e:
            Logger.error("下载失败...{}".format(videoId))
            logStr = "Exception...{}".format(e)
            Logger.error(logStr)
            downloadInfo = DownloadService.getOneByVideoId(videoId, 1)
            if "No transcripts" in logStr:
                Logger.error("VideoId:{},不存在字幕文件".format(videoId))
                if downloadInfo is not None:
                    DownloadService.changeDownloadType(
                        videoId, 0, 0, 1, 2)
            elif "File name too long" in logStr:
                # Retry once with the video id in place of the title.
                languages = str(video.videoLanguage)
                channelTitle = str(channel.channelTitle)
                fileName = videoPublishTime + "-" + languages + "-" + videoId + ".srt"
                storePath = "/mnt/srt_file/" + channelTitle + "/" + fileName
                cpPath = "/mnt/tmp_srt_file/" + channelTitle + "/" + fileName
                if len(cpPath) > 120:
                    storePath = DownLoadUtil._shorten(storePath)
                    cpPath = DownLoadUtil._shorten(cpPath)
                DownLoadUtil._fetchAndWrite(
                    videoId, languages, storePath, "文件名过长,文件地址...{}")
                copyfile(storePath, cpPath)
                DownLoadUtil._markFinished(videoId, 1)
            else:
                DownLoadUtil._scheduleRetry(videoId, downloadInfo, 1)

    @func_set_timeout(60)
    def downloadTwo(videoId):
        """Download the transcript of ``videoId`` (download type 2).

        The file is written under ``./download/<channel>/``. On failure the
        ``DownloadInfo`` row is moved to type 3 (no transcript) or has its
        try counter incremented. Aborted after 60 seconds.
        """
        # getOneByVideoId accepts a single argument; the former extra "2"
        # raised TypeError before any download was attempted.
        video = VideoService.getOneByVideoId(videoId)
        channel = ChannelService.getOneByChannelId(str(video.channelId))
        videoTitle = DownLoadUtil._sanitizeTitle(video.videoTitle)
        videoPublishTime = str(video.videoPublishTime).split("T")[0]
        Logger.info("开始下载...{}".format(videoId))
        try:
            languages = str(video.videoLanguage)
            storeDir = "./download/" + str(channel.channelTitle)
            if not os.path.exists(storeDir):
                Logger.info("开始创建文件夹:" + storeDir)
                os.makedirs(storeDir)
            # os.path.join yields "\\" on Windows (as before) and "/" on
            # POSIX, where a literal backslash would become part of the file
            # name instead of separating the channel folder.
            storePath = os.path.join(
                storeDir,
                videoPublishTime + "-" + languages + "-" + videoTitle + ".srt")
            DownLoadUtil._fetchAndWrite(
                videoId, languages, storePath, "文件地址...{}")
            DownLoadUtil._markFinished(videoId, 2)
        except Exception as e:
            Logger.error("下载失败...{}".format(videoId))
            logStr = "Exception...{}".format(e)
            Logger.error(logStr)
            downloadInfo = DownloadService.getOneByVideoId(videoId, 2)
            if "No transcripts" in logStr:
                Logger.error("VideoId:{},不存在字幕文件".format(videoId))
                if downloadInfo is not None:
                    DownloadService.changeDownloadType(
                        videoId, 6, 0, 2, 3)
            else:
                DownLoadUtil._scheduleRetry(videoId, downloadInfo, 2)

6
download/LoggerUtils.py

@ -0,0 +1,6 @@
from loguru import logger
import Contant
Logger = logger
def initLogger():
    """Attach a rotating, zipped file sink under ``Contant.logDir`` to loguru."""
    sinkPath = Contant.logDir + "/download_{time}.log"
    sinkOptions = {
        "rotation": "500MB",
        "encoding": "utf-8",
        "enqueue": True,
        "compression": "zip",
        "retention": "10 days",
    }
    logger.add(sinkPath, **sinkOptions)

65
download/Orm.py

@ -0,0 +1,65 @@
from peewee import *
import Contant
import argparse
from LoggerUtils import Logger
# The database path is read from the command line at import time because the
# model classes below bind to ``db`` when they are defined.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
# parse_known_args ignores flags that only the entry script declares;
# parse_args would abort the import with "unrecognized arguments".
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
def ormInit():
    """Call ``create_table()`` on Channel, Video and DownloadInfo, in that order."""
    for model in (Channel, Video, DownloadInfo):
        model.create_table()
class BaseModel(Model):
    """Base class that binds every model to the module-level ``db``."""
    class Meta:
        database = db
# Channel information
class Channel(BaseModel):
    """A channel row, stored in the ``Channel`` table."""
    id = PrimaryKeyField()
    channelId = CharField(null=False)
    channelTitle = CharField(null=False)
    channelLanguage = CharField()
    # The only nullable column of this model.
    channelReptileTime = CharField(null=True)
    class Meta:
        db_table = 'Channel'
# Video information
class Video(BaseModel):
    """A video row; ``channelId`` holds the id of the owning channel."""
    id = PrimaryKeyField()
    videoId = CharField(null=False)
    channelId = CharField(null=False)
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()
    class Meta:
        # The table name is spelled 'Vidoes'; it must stay in sync with the
        # table name declared for this model elsewhere in the project.
        db_table = 'Vidoes'
# Download information
class DownloadInfo(BaseModel):
    """Download bookkeeping row for one video, stored in ``Download_info``."""
    id = PrimaryKeyField()
    videoId = CharField()
    downloadType = IntegerField()
    tryTime = IntegerField()
    isFinished = IntegerField()
    class Meta:
        db_table = 'Download_info'

26
download/VideoService.py

@ -0,0 +1,26 @@
import json
from Orm import Video
from playhouse.shortcuts import model_to_dict, dict_to_model
class VideoService:
    """Persistence helpers for ``Video`` rows."""

    @staticmethod
    def getOneByVideoId(videoId):
        """Return the ``Video`` row with this ``videoId``, or ``None``."""
        return Video.get_or_none(Video.videoId == videoId)

    @staticmethod
    def createOne(videoId, channelId, videoTitle, videoLen, videoType, videoPublishTime, videoLanguage, isDownload):
        """Insert a new ``Video`` row with the given column values."""
        Video.create(videoId=videoId,
                     channelId=channelId,
                     videoTitle=videoTitle,
                     videoLen=videoLen,
                     videoType=videoType,
                     videoPublishTime=videoPublishTime,
                     videoLanguage=videoLanguage,
                     isDownload=isDownload)

    @staticmethod
    def updateLenByVideoId(videoId, len):
        """Set ``videoLen`` on the row(s) matching ``videoId``.

        The parameter is named ``len`` (shadowing the builtin inside this
        method only); the name is kept for keyword-argument compatibility.
        """
        Video.update(videoLen=len).where(Video.videoId == videoId).execute()

    @staticmethod
    def updateIsDownloadByVideoId(videoId, isDownload):
        """Set ``isDownload`` on the row(s) matching ``videoId``."""
        Video.update(isDownload=isDownload).where(
            Video.videoId == videoId).execute()

BIN
download/download.zip

Binary file not shown.

49
download/main_download.py

@ -0,0 +1,49 @@
import argparse
import random
import time
import Contant
from LoggerUtils import Logger, initLogger
import Orm
from VideoService import VideoService
from ChannelService import ChannelService
from DownloadInfoService import DownloadService
from DownloadUtil import DownLoadUtil
from func_timeout import func_set_timeout
import func_timeout
import requests
# python3 ./main_download.py --db="../db/youtube_prod.db" --logDir="./logs"
if __name__ == "__main__":
    # Entry point: download pending type-1 transcripts in batches until none
    # remain, then post a completion notice to the DingTalk webhook.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    Orm.ormInit()
    pending = DownloadService.findNotFinishList()
    Logger.info("list size:{}".format(len(pending)))
    while len(pending) > 0:
        for info in pending:
            try:
                DownLoadUtil.downloadOne(info.videoId)
                restTime = random.randint(1, 3)
                Logger.info("间隔{}秒后继续...".format(restTime))
                time.sleep(restTime)
            except func_timeout.exceptions.FunctionTimedOut as e:
                Logger.error("执行下载方法超时错误:{}".format(e))
                # Count the timed-out attempt. Without this the row stays at
                # the same tryTime, is selected again by findNotFinishList,
                # and a video that always times out keeps the loop running
                # forever.
                DownloadService.updateInfoByVideoId(
                    info.videoId, info.tryTime + 1, 0, 1)
        loopRestTime = random.randint(1, 3)
        Logger.info("循环间隔{}秒后继续...".format(loopRestTime))
        time.sleep(loopRestTime)
        pending = DownloadService.findNotFinishList()
    # Send the DingTalk notification.
    # NOTE(review): the access token is hard-coded; consider loading it from
    # configuration and rotating it.
    webhook = "https://oapi.dingtalk.com/robot/send?access_token=c8c8d7d42c4eecd449dd303025ef968f647d1d8e8694e3fabc0ab5770d646dcb"
    jsonData = {
        "msgtype": "text",
        "text": {
            "content": "[Youtube]download finished"
        }
    }
    try:
        # A timeout keeps a stalled webhook from hanging the job; success is
        # only logged when the request actually succeeded.
        response = requests.post(webhook, json=jsonData, timeout=10)
        response.raise_for_status()
        Logger.info("download发送钉钉消息成功...")
    except requests.RequestException as e:
        Logger.error("download发送钉钉消息失败:{}".format(e))

2
init/Contant.py

@ -0,0 +1,2 @@
db=""
logDir=""

6
init/LoggerUtils.py

@ -0,0 +1,6 @@
from loguru import logger
import Contant
Logger = logger
def initLogger():
    """Register the ``init_{time}.log`` file sink in ``Contant.logDir`` with loguru."""
    logFile = "{}/init_{{time}}.log".format(Contant.logDir)
    logger.add(
        logFile,
        rotation="500MB",
        encoding="utf-8",
        enqueue=True,
        compression="zip",
        retention="10 days",
    )

65
init/Orm.py

@ -0,0 +1,65 @@
from peewee import *
import Contant
import argparse
from LoggerUtils import Logger
# The database path is read from the command line at import time because the
# model classes below bind to ``db`` when they are defined.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
# parse_known_args ignores flags that only the entry script declares;
# parse_args would abort the import with "unrecognized arguments".
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
def ormInit():
    """Call ``create_table()`` on Channel, Vidoe and DownloadInfo, in that order."""
    for model in (Channel, Vidoe, DownloadInfo):
        model.create_table()
class BaseModel(Model):
    """Base class that binds every model to the module-level ``db``."""
    class Meta:
        database = db
# Channel information
class Channel(BaseModel):
    """A channel row, stored in the ``Channel`` table."""
    id = PrimaryKeyField()
    channelId = CharField(null=False)
    channelTitle = CharField(null=False)
    channelLanguage = CharField()
    # The only nullable column of this model.
    channelReptileTime = CharField(null=True)
    class Meta:
        db_table = 'Channel'
# Video information
# NOTE(review): the class name is spelled "Vidoe" here; ormInit in this
# module refers to it by that name.
class Vidoe(BaseModel):
    """A video row; ``channelId`` holds the id of the owning channel."""
    id = PrimaryKeyField()
    videoId = CharField(null=False)
    channelId = CharField(null=False)
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()
    class Meta:
        # The table name is spelled 'Vidoes'; it must stay in sync with the
        # table name declared for this model elsewhere in the project.
        db_table = 'Vidoes'
# Download information
class DownloadInfo(BaseModel):
    """Download bookkeeping row for one video, stored in ``Download_info``."""
    id = PrimaryKeyField()
    videoId = CharField()
    downloadType = IntegerField()
    tryTime = IntegerField()
    isFinished = IntegerField()
    class Meta:
        db_table = 'Download_info'

56
init/init.py

@ -0,0 +1,56 @@
from LoggerUtils import Logger, initLogger
import argparse
import Contant
from Orm import ormInit, Channel
import operator
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen, Request
# py .\init.py --db=../db/youtube_prod.db --logDir=./logs
def saveChannel(channelUrl, language):
    """Fetch a channel page and store the channel unless it already exists.

    The channel id is taken from the part after ``/channel/`` in the ``href``
    of the ``<link>`` tag containing "canonical"; the channel name from the
    ``content`` attribute of a ``<link>`` tag carrying one. Nothing is stored
    when no channel id can be extracted or a row with that id already exists.

    Args:
        channelUrl: URL of the channel page.
        language: value written to ``channelLanguage`` for a new row.
    """
    Logger.info("频道链接:"+channelUrl)
    channelId = ""
    channelName = ""
    pageRequest = Request(channelUrl, headers={'User-Agent': 'Mozilla'})
    # Parse inside the ``with`` so the HTTP response is closed afterwards.
    with urlopen(pageRequest) as url_opener:
        videoInfo = bs(url_opener, features="html.parser")
    for link in videoInfo.find_all("link"):
        linkText = str(link)
        if "canonical" in linkText:
            hrefParts = str(link['href']).split("/channel/")
            # A canonical URL without "/channel/" has nothing at index 1.
            if len(hrefParts) > 1:
                channelId = hrefParts[1]
        if "content=" in linkText:
            channelName = str(link['content'])
    Logger.info("channelId:"+channelId)
    Logger.info("channelName:"+channelName)
    if not channelId:
        # Without an id the row would be stored under an empty channelId.
        Logger.error("未能解析channelId:" + channelUrl)
        return
    channel = Channel.get_or_none(Channel.channelId == channelId)
    if channel is not None:
        Logger.info("频道已存在:" + channelId)
        return
    Channel.create(channelTitle=channelName,
                   channelId=channelId, channelLanguage=language)
if __name__ == "__main__":
    # Entry point: create the tables, then register every channel listed in
    # urlList.txt.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    ormInit()
    Logger.info("SqlLite存放地址:"+Contant.db)
    Logger.info("日志文件存放地址:"+Contant.logDir)
    Logger.info("开始初始化...")
    # checkInit()
    # Read the channel URLs to add from the text file.
    Logger.info("开始读取需要新增的频道地址...")
    with open("urlList.txt") as urlFile:
        urlList = [line.strip('\n') for line in urlFile]
    # Lines longer than 10 characters are channel URLs. Any other non-blank
    # line is a language code that applies to the URLs after it, so a list
    # with several language sections is labelled correctly. A file with a
    # single leading language line behaves exactly as before.
    language = ""
    for entry in urlList:
        if len(entry) > 10:
            saveChannel(entry, language)
        elif entry.strip():
            language = entry.strip()

14
init/urlList.txt

@ -0,0 +1,14 @@
zh-TW
https://www.youtube.com/@TheStormMedia
https://www.youtube.com/@57ETFN
https://www.youtube.com/@MoneyNewWorld
https://www.youtube.com/@tvbsmoney
https://www.youtube.com/@TheMasterhsiao
https://www.youtube.com/@mvp5888
https://www.youtube.com/@HUNG64
https://www.youtube.com/@user-vc2vr6tw4h
https://www.youtube.com/ustv
https://www.youtube.com/@leon888
https://www.youtube.com/@smartmonthly-BW
https://www.youtube.com/@ustvstockonline
https://www.youtube.com/@AASTOCKS_AATV

90
init/urlList_hi.txt

@ -0,0 +1,90 @@
hi
https://www.youtube.com/@procapitalacademy
https://www.youtube.com/@TEACHERANISH
https://www.youtube.com/@MarketGurukul1
en
https://www.youtube.com/@VishalKhandelwalshow
https://www.youtube.com/@Elearnmarkets
https://www.youtube.com/@MarketsMojo
https://www.youtube.com/@TradeWithTrend
https://www.youtube.com/@SHAREKHAN
https://www.youtube.com/@AvadhutSatheTradingAcademy
ko
https://www.youtube.com/@E_TREND
https://www.youtube.com/@hkwownet
https://www.youtube.com/@giant_tv
https://www.youtube.com/@StrongStock
https://www.youtube.com/@stockwar999
https://www.youtube.com/@user-sp1du8pm6q
https://www.youtube.com/@talentinvestment
https://www.youtube.com/@future_economy
https://www.youtube.com/@user-sf7hm6xj8d
https://www.youtube.com/@user-xv9xi6pi9o
https://www.youtube.com/@user-rd8fd1xj9b
https://www.youtube.com/@lucky_tv
https://www.youtube.com/@Min_woo
https://www.youtube.com/@taver1123
https://www.youtube.com/@Super0Min
https://www.youtube.com/@ap5798
https://www.youtube.com/@drematree100
https://www.youtube.com/@MKeconomy_TV
https://www.youtube.com/@grit
https://www.youtube.com/@user-zn9js9fg5i
https://www.youtube.com/@youngikkim
https://www.youtube.com/@DonNawa
https://www.youtube.com/@woong-dal
https://www.youtube.com/@johnleeschool
https://www.youtube.com/@syukaworld-comics
https://www.youtube.com/@channelA-news
https://www.youtube.com/@user-bh7lr7pe9g
https://www.youtube.com/@singlefire
https://www.youtube.com/@moneyhi
https://www.youtube.com/@top.trader
https://www.youtube.com/@jusikdante
zh-TW
https://www.youtube.com/@kukantieh
ja
https://www.youtube.com/@DanTakahashi1
https://www.youtube.com/@tvtokyobiz
https://www.youtube.com/@SHO1112
https://www.youtube.com/@pivot8935
https://www.youtube.com/@nikkei
https://www.youtube.com/@toushikomon
https://www.youtube.com/@pivot8935
https://www.youtube.com/@NewsPicks/featured
https://www.youtube.com/@higedura24
https://www.youtube.com/@tvtokyobiz
https://www.youtube.com/@omaegaowattendayo
https://www.youtube.com/@info_ask1
https://www.youtube.com/@takaisanno/videos
https://www.youtube.com/@takaponjp
https://www.youtube.com/@tbsnewsdig
https://www.youtube.com/@rehacq
https://www.youtube.com/@mabuchi-mariko
https://www.youtube.com/@fp_nigu
https://www.youtube.com/@yukkuri-money
https://www.youtube.com/@SHO1112
https://www.youtube.com/@yohei-chokin
https://www.youtube.com/@user-yu9sj9gq7z/videos
https://www.youtube.com/@tesuta-clipping
https://www.youtube.com/@tradelabo2222
https://www.youtube.com/@jin115xx
https://www.youtube.com/@higedura24
https://www.youtube.com/@nobujuku
https://www.youtube.com/@tokyosoken
https://www.youtube.com/@user-hx7bn7hp9v
https://www.youtube.com/@SLokRE
https://www.youtube.com/@rehacq
https://www.youtube.com/@moha-p
https://www.youtube.com/results?search_query=Buffett+Taro%27s
https://www.youtube.com/@Gorikoro

7
init/urlList_ja.txt

@ -0,0 +1,7 @@
ja
https://www.youtube.com/@ryogakucho
https://www.youtube.com/@DanTakahashi1
https://www.youtube.com/@buffett_taro
https://www.youtube.com/@Tsubame104
https://www.youtube.com/@inc_academy
https://www.youtube.com/@kamioka01

1
sftp/Contant.py

@ -0,0 +1 @@
logDir=""

6
sftp/LoggerUtils.py

@ -0,0 +1,6 @@
from loguru import logger
import Contant
Logger = logger
def initLogger():
    """Add the ``sftp_{time}.log`` file sink in ``Contant.logDir`` to loguru."""
    sinkSettings = dict(
        rotation="500MB",
        encoding="utf-8",
        enqueue=True,
        compression="zip",
        retention="10 days",
    )
    target = Contant.logDir + "/sftp_{time}.log"
    logger.add(target, **sinkSettings)

97
sftp/sftp.py

@ -0,0 +1,97 @@
import os
import shutil
import paramiko
import argparse
import Contant
from LoggerUtils import Logger, initLogger
import configparser
import requests
import time
# python3 sftp.py --local="/mnt/tmp_srt_file" --logDir="./logs"
# python3 sftp.py --local="/mnt/test_file" --logDir="./logs"
if __name__ == "__main__":
    # Entry point: upload every file found in the sub-folders of --local to
    # the matching folder below remote_root, delete the local copy after
    # upload, then post a completion notice to the DingTalk webhook.
    # Read the command-line arguments.
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--local", type=str, default="")
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.logDir = args.logDir
    initLogger()
    # Read the configuration file.
    config = configparser.ConfigParser()
    config.read('sftp_config.ini')
    # SFTP connection settings.
    hostname = config.get('sftp_config', 'hostname')
    port = config.getint('sftp_config', 'port')
    username = config.get('sftp_config', 'username')
    password = config.get('sftp_config', 'password')
    # The password is deliberately left out of the log line.
    Logger.info("host:{},port:{},username:{}".format(
        hostname, port, username))
    ssh_client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without
    # verification; consider loading known host keys instead.
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    sftp_client = None  # stays None until the SFTP session is opened
    try:
        ssh_client.connect(hostname, port, username, password)
        # Open the SFTP session.
        sftp_client = ssh_client.open_sftp()
        Logger.info("SFTP客户端已经建立:{}".format(sftp_client))
        remote_root = "/Inbound/YouTube Captions"
        local_root = args.local
        Logger.info("remote_root:{},local_root:{}".format(remote_root, local_root))
        for name in os.listdir(local_root):
            localDir = local_root + "/" + name
            remoteDir = remote_root + "/" + name
            # Only folders hold subtitle files; a stray file in local_root
            # would make os.listdir below fail.
            if not os.path.isdir(localDir):
                continue
            # Enter the remote folder, creating it when chdir fails.
            try:
                sftp_client.chdir(remoteDir)
            except Exception:
                sftp_client.mkdir(remoteDir)
                sftp_client.chdir(remoteDir)
            # Walk the local folder.
            for srt in os.listdir(localDir):
                remotePath = remoteDir + "/" + srt
                localPath = localDir + "/" + srt
                # Shorten over-long remote paths BEFORE the existence check so
                # the stale-file removal targets the path that is uploaded to.
                if len(remotePath) > 120:
                    remotePath = remotePath[:-20] + ".srt"
                # Remove the remote file when it already exists.
                try:
                    sftp_client.stat(remotePath)
                    sftp_client.remove(remotePath)
                    Logger.info("Remote file '{}' deleted.".format(remotePath))
                except FileNotFoundError:
                    Logger.info("Remote file '{}' not found.".format(remotePath))
                # Upload the local file.
                try:
                    if os.path.exists(localPath):
                        Logger.info("本地文件 '{}' 存在,开始上传.".format(localPath))
                        sftp_client.put(localPath, remotePath, confirm=False)
                        os.remove(localPath)
                    else:
                        Logger.info("本地文件 '{}' 不存在,无法上传.".format(localPath))
                except Exception as e:
                    Logger.info("上传失败 '{}' 文件名长度{}".format(
                        remotePath, len(remotePath)))
                    Logger.error(e)
                    # Start a fresh SFTP session after a failed upload.
                    sftp_client.close()
                    sftp_client = ssh_client.open_sftp()
    finally:
        # Release the connection whether or not the upload loop completed.
        if sftp_client is not None:
            sftp_client.close()
        ssh_client.close()
    # Send the DingTalk notification.
    # NOTE(review): the access token is hard-coded; consider loading it from
    # configuration and rotating it.
    webhook = "https://oapi.dingtalk.com/robot/send?access_token=c8c8d7d42c4eecd449dd303025ef968f647d1d8e8694e3fabc0ab5770d646dcb"
    jsonData = {
        "msgtype": "text",
        "text": {
            "content": "[Youtube]sftp finished"
        }
    }
    try:
        # A timeout keeps a stalled webhook from hanging the job; success is
        # only logged when the request actually succeeded.
        response = requests.post(webhook, json=jsonData, timeout=10)
        response.raise_for_status()
        Logger.info("sftp发送钉钉消息成功...")
    except requests.RequestException as e:
        Logger.error("sftp发送钉钉消息失败:{}".format(e))

5
sftp/sftp_config.ini

@ -0,0 +1,5 @@
[sftp_config]
hostname = filetransfer.blackrock.com
port = 22
username = ftp_yunbo
password = s8v{8SJr

15
src/ChannelService.py

@ -0,0 +1,15 @@
import json
from Orm import Channel
from playhouse.shortcuts import model_to_dict, dict_to_model
class ChannelService:
    """Persistence helpers for ``Channel`` rows."""

    @staticmethod
    def getOneByChannelId(channelId):
        """Return the ``Channel`` row with this ``channelId``, or ``None``."""
        return Channel.get_or_none(Channel.channelId == channelId)

    @staticmethod
    def updateTimeByChannelId(channelId, chageTime):
        """Store ``chageTime`` in ``channelReptileTime`` for the given channel."""
        Channel.update(channelReptileTime=chageTime).where(
            Channel.channelId == channelId).execute()

    @staticmethod
    def getChannelList():
        """Return the executed result of selecting every ``Channel`` row."""
        return Channel.select().execute()

4
src/Contant.py

@ -0,0 +1,4 @@
db=""
logDir=""
startTime=""
endTime=""

16
src/DownloadInfoService.py

@ -0,0 +1,16 @@
from Orm import DownloadInfo
class DownloadService:
    """Persistence helpers for ``DownloadInfo`` rows."""

    @staticmethod
    def createOne(videoId, downloadType, tryTime, isFinished):
        """Insert a new ``DownloadInfo`` row with the given values."""
        DownloadInfo.create(
            videoId=videoId,
            downloadType=downloadType,
            tryTime=tryTime,
            isFinished=isFinished
        )

    @staticmethod
    def updateInfoByVideoId(videoId, tryTime, isFinished):
        """Set ``tryTime`` and ``isFinished`` on every row matching ``videoId``.

        The update is not restricted by ``downloadType``.
        """
        DownloadInfo.update(tryTime=tryTime, isFinished=isFinished).where(
            DownloadInfo.videoId == videoId).execute()

6
src/LoggerUtils.py

@ -0,0 +1,6 @@
from loguru import logger
import Contant
Logger = logger
def initLogger():
    """Attach the ``main_{time}.log`` file sink in ``Contant.logDir`` to loguru."""
    destination = "".join([Contant.logDir, "/main_{time}.log"])
    logger.add(
        destination,
        retention="10 days",
        compression="zip",
        enqueue=True,
        encoding="utf-8",
        rotation="500MB",
    )

68
src/Orm.py

@ -0,0 +1,68 @@
from peewee import *
import Contant
import argparse
from LoggerUtils import Logger
# The database path is read from the command line at import time because the
# model classes below bind to ``db`` when they are defined. The remaining
# flags are declared here as well, although only --db is read in this block.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
parser.add_argument("--start", type=str, default="")
parser.add_argument("--end", type=str, default="")
parser.add_argument("--channelId", type=str, default="")
# parse_known_args ignores flags that are not declared above; parse_args
# would abort the import with "unrecognized arguments" whenever an entry
# script introduces a new option.
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
def ormInit():
    """Call ``create_table()`` on Channel, Video and DownloadInfo, in that order."""
    for model in (Channel, Video, DownloadInfo):
        model.create_table()
class BaseModel(Model):
    """Base class that binds every model to the module-level ``db``."""
    class Meta:
        database = db
# Channel information
class Channel(BaseModel):
    """A channel row, stored in the ``Channel`` table."""
    id = PrimaryKeyField()
    channelId = CharField(null=False)
    channelTitle = CharField(null=False)
    channelLanguage = CharField()
    # The only nullable column of this model.
    channelReptileTime = CharField(null=True)
    class Meta:
        db_table = 'Channel'
# Video information
class Video(BaseModel):
    """A video row; ``channelId`` holds the id of the owning channel."""
    id = PrimaryKeyField()
    videoId = CharField(null=False)
    channelId = CharField(null=False)
    videoTitle = CharField()
    videoLen = IntegerField()
    videoType = CharField()
    videoPublishTime = CharField()
    videoLanguage = CharField()
    isDownload = IntegerField()
    class Meta:
        # The table name is spelled 'Vidoes'; it must stay in sync with the
        # table name declared for this model elsewhere in the project.
        db_table = 'Vidoes'
# Download information
class DownloadInfo(BaseModel):
    """Download bookkeeping row for one video, stored in ``Download_info``."""
    id = PrimaryKeyField()
    videoId = CharField()
    downloadType = IntegerField()
    tryTime = IntegerField()
    isFinished = IntegerField()
    class Meta:
        db_table = 'Download_info'

0
src/SrcTest.py

31
src/VideoService.py

@ -0,0 +1,31 @@
import json
from Orm import Video
from playhouse.shortcuts import model_to_dict, dict_to_model
class VideoService:
    """Persistence helpers for ``Video`` rows."""

    @staticmethod
    def getOneByVideoId(videoId):
        """Return the ``Video`` row with this ``videoId``, or ``None``."""
        return Video.get_or_none(Video.videoId == videoId)

    @staticmethod
    def createOne(videoId, channelId, videoTitle, videoLen, videoType, videoPublishTime, videoLanguage, isDownload):
        """Insert a new ``Video`` row with the given column values."""
        Video.create(videoId=videoId,
                     channelId=channelId,
                     videoTitle=videoTitle,
                     videoLen=videoLen,
                     videoType=videoType,
                     videoPublishTime=videoPublishTime,
                     videoLanguage=videoLanguage,
                     isDownload=isDownload)

    @staticmethod
    def updateLenByVideoId(videoId, len):
        """Set ``videoLen`` on the row(s) matching ``videoId``.

        The parameter is named ``len`` (shadowing the builtin inside this
        method only); the name is kept for keyword-argument compatibility.
        """
        Video.update(videoLen=len).where(Video.videoId == videoId).execute()

    @staticmethod
    def getLastVideoByChannelId(channelId):
        """Return the channel's video with the greatest ``videoPublishTime``.

        Uses ``.get()``, which raises ``DoesNotExist`` when the channel has
        no stored videos.
        """
        return Video.select().where(Video.channelId == channelId).order_by(Video.videoPublishTime.desc()).get()

    @staticmethod
    def getFirstVideoByChannelId(channelId):
        """Return the channel's video with the smallest ``videoPublishTime``.

        Uses ``.get()``, which raises ``DoesNotExist`` when the channel has
        no stored videos.
        """
        return Video.select().where(Video.channelId == channelId).order_by(Video.videoPublishTime).get()

    @staticmethod
    def checkExist(channelId):
        """Return whether at least one video is stored for ``channelId``."""
        query = Video.select().where(Video.channelId == channelId)
        return query.exists()

169
src/YouTubeUtils.py

@ -0,0 +1,169 @@
import httplib2
import googleapiclient.discovery
import googleapiclient.errors
from VideoService import VideoService
from ChannelService import ChannelService
from DownloadInfoService import DownloadService
from LoggerUtils import Logger
import operator
import time
class YouTubeUtil:
    """YouTube Data API helpers: list a channel's videos and store them."""

    # NOTE(review): API keys are hard-coded in source. Consider loading them
    # from configuration and rotating them. Previously commented-out keys
    # were removed from this block.
    apiKeys = [
        "AIzaSyARaW3mqO9szQiHgWZR4el0HWvdyheSHBc",
        "AIzaSyChPXesnVx6fweon_BckhR6UiJWvi5Ma4s"
    ]
    # Index of the key handed out by the next getYoutube() call.
    apiIndex = 0

    @staticmethod
    def getYoutube():
        """Build a YouTube v3 client with the next API key in rotation."""
        http = httplib2.Http(timeout=10, disable_ssl_certificate_validation=False)
        apiKey = YouTubeUtil.apiKeys[YouTubeUtil.apiIndex]
        # Only a prefix of the key is logged so the secret stays out of logs.
        Logger.info(
            "当前APIKey:{},当前apiIndex:{},totalIndex:{}".format(
                apiKey[:6] + "...", YouTubeUtil.apiIndex, len(YouTubeUtil.apiKeys) - 1
            )
        )
        # Advance the rotation, wrapping to 0 after the last key.
        YouTubeUtil.apiIndex = (YouTubeUtil.apiIndex + 1) % len(YouTubeUtil.apiKeys)
        return googleapiclient.discovery.build(
            "youtube", "v3", developerKey=apiKey, http=http
        )

    @staticmethod
    def getVidoeLen(videoIds):
        """Return the ``videos().list`` response (``contentDetails``) for ``videoIds``.

        ``videoIds`` is a comma-separated string of video ids.
        """
        youtube = YouTubeUtil.getYoutube()
        request = youtube.videos().list(part="contentDetails", id=videoIds)
        return request.execute()

    @staticmethod
    def getVideoLenByStr(str):
        """Convert an ISO 8601 duration such as ``PT1H2M3S`` to seconds.

        Day components (``P1DT2H``) and date-only values (``P0D``) are
        handled as well.

        Raises:
            ValueError: if the value is not of the form ``P[nD][T[nH][nM][nS]]``.
        """
        import re  # local import keeps this block self-contained

        duration = str  # the parameter name shadows the builtin; alias it
        match = re.fullmatch(
            r"P(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?", duration)
        if match is None:
            raise ValueError("unsupported duration: {}".format(duration))
        days, hours, minutes, seconds = (int(part or 0) for part in match.groups())
        return days * 86400 + hours * 3600 + minutes * 60 + seconds

    @staticmethod
    def _updateLengths(videoIds):
        """Fetch and store the duration of every id in ``videoIds`` (a list)."""
        lenRes = YouTubeUtil.getVidoeLen(",".join(videoIds))
        for item in lenRes["items"]:
            tmpId = item["id"]
            videoLen = YouTubeUtil.getVideoLenByStr(item["contentDetails"]["duration"])
            VideoService.updateLenByVideoId(tmpId, videoLen)
            Logger.info(
                "更新时长,videoId:{},len:{}".format(tmpId, videoLen)
            )

    @staticmethod
    def getByChannelId(channelId, startTime, endTime):
        """Store every video the channel published between ``startTime`` and ``endTime``.

        New videos get a ``Video`` row plus a type-1 ``DownloadInfo`` row and
        have their duration fetched in batches. After each result page the
        channel's ``channelReptileTime`` is set to the publish time of its
        latest stored video. Returns ``None``; does nothing for an unknown
        channel.
        """
        channel = ChannelService.getOneByChannelId(channelId)
        if channel is None:
            return
        videoLanguage = str(channel.channelLanguage)
        youtube = YouTubeUtil.getYoutube()
        searchArgs = dict(
            part="snippet",
            channelId=channelId,
            maxResults=50,
            order="date",
            publishedAfter=startTime,
            publishedBefore=endTime,
            type="video",
        )
        response = youtube.search().list(**searchArgs).execute()
        while True:
            # Ids of newly stored videos whose duration is still unknown.
            newIds = []
            for item in response["items"]:
                try:
                    videoId = item["id"]["videoId"]
                    publisTime = item["snippet"]["publishedAt"]
                    videoTitle = item["snippet"]["title"]
                    videoType = "video"
                    if VideoService.getOneByVideoId(str(videoId)) is None:
                        VideoService.createOne(
                            videoId,
                            channelId,
                            videoTitle,
                            0,
                            videoType,
                            publisTime,
                            videoLanguage,
                            0,
                        )
                        DownloadService.createOne(videoId, 1, 0, 0)
                        newIds.append(str(videoId))
                        Logger.info(
                            "存储VideoUrl:https://www.youtube.com/watch?v=" + videoId
                        )
                    else:
                        Logger.info("已存在VideoId:{}".format(videoId))
                    if len(newIds) >= 10:
                        YouTubeUtil._updateLengths(newIds)
                        newIds = []
                except Exception as e:
                    # Best effort per item, but record what went wrong.
                    Logger.error("处理视频失败:{}".format(e))
            # Flush the remainder: pages with fewer than 10 new videos used
            # to leave their durations at 0.
            if newIds:
                try:
                    YouTubeUtil._updateLengths(newIds)
                except Exception as e:
                    Logger.error("更新时长失败:{}".format(e))
            # Record the publish time of the latest stored video; skipped when
            # the channel has none, since the lookup would raise otherwise.
            if VideoService.checkExist(channelId):
                lastVideo = VideoService.getLastVideoByChannelId(channelId)
                ChannelService.updateTimeByChannelId(channelId, lastVideo.videoPublishTime)
            time.sleep(5)
            nextPageToken = response.get("nextPageToken")
            if not nextPageToken:
                Logger.info("no nextPageToken")
                break
            try:
                response = youtube.search().list(
                    pageToken=nextPageToken, **searchArgs
                ).execute()
            except Exception as e:
                Logger.error(e)
                break

49
src/main.py

@ -0,0 +1,49 @@
import argparse
import Contant
import LoggerUtils
import Orm
from VideoService import VideoService
from YouTubeUtils import YouTubeUtil
from ChannelService import ChannelService
import requests
# py .\main.py --db=../db/youtube_prod.db --logDir=./logs --start="2023-09-10T00:00:01Z" --end="2023-09-11T00:00:01Z"
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="2024-03-25T00:10:01Z" --end="2024-03-26T00:10:01Z"
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="111" --end="222"
if __name__ == "__main__":
    # Command-line options: database path, log directory and the crawl window.
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--db", type=str, default="")
    parser.add_argument("--logDir", type=str, default="")
    parser.add_argument("--start", type=str, default="")
    parser.add_argument("--end", type=str, default="")
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    Contant.startTime = args.start
    Contant.endTime = args.end
    LoggerUtils.initLogger()
    Orm.ormInit()
    LoggerUtils.Logger.info("db:{},logDir:{}".format(Contant.db, Contant.logDir))
    LoggerUtils.Logger.info("starTime:{},endTime:{}".format(Contant.startTime, Contant.endTime))
    # Crawl every registered channel for the requested window.
    channelList = ChannelService.getChannelList()
    LoggerUtils.Logger.info("list size:{}".format(len(channelList)))
    for channel in channelList:
        channelId = channel.channelId
        LoggerUtils.Logger.info(
            "channelId:{},startTime:{},endTime:{}".format(
                channelId, Contant.startTime, Contant.endTime
            )
        )
        YouTubeUtil.getByChannelId(channelId, Contant.startTime, Contant.endTime)
    # Send the DingTalk notification.
    # NOTE(review): the robot access token is committed in source; consider
    # loading it from configuration and rotating the token.
    webhook = "https://oapi.dingtalk.com/robot/send?access_token=c8c8d7d42c4eecd449dd303025ef968f647d1d8e8694e3fabc0ab5770d646dcb"
    jsonData = {
        "msgtype": "text",
        "text": {
            "content": "[Youtube]src finished"
        }
    }
    try:
        # requests has no default timeout; bound the call so a stalled
        # connection cannot keep the finished job alive forever.
        notifyResponse = requests.post(webhook, json=jsonData, timeout=10)
        notifyResponse.raise_for_status()
    except requests.RequestException as e:
        # Success used to be logged unconditionally, even when the post failed.
        LoggerUtils.Logger.error("src发送钉钉消息失败:{}".format(e))
    else:
        LoggerUtils.Logger.info("src发送钉钉消息成功...")

32
src/one_channel.py

@ -0,0 +1,32 @@
import argparse
import Contant
import LoggerUtils
import Orm
from VideoService import VideoService
from YouTubeUtils import YouTubeUtil
from ChannelService import ChannelService
import requests
# py .\main.py --db=../db/youtube_prod.db --logDir=./logs --start="2023-09-10T00:00:01Z" --end="2023-09-11T00:00:01Z"
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="2023-08-10T00:00:01Z" --end="2023-09-12T00:00:01Z"
# python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="111" --end="222"
# python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="2021-03-06T00:00:01Z" --end="2024-03-06T00:00:01Z" --channelId="UCzoF2M_RG3Qz10hP16vQOng"
if __name__ == "__main__":
    # Every option is a plain string that defaults to "".
    parser = argparse.ArgumentParser(description="")
    for optionName in ("--db", "--logDir", "--start", "--end", "--channelId"):
        parser.add_argument(optionName, type=str, default="")
    args = parser.parse_args()

    # Publish the settings on the shared Contant module before anything reads them.
    Contant.db, Contant.logDir = args.db, args.logDir
    Contant.startTime, Contant.endTime = args.start, args.end
    channelId = args.channelId

    LoggerUtils.initLogger()
    Orm.ormInit()

    settingsLine = "db:{},logDir:{}".format(Contant.db, Contant.logDir)
    LoggerUtils.Logger.info(settingsLine)
    windowLine = "channleId:{},starTime:{},endTime:{}".format(
        channelId, Contant.startTime, Contant.endTime
    )
    LoggerUtils.Logger.info(windowLine)

    # Crawl just the one channel named on the command line.
    YouTubeUtil.getByChannelId(channelId, Contant.startTime, Contant.endTime)

87
src_tmp.sh

@ -0,0 +1,87 @@
#!/bin/bash
# One-off backfill: run one_channel.py over a fixed window for each listed
# channel, waiting ten minutes after every channel.

# The relative paths below only resolve from this directory, so stop if it
# cannot be entered instead of running python3 somewhere else.
cd /mnt/youtube_prod/src || { echo "cannot cd to /mnt/youtube_prod/src" >&2; exit 1; }

start="2021-03-06T00:00:01Z"
end="2024-03-06T00:00:01Z"

# Entries that are commented out were already disabled in the previous
# version of this script (presumably crawled by earlier runs); they are kept
# for reference.
channels=(
    # "UCpsfkRRT7L2nBnizBn_u9YA"
    # "UCRbT3P-2tmr-9l8D7jNoZMQ"
    # "UCPTy0BNqiv-0SdAvFgrXvXg"
    # "UCMlDu8Vuowmqz03kByFcUhw"
    # "UC5mn3VEg_9GY52G6eumKJRg"
    # "UClhhyZ0xyeOAEVdcr0N9KDA"
    # "UCBM86JVoHLqg9irpR2XKvGw"
    # "UCzp9CmDIFVNtzhyOjptIi4g"
    # "UCv-spDeZBGYVUI9eGXGaLSg"
    # "UCF08I8KEKTsBo22RIXFwTAA"
    # "UC5Mjj4LKlMtP_PXlIVYGxIQ"
    # "UCvil4OAt-zShzkKHsg9EQAw"
    # "UCI6C5V4J8FWRcLcOdh1yElw"
    # "UCOio3vyYLWiKlHSYRKW-9UA"
    # "UCaWi2foADm_lKAKnmeQwLSA"
    # "UCUFUOdQwKTWda7kKqxQwMxw"
    # "UCoZdXdFowKP0heWRkQ9RABQ"
    # "UCnfwIKyFYRuqZzzKBDt6JOA"
    # "UCnZJqzwt6LuRymM0jbqiD9A"
    # "UCHpGooMnVgnILywqrpqvZcQ"
    # "UCQIyAcoLsO3L0RMFQk7YMYA"
    # "UCYdHxiRAUUJhuE1DZsnWqXg"
    # "UCbOIEn95Rvnk97KRtSFqvbQ"
    # "UCXWOlSe2GHTev8QZhY_gMPg"
    # "UCJo6G1u0e_-wS-JQn3T-zEw"
    # "UCfq4V1DAuaojnr2ryvWNysw"
    # "UCFznPlqnBtRKQhtkm6GGoRQ"
    # "UC5CyCSvCdoEP-VgQmFq3iww"
    # "UC6mp159KMtzjhP65DmldR0A"
    # "UC7YLvjJf3lDJUQ-TsbWyBjg"
    # "UC6ij59Gy_HnqO4pFu9A_zgQ"
    # "UCpyjRAERLqcD_wI3qQnIY3A"
    # "UCSU_iBWoCnXe1VnAbQhO3Ug"
    # "UC6ZkHcW5QQubZ-Q6XYINE3Q"
    # "UCDpRrAXMYlxFz3a5-z8pE7w"
    "UCMec1m9iUC3agiEK-nsndSg"
    "UCOmXyHRWpDFPYgs2VpoQEIw"
    "UCPgT-N-DQ0K0H88skjaDgkA"
    "UC40nk9kM2Ue8XQ9LsHQlKPA"
    "UCaiV1-PUXDu2Nmx8iOZkofQ"
    "UCDDneQi63kJAdr3i5VCPzHg"
)

for channelId in "${channels[@]}"; do
    python3 ./one_channel.py --db="../db/youtube_prod.db" --logDir="./logs/one_channel" --start="$start" --end="$end" --channelId="$channelId"
    sleep 600
done

10
start_download.sh

@ -0,0 +1,10 @@
#!/bin/bash
# Start the download worker (main_download.py) in the background.

# Append one timestamped info line to the shared run log.
function log() {
    local time_now
    time_now=$(date '+%Y-%m-%d %H:%M:%S')
    echo "$time_now [download] [info] $1" >> /mnt/youtube_prod/running.log
}

# The relative --db/--logDir paths below only resolve from this directory,
# so stop if it cannot be entered.
cd /mnt/youtube_prod/download || { echo "cannot cd to /mnt/youtube_prod/download" >&2; exit 1; }
# /mnt/youtube_prod/start_download.sh
log "开始执行download..."
# stdout is discarded; stderr goes to the shared error log.
nohup python3 ./main_download.py --db="../db/youtube_prod.db" --logDir="./logs" >/dev/null 2>/mnt/youtube_prod/err.log &

11
start_sftp.sh

@ -0,0 +1,11 @@
#!/bin/bash
# Upload the subtitle files collected in /mnt/tmp_srt_file and then clear them.

# Append one timestamped info line to the shared run log.
function log() {
    local time_now
    time_now=$(date '+%Y-%m-%d %H:%M:%S')
    # Tag fixed: this script used to log under the copied "[download]" tag.
    echo "$time_now [sftp] [info] $1" >> /mnt/youtube_prod/running.log
}

# ./sftp.py and ./logs are relative to this directory.
cd /mnt/youtube_prod/sftp || { echo "cannot cd to /mnt/youtube_prod/sftp" >&2; exit 1; }
# /mnt/youtube_prod/start_sftp.sh
log "开始执行sftp..."
# Only delete the local files when the uploader exits successfully; removing
# them unconditionally lost every file of a failed upload.
# NOTE(review): this relies on sftp.py exiting non-zero on failure - confirm.
if python3 ./sftp.py --local="/mnt/tmp_srt_file" --logDir="./logs"; then
    rm -rf /mnt/tmp_srt_file
else
    log "sftp.py failed, keeping /mnt/tmp_srt_file"
fi

12
start_src.sh

@ -0,0 +1,12 @@
#!/bin/bash
# Start the crawl (main.py) in the background for the last 24 hours.

# Append one timestamped info line to the shared run log.
function log() {
    local time_now
    time_now=$(date '+%Y-%m-%d %H:%M:%S')
    echo "$time_now [src] [info] $1" >> /mnt/youtube_prod/running.log
}

# The relative --db/--logDir paths below only resolve from this directory.
cd /mnt/youtube_prod/src || { echo "cannot cd to /mnt/youtube_prod/src" >&2; exit 1; }

# The timestamps carry a "Z" (UTC) suffix, so generate them in UTC (-u)
# instead of the machine's local time zone.
start=$(date -u '+%Y-%m-%dT%H:%M:%SZ' -d '-1 day')
end=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
log "开始执行src...startTime:${start},endTime:${end}"
# /mnt/youtube_prod/start_src.sh
nohup python3 ./main.py --db="../db/youtube_prod.db" --logDir="./logs" --start="$start" --end="$end" >/dev/null 2>/mnt/youtube_prod/err.log &

4
stop_download.sh

@ -0,0 +1,4 @@
#!/bin/bash
# Stop the background download worker (main_download.py).

# pgrep never reports itself, whereas the first row of `ps -ef | grep ...`
# could be the grep process. Every matching worker is stopped, not just the
# first one listed.
mapfile -t pids < <(pgrep -f 'main_download\.py')
if [ "${#pids[@]}" -eq 0 ]; then
    # Nothing to stop; avoid calling kill without a pid.
    echo "main_download is not running"
    exit 0
fi
echo "${pids[@]}"
# SIGKILL is kept from the previous version of this script.
kill -9 "${pids[@]}"

2
test.sh

@ -0,0 +1,2 @@
#!/bin/bash
# Smoke test: print a fixed marker line.
printf '%s\n' "test"

9
test/test.py

@ -0,0 +1,9 @@
import requests
# Manual check: post a fixed text message to the DingTalk robot webhook.
# NOTE(review): the robot access token is committed in source; consider
# loading it from configuration and rotating the token.
webhook = "https://oapi.dingtalk.com/robot/send?access_token=c8c8d7d42c4eecd449dd303025ef968f647d1d8e8694e3fabc0ab5770d646dcb"
jsonData = {
    "msgtype": "text",
    "text": {
        "content": "[Youtube]aaaa"
    }
}
# requests has no default timeout; bound the call so the script cannot hang.
requests.post(webhook, json=jsonData, timeout=10)

8
test/test2.py

@ -0,0 +1,8 @@
from youtube_transcript_api import YouTubeTranscriptApi
# Manual check: print the transcripts available for one video.
# zh-Hant
url = "https://www.youtube.com/watch?v=YbVger_nh-s"
# Named `transcripts` so the builtin `list` is no longer shadowed.
transcripts = YouTubeTranscriptApi.list_transcripts("_i5CoY_LMYs")
# videoSrt = YouTubeTranscriptApi.get_transcript(
# "gXeNXJrD-gw", languages=['zh-TW'])
print(transcripts)
# print(videoSrt)

15
view_count/ChannelService.py

@ -0,0 +1,15 @@
import json
from Orm import Channel
from playhouse.shortcuts import model_to_dict, dict_to_model
class ChannelService:
    """Query helpers for the ``Channel`` table.

    The helpers take no instance state, so they are static methods: they can
    be called on the class (as existing callers do) or on an instance.
    """

    @staticmethod
    def getOneByChannelId(channelId):
        """Return the channel with this ``channelId``, or ``None`` if there is none."""
        return Channel.get_or_none(Channel.channelId == channelId)

    @staticmethod
    def updateTimeByChannelId(channelId, chageTime):
        """Set ``channelReptileTime`` to ``chageTime`` on the matching channel rows."""
        Channel.update(channelReptileTime=chageTime).where(
            Channel.channelId == channelId).execute()

    @staticmethod
    def getChannelList():
        """Return the executed result of selecting every channel."""
        return Channel.select().execute()

10
view_count/Contant.py

@ -0,0 +1,10 @@
# Settings shared between the view_count modules as plain module attributes.
# Path of the SQLite database file; filled from the --db command-line option.
db=""
# Directory the log files are written to; filled from the --logDir option.
logDir=""
# Time-window bounds.
# NOTE(review): nothing in the view_count directory appears to assign or read
# these two - confirm before relying on them.
startTime=""
endTime=""
# Position in apiKeys of the key that view_count_main.getYoutube uses;
# view_count_main.updateVideoViewCount advances it round-robin.
apiIndex = 0
# YouTube Data API keys.
# NOTE(review): credentials are committed in source; consider loading them
# from configuration and rotating these keys.
apiKeys = [
"AIzaSyDjPkCgDQ9Tv_xcChjY2E6GpJ6IzngnD5I",
"AIzaSyAxIycOdQYGB5kWhwe3B-kJAYRo7wOnp8o",
"AIzaSyCsYUC5vN0pB6y9xsCj0B1ehAoqOJ3WMf0"
]

6
view_count/LoggerUtils.py

@ -0,0 +1,6 @@
from loguru import logger
import Contant
# Shared logger object that the other modules import.
Logger = logger


def initLogger():
    """Add a file sink under ``Contant.logDir`` to the shared logger.

    ``Contant.logDir`` must be set before this is called, because the sink
    path is built from it at call time.
    """
    sinkPath = Contant.logDir + "/main_{time}.log"
    sinkOptions = {
        "rotation": "500MB",
        "encoding": "utf-8",
        "enqueue": True,
        "compression": "zip",
        "retention": "10 days",
    }
    logger.add(sinkPath, **sinkOptions)

75
view_count/Orm.py

@ -0,0 +1,75 @@
from peewee import *
import Contant
import argparse
from LoggerUtils import Logger
# The database path has to be known at import time because the model classes
# below bind to `db` when they are defined.
parser = argparse.ArgumentParser(description='')
parser.add_argument('--db', type=str, default='')
parser.add_argument('--logDir', type=str, default='')
# parse_known_args instead of parse_args: importing this module must not
# abort the program because the entry script accepts flags this parser does
# not declare.
args, _unknownArgs = parser.parse_known_args()
Contant.db = args.db
db = SqliteDatabase(Contant.db)
def ormInit():
    """Call ``create_table()`` on each model of this module, in declaration order."""
    for model in (Channel, Video, DownloadInfo, ViewCountInfo):
        model.create_table()
# Common base class of every model in this module; its Meta binds the models
# to the module-level `db`.
class BaseModel(Model):
class Meta:
database = db
# Channel information.
class Channel(BaseModel):
id = PrimaryKeyField()
# Channel id; ChannelService looks rows up and updates them by this column.
channelId = CharField(null=False)
channelTitle = CharField(null=False)
channelLanguage = CharField()
# Written by ChannelService.updateTimeByChannelId; may be NULL.
channelReptileTime = CharField(null=True)
class Meta:
db_table = 'Channel'
# Video information.
class Video(BaseModel):
id = PrimaryKeyField()
# Video id; VideoService looks rows up and updates them by this column.
videoId = CharField(null=False)
channelId = CharField(null=False)
videoTitle = CharField()
# Updated by VideoService.updateLenByVideoId.
videoLen = IntegerField()
videoType = CharField()
# Stored as text; VideoService orders and range-filters on it as a string.
videoPublishTime = CharField()
videoLanguage = CharField()
isDownload = IntegerField()
class Meta:
# The spelling 'Vidoes' is the table name as declared here; changing it
# would point the model at a different table.
db_table = 'Vidoes'
# Download information.
class DownloadInfo(BaseModel):
id = PrimaryKeyField()
videoId = CharField()
downloadType = IntegerField()
tryTime = IntegerField()
isFinished = IntegerField()
class Meta:
db_table = 'Download_info'
# View-count information.
class ViewCountInfo(BaseModel):
id = PrimaryKeyField()
videoId = CharField()
# Comma-separated per-day counts; ViewCountService.createOrUpdateOne reads
# and writes it as 30 slots, with slot day-1 holding the count for `day`.
viewCount = CharField()
class Meta:
db_table = 'ViewCount_info'

33
view_count/VideoCountService.py

@ -0,0 +1,33 @@
import json
from Orm import ViewCountInfo
from playhouse.shortcuts import model_to_dict, dict_to_model
class ViewCountService:
    """Stores per-day view counts as a comma-separated string per video."""

    # Number of per-day slots kept in ViewCountInfo.viewCount.
    DAYS = 30

    @staticmethod
    def _withDayCount(viewCount, day, count):
        """Return the DAYS-slot string of ``viewCount`` with slot ``day`` set to ``count``.

        ``viewCount`` is the stored string, or ``None`` for a video without a
        row yet. Missing slots are filled with "0" and anything beyond DAYS
        slots is dropped, so the result always has exactly DAYS entries.
        """
        slots = viewCount.split(",") if viewCount else []
        slots = (slots + ["0"] * ViewCountService.DAYS)[:ViewCountService.DAYS]
        slots[day - 1] = count
        return ",".join(str(slot) for slot in slots)

    @staticmethod
    def createOrUpdateOne(videoId, day, count):
        """Record ``count`` as the view count of ``videoId`` for slot ``day``.

        Creates the row when the video has none yet, otherwise rewrites the
        stored string with only that slot changed.

        Args:
            videoId: id of the video the count belongs to.
            day: 1-based slot number, 1..DAYS.
            count: value to store in the slot (stored via ``str``).

        Raises:
            ValueError: if ``day`` is outside 1..DAYS. Previously 0 silently
                overwrote the last slot and larger values raised IndexError.
        """
        if not 1 <= day <= ViewCountService.DAYS:
            raise ValueError(
                "day must be between 1 and {}, got {}".format(ViewCountService.DAYS, day)
            )
        # One lookup instead of a truthiness test followed by a second query.
        existing = ViewCountInfo.get_or_none(ViewCountInfo.videoId == videoId)
        if existing is None:
            ViewCountInfo.create(
                videoId=videoId,
                viewCount=ViewCountService._withDayCount(None, day, count),
            )
        else:
            countStr = ViewCountService._withDayCount(existing.viewCount, day, count)
            ViewCountInfo.update(viewCount=countStr).where(
                ViewCountInfo.videoId == videoId).execute()

34
view_count/VideoService.py

@ -0,0 +1,34 @@
import json
from Orm import Video
from playhouse.shortcuts import model_to_dict, dict_to_model
class VideoService:
    """Query helpers for the ``Video`` table.

    The helpers take no instance state, so they are static methods: they can
    be called on the class (as existing callers do) or on an instance.
    """

    @staticmethod
    def getOneByVideoId(videoId):
        """Return the video with this ``videoId``, or ``None`` if there is none."""
        return Video.get_or_none(Video.videoId == videoId)

    @staticmethod
    def createOne(videoId, channelId, videoTitle, videoLen, videoType, videoPublishTime, videoLanguage, isDownload):
        """Insert one video row with the given column values."""
        Video.create(videoId=videoId,
                     channelId=channelId,
                     videoTitle=videoTitle,
                     videoLen=videoLen,
                     videoType=videoType,
                     videoPublishTime=videoPublishTime,
                     videoLanguage=videoLanguage,
                     isDownload=isDownload)

    @staticmethod
    def updateLenByVideoId(videoId, len):
        """Set ``videoLen`` on the rows with this ``videoId``.

        The parameter keeps its name ``len`` for callers that pass it by
        keyword, although it shadows the builtin inside this method.
        """
        Video.update(videoLen=len).where(Video.videoId == videoId).execute()

    @staticmethod
    def getLastVideoByChannelId(channelId):
        """Return the channel's video with the greatest ``videoPublishTime``.

        Uses ``.get()`` on the query, so a channel without any stored video
        raises instead of returning ``None``.
        """
        return Video.select().where(Video.channelId == channelId).order_by(Video.videoPublishTime.desc()).get()

    @staticmethod
    def getFirstVideoByChannelId(channelId):
        """Return the channel's video with the smallest ``videoPublishTime``.

        Uses ``.get()`` on the query, so a channel without any stored video
        raises instead of returning ``None``.
        """
        return Video.select().where(Video.channelId == channelId).order_by(Video.videoPublishTime).get()

    @staticmethod
    def checkExist(channelId):
        """Return whether at least one video is stored for ``channelId``."""
        query = Video.select().where(Video.channelId == channelId)
        return query.exists()

    @staticmethod
    def getVideosByTime(startTime, endTime):
        """Return the videos whose ``videoPublishTime`` lies in [startTime, endTime].

        ``videoPublishTime`` is a text column, so the bounds are compared as
        strings.
        """
        return Video.select().where(Video.videoPublishTime >= startTime, Video.videoPublishTime <= endTime).execute()

99
view_count/view_count_main.py

@ -0,0 +1,99 @@
import argparse
import random
import time
import Contant
from LoggerUtils import Logger, initLogger
import Orm
from VideoService import VideoService
from ChannelService import ChannelService
from VideoCountService import ViewCountService
from func_timeout import func_set_timeout
import func_timeout
import requests
import httplib2
import googleapiclient.discovery
import googleapiclient.errors
import datetime
# NOTE(review): these two names duplicate Contant.apiIndex / Contant.apiKeys.
# The functions in this file read the Contant versions, so these copies look
# unused - confirm before removing. The keys are also credentials committed
# in source; consider loading them from configuration and rotating them.
apiIndex = 0
apiKeys = [
"AIzaSyDjPkCgDQ9Tv_xcChjY2E6GpJ6IzngnD5I",
"AIzaSyAxIycOdQYGB5kWhwe3B-kJAYRo7wOnp8o",
"AIzaSyCsYUC5vN0pB6y9xsCj0B1ehAoqOJ3WMf0"
]
def getYoutube():
    """Build a YouTube Data API v3 client with the currently selected API key.

    The key is ``Contant.apiKeys[Contant.apiIndex]`` at the moment of the
    call. Requests go out directly (no proxy) with a 10 second timeout and
    certificate validation enabled.

    Returns:
        The client object returned by ``googleapiclient.discovery.build``.
    """
    # The unused ProxyInfo object and the unused hard-coded key that used to
    # be created here were removed; neither was passed to the client.
    http = httplib2.Http(timeout=10, disable_ssl_certificate_validation=False)
    api_service_name = "youtube"
    api_version = "v3"
    return googleapiclient.discovery.build(
        api_service_name,
        api_version,
        developerKey=Contant.apiKeys[Contant.apiIndex],
        http=http,
    )
def updateVideoViewCount(day, startTime, endTime):
    """Fetch and store the current view count of every video in a time window.

    Videos whose publish time lies in [startTime, endTime] are sent to
    ``videos.list`` in batches of up to 50 ids, and each returned count is
    stored in slot ``day`` through ``ViewCountService.createOrUpdateOne``.

    Args:
        day: slot number passed on to ``ViewCountService.createOrUpdateOne``.
        startTime: lower bound for ``VideoService.getVideosByTime``.
        endTime: upper bound for ``VideoService.getVideosByTime``.
    """
    # A local list (not named `list`, which shadowed the builtin) so the
    # result can be sliced into batches.
    videos = list(VideoService.getVideosByTime(startTime, endTime))
    Logger.info(len(videos))
    youtube = getYoutube()
    batchSize = 50
    for offset in range(0, len(videos), batchSize):
        batchIds = []
        for video in videos[offset:offset + batchSize]:
            Logger.info(video.videoId)
            batchIds.append(video.videoId)
        # Plain comma-separated ids; the old string started with a stray comma.
        request = youtube.videos().list(part="statistics", id=",".join(batchIds))
        # Advance the shared key index round-robin, once per batch.
        # NOTE(review): `youtube` was built before the loop, so this presumably
        # only affects the key picked by the next getYoutube() call - confirm
        # whether per-request rotation was intended.
        Contant.apiIndex = (Contant.apiIndex + 1) % len(Contant.apiKeys)
        response = request.execute()
        for item in response['items']:
            try:
                Logger.info(item)
                ViewCountService.createOrUpdateOne(
                    item['id'], day, item['statistics']['viewCount'])
            except Exception as e:
                # Keep going with the other items, but log the cause as well.
                Logger.error("存储失败{},{}".format(item, e))
# python ./view_count_main.py --db="../db/youtube_prod.db" --logDir="./logs"
if __name__ == "__main__":
    # Only --db and --logDir are accepted; the time windows are computed below.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    Orm.ormInit()
    # Hour:minute:second. The previous format had %S and %M swapped, which
    # only went unnoticed because both bounds have equal minutes and seconds.
    timeFormat = "%Y-%m-%dT%H:%M:%SZ"
    # Refresh the view counts of the videos published on each of the last 30 days.
    # NOTE(review): datetime.now() is local time while the formatted strings
    # carry a "Z" suffix - confirm which time zone the stored publish times use.
    now = datetime.datetime.now()
    zero_today = now.replace(hour=0, minute=0, second=0, microsecond=0)
    end_today = now.replace(hour=23, minute=59, second=59, microsecond=0)
    for i in range(1, 31):
        dayOffset = datetime.timedelta(days=-i)
        startTime = (zero_today + dayOffset).strftime(timeFormat)
        endTime = (end_today + dayOffset).strftime(timeFormat)
        Logger.info("day:%d, startTime:%s, endTime:%s" %
                    (i, startTime, endTime))
        updateVideoViewCount(i, startTime, endTime)
Loading…
Cancel
Save