You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

206 lines
9.3 KiB

12 months ago
from shutil import copyfile
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import SRTFormatter
from VideoService import VideoService
from ChannelService import ChannelService
from DownloadInfoService import DownloadService
from SrtFileService import SrtService
12 months ago
from LoggerUtils import Logger
import time
import os
from func_timeout import func_set_timeout
import operator
import pysrt
12 months ago
class DownLoadUtil:
    """Download YouTube subtitles as SRT files and record the outcome.

    Every method is stateless and is meant to be called on the class itself,
    e.g. ``DownLoadUtil.downloadOne(videoId)``.
    """

    # Shared formatter that renders a fetched transcript as SRT text.
    formatter = SRTFormatter()
    # Local proxy endpoints; not referenced by any method of this class.
    proxies = {"http": "http://127.0.0.1:7890",
               "https": "https://127.0.0.1:7890"}

    # Full paths longer than this are shortened before any file is created.
    _MAX_PATH_LEN = 120
    # How many characters are dropped from the channel folder name when
    # a path has to be shortened.
    _TRIM_LEN = 20
    # "/" becomes the look-alike DIVISION SLASH; the other characters are
    # simply removed from titles that end up in file names.
    _TITLE_TABLE = str.maketrans({
        "/": "\u2215",
        "?": None,
        "\\": None,
        "|": None,
        "<": None,
        ">": None,
        ":": None,
    })

    @staticmethod
    def _srtTimestamp(subTime):
        """Return a pysrt time as text, without trailing fractional zeros.

        ``str(datetime.time)`` omits the fraction entirely when there are no
        microseconds, so zeros are stripped only when a "." is present.
        Stripping unconditionally would turn "00:00:10" into "00:00:1".
        """
        text = str(subTime.to_time())
        if "." in text:
            text = text.rstrip("0").rstrip(".")
        return text

    @staticmethod
    def _cleanTitle(title):
        """Return ``title`` with the characters in ``_TITLE_TABLE`` rewritten."""
        return str(title).translate(DownLoadUtil._TITLE_TABLE)

    @staticmethod
    def _ensureDir(dirPath):
        """Create ``dirPath`` (and parents) if it does not exist yet."""
        if not os.path.exists(dirPath):
            Logger.info("开始创建文件夹:" + dirPath)
            # exist_ok guards against another worker creating it meanwhile.
            os.makedirs(dirPath, exist_ok=True)

    @staticmethod
    def _shortenDir(dirPath):
        """Drop the last ``_TRIM_LEN`` characters of the final path component."""
        head, sep, leaf = dirPath.rpartition("/")
        # Trailing spaces/dots left by the cut make awkward directory names.
        kept = leaf[:-DownLoadUtil._TRIM_LEN].rstrip(" .")
        if not kept:
            # The folder name is too short to trim; leave the path alone.
            return dirPath
        return head + sep + kept

    @staticmethod
    def _fitDirs(storeDir, cpDir, fileName):
        """Shorten both directories when the copy path would be too long.

        Only the directory name is shortened. The file name carries the
        video id and must survive, otherwise different videos of the same
        channel would overwrite each other.
        """
        if len("{}/{}".format(cpDir, fileName)) > DownLoadUtil._MAX_PATH_LEN:
            storeDir = DownLoadUtil._shortenDir(storeDir)
            cpDir = DownLoadUtil._shortenDir(cpDir)
        return storeDir, cpDir

    @staticmethod
    def _fetchSrt(videoId, languages):
        """Fetch the transcript of ``videoId`` and return it as SRT text."""
        transcript = YouTubeTranscriptApi.get_transcript(
            videoId, languages=[languages])
        return DownLoadUtil.formatter.format_transcript(transcript)

    @staticmethod
    def _writeSrt(path, content):
        """Write ``content`` to ``path`` as UTF-8 text."""
        with open(path, 'w', encoding='utf-8') as srtFile:
            srtFile.write(content)

    @staticmethod
    def _markDownloaded(videoId, kind):
        """Flag the video as downloaded and bump its download-info record.

        ``kind`` is forwarded unchanged to ``DownloadService``; it is
        presumably the download type of the record -- confirm against
        DownloadService.
        """
        VideoService.updateIsDownloadByVideoId(videoId, 1)
        downloadInfo = DownloadService.getOneByVideoId(videoId, kind)
        if downloadInfo is not None:
            DownloadService.updateInfoByVideoId(
                videoId, downloadInfo.tryTime + 1, 1, kind)

    @staticmethod
    def _markRetry(videoId, downloadInfo, kind):
        """Record one more failed attempt on an existing download-info row."""
        if downloadInfo is not None:
            Logger.info("VideoId:{}开始重试第{}".format(
                videoId, downloadInfo.tryTime + 1))
            DownloadService.updateInfoByVideoId(
                videoId, downloadInfo.tryTime + 1, 0, kind)

    @staticmethod
    def iterateSrt(srtFilePath, videoId, channelId):
        """Store every cue of an SRT file through ``SrtService``.

        Does nothing (apart from logging) when ``SrtService`` reports that
        the video already has cues stored.

        Args:
            srtFilePath: Path of the SRT file to read.
            videoId: Id of the video the subtitles belong to.
            channelId: Id of the channel that owns the video.
        """
        # Skip videos that have already been recorded.
        if SrtService.checkByVideoId(videoId):
            Logger.info("VideoId: {} 已收录".format(videoId))
            return
        subs = pysrt.open(srtFilePath)
        # Cue ordinals are 1-based.
        for ordinal, sub in enumerate(subs, start=1):
            SrtService.createOne(
                videoId, channelId, ordinal,
                DownLoadUtil._srtTimestamp(sub.start),
                DownLoadUtil._srtTimestamp(sub.end),
                sub.text)

    @staticmethod
    @func_set_timeout(60)
    def downloadOne(videoId):
        """Download the subtitles of one video, keep a copy, and index them.

        The SRT file is written below the store root, copied below the copy
        root, the video and its download-info record are updated, and the
        cues are stored via ``iterateSrt``. Failures are logged and recorded
        on the download-info record instead of being raised, except for
        errors raised while looking up the video/channel or inside the
        "File name too long" fallback.

        The call is limited to 60 seconds by ``func_set_timeout``.

        Args:
            videoId: Id of the video whose subtitles are downloaded.
        """
        # Load the video and its channel.
        video = VideoService.getOneByVideoId(videoId)
        channel = ChannelService.getOneByChannelId(str(video.channelId))
        # Publish date: the part of the timestamp before "T".
        videoPublishTime = str(video.videoPublishTime).split("T")[0]
        # Start downloading.
        Logger.info("开始下载...{}".format(videoId))
        try:
            mainPath = "E:/Work/youtubedownload/main"
            tmpPath = "E:/Work/youtubedownload/tmp"
            # Fetch the subtitles.
            languages = str(video.videoLanguage)
            storeRoot = "/mnt/srt_file"
            cpRoot = "/mnt/tmp_srt_file"
            # TODO: test code, delete -- overrides the roots above.
            storeRoot = mainPath
            cpRoot = tmpPath
            folder = "{}-{}".format(channel.channelId, channel.channelTitle)
            fileName = "{}.srt".format(videoId)
            storeDir, cpDir = DownLoadUtil._fitDirs(
                "{}/{}/{}".format(storeRoot, languages, folder),
                "{}/{}/{}".format(cpRoot, languages, folder),
                fileName)
            DownLoadUtil._ensureDir(storeDir)
            DownLoadUtil._ensureDir(cpDir)
            storePath = "{}/{}".format(storeDir, fileName)
            cpPath = "{}/{}".format(cpDir, fileName)
            srtText = DownLoadUtil._fetchSrt(videoId, languages)
            Logger.info("文件地址...{}".format(storePath))
            DownLoadUtil._writeSrt(storePath, srtText)
            Logger.info("下载完成...{}".format(videoId))
            copyfile(storePath, cpPath)
            # Update the video and its download-info record.
            DownLoadUtil._markDownloaded(videoId, 1)
            # Read the subtitle file cue by cue.
            DownLoadUtil.iterateSrt(storePath, videoId, video.channelId)
        except Exception as e:
            Logger.error("下载失败...{}".format(videoId))
            logStr = "Exception...{}".format(e)
            Logger.error(logStr)
            downloadInfo = DownloadService.getOneByVideoId(videoId, 1)
            if "No transcripts" in logStr:
                Logger.error("VideoId:{},不存在字幕文件".format(videoId))
                if downloadInfo is not None:
                    DownloadService.changeDownloadType(
                        videoId, 0, 0, 1, 2)
            elif "File name too long" in logStr:
                # The file name was too long: retry with a layout that
                # leaves the channel id out of the directory name.
                # NOTE(review): unlike the main path this branch does not
                # call iterateSrt, and it always uses the /mnt roots --
                # confirm both are intended.
                languages = str(video.videoLanguage)
                srtText = DownLoadUtil._fetchSrt(videoId, languages)
                fileName = videoPublishTime + "-" + languages + "-" + \
                    videoId + ".srt"
                storeDir, cpDir = DownLoadUtil._fitDirs(
                    "/mnt/srt_file/" + str(channel.channelTitle),
                    "/mnt/tmp_srt_file/" + str(channel.channelTitle),
                    fileName)
                # These directories are not the ones created above, so
                # they have to be created before writing.
                DownLoadUtil._ensureDir(storeDir)
                DownLoadUtil._ensureDir(cpDir)
                storePath = "{}/{}".format(storeDir, fileName)
                cpPath = "{}/{}".format(cpDir, fileName)
                Logger.info("文件名过长,文件地址...{}".format(storePath))
                DownLoadUtil._writeSrt(storePath, srtText)
                Logger.info("下载完成...{}".format(videoId))
                copyfile(storePath, cpPath)
                # Update the video and its download-info record.
                DownLoadUtil._markDownloaded(videoId, 1)
            else:
                DownLoadUtil._markRetry(videoId, downloadInfo, 1)

    @staticmethod
    @func_set_timeout(60)
    def downloadTwo(videoId):
        """Download the subtitles of one video into ``./download``.

        The SRT file is written to
        ``./download/<channel title>/<date>-<language>-<video title>.srt``
        and the video and its download-info record are updated. Failures
        during the download are logged and recorded on the download-info
        record instead of being raised.

        The call is limited to 60 seconds by ``func_set_timeout``.

        Args:
            videoId: Id of the video whose subtitles are downloaded.
        """
        # Load the video and its channel.
        video = VideoService.getOneByVideoId(videoId, 2)
        channel = ChannelService.getOneByChannelId(str(video.channelId))
        # Make the title usable as part of a file name.
        videoTitle = DownLoadUtil._cleanTitle(video.videoTitle)
        # Publish date: the part of the timestamp before "T".
        videoPublishTime = str(video.videoPublishTime).split("T")[0]
        # Start downloading.
        Logger.info("开始下载...{}".format(videoId))
        try:
            # Fetch the subtitles.
            languages = str(video.videoLanguage)
            storeDir = "./download/" + str(channel.channelTitle)
            DownLoadUtil._ensureDir(storeDir)
            fileName = videoPublishTime + "-" + languages + "-" + \
                videoTitle + ".srt"
            # os.path.join picks the platform separator; a literal "\\"
            # would become part of the file name outside Windows.
            storePath = os.path.join(storeDir, fileName)
            srtText = DownLoadUtil._fetchSrt(videoId, languages)
            Logger.info("文件地址...{}".format(storePath))
            DownLoadUtil._writeSrt(storePath, srtText)
            Logger.info("下载完成...{}".format(videoId))
            # Update the video and its download-info record.
            DownLoadUtil._markDownloaded(videoId, 2)
        except Exception as e:
            Logger.error("下载失败...{}".format(videoId))
            logStr = "Exception...{}".format(e)
            Logger.error(logStr)
            downloadInfo = DownloadService.getOneByVideoId(videoId, 2)
            if "No transcripts" in logStr:
                Logger.error("VideoId:{},不存在字幕文件".format(videoId))
                if downloadInfo is not None:
                    DownloadService.changeDownloadType(
                        videoId, 6, 0, 2, 3)
            else:
                DownLoadUtil._markRetry(videoId, downloadInfo, 2)