From 09a22929681cf537c58582778031f768248b2d2a Mon Sep 17 00:00:00 2001 From: appolli Date: Sat, 24 Aug 2024 12:42:17 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dsearch=E5=92=8Cdownload?= =?UTF-8?q?=E7=9B=B8=E5=85=B3bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- common/DownloadUtils.py | 20 +++++++++++++------- common/YoutubeUtils.py | 24 ++++++++++++------------ search_video.py | 1 + service/SrtFileService.py | 6 ++++++ service/VideoService.py | 4 ++-- test.py | 27 ++++++++++++++++++--------- 6 files changed, 52 insertions(+), 30 deletions(-) diff --git a/common/DownloadUtils.py b/common/DownloadUtils.py index 8786e40..cf4d6f1 100644 --- a/common/DownloadUtils.py +++ b/common/DownloadUtils.py @@ -30,17 +30,24 @@ class DownloadUtil: Logger.info("VideoId: {} 已收录", videoId) return subs = pysrt.open(srtFilePath) + srtFiles = [] ordinal = 1 for sub in subs: srtStartTime = str(sub.start.to_time()).rstrip("0") + if ordinal == 1: + srtStartTime = sub.start.to_time() srtEndTime = str(sub.end.to_time()).rstrip("0") - SrtFileService.insertOne(videoId=videoId, channelId=channelId, ordinal=ordinal, - srtStartTime=srtStartTime, srtEndTime=srtEndTime, srtText=sub.text, isScan=0) + srtFile: Srtfile = Srtfile(videoId=videoId, channelId=channelId, ordinal=ordinal, + srtStartTime=srtStartTime, srtEndTime=srtEndTime, srtText=sub.text, isScan=0) ordinal = ordinal + 1 + srtFiles.append(srtFile) + # 批量插入字幕数据 + SrtFileService.insertList(srtFiles=srtFiles) + Logger.info( + f"读取srt文件成功 videoId:{videoId} channelId:{channelId} srtFilePath:{srtFilePath}") def downLoadMP3(videoId, storePath): - video:Video = VideoService.getOneByVideoId(videoId) - channel:Channel = ChannelService.queryOneByChannelId(video.channelId) + video: Video = VideoService.queryOneByVideoId(videoId) videoUrl = "https://www.youtube.com/watch?v={}".format(videoId) yt = YouTube(videoUrl, on_progress_callback=on_progress) ys = yt.streams.get_audio_only() @@ -48,7 +55,7 @@ class DownloadUtil: if not os.path.exists(mp3OutPutPath): Logger.info("开始创建文件夹:" + mp3OutPutPath) os.makedirs(mp3OutPutPath) - fileName = "{}.mp3".format(videoId) + fileName = "{}".format(videoId) ys.download(output_path=mp3OutPutPath, filename=fileName, mp3=True) @func_set_timeout(60) @@ -98,8 +105,7 @@ class DownloadUtil: videoId=videoId) if downloadInfo is not None: DownloadInfoService.updateIsFinishByVideoId(videoId, 1, 1) - DownloadUtil.iterateSrt(storePath, videoId, video.channelId) - pass + DownloadUtil.iterateSrt(storePathSrt, videoId, video.channelId) except Exception as e: Logger.error(e) logStr = "Exception...{}".format(e) diff --git a/common/YoutubeUtils.py b/common/YoutubeUtils.py index 7ad8c66..5a99aa6 100644 --- a/common/YoutubeUtils.py +++ b/common/YoutubeUtils.py @@ -22,10 +22,10 @@ class YouTubeUtil: # AIzaSyDjPkCgDQ9Tv_xcChjY2E6GpJ6IzngnD5I # AIzaSyAxIycOdQYGB5kWhwe3B-kJAYRo7wOnp8o apiKeys = [ - "AIzaSyARaW3mqO9szQiHgWZR4el0HWvdyheSHBc", - "AIzaSyChPXesnVx6fweon_BckhR6UiJWvi5Ma4s" + # "AIzaSyARaW3mqO9szQiHgWZR4el0HWvdyheSHBc", + # "AIzaSyChPXesnVx6fweon_BckhR6UiJWvi5Ma4s" - # "AIzaSyCTBSbq0YjyxTtjmNsnDyKAwHamlv_ST-s", + "AIzaSyCTBSbq0YjyxTtjmNsnDyKAwHamlv_ST-s" # "AIzaSyAESnwtbTIBtU707iZowtQkmAo-qKuEOcY" @@ -38,12 +38,12 @@ class YouTubeUtil: # 获取youtube对象 def getYoutube(): # 本地测试使用代码 - # proxy_info = httplib2.ProxyInfo( - # proxy_type=httplib2.socks.PROXY_TYPE_HTTP, proxy_host="127.0.0.1", proxy_port=7890) - # http = httplib2.Http(timeout=10, proxy_info=proxy_info, - # disable_ssl_certificate_validation=False) - http = httplib2.Http( - timeout=10, disable_ssl_certificate_validation=False) + proxy_info = httplib2.ProxyInfo( + proxy_type=httplib2.socks.PROXY_TYPE_HTTP, proxy_host="127.0.0.1", proxy_port=7890) + http = httplib2.Http(timeout=10, proxy_info=proxy_info, + disable_ssl_certificate_validation=False) + # http = httplib2.Http( + # timeout=10, disable_ssl_certificate_validation=False) api_service_name = "youtube" api_version = "v3" # 获取apiKey @@ -121,7 +121,7 @@ class YouTubeUtil: video: Video = VideoService.queryOneByVideoId(videoId) if video == None: VideoService.insertOne( - videoId=videoId, ChannelId=channelId, videoTitle=videoTitle, videoLen=0, + videoId=videoId, channelId=channelId, videoTitle=videoTitle, videoLen=0, videoType=videoType, videoPublishTime=publisTime, videoLanguage=videoLanguage, isDownload=0) videosRequest = videosRequest + "," + str(videoId) videosRequestCount = videosRequestCount + 1 @@ -146,8 +146,8 @@ class YouTubeUtil: ) videosRequestCount = 0 videosRequest = "" - except: - pass + except Exception as e: + Logger.error(e) # 获取最后一个视频 video: Video = VideoService.getLastVideoByChannelId(channelId) ChannelService.updateTimeByChannelId( diff --git a/search_video.py b/search_video.py index 7446664..ba2554a 100644 --- a/search_video.py +++ b/search_video.py @@ -10,6 +10,7 @@ from common.YoutubeUtils import YouTubeUtil import operator import argparse +# --start="2023-09-10T00:00:01Z" --end="2023-09-11T00:00:01Z" if __name__ == "__main__": # 读取参数 parser = argparse.ArgumentParser(description="") diff --git a/service/SrtFileService.py b/service/SrtFileService.py index dceb3ed..1234e0d 100644 --- a/service/SrtFileService.py +++ b/service/SrtFileService.py @@ -21,3 +21,9 @@ class SrtFileService: session.add(srtFile) session.commit() session.close() + + def insertList(srtFiles): + session = getSession() + session.bulk_save_objects(srtFiles) + session.commit() + session.close() diff --git a/service/VideoService.py b/service/VideoService.py index 5019a2f..daad155 100644 --- a/service/VideoService.py +++ b/service/VideoService.py @@ -19,9 +19,9 @@ class VideoService: session.close() return videos - def insertOne(videoId, ChannelId, videoTitle, videoLen, videoType, videoPublishTime, videoLanguage, isDownload): + def insertOne(videoId, channelId, videoTitle, videoLen, videoType, videoPublishTime, videoLanguage, isDownload): session = getSession() - video: Video = Video(videoId=videoId, ChannelId=ChannelId, videoTitle=videoTitle, + video: Video = Video(videoId=videoId, channelId=channelId, videoTitle=videoTitle, videoLen=videoLen, videoType=videoType, videoPublishTime=videoPublishTime, videoLanguage=videoLanguage, isDownload=isDownload) session.add(video) diff --git a/test.py b/test.py index 5c5c7f9..cfcc982 100644 --- a/test.py +++ b/test.py @@ -9,6 +9,7 @@ from entity.VideoEntity import Video from service.ChannelService import ChannelService from service.VideoService import VideoService from common.YoutubeUtils import YouTubeUtil +from common.DownloadUtils import DownloadUtil import operator import argparse @@ -35,12 +36,20 @@ if __name__ == "__main__": f'mysql+mysqlconnector://{dbUserName}:{dbPassword}@{dbHost}:{dbPort}/{dbDatabase}') Logger.info("连接mysql成功") - videoId = "oZhBWA3HNhA" - video = VideoService.queryOneByVideoId(videoId) - Logger.info(video) - # VideoService.updateLenByVideoId(videoId, 5344) - video = VideoService.getLastVideoByChannelId("UC67Wr_9pA4I0glIxDt_Cpyw") - if video == None: - Logger.info("meiyou") - else: - Logger.info(video.videoPublishTime) + # YouTubeUtil测试 + # channelId = "UCBM86JVoHLqg9irpR2XKvGw" + # startTime = "2024-08-22T00:00:01Z" + # endTime = "2024-08-24T00:00:01Z" + # YouTubeUtil.getByChannelId(channelId, startTime, endTime) + + # download测试 + # videoId = "pBSWhJV0VVU" + # channelId = "UCBM86JVoHLqg9irpR2XKvGw" + # rootPath = "D:/Work/Code/youtube_dev/mysql" + # storePath = "D:/Work/Code/youtube_dev/mysql/main/Korea/UCBM86JVoHLqg9irpR2XKvGw-달란트투자" + # srtFilePath = "D:/Work/Code/youtube_dev/mysql/main/Korea/UCBM86JVoHLqg9irpR2XKvGw-달란트투자/pBSWhJV0VVU.srt" + # DownloadUtil.downloadOne(videoId=videoId, rootPath=rootPath) + # DownloadUtil.iterateSrt(srtFilePath=srtFilePath, + # videoId=videoId, channelId=channelId) + # DownloadUtil.downLoadMP3(videoId=videoId, storePath=storePath) +