import httplib2
import googleapiclient.discovery
import googleapiclient.errors
from LoggerUtils import Logger
import operator
import re
import time
from entity.ChannelEntity import Channel
from entity.VideoEntity import Video
from service.ChannelService import ChannelService
from service.VideoService import VideoService


class YouTubeUtil:
    """Helpers for crawling a channel's videos through the YouTube Data API v3.

    All methods are static and are called on the class itself, e.g.
    ``YouTubeUtil.getByChannelId(...)``. API keys are used in round-robin
    order: every call to :meth:`getYoutube` advances ``apiIndex``.
    """

    # NOTE(security): API keys are hard-coded in source control. They are kept
    # here so existing deployments keep working, but they should be revoked
    # and loaded from configuration or the environment instead.
    apiKeys = [
        "AIzaSyARaW3mqO9szQiHgWZR4el0HWvdyheSHBc",
        "AIzaSyChPXesnVx6fweon_BckhR6UiJWvi5Ma4s",
    ]
    # Index into ``apiKeys`` of the key the next ``getYoutube`` call will use.
    apiIndex = 0

    # ISO 8601 duration as found in ``contentDetails.duration``:
    # ``PT#H#M#S`` for short videos, ``P#DT#H#M#S`` once a day or longer.
    _DURATION_PATTERN = re.compile(
        r"P(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?"
    )
    # Number of newly stored videos that triggers a duration lookup.
    _LEN_BATCH_SIZE = 10

    @staticmethod
    def getYoutube():
        """Build a YouTube Data API client using the next API key in rotation.

        Returns:
            The service object returned by ``googleapiclient.discovery.build``.
        """
        # For local testing behind a proxy, pass
        # ``proxy_info=httplib2.ProxyInfo(...)`` to ``httplib2.Http`` (the
        # original setup used an HTTP proxy at 127.0.0.1:7890).
        http = httplib2.Http(
            timeout=10, disable_ssl_certificate_validation=False)

        # Modulo guards against a stale index if ``apiKeys`` was shortened.
        keyCount = len(YouTubeUtil.apiKeys)
        currentIndex = YouTubeUtil.apiIndex % keyCount
        apiKey = YouTubeUtil.apiKeys[currentIndex]
        Logger.info(
            "当前APIKey:{},当前apiIndex:{},totalIndex:{}".format(
                apiKey, currentIndex, keyCount - 1
            )
        )
        # Advance the rotation, wrapping back to the first key after the last.
        YouTubeUtil.apiIndex = (currentIndex + 1) % keyCount

        return googleapiclient.discovery.build(
            "youtube", "v3", developerKey=apiKey, http=http
        )

    @staticmethod
    def getVidoeLen(videoIds):
        """Fetch ``contentDetails`` for one or more videos.

        Args:
            videoIds: A comma-separated string of video ids, or an iterable
                of ids. Empty segments (e.g. a leading comma) are dropped.

        Returns:
            The raw ``videos().list`` response dict. When no ids are given,
            ``{"items": []}`` is returned without calling the API.
        """
        if isinstance(videoIds, str):
            rawIds = videoIds.split(",")
        else:
            rawIds = [str(videoId) for videoId in videoIds]
        cleanIds = [part.strip() for part in rawIds if part.strip()]
        if not cleanIds:
            return {"items": []}

        youtube = YouTubeUtil.getYoutube()
        request = youtube.videos().list(
            part="contentDetails", id=",".join(cleanIds))
        return request.execute()

    @staticmethod
    def getVideoLenByStr(str):
        """Convert an ISO 8601 duration string into a number of seconds.

        Args:
            str: Duration such as ``"PT1H2M3S"`` or ``"P1DT2H"``. The
                parameter name is kept for backward compatibility even though
                it shadows the builtin inside this function.

        Returns:
            The duration in whole seconds.

        Raises:
            ValueError: If the string is not a supported duration.
        """
        duration = str.strip()
        match = YouTubeUtil._DURATION_PATTERN.fullmatch(duration)
        if match is None:
            raise ValueError(
                "unsupported ISO 8601 duration: {!r}".format(duration))
        days, hours, minutes, seconds = (
            int(group) if group else 0 for group in match.groups()
        )
        return ((days * 24 + hours) * 60 + minutes) * 60 + seconds

    @staticmethod
    def _buildSearchRequest(youtube, channelId, startTime, endTime,
                            pageToken=None):
        """Build the ``search().list`` request for one page of a channel."""
        params = dict(
            part="snippet",
            channelId=channelId,
            maxResults=50,
            order="date",
            publishedAfter=startTime,
            publishedBefore=endTime,
            type="video",
        )
        # Only send pageToken when there is one (first page has none).
        if pageToken:
            params["pageToken"] = pageToken
        return youtube.search().list(**params)

    @staticmethod
    def _updateVideoLengths(videoIds):
        """Look up and store the duration of each video id in ``videoIds``.

        Best effort: failures are logged and do not interrupt the crawl.
        """
        if not videoIds:
            return
        try:
            lenRes = YouTubeUtil.getVidoeLen(",".join(videoIds))
        except Exception as e:
            Logger.error(e)
            return

        for item in lenRes.get("items", []):
            try:
                tmpId = item["id"]
                videoLen = YouTubeUtil.getVideoLenByStr(
                    item["contentDetails"]["duration"])
                VideoService.updateLenByVideoId(
                    videoId=tmpId, videoLen=videoLen)
                Logger.info(
                    "更新时长,videoId:{},len:{}".format(tmpId, videoLen)
                )
            except Exception as e:
                # One bad item must not prevent the others being updated.
                Logger.error(e)

    @staticmethod
    def getByChannelId(channelId, startTime, endTime):
        """Store all videos a channel published within a time window.

        Pages through ``search().list`` results, inserts videos that are not
        stored yet, fills in their duration, and after every page updates the
        channel's time from the video returned by
        ``VideoService.getLastVideoByChannelId``.

        Args:
            channelId: Id of the channel to crawl; it must already be known
                to ``ChannelService``, otherwise nothing is done.
            startTime: Passed to the API as ``publishedAfter``.
            endTime: Passed to the API as ``publishedBefore``.

        Returns:
            None.
        """
        channel = ChannelService.queryOneByChannelId(channelId)
        # Nothing to do for an unknown channel.
        if channel is None:
            Logger.info("没有相应的频道 channelId:{}".format(channelId))
            return

        videoLanguage = str(channel.channelLanguage)
        youtube = YouTubeUtil.getYoutube()
        response = YouTubeUtil._buildSearchRequest(
            youtube, channelId, startTime, endTime).execute()

        while True:
            # Ids of videos inserted on this page that still need a duration.
            pendingIds = []
            for item in response.get("items", []):
                try:
                    videoId = item["id"]["videoId"]
                    publisTime = item["snippet"]["publishedAt"]
                    videoTitle = item["snippet"]["title"]

                    # Insert the video only if it is not stored yet.
                    video: Video = VideoService.queryOneByVideoId(videoId)
                    if video is None:
                        VideoService.insertOne(
                            videoId=videoId,
                            ChannelId=channelId,
                            videoTitle=videoTitle,
                            videoLen=0,
                            videoType="video",
                            videoPublishTime=publisTime,
                            videoLanguage=videoLanguage,
                            isDownload=0)
                        pendingIds.append(str(videoId))
                        Logger.info(
                            "存储VideoUrl:https://www.youtube.com/watch?v="
                            + videoId
                        )
                    else:
                        Logger.info("已存在VideoId:{}".format(videoId))
                except Exception as e:
                    # Best effort: log and move on to the next search result.
                    Logger.error(e)
                    continue

                if len(pendingIds) >= YouTubeUtil._LEN_BATCH_SIZE:
                    YouTubeUtil._updateVideoLengths(pendingIds)
                    pendingIds = []

            # Flush the remainder so a partial batch also gets its duration.
            YouTubeUtil._updateVideoLengths(pendingIds)

            # Update the channel's time from the last stored video.
            video: Video = VideoService.getLastVideoByChannelId(channelId)
            if video is not None:
                ChannelService.updateTimeByChannelId(
                    channelId, video.videoPublishTime)

            nextPageToken = response.get("nextPageToken")
            if not nextPageToken:
                print("no nextPageToken")
                break

            # Pause between pages; skipped when there is no further request.
            time.sleep(5)
            try:
                response = YouTubeUtil._buildSearchRequest(
                    youtube, channelId, startTime, endTime,
                    pageToken=nextPageToken).execute()
            except Exception as e:
                Logger.error(e)
                break