You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

174 lines
7.0 KiB

import operator
import re
import time

import googleapiclient.discovery
import googleapiclient.errors
import httplib2

from LoggerUtils import Logger
from entity.ChannelEntity import Channel
from entity.VideoEntity import Video
from service.ChannelService import ChannelService
from service.VideoService import VideoService
class YouTubeUtil:
    """Helpers for querying the YouTube Data API v3 and storing the results.

    Every method is static and is meant to be called through the class,
    e.g. ``YouTubeUtil.getByChannelId(...)``. API keys are used round-robin:
    each ``getYoutube()`` call takes the key at ``apiIndex`` and then
    advances ``apiIndex`` by one, wrapping at the end of ``apiKeys``.
    """

    # NOTE(review): these keys are hard-coded and committed with the source,
    # so they should be treated as exposed. Rotate them and load the key list
    # from configuration or the environment instead. Retired keys that used
    # to be kept here as commented-out lines have been removed.
    apiKeys = [
        "AIzaSyARaW3mqO9szQiHgWZR4el0HWvdyheSHBc",
        "AIzaSyChPXesnVx6fweon_BckhR6UiJWvi5Ma4s",
    ]
    # Index into apiKeys of the key the next getYoutube() call will use.
    apiIndex = 0

    # How many newly stored videos are collected before their durations are
    # fetched with a single videos.list request.
    _lenBatchSize = 10

    # ISO 8601 duration such as "PT1H2M3S", "P1DT2H" or "P0D".
    _durationPattern = re.compile(
        r"P(?:(?P<weeks>\d+)W)?(?:(?P<days>\d+)D)?"
        r"(?:T(?:(?P<hours>\d+)H)?(?:(?P<minutes>\d+)M)?(?:(?P<seconds>\d+)S)?)?"
    )

    @staticmethod
    def getYoutube():
        """Build a YouTube Data API v3 client with the next API key in rotation.

        Side effect: advances ``YouTubeUtil.apiIndex`` so that the following
        call uses the next key (wrapping to the first key after the last).

        Returns:
            The service object returned by ``googleapiclient.discovery.build``.

        Raises:
            IndexError: if ``apiKeys`` is empty or ``apiIndex`` is out of range.
        """
        # For local testing through a proxy, pass
        # proxy_info=httplib2.ProxyInfo(proxy_type=httplib2.socks.PROXY_TYPE_HTTP,
        #                               proxy_host="127.0.0.1", proxy_port=7890)
        # to httplib2.Http below.
        http = httplib2.Http(
            timeout=10, disable_ssl_certificate_validation=False)

        apiKey = YouTubeUtil.apiKeys[YouTubeUtil.apiIndex]
        # NOTE(review): this writes the full API key to the log.
        Logger.info(
            "当前APIKey:{},当前apiIndex:{},totalIndex:{}".format(
                apiKey, YouTubeUtil.apiIndex, len(YouTubeUtil.apiKeys) - 1
            )
        )
        # Round-robin: step to the next key, wrapping back to index 0.
        YouTubeUtil.apiIndex = (
            YouTubeUtil.apiIndex + 1) % len(YouTubeUtil.apiKeys)

        return googleapiclient.discovery.build(
            "youtube", "v3", developerKey=apiKey, http=http
        )

    @staticmethod
    def getVidoeLen(videoIds):
        """Fetch the ``contentDetails`` part for one or more videos.

        Args:
            videoIds: Comma-separated video ids as a single string, or an
                iterable of id strings. Stray leading/trailing commas are
                ignored.

        Returns:
            The raw ``videos().list`` response. Callers must not assume that
            ``response["items"]`` is non-empty.
        """
        if not isinstance(videoIds, str):
            videoIds = ",".join(videoIds)
        # Callers historically built the id string as ",id1,id2"; drop the
        # empty leading/trailing entries before sending the request.
        videoIds = videoIds.strip(",")

        youtube = YouTubeUtil.getYoutube()
        request = youtube.videos().list(part="contentDetails", id=videoIds)
        return request.execute()

    @staticmethod
    def getVideoLenByStr(str) -> int:
        """Convert an ISO 8601 duration string into a number of seconds.

        Handles the time-only form (``"PT1H2M3S"``) as well as durations with
        a date part (``"P1DT2H"``, ``"P0D"``), which the previous
        ``split("PT")`` based parser rejected with ``IndexError``.

        Args:
            str: The duration string. The parameter name shadows the builtin
                and is kept only for backward compatibility with callers.

        Returns:
            Total duration in whole seconds.

        Raises:
            ValueError: if the string is not a supported ISO 8601 duration.
        """
        duration = str
        match = YouTubeUtil._durationPattern.fullmatch(duration)
        if match is None:
            raise ValueError(
                "invalid ISO 8601 duration: {!r}".format(duration))

        # Components missing from the string count as zero.
        parts = {
            unit: int(value)
            for unit, value in match.groupdict(default="0").items()
        }
        totalDays = parts["weeks"] * 7 + parts["days"]
        totalHours = totalDays * 24 + parts["hours"]
        totalMinutes = totalHours * 60 + parts["minutes"]
        return totalMinutes * 60 + parts["seconds"]

    @staticmethod
    def _searchVideos(youtube, channelId, startTime, endTime, pageToken=None):
        """Request one page of a channel's videos, newest first, and return the raw response."""
        params = {
            "part": "snippet",
            "channelId": channelId,
            "maxResults": 50,
            "order": "date",
            "publishedAfter": startTime,
            "publishedBefore": endTime,
            "type": "video",
        }
        # Only send pageToken for follow-up pages.
        if pageToken is not None:
            params["pageToken"] = pageToken
        return youtube.search().list(**params).execute()

    @staticmethod
    def _updateVideoLens(videoIds):
        """Fetch the durations of the given video ids and store each one via VideoService."""
        lenRes = YouTubeUtil.getVidoeLen(",".join(videoIds))
        for detail in lenRes["items"]:
            tmpId = detail["id"]
            videoLen = YouTubeUtil.getVideoLenByStr(
                detail["contentDetails"]["duration"])
            VideoService.updateLenByVideoId(videoId=tmpId, videoLen=videoLen)
            Logger.info(
                "更新时长,videoId:{},len:{}".format(tmpId, videoLen)
            )

    @staticmethod
    def getByChannelId(channelId, startTime, endTime):
        """Store every video a channel published in a time window.

        Pages through ``search().list`` results for the channel. Each video
        that ``VideoService`` does not know yet is inserted with
        ``videoLen=0``; durations are then fetched in batches and written
        back. After each page the channel's time is updated from the video
        returned by ``VideoService.getLastVideoByChannelId``.

        Args:
            channelId: Id of the channel; it must already exist in
                ``ChannelService``, otherwise the call logs and returns.
            startTime: Passed as ``publishedAfter`` to the search request.
            endTime: Passed as ``publishedBefore`` to the search request.

        Returns:
            None.

        Raises:
            Whatever the first search request raises. Errors on individual
            videos and on follow-up pages are logged instead of raised.
        """
        channel = ChannelService.queryOneByChannelId(channelId)
        if channel is None:
            Logger.info("没有相应的频道 channelId:{}".format(channelId))
            return

        videoLanguage = str(channel.channelLanguage)
        youtube = YouTubeUtil.getYoutube()
        response = YouTubeUtil._searchVideos(
            youtube, channelId, startTime, endTime)

        while True:
            # Ids of videos stored from this page whose duration is still unknown.
            pendingIds = []
            for item in response["items"]:
                # Best effort per video: one bad item must not stop the page.
                try:
                    videoId = item["id"]["videoId"]
                    publisTime = item["snippet"]["publishedAt"]
                    videoTitle = item["snippet"]["title"]

                    # Insert only videos that are not stored yet.
                    if VideoService.queryOneByVideoId(videoId) is None:
                        VideoService.insertOne(
                            videoId=videoId, channelId=channelId, videoTitle=videoTitle, videoLen=0,
                            videoType="video", videoPublishTime=publisTime, videoLanguage=videoLanguage, isDownload=0)
                        pendingIds.append(str(videoId))
                        Logger.info(
                            "存储VideoUrl:https://www.youtube.com/watch?v=" + videoId
                        )
                    else:
                        Logger.info("已存在VideoId:{}".format(videoId))

                    if len(pendingIds) >= YouTubeUtil._lenBatchSize:
                        YouTubeUtil._updateVideoLens(pendingIds)
                        # Cleared only on success so a failed batch is retried.
                        pendingIds = []
                except Exception as e:
                    Logger.error(e)

            # Flush the remainder; without this, a page with fewer than a full
            # batch of new videos would leave them stored with videoLen=0.
            if pendingIds:
                try:
                    YouTubeUtil._updateVideoLens(pendingIds)
                except Exception as e:
                    Logger.error(e)

            # Record the publish time of the channel's last stored video.
            lastVideo: Video = VideoService.getLastVideoByChannelId(channelId)
            if lastVideo is not None:
                ChannelService.updateTimeByChannelId(
                    channelId, lastVideo.videoPublishTime)

            # A response without nextPageToken is the last page.
            pageToken = response.get("nextPageToken")
            if not pageToken:
                print("no nextPageToken")
                break

            # Pause before requesting the next page.
            time.sleep(5)
            try:
                response = YouTubeUtil._searchVideos(
                    youtube, channelId, startTime, endTime, pageToken)
            except Exception as e:
                Logger.error(e)
                break