You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
173 lines
7.0 KiB
173 lines
7.0 KiB
import operator
import re
import time

import googleapiclient.discovery
import googleapiclient.errors
import httplib2

from entity.ChannelEntity import Channel
from entity.VideoEntity import Video
from LoggerUtils import Logger
from service.ChannelService import ChannelService
from service.VideoService import VideoService


class YouTubeUtil:
    """Helpers for collecting a channel's videos through the YouTube Data API v3.

    Every method is static and is meant to be called on the class itself,
    e.g. ``YouTubeUtil.getByChannelId(...)``. API keys are used round-robin:
    each call to ``getYoutube`` uses the key at ``apiIndex`` and then
    advances the index.
    """

    # NOTE(security): API keys are committed to source control. They should be
    # loaded from configuration or the environment instead, and any key that
    # has ever been published should be rotated.
    apiKeys = [
        "AIzaSyARaW3mqO9szQiHgWZR4el0HWvdyheSHBc",
        "AIzaSyChPXesnVx6fweon_BckhR6UiJWvi5Ma4s",
    ]
    # Index into apiKeys of the key the next getYoutube() call will use.
    apiIndex = 0

    # How many newly stored videos are collected before their durations are
    # fetched with a single videos.list call.
    _LEN_BATCH_SIZE = 10

    # ISO 8601 duration restricted to the designators handled here:
    # weeks, days, hours, minutes and seconds.
    _DURATION_RE = re.compile(
        r"P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?"
    )

    @staticmethod
    def getYoutube():
        """Build a YouTube Data API v3 client using the next API key in rotation.

        Side effect: advances ``YouTubeUtil.apiIndex`` (wrapping to 0 after the
        last key).

        Returns:
            The service object returned by ``googleapiclient.discovery.build``.
        """
        # For local testing behind a proxy, construct the Http object with
        # proxy_info=httplib2.ProxyInfo(...) instead.
        http = httplib2.Http(
            timeout=10, disable_ssl_certificate_validation=False)

        keys = YouTubeUtil.apiKeys
        index = YouTubeUtil.apiIndex
        apiKey = keys[index]
        # NOTE(review): this writes the full API key to the log; consider
        # logging only the index.
        Logger.info(
            "当前APIKey:{},当前apiIndex:{},totalIndex:{}".format(
                apiKey, index, len(keys) - 1
            )
        )
        # Round-robin: move to the next key, wrapping after the last one.
        YouTubeUtil.apiIndex = (index + 1) % len(keys)

        return googleapiclient.discovery.build(
            "youtube", "v3", developerKey=apiKey, http=http
        )

    @staticmethod
    def getVidoeLen(videoIds):
        """Fetch the ``contentDetails`` part for one or more videos.

        Args:
            videoIds: Comma-separated video ids, or an iterable of ids.
                Empty entries (e.g. from a leading comma) are dropped.

        Returns:
            The raw ``videos().list`` response dict. Its ``items`` list may be
            empty when none of the ids resolve.
        """
        if not isinstance(videoIds, str):
            videoIds = ",".join(videoIds)
        # Normalise ",a,,b" -> "a,b" so no empty id is sent to the API.
        idParam = ",".join(
            part.strip() for part in videoIds.split(",") if part.strip())

        youtube = YouTubeUtil.getYoutube()
        request = youtube.videos().list(part="contentDetails", id=idParam)
        return request.execute()

    @staticmethod
    def getVideoLenByStr(str) -> int:
        """Convert an ISO 8601 duration such as ``PT1H2M3S`` to seconds.

        Besides the ``PT..`` form, week and day designators are accepted
        (``P1DT2H``, ``P0D``). Year and month designators and fractional
        values are not supported.

        Args:
            str: The duration text (the parameter name is kept for
                backward compatibility even though it shadows the builtin).

        Returns:
            The duration in whole seconds.

        Raises:
            ValueError: If the text is not a supported ISO 8601 duration.
        """
        match = YouTubeUtil._DURATION_RE.fullmatch(str)
        if match is None:
            raise ValueError("unsupported duration: {!r}".format(str))
        weeks, days, hours, minutes, seconds = (
            int(group) if group else 0 for group in match.groups()
        )
        totalDays = weeks * 7 + days
        return (totalDays * 24 + hours) * 3600 + minutes * 60 + seconds

    @staticmethod
    def getByChannelId(channelId, startTime, endTime):
        """Store every video a channel published in a time window.

        Pages through ``search().list`` results for the channel, inserts the
        videos that are not stored yet, fills in their durations, and after
        each page updates the channel's time from its last stored video.
        Returns ``None``; does nothing but log when the channel is unknown.

        Args:
            channelId: Id of a channel known to ``ChannelService``.
            startTime: Passed to the API as ``publishedAfter``
                (presumably an RFC 3339 timestamp string - confirm with caller).
            endTime: Passed to the API as ``publishedBefore``.
        """
        channel = ChannelService.queryOneByChannelId(channelId)
        if channel is None:
            Logger.info("没有相应的频道 channelId:{}".format(channelId))
            return

        videoLanguage = str(channel.channelLanguage)
        # One client (and therefore one API key) is used for all search pages.
        youtube = YouTubeUtil.getYoutube()
        response = YouTubeUtil._searchChannelVideos(
            youtube, channelId, startTime, endTime)

        while True:
            YouTubeUtil._storeSearchPage(
                response.get("items", []), channelId, videoLanguage)

            # Record progress from the last stored video, if there is one.
            lastVideo = VideoService.getLastVideoByChannelId(channelId)
            if lastVideo is not None:
                ChannelService.updateTimeByChannelId(
                    channelId, lastVideo.videoPublishTime)

            # Pause between pages, as the original implementation did.
            time.sleep(5)

            pageToken = response.get("nextPageToken")
            if not pageToken:
                print("no nextPageToken")
                break
            try:
                response = YouTubeUtil._searchChannelVideos(
                    youtube, channelId, startTime, endTime, pageToken)
            except Exception as e:
                # A failed page request ends the crawl for this channel.
                Logger.error(e)
                break

    @staticmethod
    def _searchChannelVideos(youtube, channelId, startTime, endTime,
                             pageToken=None):
        """Run one ``search().list`` request for a channel and return the response."""
        params = {
            "part": "snippet",
            "channelId": channelId,
            "maxResults": 50,
            "order": "date",
            "publishedAfter": startTime,
            "publishedBefore": endTime,
            "type": "video",
        }
        if pageToken:
            params["pageToken"] = pageToken
        return youtube.search().list(**params).execute()

    @staticmethod
    def _storeSearchPage(items, channelId, videoLanguage):
        """Insert the not-yet-stored videos of one search page and set their lengths."""
        pendingIds = []
        for item in items:
            try:
                videoId = item["id"]["videoId"]
                publishTime = item["snippet"]["publishedAt"]
                videoTitle = item["snippet"]["title"]

                if VideoService.queryOneByVideoId(videoId) is not None:
                    Logger.info("已存在VideoId:{}".format(videoId))
                    continue

                # Length is stored as 0 first and filled in by the batch update.
                VideoService.insertOne(
                    videoId=videoId, ChannelId=channelId, videoTitle=videoTitle, videoLen=0,
                    videoType="video", videoPublishTime=publishTime, videoLanguage=videoLanguage, isDownload=0)
                pendingIds.append(str(videoId))
                Logger.info(
                    "存储VideoUrl:https://www.youtube.com/watch?v=" + videoId
                )
            except Exception as e:
                # Best effort: one bad item must not stop the rest of the page.
                Logger.error(e)
                continue

            if len(pendingIds) >= YouTubeUtil._LEN_BATCH_SIZE:
                YouTubeUtil._updateVideoLengths(pendingIds)
                pendingIds = []

        # Flush the remainder so the last (<batch size) videos also get a length.
        if pendingIds:
            YouTubeUtil._updateVideoLengths(pendingIds)

    @staticmethod
    def _updateVideoLengths(videoIds):
        """Fetch durations for ``videoIds`` and store them; errors are logged, not raised."""
        try:
            lenRes = YouTubeUtil.getVidoeLen(",".join(videoIds))
        except Exception as e:
            Logger.error(e)
            return

        for detail in lenRes.get("items", []):
            try:
                tmpId = detail["id"]
                videoLen = YouTubeUtil.getVideoLenByStr(
                    detail["contentDetails"]["duration"])
                VideoService.updateLenByVideoId(
                    videoId=tmpId, videoLen=videoLen)
                Logger.info(
                    "更新时长,videoId:{},len:{}".format(
                        tmpId, videoLen)
                )
            except Exception as e:
                Logger.error(e)
|
|
|