zhangshu
6 months ago
9 changed files with 383 additions and 3 deletions
@ -0,0 +1,173 @@ |
|||
import httplib2 |
|||
import googleapiclient.discovery |
|||
import googleapiclient.errors |
|||
from LoggerUtils import Logger |
|||
import operator |
|||
import time |
|||
from entity.ChannelEntity import Channel |
|||
from entity.VideoEntity import Video |
|||
from service.ChannelService import ChannelService |
|||
from service.VideoService import VideoService |
|||
|
|||
|
|||
class YouTubeUtil:
    """Helpers for querying the YouTube Data API v3 and storing the results.

    Every method is static and is meant to be called through the class,
    e.g. ``YouTubeUtil.getByChannelId(...)``. The class relies on the
    module-level names ``httplib2``, ``googleapiclient``, ``Logger``,
    ``ChannelService``, ``VideoService`` and ``time``.
    """

    # NOTE(review): these API keys are committed in source control. They
    # should be rotated and loaded from configuration or the environment.
    apiKeys = [
        "AIzaSyARaW3mqO9szQiHgWZR4el0HWvdyheSHBc",
        "AIzaSyChPXesnVx6fweon_BckhR6UiJWvi5Ma4s",
    ]
    # Index into apiKeys of the key the next getYoutube() call will use.
    apiIndex = 0

    # Number of newly stored videos that triggers a duration lookup.
    _LEN_BATCH_SIZE = 10

    @staticmethod
    def getYoutube():
        """Build a YouTube Data API v3 client using the next key in rotation.

        Each call advances ``YouTubeUtil.apiIndex`` by one, wrapping around
        at the end of ``YouTubeUtil.apiKeys``.

        Returns:
            The client object returned by ``googleapiclient.discovery.build``.

        Raises:
            IndexError: if ``YouTubeUtil.apiKeys`` is empty.
        """
        keyCount = len(YouTubeUtil.apiKeys)
        if keyCount == 0:
            raise IndexError("YouTubeUtil.apiKeys is empty")

        # The modulo keeps the index valid even if apiKeys was shortened.
        keyIndex = YouTubeUtil.apiIndex % keyCount
        apiKey = YouTubeUtil.apiKeys[keyIndex]
        # Only the key's tail is logged so the secret does not end up in logs.
        Logger.info(
            "当前APIKey:{},当前apiIndex:{},totalIndex:{}".format(
                "****" + apiKey[-4:], keyIndex, keyCount - 1
            )
        )
        YouTubeUtil.apiIndex = (keyIndex + 1) % keyCount

        http = httplib2.Http(
            timeout=10, disable_ssl_certificate_validation=False)
        return googleapiclient.discovery.build(
            "youtube", "v3", developerKey=apiKey, http=http
        )

    @staticmethod
    def getVidoeLen(videoIds):
        """Fetch the ``contentDetails`` part for one or more videos.

        Args:
            videoIds: a video id, or several ids joined with commas.

        Returns:
            The raw ``videos().list`` response dict. Its ``items`` list may
            be empty when none of the ids resolve.
        """
        youtube = YouTubeUtil.getYoutube()
        request = youtube.videos().list(part="contentDetails", id=videoIds)
        return request.execute()

    @staticmethod
    def getVideoLenByStr(str):
        """Convert an ISO 8601 duration such as ``PT1H2M3S`` to seconds.

        Accepts the week, day, hour, minute and second designators, so
        values such as ``P1DT2H`` and ``P0D`` are handled as well.

        Args:
            str: the duration string. The parameter name shadows the builtin
                and is kept only for backward compatibility.

        Returns:
            The duration in whole seconds.

        Raises:
            ValueError: if the value is not a supported duration string.
        """
        import re  # local import: this file's import header has no ``re``

        match = re.fullmatch(
            r"P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?",
            str,
        )
        if match is None:
            raise ValueError("Unsupported duration string: {!r}".format(str))

        # Missing components come back as None and count as zero.
        weeks, days, hours, minutes, seconds = (
            int(part) if part else 0 for part in match.groups()
        )
        totalHours = (weeks * 7 + days) * 24 + hours
        return (totalHours * 60 + minutes) * 60 + seconds

    @staticmethod
    def _updateVideoLens(videoIds):
        """Look up the durations of ``videoIds`` and store them per video."""
        if not videoIds:
            return
        lenRes = YouTubeUtil.getVidoeLen(",".join(videoIds))
        for item in lenRes.get("items", []):
            tmpId = item["id"]
            videoLen = YouTubeUtil.getVideoLenByStr(
                item["contentDetails"]["duration"])
            VideoService.updateLenByVideoId(videoId=tmpId, videoLen=videoLen)
            Logger.info(
                "更新时长,videoId:{},len:{}".format(tmpId, videoLen)
            )

    @staticmethod
    def getByChannelId(channelId, startTime, endTime):
        """Store every video a channel published inside a time window.

        Pages through ``search().list`` results ordered by date, inserts
        videos that are not stored yet, fills in their durations, and after
        each page records the newest stored publish time on the channel.

        Args:
            channelId: id of a channel already known to ``ChannelService``.
            startTime: forwarded as the ``publishedAfter`` search filter.
            endTime: forwarded as the ``publishedBefore`` search filter.

        Returns:
            None. Returns early when the channel is unknown.
        """
        channel = ChannelService.queryOneByChannelId(channelId)
        if channel is None:
            Logger.info("没有相应的频道 channelId:{}".format(channelId))
            return

        videoLanguage = str(channel.channelLanguage)
        youtube = YouTubeUtil.getYoutube()
        # Shared by the first request and every follow-up page request.
        searchArgs = dict(
            part="snippet",
            channelId=channelId,
            maxResults=50,
            order="date",
            publishedAfter=startTime,
            publishedBefore=endTime,
            type="video",
        )
        response = youtube.search().list(**searchArgs).execute()

        while True:
            # Ids stored on this page whose duration is still unknown.
            pendingIds = []
            for item in response.get("items", []):
                try:
                    videoId = item["id"]["videoId"]
                    if VideoService.queryOneByVideoId(videoId) is not None:
                        Logger.info("已存在VideoId:{}".format(videoId))
                        continue

                    VideoService.insertOne(
                        videoId=videoId,
                        ChannelId=channelId,
                        videoTitle=item["snippet"]["title"],
                        videoLen=0,
                        videoType="video",
                        videoPublishTime=item["snippet"]["publishedAt"],
                        videoLanguage=videoLanguage,
                        isDownload=0,
                    )
                    pendingIds.append(str(videoId))
                    Logger.info(
                        "存储VideoUrl:https://www.youtube.com/watch?v=" + videoId
                    )

                    if len(pendingIds) >= YouTubeUtil._LEN_BATCH_SIZE:
                        # Cleared only on success, so a failed lookup is
                        # retried together with the next stored video.
                        YouTubeUtil._updateVideoLens(pendingIds)
                        pendingIds = []
                except Exception as e:
                    # Best effort: one bad item must not abort the page,
                    # but the failure is logged instead of being hidden.
                    Logger.error(
                        "Failed to process search item: {!r}".format(e))

            # Durations for the final, smaller-than-a-batch group.
            try:
                YouTubeUtil._updateVideoLens(pendingIds)
            except Exception as e:
                Logger.error(
                    "Failed to update video lengths: {!r}".format(e))

            # Record the newest stored publish time on the channel.
            lastVideo = VideoService.getLastVideoByChannelId(channelId)
            if lastVideo is not None:
                ChannelService.updateTimeByChannelId(
                    channelId, lastVideo.videoPublishTime)

            nextPageToken = response.get("nextPageToken")
            if not nextPageToken:
                Logger.info("no nextPageToken")
                break

            # Pause between page requests.
            time.sleep(5)
            try:
                response = youtube.search().list(
                    pageToken=nextPageToken, **searchArgs
                ).execute()
            except Exception as e:
                Logger.error(e)
                break
@ -0,0 +1,19 @@ |
|||
from sqlalchemy import create_engine, Column, Integer, String, Boolean |
|||
from sqlalchemy.ext.declarative import declarative_base |
|||
|
|||
# Declarative base class for this entity module.
# NOTE(review): if other entity modules create their own Base, the models do
# not share one metadata object; confirm whether a common Base is intended.
Base = declarative_base()


class Video(Base):
    """ORM mapping for one row of the ``Videos`` table."""

    __tablename__ = 'Videos'

    # Surrogate primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # YouTube video id.
    videoId = Column(String(255), nullable=False)
    # Id of the channel the video belongs to.
    channelId = Column(String(255), nullable=False)
    videoTitle = Column(String(255), nullable=False)
    # Duration; presumably seconds, 0 until it is looked up (confirm).
    videoLen = Column(Integer, nullable=False)
    videoType = Column(String(255), nullable=False)
    # Publish timestamp stored as text.
    videoPublishTime = Column(String(255), nullable=False)
    videoLanguage = Column(String(255), nullable=False)
    # Presumably a 0/1 download flag (confirm against the downloader).
    isDownload = Column(Integer, nullable=False)

    def __repr__(self):
        """Return a short, log-friendly description of the row."""
        return (
            "Video(id={!r}, videoId={!r}, channelId={!r}, "
            "videoLen={!r}, videoPublishTime={!r})"
        ).format(
            self.id, self.videoId, self.channelId,
            self.videoLen, self.videoPublishTime,
        )
@ -0,0 +1,50 @@ |
|||
from LoggerUtils import Logger, initLogger |
|||
from bs4 import BeautifulSoup as bs |
|||
from urllib.request import urlopen, Request |
|||
import json |
|||
import Contant |
|||
from sqlalchemy import create_engine |
|||
from entity.ChannelEntity import Channel |
|||
from service.ChannelService import ChannelService |
|||
from common.YoutubeUtils import YouTubeUtil |
|||
import operator |
|||
import argparse |
|||
|
|||
if __name__ == "__main__":
    # Entry point: read the time window and config, set up logging and the
    # database engine, then collect videos for every stored channel.

    # Imported here because only this entry point needs it.
    from urllib.parse import quote

    # Read the optional publish-time window from the command line.
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--start", type=str, default="")
    parser.add_argument("--end", type=str, default="")
    args = parser.parse_args()
    # An empty value becomes None so the search filter is left out instead of
    # being sent as an empty string (googleapiclient drops None arguments).
    startTime = args.start or None
    endTime = args.end or None

    # Load the configuration file.
    with open('search_video_config.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Initialise logging.
    Contant.logDir = data['log']['dir']
    Contant.logFileName = data['log']['fileName']
    initLogger(Contant.logDir, Contant.logFileName)

    # Configure the MySQL engine.
    dbHost = data['mysql']['host']
    dbPort = data['mysql']['port']
    dbUserName = data['mysql']['username']
    dbPassword = data['mysql']['password']
    dbDatabase = data['mysql']['database']
    # The password is masked so the secret is never written to the log.
    Logger.info(
        "尝试连接mysql host:'{}' port:'{}' username:'{}' password:'{}' database:'{}'".format(
            dbHost, dbPort, dbUserName, "***", dbDatabase))
    # Percent-encode the credentials so characters such as '@', ':' or '/'
    # cannot break the connection URL.
    urlUser = quote(str(dbUserName), safe="")
    urlPassword = quote(str(dbPassword), safe="")
    # create_engine() is lazy: no connection is opened at this point.
    Contant.engin = create_engine(
        f'mysql+mysqlconnector://{urlUser}:{urlPassword}@{dbHost}:{dbPort}/{dbDatabase}')
    Logger.info("连接mysql成功")

    # Collect videos for every stored channel.
    # NOTE(review): ChannelService.queryAllChannel() is taken from the
    # previously commented-out loop; confirm it exists with this name.
    channels = ChannelService.queryAllChannel()
    Logger.info("Channels length:{}".format(len(channels)))
    for channel in channels:
        try:
            YouTubeUtil.getByChannelId(channel.channelId, startTime, endTime)
        except Exception as e:
            # Keep going so one failing channel does not stop the run.
            Logger.error(
                "Failed to collect channelId:{} error:{!r}".format(
                    channel.channelId, e))
@ -0,0 +1,13 @@ |
|||
{ |
|||
"mysql": { |
|||
"host": "47.108.20.249", |
|||
"port": "3306", |
|||
"username": "root", |
|||
"password": "casino888!", |
|||
"database": "youtube" |
|||
}, |
|||
"log": { |
|||
"dir": "./logs", |
|||
"fileName": "search_video" |
|||
} |
|||
} |
@ -0,0 +1,36 @@ |
|||
from entity.VideoEntity import Video |
|||
from common.Utils import getSession |
|||
from sqlalchemy import update |
|||
|
|||
|
|||
class VideoService:
    """Data-access helpers for the ``Videos`` table.

    Every method opens its own session through ``getSession()`` and always
    closes it, including when the query or commit raises.
    """

    @staticmethod
    def queryOneByVideoId(videoId):
        """Return the ``Video`` with this ``videoId``, or None if absent."""
        session = getSession()
        try:
            return session.query(Video).filter(
                Video.videoId == videoId).one_or_none()
        finally:
            session.close()

    @staticmethod
    def insertOne(videoId, ChannelId, videoTitle, videoLen, videoType, videoPublishTime, videoLanguage, isDownload):
        """Insert one video row and commit it.

        The ``ChannelId`` parameter keeps its original spelling for existing
        callers; it is stored in the ``channelId`` column.
        """
        # The mapped attribute is ``channelId``. Passing ``ChannelId=`` to
        # Video() is rejected by the declarative constructor with TypeError.
        video = Video(videoId=videoId, channelId=ChannelId, videoTitle=videoTitle,
                      videoLen=videoLen, videoType=videoType, videoPublishTime=videoPublishTime,
                      videoLanguage=videoLanguage, isDownload=isDownload)
        session = getSession()
        try:
            session.add(video)
            session.commit()
        finally:
            session.close()

    @staticmethod
    def updateLenByVideoId(videoId, videoLen):
        """Set ``videoLen`` on every row whose ``videoId`` matches."""
        updateSql = update(Video).where(
            Video.videoId == videoId).values(videoLen=videoLen)
        session = getSession()
        try:
            session.execute(updateSql)
            session.commit()
        finally:
            session.close()

    @staticmethod
    def getLastVideoByChannelId(channelId):
        """Return the channel's video with the greatest publish time, or None.

        ``videoPublishTime`` is a string column, so the ordering is textual.
        NOTE(review): this matches chronological order only if the stored
        values are uniformly formatted timestamps; confirm.
        """
        session = getSession()
        try:
            return session.query(Video).filter(
                Video.channelId == channelId
            ).order_by(Video.videoPublishTime.desc()).first()
        finally:
            session.close()
@ -0,0 +1,46 @@ |
|||
from LoggerUtils import Logger, initLogger |
|||
from bs4 import BeautifulSoup as bs |
|||
from urllib.request import urlopen, Request |
|||
import json |
|||
import Contant |
|||
from sqlalchemy import create_engine |
|||
from entity.ChannelEntity import Channel |
|||
from entity.VideoEntity import Video |
|||
from service.ChannelService import ChannelService |
|||
from service.VideoService import VideoService |
|||
from common.YoutubeUtils import YouTubeUtil |
|||
import operator |
|||
import argparse |
|||
|
|||
if __name__ == "__main__":
    # Manual smoke test: configure logging and the database engine, then run
    # two VideoService lookups and log what they return.

    # Imported here because only this entry point needs it.
    from urllib.parse import quote

    # Load the configuration file.
    with open('test_config.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Initialise logging.
    Contant.logDir = data['log']['dir']
    Contant.logFileName = data['log']['fileName']
    initLogger(Contant.logDir, Contant.logFileName)

    # Configure the MySQL engine.
    dbHost = data['mysql']['host']
    dbPort = data['mysql']['port']
    dbUserName = data['mysql']['username']
    dbPassword = data['mysql']['password']
    dbDatabase = data['mysql']['database']
    # The password is masked so the secret is never written to the log.
    Logger.info(
        "尝试连接mysql host:'{}' port:'{}' username:'{}' password:'{}' database:'{}'".format(
            dbHost, dbPort, dbUserName, "***", dbDatabase))
    # Percent-encode the credentials so characters such as '@', ':' or '/'
    # cannot break the connection URL.
    urlUser = quote(str(dbUserName), safe="")
    urlPassword = quote(str(dbPassword), safe="")
    # create_engine() is lazy: no connection is opened at this point.
    Contant.engin = create_engine(
        f'mysql+mysqlconnector://{urlUser}:{urlPassword}@{dbHost}:{dbPort}/{dbDatabase}')
    Logger.info("连接mysql成功")

    # Look up a single video by id.
    videoId = "oZhBWA3HNhA"
    video = VideoService.queryOneByVideoId(videoId)
    Logger.info(video)

    # Look up the most recently published stored video of one channel.
    video = VideoService.getLastVideoByChannelId("UC67Wr_9pA4I0glIxDt_Cpyw")
    if video is None:
        Logger.info("meiyou")
    else:
        Logger.info(video.videoPublishTime)
@ -0,0 +1,13 @@ |
|||
{ |
|||
"mysql": { |
|||
"host": "47.108.20.249", |
|||
"port": "3306", |
|||
"username": "root", |
|||
"password": "casino888!", |
|||
"database": "youtube" |
|||
}, |
|||
"log": { |
|||
"dir": "./logs", |
|||
"fileName": "test" |
|||
} |
|||
} |
Loading…
Reference in new issue