|
|
|
from LoggerUtils import Logger, initLogger
|
|
|
|
from bs4 import BeautifulSoup as bs
|
|
|
|
from urllib.request import urlopen, Request
|
|
|
|
import json
|
|
|
|
import Contant
|
|
|
|
from sqlalchemy import create_engine
|
|
|
|
from entity.ChannelEntity import Channel
|
|
|
|
from service.ChannelService import ChannelService
|
|
|
|
import operator
|
|
|
|
|
|
|
|
|
|
|
|
def saveChannel(channelUrl, language, region):
    """Scrape a channel page and insert the channel if it is not stored yet.

    The page at ``channelUrl`` is fetched and every ``<link>`` tag is scanned:

    * a tag whose markup contains ``canonical`` supplies the channel id, taken
      as the part of its ``href`` that follows ``/channel/``;
    * a tag whose markup contains ``content=`` supplies the channel name from
      its ``content`` attribute.

    If several tags match, the last one wins.  When no channel id can be
    extracted, or ``ChannelService.queryOneByChannelId`` already returns a
    row for that id, nothing is inserted.

    Args:
        channelUrl: URL of the channel page to fetch.
        language: Passed to the insert as ``channelLanguage``.
        region: Passed to the insert as ``region``.

    Returns:
        None.
    """
    Logger.info("频道链接:"+channelUrl)
    channelId = ""
    channelName = ""

    request = Request(channelUrl, headers={'User-Agent': 'Mozilla'})
    # Parse inside the ``with`` block so the HTTP response is always closed,
    # even when fetching or parsing raises.
    with urlopen(request) as response:
        videoInfo = bs(response, features="html.parser")

    for link in videoInfo.find_all("link"):
        markup = str(link)
        if "canonical" in markup:
            # The substring test can match tags without an href, and hrefs
            # without a "/channel/" segment; skip those instead of raising.
            parts = str(link.get('href', '')).split("/channel/")
            if len(parts) > 1:
                channelId = parts[1]
        if "content=" in markup:
            # "content=" may appear inside another attribute's value, so the
            # attribute itself is looked up defensively.
            content = link.get('content')
            if content is not None:
                channelName = str(content)

    Logger.info("channelId:"+channelId)
    Logger.info("channelName:"+channelName)

    if not channelId:
        # Without an id there is nothing meaningful to look up or insert;
        # bail out rather than storing a row with an empty channel id.
        Logger.info("未能解析出channelId, 跳过:"+channelUrl)
        return

    channel: Channel = ChannelService.queryOneByChannelId(channelId)
    if channel:
        Logger.info("频道{}已存在".format(channelId))
        return

    ChannelService.insertOneByValues(
        channelId=channelId,
        channelTitle=channelName,
        channelLanguage=language,
        region=region,
    )
|
|
|
|
|
|
|
|
|
|
|
|
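# Usage: py .\init.py --db=../db/youtube_prod.db --logDir=./logs
# NOTE(review): no command-line parsing is visible in this file; the log and
# database settings are read from init_channel_config.json, so the --db and
# --logDir flags shown above may be stale -- confirm.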
|
|
|
|
if __name__ == "__main__":
    # Read the configuration file (log location and MySQL settings).
    with open('init_channel_config.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Initialise logging.
    Contant.logDir = data['log']['dir']
    Contant.logFileName = data['log']['fileName']
    initLogger(Contant.logDir, Contant.logFileName)

    # Set up the MySQL engine.
    dbHost = data['mysql']['host']
    dbPort = data['mysql']['port']
    dbUserName = data['mysql']['username']
    dbPassword = data['mysql']['password']
    dbDatabase = data['mysql']['database']
    # The password is masked so credentials never end up in the log files.
    # The message is formatted explicitly, like the other Logger.info calls
    # in this file, instead of relying on the logger to fill "{}" fields.
    Logger.info("尝试连接mysql host:'{}' port:'{}' username:'{}' password:'{}' database:'{}'".format(
        dbHost, dbPort, dbUserName, "******", dbDatabase))
    # NOTE(review): the credentials are interpolated into the URL verbatim; a
    # password containing characters such as "@" or "/" would need URL
    # escaping -- confirm against the values used in the config file.
    Contant.engin = create_engine(
        f'mysql+mysqlconnector://{dbUserName}:{dbPassword}@{dbHost}:{dbPort}/{dbDatabase}')
    # NOTE(review): create_engine() is documented as lazy, so no connection
    # has necessarily been opened when this success message is logged.
    Logger.info("连接mysql成功")

    Logger.info("开始读取需要新增的频道地址...")
    # Read the input file; the context manager closes it once it is consumed.
    with open("urlList.txt") as urlFile:
        urlList = [line.strip('\n') for line in urlFile]

    # The first two lines carry the language and the region for every channel.
    if len(urlList) < 2:
        raise ValueError(
            "urlList.txt must contain at least two lines: language, then region")
    language = urlList[0]
    region = urlList[1]
    Logger.info("language:{} region:{}".format(language, region))

    # Every line longer than 20 characters is treated as a channel URL; this
    # is the only filter, so it also applies to the two header lines.
    for url in urlList:
        if len(url) > 20:
            saveChannel(url, language, region)
|