# NOTE(review): the six lines previously here ("You can not select more than 25
# topics…", "70 lines", "2.6 KiB") were Gitea/Gogs web-UI residue from a
# copy-paste and were not valid Python; preserved as this comment so the file parses.
from LoggerUtils import Logger, initLogger
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen, Request
import json
import Contant
from sqlalchemy import create_engine
from entity.ChannelEntity import Channel
from service.ChannelService import ChannelService
import operator
def saveChannel(channelUrl, language, region):
    """Scrape a YouTube channel page and persist its id/title via ChannelService.

    Args:
        channelUrl: full URL of the channel page to fetch.
        language:   language tag stored with the channel record.
        region:     region tag stored with the channel record.

    Skips the insert when the channel id cannot be parsed from the page or
    when the channel already exists in the database.
    """
    Logger.info("频道链接:"+channelUrl)
    channelId = ""
    channelName = ""
    # Close the HTTP response deterministically (the original leaked the socket).
    with urlopen(Request(channelUrl, headers={'User-Agent': 'Mozilla'})) as resp:
        videoInfo = bs(resp, features="html.parser")
    # Scan <link> tags: the canonical link's href ends in "/channel/<id>";
    # a link with a content attribute carries the channel title.
    for link in videoInfo.find_all("link"):
        tag_text = str(link)
        # Guard on "/channel/" too, so split(...)[1] cannot raise IndexError
        # when the canonical href has an unexpected shape.
        if "canonical" in tag_text and "/channel/" in str(link.get('href', '')):
            channelId = str(link['href']).split("/channel/")[1]
        if "content=" in tag_text:
            channelName = str(link['content'])
    Logger.info("channelId:"+channelId)
    Logger.info("channelName:"+channelName)
    if not channelId:
        # Page layout changed or the URL was not a channel page; nothing to store.
        Logger.info("未能解析到channelId,跳过:"+channelUrl)
        return
    channel: Channel = ChannelService.queryOneByChannelId(channelId)
    if channel:
        Logger.info("频道{}已存在".format(channelId))
        return
    ChannelService.insertOneByValues(
        channelId=channelId, channelTitle=channelName,
        channelLanguage=language, region=region)
# py .\init.py --db=../db/youtube_prod.db --logDir=./logs
if __name__ == "__main__":
    # Read runtime configuration (log location + MySQL credentials).
    with open('init_channel_config.json', 'r', encoding='utf-8') as f:
        data = json.load(f)
    # Initialise logging first so every later step can report.
    Contant.logDir = data['log']['dir']
    Contant.logFileName = data['log']['fileName']
    initLogger(Contant.logDir, Contant.logFileName)
    # Connect to MySQL.
    dbHost = data['mysql']['host']
    dbPort = data['mysql']['port']
    dbUserName = data['mysql']['username']
    dbPassword = data['mysql']['password']
    dbDatabase = data['mysql']['database']
    Logger.info("尝试连接mysql host:'{}' port:'{}' username:'{}' password:'{}' database:'{}'",
                dbHost, dbPort, dbUserName, dbPassword, dbDatabase)
    Contant.engin = create_engine(
        f'mysql+mysqlconnector://{dbUserName}:{dbPassword}@{dbHost}:{dbPort}/{dbDatabase}')
    Logger.info("连接mysql成功")
    Logger.info("开始读取需要新增的频道地址...")
    # urlList.txt layout: line 1 = language, line 2 = region,
    # remaining lines = channel URLs to import.
    urlList = []
    # `with` closes the file handle (the original `for line in open(...)` leaked it).
    with open("urlList.txt", encoding='utf-8') as urlFile:
        for line in urlFile:
            urlList.append(line.strip('\n'))
    if len(urlList) < 2:
        # Guard: the original indexed urlList[0]/[1] unconditionally (IndexError).
        Logger.info("urlList.txt内容不足,至少需要language和region两行")
    else:
        language = urlList[0]
        region = urlList[1]
        Logger.info("language:{} region:{}".format(language, region))
        # Skip the two header lines explicitly; keep the original len > 20
        # heuristic to drop blanks/junk (real channel URLs are always longer).
        for url in urlList[2:]:
            if len(url) > 20:
                saveChannel(url, language, region)