from LoggerUtils import Logger, initLogger
import argparse
import Contant
from Orm import ormInit, Channel
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen, Request

# Usage: py .\init.py --db=../db/youtube_prod.db --logDir=./logs

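# Fetch a channel page, extract its id and title from its <link> tags, and
# store it as a new Channel row unless it already exists.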
def saveChannel(channelUrl, language):
    Logger.info("Channel URL: " + channelUrl)
    channelId = ""
    channelName = ""
    url_opener = urlopen(
        Request(channelUrl, headers={'User-Agent': 'Mozilla'}))
    videoInfo = bs(url_opener, features="html.parser")
    links = videoInfo.find_all("link")
    for link in links:
        # The canonical <link> carries the channel id in its href; a link
        # with a "content" attribute carries the channel title.
        if "canonical" in str(link):
            channelId = str(link['href']).split("/channel/")[1]
        if "content=" in str(link):
            channelName = str(link['content'])
    Logger.info("channelId: " + channelId)
    Logger.info("channelName: " + channelName)
    channel = Channel.get_or_none(Channel.channelId == channelId)
    if channel is not None:
        Logger.info("Channel already exists: " + channelId)
        return
    Channel.create(channelTitle=channelName,
                   channelId=channelId, channelLanguage=language)

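# Entry point: read the database path and log directory from the command line,
# initialize the logger and ORM, then import channel URLs from urlList.txt.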
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    ormInit()
    Logger.info("SQLite database path: " + Contant.db)
    Logger.info("Log directory: " + Contant.logDir)
    Logger.info("Starting initialization...")
    # checkInit()
    # Read the txt file to get the channel URLs that need to be added
    Logger.info("Reading the channel URLs to add...")
    urlList = []
    # Open the file, one entry per line
    for line in open("urlList.txt"):
        line = line.strip('\n')
        urlList.append(line)
    # The first line of urlList.txt holds the language code; the length check
    # below skips short lines like it, so only full channel URLs are saved.
    language = urlList[0]
    for url in urlList:
        if len(url) > 10:
            saveChannel(url, language)