You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
56 lines
2.0 KiB
56 lines
2.0 KiB
from LoggerUtils import Logger, initLogger
|
|
import argparse
|
|
import Contant
|
|
from Orm import ormInit, Channel
|
|
import operator
|
|
from bs4 import BeautifulSoup as bs
|
|
from urllib.request import urlopen, Request
|
|
|
|
# py .\init.py --db=../db/youtube_prod.db --logDir=./logs
|
|
def saveChannel(channelUrl, language):
    """Fetch a channel page, extract its id and name, and store it if new.

    The page at ``channelUrl`` is downloaded and its ``<link>`` tags are
    scanned: the channel id is the part of the canonical link's ``href``
    after ``/channel/``, and the channel name is the ``content`` attribute
    of a ``<link>`` tag that carries one.  If no ``Channel`` row with that
    id exists yet, a new one is created.

    Args:
        channelUrl: URL of the channel page to scrape.
        language: Value stored in the new row's ``channelLanguage`` field.

    Returns:
        None.  Returns early without inserting when the channel id cannot
        be resolved from the page or the channel is already stored.
    """
    Logger.info("频道链接:"+channelUrl)
    channelId = ""
    channelName = ""

    request = Request(channelUrl, headers={'User-Agent': 'Mozilla'})
    # The parser consumes the response while constructing the soup, so the
    # connection can be released as soon as the constructor returns.
    with urlopen(request) as response:
        videoInfo = bs(response, features="html.parser")

    for link in videoInfo.find_all("link"):
        if "canonical" in str(link):
            href = str(link.get("href", ""))
            # Only accept canonical URLs that really contain a channel
            # segment; anything else would make the split index fail.
            if "/channel/" in href:
                channelId = href.split("/channel/")[1]
        content = link.get("content")
        if content is not None:
            channelName = str(content)

    Logger.info("channelId:"+channelId)
    Logger.info("channelName:"+channelName)

    # Without an id there is nothing meaningful to look up or insert.
    if not channelId:
        Logger.info("无法解析频道ID，跳过:" + channelUrl)
        return

    channel = Channel.get_or_none(Channel.channelId == channelId)
    if channel is not None:
        Logger.info("频道已存在:" + channelId)
        return

    Channel.create(channelTitle=channelName,
                   channelId=channelId, channelLanguage=language)
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: configure storage/logging, then import every
    # channel URL listed in urlList.txt.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()

    # Publish the paths on the shared Contant module before the logger and
    # ORM are initialised (presumably both read them from there - confirm).
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    ormInit()

    Logger.info("SqlLite存放地址:"+Contant.db)
    Logger.info("日志文件存放地址:"+Contant.logDir)
    Logger.info("开始初始化...")
    # checkInit()

    # Read the text file that lists the channel URLs to add.
    Logger.info("开始读取需要新增的频道地址...")
    # Use a context manager so the file handle is closed deterministically.
    with open("urlList.txt") as urlFile:
        urlList = [line.strip('\n') for line in urlFile]

    if not urlList:
        # An empty file has no language line and no URLs; nothing to do.
        Logger.info("urlList.txt 为空，没有需要新增的频道")
    else:
        # The first line of the file is used as the language for every
        # channel; the remaining lines are expected to be channel URLs.
        language = urlList[0]
        for url in urlList:
            # Entries of 10 characters or fewer are skipped; this filters
            # blank lines and (for short values) the language line itself.
            if len(url) > 10:
                saveChannel(url, language)
|
|
|