from LoggerUtils import Logger, initLogger
import argparse
import Contant
from Orm import ormInit, Channel
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen, Request

# Usage: py .\init.py --db=../db/youtube_prod.db --logDir=./logs


def saveChannel(channelUrl, language):
    """Scrape a channel page and store it if it is not already in the database."""
    Logger.info("Channel URL: " + channelUrl)
    channelId = ""
    channelName = ""
    url_opener = urlopen(
        Request(channelUrl, headers={'User-Agent': 'Mozilla'}))
    videoInfo = bs(url_opener, features="html.parser")
    links = videoInfo.find_all("link")
    for link in links:
        # The canonical <link> tag holds the /channel/<id> URL.
        if "canonical" in str(link):
            channelId = str(link['href']).split("/channel/")[1]
        # A <link ... content="..."> tag holds the channel name.
        if "content=" in str(link):
            channelName = str(link['content'])
    Logger.info("channelId: " + channelId)
    Logger.info("channelName: " + channelName)
    channel = Channel.get_or_none(Channel.channelId == channelId)
    if channel is not None:
        Logger.info("Channel already exists: " + channelId)
        return
    Channel.create(channelTitle=channelName, channelId=channelId,
                   channelLanguage=language)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    ormInit()
    Logger.info("SQLite database path: " + Contant.db)
    Logger.info("Log directory: " + Contant.logDir)
    Logger.info("Starting initialization...")
    # checkInit()
    # Read the txt file to get the channel URLs that need to be added.
    Logger.info("Reading channel URLs to add...")
    urlList = []
    # Open the file and close it automatically once it has been read.
    with open("urlList.txt") as f:
        for line in f:
            urlList.append(line.strip('\n'))
    # The first line of the file is the language applied to every channel.
    language = urlList[0]
    for url in urlList:
        if len(url) > 10:
            saveChannel(url, language)
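# ---------------------------------------------------------------------------
# Expected urlList.txt layout (an assumption inferred from the loop above, not
# documented by the original author): the first line is the language code that
# is stored on every channel, and each following line is a full channel URL.
# Lines of 10 characters or fewer (such as the language line itself) are
# skipped by the `len(url) > 10` guard. A hypothetical example file:
#
#   en
#   https://www.youtube.com/channel/UCxxxxxxxxxxxxxxxxxxxxxx
#   https://www.youtube.com/channel/UCyyyyyyyyyyyyyyyyyyyyyy
# ---------------------------------------------------------------------------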