youtube下载 (YouTube downloader)
import time
from LoggerUtils import Logger, initLogger
import argparse
import Contant
from Orm import ormInit, Channel
import operator
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen, Request
# py .\init.py --db=../db/youtube_prod.db --logDir=./logs
def saveChannel(channelUrl, language):
Logger.info("频道链接:"+channelUrl)
channelId = ""
channelName = ""
url_opener = urlopen(
Request(channelUrl, headers={'User-Agent': 'Mozilla'}))
videoInfo = bs(url_opener, features="html.parser")
links = videoInfo.find_all("link")
for link in links:
if operator.contains(str(link), "canonical"):
channelId = str(link['href']).split("/channel/")[1]
if operator.contains(str(link), "content="):
channelName = str(link['content'])
Logger.info("channelId:"+channelId)
Logger.info("channelName:"+channelName)
channel = Channel.get_or_none(Channel.channelId == channelId)
if channel != None:
Logger.info("频道已存在:" + channelId)
return
Channel.create(channelTitle=channelName,
channelId=channelId, channelLanguage=language)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--db', type=str, default='')
    parser.add_argument('--logDir', type=str, default='')
    args = parser.parse_args()
    Contant.db = args.db
    Contant.logDir = args.logDir
    initLogger()
    ormInit()
    Logger.info("SQLite database path: " + Contant.db)
    Logger.info("Log directory: " + Contant.logDir)
    Logger.info("Starting initialization...")
    # checkInit()
    # Read the channel URLs to be added from a text file
    Logger.info("Reading channel URLs to add...")
    urlList = []
    # Open the file; each non-empty line holds "<channel URL> <language>"
    with open("urlList.txt") as f:
        for line in f:
            urlList.append(line.strip('\n'))
    # language = urlList[0]
    for url_str in urlList:
        if len(url_str) > 10:
            url = url_str.split(" ")[0]
            language = url_str.split(" ")[1]
            Logger.info("url:{}, language:{}".format(url, language))
            saveChannel(url, language)
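
For reference, the loop above expects urlList.txt to contain one channel per line in the form "<channel URL> <language>", separated by a single space; lines of 10 characters or fewer are skipped. A hypothetical example (placeholder URLs, not taken from the repository):

    https://www.youtube.com/channel/UCxxxxxxxxxxxxxxxxxxxxxx en
    https://www.youtube.com/c/ExampleChannel zh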