You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
74 lines
2.6 KiB
74 lines
2.6 KiB
6 months ago
|
from LoggerUtils import Logger, initLogger
|
||
|
from bs4 import BeautifulSoup as bs
|
||
|
from urllib.request import urlopen, Request
|
||
|
import json
|
||
|
import Contant
|
||
|
from sqlalchemy import create_engine
|
||
|
from entity.ChannelEntity import Channel
|
||
|
from service.ChannelService import ChannelService
|
||
|
import sqlite3
|
||
|
import pandas as pd
|
||
|
|
||
|
|
||
|
def moveChannel(sqliteDir):
    """Hand every not-yet-copied ``Channel_copy`` row to ``ChannelService``.

    Rows with ``is_copy = 0`` are read from the SQLite database in batches of
    50. For each row, columns 1-4 (channel id, title, language and the value
    the original code calls ``repiteTime``) are passed to
    ``ChannelService.insertOneByValues`` together with a region looked up in
    ``./channel_region.xlsx``; channels missing from the sheet get the region
    ``"unKnown"``. The row is then flagged ``is_copy = 1`` and committed, so an
    interrupted run can be resumed without re-sending flagged rows.

    Args:
        sqliteDir: Path of the SQLite database file to read from.

    Raises:
        RuntimeError: If flagging a row updates nothing (for example a NULL
            ``channelId``); without this guard the same batch would be
            selected and re-inserted forever.
    """
    # Read the Excel sheet that maps channel ids to regions.
    file_path = './channel_region.xlsx'
    df = pd.read_excel(file_path)
    # Build the lookup once instead of scanning the frame for every row.
    # keep='first' mirrors the first-match semantics of a per-row `.iloc[0]`.
    region_by_channel = (
        df.drop_duplicates(subset='channelID', keep='first')
        .set_index('channelID')['region']
        .to_dict()
    )

    select_pending = "SELECT * from Channel_copy where is_copy = 0 limit 50"

    # Read all pending rows from SQLite, batch by batch.
    conn = sqlite3.connect(sqliteDir)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(select_pending)
            channel_list = cursor.fetchall()
            while channel_list:
                for channel in channel_list:
                    Logger.info(channel)
                    channelId = channel[1]
                    channeTitle = channel[2]
                    language = channel[3]
                    repiteTime = channel[4]
                    region = region_by_channel.get(channelId, "unKnown")
                    ChannelService.insertOneByValues(
                        channelId, channeTitle, language, region, repiteTime)
                    # Bound parameter: safe for ids containing quotes and
                    # compared with the value's own storage type.
                    cursor.execute(
                        "UPDATE Channel_copy SET is_copy = 1 WHERE channelId = ?",
                        (channelId,))
                    if cursor.rowcount == 0:
                        raise RuntimeError(
                            "Could not flag Channel_copy row as copied "
                            f"(channelId={channelId!r}); aborting to avoid "
                            "re-inserting the same batch forever")
                    # Commit per row so a crash never leaves an inserted row
                    # unflagged in SQLite.
                    conn.commit()
                cursor.execute(select_pending)
                channel_list = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always release the database handle, even when an insert fails.
        conn.close()
|
||
|
|
||
|
|
||
|
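# Legacy example invocation: py .\init.py --db=../db/youtube_prod.db --logDir=./logs
# NOTE: no command-line flags are parsed below; all settings are read from move_data_config.json.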
|
||
|
if __name__ == "__main__":
    # Entry point: load move_data_config.json, initialise logging, create the
    # MySQL engine, then run the migration for the configured table.

    # Read the configuration file.
    with open('move_data_config.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Initialise logging.
    Contant.logDir = data['log']['dir']
    Contant.logFileName = data['log']['fileName']
    initLogger(Contant.logDir, Contant.logFileName)

    # Connect to MySQL.
    dbHost = data['mysql']['host']
    dbPort = data['mysql']['port']
    dbUserName = data['mysql']['username']
    dbPassword = data['mysql']['password']
    dbDatabase = data['mysql']['database']
    # The password is masked so credentials never end up in the log file.
    Logger.info("尝试连接mysql host:'{}' port:'{}' username:'{}' password:'{}' database:'{}'",
                dbHost, dbPort, dbUserName, '******', dbDatabase)
    # NOTE(review): credentials are interpolated into the URL verbatim, so a
    # username/password containing '@', ':' or '/' must already be URL-encoded
    # in the config file — confirm before changing the escaping here.
    Contant.engin = create_engine(
        f'mysql+mysqlconnector://{dbUserName}:{dbPassword}@{dbHost}:{dbPort}/{dbDatabase}')
    # NOTE(review): create_engine connects lazily, so this message presumably
    # only confirms that the engine object was built — TODO confirm.
    Logger.info("连接mysql成功")

    moveTable = data['sqlite']['table']
    sqliteDir = data['sqlite']['dir']
    Logger.info("move data table:{}".format(moveTable))
    if moveTable == 'Channel':
        moveChannel(sqliteDir)
    else:
        # Make an unrecognised table name visible instead of exiting silently.
        Logger.info("unsupported move table:{}, nothing moved".format(moveTable))
|