def _buildRegionLookup(df):
    """Return a ``{channelID: region}`` dict built from the region spreadsheet.

    Reads the ``channelID`` and ``region`` columns of *df*. For each channelID
    the first row with a non-empty region wins; rows with an empty channelID
    or an empty region cell are left out, so callers fall back to their own
    default for them.
    """
    regionById = {}
    for channelId, region in zip(df['channelID'], df['region']):
        if pd.isna(channelId) or pd.isna(region) or channelId in regionById:
            continue
        regionById[channelId] = region
    return regionById


def moveChannel(sqliteDir, excelPath='./channel_region.xlsx', batchSize=50):
    """Copy the not-yet-copied rows of SQLite table ``Channel_copy`` via ChannelService.

    Rows with ``is_copy = 0`` are fetched in batches. Each row is handed to
    ``ChannelService.insertOneByValues`` together with the region found in the
    spreadsheet (``"unKnown"`` when the channel is not listed or its region
    cell is empty), then flagged ``is_copy = 1`` and committed, so an
    interrupted run can be resumed.

    NOTE(review): the insert and the flag update are two separate commits; a
    crash between them means the row is inserted again on the next run.

    Args:
        sqliteDir: path of the SQLite database file to read from.
        excelPath: spreadsheet with ``channelID`` and ``region`` columns.
        batchSize: number of rows fetched per SELECT.

    Raises:
        RuntimeError: if flagging a row as copied matches no row. Without this
            guard the same batch would be selected and inserted forever.
    """
    # Build the lookup once instead of scanning the DataFrame for every row.
    regionById = _buildRegionLookup(pd.read_excel(excelPath))

    selectSql = "SELECT * from Channel_copy where is_copy = 0 limit ?"
    # Bound parameter instead of string interpolation: ids containing quotes
    # can neither break nor alter the statement.
    updateSql = "UPDATE Channel_copy SET is_copy = 1 WHERE channelId = ?"

    conn = sqlite3.connect(sqliteDir)
    try:
        cursor = conn.cursor()
        try:
            while True:
                cursor.execute(selectSql, (batchSize,))
                channel_list = cursor.fetchall()
                if not channel_list:
                    break
                for channel in channel_list:
                    Logger.info(channel)
                    # Positional columns of SELECT *: presumably channel id,
                    # title, language and crawl time -- confirm against the
                    # Channel_copy schema.
                    channelId, channelTitle, language, reptileTime = channel[1:5]
                    region = regionById.get(channelId, "unKnown")
                    ChannelService.insertOneByValues(
                        channelId, channelTitle, language, region, reptileTime)
                    cursor.execute(updateSql, (channelId,))
                    if cursor.rowcount == 0:
                        raise RuntimeError(
                            "could not mark channelId {!r} as copied".format(channelId))
                    conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the SQLite handle, also when an insert fails.
        conn.close()


# Usage: py .\move_data.py   (all settings are read from move_data_config.json)
if __name__ == "__main__":
    from urllib.parse import quote

    # Load the run configuration.
    with open('move_data_config.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Initialise logging.
    Contant.logDir = data['log']['dir']
    Contant.logFileName = data['log']['fileName']
    initLogger(Contant.logDir, Contant.logFileName)

    # Configure the MySQL engine.
    dbHost = data['mysql']['host']
    dbPort = data['mysql']['port']
    dbUserName = data['mysql']['username']
    dbPassword = data['mysql']['password']
    dbDatabase = data['mysql']['database']
    # The password is deliberately not written to the log.
    Logger.info("尝试连接mysql host:'{}' port:'{}' username:'{}' database:'{}'",
                dbHost, dbPort, dbUserName, dbDatabase)
    # Percent-encode the credentials so characters such as '@', ':' or '/'
    # cannot corrupt the connection URL.
    urlUser = quote(str(dbUserName), safe='')
    urlPassword = quote(str(dbPassword), safe='')
    Contant.engin = create_engine(
        f'mysql+mysqlconnector://{urlUser}:{urlPassword}@{dbHost}:{dbPort}/{dbDatabase}')
    # NOTE: create_engine() is lazy; the first real connection is only opened
    # when the engine is first used.
    Logger.info("连接mysql成功")

    moveTable = data['sqlite']['table']
    sqliteDir = data['sqlite']['dir']
    Logger.info("move data table:{}".format(moveTable))
    if moveTable == 'Channel':
        moveChannel(sqliteDir)
    else:
        # Make a misconfigured table name visible instead of exiting silently.
        Logger.info("unsupported move data table:{}".format(moveTable))
def updtaByChannel(channelId, channelTitle=None, channelLanguage=None, channelReptileTime=None, region=None):
    """Update the stored Channel whose ``channelId`` matches, changing only supplied fields.

    Defined without ``self``: call it on the class, i.e.
    ``ChannelService.updtaByChannel(...)``.

    Args:
        channelId: id of the channel to look up.
        channelTitle: new title; a falsy value (None, '') leaves it unchanged.
        channelLanguage: new language; a falsy value leaves it unchanged.
        channelReptileTime: new crawl time; a falsy value leaves it unchanged.
        region: new region; a falsy value leaves it unchanged.

    Returns:
        True when a matching channel was found and the update committed,
        False when no channel has that ``channelId``.
    """
    session = getSession()
    try:
        update_channel = session.query(Channel).filter(
            Channel.channelId == channelId).first()
        if update_channel is None:
            return False

        newValues = {
            'channelTitle': channelTitle,
            'channelLanguage': channelLanguage,
            'channelReptileTime': channelReptileTime,
            'region': region,
        }
        for attrName, value in newValues.items():
            # Falsy means "not supplied": keep the stored value.
            if value:
                setattr(update_channel, attrName, value)
        session.commit()
        return True
    except Exception:
        # Undo the partial change before the error propagates.
        session.rollback()
        raise
    finally:
        # Release the session on every path, including failures.
        session.close()