You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
79 lines
2.9 KiB
79 lines
2.9 KiB
5 months ago
|
from LoggerUtils import Logger, initLogger
|
||
|
from bs4 import BeautifulSoup as bs
|
||
|
from urllib.request import urlopen, Request
|
||
|
import json
|
||
|
import Contant
|
||
|
from sqlalchemy import create_engine
|
||
|
from entity.ChannelEntity import Channel
|
||
|
from entity.KeyWordEntity import Keyword
|
||
|
from service.ChannelService import ChannelService
|
||
|
from service.KeyWordService import KeyWordService
|
||
|
import operator
|
||
|
import argparse
|
||
|
import pandas as pd
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    # Script entry point: read keywords from a CSV file and queue every
    # long/short name that is not stored yet for each target region, then
    # hand the collected Keyword objects to KeyWordService in one call.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--file', type=str, default='')
    args = parser.parse_args()
    csvFile = args.file
    if not csvFile:
        # Fail fast with a usage message instead of an opaque pandas error
        # after the logger and the database engine have been set up.
        parser.error("--file is required: path of the CSV file to import")

    # Load the configuration file.
    with open('insert_keyword_config.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Initialise logging.
    Contant.logDir = data['log']['dir']
    Contant.logFileName = data['log']['fileName']
    initLogger(Contant.logDir, Contant.logFileName)

    # Build the MySQL engine.
    mysqlConfig = data['mysql']
    dbHost = mysqlConfig['host']
    dbPort = mysqlConfig['port']
    dbUserName = mysqlConfig['username']
    dbPassword = mysqlConfig['password']
    dbDatabase = mysqlConfig['database']
    # The password is masked: credentials must never end up in log files.
    Logger.info("尝试连接mysql host:'{}' port:'{}' username:'{}' password:'{}' database:'{}'",
                dbHost, dbPort, dbUserName, "******", dbDatabase)
    # NOTE(review): create_engine() is lazy and does not open a connection
    # here, so the success message below only confirms the engine was built.
    # NOTE(review): a password containing URL-reserved characters (e.g. '@')
    # would need escaping before being embedded in this URL - confirm config.
    Contant.engin = create_engine(
        f'mysql+mysqlconnector://{dbUserName}:{dbPassword}@{dbHost}:{dbPort}/{dbDatabase}')
    Logger.info("连接mysql成功")

    # Read the CSV file. Only the "lname" and "sname" columns are used; the
    # target regions are fixed below.
    df = pd.read_csv(csvFile, encoding="utf-8")
    targetRegions = ("Taiwan", "Hongkong")
    keyWords = []
    # (region, word) pairs already handled in this run. The database lookup
    # cannot see rows that are still only queued in keyWords, so without this
    # set a word repeated in the CSV would be queued several times.
    seenPairs = set()
    for rawLname, rawSname in zip(df["lname"], df["sname"]):
        # Skip missing cells (str(NaN) would yield the bogus keyword "nan")
        # and cells that are empty once stripped.
        words = [str(raw).strip()
                 for raw in (rawLname, rawSname) if not pd.isna(raw)]
        words = [word for word in words if word]

        # Queue the keyword only if it is not stored yet.
        for region in targetRegions:
            for word in words:
                pair = (region, word)
                if pair in seenPairs:
                    continue
                seenPairs.add(pair)
                keyWord: Keyword = KeyWordService.queryOneByRegionWord(
                    region=region, word=word)
                if keyWord is None:
                    keyWords.append(Keyword(region=region, word=word))
                    Logger.info(f"region:{region},keyword:{word}")

    KeyWordService.insterKeyWords(keyWords=keyWords)
|