from LoggerUtils import Logger, initLogger
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen, Request
import json
import Contant
from sqlalchemy import create_engine
from entity.ChannelEntity import Channel
from entity.KeyWordEntity import Keyword
from service.ChannelService import ChannelService
from service.KeyWordService import KeyWordService
import operator
import argparse
import pandas as pd
from common.Utils import getSession

# Regions every keyword is registered under, in insertion order.
TARGET_REGIONS = ("Taiwan", "Hongkong")


def _clean_word(raw):
    """Return the stripped text of a CSV cell, or None when it is unusable.

    pandas represents empty cells as NaN; ``str(NaN)`` would otherwise be
    stored as the literal keyword "nan". Whitespace-only cells are rejected
    for the same reason: they carry no keyword.
    """
    if pd.isna(raw):
        return None
    word = str(raw).strip()
    return word or None


def _insert_keyword_if_missing(session, region, word):
    """Insert ``(region, word)`` into the Keyword table unless it already exists.

    Commits immediately after each insert so that a later failure does not
    discard rows that were already processed.

    Returns True when a new row was inserted, False when it already existed.
    """
    existing = session.query(Keyword).filter(
        Keyword.region == region, Keyword.word == word).one_or_none()
    if existing is not None:
        return False
    session.add(Keyword(region=region, word=word))
    session.commit()
    Logger.info(f"region:{region},keyword:{word}")
    return True


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--file', type=str, default='')
    args = parser.parse_args()
    csvFile = args.file
    if not csvFile:
        # Fail with a usage message instead of an opaque pandas error.
        parser.error("--file is required (path to the keyword CSV file)")

    # Read the configuration file.
    with open('insert_keyword_config.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Initialise logging.
    Contant.logDir = data['log']['dir']
    Contant.logFileName = data['log']['fileName']
    initLogger(Contant.logDir, Contant.logFileName)

    # Set up the MySQL engine.
    dbHost = data['mysql']['host']
    dbPort = data['mysql']['port']
    dbUserName = data['mysql']['username']
    dbPassword = data['mysql']['password']
    dbDatabase = data['mysql']['database']
    # Never write the real password to the log; log a mask instead.
    Logger.info("尝试连接mysql host:'{}' port:'{}' username:'{}' password:'{}' database:'{}'",
                dbHost, dbPort, dbUserName, "***", dbDatabase)
    # NOTE(review): a password containing URL-reserved characters such as '@'
    # or '/' would break this URL unless the config already stores it escaped.
    Contant.engin = create_engine(
        f'mysql+mysqlconnector://{dbUserName}:{dbPassword}@{dbHost}:{dbPort}/{dbDatabase}')
    # NOTE: create_engine() is lazy; the first real connection is made by the
    # first query below, so this message only confirms the engine was built.
    Logger.info("连接mysql成功")

    session = getSession()
    try:
        # Read the CSV file; only the "lname" and "sname" columns are used.
        df = pd.read_csv(csvFile, encoding="utf-8")
        for rawLong, rawShort in zip(df["lname"], df["sname"]):
            words = [w for w in (_clean_word(rawLong), _clean_word(rawShort))
                     if w is not None]
            # Store each name under every target region if it is not there yet
            # (per row: all words for the first region, then the next region).
            for region in TARGET_REGIONS:
                for word in words:
                    _insert_keyword_if_missing(session, region, word)
    finally:
        # Release the DB connection even when a row fails.
        session.close()