appolli
5 months ago
5 changed files with 2309 additions and 0 deletions
@ -0,0 +1,13 @@ |
|||||
|
from sqlalchemy import Column, Integer, String, create_engine |
||||
|
from sqlalchemy.ext.declarative import declarative_base |
||||
|
|
||||
|
# Declarative base class that the ORM entity below inherits from.
Base = declarative_base()


class Keyword(Base):
    """ORM entity for one search keyword registered under a region.

    Mapped to the ``Keyword`` table. The pair (region, word) is what the
    service layer looks rows up by; no uniqueness constraint is declared
    here, so de-duplication is left to the callers.
    """

    __tablename__ = 'Keyword'

    # Surrogate primary key, generated by the database.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Region the keyword belongs to; required.
    region = Column(String(255), nullable=False)
    # The keyword text itself; required.
    word = Column(String(255), nullable=False)

    def __repr__(self):
        """Return a readable representation for logs and debugging."""
        return (f"{type(self).__name__}(id={self.id!r}, "
                f"region={self.region!r}, word={self.word!r})")
File diff suppressed because it is too large
@ -0,0 +1,78 @@ |
|||||
|
from LoggerUtils import Logger, initLogger |
||||
|
from bs4 import BeautifulSoup as bs |
||||
|
from urllib.request import urlopen, Request |
||||
|
import json |
||||
|
import Contant |
||||
|
from sqlalchemy import create_engine |
||||
|
from entity.ChannelEntity import Channel |
||||
|
from entity.KeyWordEntity import Keyword |
||||
|
from service.ChannelService import ChannelService |
||||
|
from service.KeyWordService import KeyWordService |
||||
|
import operator |
||||
|
import argparse |
||||
|
import pandas as pd |
||||
|
|
||||
|
|
||||
|
# Regions under which every keyword read from the CSV is registered.
TARGET_REGIONS = ("Taiwan", "Hongkong")


def cleanWord(value):
    """Return a CSV cell as a stripped string.

    Missing cells (``None`` / NaN, which is what pandas yields for empty
    fields) become ``''`` instead of the literal text ``'nan'``.
    """
    if value is None or pd.isna(value):
        return ""
    return str(value).strip()


def collectPendingKeywords(rows, regions, exists):
    """Select the (region, word) pairs that still have to be inserted.

    Args:
        rows: iterable of ``(lname, sname)`` raw cell values, one per CSV row.
        regions: regions each word is registered under.
        exists: callable ``(region, word) -> bool`` telling whether the pair
            is already stored.

    Returns:
        List of ``(region, word)`` tuples in first-seen order. Empty words
        are skipped and every pair appears at most once, even when the CSV
        repeats a word or ``lname`` equals ``sname``.
    """
    pending = []
    seen = set()
    for lname, sname in rows:
        words = (cleanWord(lname), cleanWord(sname))
        for region in regions:
            for word in words:
                pair = (region, word)
                if not word or pair in seen:
                    continue
                # Remember the pair so it is neither queried nor queued twice.
                seen.add(pair)
                if not exists(region, word):
                    pending.append(pair)
    return pending


def main():
    """Import the keywords of a CSV file into the Keyword table.

    Reads ``insert_keyword_config.json`` for logging and MySQL settings,
    then reads the CSV given by ``--file`` (columns ``lname`` and ``sname``)
    and inserts every keyword that is not stored yet for each region in
    ``TARGET_REGIONS``.
    """
    # Local import: only this entry point needs it.
    from urllib.parse import quote

    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--file', type=str, default='')
    args = parser.parse_args()
    csvFile = args.file
    if not csvFile:
        # Fail fast instead of crashing later inside pandas.
        parser.error("--file is required: path to the keyword CSV file")

    # Read the configuration file.
    with open('insert_keyword_config.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Initialise logging.
    Contant.logDir = data['log']['dir']
    Contant.logFileName = data['log']['fileName']
    initLogger(Contant.logDir, Contant.logFileName)

    # Configure the MySQL engine.
    dbHost = data['mysql']['host']
    dbPort = data['mysql']['port']
    dbUserName = data['mysql']['username']
    dbPassword = data['mysql']['password']
    dbDatabase = data['mysql']['database']
    # The password is deliberately kept out of the log.
    Logger.info("尝试连接mysql host:'{}' port:'{}' username:'{}' database:'{}'",
                dbHost, dbPort, dbUserName, dbDatabase)
    # Percent-encode the credentials so characters such as '@', ':' or '/'
    # cannot corrupt the connection URL.
    urlUser = quote(str(dbUserName), safe="")
    urlPassword = quote(str(dbPassword), safe="")
    # NOTE: create_engine() is lazy; the connection is only opened by the
    # first query issued through the engine.
    Contant.engin = create_engine(
        f'mysql+mysqlconnector://{urlUser}:{urlPassword}@{dbHost}:{dbPort}/{dbDatabase}')
    Logger.info("连接mysql成功")

    # Read the CSV file.
    df = pd.read_csv(csvFile, encoding="utf-8")

    def alreadyStored(region, word):
        """Tell whether (region, word) is already present in the database."""
        return KeyWordService.queryOneByRegionWord(
            region=region, word=word) is not None

    # Keep only the pairs that are missing from the database.
    pending = collectPendingKeywords(
        zip(df["lname"], df["sname"]), TARGET_REGIONS, alreadyStored)

    keyWords = []
    for region, word in pending:
        keyWords.append(Keyword(region=region, word=word))
        Logger.info(f"region:{region},keyword:{word}")

    KeyWordService.insterKeyWords(keyWords=keyWords)


if __name__ == "__main__":
    main()
@ -0,0 +1,13 @@ |
|||||
|
{ |
||||
|
"mysql": { |
||||
|
"host": "47.108.20.249", |
||||
|
"port": "3306", |
||||
|
"username": "root", |
||||
|
"password": "casino888!", |
||||
|
"database": "youtube" |
||||
|
}, |
||||
|
"log": { |
||||
|
"dir": "./logs", |
||||
|
"fileName": "insert_keyword" |
||||
|
} |
||||
|
} |
@ -0,0 +1,26 @@ |
|||||
|
from entity.KeyWordEntity import Keyword |
||||
|
from common.Utils import getSession |
||||
|
from sqlalchemy import update |
||||
|
|
||||
|
|
||||
|
class KeyWordService:
    """Persistence helpers for ``Keyword`` rows.

    Every method opens its own session through ``getSession()`` and always
    closes it, including when the database call raises. The methods are
    static, so they can be called on the class as before.
    """

    @staticmethod
    def insertOne(region, word):
        """Insert a single keyword for *region* and commit it.

        Args:
            region: region the keyword belongs to.
            word: keyword text.
        """
        session = getSession()
        try:
            session.add(Keyword(region=region, word=word))
            session.commit()
        except Exception:
            # Undo the partial transaction before propagating the error.
            session.rollback()
            raise
        finally:
            session.close()

    @staticmethod
    def queryOneByRegionWord(region, word):
        """Return the keyword stored for (*region*, *word*), or ``None``.

        Args:
            region: region to match exactly.
            word: keyword text to match exactly.

        Returns:
            The matching ``Keyword`` or ``None`` when there is no such row.
            The session is closed before returning.

        Raises:
            sqlalchemy.orm.exc.MultipleResultsFound: if more than one row
                matches (raised by ``one_or_none()``).
        """
        session = getSession()
        try:
            return session.query(Keyword).filter(
                Keyword.region == region, Keyword.word == word).one_or_none()
        finally:
            session.close()

    @staticmethod
    def insterKeyWords(keyWords):
        """Bulk-insert *keyWords* and commit them in one transaction.

        Args:
            keyWords: list of ``Keyword`` objects to store. An empty or
                ``None`` value is a no-op and does not open a session.
        """
        if not keyWords:
            return
        session = getSession()
        try:
            session.bulk_save_objects(keyWords)
            session.commit()
        except Exception:
            # Undo the partial transaction before propagating the error.
            session.rollback()
            raise
        finally:
            session.close()
Loading…
Reference in new issue