appolli
5 months ago
5 changed files with 2309 additions and 0 deletions
@ -0,0 +1,13 @@ |
|||
from sqlalchemy import Column, Integer, String, create_engine |
|||
from sqlalchemy.ext.declarative import declarative_base |
|||
|
|||
# Declarative base class that the ORM model below inherits from.
Base = declarative_base()


class Keyword(Base):
    """ORM mapping for the ``Keyword`` table.

    Each row pairs a ``region`` string with a ``word`` string; both are
    mandatory and limited to 255 characters by the column type.
    """

    __tablename__ = 'Keyword'

    # Surrogate integer primary key, auto-incremented.
    id = Column(Integer, autoincrement=True, primary_key=True)
    # Region label for the keyword (presumably a market name such as
    # "Taiwan" -- confirm against the callers that populate it).
    region = Column(String(255), nullable=False)
    # The keyword text itself.
    word = Column(String(255), nullable=False)
File diff suppressed because it is too large
@ -0,0 +1,78 @@ |
|||
from LoggerUtils import Logger, initLogger |
|||
from bs4 import BeautifulSoup as bs |
|||
from urllib.request import urlopen, Request |
|||
import json |
|||
import Contant |
|||
from sqlalchemy import create_engine |
|||
from entity.ChannelEntity import Channel |
|||
from entity.KeyWordEntity import Keyword |
|||
from service.ChannelService import ChannelService |
|||
from service.KeyWordService import KeyWordService |
|||
import operator |
|||
import argparse |
|||
import pandas as pd |
|||
|
|||
|
|||
if __name__ == "__main__":
    # Imported locally so this entry point does not depend on a new
    # file-level import.
    from urllib.parse import quote

    # ---- Command line -----------------------------------------------------
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--file', type=str, default='')
    args = parser.parse_args()
    csvFile = args.file
    if not csvFile:
        # Fail before touching the config or database instead of letting
        # pandas raise on an empty path later.
        parser.error("--file is required: path to the keyword CSV file")

    # ---- Configuration ----------------------------------------------------
    with open('insert_keyword_config.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # ---- Logging ----------------------------------------------------------
    Contant.logDir = data['log']['dir']
    Contant.logFileName = data['log']['fileName']
    initLogger(Contant.logDir, Contant.logFileName)

    # ---- MySQL engine -----------------------------------------------------
    dbHost = data['mysql']['host']
    dbPort = data['mysql']['port']
    dbUserName = data['mysql']['username']
    dbPassword = data['mysql']['password']
    dbDatabase = data['mysql']['database']
    # The password is masked: credentials must never reach the log file.
    # The format string is kept unchanged so the log layout stays the same.
    Logger.info("尝试连接mysql host:'{}' port:'{}' username:'{}' password:'{}' database:'{}'",
                dbHost, dbPort, dbUserName, '***', dbDatabase)
    # Percent-encode the credentials so characters such as '@', '/' or ':'
    # in the user name or password cannot corrupt the connection URL.
    safeUserName = quote(str(dbUserName), safe='')
    safePassword = quote(str(dbPassword), safe='')
    Contant.engin = create_engine(
        f'mysql+mysqlconnector://{safeUserName}:{safePassword}@{dbHost}:{dbPort}/{dbDatabase}')
    Logger.info("连接mysql成功")

    # ---- CSV import -------------------------------------------------------
    df = pd.read_csv(csvFile, encoding="utf-8")

    # Every name from the CSV is registered for each of these regions.
    regions = ("Taiwan", "Hongkong")

    keyWords = []
    # (region, word) pairs already handled in this run. Prevents queuing
    # the same keyword twice when a name repeats across rows or when
    # lname == sname, and avoids re-querying the database for it.
    seen = set()

    for rawLname, rawSname in zip(df["lname"], df["sname"]):
        # Skip missing cells: str(NaN) would otherwise become the literal
        # keyword "nan".
        words = [str(raw).strip()
                 for raw in (rawLname, rawSname) if not pd.isna(raw)]

        for region in regions:
            for word in words:
                if not word or (region, word) in seen:
                    continue
                seen.add((region, word))

                # Only queue keywords that are not stored yet.
                existing = KeyWordService.queryOneByRegionWord(
                    region=region, word=word)
                if existing is None:
                    keyWords.append(Keyword(region=region, word=word))
                    Logger.info(f"region:{region},keyword:{word}")

    # Persist everything collected above in one batch.
    KeyWordService.insterKeyWords(keyWords=keyWords)
@ -0,0 +1,13 @@ |
|||
{ |
|||
"mysql": { |
|||
"host": "47.108.20.249", |
|||
"port": "3306", |
|||
"username": "root", |
|||
"password": "casino888!", |
|||
"database": "youtube" |
|||
}, |
|||
"log": { |
|||
"dir": "./logs", |
|||
"fileName": "insert_keyword" |
|||
} |
|||
} |
@ -0,0 +1,26 @@ |
|||
from entity.KeyWordEntity import Keyword |
|||
from common.Utils import getSession |
|||
from sqlalchemy import update |
|||
|
|||
|
|||
class KeyWordService:
    """Persistence helpers for ``Keyword`` rows.

    Each helper obtains its own session from ``getSession()`` and always
    closes it before returning, including when the database call raises.
    The helpers are static: call them on the class, e.g.
    ``KeyWordService.insertOne(region, word)``.
    """

    @staticmethod
    def insertOne(region, word):
        """Insert one keyword row for ``region``/``word`` and commit it.

        The transaction is rolled back and the original exception is
        re-raised if the insert or commit fails.
        """
        session = getSession()
        try:
            session.add(Keyword(region=region, word=word))
            session.commit()
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()

    @staticmethod
    def queryOneByRegionWord(region, word):
        """Return the keyword matching ``region`` and ``word``, or ``None``.

        Uses ``one_or_none()``, so the query raises if more than one row
        matches. The returned object is no longer attached to a session
        because the session is closed before returning.
        """
        session = getSession()
        try:
            return session.query(Keyword).filter(
                Keyword.region == region,
                Keyword.word == word).one_or_none()
        finally:
            session.close()

    @staticmethod
    def insterKeyWords(keyWords):
        """Bulk-insert ``keyWords`` and commit them in one transaction.

        An empty batch returns immediately without opening a session.
        The transaction is rolled back and the original exception is
        re-raised if the insert or commit fails.
        """
        if not keyWords:
            return

        session = getSession()
        try:
            session.bulk_save_objects(keyWords)
            session.commit()
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()
Loading…
Reference in new issue