Browse Source

新增insert_keyWord

master
appolli 5 months ago
parent
commit
f72ee70a8a
  1. 13
      entity/KeyWordEntity.py
  2. 2179
      hk_tw_names_20240703_v2.csv
  3. 78
      insert_keyword.py
  4. 13
      insert_keyword_config.json
  5. 26
      service/KeyWordService.py

13
entity/KeyWordEntity.py

@ -0,0 +1,13 @@
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
# Declarative base class that the ORM model below derives from.
Base = declarative_base()


class Keyword(Base):
    """ORM mapping for one row of the ``Keyword`` table."""

    __tablename__ = 'Keyword'

    # Auto-incrementing integer primary key.
    id = Column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )
    # Region the keyword is registered for; required, at most 255 characters.
    region = Column(
        String(255),
        nullable=False,
    )
    # The keyword text itself; required, at most 255 characters.
    word = Column(
        String(255),
        nullable=False,
    )

2179
hk_tw_names_20240703_v2.csv

File diff suppressed because it is too large

78
insert_keyword.py

@ -0,0 +1,78 @@
from LoggerUtils import Logger, initLogger
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen, Request
import json
import Contant
from sqlalchemy import create_engine
from entity.ChannelEntity import Channel
from entity.KeyWordEntity import Keyword
from service.ChannelService import ChannelService
from service.KeyWordService import KeyWordService
import operator
import argparse
import pandas as pd
if __name__ == "__main__":
    # Import keywords from a CSV file into the Keyword table.
    #
    # Usage: python insert_keyword.py --file <path-to-csv>
    #
    # The CSV must provide "lname" and "sname" columns. Every non-empty name
    # is registered for both the "Taiwan" and the "Hongkong" region unless
    # that (region, word) pair is already stored or already queued in this
    # run. The CSV's "market" column is not used.

    # Local import: only the connection URL below needs it.
    from urllib.parse import quote

    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--file', type=str, default='')
    args = parser.parse_args()
    csvFile = args.file
    if not csvFile:
        # Fail fast with a usage message instead of a late pandas error.
        parser.error("--file is required: path to the CSV file to import")

    # Read the configuration file.
    with open('insert_keyword_config.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Initialise logging.
    Contant.logDir = data['log']['dir']
    Contant.logFileName = data['log']['fileName']
    initLogger(Contant.logDir, Contant.logFileName)

    # Set up the MySQL engine.
    mysqlConfig = data['mysql']
    dbHost = mysqlConfig['host']
    dbPort = mysqlConfig['port']
    dbUserName = mysqlConfig['username']
    dbPassword = mysqlConfig['password']
    dbDatabase = mysqlConfig['database']
    # Never write the real password to the log; a fixed mask is logged instead.
    Logger.info("尝试连接mysql host:'{}' port:'{}' username:'{}' password:'{}' database:'{}'",
                dbHost, dbPort, dbUserName, '******', dbDatabase)
    # Percent-encode the credentials so reserved characters ('@', '/', ':',
    # ...) cannot corrupt the connection URL.
    encodedUser = quote(str(dbUserName), safe='')
    encodedPassword = quote(str(dbPassword), safe='')
    Contant.engin = create_engine(
        f'mysql+mysqlconnector://{encodedUser}:{encodedPassword}'
        f'@{dbHost}:{dbPort}/{dbDatabase}')
    # NOTE(review): create_engine() connects lazily, so this message only
    # confirms the engine was built, not that the server is reachable.
    Logger.info("连接mysql成功")

    # Read the CSV file.
    df = pd.read_csv(csvFile, encoding="utf-8")

    regions = ("Taiwan", "Hongkong")
    keyWords = []
    # (region, word) pairs already handled in this run. Without it a name
    # repeated across rows, or lname == sname, would be queued twice because
    # the database check cannot see rows that are not inserted yet.
    seenPairs = set()
    for rawLname, rawSname in zip(df["lname"], df["sname"]):
        # Skip missing cells (NaN would otherwise become the keyword "nan")
        # and names that are blank after trimming.
        names = [
            str(raw).strip()
            for raw in (rawLname, rawSname)
            if not pd.isna(raw)
        ]
        names = [name for name in names if name]

        for region in regions:
            for word in names:
                pair = (region, word)
                if pair in seenPairs:
                    continue
                seenPairs.add(pair)
                # Only queue the keyword when it is not stored yet.
                existing = KeyWordService.queryOneByRegionWord(
                    region=region, word=word)
                if existing is None:
                    keyWords.append(Keyword(region=region, word=word))
                    Logger.info(f"region:{region},keyword:{word}")

    KeyWordService.insterKeyWords(keyWords=keyWords)

13
insert_keyword_config.json

@ -0,0 +1,13 @@
{
"mysql": {
"host": "47.108.20.249",
"port": "3306",
"username": "root",
"password": "casino888!",
"database": "youtube"
},
"log": {
"dir": "./logs",
"fileName": "insert_keyword"
}
}

26
service/KeyWordService.py

@ -0,0 +1,26 @@
from entity.KeyWordEntity import Keyword
from common.Utils import getSession
from sqlalchemy import update
class KeyWordService:
    """Persistence helpers for ``Keyword`` rows.

    Each method obtains its own session from ``getSession()`` and closes it
    before returning, including when the database call raises. The methods
    are static, so they can be called on the class or on an instance.
    """

    @staticmethod
    def insertOne(region, word):
        """Insert a single keyword row and commit it.

        Args:
            region: Value stored in the ``region`` column.
            word: Value stored in the ``word`` column.
        """
        session = getSession()
        try:
            session.add(Keyword(region=region, word=word))
            session.commit()
        finally:
            # Close even if add/commit failed so the session is not leaked.
            session.close()

    @staticmethod
    def queryOneByRegionWord(region, word):
        """Look up the keyword matching both ``region`` and ``word``.

        Args:
            region: Region to match exactly.
            word: Keyword text to match exactly.

        Returns:
            The matching ``Keyword``, or ``None`` when no row matches.
            ``one_or_none()`` raises if more than one row matches.
        """
        session = getSession()
        try:
            return session.query(Keyword).filter(
                Keyword.region == region, Keyword.word == word).one_or_none()
        finally:
            # Close even if the query failed so the session is not leaked.
            session.close()

    @staticmethod
    def insterKeyWords(keyWords):
        """Bulk-insert the given ``Keyword`` objects and commit them.

        The misspelled method name is kept because existing callers use it.

        Args:
            keyWords: Iterable of ``Keyword`` instances to save.
        """
        session = getSession()
        try:
            session.bulk_save_objects(keyWords)
            session.commit()
        finally:
            # Close even if the bulk save failed so the session is not leaked.
            session.close()
Loading…
Cancel
Save