main
CriptDit 6 months ago
parent 4043c6cfc0
commit e26d273274

@ -3,12 +3,16 @@ import random
from DrissionPage import ChromiumOptions from DrissionPage import ChromiumOptions
from DrissionPage import ChromiumPage from DrissionPage import ChromiumPage
import time import time
import TF_IDF
path=r"C:\Program Files\Google\Chrome\Application\chrome.exe" path=r"C:\Program Files\Google\Chrome\Application\chrome.exe"
ChromiumOptions().set_browser_path(path).save() ChromiumOptions().set_browser_path(path).save()
class DataRequest: class DataRequest:
def __init__(self,keyword): def __init__(self,keyword, user):
self.user=user
self.tf_idf='false'
#创建文件对象 #创建文件对象
self.success='False' self.success='False'
self.resp = None self.resp = None
@ -79,13 +83,16 @@ class DataRequest:
return False return False
def sort(self): def sort(self):
#按照喜欢人数排序 if self.tf_idf=='false':
self.items.sort(key=lambda item:int(item['喜欢数量']),reverse=True) #按照喜欢人数排序
for i in self.items: self.items.sort(key=lambda item:int(item['喜欢数量']),reverse=True)
if self.is_in_csv(i['文章ID']): for i in self.items:
continue if self.is_in_csv(i['文章ID']):
else: continue
self.csv_writer.writerow(i) else:
self.csv_writer.writerow(i)
else:
TF_IDF.tf_idf(self.items, self.user)
def close(self): def close(self):
self.drive.listen.stop() self.drive.listen.stop()
@ -100,9 +107,6 @@ class DataRequest:
print('数据获取成功') print('数据获取成功')
break break
# print('[1]:',line.split(',')[1])
# break
if __name__=='__main__': if __name__=='__main__':
dataRequest=DataRequest('厦门旅游攻略') dataRequest=DataRequest('厦门旅游攻略')
for i in range(1): for i in range(1):

@ -361,11 +361,11 @@ class Mainpage:
def researchPage(self): def researchPage(self):
self.clear_csv_file() self.clear_csv_file()
rs=self.research.get() rs = self.research.get()
if rs=='': if rs == '':
messagebox.showinfo(title='提示', message='你还没有输入任何东西!') messagebox.showinfo(title='提示', message='你还没有输入任何东西!')
else: else:
if DataRequest.DataRequest(str(self.research.get())).success=='True': if DataRequest.DataRequest(str(self.research.get()), self.user).success == 'True':
self.content = '' self.content = ''
self.time = '' self.time = ''
@ -383,7 +383,7 @@ class Mainpage:
self.max_notes_count = 0 self.max_notes_count = 0
self.max_iamge_count = 0 self.max_iamge_count = 0
self.data_request = DataRequest.DataRequest(str(self.research.get())) self.data_request = DataRequest.DataRequest(str(self.research.get()), self.user)
time.sleep(random.uniform(0,1)) time.sleep(random.uniform(0,1))
self.init_notes_request() self.init_notes_request()
else: else:

@ -0,0 +1,33 @@
import csv
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
def tf_idf(items, user):
    """Append the articles most similar to the first item to the user's CSV.

    Builds a TF-IDF matrix over the ``'content'`` field of every item,
    computes pairwise cosine similarity, and appends the five items closest
    to the first one to ``collections/<user>.csv``.

    Args:
        items: List of article dicts. Each dict must have a ``'content'``
            entry and is written as a CSV row with the field names used
            below.
        user: Value interpolated into the output file name.

    Returns:
        None. The result is the rows appended to the CSV file; nothing is
        written when ``items`` is empty.
    """
    # Nothing to vectorise: a DataFrame built from an empty list has no
    # 'content' column, so the lookup below would raise KeyError.
    if not items:
        return

    # Load the data.
    df = pd.DataFrame(items)
    # Extract TF-IDF features.
    # NOTE(review): the default TfidfVectorizer tokeniser splits on word
    # boundaries; if 'content' is unsegmented Chinese text, whole phrases
    # become single tokens -- confirm whether a word segmenter is needed.
    tfidf_matrix = TfidfVectorizer().fit_transform(df['content'])
    # Compute pairwise cosine similarity.
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

    def recommend_articles(article_id, user, cosine_sim=cosine_sim, top_n=5):
        """Append the ``top_n`` items most similar to ``article_id`` (1-based)."""
        target = article_id - 1
        # Exclude the target by index instead of assuming it sorts first:
        # rounding or ties could otherwise leave it among its own
        # recommendations and drop a real neighbour.
        scored = [
            (idx, score)
            for idx, score in enumerate(cosine_sim[target])
            if idx != target
        ]
        scored.sort(key=lambda pair: pair[1], reverse=True)
        article_indices = [idx for idx, _ in scored[:top_n]]
        with open('collections/{}.csv'.format(user), 'a+', encoding='utf-8', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=['文章ID', 'content', '时间', '收藏数量', '喜欢数量', '图片资源'])
            writer.writerows([items[j] for j in article_indices])

    # Recommend articles similar to the travel guide at position 1.
    recommend_articles(1, user)
Loading…
Cancel
Save