parent
4043c6cfc0
commit
e26d273274
@ -0,0 +1,33 @@
|
||||
import csv
|
||||
|
||||
import pandas as pd
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
|
||||
def tf_idf(items, user, article_id=1, top_n=5):
    """Append the articles most similar to one article to a user's CSV file.

    The ``content`` field of every item is vectorised with TF-IDF, the
    cosine similarity between the chosen article and every other article
    is computed, and the ``top_n`` most similar items are appended as rows
    to ``collections/<user>.csv``.

    Args:
        items: Indexable sequence of mappings, one per article. Each item
            must have a ``content`` entry holding the article text, and
            may only contain keys listed in the CSV field names below
            (``csv.DictWriter`` rejects unknown keys).
        user: Value formatted into the output path
            ``collections/<user>.csv``.
        article_id: 1-based position in ``items`` of the article to find
            neighbours for. Note this is the position in the sequence,
            not a lookup on the item's own ID field. Defaults to 1.
        top_n: Maximum number of similar articles to write. Defaults to 5.

    Returns:
        None. The result is the rows appended to the CSV file; no header
        row is written.

    Raises:
        ValueError: If ``top_n`` is negative, or ``article_id`` does not
            refer to a position inside a non-empty ``items``.
    """
    if top_n < 0:
        raise ValueError('top_n must be non-negative, got {}'.format(top_n))

    # Nothing to compare against: without this guard the DataFrame built
    # from an empty sequence has no 'content' column and lookup fails.
    if len(items) == 0:
        return

    query_idx = article_id - 1
    if not 0 <= query_idx < len(items):
        raise ValueError(
            'article_id must be between 1 and {}, got {}'.format(
                len(items), article_id))

    # Load the data.
    df = pd.DataFrame(items)

    # Extract TF-IDF features from the article text.
    tfidf_matrix = TfidfVectorizer().fit_transform(df['content'])

    # Cosine similarity of the query article against every article. Only
    # this one row is ever read, so the full n x n matrix is not built.
    # A slice (not a scalar index) keeps the query two-dimensional.
    scores = cosine_similarity(
        tfidf_matrix[query_idx:query_idx + 1], tfidf_matrix)[0]

    # Rank every *other* article by similarity, highest first. The query
    # is excluded by index rather than by dropping the first sorted entry:
    # a duplicate article earlier in the list, or a query whose vector is
    # all zeros, would otherwise let the query recommend itself.
    candidates = [i for i in range(len(scores)) if i != query_idx]
    candidates.sort(key=lambda i: scores[i], reverse=True)  # stable on ties
    article_indices = candidates[:top_n]

    with open('collections/{}.csv'.format(user), 'a+', encoding='utf-8',
              newline='') as f:
        writer = csv.DictWriter(
            f,
            fieldnames=['文章ID', 'content', '时间', '收藏数量', '喜欢数量', '图片资源'],
        )
        for j in article_indices:
            writer.writerow(items[j])
|
Loading…
Reference in new issue