import csv

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


def tf_idf(items, user):
    """Append the articles most similar to the first article to a user's CSV.

    TF-IDF vectors are built from the ``'content'`` column of ``items`` and
    compared pairwise with cosine similarity. The similarity row of article
    ID 1 (position 0) is ranked from most to least similar, the top-ranked
    entry is skipped, and the next five entries are appended as rows to
    ``collections/<user>.csv``. No header row is written here.

    Args:
        items: Article records; each one is indexed by position and written
            with ``csv.DictWriter`` (assumed to be a list of dicts that
            carry the CSV field names below -- confirm against the caller).
        user: Value formatted into the target CSV file name.
    """
    # Load the articles into a frame so the text column can be selected.
    frame = pd.DataFrame(items)

    # Extract TF-IDF features from the article text.
    features = TfidfVectorizer().fit_transform(frame['content'])

    # Pairwise cosine similarity between every pair of articles.
    similarity = cosine_similarity(features, features)

    # Recommend articles similar to the travel guide whose article ID is 1.
    article_id = 1
    top_n = 5
    scores = similarity[article_id - 1]

    # Stable descending sort of the positions by score; the first entry is
    # skipped (for position 0 this is the query article itself) and the
    # following ``top_n`` positions are kept.
    ranked = sorted(range(len(scores)), key=lambda pos: scores[pos], reverse=True)
    picked = ranked[1:1 + top_n]

    target = 'collections/{}.csv'.format(user)
    columns = ['文章ID', 'content', '时间', '收藏数量', '喜欢数量', '图片资源']
    with open(target, 'a+', encoding='utf-8', newline='') as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writerows(items[pos] for pos in picked)