commitFinish

main
CriptDit 5 months ago
parent e8ccd854e7
commit 01fba4e1f2

@ -1,5 +1,7 @@
import csv import csv
import random import random
import urllib.parse
from DrissionPage import ChromiumOptions from DrissionPage import ChromiumOptions
from DrissionPage import ChromiumPage from DrissionPage import ChromiumPage
import time import time
@ -24,9 +26,12 @@ class DataRequest:
self.csv_writer=csv.DictWriter(self.f,fieldnames=['文章ID','文章标题','喜欢数量','图片资源']) self.csv_writer=csv.DictWriter(self.f,fieldnames=['文章ID','文章标题','喜欢数量','图片资源'])
# #
self.keyword=keyword self.keyword=keyword
self.url='https://www.xiaohongshu.com/search_result?keyword='+self.keyword+'&type=51'
self.url=''
#数据 #数据
self.items=[] self.items=[]
#初始化URL编码
self.set_url()
#打开浏览器 #打开浏览器
self.drive=ChromiumPage() self.drive=ChromiumPage()
self.drive.set.scroll.smooth(on_off=True) self.drive.set.scroll.smooth(on_off=True)
@ -37,6 +42,13 @@ class DataRequest:
self.drive.get(self.url) self.drive.get(self.url)
self.dataRequest() self.dataRequest()
def set_url(self):
    """Build the search-result URL for ``self.keyword`` and store it in ``self.url``."""
    # The keyword goes through url_encode first so that non-ASCII text and
    # reserved characters are safe to place in the query string.
    encoded_keyword = self.url_encode(self.keyword)
    self.url = 'https://www.xiaohongshu.com/search_result?keyword=' + encoded_keyword
def url_encode(self, s):
    """Return ``s`` percent-encoded for use inside a URL.

    ``safe=''`` is passed so that ``/`` is escaped as well (``quote`` leaves
    it untouched by default).
    """
    encoded = urllib.parse.quote(s, safe='')
    return encoded
def clear_csv_file(self): def clear_csv_file(self):
with open(self.data_csv_file_path, 'w') as file1: # 使用'w'模式打开文件 with open(self.data_csv_file_path, 'w') as file1: # 使用'w'模式打开文件
pass # 不需要执行任何操作,因为打开文件时内容已被清空 pass # 不需要执行任何操作,因为打开文件时内容已被清空

@ -1,6 +1,7 @@
import csv import csv
import os import os
import random import random
import shutil
import tempfile import tempfile
import time import time
import tkinter as tk import tkinter as tk
@ -81,9 +82,9 @@ class Mainpage:
self.btn_liked = tk.Button(self.page, text='收藏攻略', bg='#D7EFFF',font=('SimHei',15,'bold'),command=self.func_collect) self.btn_liked = tk.Button(self.page, text='收藏攻略', bg='#D7EFFF',font=('SimHei',15,'bold'),command=self.func_collect)
self.btn_liked.place(relx=0.9, rely=0.2, relheight=0.05, relwidth=0.1) self.btn_liked.place(relx=0.9, rely=0.2, relheight=0.05, relwidth=0.1)
#
self.btn_dis_liked = tk.Button(self.page, text='取消收藏', bg='#D7EFFF',font=('SimHei',15,'bold'),command=self.func_discollect) # self.btn_dis_liked = tk.Button(self.page, text='取消收藏', bg='#D7EFFF',font=('SimHei',15,'bold'),command=self.re)
self.btn_dis_liked.place(relx=0.9, rely=0.3, relheight=0.05, relwidth=0.1) # self.btn_dis_liked.place(relx=0.9, rely=0.3, relheight=0.05, relwidth=0.1)
self.lable_like=tk.Label(self.page,text='喜欢人数:'+self.likes,font=('SimHei',15,'bold')) self.lable_like=tk.Label(self.page,text='喜欢人数:'+self.likes,font=('SimHei',15,'bold'))
self.lable_like.place(relx=0.05, rely=0.93) self.lable_like.place(relx=0.05, rely=0.93)
@ -234,7 +235,7 @@ class Mainpage:
self.btn_pre = tk.Button(self.page2, text='上一篇', bg='#D7EFFF',font=('SimHei',15,'bold'),command=self.c_pre_data) self.btn_pre = tk.Button(self.page2, text='上一篇', bg='#D7EFFF',font=('SimHei',15,'bold'),command=self.c_pre_data)
self.btn_pre.place(relx=0.45, rely=0.35, relheight=0.08, relwidth=0.09) self.btn_pre.place(relx=0.45, rely=0.35, relheight=0.08, relwidth=0.09)
self.btn_dis_liked = tk.Button(self.page2, text='取消收藏', bg='#D7EFFF',font=('SimHei',15,'bold'),command=self.func_discollect) self.btn_dis_liked = tk.Button(self.page2, text='取消收藏', bg='#D7EFFF',font=('SimHei',15,'bold'),command=self.remove_rows_with_content)
self.btn_dis_liked.place(relx=0.9, rely=0.3, relheight=0.05, relwidth=0.1) self.btn_dis_liked.place(relx=0.9, rely=0.3, relheight=0.05, relwidth=0.1)
def notes_request(self,next_back): def notes_request(self,next_back):
@ -528,6 +529,7 @@ class Mainpage:
for row in reader: for row in reader:
if i==self.current_data_index: if i==self.current_data_index:
dic['文章内容']=row[1] dic['文章内容']=row[1]
i+=1
with open('collections/{}.csv'.format(self.user),'a+',encoding='utf-8',newline='') as f: with open('collections/{}.csv'.format(self.user),'a+',encoding='utf-8',newline='') as f:
writer = csv.DictWriter(f, fieldnames=['文章ID','文章标题','图片资源','文章内容']) writer = csv.DictWriter(f, fieldnames=['文章ID','文章标题','图片资源','文章内容'])
@ -560,7 +562,26 @@ class Mainpage:
# 将临时文件重命名为原始文件名 # 将临时文件重命名为原始文件名
os.rename(temp.name, 'collections/{}.csv'.format(self.user)) os.rename(temp.name, 'collections/{}.csv'.format(self.user))
def remove_rows_with_content(self):
    """Remove the currently selected entry from the user's collection CSV.

    First calls ``dis_collect.dis_collec`` with the current user and index,
    then rewrites ``collections/<user>.csv`` so that it contains every row
    except the one at position ``self.c_data_index`` (0-based, counting every
    CSV row in the file).

    The filtered rows are written to a temporary file which is moved over the
    original once it is complete. If anything fails before the move finishes,
    the temporary file is deleted and the exception is re-raised, so a failed
    run does not leave orphaned temp files behind.
    """
    dis_collect.dis_collec(self.user, self.c_data_index)
    collection_path = 'collections/{}.csv'.format(self.user)
    # delete=False: the file must outlive the ``with`` block so it can be
    # moved into place afterwards.
    tmpfile = tempfile.NamedTemporaryFile(mode='w', delete=False, encoding='utf-8', newline='')
    try:
        with tmpfile, open(collection_path, 'r', newline='', encoding='utf-8') as infile:
            writer = csv.writer(tmpfile)
            # Copy every row except the one being removed.
            for i, row in enumerate(csv.reader(infile)):
                if i != self.c_data_index:
                    writer.writerow(row)
        # Both files are closed here; replace the original with the filtered copy.
        shutil.move(tmpfile.name, collection_path)
    except Exception:
        # Clean up the temp file (it is already gone if the move succeeded
        # part-way via rename) before propagating the error.
        if os.path.exists(tmpfile.name):
            os.remove(tmpfile.name)
        raise
if __name__ == '__main__': if __name__ == '__main__':
root = tk.Tk() root = tk.Tk()

@ -15,15 +15,15 @@ def notes_request(url):
'Referer':'https://www.xiaohongshu.com', 'Referer':'https://www.xiaohongshu.com',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'} 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}
cookies = { cookies = {
'abRequestId':'a3c8a855-ed85-57f6-91fe-3e0acedffde8', 'abRequestId':'a1d1855c-4c32-5307-a9f1-9d3f34a192cf',
'a1':'18f9b67ecdbh41bri2la0dgguminrd7dmmrk4pxf750000215521', 'a1':'1900bc790f5y2g9t1mkcnviv0sy4y0pfv4afgtuj250000186611',
'webId':'6d97ebd3e6051489537e7a6aa1d1cf99', 'webId':'b7d9160688e1684d896a4dc0dab65e36',
'gid':'yYijDKYydJ6JyYijDKWdS6UCfDx4yDF3J108FiWfkk763T28IFfWf4888Jy22Jy8WYf0SyjD', 'gid':'yj88DSWfdJhjyj88DSWj8497i2vJkj9y6TSI0VJh3h8Mvi284v8Uik888yYKKyy8Wq48fY4i',
'webBuild':'4.17.2', 'webBuild':'4.20.1',
'web_session':'040069b3643d32585fff79ee79344bd4b89bc6', 'web_session':'040069b3643d32585fffe2225c344b136354be',
'acw_tc':'459a6388281f21d6215ff80f1402afbfdf47eb5a1ef949b426ace99565a6fc50', 'acw_tc':'5a6bd4ac6c786b806cb96212649c76bb7a0ac62792834403c50f508645293c50',
'websectiga':'7750c37de43b7be9de8ed9ff8ea0e576519e8cd2157322eb972ecb429a7735d4', 'websectiga':'3633fe24d49c7dd0eb923edc8205740f10fdb18b25d424d2a2322c6196d2a4ad',
'sec_poison_id':'b1e1329b-735f-427c-bfd0-698a912c0366', 'sec_poison_id':'b2862aa3-62f2-48ce-9b27-2a44baa4b714',
'xsecappid':'xhs-pc-web' 'xsecappid':'xhs-pc-web'
} }

@ -1,5 +1,4 @@
import csv import csv
import pandas as pd import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity from sklearn.metrics.pairwise import cosine_similarity
@ -7,16 +6,12 @@ from sklearn.metrics.pairwise import cosine_similarity
def tf_idf(items, user): def tf_idf(items, user):
# 数据载入 # 数据载入
articles = items articles = items
df = pd.DataFrame(articles) df = pd.DataFrame(articles)
# 提取TF-IDF特征 # 提取TF-IDF特征
tfidf_vectorizer = TfidfVectorizer() tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df['content']) tfidf_matrix = tfidf_vectorizer.fit_transform(df['content'])
# 计算余弦相似度 # 计算余弦相似度
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix) cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
# 根据相似度推荐 # 根据相似度推荐
def recommend_articles(article_id, user, cosine_sim=cosine_sim): def recommend_articles(article_id, user, cosine_sim=cosine_sim):
sim_scores = list(enumerate(cosine_sim[article_id - 1])) sim_scores = list(enumerate(cosine_sim[article_id - 1]))
@ -28,6 +23,5 @@ def tf_idf(items, user):
for j in article_indices: for j in article_indices:
write.writerow(items[j]) write.writerow(items[j])
# 推荐与文章ID为1的旅游攻略相似的文章 # 推荐与文章ID为1的旅游攻略相似的文章
recommend_articles(1,user) recommend_articles(1,user)

@ -0,0 +1 @@
652e1987000000001a01728c,上海CityWalk丨花一天时间打卡经典路线‼,"['http://sns-webpic-qc.xhscdn.com/202406121747/5d91eba5a30122575b2c4146228f05ae/1040g2sg30qamrg2c7m305o2pk3s08c1iul9cau0!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/acd054cac0306a40b3017b83b965aaae/1040g2sg30qamrg2c7m0g5o2pk3s08c1id68puj0!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/8a44ffaff123d9ac7cc8ce59804813cc/1040g2sg30qamrg2c7m105o2pk3s08c1irrp23qo!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/c838f8b3d6b4937fc9eb443eb9439a42/1040g2sg30qamrg2c7m005o2pk3s08c1ih4ij41g!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/a1091a8d90a659eb78aeeba1c27c11a0/1040g2sg30qamrg2c7m1g5o2pk3s08c1ierqbkeg!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/52484f0b10916edfec05be159b17b35c/1040g2sg30qamrg2c7m205o2pk3s08c1iinbrhl8!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/def3baa4ab87428dd9f7668c0e4a0d5b/1040g2sg30qamrg2c7m2g5o2pk3s08c1ivce38f8!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/720636b2cd678875f60aff8c59ac8bda/1040g2sg30qamrg2c7m3g5o2pk3s08c1i07b4gb0!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/422b9900a3da1949703eed0b17645597/1040g2sg30qamrg2c7m405o2pk3s08c1ira5no60!nc_n_webp_mw_1', 
'http://sns-webpic-qc.xhscdn.com/202406121747/b7fb32de5dc7ef70ac09d95884107cd7/1040g2sg30qamrg2c7m4g5o2pk3s08c1iuenb4no!nc_n_webp_mw_1']",上海总要来一趟吧<br/>确实好拍又好逛<br/>如果你只有周末有时间<br/>可以看看我总结的这篇一日游攻略<br/>主打一个花zui少的时间<br/>逛完zui经典的上海<br/>.<br/>✅CityWalk路线<br/>武康大楼—武康路/长乐路—静安寺—陆家嘴三件套—轮渡—外滩(白天)—豫园/城隍庙—南京路步行街—外滩(夜景)<br/>.<br/>1⃣武康大楼<br/>🚈地铁10、11号线交通大学站7号口出<br/>作为上海地标建筑之一超火的网红打卡点始建于1924年中国历史文化名街附近有【宋庆龄故居】选择性游玩🎫20r<br/>.<br/>2⃣武康路/长乐路<br/>比较有上海当地人文气息的宝藏徒步路线,一路可以逛吃不停,街边都是梧桐树,秋天的梧桐树不用我说了吧!氛围感拉满,还有好多爆火的网红店都在这条路线上<br/>.<br/>3⃣静安寺<br/>作为上海zui古老的寺庙琉璃金瓦的古塔建筑被现代化建筑包围碰撞出了别致的美感<br/>🎫50如果不是特意要去拜佛可以不去zui佳打卡机位在对面人行天桥上如果到饭点了可以在静安寺站这附近吃楼下有商场<br/>.<br/>4⃣陆家嘴三件套<br/>📍导航【金茂君悦大酒店】地铁静安寺站乘至陆家嘴站8号口出<br/>📷zui佳拍摄机位需要站在花坛边缘zui容易拍的方式是手机打开广角手机就放在脚下摆好动作定时拍摄<br/>.<br/>5⃣轮渡—外滩白天<br/>🛥2块钱看尽黄浦江两岸风景<br/>打卡完三件套步行至【东昌路渡口】乘坐轮渡到【金陵东路渡口】2块钱坐上轮渡到了外滩就是上海zui核心的地标了这里有万国建筑群对面就是东方明珠群像<br/>.<br/>6⃣豫园/城隍庙(选择性游玩)<br/>豫园🎫30r/城隍庙10r商业街免费<br/>豫园是拥有四百多年历史的江南园林,纸醉金迷上海滩的一处世外桃源,一草一木都不输苏州园林,城隍庙就在豫园隔壁,蛮好逛的<br/>.<br/>7⃣南京路步行街<br/>上海zui具代表性的商业街区非常适合逛吃东方明珠的打卡机位也穿插在各个街道<br/>.<br/>8⃣外滩夜景<br/>来上海怎么能不看外滩夜景呢!金黄色的万国建筑群和对岸东方明珠建筑群无不展示了现代都市美<br/>.<br/>📍交通<br/>🚈高铁杭州东-上海虹桥站<br/>✅游玩路线以地铁+步行为主<br/>地铁可以刷zfb或者下载Metro大都会app<br/>.<br/>
1 652e1987000000001a01728c 上海CityWalk丨花一天时间打卡经典路线‼️ ['http://sns-webpic-qc.xhscdn.com/202406121747/5d91eba5a30122575b2c4146228f05ae/1040g2sg30qamrg2c7m305o2pk3s08c1iul9cau0!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/acd054cac0306a40b3017b83b965aaae/1040g2sg30qamrg2c7m0g5o2pk3s08c1id68puj0!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/8a44ffaff123d9ac7cc8ce59804813cc/1040g2sg30qamrg2c7m105o2pk3s08c1irrp23qo!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/c838f8b3d6b4937fc9eb443eb9439a42/1040g2sg30qamrg2c7m005o2pk3s08c1ih4ij41g!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/a1091a8d90a659eb78aeeba1c27c11a0/1040g2sg30qamrg2c7m1g5o2pk3s08c1ierqbkeg!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/52484f0b10916edfec05be159b17b35c/1040g2sg30qamrg2c7m205o2pk3s08c1iinbrhl8!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/def3baa4ab87428dd9f7668c0e4a0d5b/1040g2sg30qamrg2c7m2g5o2pk3s08c1ivce38f8!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/720636b2cd678875f60aff8c59ac8bda/1040g2sg30qamrg2c7m3g5o2pk3s08c1i07b4gb0!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/422b9900a3da1949703eed0b17645597/1040g2sg30qamrg2c7m405o2pk3s08c1ira5no60!nc_n_webp_mw_1', 'http://sns-webpic-qc.xhscdn.com/202406121747/b7fb32de5dc7ef70ac09d95884107cd7/1040g2sg30qamrg2c7m4g5o2pk3s08c1iuenb4no!nc_n_webp_mw_1'] 
上海总要来一趟吧<br/>确实好拍又好逛<br/>如果你只有周末有时间<br/>可以看看我总结的这篇一日游攻略<br/>主打一个花zui少的时间<br/>逛完zui经典的上海<br/>.<br/>✅CityWalk路线<br/>武康大楼—武康路/长乐路—静安寺—陆家嘴三件套—轮渡—外滩(白天)—豫园/城隍庙—南京路步行街—外滩(夜景)<br/>.<br/>1️⃣武康大楼<br/>🚈地铁10、11号线交通大学站7号口出<br/>作为上海地标建筑之一,超火的网红打卡点,始建于1924年,中国历史文化名街,附近有【宋庆龄故居】选择性游玩(🎫20r)<br/>.<br/>2️⃣武康路/长乐路<br/>比较有上海当地人文气息的宝藏徒步路线,一路可以逛吃不停,街边都是梧桐树,秋天的梧桐树不用我说了吧!氛围感拉满,还有好多爆火的网红店都在这条路线上<br/>.<br/>3️⃣静安寺<br/>作为上海zui古老的寺庙,琉璃金瓦的古塔建筑被现代化建筑包围,碰撞出了别致的美感<br/>🎫50,如果不是特意要去拜佛可以不去,zui佳打卡机位在对面人行天桥上,(如果到饭点了,可以在静安寺站这附近吃,楼下有商场)<br/>.<br/>4️⃣陆家嘴三件套<br/>📍导航【金茂君悦大酒店】,地铁静安寺站乘至陆家嘴站8号口出<br/>📷zui佳拍摄机位需要站在花坛边缘,zui容易拍的方式是手机打开广角,手机就放在脚下,摆好动作定时拍摄<br/>.<br/>5️⃣轮渡—外滩(白天)<br/>🛥2块钱看尽黄浦江两岸风景<br/>打卡完三件套,步行至【东昌路渡口】,乘坐轮渡到【金陵东路渡口】,2块钱坐上轮渡,到了外滩就是上海zui核心的地标了,这里有万国建筑群,对面就是东方明珠群像<br/>.<br/>6️⃣豫园/城隍庙(选择性游玩)<br/>豫园🎫30r/城隍庙10r(商业街免费)<br/>豫园是拥有四百多年历史的江南园林,纸醉金迷上海滩的一处世外桃源,一草一木都不输苏州园林,城隍庙就在豫园隔壁,蛮好逛的<br/>.<br/>7️⃣南京路步行街<br/>上海zui具代表性的商业街区,非常适合逛吃,东方明珠的打卡机位也穿插在各个街道<br/>.<br/>8️⃣外滩(夜景)<br/>来上海怎么能不看外滩夜景呢!金黄色的万国建筑群和对岸东方明珠建筑群无不展示了现代都市美<br/>.<br/>📍交通<br/>🚈高铁杭州东-上海虹桥站<br/>✅游玩路线以地铁+步行为主<br/>地铁可以刷zfb或者下载Metro大都会app<br/>.<br/>

@ -0,0 +1,12 @@
import urllib.parse
def url_encode(s):
    """Return ``s`` percent-encoded for use inside a URL.

    ``safe=''`` is passed so that ``/`` is escaped as well (``quote`` leaves
    it untouched by default).
    """
    encoded = urllib.parse.quote(s, safe='')
    return encoded
# Example: read a string from stdin, URL-encode it, and print the result.
# NOTE(review): no `if __name__ == '__main__':` guard is visible around these
# statements, so they appear to run (and block on input) whenever the module
# is imported — confirm against the real file.
input_string = input("请输入要编码的字符串:")
encoded_string = url_encode(input_string)
print(encoded_string)
Loading…
Cancel
Save