commit_0530

main
dqs2956213868 6 months ago
parent 4bf39cc50a
commit 83fd9e0165

@ -0,0 +1,137 @@
import urllib.request
from tkinter import messagebox
import mysql.connector
def connect_to_database(host="127.0.0.1", user="root", password="020425", database="douban"):
    """Open a MySQL connection and report the outcome via a Tk message box.

    The connection parameters default to the original hard-coded values, so
    existing ``connect_to_database()`` callers behave exactly as before, while
    other credentials can now be supplied without editing the function.

    Returns:
        tuple[connection, cursor] on success, or None on failure (the error
        is shown in an error dialog instead of being raised).
    """
    try:
        connection = mysql.connector.connect(
            host=host,
            user=user,
            password=password,
            database=database,
        )
        cursor = connection.cursor()
        messagebox.showinfo("Success", "Connected to MySQL database successfully")
        return connection, cursor
    except mysql.connector.Error as e:
        # Catch only driver errors — a programming bug (e.g. NameError)
        # should surface normally instead of being shown as a DB failure.
        messagebox.showerror("Error", f"Error connecting to MySQL database: {e}")
        return None
# Douban review page to scrape, plus browser-like headers copied from a real
# Edge session so the site serves the page instead of a bot-check response.
url = 'https://movie.douban.com/review/1627740/'
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
    "Cookie": 'll="118276"; bid=aRJ3WyvegZU; viewed="26979890"; ap_v=0,6.0',
    "Host": "movie.douban.com",
    "Referer": "https://movie.douban.com/subject/1308807/",
    "Sec-Ch-Ua": '"Microsoft Edge";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"Windows"',
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
}
# (1) Build the request object with the spoofed headers.
request = urllib.request.Request(url, headers=headers)
# (2) Fetch the page. Use a context manager so the HTTP response (and its
# underlying socket) is closed even if decoding fails — the original
# never closed it.
with urllib.request.urlopen(request) as response:
    content = response.read().decode('utf-8')
# Persist the raw HTML so the parsing step can run offline.
with open('电影评论.html', 'w', encoding='utf-8') as fp:
    fp.write(content)
from bs4 import BeautifulSoup

# Parse the saved HTML. Open the file with a context manager so the handle
# is closed deterministically — the original passed an open file object to
# BeautifulSoup and leaked it.
with open('电影评论.html', encoding='utf-8') as fp:
    soup = BeautifulSoup(fp, 'lxml')
# Every review body sits in a node with this class pair.
tags_with_class = soup.find_all(class_='review-content clearfix')
# Collect the visible text of each review node.
text_list = [tag.text for tag in tags_with_class]
# Join all reviews into one newline-separated string.
result_text = '\n'.join(text_list)
print(result_text)
# Save the extracted text for the word-cloud phase.
with open('评论.txt', 'w', encoding='utf-8') as fp:
    fp.write(result_text)
import jieba
import numpy
from PIL import Image
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Read the comment text produced by the scraping step.
with open("../CommentRequest//评论.txt", "r", encoding='utf-8') as f:
    txt = f.read()
# Tokenize with jieba; keep both the token list (for counting) and the
# space-joined string (WordCloud.generate expects one big string).
word_list = list(jieba.cut(txt))
words = " ".join(word_list)
# Stop-word set; use a context manager so the file is closed.
with open("../Text/stopwords.txt", encoding='utf-8') as f:
    stopwords = {line.strip() for line in f}
# BUG FIX: the original looped `for word in words:` over the *string*,
# visiting one character at a time — every character has len() == 1, so the
# `continue` always fired and `counts` stayed empty. Count the actual
# tokens instead, still skipping single-character tokens.
counts = {}
for word in word_list:
    if len(word) == 1:
        continue
    # Each occurrence of a word bumps its tally by one.
    counts[word] = counts.get(word, 0) + 1
# Word frequencies sorted most-common-first.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
# Mask image that shapes the cloud. The original loaded it on every run even
# though the `mask=` argument below is commented out — re-enable both lines
# together to use it.
# mask = numpy.array(Image.open("../image/bg2.png"))
wordcloud = WordCloud(font_path='../image/SimHei.ttf',
                      width=800,
                      height=400,
                      background_color='white',
                      mode='RGBA',
                      max_words=150,
                      stopwords=stopwords,
                      # mask=mask
                      ).generate(words)
# Display the word cloud (no axes — it is an image, not a chart).
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.title('', fontproperties='SimHei')
plt.axis('off')
plt.show()
# Save the rendered cloud next to the other image assets.
wordcloud.to_file("../image/my_wordcloud.png")

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long
Loading…
Cancel
Save