parent
83fd9e0165
commit
86932401c2
After Width: | Height: | Size: 217 KiB |
@ -1,137 +1,194 @@
|
||||
# Standard-library imports.
import urllib.request
import tkinter as tk
import tkinter.messagebox
from tkinter import messagebox

# Third-party imports.
from PIL import Image, ImageTk
import mysql.connector

# Connect to the local MySQL database that holds the movie -> review-URL table.
# NOTE(review): credentials are hard-coded; move them to a config file or
# environment variables before sharing/deploying this script.
connection = mysql.connector.connect(
    host="127.0.0.1",
    user="root",
    password="020425",
    database="douban",
)

# Cursor shared by search_movie() for its URL lookups.
cursor = connection.cursor()
||||
# --- Main window -----------------------------------------------------------
window = tk.Tk()
window.title("豆瓣电影评论数据可视化")
window.geometry("450x300")

# Banner image across the top of the window.
canvas = tk.Canvas(window, height=200, width=500)
image_file = tk.PhotoImage(file="../image/img.png")
image = canvas.create_image(0, 0, anchor='nw', image=image_file)
canvas.pack(side='top')

# Label for the movie-name entry.
tk.Label(window, text="电影名").place(x=100, y=230)

# Search entry, pre-filled with a default movie name as a hint.
var_search = tk.StringVar()
var_search.set("哈尔的移动城堡")
entry_search = tk.Entry(window, textvariable=var_search)
entry_search.place(x=150, y=230)
|
||||
def search_movie():
    """Scrape douban reviews for the entered movie and show a word cloud.

    Reads the movie name from ``entry_search``, looks up its review-page URL
    in table ``douban01``, downloads and parses the review bodies, builds a
    word cloud image, and displays it in a new ``Toplevel`` window.

    Warns via a message box when the input is empty or the movie is not in
    the database.
    """
    movie = entry_search.get()
    if movie == "":
        tk.messagebox.showwarning(title="错误", message="请输入电影名")
        return

    # Look up the review-page URL for this movie (parameterized query).
    cursor.execute("select url from douban01 where name=%s", (movie,))
    data = cursor.fetchone()
    if data is None:
        tk.messagebox.showwarning(title="错误", message="没有找到该电影")
        return

    url = data[0]
    print(url)

    # Browser-like headers so douban serves the normal HTML page.
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Cookie": 'll="118276"; bid=aRJ3WyvegZU; viewed="26979890"; ap_v=0,6.0',
        "Host": "movie.douban.com",
        "Sec-Ch-Ua": '"Microsoft Edge";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": '"Windows"',
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
    }

    # (1) Build the request object, (2) fetch and decode the page.
    request = urllib.request.Request(url, headers=headers)
    response = urllib.request.urlopen(request)
    content = response.read().decode('utf-8')

    # Persist the raw HTML (kept for parity with the original workflow).
    with open('电影评论.html', 'w', encoding='utf-8') as fp:
        fp.write(content)

    # Parse the review bodies out of the saved HTML.
    from bs4 import BeautifulSoup
    with open('电影评论.html', encoding='utf-8') as fp:
        soup = BeautifulSoup(fp, 'lxml')
    tags_with_class = soup.find_all(class_='review-content clearfix')
    result_text = '\n'.join(tag.text for tag in tags_with_class)

    with open('评论.txt', 'w', encoding='utf-8') as fp:
        fp.write(result_text)

    # Word-cloud generation (imports kept function-local, as in the original).
    import jieba
    import numpy
    from wordcloud import WordCloud
    import matplotlib.pyplot as plt

    with open("评论.txt", "r", encoding='utf-8') as fp:
        txt = fp.read()
    words = " ".join(jieba.cut(txt))

    # NOTE(review): this loop iterates the joined STRING character by
    # character, so len(word) == 1 is always true and `counts` stays empty.
    # Iterate the jieba token list instead if word frequencies are needed;
    # left unchanged because nothing below consumes `counts`/`items`.
    counts = {}
    for word in words:
        if len(word) == 1:
            continue
        counts[word] = counts.get(word, 0) + 1
    items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

    # Stop-word list for the word cloud.
    with open("../Text/stopwords.txt", encoding='utf-8') as fp:
        stopwords = [line.strip() for line in fp.readlines()]

    # Mask image is loaded but currently unused (mask= is commented out below).
    mask = numpy.array(Image.open("../image/bg2.png"))
    wordcloud = WordCloud(font_path='../image/SimHei.ttf',
                          width=800,
                          height=400,
                          background_color='white',
                          mode='RGBA',
                          max_words=150,
                          stopwords=stopwords,
                          # mask=mask
                          ).generate(words)

    # Render (not shown interactively) and save the word-cloud image.
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.title('', fontproperties='SimHei')
    plt.axis('off')  # hide the axes
    # plt.show()
    wordcloud.to_file("my_wordcloud.png")

    # Show the generated word cloud in a new top-level window.
    window_title = tk.Toplevel(window)
    window_title.geometry("500x300")
    window_title.title('《' + movie + '》' + "词云图")

    cloud_image = Image.open("my_wordcloud.png")
    image_resized = cloud_image.resize((400, 280))
    photo_image = ImageTk.PhotoImage(image=image_resized)

    cloud_canvas = tk.Canvas(window_title, width=image_resized.width,
                             height=image_resized.height)
    cloud_canvas.pack()
    cloud_canvas.create_image(0, 0, anchor='nw', image=photo_image)
    # Keep a reference so the PhotoImage is not garbage-collected.
    cloud_canvas.image = photo_image


def connect_to_database():
    """Open a connection to the local ``douban`` MySQL database.

    Returns
    -------
    tuple | None
        ``(connection, cursor)`` on success; shows an error box and returns
        ``None`` on failure (made explicit here — the original fell through).
    """
    try:
        connection = mysql.connector.connect(
            host="127.0.0.1",
            user="root",
            password="020425",
            database="douban"
        )
        cursor = connection.cursor()
        messagebox.showinfo("Success", "Connected to MySQL database successfully")
        return connection, cursor
    except Exception as e:
        messagebox.showerror("Error", f"Error connecting to MySQL database: {e}")
        return None
|
||||
|
||||
# --- Search button and main loop -------------------------------------------
# (The duplicated pre-refactor fetch/word-cloud code that was interleaved
# here was old-diff residue; its logic lives inside search_movie() now.)
bth_search = tk.Button(window, text="搜索", command=search_movie)
bth_search.place(x=310, y=225)

window.mainloop()

# Close the database connection once the GUI exits.
connection.close()
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in new issue