commit_0531

1 year ago · 86932401c2
parent 83fd9e0165
commit 86932401c2
3 changed files with 181 additions and 124 deletions
--- a/PythonRequest01/测试/my_wordcloud.png
+++ b/PythonRequest01/测试/my_wordcloud.png
--- a/PythonRequest01/测试/测试.py
+++ b/PythonRequest01/测试/测试.py
@ -1,36 +1,69 @@
 import urllib.request
-from tkinter import messagebox
+import tkinter as tk
+import tkinter.messagebox
+from PIL import Image, ImageTk
 import mysql.connector

-
-def connect_to_database():
-    try:
-        # 连接数据库
-        connection = mysql.connector.connect(
+# 连接数据库
+connection = mysql.connector.connect(
    host="127.0.0.1",
    user="root",
    password="020425",
    database="douban"
-        )
-        cursor = connection.cursor()
-        messagebox.showinfo("Success", "Connected to MySQL database successfully")
-        return connection, cursor
-    except Exception as e:
-        messagebox.showerror("Error", f"Error connecting to MySQL database: {e}")
+)
+
+# 使用 cursor() 方法创建一个游标对象 cursor
+cursor = connection.cursor()
+# s='三大队'
+
+# 创建窗口
+window = tk.Tk()
+window.title("豆瓣电影评论数据可视化")
+window.geometry("450x300")

+# Banner image drawn on a canvas at the top of the window
+canvas = tk.Canvas(window, height=200, width=500)
+image_file = tk.PhotoImage(file="../image/img.png")
+image = canvas.create_image(0, 0, anchor='nw', image=image_file)
+canvas.pack(side='top')

+# 电影名
+tk.Label(window, text="电影名").place(x=100, y=230)

-url = 'https://movie.douban.com/review/1627740/'

+# Default text pre-filled in the search entry
+var_search = tk.StringVar()
+var_search.set("哈尔的移动城堡")

-headers = {
+# Entry where the user types the movie name to search for
+entry_search = tk.Entry(window, textvariable=var_search)
+entry_search.place(x=150,y=230)
+def search_movie():
+    movie = entry_search.get()
+
+    if movie == "":
+        tk.messagebox.showwarning(title="错误", message="请输入电影名")
+    # 判断数据库里面是否有该电影的URL
+    else:
+        data=tuple()
+        # 获取输入框中的电影名
+        movie = entry_search.get()
+        # 使用 execute()  方法执行 SQL 查询
+        cursor.execute("select url from douban01 where name=%s", (movie,))
+        # 使用 fetchone() 方法获取单条数据.
+        data = cursor.fetchone()
+        if data != None:
+            url = data[0]
+            # url = 'https://movie.douban.com/review/15344422/'
+            # url = urls[0]
+            print(data[0])
+            headers = {
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
                "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
                "Cache-Control": "max-age=0",
                "Connection": "keep-alive",
                "Cookie": 'll="118276"; bid=aRJ3WyvegZU; viewed="26979890"; ap_v=0,6.0',
                "Host": "movie.douban.com",
-    "Referer": "https://movie.douban.com/subject/1308807/",
                "Sec-Ch-Ua": '"Microsoft Edge";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
                "Sec-Ch-Ua-Mobile": "?0",
                "Sec-Ch-Ua-Platform": '"Windows"',
@ -40,79 +73,72 @@ headers = {
                "Sec-Fetch-User": "?1",
                "Upgrade-Insecure-Requests": "1",
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
-}
-
+            }

-# (1) 请求对象的定制
-request = urllib.request.Request(url, headers=headers)
+            # (1) 请求对象的定制
+            request = urllib.request.Request(url, headers=headers)

-# (2)获取响应的数据
-response = urllib.request.urlopen(request)
-content = response.read().decode('utf-8')
+            # (2)获取响应的数据
+            response = urllib.request.urlopen(request)
+            content = response.read().decode('utf-8')

-
-# 和上面的代码一样
-with open('电影评论.html', 'w', encoding='utf-8') as fp:
+            # 和上面的代码一样
+            with open('电影评论.html', 'w', encoding='utf-8') as fp:
                fp.write(content)

+            from bs4 import BeautifulSoup

+            soup = BeautifulSoup(open('电影评论.html', encoding='utf-8'), 'lxml')

+            # 获取节点内容

-from bs4 import BeautifulSoup
-
-soup = BeautifulSoup(open('电影评论.html',encoding='utf-8'),'lxml')
-
-# 获取节点内容
+            tags_with_class = soup.find_all(class_='review-content clearfix')

-tags_with_class = soup.find_all(class_='review-content clearfix')
+            text_list = []

-text_list = []
-
-# 遍历结果集中的每个标签对象，并获取其文本内容
-for tag in tags_with_class:
+            # 遍历结果集中的每个标签对象，并获取其文本内容
+            for tag in tags_with_class:
                text_list.append(tag.text)

-# 将列表转换为字符串
-result_text = '\n'.join(text_list)
-
-# 打印文本内容
-print(result_text)
+            # 将列表转换为字符串
+            result_text = '\n'.join(text_list)

+            # 打印文本内容
+            # print(result_text)

-with open('评论.txt', 'w', encoding='utf-8') as fp:
+            with open('评论.txt', 'w', encoding='utf-8') as fp:
                fp.write(result_text)

+            import jieba
+            import numpy
+            from PIL import Image
+            from wordcloud import WordCloud
+            import matplotlib.pyplot as plt

+            txt = open("评论.txt", "r", encoding='utf-8').read()
+            words = " ".join(list(jieba.cut(txt)))

-import jieba
-import numpy
-from PIL import Image
-from wordcloud import WordCloud
-import matplotlib.pyplot as plt
-
-txt = open("../CommentRequest//评论.txt", "r", encoding='utf-8').read()
-words = " ".join(list(jieba.cut(txt)))
+            counts = {}

-counts = {}
+            # 停用词表设置
+            stopwords = [i.strip() for i in open("../Text/stopwords.txt", encoding='utf-8').readlines()]

-# 停用词表设置
-stopwords = [i.strip() for i in open("../Text/stopwords.txt", encoding='utf-8').readlines()]
-
-for word in words:
+            for word in words:
                if len(word) == 1:
                    continue
                else:
                    # 遍历所有词语，每出现一次其对应的值加 1
                    counts[word] = counts.get(word, 0) + 1

-items = list(counts.items())
-# 根据词语出现的次数进行从大到小排序
-items.sort(key=lambda x: x[1], reverse=True)
+            items = list(counts.items())
+            # 根据词语出现的次数进行从大到小排序
+            items.sort(key=lambda x: x[1], reverse=True)

-mask = numpy.array(Image.open("../image/bg2.png"))
-wordcloud = WordCloud(font_path='../image/SimHei.ttf',
+            mask = numpy.array(Image.open("../image/bg2.png"))
+            wordcloud = WordCloud(font_path='../image/SimHei.ttf',
                                  width=800,
                                  height=400,
+                                  # mask=mask
                                  background_color='white',
                                  mode='RGBA',
                                  max_words=150,
@ -120,18 +146,49 @@ wordcloud = WordCloud(font_path='../image/SimHei.ttf',
                                  # mask=mask
                                  ).generate(words)

+            # 显示词云图
+            plt.figure(figsize=(10, 5))
+            plt.imshow(wordcloud, interpolation='bilinear')
+            plt.title('', fontproperties='SimHei')
+            plt.axis('off')  # 不显示坐标轴
+            # plt.show()
+
+            # 保存词云图
+            wordcloud.to_file("my_wordcloud.png")
+
+            # tk.messagebox.showinfo(title="搜索", message="你搜索了" + movie)
+            window_title = tk.Toplevel(window)
+            window_title.geometry("500x300")
+            window_title.title('《' + movie + '》' + "词云图")
+
+            # 加载图片
+            image = Image.open("my_wordcloud.png")
+            image_resized = image.resize((400, 280))
+            photo_image = ImageTk.PhotoImage(image=image_resized)
+
+            # 创建Canvas并添加图片
+            canvas = tk.Canvas(window_title, width=image_resized.width, height=image_resized.height)
+            canvas.pack()
+
+            # 在Canvas上创建一个image对象
+            image_id = canvas.create_image(0, 0, anchor='nw', image=photo_image)
+
+            # 确保图片不会被垃圾回收
+            canvas.image = photo_image
+        else:
+            tk.messagebox.showwarning(title="错误", message="没有找到该电影")
+
+

+# Search button: clicking it runs search_movie
+bth_search = tk.Button(window, text="搜索", command=search_movie)
+bth_search.place(x=310,y=225)

-# 显示词云图
-plt.figure(figsize=(10, 5))
-plt.imshow(wordcloud, interpolation='bilinear')
-plt.title('', fontproperties='SimHei')
-plt.axis('off')  # 不显示坐标轴
-plt.show()

-# 保存词云图
-wordcloud.to_file("../image/my_wordcloud.png")
+window.mainloop()

+# 关闭数据库连接
+connection.close()



--- a/PythonRequest01/测试/电影评论.html
+++ b/PythonRequest01/测试/电影评论.html
@ -328,7 +328,7 @@
  <link rel="stylesheet" href="https://img1.doubanio.com/f/zerkalo/3aeb281ab0e4f2c7050458684acfeb6838441de9/css/review/editor/ng/setting_standalone.css" />
  
  <div class="main-bd" id="review-1627740-content"
-    data-ad-ext="有用9025 · 没用198">
+    data-ad-ext="有用9029 · 没用198">

    
  
@ -358,7 +358,7 @@
  
  <div class="main-panel-useful" data-rid="1627740" data-is_owner="false" data-can_vote="true" data-is_tv="false">
  <button class="btn useful_count j a_show_login" data-rid="1627740">
-      有用 9025
+      有用 9029
  </button>
  <button class="btn useless_count j a_show_login"  data-rid="1627740">
      没用 198
@ -871,7 +871,7 @@ for(var i = 0, l = accounts.length; i < l; i++) {



-  <!-- dae-web-zerkalo--default-59f6ddfb95-crsjw-->
+  <!-- dae-web-zerkalo--default-59f6ddfb95-kth7m-->

  <script>_SPLITTEST=''</script>
 </body>