commit_0531

main
dqs2956213868 9 months ago
parent 83fd9e0165
commit 86932401c2

Binary file not shown.

After

Width:  |  Height:  |  Size: 217 KiB

@ -1,137 +1,194 @@
import urllib.request
from tkinter import messagebox
import tkinter as tk
import tkinter.messagebox
from PIL import Image, ImageTk
import mysql.connector
# Connect to the database.
# NOTE(review): connection is opened at import time with hard-coded
# credentials — consider moving these to a config file or env vars.
connection = mysql.connector.connect(
host="127.0.0.1",
user="root",
password="020425",
database="douban"
)
# Create a cursor object with the cursor() method
cursor = connection.cursor()
# s='三大队'
# Create the main application window
window = tk.Tk()
window.title("豆瓣电影评论数据可视化")
window.geometry("450x300")
# welcome to image
canvas = tk.Canvas(window, height=200, width=500)
image_file = tk.PhotoImage(file="../image/img.png")
image = canvas.create_image(0, 0, anchor='nw', image=image_file)
canvas.pack(side='top')
# Label for the movie-name field
tk.Label(window, text="电影名").place(x=100, y=230)
# Default/placeholder text shown in the search entry
var_search = tk.StringVar()
var_search.set("哈尔的移动城堡")
# Entry widget holding the movie name to search for
entry_search = tk.Entry(window, textvariable=var_search)
entry_search.place(x=150,y=230)
# Handler for the search button: look up the movie's review URL in MySQL,
# scrape the Douban review page, and build a word cloud from the comments.
# NOTE(review): this listing comes from a diff view — the original
# indentation has been stripped, so nesting below is flattened; the
# matching `else` (movie not found) appears further down in the diff.
def search_movie():
movie = entry_search.get()
if movie == "":
tk.messagebox.showwarning(title="错误", message="请输入电影名")
# Check whether the database holds a review URL for this movie
else:
data=tuple()
# Read the movie name from the entry widget
movie = entry_search.get()
# Run the SQL query with execute() (parameterized — safe from injection)
cursor.execute("select url from douban01 where name=%s", (movie,))
# fetchone() returns a single row tuple, or None when there is no match
data = cursor.fetchone()
if data != None:
url = data[0]
# url = 'https://movie.douban.com/review/15344422/'
# url = urls[0]
print(data[0])
# Browser-like headers so Douban serves the normal page
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
"Cache-Control": "max-age=0",
"Connection": "keep-alive",
"Cookie": 'll="118276"; bid=aRJ3WyvegZU; viewed="26979890"; ap_v=0,6.0',
"Host": "movie.douban.com",
"Sec-Ch-Ua": '"Microsoft Edge";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
"Sec-Ch-Ua-Mobile": "?0",
"Sec-Ch-Ua-Platform": '"Windows"',
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-User": "?1",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
}
# (1) Build the request object with the headers above
request = urllib.request.Request(url, headers=headers)
# (2) Fetch the response body
response = urllib.request.urlopen(request)
content = response.read().decode('utf-8')
# Persist the raw HTML to disk (same as the code above)
with open('电影评论.html', 'w', encoding='utf-8') as fp:
fp.write(content)
from bs4 import BeautifulSoup
soup = BeautifulSoup(open('电影评论.html', encoding='utf-8'), 'lxml')
# Extract the review-body nodes
tags_with_class = soup.find_all(class_='review-content clearfix')
text_list = []
# Collect the text of every matched tag
for tag in tags_with_class:
text_list.append(tag.text)
# Join the list into a single string
result_text = '\n'.join(text_list)
# Print the text content
# print(result_text)
with open('评论.txt', 'w', encoding='utf-8') as fp:
fp.write(result_text)
import jieba
import numpy
from PIL import Image
from wordcloud import WordCloud
import matplotlib.pyplot as plt
txt = open("评论.txt", "r", encoding='utf-8').read()
words = " ".join(list(jieba.cut(txt)))
counts = {}
# Load the stop-word list
stopwords = [i.strip() for i in open("../Text/stopwords.txt", encoding='utf-8').readlines()]
# NOTE(review): `words` is a string, so this loop iterates CHARACTERS,
# not jieba tokens, and the len(word) == 1 guard skips every CJK char —
# `counts`/`items` end up effectively unused. Verify intent; the word
# cloud itself is generated from `words` below, so output still works.
for word in words:
if len(word) == 1:
continue
else:
# Count each occurrence (increment by 1 per appearance)
counts[word] = counts.get(word, 0) + 1
items = list(counts.items())
# Sort by frequency, descending
items.sort(key=lambda x: x[1], reverse=True)
mask = numpy.array(Image.open("../image/bg2.png"))
wordcloud = WordCloud(font_path='../image/SimHei.ttf',
width=800,
height=400,
# mask=mask
background_color='white',
mode='RGBA',
max_words=150,
stopwords=stopwords,
# mask=mask
).generate(words)
# Render the word cloud
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.title('', fontproperties='SimHei')
plt.axis('off') # hide the axes
# plt.show()
# Save the word-cloud image
wordcloud.to_file("my_wordcloud.png")
# tk.messagebox.showinfo(title="搜索", message="你搜索了" + movie)
# Pop-up window that will display the generated word cloud
window_title = tk.Toplevel(window)
window_title.geometry("500x300")
window_title.title('' + movie + '' + "词云图")
def connect_to_database():
    """Open a MySQL connection to the local `douban` database.

    Shows a tkinter info dialog on success and an error dialog on failure.

    Returns:
        tuple: ``(connection, cursor)`` on success, ``(None, None)`` on
        failure.  The original implicitly returned ``None`` on failure,
        which made ``connection, cursor = connect_to_database()`` raise
        TypeError at the call site instead of surfacing the DB error.
    """
    try:
        # Connect to the database.
        # NOTE(review): credentials are hard-coded — consider config/env vars.
        connection = mysql.connector.connect(
            host="127.0.0.1",
            user="root",
            password="020425",
            database="douban"
        )
        cursor = connection.cursor()
        messagebox.showinfo("Success", "Connected to MySQL database successfully")
        return connection, cursor
    except Exception as e:
        messagebox.showerror("Error", f"Error connecting to MySQL database: {e}")
        # Fix: return a 2-tuple so callers that unpack the result do not
        # crash with "cannot unpack non-iterable NoneType".
        return None, None
# NOTE(review): the lines below appear to be the PRE-refactor flat script
# (the "removed" side of this diff) — a hard-coded review URL instead of
# the DB lookup used by search_movie().
url = 'https://movie.douban.com/review/1627740/'
# Browser-like headers so Douban serves the normal page
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
"Cache-Control": "max-age=0",
"Connection": "keep-alive",
"Cookie": 'll="118276"; bid=aRJ3WyvegZU; viewed="26979890"; ap_v=0,6.0',
"Host": "movie.douban.com",
"Referer": "https://movie.douban.com/subject/1308807/",
"Sec-Ch-Ua": '"Microsoft Edge";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
"Sec-Ch-Ua-Mobile": "?0",
"Sec-Ch-Ua-Platform": '"Windows"',
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-User": "?1",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
}
# (1) Build the request object with the headers above
request = urllib.request.Request(url, headers=headers)
# (2) Fetch the response body
response = urllib.request.urlopen(request)
content = response.read().decode('utf-8')
# Persist the raw HTML to disk (same as the code above)
with open('电影评论.html', 'w', encoding='utf-8') as fp:
fp.write(content)
from bs4 import BeautifulSoup
soup = BeautifulSoup(open('电影评论.html',encoding='utf-8'),'lxml')
# Extract the review-body nodes
tags_with_class = soup.find_all(class_='review-content clearfix')
text_list = []
# Collect the text of every matched tag
for tag in tags_with_class:
text_list.append(tag.text)
# Join the list into a single string
result_text = '\n'.join(text_list)
# Print the text content
print(result_text)
with open('评论.txt', 'w', encoding='utf-8') as fp:
fp.write(result_text)
import jieba
import numpy
from PIL import Image
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# NOTE(review): path differs from the one written above ('评论.txt' in the
# CWD vs '../CommentRequest//评论.txt') — verify they refer to the same file.
txt = open("../CommentRequest//评论.txt", "r", encoding='utf-8').read()
words = " ".join(list(jieba.cut(txt)))
counts = {}
# Load the generated word-cloud image for display in the pop-up window
image = Image.open("my_wordcloud.png")
image_resized = image.resize((400, 280))
photo_image = ImageTk.PhotoImage(image=image_resized)
# Load the stop-word list
stopwords = [i.strip() for i in open("../Text/stopwords.txt", encoding='utf-8').readlines()]
# Create a Canvas in the pop-up window and add the image
canvas = tk.Canvas(window_title, width=image_resized.width, height=image_resized.height)
canvas.pack()
# NOTE(review): iterates characters of the `words` string, not jieba
# tokens — see the matching note in search_movie(); confirm intent.
for word in words:
if len(word) == 1:
continue
else:
# Count each occurrence (increment by 1 per appearance)
counts[word] = counts.get(word, 0) + 1
# Create an image object on the Canvas
image_id = canvas.create_image(0, 0, anchor='nw', image=photo_image)
items = list(counts.items())
# Sort by frequency, descending
items.sort(key=lambda x: x[1], reverse=True)
# Keep a reference so the image is not garbage-collected
canvas.image = photo_image
# Branch of search_movie(): no DB row found for the entered movie name
else:
tk.messagebox.showwarning(title="错误", message="没有找到该电影")
mask = numpy.array(Image.open("../image/bg2.png"))
wordcloud = WordCloud(font_path='../image/SimHei.ttf',
width=800,
height=400,
background_color='white',
mode='RGBA',
max_words=150,
stopwords=stopwords,
# mask=mask
).generate(words)
# Search button: triggers search_movie() on click
bth_search = tk.Button(window, text="搜索", command=search_movie)
bth_search.place(x=310,y=225)
# Render the word cloud
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.title('', fontproperties='SimHei')
plt.axis('off') # hide the axes
plt.show()
# Save the word-cloud image
wordcloud.to_file("../image/my_wordcloud.png")
window.mainloop()
# Close the database connection.
# NOTE(review): only reached after the Tk main loop exits.
connection.close()

@ -328,7 +328,7 @@
<link rel="stylesheet" href="https://img1.doubanio.com/f/zerkalo/3aeb281ab0e4f2c7050458684acfeb6838441de9/css/review/editor/ng/setting_standalone.css" />
<div class="main-bd" id="review-1627740-content"
data-ad-ext="有用9025 · 没用198">
data-ad-ext="有用9029 · 没用198">
@ -358,7 +358,7 @@
<div class="main-panel-useful" data-rid="1627740" data-is_owner="false" data-can_vote="true" data-is_tv="false">
<button class="btn useful_count j a_show_login" data-rid="1627740">
有用 9025
有用 9029
</button>
<button class="btn useless_count j a_show_login" data-rid="1627740">
没用 198
@ -871,7 +871,7 @@ for(var i = 0, l = accounts.length; i < l; i++) {
<!-- dae-web-zerkalo--default-59f6ddfb95-crsjw-->
<!-- dae-web-zerkalo--default-59f6ddfb95-kth7m-->
<script>_SPLITTEST=''</script>
</body>

Loading…
Cancel
Save