|
|
|
@ -1,36 +1,69 @@
|
|
|
|
|
import urllib.request
|
|
|
|
|
from tkinter import messagebox
|
|
|
|
|
import tkinter as tk
|
|
|
|
|
import tkinter.messagebox
|
|
|
|
|
from PIL import Image, ImageTk
|
|
|
|
|
import mysql.connector
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def connect_to_database():
    """Open a connection to the local ``douban`` MySQL database.

    A tkinter message box reports the outcome to the user in both cases.

    Returns:
        A ``(connection, cursor)`` tuple on success, or ``None`` if connecting
        (or creating the cursor) raised. The error is shown in a dialog
        instead of being propagated, so callers must check for ``None``.
    """
    # NOTE(review): host and credentials are hard-coded here; consider moving
    # them to configuration before sharing or deploying this script.
    try:
        connection = mysql.connector.connect(
            host="127.0.0.1",
            user="root",
            password="020425",
            database="douban",
        )
        cursor = connection.cursor()
    except Exception as e:
        # UI boundary: report any failure to the user rather than crashing.
        messagebox.showerror("Error", f"Error connecting to MySQL database: {e}")
        return None
    else:
        # Kept outside the try body so a dialog failure is not misreported
        # as a database connection error.
        messagebox.showinfo("Success", "Connected to MySQL database successfully")
        return connection, cursor
|
|
|
|
|
|
|
|
|
|
# Create the main window first so the dialogs shown by
# connect_to_database() have a root window to attach to.
window = tk.Tk()
window.title("豆瓣电影评论数据可视化")
window.geometry("450x300")

# Open the database once at startup. search_movie() runs its query through
# the module-level `cursor`, and `connection` is closed once the main loop
# has finished.
db_handles = connect_to_database()
if db_handles is None:
    # connect_to_database() has already shown the error dialog; nothing in
    # this script can work without a cursor, so stop here.
    raise SystemExit(1)
connection, cursor = db_handles

# Banner image at the top of the window. `image_file` must stay referenced
# at module level, otherwise tkinter drops the picture when it is collected.
canvas = tk.Canvas(window, height=200, width=500)
image_file = tk.PhotoImage(file="../image/img.png")
image = canvas.create_image(0, 0, anchor='nw', image=image_file)
canvas.pack(side='top')

# "Movie title" label placed to the left of the search entry.
tk.Label(window, text="电影名").place(x=100, y=230)

# NOTE(review): no module-level statement visible here reads `url`;
# search_movie() assigns its own `url` from the database row. Confirm it is
# unused before removing it.
url = 'https://movie.douban.com/review/1627740/'

# Text variable backing the search entry, pre-filled with a sample title.
var_search = tk.StringVar()
var_search.set("哈尔的移动城堡")

# Entry widget in which the user types the movie to search for.
entry_search = tk.Entry(window, textvariable=var_search)
entry_search.place(x=150, y=230)
|
|
|
|
|
def search_movie():
|
|
|
|
|
movie = entry_search.get()
|
|
|
|
|
|
|
|
|
|
if movie == "":
|
|
|
|
|
tk.messagebox.showwarning(title="错误", message="请输入电影名")
|
|
|
|
|
# 判断数据库里面是否有该电影的URL
|
|
|
|
|
else:
|
|
|
|
|
data=tuple()
|
|
|
|
|
# 获取输入框中的电影名
|
|
|
|
|
movie = entry_search.get()
|
|
|
|
|
# 使用 execute() 方法执行 SQL 查询
|
|
|
|
|
cursor.execute("select url from douban01 where name=%s", (movie,))
|
|
|
|
|
# 使用 fetchone() 方法获取单条数据.
|
|
|
|
|
data = cursor.fetchone()
|
|
|
|
|
if data != None:
|
|
|
|
|
url = data[0]
|
|
|
|
|
# url = 'https://movie.douban.com/review/15344422/'
|
|
|
|
|
# url = urls[0]
|
|
|
|
|
print(data[0])
|
|
|
|
|
headers = {
|
|
|
|
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
|
|
|
|
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
|
|
|
|
|
"Cache-Control": "max-age=0",
|
|
|
|
|
"Connection": "keep-alive",
|
|
|
|
|
"Cookie": 'll="118276"; bid=aRJ3WyvegZU; viewed="26979890"; ap_v=0,6.0',
|
|
|
|
|
"Host": "movie.douban.com",
|
|
|
|
|
"Referer": "https://movie.douban.com/subject/1308807/",
|
|
|
|
|
"Sec-Ch-Ua": '"Microsoft Edge";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
|
|
|
|
|
"Sec-Ch-Ua-Mobile": "?0",
|
|
|
|
|
"Sec-Ch-Ua-Platform": '"Windows"',
|
|
|
|
@ -40,79 +73,72 @@ headers = {
|
|
|
|
|
"Sec-Fetch-User": "?1",
|
|
|
|
|
"Upgrade-Insecure-Requests": "1",
|
|
|
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# (1) 请求对象的定制
|
|
|
|
|
request = urllib.request.Request(url, headers=headers)
|
|
|
|
|
# (1) 请求对象的定制
|
|
|
|
|
request = urllib.request.Request(url, headers=headers)
|
|
|
|
|
|
|
|
|
|
# (2)获取响应的数据
|
|
|
|
|
response = urllib.request.urlopen(request)
|
|
|
|
|
content = response.read().decode('utf-8')
|
|
|
|
|
# (2)获取响应的数据
|
|
|
|
|
response = urllib.request.urlopen(request)
|
|
|
|
|
content = response.read().decode('utf-8')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 和上面的代码一样
|
|
|
|
|
with open('电影评论.html', 'w', encoding='utf-8') as fp:
|
|
|
|
|
# 和上面的代码一样
|
|
|
|
|
with open('电影评论.html', 'w', encoding='utf-8') as fp:
|
|
|
|
|
fp.write(content)
|
|
|
|
|
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
|
|
soup = BeautifulSoup(open('电影评论.html', encoding='utf-8'), 'lxml')
|
|
|
|
|
|
|
|
|
|
# 获取节点内容
|
|
|
|
|
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
|
|
soup = BeautifulSoup(open('电影评论.html',encoding='utf-8'),'lxml')
|
|
|
|
|
|
|
|
|
|
# 获取节点内容
|
|
|
|
|
tags_with_class = soup.find_all(class_='review-content clearfix')
|
|
|
|
|
|
|
|
|
|
tags_with_class = soup.find_all(class_='review-content clearfix')
|
|
|
|
|
text_list = []
|
|
|
|
|
|
|
|
|
|
text_list = []
|
|
|
|
|
|
|
|
|
|
# 遍历结果集中的每个标签对象,并获取其文本内容
|
|
|
|
|
for tag in tags_with_class:
|
|
|
|
|
# 遍历结果集中的每个标签对象,并获取其文本内容
|
|
|
|
|
for tag in tags_with_class:
|
|
|
|
|
text_list.append(tag.text)
|
|
|
|
|
|
|
|
|
|
# 将列表转换为字符串
|
|
|
|
|
result_text = '\n'.join(text_list)
|
|
|
|
|
|
|
|
|
|
# 打印文本内容
|
|
|
|
|
print(result_text)
|
|
|
|
|
# 将列表转换为字符串
|
|
|
|
|
result_text = '\n'.join(text_list)
|
|
|
|
|
|
|
|
|
|
# 打印文本内容
|
|
|
|
|
# print(result_text)
|
|
|
|
|
|
|
|
|
|
with open('评论.txt', 'w', encoding='utf-8') as fp:
|
|
|
|
|
with open('评论.txt', 'w', encoding='utf-8') as fp:
|
|
|
|
|
fp.write(result_text)
|
|
|
|
|
|
|
|
|
|
import jieba
|
|
|
|
|
import numpy
|
|
|
|
|
from PIL import Image
|
|
|
|
|
from wordcloud import WordCloud
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
txt = open("评论.txt", "r", encoding='utf-8').read()
|
|
|
|
|
words = " ".join(list(jieba.cut(txt)))
|
|
|
|
|
|
|
|
|
|
import jieba
|
|
|
|
|
import numpy
|
|
|
|
|
from PIL import Image
|
|
|
|
|
from wordcloud import WordCloud
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
txt = open("../CommentRequest//评论.txt", "r", encoding='utf-8').read()
|
|
|
|
|
words = " ".join(list(jieba.cut(txt)))
|
|
|
|
|
counts = {}
|
|
|
|
|
|
|
|
|
|
counts = {}
|
|
|
|
|
# 停用词表设置
|
|
|
|
|
stopwords = [i.strip() for i in open("../Text/stopwords.txt", encoding='utf-8').readlines()]
|
|
|
|
|
|
|
|
|
|
# 停用词表设置
|
|
|
|
|
stopwords = [i.strip() for i in open("../Text/stopwords.txt", encoding='utf-8').readlines()]
|
|
|
|
|
|
|
|
|
|
for word in words:
|
|
|
|
|
for word in words:
|
|
|
|
|
if len(word) == 1:
|
|
|
|
|
continue
|
|
|
|
|
else:
|
|
|
|
|
# 遍历所有词语,每出现一次其对应的值加 1
|
|
|
|
|
counts[word] = counts.get(word, 0) + 1
|
|
|
|
|
|
|
|
|
|
items = list(counts.items())
|
|
|
|
|
# 根据词语出现的次数进行从大到小排序
|
|
|
|
|
items.sort(key=lambda x: x[1], reverse=True)
|
|
|
|
|
items = list(counts.items())
|
|
|
|
|
# 根据词语出现的次数进行从大到小排序
|
|
|
|
|
items.sort(key=lambda x: x[1], reverse=True)
|
|
|
|
|
|
|
|
|
|
mask = numpy.array(Image.open("../image/bg2.png"))
|
|
|
|
|
wordcloud = WordCloud(font_path='../image/SimHei.ttf',
|
|
|
|
|
mask = numpy.array(Image.open("../image/bg2.png"))
|
|
|
|
|
wordcloud = WordCloud(font_path='../image/SimHei.ttf',
|
|
|
|
|
width=800,
|
|
|
|
|
height=400,
|
|
|
|
|
# mask=mask
|
|
|
|
|
background_color='white',
|
|
|
|
|
mode='RGBA',
|
|
|
|
|
max_words=150,
|
|
|
|
@ -120,18 +146,49 @@ wordcloud = WordCloud(font_path='../image/SimHei.ttf',
|
|
|
|
|
# mask=mask
|
|
|
|
|
).generate(words)
|
|
|
|
|
|
|
|
|
|
# 显示词云图
|
|
|
|
|
plt.figure(figsize=(10, 5))
|
|
|
|
|
plt.imshow(wordcloud, interpolation='bilinear')
|
|
|
|
|
plt.title('', fontproperties='SimHei')
|
|
|
|
|
plt.axis('off') # 不显示坐标轴
|
|
|
|
|
# plt.show()
|
|
|
|
|
|
|
|
|
|
# 保存词云图
|
|
|
|
|
wordcloud.to_file("my_wordcloud.png")
|
|
|
|
|
|
|
|
|
|
# tk.messagebox.showinfo(title="搜索", message="你搜索了" + movie)
|
|
|
|
|
window_title = tk.Toplevel(window)
|
|
|
|
|
window_title.geometry("500x300")
|
|
|
|
|
window_title.title('《' + movie + '》' + "词云图")
|
|
|
|
|
|
|
|
|
|
# 加载图片
|
|
|
|
|
image = Image.open("my_wordcloud.png")
|
|
|
|
|
image_resized = image.resize((400, 280))
|
|
|
|
|
photo_image = ImageTk.PhotoImage(image=image_resized)
|
|
|
|
|
|
|
|
|
|
# 创建Canvas并添加图片
|
|
|
|
|
canvas = tk.Canvas(window_title, width=image_resized.width, height=image_resized.height)
|
|
|
|
|
canvas.pack()
|
|
|
|
|
|
|
|
|
|
# 在Canvas上创建一个image对象
|
|
|
|
|
image_id = canvas.create_image(0, 0, anchor='nw', image=photo_image)
|
|
|
|
|
|
|
|
|
|
# 确保图片不会被垃圾回收
|
|
|
|
|
canvas.image = photo_image
|
|
|
|
|
else:
|
|
|
|
|
tk.messagebox.showwarning(title="错误", message="没有找到该电影")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Search button: clicking it runs search_movie().
bth_search = tk.Button(window, text="搜索", command=search_movie)
bth_search.place(x=310, y=225)

# Run the GUI event loop; the word cloud is rendered and saved inside the
# search callback, so no plotting is done at module level.
try:
    window.mainloop()
finally:
    # Close the database connection even if the main loop exits with an error.
    connection.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|