You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

90 lines
3.6 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import time

import pandas as pd
import pymysql
# Font handling (needed for Chinese text)
from matplotlib.font_manager import FontProperties
# Machine-learning module: scikit-learn
from sklearn.linear_model import LinearRegression
# train_test_split is the scikit-learn function that splits the original
# data set into a training part and a test part.
from sklearn.model_selection import train_test_split
# Font settings (size 10). Not referenced anywhere in the visible part of
# this script.
font = FontProperties(size=10)
# Wait for a "best team" row in the `data` table, train the rating and
# box-office regressors on `data05`, and store the predictions in `result`.

# Feature columns in the exact order both regressors are trained on; every
# prediction input has to follow the same order.
FEATURE_COLUMNS = ['directorCapacity', 'screenwriterCapacity', 'starringCapacity']
# Pause between two polls of the `data` table, so the loop neither spins the
# CPU nor floods MySQL with new connections.
POLL_INTERVAL_SECONDS = 1


def _fit_regressor(features, target):
    """Train a default LinearRegression on an 80/20 train/test split.

    random_state=0 fixes the split so repeated runs are reproducible.

    Returns:
        A ``(model, score)`` tuple: the fitted model and the value of
        ``model.score`` on the held-out 20 % (for a regressor this is the
        R^2 coefficient, which the script reports as "accuracy").
    """
    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=0)
    model = LinearRegression()
    model.fit(x_train, y_train)
    return model, model.score(x_test, y_test)


def _predict_one(model, director, screenwriter, starring):
    """Predict the target value for one movie from its three capacity values.

    The sample is wrapped in a DataFrame labelled with FEATURE_COLUMNS so
    each value is guaranteed to land in the column the model was trained on.
    """
    sample = pd.DataFrame(
        [[director, screenwriter, starring]], columns=FEATURE_COLUMNS)
    # The models are fitted on a one-column DataFrame target, so predict()
    # returns a 2-D array; [0][0] extracts the single scalar.
    return model.predict(sample)[0][0]


def _predict_and_store():
    """Run one polling pass against the database.

    Returns:
        False when the `data` table has no row yet (nothing is written);
        True after the predictions were inserted into `result` and committed.
    """
    # NOTE(review): the credentials are hard-coded; consider moving them to
    # configuration or environment variables.
    connection = pymysql.connect(host='localhost', port=3306, user='root',
                                 password='12345678', db='movie',
                                 use_unicode=True, charset='utf8')
    try:
        with connection.cursor() as cursor:
            cursor.execute("select * from data")
            best_team = cursor.fetchone()
            if best_team is None:
                return False

            # Only read the training table once there is something to do.
            movies = pd.read_sql('select * from data05', con=connection)
            features = movies[FEATURE_COLUMNS]
            # Rating model and box-office model share features and split.
            rating_model, accuracy = _fit_regressor(
                features, movies[['movie_rating']])
            office_model, office_accuracy = _fit_regressor(
                features, movies[['boxOffice']])

            # Hard-coded sample movie (the original comment names it
            # 《爱情公寓》). The original passed these values as
            # [director, starring, screen], which does not match the
            # training order [director, screenwriter, starring].
            # NOTE(review): assumes the original `screen` value (6.8) is the
            # screenwriter capacity -- confirm.
            director_capacity = 6.266667
            starring_capacity = 6
            screenwriter_capacity = 6.8

            print("评分预测模型准确率:")
            print(accuracy)
            print("电影所获电影评分预测:")
            score = _predict_one(rating_model, director_capacity,
                                 screenwriter_capacity, starring_capacity)
            print(score)

            print("票房预测模型准确率:")
            print(office_accuracy)
            print("电影所获电影票房预测:")
            box_office = _predict_one(office_model, director_capacity,
                                      screenwriter_capacity, starring_capacity)
            print(box_office)

            # Capacity values of the "best team" row. Columns 1..3 are fed
            # to the models positionally, exactly as the original did.
            # NOTE(review): the original comment lists them as director,
            # screenwriter, actor while its variable names said director,
            # actor, editor -- verify against the `data` table schema.
            team_values = [float(best_team[i]) for i in (1, 2, 3)]
            print("最佳阵容参演电影所获电影评分预测:")
            best_team_score = _predict_one(rating_model, *team_values)
            print(best_team_score)
            print("最佳阵容参演电影所获电影票房预测:")
            best_team_box_office = _predict_one(office_model, *team_values)
            print(best_team_box_office)

            result_row = (accuracy, score, office_accuracy, box_office,
                          best_team_score, best_team_box_office)
            # NOTE(review): `fficeaccuracy` looks like a typo, but it must
            # match the real column name of `result`, so it is kept as is.
            insert_sql = (
                "insert into result(scoreaccuracy,Scorepredicts,fficeaccuracy,"
                "boxoffice,bestteamscore,bestteamboxoffice) "
                "values(%s,%s,%s,%s,%s,%s)"
            )
            cursor.execute(insert_sql, result_row)
        connection.commit()
        print("成功")
        return True
    finally:
        # Always release the connection; the polling loop opens a fresh one
        # on every pass.
        connection.close()


# Poll until the `data` table holds a row, then stop after one stored result.
while not _predict_and_store():
    time.sleep(POLL_INTERVAL_SECONDS)