|
|
import pandas as pd
|
|
|
import pymysql
|
|
|
# FontProperties is imported to deal with Chinese text rendering
|
|
|
from matplotlib.font_manager import FontProperties
|
|
|
# scikit-learn (machine-learning library): linear regression model
|
|
|
from sklearn.linear_model import LinearRegression
|
|
|
# train_test_split is scikit-learn's helper for splitting a dataset into a training set and a test set.
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
|
|
|
|
|
# Font settings: a FontProperties instance with size 10.
font = FontProperties(size=10)

# The dataset itself is read from MySQL further down, once a database
# connection exists.
|
|
|
|
|
|
import time  # needed only by the polling loop below

# Feature columns in the exact order both models are trained on; prediction
# inputs are built against this list so each value lands in the right column.
feature_columns = ['directorCapacity', 'screenwriterCapacity', 'starringCapacity']

# Seconds to pause between polls while the `data` table has no row yet.
poll_interval = 1

# Poll the `data` table until it contains a row, then train the rating and
# box-office regressions on `data05`, print the results and store them in the
# `result` table.
# NOTE(review): the connection credentials are hard-coded; consider moving
# them to configuration.
while True:
    # A new connection is opened for every poll, as in the original code
    # (NOTE(review): presumably so each poll sees freshly committed rows).
    # It is now always closed again, so polling no longer leaks connections.
    db_pymysql = pymysql.connect(host='localhost', port=3306, user='root', password='12345678', db='movie', use_unicode=True, charset='utf8')
    try:
        with db_pymysql.cursor() as mycursor:
            mycursor.execute("select * from data")
            mydata = mycursor.fetchone()

            if mydata is not None:
                # Training data is only read once there is something to predict.
                data = pd.read_sql('select * from data05', con=db_pymysql)

                # Inputs: director / screenwriter / starring capacity.
                # Targets: movie rating (y) and box office (y1).
                x = data[feature_columns]
                y = data[['movie_rating']]
                y1 = data[['boxOffice']]

                # test_size is the share held out for testing; random_state
                # fixes the shuffle so results are reproducible. Splitting all
                # three frames in one call gives both targets the same rows.
                x_train, x_test, y_train, y_test, y_train1, y_test1 = train_test_split(
                    x, y, y1, test_size=0.2, random_state=0
                )

                # Rating model (default parameters).
                model = LinearRegression()
                model.fit(x_train, y_train)

                # Box-office model (default parameters).
                model1 = LinearRegression()
                model1.fit(x_train, y_train1)

                # Sample input: capacity values for the film "Love Apartment".
                directorCapacity = 6.266667
                starring = 6
                screen = 6.8

                # Built against feature_columns so the screenwriter and starring
                # values are not swapped relative to the training columns.
                X = pd.DataFrame(
                    [[directorCapacity, screen, starring]],
                    columns=feature_columns,
                )

                # LinearRegression.score returns R^2 on the held-out set; the
                # printed label calls it "accuracy".
                print("评分预测模型准确率:")
                accuracy = model.score(x_test, y_test)
                print(accuracy)

                print("电影所获电影评分预测:")
                score = model.predict(X)[0][0]
                print(score)

                print("票房预测模型准确率:")
                office_accuracy = model1.score(x_test, y_test1)
                print(office_accuracy)

                print("电影所获电影票房预测:")
                Boxoffice = model1.predict(X)[0][0]
                print(Boxoffice)

                # "Best team" capacities come from columns 1..3 of the polled row.
                # NOTE(review): they are passed positionally, as in the original;
                # confirm that the `data` table stores them in the order
                # director, screenwriter, starring.
                best_team = [float(mydata[i]) for i in (1, 2, 3)]
                X1 = pd.DataFrame([best_team], columns=feature_columns)

                print("最佳阵容参演电影所获电影评分预测:")
                bestteamscore = model.predict(X1)[0][0]
                print(bestteamscore)

                print("最佳阵容参演电影所获电影票房预测:")
                bestteamBoxoffice = model1.predict(X1)[0][0]
                print(bestteamBoxoffice)

                # Persist the six results as one row of the `result` table.
                # NOTE(review): the column name "fficeaccuracy" looks like a typo
                # of "officeaccuracy"; it is left untouched because it must match
                # the real table schema.
                result_row = (accuracy, score, office_accuracy, Boxoffice, bestteamscore, bestteamBoxoffice)
                insert_sql = "insert into result(scoreaccuracy,Scorepredicts,fficeaccuracy,boxoffice,bestteamscore,bestteamboxoffice) values(%s,%s,%s,%s,%s,%s)"
                mycursor.execute(insert_sql, result_row)
                db_pymysql.commit()
                print("成功")
                break
    finally:
        # Runs on break, on the no-row path and on errors alike.
        db_pymysql.close()

    # No row yet: wait before polling again instead of reconnecting in a
    # tight loop.
    time.sleep(poll_interval)