|
|
import pandas as pd
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
from sklearn import linear_model #线性模型
|
|
|
import tkinter as tk
|
|
|
import matplotlib.pyplot as plt #绘图库
|
|
|
import seaborn as sns
|
|
|
import numpy as np
|
|
|
from flask import Flask, request, jsonify, render_template
|
|
|
import joblib
|
|
|
#直方图显示
def show_zhifangtu():
    """Display a grid of histograms for the columns of ``./house_data.csv``.

    The CSV is read from the current working directory and the plot window
    is shown with ``plt.show()``.
    """
    # 20 bins per histogram, drawn on a 10x10 inch figure.
    pd.read_csv(r"./house_data.csv").hist(figsize=(10, 10), bins=20)
    plt.show()
#各个特征与房价的关系图
def xianxinghuigui():
    """Plot every feature column of ``./house_data.csv`` against ``price``.

    The CSV is read from the current working directory and all columns are
    cast to float.  One figure is produced per feature, containing a seaborn
    ``regplot`` (scatter plus fitted regression line) of that feature versus
    ``price``.

    The ``price`` column itself is skipped: it is the regression target, and
    plotting it against itself carries no information.
    """
    target = 'price'
    house = pd.read_csv(r"./house_data.csv").astype(float)

    for col in house.columns:
        if col == target:
            continue

        _, ax = plt.subplots(figsize=(12, 8))
        sns.regplot(x=col, y=target, data=house, ax=ax)

        # Re-apply the axis labels unchanged, only with a larger font.
        ax.set_xlabel(ax.get_xlabel(), fontsize=18)
        ax.set_ylabel(ax.get_ylabel(), fontsize=18)

        # Display this feature's figure before moving on to the next one.
        plt.show()
#热图显示
def hot_pictrue():
    """Show a heatmap of the pairwise column correlations in ``./house_data.csv``.

    All columns are cast to float before the correlation matrix is computed.
    Only the lower triangle is drawn; the upper triangle and the diagonal are
    masked out.
    """
    data = pd.read_csv(r"./house_data.csv").astype(float)
    correlations = data.corr()

    # True marks a cell to hide: the upper triangle including the diagonal.
    hidden_cells = np.triu(np.ones_like(correlations, dtype=bool))

    # Diverging colour map with a light centre.
    colour_map = sns.diverging_palette(200, 10, center='light', as_cmap=True)

    # Create the figure that the heatmap below is drawn into.
    _fig, _axes = plt.subplots(figsize=(11, 9))

    sns.heatmap(
        correlations,
        mask=hidden_cells,
        cmap=colour_map,
        vmax=1,
        vmin=-1,
        center=0,
        square=True,  # keep the cells square
        linewidths=.5,
        cbar_kws={"shrink": .4},
        annot=True,  # write the coefficient inside each cell
    )
    plt.show()
"""
岭回归
lasso回归
"""

#使用岭回归算法模型和lasso回归算法模型比较,对房价数据进行训练和预测,并使用Tkinter库创建了一个简单的交互界面。
def train_and_predict():
    """Train Ridge and Lasso price models, keep the better one, open a calculator.

    Steps:
      1. Load ``./house_data.csv`` and split it 80/20 (``random_state=3``).
      2. Fit ``Ridge(alpha=100)`` and ``Lasso(alpha=100)`` on the training
         rows, using the ``square``, ``bathrooms`` and ``bedrooms`` columns
         to predict ``price``.
      3. Keep whichever model scores higher on the held-out rows (Ridge wins
         ties) and print its coefficients, intercept and held-out score.
      4. Open a Tkinter window that evaluates the fitted linear formula for
         values typed in by the user.  The call blocks until that window is
         closed.
    """
    features = ['square', 'bathrooms', 'bedrooms']
    target = 'price'

    df_dm = pd.read_csv(r"./house_data.csv")
    train_data_dm, test_data_dm = train_test_split(df_dm, train_size=0.8, random_state=3)

    best_model, held_out_score = _fit_best_model(train_data_dm, test_data_dm, features, target)

    intercept = float(best_model.intercept_)
    coef = list(best_model.coef_)
    print('系数 Coefficients: {}'.format(coef))
    print('截距 Intercept: {}'.format(intercept))
    print(held_out_score)

    # Name the algorithm that was actually selected in the window title.
    if isinstance(best_model, linear_model.Lasso):
        title = '房价预测计算器-Lasso回归'
    else:
        title = '房价预测计算器-岭回归'

    _run_price_calculator(coef, intercept, title)


def _fit_best_model(train, test, features, target):
    """Fit Ridge and Lasso on *train*; return ``(model, score)`` of the better one.

    ``score`` is the value of the estimator's ``score`` method on *test*.
    Ridge is tried first and is only replaced when Lasso scores strictly
    higher.
    """
    best_model, best_score = None, None
    for model in (linear_model.Ridge(alpha=100), linear_model.Lasso(alpha=100)):
        # Fit on the training rows only, so the score below is measured on
        # data the model has not seen.
        model.fit(train[features], train[target])
        score = model.score(test[features], test[target])
        if best_model is None or score > best_score:
            best_model, best_score = model, score
    return best_model, best_score


def _run_price_calculator(coef, intercept, title):
    """Open the Tk window that applies ``coef``/``intercept`` to user input.

    *coef* must hold three coefficients ordered as area, bathrooms,
    bedrooms, matching the order of the entry fields.  Blocks in the Tk main
    loop until the window is closed.
    """
    window = tk.Tk()
    window.title(title)
    window.geometry('500x350')

    # One caption + entry row per feature, 70 px apart starting at y=50.
    entries = []
    for row, caption in enumerate(("房屋面积:", "卫生间数:", "卧室数:")):
        top = 50 + 70 * row
        tk.Label(window, text=caption).place(x=30, y=top, width=80, height=40)
        entry = tk.Entry(window, show=None)  # plain-text entry
        entry.place(x=120, y=top, width=180, height=40)
        entries.append(entry)

    result = tk.StringVar()

    def calculate():
        """Read the three entries and display the predicted price."""
        try:
            values = [float(entry.get()) for entry in entries]
        except ValueError:
            # Empty or non-numeric input: report it in the result label
            # instead of raising inside the Tk callback.
            result.set('输入无效')
            return
        price = sum(c * v for c, v in zip(coef, values)) + intercept
        result.set('%.2f' % price)

    button = tk.Button(window, text='预测房价', width=10, height=2, command=calculate)
    button.place(x=350, y=120, width=100, height=40)

    tk.Label(window, text="预测房价(万元):").place(x=30, y=250, width=120, height=50)
    output = tk.Label(window, bg='white', fg='black', font=('Arial', 16), bd=0,
                      textvariable=result, anchor='e')
    output.place(x=200, y=250, width=250, height=50)

    window.mainloop()
|
|
|
|
|
|
def main():
    """Run the three visualisations, then train the models and open the calculator."""
    stages = (
        show_zhifangtu,     # histograms
        xianxinghuigui,     # feature-vs-price regression plots
        hot_pictrue,        # correlation heatmap
        train_and_predict,  # model training and prediction window
    )
    for stage in stages:
        stage()
|
|
|
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()