You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

136 lines
5.5 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import linear_model #线性模型
import tkinter as tk
import matplotlib.pyplot as plt #绘图库
import seaborn as sns
import numpy as np
from flask import Flask, request, jsonify, render_template
import joblib
#直方图显示
def show_zhifangtu():
    """Show histograms of the data in ``./house_data.csv``.

    The CSV is loaded into a DataFrame, ``DataFrame.hist`` draws the
    histograms (20 bins each) on a 10x10 inch figure, and ``plt.show()``
    displays the result.
    """
    frame = pd.read_csv(r"./house_data.csv")
    # 20 bins per histogram.
    frame.hist(figsize=(10, 10), bins=20)
    plt.show()
#各个特征与房价的关系图
def xianxinghuigui():
    """Plot every feature column against ``price`` with a fitted regression line.

    Loads ``./house_data.csv``, casts all columns to float and draws one
    ``seaborn.regplot`` figure per feature, with enlarged axis labels.

    The ``price`` column itself is skipped: plotting the target against
    itself only produces a trivial diagonal and says nothing about the
    features.
    """
    house = pd.read_csv(r"./house_data.csv").astype(float)
    # Every column except the regression target is treated as a feature.
    feature_columns = [col for col in house.columns if col != 'price']
    for col in feature_columns:
        _, ax = plt.subplots(1, 1, figsize=(12, 8), sharex=True)
        sns.regplot(x=col, y='price', data=house, ax=ax)  # scatter + regression fit
        # Re-apply the labels seaborn chose, only with a larger font.
        ax.set_xlabel(ax.get_xlabel(), fontsize=18)
        ax.set_ylabel(ax.get_ylabel(), fontsize=18)
        # NOTE(review): the source indentation was lost; showing each figure
        # inside the loop (one window at a time) is assumed - confirm.
        plt.show()
#热图显示
def hot_pictrue():
    """Show a heatmap of the pairwise column correlations in ``./house_data.csv``.

    All columns are cast to float, the correlation matrix is computed, and
    only the part below the diagonal is drawn so that each pair of columns
    appears once.
    """
    data = pd.read_csv(r"./house_data.csv").astype(float)
    correlations = data.corr()

    # Boolean mask that is True on and above the diagonal; masked cells are
    # not drawn, which avoids showing every correlation twice.
    upper_mask = np.triu(np.ones_like(correlations, dtype=bool))

    # Create the figure the heatmap will draw into (the current axes).
    plt.subplots(figsize=(11, 9))

    # Custom diverging colour map.
    palette = sns.diverging_palette(200, 10, center='light', as_cmap=True)

    # Draw the annotated heatmap with square cells and a fixed [-1, 1] scale.
    sns.heatmap(
        correlations,
        mask=upper_mask,
        cmap=palette,
        vmin=-1,
        vmax=1,
        center=0,
        square=True,
        linewidths=.5,
        annot=True,
        cbar_kws={"shrink": .4},
    )
    plt.show()
"""
岭回归
lasso回归
"""
#使用岭回归算法模型和lasso回归算法模型比较对房价数据进行训练和预测并使用Tkinter库创建了一个简单的交互界面。
def _fit_best_model(train_df, test_df, features, target='price'):
    """Fit Ridge and Lasso on the training split and keep the better one.

    Both models use ``alpha=100``. They are compared by their R^2 score on
    the held-out ``test_df``; Lasso is chosen only when it scores strictly
    higher, otherwise Ridge is kept.

    Returns:
        A ``(model, score)`` tuple: the selected fitted model and its R^2
        score on ``test_df``.
    """
    x_train, y_train = train_df[features], train_df[target]
    x_test, y_test = test_df[features], test_df[target]

    ridge = linear_model.Ridge(alpha=100)
    ridge.fit(x_train, y_train)
    ridge_score = ridge.score(x_test, y_test)

    lasso = linear_model.Lasso(alpha=100)
    lasso.fit(x_train, y_train)
    lasso_score = lasso.score(x_test, y_test)

    if lasso_score > ridge_score:
        return lasso, lasso_score
    return ridge, ridge_score


def _run_price_calculator(coef, intercept):
    """Open the Tk window that evaluates the linear model for user input.

    Args:
        coef: The three model coefficients, in the order area, bathrooms,
            bedrooms (the order of the entry fields).
        intercept: The model intercept.

    Blocks in ``window.mainloop()`` until the window is closed.
    """
    window = tk.Tk()
    window.title('房价预测计算器-岭回归')
    window.geometry('500x350')

    # One label + entry row per feature; the entry order matches ``coef``.
    rows = (("房屋面积:", '50'), ("卫生间数:", '120'), ("卧室数:", '190'))
    entries = []
    for caption, y_pos in rows:
        tk.Label(window, text=caption).place(x='30', y=y_pos, width='80', height='40')
        entry = tk.Entry(window, show=None)  # plain-text display
        entry.place(x='120', y=y_pos, width='180', height='40')
        entries.append(entry)

    # Bind the variable to this window explicitly. Without a master it
    # attaches to Tk's default root, which is a different interpreter whenever
    # another root already exists (for example one created by a Tk-based
    # matplotlib backend), and the result label would then never update.
    result = tk.StringVar(master=window)

    def calculate():
        """Read the three entries and display the predicted price."""
        try:
            values = [float(entry.get()) for entry in entries]
        except ValueError:
            # Empty or non-numeric input: tell the user instead of raising
            # inside the Tk callback.
            result.set('请输入有效数字')
            return
        price = sum(c * v for c, v in zip(coef, values)) + intercept
        result.set('%.2f' % price)

    button = tk.Button(window, text='预测房价', width=10, height=2, command=calculate)
    button.place(x='350', y='120', width='100', height='40')

    tk.Label(window, text="预测房价(万元):").place(x='30', y='250', width='120', height='50')
    # Font family corrected from the misspelled 'Arail'.
    output = tk.Label(window, bg='white', fg='black', font=('Arial', '16'), bd='0',
                      textvariable=result, anchor='e')
    output.place(x='200', y='250', width='250', height='50')

    window.mainloop()


def train_and_predict():
    """Train a price model on ``./house_data.csv`` and open a calculator GUI.

    The data is split 80/20 (``random_state=3``). Ridge and Lasso regressions
    on the ``square``, ``bathrooms`` and ``bedrooms`` columns are fit on the
    training part and compared on the held-out part, so the comparison is not
    made on data the models were trained on. The coefficients, the intercept
    and the held-out R^2 score of the winning model are printed, then a Tk
    window is opened in which the user can enter the three feature values and
    get the predicted price.
    """
    df_dm = pd.read_csv(r"./house_data.csv")
    train_data_dm, test_data_dm = train_test_split(df_dm, train_size=0.8, random_state=3)

    features = ['square', 'bathrooms', 'bedrooms']
    best_model, test_score = _fit_best_model(train_data_dm, test_data_dm, features)

    intercept = float(best_model.intercept_)
    coef = list(best_model.coef_)
    print('系数 Coefficients: {}'.format(coef))
    print('截距 Intercept: {}'.format(intercept))
    # R^2 of the selected model on the held-out split.
    print(test_score)

    _run_price_calculator(coef, intercept)
def main():
    """Run the demo stages in order, each one starting after the previous returns."""
    pipeline = (
        show_zhifangtu,     # histograms
        xianxinghuigui,     # feature-vs-price regression plots
        hot_pictrue,        # correlation heatmap
        train_and_predict,  # model training and prediction window
    )
    for stage in pipeline:
        stage()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()