|
|
import pandas as pd
|
|
|
import numpy as np
|
|
|
import joblib
|
|
|
import matplotlib
|
|
|
matplotlib.use('Agg')
|
|
|
import matplotlib.pyplot as plt
|
|
|
import seaborn as sns
|
|
|
import os
|
|
|
|
|
|
# Add the project root (the parent of this file's directory) to the Python import path
|
|
|
import sys
|
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
|
|
def _save_current_figure(output_dir, filename):
    """Apply tight layout to the current pyplot figure, save it as a PNG and close it."""
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, filename), dpi=300, bbox_inches='tight')
    # Close explicitly so figures do not accumulate across the charts.
    plt.close()


def _plot_histogram(df, column, color, xlabel, title, filename, output_dir, fig_size):
    """Save a 30-bin histogram of ``df[column]``."""
    plt.figure(figsize=fig_size)
    plt.hist(df[column], bins=30, alpha=0.7, color=color, edgecolor='black')
    plt.xlabel(xlabel)
    plt.ylabel('频数')
    plt.title(title)
    _save_current_figure(output_dir, filename)


def _plot_box_by_default(df, column, ylabel, title, filename, output_dir, fig_size):
    """Save a box plot of ``df[column]`` grouped by the ``default`` column."""
    plt.figure(figsize=fig_size)
    df.boxplot(column=column, by='default', ax=plt.gca())
    plt.xlabel('是否违约 (0:正常, 1:违约)')
    plt.ylabel(ylabel)
    plt.title(title)
    # pandas adds a "Boxplot grouped by ..." suptitle; blank it out.
    plt.suptitle('')
    _save_current_figure(output_dir, filename)


def _plot_default_share_by_category(df, column, xlabel, title, filename, output_dir, fig_size):
    """Save a stacked bar chart of the normal/default share per category of ``df[column]``."""
    plt.figure(figsize=fig_size)
    # normalize='index' makes every row (category) sum to 1.
    shares = pd.crosstab(df[column], df['default'], normalize='index')
    # Pass ax explicitly: DataFrame.plot would otherwise open a *new* figure,
    # leaving the one created above empty, unclosed, and its figsize unused.
    shares.plot(kind='bar', stacked=True, color=['skyblue', 'salmon'], ax=plt.gca())
    plt.xlabel(xlabel)
    plt.ylabel('比例')
    plt.title(title)
    plt.legend(['正常', '违约'])
    plt.xticks(rotation=45)
    _save_current_figure(output_dir, filename)


def create_visualizations(data_path='data/credit_data.csv', output_dir='visualization'):
    """Render the exploratory charts for the credit data set as PNG files.

    Reads the CSV at ``data_path`` and writes ten charts into ``output_dir``:
    default distribution (pie), histograms of age / income / credit score,
    box plots of those three features grouped by ``default``, a correlation
    heat map of the numeric features, and stacked bar charts of the
    normal/default share by ``education`` and by ``home_ownership``.

    Args:
        data_path: CSV file to read. Relative paths are resolved against the
            current working directory.
        output_dir: Directory the PNG files are written to; created if it
            does not exist.

    Raises:
        FileNotFoundError: If ``data_path`` does not exist.
        KeyError: If the CSV lacks one of the columns used below (``default``,
            ``age``, ``income``, ``credit_score``, ``employment_length``,
            ``loan_amount``, ``debt_to_income``, ``num_credit_lines``,
            ``education``, ``home_ownership``).
    """
    # Load the data.
    print("读取信贷数据...")
    df = pd.read_csv(data_path)

    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)

    # Chart style.
    # NOTE(review): titles and labels are Chinese; whether they render depends on
    # a CJK-capable font being configured for matplotlib - confirm on the target host.
    plt.style.use('seaborn-v0_8')
    fig_size = (10, 6)

    # 1. Default distribution.
    print("创建违约分布图...")
    plt.figure(figsize=fig_size)
    # value_counts() orders by frequency, so pin the order to 0 (normal), 1 (default)
    # to keep the fixed labels attached to the right wedge; a class that is absent
    # gets a zero count instead of causing a label/value length mismatch.
    default_counts = df['default'].value_counts().reindex([0, 1], fill_value=0)
    plt.pie(default_counts.values, labels=['正常', '违约'], autopct='%1.1f%%', startangle=90)
    plt.title('信贷违约分布')
    _save_current_figure(output_dir, 'default_distribution.png')

    # 2. Age distribution.
    print("创建年龄分布图...")
    _plot_histogram(df, 'age', 'skyblue', '年龄', '客户年龄分布',
                    'age_distribution.png', output_dir, fig_size)

    # 3. Income distribution.
    print("创建收入分布图...")
    _plot_histogram(df, 'income', 'lightgreen', '年收入', '客户年收入分布',
                    'income_distribution.png', output_dir, fig_size)

    # 4. Credit score distribution.
    print("创建信用评分分布图...")
    _plot_histogram(df, 'credit_score', 'salmon', '信用评分', '客户信用评分分布',
                    'credit_score_distribution.png', output_dir, fig_size)

    # 5. Default vs. age.
    print("创建违约与年龄关系图...")
    _plot_box_by_default(df, 'age', '年龄', '违约与年龄的关系',
                         'default_vs_age.png', output_dir, fig_size)

    # 6. Default vs. income.
    print("创建违约与收入关系图...")
    _plot_box_by_default(df, 'income', '年收入', '违约与年收入的关系',
                         'default_vs_income.png', output_dir, fig_size)

    # 7. Default vs. credit score.
    print("创建违约与信用评分关系图...")
    _plot_box_by_default(df, 'credit_score', '信用评分', '违约与信用评分的关系',
                         'default_vs_credit_score.png', output_dir, fig_size)

    # 8. Feature correlation heat map (numeric columns only).
    print("创建特征相关性热力图...")
    plt.figure(figsize=(12, 10))
    numerical_features = ['age', 'income', 'employment_length', 'loan_amount',
                          'credit_score', 'debt_to_income', 'num_credit_lines', 'default']
    correlation_matrix = df[numerical_features].corr()
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,
                square=True, linewidths=0.5)
    plt.title('特征相关性热力图')
    _save_current_figure(output_dir, 'correlation_heatmap.png')

    # 9. Education level vs. default.
    print("创建教育水平与违约关系图...")
    _plot_default_share_by_category(df, 'education', '教育水平', '不同教育水平的违约率',
                                    'education_default.png', output_dir, fig_size)

    # 10. Home ownership vs. default.
    print("创建房产情况与违约关系图...")
    _plot_default_share_by_category(df, 'home_ownership', '房产情况', '不同房产情况的违约率',
                                    'home_default.png', output_dir, fig_size)

    print(f"所有可视化图表已生成并保存到 {output_dir} 目录")
|
|
|
|
|
|
# Page markup up to and including the opening tag of the card grid.
_DASHBOARD_HEAD = '''<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>信贷风险评估系统可视化仪表板</title>
    <style>
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            line-height: 1.6;
            color: #333;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }
        header {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            text-align: center;
            padding: 2rem;
            border-radius: 10px;
            margin-bottom: 2rem;
            box-shadow: 0 4px 6px rgba(0,0,0,0.1);
        }
        h1 {
            margin: 0;
            font-size: 2.5rem;
        }
        .subtitle {
            font-size: 1.2rem;
            opacity: 0.9;
            margin-top: 0.5rem;
        }
        .dashboard {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(500px, 1fr));
            gap: 2rem;
            margin-bottom: 2rem;
        }
        .card {
            background: white;
            border-radius: 10px;
            box-shadow: 0 4px 6px rgba(0,0,0,0.1);
            padding: 1.5rem;
            transition: transform 0.3s ease;
        }
        .card:hover {
            transform: translateY(-5px);
        }
        .card h2 {
            color: #667eea;
            border-bottom: 2px solid #667eea;
            padding-bottom: 0.5rem;
            margin-top: 0;
        }
        .chart-container {
            text-align: center;
            margin-top: 1rem;
        }
        .chart-container img {
            max-width: 100%;
            height: auto;
            border-radius: 5px;
        }
        .insight {
            background: #e3f2fd;
            border-left: 4px solid #2196f3;
            padding: 1rem;
            margin: 1rem 0;
            border-radius: 0 5px 5px 0;
        }
        footer {
            text-align: center;
            padding: 1rem;
            background: white;
            border-radius: 10px;
            box-shadow: 0 4px 6px rgba(0,0,0,0.1);
        }
        @media (max-width: 768px) {
            .dashboard {
                grid-template-columns: 1fr;
            }
            body {
                padding: 10px;
            }
        }
    </style>
</head>
<body>
    <header>
        <h1>信贷风险评估系统可视化仪表板</h1>
        <div class="subtitle">基于机器学习的可解释信贷风险分析</div>
    </header>

    <div class="dashboard">
'''

# Markup for one card; it contains no literal braces, so str.format is safe here.
_DASHBOARD_CARD = '''        <div class="card">
            <h2>{title}</h2>
            <div class="chart-container">
                <img src="{image}" alt="{alt}">
            </div>
            <div class="insight">
                <strong>{label}:</strong> {insight}
            </div>
        </div>
'''

# Page markup from the closing tag of the card grid to the end of the document.
_DASHBOARD_TAIL = '''    </div>

    <footer>
        <p>信贷风险评估系统 © 2025 | 基于LightGBM和对抗自编码器的可解释AI模型</p>
    </footer>
</body>
</html>
'''

# One entry per card, in display order:
# (heading, image file, image alt text, insight label, insight text).
# The insight texts are fixed strings; they are not computed from the data.
_DASHBOARD_CARDS = (
    ('数据概览', 'default_distribution.png', '违约分布', '数据洞察',
     '数据集中违约客户占比3.73%,正常客户占比96.27%,数据分布符合现实情况。'),
    ('SHAP特征重要性', 'shap_feature_importance.png', 'SHAP特征重要性', '模型洞察',
     'SHAP分析提供了更精确的特征重要性评估,有助于理解模型决策过程。'),
    ('SHAP摘要图', 'shap_summary.png', 'SHAP摘要图', '模型洞察',
     'SHAP摘要图显示了每个特征如何影响模型输出,红色表示增加风险,蓝色表示降低风险。'),
    ('年龄分布', 'age_distribution.png', '年龄分布', '数据洞察',
     '客户年龄主要分布在25-45岁之间,这是信贷业务的主要目标群体。'),
    ('收入分布', 'income_distribution.png', '收入分布', '数据洞察',
     '客户年收入主要集中在较低水平,符合一般信贷客户群体特征。'),
    ('信用评分分布', 'credit_score_distribution.png', '信用评分分布', '数据洞察',
     '信用评分分布较为均匀,涵盖了从较差到优秀的各个等级。'),
    ('违约与年龄关系', 'default_vs_age.png', '违约与年龄关系', '风险洞察',
     '年龄与违约风险之间没有明显的线性关系,说明需要综合其他特征进行判断。'),
    ('违约与收入关系', 'default_vs_income.png', '违约与收入关系', '风险洞察',
     '收入较高的客户违约风险相对较低,但并非绝对,仍需考虑其他因素。'),
    ('违约与信用评分关系', 'default_vs_credit_score.png', '违约与信用评分关系', '风险洞察',
     '信用评分与违约风险呈明显负相关,信用评分越低,违约风险越高。'),
    ('特征相关性', 'correlation_heatmap.png', '特征相关性', '数据洞察',
     '多数特征之间相关性较低,说明特征具有较好的独立性,有利于模型训练。'),
    ('教育水平与违约关系', 'education_default.png', '教育水平与违约关系', '风险洞察',
     '教育水平较高的客户违约率相对较低,体现了教育对信用的影响。'),
    ('房产情况与违约关系', 'home_default.png', '房产情况与违约关系', '风险洞察',
     '拥有自有房产的客户违约率最低,租房客户的违约率相对较高。'),
)


def create_dashboard_html(output_path='visualization/dashboard.html'):
    """Write a static HTML dashboard that embeds the generated chart images.

    The page shows one card per chart (heading, image, short insight text).
    Images are referenced by bare file name, so the PNG files must sit in the
    same directory as the HTML file.

    NOTE(review): ``shap_feature_importance.png`` and ``shap_summary.png`` are
    referenced here but are not produced by ``create_visualizations`` -
    presumably another module writes them; confirm.

    Args:
        output_path: Destination of the HTML file. Missing parent directories
            are created.
    """
    cards_html = '\n'.join(
        _DASHBOARD_CARD.format(title=title, image=image, alt=alt, label=label, insight=insight)
        for title, image, alt, label, insight in _DASHBOARD_CARDS
    )
    html_content = _DASHBOARD_HEAD + cards_html + _DASHBOARD_TAIL

    # open() does not create missing directories; a bare file name has no parent to create.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"可视化仪表板已生成: {output_path}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: render the charts first, then the HTML page that embeds them.
    for build_step in (create_visualizations, create_dashboard_html):
        build_step()
    print("可视化解释模块完成!")