develop
周俊杰 4 months ago
commit b9ad0143ab

8
.idea/.gitignore vendored

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.13 (大作业)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 (大作业)" project-jdk-type="Python SDK" />
</project>

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/大作业.iml" filepath="$PROJECT_DIR$/.idea/大作业.iml" />
</modules>
</component>
</project>

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.13 (大作业)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

@ -0,0 +1,306 @@
import pandas as pd
import numpy as np
import joblib
from flask import Flask, request, jsonify, render_template_string
import os
# 添加项目根目录到Python路径
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# 修改模型加载路径
MODEL_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'models')
from data.data_generator import preprocess_data
# Create the Flask application
app = Flask(__name__)
# Module-level handles for the model and its preprocessors.
# They stay None until load_model() assigns them.
model = None
scaler = None
le_education = None
le_home = None
le_purpose = None
explainer = None
def load_model():
    """
    Load the trained classifier and its preprocessors into the module globals.

    The LightGBM model is preferred; if it cannot be loaded the XGBoost model
    is used instead. The scaler and the three label encoders are required, so
    a failure to load any of them propagates to the caller. The SHAP explainer
    is optional: when it cannot be loaded, ``explainer`` is set to None.

    All artifacts are read from MODEL_DIR with joblib (pickle-based), so that
    directory must only ever contain trusted files.
    """
    global model, scaler, le_education, le_home, le_purpose, explainer
    print("加载模型和预处理器...")
    try:
        model = joblib.load(os.path.join(MODEL_DIR, 'lightgbm_model.pkl'))
    except Exception as exc:
        # `except Exception` (not a bare except) keeps Ctrl+C / SystemExit
        # working, and the reason for the fallback is reported, not hidden.
        print(f"LightGBM模型加载失败 ({exc}), 回退到XGBoost模型")
        model = joblib.load(os.path.join(MODEL_DIR, 'xgboost_model.pkl'))
    scaler = joblib.load(os.path.join(MODEL_DIR, 'scaler.pkl'))
    le_education = joblib.load(os.path.join(MODEL_DIR, 'le_education.pkl'))
    le_home = joblib.load(os.path.join(MODEL_DIR, 'le_home.pkl'))
    le_purpose = joblib.load(os.path.join(MODEL_DIR, 'le_purpose.pkl'))
    try:
        explainer = joblib.load(os.path.join(MODEL_DIR, 'shap_explainer.pkl'))
    except Exception as exc:
        # Explanations are best-effort: serve predictions without them.
        print(f"SHAP解释器不可用: {exc}")
        explainer = None
    print("模型加载完成!")
@app.route('/')
def home():
    """
    Serve the single-page UI.

    The page contains the applicant form and a script that posts the form
    values as JSON to /predict and renders the returned probability,
    recommendation and optional explanation list. An error payload from
    /predict (``{"error": ...}``) is routed to the generic error message
    instead of being rendered as a result.
    """
    return render_template_string('''
<!DOCTYPE html>
<html>
<head>
<title>可解释的信贷风险评估系统</title>
<meta charset="utf-8">
<style>
body { font-family: Arial, sans-serif; margin: 40px; background-color: #f5f5f5; }
.container { max-width: 800px; margin: 0 auto; background-color: white; padding: 20px; border-radius: 10px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
h1 { color: #333; text-align: center; }
form { margin: 20px 0; }
.form-group { margin-bottom: 15px; }
label { display: block; margin-bottom: 5px; font-weight: bold; }
input, select { width: 100%; padding: 8px; border: 1px solid #ddd; border-radius: 4px; box-sizing: border-box; }
button { background-color: #4CAF50; color: white; padding: 10px 20px; border: none; border-radius: 4px; cursor: pointer; width: 100%; }
button:hover { background-color: #45a049; }
.result { margin-top: 20px; padding: 15px; border-radius: 4px; }
.risk-high { background-color: #ffebee; border-left: 5px solid #f44336; }
.risk-medium { background-color: #fff3e0; border-left: 5px solid #ff9800; }
.risk-low { background-color: #e8f5e9; border-left: 5px solid #4caf50; }
.explanation { margin-top: 20px; padding: 15px; background-color: #e3f2fd; border-left: 5px solid #2196f3; border-radius: 0 4px 4px 0; }
</style>
</head>
<body>
<div class="container">
<h1>可解释的信贷风险评估系统</h1>
<form id="predictionForm">
<div class="form-group">
<label for="age">年龄:</label>
<input type="number" id="age" name="age" min="18" max="80" required>
</div>
<div class="form-group">
<label for="income">年收入:</label>
<input type="number" id="income" name="income" min="10000" required>
</div>
<div class="form-group">
<label for="employment_length">就业年限:</label>
<input type="number" id="employment_length" name="employment_length" min="0" max="40" step="0.1" required>
</div>
<div class="form-group">
<label for="loan_amount">贷款金额:</label>
<input type="number" id="loan_amount" name="loan_amount" min="1000" required>
</div>
<div class="form-group">
<label for="credit_score">信用评分:</label>
<input type="number" id="credit_score" name="credit_score" min="300" max="850" required>
</div>
<div class="form-group">
<label for="debt_to_income">债务收入比:</label>
<input type="number" id="debt_to_income" name="debt_to_income" min="0" max="1" step="0.01" required>
</div>
<div class="form-group">
<label for="num_credit_lines">信贷账户数量:</label>
<input type="number" id="num_credit_lines" name="num_credit_lines" min="0" max="15" required>
</div>
<div class="form-group">
<label for="education">教育水平:</label>
<select id="education" name="education" required>
<option value="High School">高中</option>
<option value="Bachelor">学士</option>
<option value="Master">硕士</option>
<option value="PhD">博士</option>
</select>
</div>
<div class="form-group">
<label for="home_ownership">房产情况:</label>
<select id="home_ownership" name="home_ownership" required>
<option value="Rent">租房</option>
<option value="Mortgage">抵押</option>
<option value="Own">自有</option>
<option value="Other">其他</option>
</select>
</div>
<div class="form-group">
<label for="loan_purpose">贷款目的:</label>
<select id="loan_purpose" name="loan_purpose" required>
<option value="Debt Consolidation">债务整合</option>
<option value="Home Improvement">房屋改善</option>
<option value="Business">商业</option>
<option value="Personal">个人</option>
<option value="Medical">医疗</option>
</select>
</div>
<button type="submit">评估风险</button>
</form>
<div id="result"></div>
</div>
<script>
document.getElementById('predictionForm').addEventListener('submit', function(e) {
e.preventDefault();
const formData = new FormData(this);
const data = {};
for (let [key, value] of formData.entries()) {
data[key] = value;
}
fetch('/predict', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(data)
})
.then(response => response.json())
.then(data => {
if (data.error || typeof data.risk_probability !== 'number') {
throw new Error(data.error || '响应缺少风险概率');
}
let riskClass = 'risk-low';
if (data.risk_probability > 0.5) {
riskClass = 'risk-high';
} else if (data.risk_probability > 0.2) {
riskClass = 'risk-medium';
}
let explanationHtml = '';
if (data.explanation) {
explanationHtml = `
<div class="explanation">
<h3>决策解释</h3>
<p>以下特征对本次风险评估结果产生了重要影响</p>
<ul>
${data.explanation.map(item => `<li>${item.feature}: ${item.effect}</li>`).join('')}
</ul>
</div>
`;
}
document.getElementById('result').innerHTML = `
<div class="result ${riskClass}">
<h3>风险评估结果</h3>
<p><strong>违约概率:</strong> ${(data.risk_probability * 100).toFixed(2)}%</p>
<p><strong>风险等级:</strong> ${riskClass === 'risk-high' ? '高风险' : riskClass === 'risk-medium' ? '中等风险' : '低风险'}</p>
<p><strong>建议:</strong> ${data.recommendation}</p>
</div>
${explanationHtml}
`;
})
.catch(error => {
console.error('Error:', error);
document.getElementById('result').innerHTML = '<div class="result risk-high"><p>评估出错,请重试。</p></div>';
});
});
</script>
</body>
</html>
''')
# Numeric request fields and the conversion applied to each. Together with
# _CATEGORICAL_FIELDS this is the column order used to build the model input.
_NUMERIC_FIELDS = (
    ('age', float),
    ('income', float),
    ('employment_length', float),
    ('loan_amount', float),
    ('credit_score', float),
    ('debt_to_income', float),
    ('num_credit_lines', int),
)
_CATEGORICAL_FIELDS = ('education', 'home_ownership', 'loan_purpose')


def _build_feature_frame(data):
    """Return the request payload as a one-row, label-encoded and scaled DataFrame.

    Raises KeyError when a field is missing and ValueError/TypeError when a
    numeric field cannot be converted.
    """
    row = {name: convert(data[name]) for name, convert in _NUMERIC_FIELDS}
    row.update({name: data[name] for name in _CATEGORICAL_FIELDS})
    df = pd.DataFrame([row])
    df['education'] = le_education.transform(df['education'])
    df['home_ownership'] = le_home.transform(df['home_ownership'])
    df['loan_purpose'] = le_purpose.transform(df['loan_purpose'])
    return pd.DataFrame(scaler.transform(df), columns=df.columns)


def _recommendation_for(risk_probability):
    """Map a default probability to the advice text shown to the user."""
    if risk_probability > 0.5:
        return "该客户违约风险较高,建议拒绝贷款申请或要求提供更多担保。"
    if risk_probability > 0.2:
        return "该客户违约风险中等,建议谨慎审批,可考虑降低贷款额度或提高利率。"
    return "该客户违约风险较低,建议批准贷款申请。"


def _explain_prediction(df_scaled, top_k=5, min_abs_effect=0.01):
    """Return the top_k SHAP contributions for the single row in df_scaled.

    Each entry is ``{"feature": name, "effect": text}``; entries are ordered
    by absolute SHAP value and values not above min_abs_effect are dropped.
    """
    shap_values = explainer.shap_values(df_scaled)
    if isinstance(shap_values, list):
        # One array per class: take the LAST one (the positive / default
        # class). Index 0 is the non-default class, whose signs are the
        # opposite of "increases default risk".
        shap_values = shap_values[-1]
    shap_values = np.asarray(shap_values)
    if shap_values.ndim == 3:
        # Some SHAP versions return (samples, features, classes) — TODO
        # confirm against the installed version; keep the positive class.
        shap_values = shap_values[..., -1]
    contributions = [
        (feature, float(value))
        for feature, value in zip(df_scaled.columns, shap_values[0])
        if abs(value) > min_abs_effect
    ]
    contributions.sort(key=lambda pair: abs(pair[1]), reverse=True)
    return [
        {
            "feature": feature,
            "effect": f"{'增加风险' if value > 0 else '降低风险'} (影响度: {abs(value):.3f})",
        }
        for feature, value in contributions[:top_k]
    ]


@app.route('/predict', methods=['POST'])
def predict():
    """
    Score one loan application.

    Expects a JSON object with the numeric fields in _NUMERIC_FIELDS and the
    categorical fields in _CATEGORICAL_FIELDS.

    Returns JSON with ``risk_probability`` (probability of the positive
    class), ``recommendation`` and, when a SHAP explainer is loaded and
    succeeds, ``explanation``. Invalid input yields ``{"error": ...}`` with
    status 400; any other failure yields ``{"error": ...}`` with status 500.
    """
    try:
        # silent=True: a missing or non-JSON body becomes None, not an abort
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': '请求体必须是JSON对象'}), 400

        df_scaled = _build_feature_frame(data)
        risk_probability = float(model.predict_proba(df_scaled)[0][1])

        result = {
            'risk_probability': risk_probability,
            'recommendation': _recommendation_for(risk_probability),
        }

        if explainer is not None:
            try:
                result['explanation'] = _explain_prediction(df_scaled)
            except Exception as e:
                # The explanation is optional; the prediction is still valid.
                print(f"解释生成失败: {e}")
        return jsonify(result)
    except (KeyError, TypeError, ValueError) as e:
        # Missing field, unconvertible number or similar client-side problem
        return jsonify({'error': str(e)}), 400
    except Exception as e:
        # Anything else (e.g. model not loaded) is a server-side failure
        app.logger.exception("prediction failed")
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # Load the model and preprocessors before serving any request
    load_model()
    # The interactive debugger lets a client execute arbitrary code, so it
    # must not be on by default for a server bound to all interfaces.
    # Opt in explicitly with FLASK_DEBUG=1 during local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,129 @@
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
def generate_credit_data(n_samples=10000, random_state=42):
    """
    Generate a synthetic credit dataset.

    Columns:
    - age: applicant age, clipped to [18, 80]
    - income: annual income, clipped to [10000, 500000]
    - employment_length: years employed, clipped to [0, 40]
    - loan_amount: requested amount, clipped to [1000, 200000]
    - credit_score: clipped to [300, 850]
    - debt_to_income: drawn from Beta(2, 5)
    - num_credit_lines: clipped to [0, 15]
    - education, home_ownership, loan_purpose: categorical strings
    - default: 0/1 label drawn from a logistic function of the numeric columns

    Args:
        n_samples: number of rows to generate.
        random_state: seed for the generator. The default 42 reproduces the
            data the function has always produced; None gives a fresh,
            non-reproducible sample.

    Returns:
        pandas.DataFrame with n_samples rows and the eleven columns above.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by the module-level functions, without reseeding the global
    # NumPy generator. The draw order below must not change.
    rng = np.random.RandomState(random_state)

    age = np.clip(rng.normal(35, 10, n_samples), 18, 80)
    income = np.clip(rng.lognormal(10, 0.5, n_samples), 10000, 500000)
    employment_length = np.clip(rng.exponential(2, n_samples), 0, 40)
    loan_amount = np.clip(rng.lognormal(9, 0.8, n_samples), 1000, 200000)
    credit_score = np.clip(rng.normal(650, 100, n_samples), 300, 850)
    debt_to_income = rng.beta(2, 5, n_samples)
    num_credit_lines = np.clip(rng.poisson(3, n_samples), 0, 15)

    education_levels = ['High School', 'Bachelor', 'Master', 'PhD']
    education = rng.choice(education_levels, n_samples, p=[0.3, 0.4, 0.2, 0.1])
    ownership_types = ['Rent', 'Mortgage', 'Own', 'Other']
    home_ownership = rng.choice(ownership_types, n_samples, p=[0.3, 0.4, 0.25, 0.05])
    purpose_types = ['Debt Consolidation', 'Home Improvement', 'Business', 'Personal', 'Medical']
    loan_purpose = rng.choice(purpose_types, n_samples, p=[0.4, 0.2, 0.15, 0.15, 0.1])

    # Simplified linear score plus Gaussian noise, squashed by a logistic
    # function to obtain each applicant's default probability
    score = (
        -0.02 * age +
        -0.00001 * income +
        -0.1 * employment_length +
        0.000005 * loan_amount +
        -0.005 * credit_score +
        2 * debt_to_income +
        0.05 * num_credit_lines +
        rng.normal(0, 0.5, n_samples)
    )
    default_prob = 1 / (1 + np.exp(-score))
    # Draw the observed 0/1 label from that probability
    default = rng.binomial(1, default_prob, n_samples)

    return pd.DataFrame({
        'age': age,
        'income': income,
        'employment_length': employment_length,
        'loan_amount': loan_amount,
        'credit_score': credit_score,
        'debt_to_income': debt_to_income,
        'num_credit_lines': num_credit_lines,
        'education': education,
        'home_ownership': home_ownership,
        'loan_purpose': loan_purpose,
        'default': default
    })
def preprocess_data(df):
    """
    Encode the categorical columns and standardize every feature column.

    Works on a copy, so the caller's DataFrame is left untouched.

    Returns:
        (X_scaled, y, scaler, le_education, le_home, le_purpose): the scaled
        feature frame, the 'default' column, the fitted StandardScaler and the
        three fitted LabelEncoders.
    """
    frame = df.copy()

    # One freshly fitted encoder per categorical column
    encoders = {}
    for column in ('education', 'home_ownership', 'loan_purpose'):
        encoder = LabelEncoder()
        frame[column] = encoder.fit_transform(frame[column])
        encoders[column] = encoder

    # Everything except the target is a feature
    features = frame.drop(columns='default')
    target = frame['default']

    # The scaler is fitted on all feature columns, encoded categoricals included.
    # The scaled frame gets a fresh default index; `target` keeps df's index.
    scaler = StandardScaler()
    scaled = pd.DataFrame(scaler.fit_transform(features), columns=features.columns)

    return (
        scaled,
        target,
        scaler,
        encoders['education'],
        encoders['home_ownership'],
        encoders['loan_purpose'],
    )
if __name__ == "__main__":
    import os

    # Generate a sample dataset and print a short summary
    print("生成信贷数据...")
    df = generate_credit_data(10000)
    print(f"数据形状: {df.shape}")
    print("\n数据前5行:")
    print(df.head())
    print("\n违约分布:")
    print(df['default'].value_counts())
    print(f"\n违约率: {df['default'].mean():.2%}")
    # Save next to this script so the output location does not depend on the
    # directory the script is started from
    output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'credit_data.csv')
    df.to_csv(output_path, index=False)
    print(f"\n数据已保存到 {output_path}")

@ -0,0 +1,156 @@
"""
可解释的信贷风险评估系统
"""
import os
import sys
import subprocess
import webbrowser
import time
def print_system_overview():
    """
    Print the start-up banner: title, one-line description and feature list.
    """
    rule = "=" * 60
    banner = (
        rule,
        "可解释的信贷风险评估系统",
        rule,
        "本系统基于LightGBM和对抗自编码器技术提供以下功能",
        "1. 信贷风险预测",
        "2. 模型决策解释",
        "3. 数据可视化分析",
        "4. Web API接口",
        rule,
    )
    for line in banner:
        print(line)
def check_dependencies():
    """
    Check that every required third-party package is installed.

    Prints the missing packages and a ready-to-run pip command when something
    is absent.

    Returns:
        True when all packages are present, False otherwise.
    """
    import importlib.util

    # import name -> distribution name to pass to pip. They differ for
    # scikit-learn: "pip install sklearn" does not install it.
    required_packages = {
        'numpy': 'numpy',
        'pandas': 'pandas',
        'sklearn': 'scikit-learn',
        'xgboost': 'xgboost',
        'lightgbm': 'lightgbm',
        'torch': 'torch',
        'flask': 'flask',
        'shap': 'shap',
        'matplotlib': 'matplotlib',
        'seaborn': 'seaborn',
    }
    missing_packages = []
    for module_name, pip_name in required_packages.items():
        # find_spec only locates the package, it does not import it, so the
        # launcher does not pay for loading torch/xgboost just to check.
        try:
            installed = importlib.util.find_spec(module_name) is not None
        except (ImportError, ValueError):
            installed = False
        if not installed:
            missing_packages.append(pip_name)
    if missing_packages:
        print(f"缺少以下依赖包: {', '.join(missing_packages)}")
        print("请运行: pip install " + " ".join(missing_packages))
        return False
    return True
def generate_sample_data():
    """
    Create data/credit_data.csv unless it already exists.
    """
    data_path = "data/credit_data.csv"
    if os.path.exists(data_path):
        print("示例数据已存在")
        return

    print("生成示例信贷数据...")
    # Make the project's `data` package importable from the current directory
    sys.path.append('.')
    from data.data_generator import generate_credit_data
    generate_credit_data(10000).to_csv(data_path, index=False)
    print("示例数据已生成")
def train_models():
    """
    Train each model whose artifact is missing by running its training script.

    A failing script raises subprocess.CalledProcessError (check=True).
    """
    # (artifact that marks the model as trained, training script, display name)
    jobs = (
        ("models/lightgbm_model.pkl", "models/train_lightgbm.py", "LightGBM模型"),
        ("models/adversarial_autoencoder.pth", "models/train_aae.py", "对抗自编码器"),
    )
    for artifact, script, label in jobs:
        if os.path.exists(artifact):
            print(f"{label}已存在")
            continue
        print(f"训练{label}...")
        subprocess.run([sys.executable, script], check=True)
        print(f"{label}训练完成")
def generate_explanations():
    """
    Run the SHAP explanation script unless its summary plot already exists.
    """
    explanation_path = "visualization/shap_summary.png"
    if os.path.exists(explanation_path):
        print("模型解释已存在")
        return

    print("生成模型解释...")
    command = [sys.executable, "utils/shap_explainer.py"]
    subprocess.run(command, check=True)
    print("模型解释生成完成")
def create_visualizations():
    """
    Run the dashboard script unless visualization/dashboard.html already exists.
    """
    viz_path = "visualization/dashboard.html"
    if os.path.exists(viz_path):
        print("可视化图表已存在")
        return

    print("创建可视化图表...")
    command = [sys.executable, "visualization/create_dashboard.py"]
    subprocess.run(command, check=True)
    print("可视化图表创建完成")
def start_api_server():
    """
    Run api/app.py and block until the server process exits.

    Raises:
        subprocess.CalledProcessError: if the server exits with a non-zero
            status.
    """
    print("启动API服务器...")
    print("服务器将在 http://127.0.0.1:5000 上运行")
    print("按 Ctrl+C 停止服务器")
    # Start the Flask app with `api` as the CHILD's working directory. Using
    # cwd= instead of os.chdir() leaves this process's working directory
    # untouched, so relative paths used after the server stops still resolve.
    subprocess.run([sys.executable, "app.py"], cwd='api', check=True)
def main():
    """
    Entry point: check dependencies, prepare the directories, then run every
    pipeline step in order and finally start the API server.
    """
    print_system_overview()

    if not check_dependencies():
        return

    # Make sure the working directories exist
    for directory in ('data', 'models', 'visualization', 'api'):
        if not os.path.exists(directory):
            os.makedirs(directory)

    try:
        # data -> models -> explanations -> charts -> API server
        pipeline = (
            generate_sample_data,
            train_models,
            generate_explanations,
            create_visualizations,
            start_api_server,
        )
        for step in pipeline:
            step()
    except KeyboardInterrupt:
        print("\n系统已停止")
    except Exception as e:
        print(f"系统运行出错: {e}")


if __name__ == "__main__":
    main()

@ -0,0 +1,192 @@
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import joblib
import os
import sys
# 添加项目根目录到Python路径
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
class AutoEncoder(nn.Module):
    """
    Plain two-layer autoencoder (described in the original as being for
    anomaly detection).

    Every layer, including the last decoder layer, is followed by an in-place
    ReLU, so the reconstruction is always non-negative.
    """

    def __init__(self, input_dim, hidden_dim1, hidden_dim2):
        super().__init__()
        # input_dim -> hidden_dim1 -> hidden_dim2
        encoder_layers = [
            nn.Linear(input_dim, hidden_dim1),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_dim1, hidden_dim2),
            nn.ReLU(inplace=True),
        ]
        self.encoder = nn.Sequential(*encoder_layers)
        # hidden_dim2 -> hidden_dim1 -> input_dim (mirror of the encoder)
        decoder_layers = [
            nn.Linear(hidden_dim2, hidden_dim1),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_dim1, input_dim),
            nn.ReLU(inplace=True),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode x and return its reconstruction."""
        return self.decoder(self.encoder(x))
class AdversarialAutoEncoder(nn.Module):
    """
    Adversarial autoencoder: encoder, decoder and a latent-space discriminator.

    The last encoder layer and the last decoder layer are linear (no
    activation):
    - The discriminator is trained against samples from a standard normal
      prior. A ReLU-terminated encoder can only emit non-negative codes,
      which can never match that prior.
    - The model is trained on standardized features, which take negative
      values and values above 1. A Sigmoid-terminated decoder cannot
      reproduce them.

    The Linear layers keep their positions inside each nn.Sequential, so
    state_dict keys and shapes are unchanged.
    """

    def __init__(self, input_dim, hidden_dim1, hidden_dim2, latent_dim):
        super().__init__()
        # Encoder: input_dim -> hidden_dim1 -> hidden_dim2 -> latent_dim
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim1),
            nn.ReLU(True),
            nn.Linear(hidden_dim1, hidden_dim2),
            nn.ReLU(True),
            nn.Linear(hidden_dim2, latent_dim)  # linear: codes may be negative
        )
        # Decoder: latent_dim -> hidden_dim2 -> hidden_dim1 -> input_dim
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim2),
            nn.ReLU(True),
            nn.Linear(hidden_dim2, hidden_dim1),
            nn.ReLU(True),
            nn.Linear(hidden_dim1, input_dim)  # linear: unbounded reconstruction
        )
        # Discriminator: probability that a latent vector came from the prior
        self.discriminator = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim2),
            nn.ReLU(True),
            nn.Linear(hidden_dim2, hidden_dim1),
            nn.ReLU(True),
            nn.Linear(hidden_dim1, 1),
            nn.Sigmoid()
        )

    def encode(self, x):
        """Map inputs to latent codes."""
        return self.encoder(x)

    def decode(self, z):
        """Map latent codes back to input space."""
        return self.decoder(z)

    def discriminate(self, z):
        """Return the discriminator's probability in [0, 1] for each code."""
        return self.discriminator(z)

    def forward(self, x):
        """Return (reconstruction, latent code) for x."""
        z = self.encode(x)
        recon_x = self.decode(z)
        return recon_x, z
def train_adversarial_autoencoder():
    """
    Train the adversarial autoencoder on the numeric credit features.

    Reads data/credit_data.csv, standardizes the seven numeric columns, trains
    an AdversarialAutoEncoder for 100 epochs with mini-batches of 64, then
    writes the weights to models/adversarial_autoencoder.pth and the fitted
    scaler to models/ae_scaler.pkl. All paths are relative to the current
    working directory.

    Returns:
        (model, scaler): the trained model and the fitted StandardScaler.
    """
    # Load the data
    print("读取信贷数据...")
    df = pd.read_csv('data/credit_data.csv')
    # Only the numeric features are used to train the autoencoder
    numerical_features = ['age', 'income', 'employment_length', 'loan_amount',
                          'credit_score', 'debt_to_income', 'num_credit_lines']
    X = df[numerical_features]
    # Standardize the data
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    # Convert to a PyTorch tensor
    X_tensor = torch.FloatTensor(X_scaled)
    # Model dimensions
    input_dim = X_tensor.shape[1]
    hidden_dim1 = 64
    hidden_dim2 = 32
    latent_dim = 16
    # Build the model
    model = AdversarialAutoEncoder(input_dim, hidden_dim1, hidden_dim2, latent_dim)
    # Losses and optimizers: one optimizer for encoder+decoder, one for the
    # discriminator, so each step only updates its own part of the model
    reconstruction_criterion = nn.MSELoss()
    adversarial_criterion = nn.BCELoss()
    autoencoder_optimizer = optim.Adam(
        list(model.encoder.parameters()) + list(model.decoder.parameters()),
        lr=0.001
    )
    discriminator_optimizer = optim.Adam(model.discriminator.parameters(), lr=0.001)
    # Training schedule
    num_epochs = 100
    batch_size = 64
    print("开始训练对抗自编码器...")
    for epoch in range(num_epochs):
        # NOTE(review): batches are taken in file order every epoch (no shuffling)
        for i in range(0, len(X_tensor), batch_size):
            batch = X_tensor[i:i+batch_size]
            # --- Autoencoder step ---
            autoencoder_optimizer.zero_grad()
            recon_batch, latent_batch = model(batch)
            real_labels = torch.ones(batch.size(0), 1)
            fake_labels = torch.zeros(batch.size(0), 1)
            # Reconstruction loss
            recon_loss = reconstruction_criterion(recon_batch, batch)
            # Adversarial loss: the encoder wants the discriminator to label
            # its latent codes as "real"
            disc_fake = model.discriminate(latent_batch)
            adversarial_loss = adversarial_criterion(disc_fake, real_labels)
            autoencoder_loss = recon_loss + 0.1 * adversarial_loss
            autoencoder_loss.backward()
            autoencoder_optimizer.step()
            # --- Discriminator step ---
            # zero_grad also clears the gradients the autoencoder backward
            # pass left on the discriminator parameters
            discriminator_optimizer.zero_grad()
            # "Real" latent vectors are sampled from a standard normal distribution
            real_latent = torch.randn(batch.size(0), latent_dim)
            disc_real = model.discriminate(real_latent)
            disc_real_loss = adversarial_criterion(disc_real, real_labels)
            # Encoder outputs are the "fake" vectors; detach() keeps this loss
            # from back-propagating into the encoder
            disc_fake = model.discriminate(latent_batch.detach())
            disc_fake_loss = adversarial_criterion(disc_fake, fake_labels)
            discriminator_loss = disc_real_loss + disc_fake_loss
            discriminator_loss.backward()
            discriminator_optimizer.step()
        if (epoch + 1) % 10 == 0:
            # The values printed are those of the last batch of the epoch
            print(f'Epoch [{epoch+1}/{num_epochs}], '
                  f'Recon Loss: {recon_loss.item():.4f}, '
                  f'Adversarial Loss: {adversarial_loss.item():.4f}, '
                  f'Discriminator Loss: {discriminator_loss.item():.4f}')
    # Save the model weights and the scaler
    print("保存对抗自编码器模型...")
    torch.save(model.state_dict(), 'models/adversarial_autoencoder.pth')
    joblib.dump(scaler, 'models/ae_scaler.pkl')
    return model, scaler


if __name__ == "__main__":
    # Report whether a GPU is available
    # NOTE(review): `device` is only printed. train_adversarial_autoencoder()
    # takes no device argument and never moves the model or tensors to it.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"使用设备: {device}")
    model, scaler = train_adversarial_autoencoder()
    print("对抗自编码器训练完成!")

@ -0,0 +1,93 @@
import sys
import os
# 添加项目根目录到Python路径
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import lightgbm as lgb
import joblib
from data.data_generator import preprocess_data
def train_lightgbm_model():
    """
    Train the LightGBM credit-risk classifier and save it with its preprocessors.

    Reads data/credit_data.csv, preprocesses it with preprocess_data(), holds
    out 20% as a test set and a further 20% of the remaining rows as a
    validation set. Early stopping watches the validation set only, so the
    metrics printed for the test set come from data the training procedure
    never saw. Artifacts are written under models/ (relative to the current
    working directory).

    Returns:
        (model, scaler, le_education, le_home, le_purpose)
    """
    # Load the data
    print("读取信贷数据...")
    df = pd.read_csv('data/credit_data.csv')
    print(f"数据形状: {df.shape}")

    # Preprocess
    print("数据预处理...")
    X, y, scaler, le_education, le_home, le_purpose = preprocess_data(df)

    # Hold out the test set, then carve a validation set out of the rest
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
    )
    print(f"训练集大小: {X_fit.shape}")
    print(f"验证集大小: {X_val.shape}")
    print(f"测试集大小: {X_test.shape}")

    # Build the classifier
    print("创建LightGBM模型...")
    model = lgb.LGBMClassifier(
        n_estimators=200,
        max_depth=8,
        learning_rate=0.05,
        num_leaves=64,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        verbose=-1
    )

    # Train; stop when the validation log-loss has not improved for 10 rounds
    print("训练模型...")
    model.fit(X_fit, y_fit,
              eval_set=[(X_val, y_val)],
              eval_metric='binary_logloss',
              callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])

    # Evaluate on the untouched test set
    print("模型预测...")
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"模型准确率: {accuracy:.4f}")
    print("\n分类报告:")
    print(classification_report(y_test, y_pred))
    print("\n混淆矩阵:")
    print(confusion_matrix(y_test, y_pred))

    # Save the model and the preprocessors
    print("保存模型和预处理器...")
    joblib.dump(model, 'models/lightgbm_model.pkl')
    joblib.dump(scaler, 'models/scaler.pkl')
    joblib.dump(le_education, 'models/le_education.pkl')
    joblib.dump(le_home, 'models/le_home.pkl')
    joblib.dump(le_purpose, 'models/le_purpose.pkl')

    # Feature importance, most important first
    feature_importance = pd.DataFrame({
        'feature': X.columns,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)
    print("\n特征重要性:")
    print(feature_importance)
    return model, scaler, le_education, le_home, le_purpose


if __name__ == "__main__":
    model, scaler, le_education, le_home, le_purpose = train_lightgbm_model()
    print("\n模型训练完成!")

@ -0,0 +1,88 @@
import sys
import os
# 添加项目根目录到Python路径
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import xgboost as xgb
import joblib
from data.data_generator import preprocess_data
def train_xgboost_model():
    """
    Train the XGBoost credit-risk classifier and save it with its preprocessors.

    Reads data/credit_data.csv, preprocesses it with preprocess_data(), trains
    on 80% of the rows, prints accuracy, classification report and confusion
    matrix for the remaining 20%, then writes the artifacts under models/.

    Returns:
        (model, scaler, le_education, le_home, le_purpose)
    """
    # Load and preprocess
    print("读取信贷数据...")
    raw = pd.read_csv('data/credit_data.csv')
    print(f"数据形状: {raw.shape}")
    print("数据预处理...")
    X, y, scaler, le_education, le_home, le_purpose = preprocess_data(raw)

    # Stratified 80/20 split
    split = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
    X_train, X_test, y_train, y_test = split
    print(f"训练集大小: {X_train.shape}")
    print(f"测试集大小: {X_test.shape}")

    # Build and fit the classifier
    print("创建XGBoost模型...")
    hyperparameters = {
        'n_estimators': 100,
        'max_depth': 6,
        'learning_rate': 0.1,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'random_state': 42,
    }
    model = xgb.XGBClassifier(**hyperparameters)
    print("训练模型...")
    model.fit(X_train, y_train)

    # Evaluate on the held-out rows
    print("模型预测...")
    y_pred = model.predict(X_test)
    # Computed as in the original; the value is not used afterwards
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    print(f"模型准确率: {accuracy_score(y_test, y_pred):.4f}")
    print("\n分类报告:")
    print(classification_report(y_test, y_pred))
    print("\n混淆矩阵:")
    print(confusion_matrix(y_test, y_pred))

    # Persist the model and the preprocessors as models/<name>.pkl
    print("保存模型和预处理器...")
    artifacts = {
        'xgboost_model': model,
        'scaler': scaler,
        'le_education': le_education,
        'le_home': le_home,
        'le_purpose': le_purpose,
    }
    for stem, artifact in artifacts.items():
        joblib.dump(artifact, f'models/{stem}.pkl')

    # Feature importance, most important first
    importance_table = pd.DataFrame({
        'feature': X.columns,
        'importance': model.feature_importances_
    })
    importance_table = importance_table.sort_values('importance', ascending=False)
    print("\n特征重要性:")
    print(importance_table)

    return model, scaler, le_education, le_home, le_purpose


if __name__ == "__main__":
    model, scaler, le_education, le_home, le_purpose = train_xgboost_model()
    print("\n模型训练完成!")

@ -0,0 +1,75 @@
import pandas as pd
import numpy as np
import joblib
import shap
import matplotlib
matplotlib.use('Agg') # 使用非交互式后端
import matplotlib.pyplot as plt
import os
# 添加项目根目录到Python路径
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from data.data_generator import preprocess_data
def explain_with_shap():
    """
    Explain the trained tree model with SHAP and save the plots and explainer.

    Loads the LightGBM model (falling back to XGBoost when it cannot be
    loaded), computes SHAP values for the first 100 preprocessed rows, writes
    a bar-importance plot and a summary plot under visualization/, and stores
    the explainer as models/shap_explainer.pkl. Paths are relative to the
    current working directory.

    Returns:
        (shap_values, X_sample): the SHAP values and the rows they describe.
    """
    # Load the data
    print("读取信贷数据...")
    df = pd.read_csv('data/credit_data.csv')
    # Preprocess (refits encoders and scaler on the same data)
    print("数据预处理...")
    X, y, scaler, le_education, le_home, le_purpose = preprocess_data(df)
    # Load the trained model
    print("加载模型...")
    try:
        model = joblib.load('models/lightgbm_model.pkl')
        model_name = "LightGBM"
    except Exception as exc:
        # `except Exception` (not a bare except) keeps Ctrl+C working and
        # reports why the fallback to XGBoost happened
        print(f"LightGBM模型加载失败 ({exc}), 回退到XGBoost模型")
        model = joblib.load('models/xgboost_model.pkl')
        model_name = "XGBoost"
    print(f"使用{model_name}模型")
    # Explain a small sample only, to keep the computation short
    X_sample = X.iloc[:100]
    # Build the explainer
    print("创建SHAP解释器...")
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_sample)
    # The script may be run on its own, before the launcher created the folder
    os.makedirs('visualization', exist_ok=True)
    # Bar chart of feature importance
    print("绘制SHAP特征重要性图...")
    plt.figure(figsize=(10, 6))
    shap.summary_plot(shap_values, X_sample, plot_type="bar", show=False)
    plt.title(f'{model_name}模型SHAP特征重要性')
    plt.tight_layout()
    plt.savefig('visualization/shap_feature_importance.png', dpi=300, bbox_inches='tight')
    plt.close()
    # Summary plot
    print("绘制SHAP摘要图...")
    plt.figure(figsize=(10, 8))
    shap.summary_plot(shap_values, X_sample, show=False)
    plt.title(f'{model_name}模型SHAP摘要图')
    plt.tight_layout()
    plt.savefig('visualization/shap_summary.png', dpi=300, bbox_inches='tight')
    plt.close()
    # Persist the explainer so the API can load it
    joblib.dump(explainer, 'models/shap_explainer.pkl')
    print("SHAP解释完成图表已保存到 visualization 目录")
    return shap_values, X_sample


if __name__ == "__main__":
    shap_values, X_sample = explain_with_shap()
    print("SHAP解释模块集成完成!")

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 269 KiB

@ -0,0 +1,386 @@
import pandas as pd
import numpy as np
import joblib
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import seaborn as sns
import os
# 添加项目根目录到Python路径
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
def _save_current_figure(filename):
    """Tighten the layout, write the current figure to visualization/ and close it."""
    plt.tight_layout()
    plt.savefig(f'visualization/{filename}', dpi=300, bbox_inches='tight')
    plt.close()


def _plot_histogram(df, column, color, xlabel, title, filename, fig_size):
    """Save a 30-bin histogram of df[column]."""
    plt.figure(figsize=fig_size)
    plt.hist(df[column], bins=30, alpha=0.7, color=color, edgecolor='black')
    plt.xlabel(xlabel)
    plt.ylabel('频数')
    plt.title(title)
    _save_current_figure(filename)


def _plot_box_by_default(df, column, ylabel, title, filename, fig_size):
    """Save a box plot of df[column] grouped by the 'default' label."""
    plt.figure(figsize=fig_size)
    df.boxplot(column=column, by='default', ax=plt.gca())
    plt.xlabel('是否违约 (0:正常, 1:违约)')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.suptitle('')  # remove the title pandas adds automatically
    _save_current_figure(filename)


def _plot_default_rate_by_category(df, column, xlabel, title, filename, fig_size):
    """Save a stacked bar chart of the default share within each category."""
    plt.figure(figsize=fig_size)
    rates = pd.crosstab(df[column], df['default'], normalize='index')
    # Draw on the figure created above. Without ax= DataFrame.plot opens its
    # own figure, which leaves the empty one open and ignores fig_size.
    rates.plot(kind='bar', stacked=True, color=['skyblue', 'salmon'], ax=plt.gca())
    plt.xlabel(xlabel)
    plt.ylabel('比例')
    plt.title(title)
    plt.legend(['正常', '违约'])
    plt.xticks(rotation=45)
    _save_current_figure(filename)


def create_visualizations():
    """
    Create the exploratory charts for the credit dataset.

    Reads data/credit_data.csv and writes ten PNG files under visualization/
    (both relative to the current working directory): the default
    distribution, three histograms, three box plots by default label, the
    correlation heatmap and two default-rate-by-category charts.
    """
    # Load the data
    print("读取信贷数据...")
    df = pd.read_csv('data/credit_data.csv')
    # The script may be run on its own, before the launcher created the folder
    os.makedirs('visualization', exist_ok=True)
    # Chart style
    plt.style.use('seaborn-v0_8')
    fig_size = (10, 6)

    # 1. Default distribution
    print("创建违约分布图...")
    plt.figure(figsize=fig_size)
    # value_counts() orders by frequency, so take the labels from the index
    # values instead of assuming class 0 comes first
    default_counts = df['default'].value_counts().sort_index()
    label_names = {0: '正常', 1: '违约'}
    pie_labels = [label_names.get(value, str(value)) for value in default_counts.index]
    plt.pie(default_counts.values, labels=pie_labels, autopct='%1.1f%%', startangle=90)
    plt.title('信贷违约分布')
    _save_current_figure('default_distribution.png')

    # 2-4. Distributions of age, income and credit score
    print("创建年龄分布图...")
    _plot_histogram(df, 'age', 'skyblue', '年龄', '客户年龄分布',
                    'age_distribution.png', fig_size)
    print("创建收入分布图...")
    _plot_histogram(df, 'income', 'lightgreen', '年收入', '客户年收入分布',
                    'income_distribution.png', fig_size)
    print("创建信用评分分布图...")
    _plot_histogram(df, 'credit_score', 'salmon', '信用评分', '客户信用评分分布',
                    'credit_score_distribution.png', fig_size)

    # 5-7. Default label versus age, income and credit score
    print("创建违约与年龄关系图...")
    _plot_box_by_default(df, 'age', '年龄', '违约与年龄的关系',
                         'default_vs_age.png', fig_size)
    print("创建违约与收入关系图...")
    _plot_box_by_default(df, 'income', '年收入', '违约与年收入的关系',
                         'default_vs_income.png', fig_size)
    print("创建违约与信用评分关系图...")
    _plot_box_by_default(df, 'credit_score', '信用评分', '违约与信用评分的关系',
                         'default_vs_credit_score.png', fig_size)

    # 8. Correlation heatmap over the numeric columns (label included)
    print("创建特征相关性热力图...")
    plt.figure(figsize=(12, 10))
    numerical_features = ['age', 'income', 'employment_length', 'loan_amount',
                          'credit_score', 'debt_to_income', 'num_credit_lines', 'default']
    correlation_matrix = df[numerical_features].corr()
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,
                square=True, linewidths=0.5)
    plt.title('特征相关性热力图')
    _save_current_figure('correlation_heatmap.png')

    # 9-10. Default rate by education level and by home ownership
    print("创建教育水平与违约关系图...")
    _plot_default_rate_by_category(df, 'education', '教育水平', '不同教育水平的违约率',
                                   'education_default.png', fig_size)
    print("创建房产情况与违约关系图...")
    _plot_default_rate_by_category(df, 'home_ownership', '房产情况', '不同房产情况的违约率',
                                   'home_default.png', fig_size)

    print("所有可视化图表已生成并保存到 visualization 目录")
def create_dashboard_html(output_path='visualization/dashboard.html'):
    """
    Write a static HTML dashboard page that displays the generated charts.

    The page is a fixed template: every chart is referenced by a bare file
    name (e.g. ``default_distribution.png``), so the PNG files must sit in
    the same directory as the written HTML file for the images to load.
    The insight captions, including the default-rate percentages, are
    literal text in the template and are not recomputed from any data.

    Args:
        output_path: Destination of the HTML file. Defaults to
            ``visualization/dashboard.html``, the location this function
            has always written to. Missing parent directories are created.

    Returns:
        None. The page is written to ``output_path`` and a confirmation
        line is printed.
    """
    # Local import keeps this block self-contained regardless of what the
    # top of the module imports.
    import os

    html_content = '''
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>信贷风险评估系统可视化仪表板</title>
<style>
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
line-height: 1.6;
color: #333;
max-width: 1200px;
margin: 0 auto;
padding: 20px;
background-color: #f5f5f5;
}
header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
text-align: center;
padding: 2rem;
border-radius: 10px;
margin-bottom: 2rem;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
h1 {
margin: 0;
font-size: 2.5rem;
}
.subtitle {
font-size: 1.2rem;
opacity: 0.9;
margin-top: 0.5rem;
}
.dashboard {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(500px, 1fr));
gap: 2rem;
margin-bottom: 2rem;
}
.card {
background: white;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
padding: 1.5rem;
transition: transform 0.3s ease;
}
.card:hover {
transform: translateY(-5px);
}
.card h2 {
color: #667eea;
border-bottom: 2px solid #667eea;
padding-bottom: 0.5rem;
margin-top: 0;
}
.chart-container {
text-align: center;
margin-top: 1rem;
}
.chart-container img {
max-width: 100%;
height: auto;
border-radius: 5px;
}
.insight {
background: #e3f2fd;
border-left: 4px solid #2196f3;
padding: 1rem;
margin: 1rem 0;
border-radius: 0 5px 5px 0;
}
footer {
text-align: center;
padding: 1rem;
background: white;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
@media (max-width: 768px) {
.dashboard {
grid-template-columns: 1fr;
}
body {
padding: 10px;
}
}
</style>
</head>
<body>
<header>
<h1>信贷风险评估系统可视化仪表板</h1>
<div class="subtitle">基于机器学习的可解释信贷风险分析</div>
</header>
<div class="dashboard">
<div class="card">
<h2>数据概览</h2>
<div class="chart-container">
<img src="default_distribution.png" alt="违约分布">
</div>
<div class="insight">
<strong>数据洞察:</strong> 数据集中违约客户占比3.73%正常客户占比96.27%数据分布符合现实情况
</div>
</div>
<div class="card">
<h2>SHAP特征重要性</h2>
<div class="chart-container">
<img src="shap_feature_importance.png" alt="SHAP特征重要性">
</div>
<div class="insight">
<strong>模型洞察:</strong> SHAP分析提供了更精确的特征重要性评估有助于理解模型决策过程
</div>
</div>
<div class="card">
<h2>SHAP摘要图</h2>
<div class="chart-container">
<img src="shap_summary.png" alt="SHAP摘要图">
</div>
<div class="insight">
<strong>模型洞察:</strong> SHAP摘要图显示了每个特征如何影响模型输出红色表示增加风险蓝色表示降低风险
</div>
</div>
<div class="card">
<h2>年龄分布</h2>
<div class="chart-container">
<img src="age_distribution.png" alt="年龄分布">
</div>
<div class="insight">
<strong>数据洞察:</strong> 客户年龄主要分布在25-45岁之间这是信贷业务的主要目标群体
</div>
</div>
<div class="card">
<h2>收入分布</h2>
<div class="chart-container">
<img src="income_distribution.png" alt="收入分布">
</div>
<div class="insight">
<strong>数据洞察:</strong> 客户年收入主要集中在较低水平符合一般信贷客户群体特征
</div>
</div>
<div class="card">
<h2>信用评分分布</h2>
<div class="chart-container">
<img src="credit_score_distribution.png" alt="信用评分分布">
</div>
<div class="insight">
<strong>数据洞察:</strong> 信用评分分布较为均匀涵盖了从较差到优秀的各个等级
</div>
</div>
<div class="card">
<h2>违约与年龄关系</h2>
<div class="chart-container">
<img src="default_vs_age.png" alt="违约与年龄关系">
</div>
<div class="insight">
<strong>风险洞察:</strong> 年龄与违约风险之间没有明显的线性关系说明需要综合其他特征进行判断
</div>
</div>
<div class="card">
<h2>违约与收入关系</h2>
<div class="chart-container">
<img src="default_vs_income.png" alt="违约与收入关系">
</div>
<div class="insight">
<strong>风险洞察:</strong> 收入较高的客户违约风险相对较低但并非绝对仍需考虑其他因素
</div>
</div>
<div class="card">
<h2>违约与信用评分关系</h2>
<div class="chart-container">
<img src="default_vs_credit_score.png" alt="违约与信用评分关系">
</div>
<div class="insight">
<strong>风险洞察:</strong> 信用评分与违约风险呈明显负相关信用评分越低违约风险越高
</div>
</div>
<div class="card">
<h2>特征相关性</h2>
<div class="chart-container">
<img src="correlation_heatmap.png" alt="特征相关性">
</div>
<div class="insight">
<strong>数据洞察:</strong> 多数特征之间相关性较低说明特征具有较好的独立性有利于模型训练
</div>
</div>
<div class="card">
<h2>教育水平与违约关系</h2>
<div class="chart-container">
<img src="education_default.png" alt="教育水平与违约关系">
</div>
<div class="insight">
<strong>风险洞察:</strong> 教育水平较高的客户违约率相对较低体现了教育对信用的影响
</div>
</div>
<div class="card">
<h2>房产情况与违约关系</h2>
<div class="chart-container">
<img src="home_default.png" alt="房产情况与违约关系">
</div>
<div class="insight">
<strong>风险洞察:</strong> 拥有自有房产的客户违约率最低租房客户的违约率相对较高
</div>
</div>
</div>
<footer>
<p>信贷风险评估系统 &copy; 2025 | 基于LightGBM和对抗自编码器的可解释AI模型</p>
</footer>
</body>
</html>
'''

    # Create the target directory up front so the function also works when
    # it is called on its own, before anything else has created the folder.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(html_content)
    print(f"可视化仪表板已生成: {output_path}")
if __name__ == "__main__":
    # Script entry point: run the chart generation first, then build the
    # HTML dashboard page, and finally report completion.
    for pipeline_step in (create_visualizations, create_dashboard_html):
        pipeline_step()
    print("可视化解释模块完成!")

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

@ -0,0 +1,224 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>信贷风险评估系统可视化仪表板</title>
<style>
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
line-height: 1.6;
color: #333;
max-width: 1200px;
margin: 0 auto;
padding: 20px;
background-color: #f5f5f5;
}
header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
text-align: center;
padding: 2rem;
border-radius: 10px;
margin-bottom: 2rem;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
h1 {
margin: 0;
font-size: 2.5rem;
}
.subtitle {
font-size: 1.2rem;
opacity: 0.9;
margin-top: 0.5rem;
}
.dashboard {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(500px, 1fr));
gap: 2rem;
margin-bottom: 2rem;
}
.card {
background: white;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
padding: 1.5rem;
transition: transform 0.3s ease;
}
.card:hover {
transform: translateY(-5px);
}
.card h2 {
color: #667eea;
border-bottom: 2px solid #667eea;
padding-bottom: 0.5rem;
margin-top: 0;
}
.chart-container {
text-align: center;
margin-top: 1rem;
}
.chart-container img {
max-width: 100%;
height: auto;
border-radius: 5px;
}
.insight {
background: #e3f2fd;
border-left: 4px solid #2196f3;
padding: 1rem;
margin: 1rem 0;
border-radius: 0 5px 5px 0;
}
footer {
text-align: center;
padding: 1rem;
background: white;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
@media (max-width: 768px) {
.dashboard {
grid-template-columns: 1fr;
}
body {
padding: 10px;
}
}
</style>
</head>
<body>
<header>
<h1>信贷风险评估系统可视化仪表板</h1>
<div class="subtitle">基于机器学习的可解释信贷风险分析</div>
</header>
<div class="dashboard">
<div class="card">
<h2>数据概览</h2>
<div class="chart-container">
<img src="default_distribution.png" alt="违约分布">
</div>
<div class="insight">
<strong>数据洞察:</strong> 数据集中违约客户占比3.73%，正常客户占比96.27%，数据分布符合现实情况。
</div>
</div>
<div class="card">
<h2>SHAP特征重要性</h2>
<div class="chart-container">
<img src="shap_feature_importance.png" alt="SHAP特征重要性">
</div>
<div class="insight">
<strong>模型洞察:</strong> SHAP分析提供了更精确的特征重要性评估，有助于理解模型决策过程。
</div>
</div>
<div class="card">
<h2>SHAP摘要图</h2>
<div class="chart-container">
<img src="shap_summary.png" alt="SHAP摘要图">
</div>
<div class="insight">
<strong>模型洞察:</strong> SHAP摘要图显示了每个特征如何影响模型输出，红色表示增加风险，蓝色表示降低风险。
</div>
</div>
<div class="card">
<h2>年龄分布</h2>
<div class="chart-container">
<img src="age_distribution.png" alt="年龄分布">
</div>
<div class="insight">
<strong>数据洞察:</strong> 客户年龄主要分布在25-45岁之间，这是信贷业务的主要目标群体。
</div>
</div>
<div class="card">
<h2>收入分布</h2>
<div class="chart-container">
<img src="income_distribution.png" alt="收入分布">
</div>
<div class="insight">
<strong>数据洞察:</strong> 客户年收入主要集中在较低水平,符合一般信贷客户群体特征。
</div>
</div>
<div class="card">
<h2>信用评分分布</h2>
<div class="chart-container">
<img src="credit_score_distribution.png" alt="信用评分分布">
</div>
<div class="insight">
<strong>数据洞察:</strong> 信用评分分布较为均匀,涵盖了从较差到优秀的各个等级。
</div>
</div>
<div class="card">
<h2>违约与年龄关系</h2>
<div class="chart-container">
<img src="default_vs_age.png" alt="违约与年龄关系">
</div>
<div class="insight">
<strong>风险洞察:</strong> 年龄与违约风险之间没有明显的线性关系,说明需要综合其他特征进行判断。
</div>
</div>
<div class="card">
<h2>违约与收入关系</h2>
<div class="chart-container">
<img src="default_vs_income.png" alt="违约与收入关系">
</div>
<div class="insight">
<strong>风险洞察:</strong> 收入较高的客户违约风险相对较低,但并非绝对,仍需考虑其他因素。
</div>
</div>
<div class="card">
<h2>违约与信用评分关系</h2>
<div class="chart-container">
<img src="default_vs_credit_score.png" alt="违约与信用评分关系">
</div>
<div class="insight">
<strong>风险洞察:</strong> 信用评分与违约风险呈明显负相关,信用评分越低,违约风险越高。
</div>
</div>
<div class="card">
<h2>特征相关性</h2>
<div class="chart-container">
<img src="correlation_heatmap.png" alt="特征相关性">
</div>
<div class="insight">
<strong>数据洞察:</strong> 多数特征之间相关性较低,说明特征具有较好的独立性,有利于模型训练。
</div>
</div>
<div class="card">
<h2>教育水平与违约关系</h2>
<div class="chart-container">
<img src="education_default.png" alt="教育水平与违约关系">
</div>
<div class="insight">
<strong>风险洞察:</strong> 教育水平较高的客户违约率相对较低,体现了教育对信用的影响。
</div>
</div>
<div class="card">
<h2>房产情况与违约关系</h2>
<div class="chart-container">
<img src="home_default.png" alt="房产情况与违约关系">
</div>
<div class="insight">
<strong>风险洞察:</strong> 拥有自有房产的客户违约率最低,租房客户的违约率相对较高。
</div>
</div>
</div>
<footer>
<p>信贷风险评估系统 &copy; 2025 | 基于LightGBM和对抗自编码器的可解释AI模型</p>
</footer>
</body>
</html>

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

@ -0,0 +1,11 @@
feature,importance
credit_score,0.13739304
debt_to_income,0.10605412
home_ownership,0.100115
age,0.09896107
income,0.09830476
loan_amount,0.097870015
employment_length,0.09592874
education,0.09347555
num_credit_lines,0.0879608
loan_purpose,0.0839369
1 feature importance
2 credit_score 0.13739304
3 debt_to_income 0.10605412
4 home_ownership 0.100115
5 age 0.09896107
6 income 0.09830476
7 loan_amount 0.097870015
8 employment_length 0.09592874
9 education 0.09347555
10 num_credit_lines 0.0879608
11 loan_purpose 0.0839369

Binary file not shown.

After

Width:  |  Height:  |  Size: 87 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 263 KiB

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save