You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

306 lines
13 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import pandas as pd
import numpy as np
import joblib
from flask import Flask, request, jsonify, render_template_string
import os
# Put the project root (two directory levels above this file) on the import
# path so that top-level project packages can be imported from here.
import sys
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(_PROJECT_ROOT)
# Serialized model artefacts are read from the ``models`` directory that sits
# directly under the project root.
MODEL_DIR = os.path.join(_PROJECT_ROOT, 'models')
from data.data_generator import preprocess_data
# Flask application object that the route decorators in this module attach to.
app = Flask(__name__)
# Module-level slots for the classifier, the feature scaler, the three label
# encoders and the optional SHAP explainer. They all start out as None and are
# filled in by load_model().
model = scaler = le_education = le_home = le_purpose = explainer = None
def load_model():
    """Load the trained model and its preprocessing artefacts into module globals.

    Every artefact is read from ``MODEL_DIR`` with ``joblib.load``:

    * ``lightgbm_model.pkl`` -> ``model``; if that load fails for any reason,
      ``xgboost_model.pkl`` is loaded instead.
    * ``scaler.pkl`` -> ``scaler``
    * ``le_education.pkl``, ``le_home.pkl``, ``le_purpose.pkl`` -> the three
      label encoders.
    * ``shap_explainer.pkl`` -> ``explainer``; this one is optional and
      ``explainer`` is left as ``None`` when it cannot be loaded.

    Raises:
        Exception: whatever ``joblib.load`` raises when the fallback model,
            the scaler or one of the label encoders cannot be loaded.
    """
    global model, scaler, le_education, le_home, le_purpose, explainer
    print("加载模型和预处理器...")
    # NOTE(security): joblib.load unpickles its input, which can execute
    # arbitrary code. Only ever point MODEL_DIR at trusted files.
    try:
        model = joblib.load(os.path.join(MODEL_DIR, 'lightgbm_model.pkl'))
    except Exception as exc:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate, and report why the
        # primary model was skipped before falling back to XGBoost.
        print(f"LightGBM模型加载失败,回退到XGBoost模型: {exc}")
        model = joblib.load(os.path.join(MODEL_DIR, 'xgboost_model.pkl'))
    scaler = joblib.load(os.path.join(MODEL_DIR, 'scaler.pkl'))
    le_education = joblib.load(os.path.join(MODEL_DIR, 'le_education.pkl'))
    le_home = joblib.load(os.path.join(MODEL_DIR, 'le_home.pkl'))
    le_purpose = joblib.load(os.path.join(MODEL_DIR, 'le_purpose.pkl'))
    try:
        explainer = joblib.load(os.path.join(MODEL_DIR, 'shap_explainer.pkl'))
    except Exception as exc:
        # The explainer is best-effort: predictions still work without it.
        explainer = None
        print(f"SHAP解释器加载失败,将不提供决策解释: {exc}")
    print("模型加载完成!")
@app.route('/')
def home():
    """Serve the single-page risk assessment form.

    The returned HTML contains a form with the ten applicant fields and a
    script that posts them as JSON to ``/predict``. On success the script
    renders the default probability, the risk level, the recommendation and,
    when present, the explanation list. When the response carries an
    ``error`` field (``/predict`` sends one on failure) or the request itself
    fails, a generic error message is shown instead.

    Returns:
        The rendered HTML page.
    """
    return render_template_string('''
<!DOCTYPE html>
<html>
<head>
<title>可解释的信贷风险评估系统</title>
<meta charset="utf-8">
<style>
body { font-family: Arial, sans-serif; margin: 40px; background-color: #f5f5f5; }
.container { max-width: 800px; margin: 0 auto; background-color: white; padding: 20px; border-radius: 10px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
h1 { color: #333; text-align: center; }
form { margin: 20px 0; }
.form-group { margin-bottom: 15px; }
label { display: block; margin-bottom: 5px; font-weight: bold; }
input, select { width: 100%; padding: 8px; border: 1px solid #ddd; border-radius: 4px; box-sizing: border-box; }
button { background-color: #4CAF50; color: white; padding: 10px 20px; border: none; border-radius: 4px; cursor: pointer; width: 100%; }
button:hover { background-color: #45a049; }
.result { margin-top: 20px; padding: 15px; border-radius: 4px; }
.risk-high { background-color: #ffebee; border-left: 5px solid #f44336; }
.risk-medium { background-color: #fff3e0; border-left: 5px solid #ff9800; }
.risk-low { background-color: #e8f5e9; border-left: 5px solid #4caf50; }
.explanation { margin-top: 20px; padding: 15px; background-color: #e3f2fd; border-left: 5px solid #2196f3; border-radius: 0 4px 4px 0; }
</style>
</head>
<body>
<div class="container">
<h1>可解释的信贷风险评估系统</h1>
<form id="predictionForm">
<div class="form-group">
<label for="age">年龄:</label>
<input type="number" id="age" name="age" min="18" max="80" required>
</div>
<div class="form-group">
<label for="income">年收入:</label>
<input type="number" id="income" name="income" min="10000" required>
</div>
<div class="form-group">
<label for="employment_length">就业年限:</label>
<input type="number" id="employment_length" name="employment_length" min="0" max="40" step="0.1" required>
</div>
<div class="form-group">
<label for="loan_amount">贷款金额:</label>
<input type="number" id="loan_amount" name="loan_amount" min="1000" required>
</div>
<div class="form-group">
<label for="credit_score">信用评分:</label>
<input type="number" id="credit_score" name="credit_score" min="300" max="850" required>
</div>
<div class="form-group">
<label for="debt_to_income">债务收入比:</label>
<input type="number" id="debt_to_income" name="debt_to_income" min="0" max="1" step="0.01" required>
</div>
<div class="form-group">
<label for="num_credit_lines">信贷账户数量:</label>
<input type="number" id="num_credit_lines" name="num_credit_lines" min="0" max="15" required>
</div>
<div class="form-group">
<label for="education">教育水平:</label>
<select id="education" name="education" required>
<option value="High School">高中</option>
<option value="Bachelor">学士</option>
<option value="Master">硕士</option>
<option value="PhD">博士</option>
</select>
</div>
<div class="form-group">
<label for="home_ownership">房产情况:</label>
<select id="home_ownership" name="home_ownership" required>
<option value="Rent">租房</option>
<option value="Mortgage">抵押</option>
<option value="Own">自有</option>
<option value="Other">其他</option>
</select>
</div>
<div class="form-group">
<label for="loan_purpose">贷款目的:</label>
<select id="loan_purpose" name="loan_purpose" required>
<option value="Debt Consolidation">债务整合</option>
<option value="Home Improvement">房屋改善</option>
<option value="Business">商业</option>
<option value="Personal">个人</option>
<option value="Medical">医疗</option>
</select>
</div>
<button type="submit">评估风险</button>
</form>
<div id="result"></div>
</div>
<script>
document.getElementById('predictionForm').addEventListener('submit', function(e) {
e.preventDefault();
const formData = new FormData(this);
const data = {};
for (let [key, value] of formData.entries()) {
data[key] = value;
}
fetch('/predict', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(data)
})
.then(response => response.json())
.then(data => {
// The server reports failures as an "error" field; hand them to the catch handler instead of rendering NaN.
if (data.error) {
throw new Error(data.error);
}
let riskClass = 'risk-low';
if (data.risk_probability > 0.5) {
riskClass = 'risk-high';
} else if (data.risk_probability > 0.2) {
riskClass = 'risk-medium';
}
let explanationHtml = '';
if (data.explanation) {
explanationHtml = `
<div class="explanation">
<h3>决策解释</h3>
<p>以下特征对本次风险评估结果产生了重要影响:</p>
<ul>
${data.explanation.map(item => `<li>${item.feature}: ${item.effect}</li>`).join('')}
</ul>
</div>
`;
}
document.getElementById('result').innerHTML = `
<div class="result ${riskClass}">
<h3>风险评估结果</h3>
<p><strong>违约概率:</strong> ${(data.risk_probability * 100).toFixed(2)}%</p>
<p><strong>风险等级:</strong> ${riskClass === 'risk-high' ? '高风险' : riskClass === 'risk-medium' ? '中等风险' : '低风险'}</p>
<p><strong>建议:</strong> ${data.recommendation}</p>
</div>
${explanationHtml}
`;
})
.catch(error => {
console.error('Error:', error);
document.getElementById('result').innerHTML = '<div class="result risk-high"><p>评估出错,请重试。</p></div>';
});
});
</script>
</body>
</html>
''')
def _recommendation_for(risk_probability):
    """Map a default probability to the advisory text returned to the client."""
    if risk_probability > 0.5:
        return "该客户违约风险较高,建议拒绝贷款申请或要求提供更多担保。"
    if risk_probability > 0.2:
        return "该客户违约风险中等,建议谨慎审批,可考虑降低贷款额度或提高利率。"
    return "该客户违约风险较低,建议批准贷款申请。"


def _positive_class_shap_row(shap_values):
    """Return the 1-D SHAP contributions of the first row for the positive class.

    The reported probability is ``predict_proba(...)[0][1]``, so the
    contributions must refer to class 1 as well; otherwise the
    "increases/decreases risk" direction is inverted.
    """
    if isinstance(shap_values, list):
        # List output: one array per class. Index 1 is the positive class.
        values = shap_values[1] if len(shap_values) > 1 else shap_values[0]
    else:
        values = shap_values
    values = np.asarray(values)
    if values.ndim == 3:
        # NOTE(review): assumes the (rows, features, classes) layout used by
        # recent SHAP releases for multi-output models; confirm against the
        # SHAP version the explainer was built with.
        values = values[:, :, -1]
    return values[0]


def _summarise_contributions(feature_names, shap_row, min_effect=0.01, top_n=5):
    """Describe the ``top_n`` strongest SHAP contributions above ``min_effect``.

    Returns a list of ``{"feature": ..., "effect": ...}`` dicts ordered by
    decreasing absolute contribution.
    """
    significant = [
        (name, float(value))
        for name, value in zip(feature_names, shap_row)
        if abs(value) > min_effect
    ]
    significant.sort(key=lambda pair: abs(pair[1]), reverse=True)
    summary = []
    for name, value in significant[:top_n]:
        direction = "增加风险" if value > 0 else "降低风险"
        summary.append({
            "feature": name,
            "effect": f"{direction} (影响度: {abs(value):.3f})",
        })
    return summary


@app.route('/predict', methods=['POST'])
def predict():
    """Score one loan application posted as JSON.

    The request body must be a JSON object with the keys ``age``, ``income``,
    ``employment_length``, ``loan_amount``, ``credit_score``,
    ``debt_to_income``, ``num_credit_lines``, ``education``,
    ``home_ownership`` and ``loan_purpose``. Numeric fields may be sent as
    strings; they are converted with ``float``/``int``.

    Returns:
        On success, a JSON object with ``risk_probability`` (float) and
        ``recommendation`` (str), plus ``explanation`` (a list of
        ``{"feature", "effect"}`` objects) when a SHAP explainer is loaded
        and the explanation could be computed. On any failure, a JSON object
        ``{"error": <message>}`` with HTTP status 400.
    """
    try:
        # load_model() only runs under ``__main__``; load lazily so the
        # endpoint also works when the app is served by a WSGI server or
        # ``flask run``.
        if model is None:
            load_model()
        data = request.get_json()
        # Column order here is the feature order handed to the scaler/model.
        df = pd.DataFrame([{
            'age': float(data['age']),
            'income': float(data['income']),
            'employment_length': float(data['employment_length']),
            'loan_amount': float(data['loan_amount']),
            'credit_score': float(data['credit_score']),
            'debt_to_income': float(data['debt_to_income']),
            'num_credit_lines': int(data['num_credit_lines']),
            'education': data['education'],
            'home_ownership': data['home_ownership'],
            'loan_purpose': data['loan_purpose'],
        }])
        # Encode the categorical columns with the loaded label encoders.
        df['education'] = le_education.transform(df['education'])
        df['home_ownership'] = le_home.transform(df['home_ownership'])
        df['loan_purpose'] = le_purpose.transform(df['loan_purpose'])
        # Scale the features, keeping the column names on the result.
        df_scaled = pd.DataFrame(scaler.transform(df), columns=df.columns)
        # Probability of the positive (default) class for the single row.
        risk_probability = model.predict_proba(df_scaled)[0][1]
        recommendation = _recommendation_for(risk_probability)
        # The explanation is best-effort: a failure here must not fail the
        # prediction, and must not leak a half-built list into the response.
        explanation = None
        if explainer is not None:
            try:
                shap_row = _positive_class_shap_row(explainer.shap_values(df_scaled))
                explanation = _summarise_contributions(df.columns, shap_row)
            except Exception as e:
                print(f"解释生成失败: {e}")
        result = {
            'risk_probability': float(risk_probability),
            'recommendation': recommendation
        }
        if explanation is not None:
            result['explanation'] = explanation
        return jsonify(result)
    except Exception as e:
        return jsonify({'error': str(e)}), 400
if __name__ == '__main__':
    # Load the model and preprocessing artefacts before serving requests.
    load_model()
    # The server binds to all interfaces, and Flask's debug mode exposes the
    # Werkzeug interactive debugger, which lets a client run arbitrary code.
    # Debug mode is therefore off unless explicitly requested with
    # FLASK_DEBUG=1 (or true/yes/on) in the environment.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)