import pandas as pd
import numpy as np
import joblib
from flask import Flask, request, jsonify, render_template_string
import os
# Add the project root (the parent of this file's directory) to the Python
# import path. This must run before the `data.data_generator` import below,
# which is presumably resolved relative to the project root -- TODO confirm.
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Directory the serialized model and preprocessor files are loaded from:
# <project root>/models.
MODEL_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'models')
# NOTE(review): preprocess_data is not referenced in the visible part of this
# file -- confirm whether the import is still needed.
from data.data_generator import preprocess_data
# Flask application instance; the route handlers below register against it.
app = Flask(__name__)

# Module-level slots for the model, the scaler, the three label encoders and
# the SHAP explainer. They all start out as None and are filled by load_model().
model = scaler = le_education = le_home = le_purpose = explainer = None
def load_model():
    """Load the trained model and its preprocessing artifacts from MODEL_DIR.

    Populates the module-level globals ``model``, ``scaler``,
    ``le_education``, ``le_home``, ``le_purpose`` and ``explainer``.

    The LightGBM model is tried first; if it cannot be loaded, the XGBoost
    model is loaded instead. The SHAP explainer is optional: ``explainer``
    is left as ``None`` when it cannot be loaded.

    Raises:
        Exception: Whatever ``joblib.load`` raises when the fallback model,
            the scaler, or one of the label encoders cannot be loaded.
    """
    global model, scaler, le_education, le_home, le_purpose, explainer

    def _load(filename):
        # NOTE: joblib.load unpickles the file, so MODEL_DIR must only ever
        # contain trusted artifacts.
        return joblib.load(os.path.join(MODEL_DIR, filename))

    print("加载模型和预处理器...")

    # `except Exception` (rather than a bare `except:`) keeps the best-effort
    # fallback while letting KeyboardInterrupt / SystemExit propagate.
    try:
        model = _load('lightgbm_model.pkl')
    except Exception as exc:
        # Fall back to the XGBoost model and say why, instead of failing
        # over silently.
        print(f"LightGBM模型加载失败,回退到XGBoost模型: {exc}")
        model = _load('xgboost_model.pkl')

    scaler = _load('scaler.pkl')
    le_education = _load('le_education.pkl')
    le_home = _load('le_home.pkl')
    le_purpose = _load('le_purpose.pkl')

    # Without an explainer, predictions are served with no explanation.
    try:
        explainer = _load('shap_explainer.pkl')
    except Exception as exc:
        print(f"SHAP解释器加载失败,将不提供解释: {exc}")
        explainer = None

    print("模型加载完成!")
@app.route('/')
def home():
    """Serve the landing page, rendered from an inline template."""
    landing_page = '''
可解释的信贷风险评估系统
'''
    return render_template_string(landing_page)
# Fields every /predict request body must contain.
_REQUIRED_FIELDS = (
    'age',
    'income',
    'employment_length',
    'loan_amount',
    'credit_score',
    'debt_to_income',
    'num_credit_lines',
    'education',
    'home_ownership',
    'loan_purpose',
)


def _recommendation_for(risk_probability):
    """Map a default probability to the advice text returned to the client."""
    if risk_probability > 0.5:
        return "该客户违约风险较高,建议拒绝贷款申请或要求提供更多担保。"
    if risk_probability > 0.2:
        return "该客户违约风险中等,建议谨慎审批,可考虑降低贷款额度或提高利率。"
    return "该客户违约风险较低,建议批准贷款申请。"


def _explain_prediction(shap_explainer, features, feature_names):
    """Return up to five of the largest SHAP contributions for one sample.

    Args:
        shap_explainer: Object exposing ``shap_values(features)``.
        features: Single-row feature frame that was passed to the model.
        feature_names: Names matching the feature columns, in order.

    Returns:
        A list of ``{"feature", "effect"}`` dicts ordered by decreasing
        absolute SHAP value. Contributions with an absolute value of 0.01 or
        less are omitted, so the list may be empty.
    """
    shap_values = shap_explainer.shap_values(features)

    # Explainers for classifiers may return one array per class. Use the
    # positive class (index 1) so that a positive SHAP value means "raises
    # the default probability", consistent with predict_proba(...)[0][1].
    if isinstance(shap_values, list):
        shap_values = shap_values[1] if len(shap_values) > 1 else shap_values[0]
    shap_values = np.asarray(shap_values)
    if shap_values.ndim == 3:
        # Newer shap releases stack classes on the last axis:
        # (samples, features, classes).
        class_index = 1 if shap_values.shape[2] > 1 else 0
        shap_values = shap_values[:, :, class_index]

    # Keep only features with a noticeable impact, largest first.
    contributions = [
        (name, float(value))
        for name, value in zip(feature_names, shap_values[0])
        if abs(value) > 0.01
    ]
    contributions.sort(key=lambda item: abs(item[1]), reverse=True)

    explanation = []
    for name, value in contributions[:5]:
        direction = "增加风险" if value > 0 else "降低风险"
        explanation.append({
            "feature": name,
            "effect": f"{direction} (影响度: {abs(value):.3f})",
        })
    return explanation


@app.route('/predict', methods=['POST'])
def predict():
    """Predict the credit default risk for one applicant.

    Expects a JSON object containing every field in ``_REQUIRED_FIELDS``.

    Returns:
        A JSON response with ``risk_probability`` and ``recommendation``,
        plus ``explanation`` when a SHAP explainer is loaded and the
        explanation could be computed. On any failure, a JSON object with
        an ``error`` message and HTTP status 400.
    """
    try:
        # silent=True yields None for a missing or malformed JSON body, so a
        # clear message can be returned instead of an opaque TypeError.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': '请求体必须是JSON对象'}), 400

        missing = [field for field in _REQUIRED_FIELDS if field not in data]
        if missing:
            return jsonify({'error': f"缺少必填字段: {', '.join(missing)}"}), 400

        # load_model() otherwise only runs under __main__; load lazily so the
        # endpoint also works when the app is imported by a WSGI server.
        if model is None:
            load_model()

        # Column order is kept exactly as before, because the scaler and the
        # model receive the columns positionally.
        df = pd.DataFrame([{
            'age': float(data['age']),
            'income': float(data['income']),
            'employment_length': float(data['employment_length']),
            'loan_amount': float(data['loan_amount']),
            'credit_score': float(data['credit_score']),
            'debt_to_income': float(data['debt_to_income']),
            'num_credit_lines': int(data['num_credit_lines']),
            'education': data['education'],
            'home_ownership': data['home_ownership'],
            'loan_purpose': data['loan_purpose'],
        }])

        # Encode the categorical variables with the loaded label encoders.
        df['education'] = le_education.transform(df['education'])
        df['home_ownership'] = le_home.transform(df['home_ownership'])
        df['loan_purpose'] = le_purpose.transform(df['loan_purpose'])

        # Scale the features and restore the column names for the model.
        df_scaled = pd.DataFrame(scaler.transform(df), columns=df.columns)

        # Probability of the positive class (column 1 of predict_proba).
        risk_probability = model.predict_proba(df_scaled)[0][1]

        # The explanation is best-effort: a failure here must not fail the
        # prediction itself.
        explanation = None
        if explainer is not None:
            try:
                explanation = _explain_prediction(explainer, df_scaled, df.columns)
            except Exception as e:
                print(f"解释生成失败: {e}")

        result = {
            'risk_probability': float(risk_probability),
            'recommendation': _recommendation_for(risk_probability),
        }
        if explanation is not None:
            result['explanation'] = explanation
        return jsonify(result)
    except Exception as e:
        # Every failure is reported as HTTP 400 with the exception text,
        # which is what existing clients of this endpoint receive.
        return jsonify({'error': str(e)}), 400
if __name__ == '__main__':
    # Load the model and preprocessors before serving any request.
    load_model()
    # The Werkzeug interactive debugger allows arbitrary code execution, so
    # it must not be on by default while listening on all interfaces.
    # Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    # Start the application.
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)