Compare commits
No commits in common. 'main' and 'master' have entirely different histories.
@ -0,0 +1,8 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# Editor-based HTTP Client requests
|
||||||
|
/httpRequests/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
||||||
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
||||||
@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="Black">
|
||||||
|
<option name="sdkName" value="Python 3.13 (大作业)" />
|
||||||
|
</component>
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 (大作业)" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
||||||
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/大作业.iml" filepath="$PROJECT_DIR$/.idea/大作业.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
@ -0,0 +1,10 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$">
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||||
|
</content>
|
||||||
|
<orderEntry type="jdk" jdkName="Python 3.13 (大作业)" jdkType="Python SDK" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
||||||
@ -0,0 +1,192 @@
|
|||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.optim as optim
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
import joblib
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# 添加项目根目录到Python路径
|
||||||
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
class AutoEncoder(nn.Module):
|
||||||
|
"""
|
||||||
|
自编码器用于异常检测
|
||||||
|
"""
|
||||||
|
def __init__(self, input_dim, hidden_dim1, hidden_dim2):
|
||||||
|
super(AutoEncoder, self).__init__()
|
||||||
|
# 编码器
|
||||||
|
self.encoder = nn.Sequential(
|
||||||
|
nn.Linear(input_dim, hidden_dim1),
|
||||||
|
nn.ReLU(True),
|
||||||
|
nn.Linear(hidden_dim1, hidden_dim2),
|
||||||
|
nn.ReLU(True)
|
||||||
|
)
|
||||||
|
|
||||||
|
# 解码器
|
||||||
|
self.decoder = nn.Sequential(
|
||||||
|
nn.Linear(hidden_dim2, hidden_dim1),
|
||||||
|
nn.ReLU(True),
|
||||||
|
nn.Linear(hidden_dim1, input_dim),
|
||||||
|
nn.ReLU(True)
|
||||||
|
)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
x = self.encoder(x)
|
||||||
|
x = self.decoder(x)
|
||||||
|
return x
|
||||||
|
|
||||||
|
class AdversarialAutoEncoder(nn.Module):
|
||||||
|
"""
|
||||||
|
对抗自编码器
|
||||||
|
"""
|
||||||
|
def __init__(self, input_dim, hidden_dim1, hidden_dim2, latent_dim):
|
||||||
|
super(AdversarialAutoEncoder, self).__init__()
|
||||||
|
# 编码器
|
||||||
|
self.encoder = nn.Sequential(
|
||||||
|
nn.Linear(input_dim, hidden_dim1),
|
||||||
|
nn.ReLU(True),
|
||||||
|
nn.Linear(hidden_dim1, hidden_dim2),
|
||||||
|
nn.ReLU(True),
|
||||||
|
nn.Linear(hidden_dim2, latent_dim),
|
||||||
|
nn.ReLU(True)
|
||||||
|
)
|
||||||
|
|
||||||
|
# 解码器
|
||||||
|
self.decoder = nn.Sequential(
|
||||||
|
nn.Linear(latent_dim, hidden_dim2),
|
||||||
|
nn.ReLU(True),
|
||||||
|
nn.Linear(hidden_dim2, hidden_dim1),
|
||||||
|
nn.ReLU(True),
|
||||||
|
nn.Linear(hidden_dim1, input_dim),
|
||||||
|
nn.Sigmoid() # 使用Sigmoid确保输出在0-1之间
|
||||||
|
)
|
||||||
|
|
||||||
|
# 判别器
|
||||||
|
self.discriminator = nn.Sequential(
|
||||||
|
nn.Linear(latent_dim, hidden_dim2),
|
||||||
|
nn.ReLU(True),
|
||||||
|
nn.Linear(hidden_dim2, hidden_dim1),
|
||||||
|
nn.ReLU(True),
|
||||||
|
nn.Linear(hidden_dim1, 1),
|
||||||
|
nn.Sigmoid()
|
||||||
|
)
|
||||||
|
|
||||||
|
def encode(self, x):
|
||||||
|
return self.encoder(x)
|
||||||
|
|
||||||
|
def decode(self, z):
|
||||||
|
return self.decoder(z)
|
||||||
|
|
||||||
|
def discriminate(self, z):
|
||||||
|
return self.discriminator(z)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
z = self.encode(x)
|
||||||
|
recon_x = self.decode(z)
|
||||||
|
return recon_x, z
|
||||||
|
|
||||||
|
def train_adversarial_autoencoder():
    """Train the adversarial autoencoder on the credit data set.

    Reads ``data/credit_data.csv``, standardizes the numerical features,
    trains the AAE (MSE reconstruction plus adversarial regularization of
    the latent code towards N(0, I)), and saves the model weights and the
    fitted scaler under ``models/``.

    Returns:
        (model, scaler): the trained AdversarialAutoEncoder and the fitted
        StandardScaler.
    """
    # Load the raw credit data.
    print("读取信贷数据...")
    df = pd.read_csv('data/credit_data.csv')

    # Only numerical features are used for autoencoder training.
    numerical_features = ['age', 'income', 'employment_length', 'loan_amount',
                          'credit_score', 'debt_to_income', 'num_credit_lines']
    X = df[numerical_features]

    # Standardize features to zero mean / unit variance.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Convert to a PyTorch tensor.
    X_tensor = torch.FloatTensor(X_scaled)

    # Model hyper-parameters.
    input_dim = X_tensor.shape[1]
    hidden_dim1 = 64
    hidden_dim2 = 32
    latent_dim = 16

    model = AdversarialAutoEncoder(input_dim, hidden_dim1, hidden_dim2, latent_dim)

    # Losses: MSE for reconstruction, BCE for the adversarial game.
    reconstruction_criterion = nn.MSELoss()
    adversarial_criterion = nn.BCELoss()

    # The autoencoder optimizer updates encoder + decoder only; the
    # discriminator has its own optimizer so the two can be stepped
    # independently.
    autoencoder_optimizer = optim.Adam(
        list(model.encoder.parameters()) + list(model.decoder.parameters()),
        lr=0.001
    )
    discriminator_optimizer = optim.Adam(model.discriminator.parameters(), lr=0.001)

    num_epochs = 100
    batch_size = 64

    print("开始训练对抗自编码器...")
    for epoch in range(num_epochs):
        for i in range(0, len(X_tensor), batch_size):
            batch = X_tensor[i:i + batch_size]

            # ---- Autoencoder (generator) step ----
            autoencoder_optimizer.zero_grad()

            recon_batch, latent_batch = model(batch)
            # Label tensors are sized per batch: the final batch may be short.
            real_labels = torch.ones(batch.size(0), 1)
            fake_labels = torch.zeros(batch.size(0), 1)

            # Reconstruction loss.
            recon_loss = reconstruction_criterion(recon_batch, batch)

            # Adversarial loss - the generator wants the discriminator to
            # classify its latent codes as "real" (drawn from the prior).
            disc_fake = model.discriminate(latent_batch)
            adversarial_loss = adversarial_criterion(disc_fake, real_labels)

            # 0.1 weights the adversarial term relative to reconstruction.
            autoencoder_loss = recon_loss + 0.1 * adversarial_loss
            autoencoder_loss.backward()
            autoencoder_optimizer.step()

            # ---- Discriminator step ----
            discriminator_optimizer.zero_grad()

            # "Real" latent vectors are sampled from the standard normal prior.
            real_latent = torch.randn(batch.size(0), latent_dim)
            disc_real = model.discriminate(real_latent)
            disc_real_loss = adversarial_criterion(disc_real, real_labels)

            # Encoder output is detached so this step updates only the
            # discriminator, not the encoder.
            disc_fake = model.discriminate(latent_batch.detach())
            disc_fake_loss = adversarial_criterion(disc_fake, fake_labels)

            discriminator_loss = disc_real_loss + disc_fake_loss
            discriminator_loss.backward()
            discriminator_optimizer.step()

        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], '
                  f'Recon Loss: {recon_loss.item():.4f}, '
                  f'Adversarial Loss: {adversarial_loss.item():.4f}, '
                  f'Discriminator Loss: {discriminator_loss.item():.4f}')

    # Persist the model weights and the fitted scaler.
    # BUGFIX: create the output directory first; torch.save / joblib.dump
    # raise FileNotFoundError if 'models/' does not exist yet.
    print("保存对抗自编码器模型...")
    os.makedirs('models', exist_ok=True)
    torch.save(model.state_dict(), 'models/adversarial_autoencoder.pth')
    joblib.dump(scaler, 'models/ae_scaler.pkl')

    return model, scaler
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Check whether a GPU is available.
    # NOTE(review): `device` is only printed here — the training routine
    # never moves the model or tensors onto it, so training always runs on
    # CPU regardless of this value.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"使用设备: {device}")

    model, scaler = train_adversarial_autoencoder()
    print("对抗自编码器训练完成!")
|
||||||
@ -0,0 +1,93 @@
|
|||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
# 添加项目根目录到Python路径
|
||||||
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
|
||||||
|
import lightgbm as lgb
|
||||||
|
import joblib
|
||||||
|
|
||||||
|
from data.data_generator import preprocess_data
|
||||||
|
|
||||||
|
def train_lightgbm_model():
    """Train a LightGBM classifier for credit-risk assessment.

    Reads ``data/credit_data.csv``, preprocesses it via
    ``data.data_generator.preprocess_data``, trains a LightGBM classifier
    with early stopping monitored on a stratified held-out split, prints
    evaluation metrics and feature importances, and saves the model and
    preprocessors under ``models/``.

    Returns:
        (model, scaler, le_education, le_home, le_purpose)
    """
    # Load the raw credit data.
    print("读取信贷数据...")
    df = pd.read_csv('data/credit_data.csv')
    print(f"数据形状: {df.shape}")

    # Preprocess: scaling + label-encoding of the categorical columns.
    print("数据预处理...")
    X, y, scaler, le_education, le_home, le_purpose = preprocess_data(df)

    # Stratified split preserves the class ratio in train and test.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    print(f"训练集大小: {X_train.shape}")
    print(f"测试集大小: {X_test.shape}")

    # Build the LightGBM classifier.
    print("创建LightGBM模型...")
    model = lgb.LGBMClassifier(
        n_estimators=200,
        max_depth=8,
        learning_rate=0.05,
        num_leaves=64,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        verbose=-1,
    )

    # Train with early stopping monitored on the held-out split.
    print("训练模型...")
    model.fit(X_train, y_train,
              eval_set=[(X_test, y_test)],
              eval_metric='binary_logloss',
              callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])

    # Evaluate on the held-out split.  (The original also computed
    # predict_proba here but never used the result; removed.)
    print("模型预测...")
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"模型准确率: {accuracy:.4f}")

    print("\n分类报告:")
    print(classification_report(y_test, y_pred))

    print("\n混淆矩阵:")
    print(confusion_matrix(y_test, y_pred))

    # Persist the model and preprocessors.
    # BUGFIX: create the output directory first; joblib.dump raises
    # FileNotFoundError if 'models/' does not exist yet.
    print("保存模型和预处理器...")
    os.makedirs('models', exist_ok=True)
    joblib.dump(model, 'models/lightgbm_model.pkl')
    joblib.dump(scaler, 'models/scaler.pkl')
    joblib.dump(le_education, 'models/le_education.pkl')
    joblib.dump(le_home, 'models/le_home.pkl')
    joblib.dump(le_purpose, 'models/le_purpose.pkl')

    # Rank features by importance for quick inspection.
    feature_importance = pd.DataFrame({
        'feature': X.columns,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)

    print("\n特征重要性:")
    print(feature_importance)

    return model, scaler, le_education, le_home, le_purpose
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: train and persist the LightGBM model.
    model, scaler, le_education, le_home, le_purpose = train_lightgbm_model()
    print("\n模型训练完成!")
|
||||||
@ -0,0 +1,88 @@
|
|||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
# 添加项目根目录到Python路径
|
||||||
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
|
||||||
|
import xgboost as xgb
|
||||||
|
import joblib
|
||||||
|
|
||||||
|
from data.data_generator import preprocess_data
|
||||||
|
|
||||||
|
def train_xgboost_model():
    """Train an XGBoost classifier for credit-risk assessment.

    Reads ``data/credit_data.csv``, preprocesses it via
    ``data.data_generator.preprocess_data``, trains an XGBoost classifier,
    prints evaluation metrics and feature importances, and saves the model
    and preprocessors under ``models/``.

    Returns:
        (model, scaler, le_education, le_home, le_purpose)
    """
    # Load the raw credit data.
    print("读取信贷数据...")
    df = pd.read_csv('data/credit_data.csv')
    print(f"数据形状: {df.shape}")

    # Preprocess: scaling + label-encoding of the categorical columns.
    print("数据预处理...")
    X, y, scaler, le_education, le_home, le_purpose = preprocess_data(df)

    # Stratified split preserves the class ratio in train and test.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    print(f"训练集大小: {X_train.shape}")
    print(f"测试集大小: {X_test.shape}")

    # Build the XGBoost classifier.
    print("创建XGBoost模型...")
    model = xgb.XGBClassifier(
        n_estimators=100,
        max_depth=6,
        learning_rate=0.1,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42
    )

    # Train on the training split.
    print("训练模型...")
    model.fit(X_train, y_train)

    # Evaluate on the held-out split.  (The original also computed
    # predict_proba here but never used the result; removed.)
    print("模型预测...")
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"模型准确率: {accuracy:.4f}")

    print("\n分类报告:")
    print(classification_report(y_test, y_pred))

    print("\n混淆矩阵:")
    print(confusion_matrix(y_test, y_pred))

    # Persist the model and preprocessors.
    # BUGFIX: create the output directory first; joblib.dump raises
    # FileNotFoundError if 'models/' does not exist yet.
    print("保存模型和预处理器...")
    os.makedirs('models', exist_ok=True)
    joblib.dump(model, 'models/xgboost_model.pkl')
    joblib.dump(scaler, 'models/scaler.pkl')
    joblib.dump(le_education, 'models/le_education.pkl')
    joblib.dump(le_home, 'models/le_home.pkl')
    joblib.dump(le_purpose, 'models/le_purpose.pkl')

    # Rank features by importance for quick inspection.
    feature_importance = pd.DataFrame({
        'feature': X.columns,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)

    print("\n特征重要性:")
    print(feature_importance)

    return model, scaler, le_education, le_home, le_purpose
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: train and persist the XGBoost model.
    model, scaler, le_education, le_home, le_purpose = train_xgboost_model()
    print("\n模型训练完成!")
|
||||||
|
After Width: | Height: | Size: 56 KiB |
|
After Width: | Height: | Size: 269 KiB |
|
After Width: | Height: | Size: 57 KiB |
|
After Width: | Height: | Size: 58 KiB |
|
After Width: | Height: | Size: 52 KiB |
|
After Width: | Height: | Size: 57 KiB |
|
After Width: | Height: | Size: 88 KiB |
|
After Width: | Height: | Size: 58 KiB |
|
|
After Width: | Height: | Size: 87 KiB |
|
After Width: | Height: | Size: 53 KiB |
|
After Width: | Height: | Size: 54 KiB |
|
After Width: | Height: | Size: 112 KiB |
|
After Width: | Height: | Size: 263 KiB |