Compare commits
No commits in common. 'main' and 'master' have entirely different histories.
@ -0,0 +1,8 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
||||
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
||||
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.13 (大作业)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 (大作业)" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/大作业.iml" filepath="$PROJECT_DIR$/.idea/大作业.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.13 (大作业)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
@ -0,0 +1,192 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
import joblib
|
||||
import os
|
||||
import sys
|
||||
|
||||
# 添加项目根目录到Python路径
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
class AutoEncoder(nn.Module):
    """Plain autoencoder for anomaly detection.

    Compresses the input through two hidden layers and reconstructs it;
    anomalous rows are expected to show a large reconstruction error.

    Args:
        input_dim: number of input features.
        hidden_dim1: width of the outer hidden layer.
        hidden_dim2: width of the inner (bottleneck) hidden layer.
    """

    def __init__(self, input_dim, hidden_dim1, hidden_dim2):
        super(AutoEncoder, self).__init__()
        # Encoder: input -> hidden1 -> hidden2
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim1),
            nn.ReLU(True),
            nn.Linear(hidden_dim1, hidden_dim2),
            nn.ReLU(True)
        )

        # Decoder: hidden2 -> hidden1 -> input.
        # The output layer is linear (no ReLU): the training data is
        # standardized (zero mean), so a non-negative output could never
        # reconstruct negative feature values and would put an artificial
        # floor on the reconstruction error.
        self.decoder = nn.Sequential(
            nn.Linear(hidden_dim2, hidden_dim1),
            nn.ReLU(True),
            nn.Linear(hidden_dim1, input_dim)
        )

    def forward(self, x):
        """Encode then decode; returns the reconstruction of ``x``."""
        return self.decoder(self.encoder(x))
|
||||
|
||||
class AdversarialAutoEncoder(nn.Module):
    """Adversarial autoencoder (AAE) for anomaly detection.

    An autoencoder whose latent code is additionally pushed towards a
    standard-normal prior by a discriminator (adversarial regularization).

    Args:
        input_dim: number of input features.
        hidden_dim1: width of the outer hidden layer.
        hidden_dim2: width of the inner hidden layer.
        latent_dim: size of the latent code.
    """

    def __init__(self, input_dim, hidden_dim1, hidden_dim2, latent_dim):
        super(AdversarialAutoEncoder, self).__init__()
        # Encoder: input -> latent.
        # The latent layer is linear so codes can take negative values and
        # actually match the N(0, 1) prior the discriminator is trained
        # against; a final ReLU would make real/fake trivially separable.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim1),
            nn.ReLU(True),
            nn.Linear(hidden_dim1, hidden_dim2),
            nn.ReLU(True),
            nn.Linear(hidden_dim2, latent_dim)
        )

        # Decoder: latent -> input reconstruction.
        # Linear output (no Sigmoid): the training data is standardized to
        # zero mean, so an output bounded to (0, 1) could never reconstruct
        # negative feature values.
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim2),
            nn.ReLU(True),
            nn.Linear(hidden_dim2, hidden_dim1),
            nn.ReLU(True),
            nn.Linear(hidden_dim1, input_dim)
        )

        # Discriminator: latent code -> probability it came from the prior.
        # The Sigmoid stays: training uses BCELoss, which expects (0, 1).
        self.discriminator = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim2),
            nn.ReLU(True),
            nn.Linear(hidden_dim2, hidden_dim1),
            nn.ReLU(True),
            nn.Linear(hidden_dim1, 1),
            nn.Sigmoid()
        )

    def encode(self, x):
        """Map an input batch ``x`` to its latent code."""
        return self.encoder(x)

    def decode(self, z):
        """Reconstruct an input from latent code ``z``."""
        return self.decoder(z)

    def discriminate(self, z):
        """Probability in (0, 1) that ``z`` was drawn from the prior."""
        return self.discriminator(z)

    def forward(self, x):
        """Return ``(reconstruction, latent_code)`` for batch ``x``."""
        z = self.encode(x)
        return self.decode(z), z
|
||||
|
||||
def train_adversarial_autoencoder(num_epochs=100, batch_size=64, adv_weight=0.1):
    """Train the adversarial autoencoder on the credit data set.

    Reads ``data/credit_data.csv``, standardizes the numerical features,
    alternates autoencoder and discriminator updates, then saves the model
    weights and the fitted scaler under ``models/``.

    Args:
        num_epochs: number of passes over the data (default 100).
        batch_size: mini-batch size (default 64).
        adv_weight: weight of the adversarial term in the generator loss.

    Returns:
        (model, scaler): the trained model and the fitted StandardScaler.
    """
    # Load the raw credit data.
    print("读取信贷数据...")
    df = pd.read_csv('data/credit_data.csv')

    # Only numerical features are fed to the autoencoder.
    numerical_features = ['age', 'income', 'employment_length', 'loan_amount',
                          'credit_score', 'debt_to_income', 'num_credit_lines']
    X = df[numerical_features]

    # Standardize to zero mean / unit variance.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_tensor = torch.FloatTensor(X_scaled)

    # Model hyper-parameters.
    input_dim = X_tensor.shape[1]
    hidden_dim1 = 64
    hidden_dim2 = 32
    latent_dim = 16

    model = AdversarialAutoEncoder(input_dim, hidden_dim1, hidden_dim2, latent_dim)

    # Losses: MSE for reconstruction, BCE for the adversarial game.
    reconstruction_criterion = nn.MSELoss()
    adversarial_criterion = nn.BCELoss()

    # Separate optimizers: encoder+decoder vs. discriminator.
    autoencoder_optimizer = optim.Adam(
        list(model.encoder.parameters()) + list(model.decoder.parameters()),
        lr=0.001
    )
    discriminator_optimizer = optim.Adam(model.discriminator.parameters(), lr=0.001)

    print("开始训练对抗自编码器...")
    for epoch in range(num_epochs):
        for i in range(0, len(X_tensor), batch_size):
            batch = X_tensor[i:i + batch_size]

            # --- Autoencoder (generator) step ---
            autoencoder_optimizer.zero_grad()
            recon_batch, latent_batch = model(batch)
            real_labels = torch.ones(batch.size(0), 1)
            fake_labels = torch.zeros(batch.size(0), 1)

            recon_loss = reconstruction_criterion(recon_batch, batch)
            # Generator wants the discriminator to label its codes "real".
            disc_fake = model.discriminate(latent_batch)
            adversarial_loss = adversarial_criterion(disc_fake, real_labels)

            autoencoder_loss = recon_loss + adv_weight * adversarial_loss
            autoencoder_loss.backward()
            autoencoder_optimizer.step()

            # --- Discriminator step ---
            discriminator_optimizer.zero_grad()
            # "Real" codes are drawn from the standard-normal prior.
            real_latent = torch.randn(batch.size(0), latent_dim)
            disc_real = model.discriminate(real_latent)
            disc_real_loss = adversarial_criterion(disc_real, real_labels)
            # Encoder outputs are "fake"; detach so only the discriminator learns.
            disc_fake = model.discriminate(latent_batch.detach())
            disc_fake_loss = adversarial_criterion(disc_fake, fake_labels)

            discriminator_loss = disc_real_loss + disc_fake_loss
            discriminator_loss.backward()
            discriminator_optimizer.step()

        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], '
                  f'Recon Loss: {recon_loss.item():.4f}, '
                  f'Adversarial Loss: {adversarial_loss.item():.4f}, '
                  f'Discriminator Loss: {discriminator_loss.item():.4f}')

    # Persist weights and scaler; create the output dir so saving cannot
    # fail on a fresh checkout (the original crashed if models/ was absent).
    print("保存对抗自编码器模型...")
    os.makedirs('models', exist_ok=True)
    torch.save(model.state_dict(), 'models/adversarial_autoencoder.pth')
    joblib.dump(scaler, 'models/ae_scaler.pkl')

    return model, scaler
|
||||
|
||||
if __name__ == "__main__":
    # Report which device is available before kicking off training.
    has_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if has_cuda else 'cpu')
    print(f"使用设备: {device}")

    trained = train_adversarial_autoencoder()
    model, scaler = trained
    print("对抗自编码器训练完成!")
|
||||
@ -0,0 +1,93 @@
|
||||
import sys
|
||||
import os
|
||||
|
||||
# 添加项目根目录到Python路径
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
|
||||
import lightgbm as lgb
|
||||
import joblib
|
||||
|
||||
from data.data_generator import preprocess_data
|
||||
|
||||
def train_lightgbm_model():
    """Train a LightGBM classifier for credit-risk assessment.

    Reads ``data/credit_data.csv``, preprocesses it with the shared
    ``preprocess_data`` helper, trains with early stopping on a held-out
    split, prints evaluation metrics and feature importances, and saves
    the model plus all preprocessors under ``models/``.

    Returns:
        (model, scaler, le_education, le_home, le_purpose)
    """
    # Load the raw data.
    print("读取信贷数据...")
    df = pd.read_csv('data/credit_data.csv')
    print(f"数据形状: {df.shape}")

    # Shared preprocessing: scaling + label-encoding of categoricals.
    print("数据预处理...")
    X, y, scaler, le_education, le_home, le_purpose = preprocess_data(df)

    # Stratified split keeps the class balance in both sets.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    print(f"训练集大小: {X_train.shape}")
    print(f"测试集大小: {X_test.shape}")

    print("创建LightGBM模型...")
    model = lgb.LGBMClassifier(
        n_estimators=200,
        max_depth=8,
        learning_rate=0.05,
        num_leaves=64,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        verbose=-1
    )

    # Train with early stopping monitored on the held-out set.
    print("训练模型...")
    model.fit(X_train, y_train,
              eval_set=[(X_test, y_test)],
              eval_metric='binary_logloss',
              callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])

    # Evaluate on the test set.  (The original also computed
    # predict_proba but never used it; dropped as dead code.)
    print("模型预测...")
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"模型准确率: {accuracy:.4f}")
    print("\n分类报告:")
    print(classification_report(y_test, y_pred))
    print("\n混淆矩阵:")
    print(confusion_matrix(y_test, y_pred))

    # Persist model and preprocessors; create the output dir so saving
    # cannot fail on a fresh checkout.
    print("保存模型和预处理器...")
    os.makedirs('models', exist_ok=True)
    joblib.dump(model, 'models/lightgbm_model.pkl')
    joblib.dump(scaler, 'models/scaler.pkl')
    joblib.dump(le_education, 'models/le_education.pkl')
    joblib.dump(le_home, 'models/le_home.pkl')
    joblib.dump(le_purpose, 'models/le_purpose.pkl')

    # Feature importances, most important first.
    feature_importance = pd.DataFrame({
        'feature': X.columns,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)
    print("\n特征重要性:")
    print(feature_importance)

    return model, scaler, le_education, le_home, le_purpose
|
||||
|
||||
if __name__ == "__main__":
    # Run the full training pipeline when executed as a script.
    results = train_lightgbm_model()
    model, scaler, le_education, le_home, le_purpose = results
    print("\n模型训练完成!")
|
||||
@ -0,0 +1,88 @@
|
||||
import sys
|
||||
import os
|
||||
|
||||
# 添加项目根目录到Python路径
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
|
||||
import xgboost as xgb
|
||||
import joblib
|
||||
|
||||
from data.data_generator import preprocess_data
|
||||
|
||||
def train_xgboost_model():
    """Train an XGBoost classifier for credit-risk assessment.

    Reads ``data/credit_data.csv``, preprocesses it with the shared
    ``preprocess_data`` helper, fits the model, prints evaluation metrics
    and feature importances, and saves the model plus all preprocessors
    under ``models/``.

    Returns:
        (model, scaler, le_education, le_home, le_purpose)
    """
    # Load the raw data.
    print("读取信贷数据...")
    df = pd.read_csv('data/credit_data.csv')
    print(f"数据形状: {df.shape}")

    # Shared preprocessing: scaling + label-encoding of categoricals.
    print("数据预处理...")
    X, y, scaler, le_education, le_home, le_purpose = preprocess_data(df)

    # Stratified split keeps the class balance in both sets.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    print(f"训练集大小: {X_train.shape}")
    print(f"测试集大小: {X_test.shape}")

    print("创建XGBoost模型...")
    model = xgb.XGBClassifier(
        n_estimators=100,
        max_depth=6,
        learning_rate=0.1,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42
    )

    print("训练模型...")
    model.fit(X_train, y_train)

    # Evaluate on the test set.  (The original also computed
    # predict_proba but never used it; dropped as dead code.)
    print("模型预测...")
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"模型准确率: {accuracy:.4f}")
    print("\n分类报告:")
    print(classification_report(y_test, y_pred))
    print("\n混淆矩阵:")
    print(confusion_matrix(y_test, y_pred))

    # Persist model and preprocessors; create the output dir so saving
    # cannot fail on a fresh checkout.
    print("保存模型和预处理器...")
    os.makedirs('models', exist_ok=True)
    joblib.dump(model, 'models/xgboost_model.pkl')
    joblib.dump(scaler, 'models/scaler.pkl')
    joblib.dump(le_education, 'models/le_education.pkl')
    joblib.dump(le_home, 'models/le_home.pkl')
    joblib.dump(le_purpose, 'models/le_purpose.pkl')

    # Feature importances, most important first.
    feature_importance = pd.DataFrame({
        'feature': X.columns,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)
    print("\n特征重要性:")
    print(feature_importance)

    return model, scaler, le_education, le_home, le_purpose
|
||||
|
||||
if __name__ == "__main__":
    # Run the full training pipeline when executed as a script.
    results = train_xgboost_model()
    model, scaler, le_education, le_home, le_purpose = results
    print("\n模型训练完成!")
|
||||
|
After Width: | Height: | Size: 56 KiB |
|
After Width: | Height: | Size: 269 KiB |
|
After Width: | Height: | Size: 57 KiB |
|
After Width: | Height: | Size: 58 KiB |
|
After Width: | Height: | Size: 52 KiB |
|
After Width: | Height: | Size: 57 KiB |
|
After Width: | Height: | Size: 88 KiB |
|
After Width: | Height: | Size: 58 KiB |
|
|
After Width: | Height: | Size: 87 KiB |
|
After Width: | Height: | Size: 53 KiB |
|
After Width: | Height: | Size: 54 KiB |
|
After Width: | Height: | Size: 112 KiB |
|
After Width: | Height: | Size: 263 KiB |