# moxun-1/test/final_ai_verification.py

#!/usr/bin/env python3
"""
最终AI优化验证测试
验证AI处理逻辑的整体改进效果
"""

import requests
import json
import time

# Base URL of the API under verification (loopback address, port 8080).
# Every endpoint path requested by this script is appended to it.
API_BASE = "http://127.0.0.1:8080/api"

def _clean_text(value) -> str:
    """Return *value* as a stripped string, treating ``None`` as empty.

    Guards against fields that arrive as JSON ``null`` or as numbers, on
    which a direct ``.strip()`` call would raise.
    """
    return '' if value is None else str(value).strip()


def _percent(part, whole) -> float:
    """Return ``part / whole`` as a percentage, or 0.0 when *whole* is falsy."""
    return part / whole * 100 if whole else 0.0


def _is_valid_call_sign(call_sign: str) -> bool:
    """Tell whether a cleaned call sign is not a placeholder and has > 2 chars."""
    return bool(call_sign) and call_sign != 'N/A' and len(call_sign) > 2


def _is_valid_behavior(behavior: str) -> bool:
    """Tell whether a cleaned behavior value is not empty or a placeholder."""
    return bool(behavior) and behavior not in ('N/A', 'unknown')


def _grade_for(final_score):
    """Return the ``(verdict message, grade)`` pair for a 0-100 score."""
    bands = (
        (85, "🏆 优秀！AI处理逻辑已达到生产级别", "A+"),
        (75, "✅ 良好！AI处理逻辑显著改善", "A"),
        (65, "📈 改善明显，还有优化空间", "B+"),
        (50, "⚠️ 有改善，但需要进一步优化", "B"),
    )
    for threshold, message, grade in bands:
        if final_score >= threshold:
            return message, grade
    return "❌ 需要重新设计AI处理逻辑", "C"


def _check_health(http_get, api_base) -> bool:
    """Print the health-check step and return whether the server answered 200."""
    print("🚀 系统健康检查...")
    try:
        response = http_get(f"{api_base}/health/", timeout=5)
        if response.status_code != 200:
            print(f"   ✗ 服务器状态异常: {response.status_code}")
            return False
        print("   ✓ Django服务器运行正常")
        return True
    except Exception as e:  # best-effort script: report and fail the step
        print(f"   ✗ 无法连接服务器: {e}")
        return False


def _report_original_data(http_get, api_base):
    """Print the original-data step; return the reported count or ``None`` on failure."""
    print("\n📊 原始数据统计...")
    try:
        response = http_get(f"{api_base}/original-data/", timeout=10)
        if response.status_code != 200:
            print(f"   ✗ 原始数据获取失败: {response.status_code}")
            return None

        data = response.json()
        original_count = data['count']
        print(f"   ✓ 原始数据: {original_count} 条")

        # Show up to three sample rows.
        print("   📋 原始数据样例:")
        for i, item in enumerate(data['data'][:3], 1):
            print(f"      {i}. {item['text']}")
        return original_count
    except Exception as e:
        print(f"   ✗ 原始数据检查失败: {e}")
        return None


def _report_processed_data(http_get, api_base):
    """Print the processed-data step; return quality counters or ``None`` on failure.

    The returned dict has the keys ``processed_count``, ``valid_call_signs``
    and ``valid_behaviors``.
    """
    print("\n🔄 AI处理结果统计...")
    try:
        response = http_get(f"{api_base}/processed-data/", timeout=10)
        if response.status_code != 200:
            print(f"   ✗ 处理数据获取失败: {response.status_code}")
            return None

        data = response.json()
        processed_count = data['count']
        print(f"   ✓ 处理后数据: {processed_count} 条")

        # Normalise each row once so null/numeric fields cannot crash the run.
        records = [
            (
                _clean_text(item.get('call_sign')),
                _clean_text(item.get('behavior')),
                _clean_text(item.get('flight_level')),
            )
            for item in data['data']
        ]

        valid_call_signs = sum(1 for cs, _, _ in records if _is_valid_call_sign(cs))
        valid_behaviors = sum(1 for _, bh, _ in records if _is_valid_behavior(bh))
        has_flight_level = sum(1 for _, _, fl in records if fl and fl != 'N/A')

        # NOTE(review): ratios use the reported 'count', not len(data['data']);
        # presumably they match — confirm the endpoint is not paginated.
        print("   📊 质量分析:")
        print(f"      有效呼号: {valid_call_signs}/{processed_count} ({_percent(valid_call_signs, processed_count):.1f}%)")
        print(f"      有效行为: {valid_behaviors}/{processed_count} ({_percent(valid_behaviors, processed_count):.1f}%)")
        print(f"      包含高度: {has_flight_level}/{processed_count} ({_percent(has_flight_level, processed_count):.1f}%)")

        # Show up to three rows where both call sign and behavior are valid.
        print("   📋 高质量提取样例:")
        good_rows = [
            (cs, bh) for cs, bh, _ in records
            if _is_valid_call_sign(cs) and _is_valid_behavior(bh)
        ]
        for count, (cs, bh) in enumerate(good_rows[:3], 1):
            print(f"      {count}. 呼号: {cs} | 行为: {bh}")

        return {
            'processed_count': processed_count,
            'valid_call_signs': valid_call_signs,
            'valid_behaviors': valid_behaviors,
        }
    except Exception as e:
        print(f"   ✗ 处理数据检查失败: {e}")
        return None


def _report_statistics(http_get, api_base):
    """Print the statistics step; return the ``statistics`` dict or ``None`` on failure."""
    print("\n✅ 最终验证统计...")
    try:
        response = http_get(f"{api_base}/statistics/", timeout=10)
        if response.status_code != 200:
            print(f"   ✗ 统计信息获取失败: {response.status_code}")
            return None

        stats = response.json()['statistics']
        extraction_rate = stats.get('extraction_rate', 0)
        validation_rate = stats.get('validation_rate', 0)

        print("   📊 完整流程统计:")
        print(f"      原始数据: {stats.get('original_count', 0)} 条")
        print(f"      提取数据: {stats.get('extracted_count', 0)} 条")
        print(f"      有效数据: {stats.get('valid_count', 0)} 条")
        print(f"      无效数据: {stats.get('invalid_count', 0)} 条")
        print(f"      提取效率: {extraction_rate}%")
        print(f"      验证通过率: {validation_rate}%")

        # Overall success is the plain mean of the two server-side rates.
        overall_success = (extraction_rate + validation_rate) / 2
        print(f"      整体成功率: {overall_success:.1f}%")
        return stats
    except Exception as e:
        print(f"   ✗ 统计信息检查失败: {e}")
        return None


def _report_final_assessment(original_count, quality, stats) -> bool:
    """Print the weighted score, grade and advice; return ``False`` only on error."""
    print("\n" + "="*60)
    print("🏆 AI处理逻辑优化 - 最终评估")
    print("="*60)

    try:
        processed_count = quality['processed_count']
        validation_rate = stats.get('validation_rate', 0)

        # All three scores are always defined (0.0 on empty data) so the
        # advice sections below never hit an unbound name or divide by zero.
        extraction_success = min(_percent(processed_count, original_count), 100)
        quality_score = (_percent(quality['valid_call_signs'], processed_count) +
                         _percent(quality['valid_behaviors'], processed_count)) / 2
        # Weights: 30% extraction, 40% field quality, 30% validation.
        final_score = (extraction_success * 0.3 + quality_score * 0.4 +
                       validation_rate * 0.3)

        if processed_count > 0:
            print("📈 核心指标:")
            print(f"   提取成功率: {extraction_success:.1f}%")
            print(f"   数据质量评分: {quality_score:.1f}%")
            print(f"   验证通过率: {validation_rate}%")

            print(f"\n🎯 综合评分: {final_score:.1f}/100")

            message, grade = _grade_for(final_score)
            print(message)
            print(f"🎖️ 综合等级: {grade}")
        else:
            print("⚠️ 没有处理后的数据，跳过评分")

        # Optimisation advice.
        print("\n💡 优化建议:")
        if quality_score < 80:
            print("   1. 进一步优化AI提示词模板")
            print("   2. 增加更多航空领域的上下文信息")
        if validation_rate < 90:
            print("   3. 放宽呼号验证规则，适应实际数据")
            print("   4. 增加更多航空公司数据库条目")
        # NOTE(review): extraction_success is capped at 100 above, so this
        # condition is always true; the 120 threshold presumably targeted the
        # uncapped ratio — confirm intent before changing it.
        if extraction_success < 120:
            print("   5. 优化正则表达式备用提取逻辑")

        print("\n🚀 部署建议:")
        if final_score >= 75:
            print("   ✅ 可以部署到生产环境")
            print("   ✅ 建议定期监控数据质量")
        else:
            print("   ⚠️ 建议在测试环境进一步优化")
            print("   ⚠️ 收集更多真实数据进行训练")

        return True
    except Exception as e:
        print(f"评估过程出错: {e}")
        return False


def run_final_verification(api_base=None, http_get=None) -> bool:
    """Run the full verification flow and print a report for each step.

    The steps are: health check, original-data summary, processed-data
    quality analysis, server-side statistics, and a final weighted
    assessment. The flow stops at the first step that fails.

    Args:
        api_base: Base URL of the API. Defaults to the module-level
            ``API_BASE``.
        http_get: Callable used for HTTP GET requests; it is called as
            ``http_get(url, timeout=...)`` and must return an object with
            ``status_code`` and ``json()``. Defaults to ``requests.get``.

    Returns:
        ``True`` when every step completed, ``False`` when any request,
        response or evaluation step failed.
    """
    if api_base is None:
        api_base = API_BASE
    if http_get is None:
        http_get = requests.get

    print("🎯 AI处理逻辑优化 - 最终验证")
    print("="*60)

    # 1. Check that the server is up.
    if not _check_health(http_get, api_base):
        return False

    # 2. Summarise the original data.
    original_count = _report_original_data(http_get, api_base)
    if original_count is None:
        return False

    # 3. Analyse the AI-processed data.
    quality = _report_processed_data(http_get, api_base)
    if quality is None:
        return False

    # 4. Fetch the server-side statistics.
    stats = _report_statistics(http_get, api_base)
    if stats is None:
        return False

    # 5. Produce the final assessment.
    return _report_final_assessment(original_count, quality, stats)

if __name__ == "__main__":
    # Run the verification once and report the overall outcome.
    success = run_final_verification()

    print("\n" + "="*60)
    if success:
        print("✅ AI处理逻辑优化验证完成！")
    else:
        print("❌ 验证过程遇到问题")

    # Raise SystemExit directly: the ``exit`` builtin is injected by the
    # ``site`` module for interactive use and is missing under ``python -S``
    # or in frozen/embedded interpreters.
    raise SystemExit(0 if success else 1)