#!/usr/bin/env python3
"""
Final AI optimization verification test.

Queries the API at ``API_BASE`` and prints a report on the overall
improvement of the AI processing logic.
"""

import json
import sys
import time

import requests

API_BASE = "http://127.0.0.1:8080/api"

# (minimum score, grade, message) rows, checked from the highest threshold down.
_GRADE_TABLE = (
    (85, "A+", "🏆 优秀!AI处理逻辑已达到生产级别"),
    (75, "A", "✅ 良好!AI处理逻辑显著改善"),
    (65, "B+", "📈 改善明显,还有优化空间"),
    (50, "B", "⚠️ 有改善,但需要进一步优化"),
)
# Grade and message used when the score is below every threshold above.
_FALLBACK_GRADE = ("C", "❌ 需要重新设计AI处理逻辑")


def _percent(part, whole):
    """Return ``part / whole`` as a percentage, or 0.0 when ``whole`` is zero."""
    if whole == 0:
        return 0.0
    return part / whole * 100


def _clean_field(item, key):
    """Return ``item[key]`` as a stripped string; missing or null gives ''."""
    value = item.get(key)
    if value is None:
        # ``dict.get(key, '')`` only covers a missing key, not a JSON null.
        return ''
    return str(value).strip()


def _is_valid_call_sign(call_sign):
    """Return True for a non-placeholder call sign longer than two characters."""
    return bool(call_sign) and call_sign != 'N/A' and len(call_sign) > 2


def _is_valid_behavior(behavior):
    """Return True for a behavior that is neither empty nor a placeholder."""
    return bool(behavior) and behavior not in ('N/A', 'unknown')


def _check_health():
    """Step 1: return True when the health endpoint answers with HTTP 200."""
    print("🚀 系统健康检查...")
    try:
        response = requests.get(f"{API_BASE}/health/", timeout=5)
    except Exception as e:
        # Reporting boundary: any failure is printed and ends the run.
        print(f" ✗ 无法连接服务器: {e}")
        return False

    if response.status_code != 200:
        print(f" ✗ 服务器状态异常: {response.status_code}")
        return False

    print(" ✓ Django服务器运行正常")
    return True


def _report_original_data():
    """Step 2: print the original-data summary.

    Returns:
        The ``count`` value reported by the endpoint, or None on failure.
    """
    print("\n📊 原始数据统计...")
    try:
        response = requests.get(f"{API_BASE}/original-data/", timeout=10)
        if response.status_code != 200:
            print(f" ✗ 原始数据获取失败: {response.status_code}")
            return None

        data = response.json()
        original_count = data['count']
        print(f" ✓ 原始数据: {original_count} 条")

        # Show up to three sample records.
        print(" 📋 原始数据样例:")
        for i, item in enumerate(data['data'][:3], 1):
            print(f" {i}. {item['text']}")
        return original_count
    except Exception as e:
        print(f" ✗ 原始数据检查失败: {e}")
        return None


def _report_processed_data():
    """Step 3: print the processed-data quality analysis.

    Returns:
        A ``(processed_count, valid_call_signs, valid_behaviors)`` tuple,
        or None on failure.
    """
    print("\n🔄 AI处理结果统计...")
    try:
        response = requests.get(f"{API_BASE}/processed-data/", timeout=10)
        if response.status_code != 200:
            print(f" ✗ 处理数据获取失败: {response.status_code}")
            return None

        data = response.json()
        processed_count = data['count']
        print(f" ✓ 处理后数据: {processed_count} 条")

        # Normalise each record once; both the tallies and the samples use it.
        records = [
            (
                _clean_field(item, 'call_sign'),
                _clean_field(item, 'behavior'),
                _clean_field(item, 'flight_level'),
            )
            for item in data['data']
        ]
        valid_call_signs = sum(
            _is_valid_call_sign(call_sign) for call_sign, _, _ in records
        )
        valid_behaviors = sum(
            _is_valid_behavior(behavior) for _, behavior, _ in records
        )
        has_flight_level = sum(
            bool(level) and level != 'N/A' for _, _, level in records
        )

        # _percent keeps an empty result set from raising ZeroDivisionError.
        print(" 📊 质量分析:")
        print(f" 有效呼号: {valid_call_signs}/{processed_count} "
              f"({_percent(valid_call_signs, processed_count):.1f}%)")
        print(f" 有效行为: {valid_behaviors}/{processed_count} "
              f"({_percent(valid_behaviors, processed_count):.1f}%)")
        print(f" 包含高度: {has_flight_level}/{processed_count} "
              f"({_percent(has_flight_level, processed_count):.1f}%)")

        # Show up to three records whose call sign and behavior are both valid.
        print(" 📋 高质量提取样例:")
        good_samples = [
            (call_sign, behavior)
            for call_sign, behavior, _ in records
            if _is_valid_call_sign(call_sign) and _is_valid_behavior(behavior)
        ]
        for count, (call_sign, behavior) in enumerate(good_samples[:3], 1):
            print(f" {count}. 呼号: {call_sign} | 行为: {behavior}")

        return processed_count, valid_call_signs, valid_behaviors
    except Exception as e:
        print(f" ✗ 处理数据检查失败: {e}")
        return None


def _report_statistics():
    """Step 4: print the pipeline statistics.

    Returns:
        The ``statistics`` mapping from the endpoint, or None on failure.
    """
    print("\n✅ 最终验证统计...")
    try:
        response = requests.get(f"{API_BASE}/statistics/", timeout=10)
        if response.status_code != 200:
            print(f" ✗ 统计信息获取失败: {response.status_code}")
            return None

        stats = response.json()['statistics']
        extraction_rate = stats.get('extraction_rate', 0)
        validation_rate = stats.get('validation_rate', 0)

        print(" 📊 完整流程统计:")
        print(f" 原始数据: {stats.get('original_count', 0)} 条")
        print(f" 提取数据: {stats.get('extracted_count', 0)} 条")
        print(f" 有效数据: {stats.get('valid_count', 0)} 条")
        print(f" 无效数据: {stats.get('invalid_count', 0)} 条")
        print(f" 提取效率: {extraction_rate}%")
        print(f" 验证通过率: {validation_rate}%")

        # Overall success rate is the mean of the two reported rates.
        overall_success = (extraction_rate + validation_rate) / 2
        print(f" 整体成功率: {overall_success:.1f}%")
        return stats
    except Exception as e:
        print(f" ✗ 统计信息检查失败: {e}")
        return None


def _report_final_assessment(original_count, processed_count,
                             valid_call_signs, valid_behaviors, stats):
    """Step 5: print the weighted score, grade and recommendations.

    Returns:
        True when the assessment was printed, False if computing it failed.
    """
    print("\n" + "="*60)
    print("🏆 AI处理逻辑优化 - 最终评估")
    print("="*60)

    try:
        validation_rate = stats.get('validation_rate', 0)
        # Capped at 100 because more rows may be extracted than were supplied.
        extraction_success = min(_percent(processed_count, original_count), 100)
        quality_score = (_percent(valid_call_signs, processed_count)
                         + _percent(valid_behaviors, processed_count)) / 2

        print("📈 核心指标:")
        print(f" 提取成功率: {extraction_success:.1f}%")
        print(f" 数据质量评分: {quality_score:.1f}%")
        print(f" 验证通过率: {validation_rate}%")

        # Weighted score: 30% extraction, 40% quality, 30% validation.
        final_score = (extraction_success * 0.3
                       + quality_score * 0.4
                       + validation_rate * 0.3)
        print(f"\n🎯 综合评分: {final_score:.1f}/100")

        grade, message = _FALLBACK_GRADE
        for threshold, candidate_grade, candidate_message in _GRADE_TABLE:
            if final_score >= threshold:
                grade, message = candidate_grade, candidate_message
                break
        print(message)
        print(f"🎖️ 综合等级: {grade}")

        # Improvement suggestions.
        print("\n💡 优化建议:")
        if quality_score < 80:
            print(" 1. 进一步优化AI提示词模板")
            print(" 2. 增加更多航空领域的上下文信息")
        if validation_rate < 90:
            print(" 3. 放宽呼号验证规则,适应实际数据")
            print(" 4. 增加更多航空公司数据库条目")
        # NOTE(review): extraction_success is capped at 100 above, so this
        # condition is always true and tip 5 is always printed. Kept as-is;
        # confirm the intended threshold.
        if extraction_success < 120:
            print(" 5. 优化正则表达式备用提取逻辑")

        # Deployment advice.
        print("\n🚀 部署建议:")
        if final_score >= 75:
            print(" ✅ 可以部署到生产环境")
            print(" ✅ 建议定期监控数据质量")
        else:
            print(" ⚠️ 建议在测试环境进一步优化")
            print(" ⚠️ 收集更多真实数据进行训练")

        return True
    except Exception as e:
        print(f"评估过程出错: {e}")
        return False


def run_final_verification():
    """Run the final verification and print the report.

    Runs five steps in order: health check, original-data summary,
    processed-data quality analysis, pipeline statistics, and the final
    weighted assessment. The run stops at the first step that fails.

    An empty data set is reported with 0% scores instead of aborting on a
    division by zero.

    Returns:
        True when every step completed, False otherwise.
    """
    print("🎯 AI处理逻辑优化 - 最终验证")
    print("="*60)

    if not _check_health():
        return False

    # Compare against None explicitly: a count of 0 is a valid result.
    original_count = _report_original_data()
    if original_count is None:
        return False

    quality = _report_processed_data()
    if quality is None:
        return False
    processed_count, valid_call_signs, valid_behaviors = quality

    stats = _report_statistics()
    if stats is None:
        return False

    return _report_final_assessment(
        original_count, processed_count, valid_call_signs, valid_behaviors,
        stats,
    )


if __name__ == "__main__":
    success = run_final_verification()
    print("\n" + "="*60)
    if success:
        print("✅ AI处理逻辑优化验证完成!")
    else:
        print("❌ 验证过程遇到问题")
    # sys.exit rather than the site-provided exit(), which may be absent.
    sys.exit(0 if success else 1)