You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
moxun-1/test/final_ai_verification.py

210 lines
8.3 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env python3
"""
最终AI优化验证测试
验证AI处理逻辑的整体改进效果
"""
import requests
import json
import time
API_BASE = "http://127.0.0.1:8080/api"
def _clean_field(item, key):
    """Return ``item[key]`` as a stripped string; missing or ``None`` becomes ''."""
    value = item.get(key)
    # dict.get's default only covers a missing key; a JSON null arrives as
    # None and a numeric field as int/float, neither of which has .strip().
    return "" if value is None else str(value).strip()


def _percent(part, total):
    """Return ``part / total`` as a percentage, or 0.0 when ``total`` is zero."""
    return part / total * 100 if total else 0.0


def run_final_verification() -> bool:
    """Run the end-to-end verification of the AI processing pipeline.

    Queries four endpoints under ``API_BASE`` in order (``/health/``,
    ``/original-data/``, ``/processed-data/``, ``/statistics/``), prints a
    report for each step, and finally prints a weighted score, a grade and
    follow-up suggestions.

    Returns:
        True when every step succeeded and a final score was computed.
        False when a request fails, an endpoint answers with a non-200
        status, a response lacks an expected key, or there is no original
        or processed data to score.
    """
    print("🎯 AI处理逻辑优化 - 最终验证")
    print("="*60)

    # 1. Check that the server is up.
    print("🚀 系统健康检查...")
    try:
        response = requests.get(f"{API_BASE}/health/", timeout=5)
        if response.status_code != 200:
            print(f" ✗ 服务器状态异常: {response.status_code}")
            return False
        print(" ✓ Django服务器运行正常")
    except Exception as e:
        print(f" ✗ 无法连接服务器: {e}")
        return False

    # 2. Fetch the original-data count and show a few samples.
    print("\n📊 原始数据统计...")
    try:
        response = requests.get(f"{API_BASE}/original-data/", timeout=10)
        if response.status_code != 200:
            print(f" ✗ 原始数据获取失败: {response.status_code}")
            return False
        data = response.json()
        original_count = data['count']
        print(f" ✓ 原始数据: {original_count}")
        print(" 📋 原始数据样例:")
        for i, item in enumerate(data['data'][:3], 1):
            print(f" {i}. {item['text']}")
    except Exception as e:
        print(f" ✗ 原始数据检查失败: {e}")
        return False

    # 3. Fetch the processed data and measure field quality.
    print("\n🔄 AI处理结果统计...")
    try:
        response = requests.get(f"{API_BASE}/processed-data/", timeout=10)
        if response.status_code != 200:
            print(f" ✗ 处理数据获取失败: {response.status_code}")
            return False
        data = response.json()
        processed_count = data['count']
        print(f" ✓ 处理后数据: {processed_count}")

        valid_call_signs = 0
        valid_behaviors = 0
        has_flight_level = 0
        samples = []  # first three records with both a valid call sign and behavior
        for item in data['data']:
            call_sign = _clean_field(item, 'call_sign')
            behavior = _clean_field(item, 'behavior')
            flight_level = _clean_field(item, 'flight_level')

            # len > 2 already excludes the empty string.
            call_sign_ok = call_sign != 'N/A' and len(call_sign) > 2
            behavior_ok = behavior not in ('', 'N/A', 'unknown')

            if call_sign_ok:
                valid_call_signs += 1
            if behavior_ok:
                valid_behaviors += 1
            if flight_level and flight_level != 'N/A':
                has_flight_level += 1
            if call_sign_ok and behavior_ok and len(samples) < 3:
                samples.append((call_sign, behavior))

        # _percent guards against processed_count == 0, which would otherwise
        # surface as a confusing "division by zero" failure of this step.
        print(f" 📊 质量分析:")
        print(f" 有效呼号: {valid_call_signs}/{processed_count} ({_percent(valid_call_signs, processed_count):.1f}%)")
        print(f" 有效行为: {valid_behaviors}/{processed_count} ({_percent(valid_behaviors, processed_count):.1f}%)")
        print(f" 包含高度: {has_flight_level}/{processed_count} ({_percent(has_flight_level, processed_count):.1f}%)")

        print(" 📋 高质量提取样例:")
        for count, (call_sign, behavior) in enumerate(samples, 1):
            print(f" {count}. 呼号: {call_sign} | 行为: {behavior}")
    except Exception as e:
        print(f" ✗ 处理数据检查失败: {e}")
        return False

    # 4. Fetch the server-side pipeline statistics.
    print("\n✅ 最终验证统计...")
    try:
        response = requests.get(f"{API_BASE}/statistics/", timeout=10)
        if response.status_code != 200:
            print(f" ✗ 统计信息获取失败: {response.status_code}")
            return False
        data = response.json()
        stats = data['statistics']
        print(f" 📊 完整流程统计:")
        print(f" 原始数据: {stats.get('original_count', 0)}")
        print(f" 提取数据: {stats.get('extracted_count', 0)}")
        print(f" 有效数据: {stats.get('valid_count', 0)}")
        print(f" 无效数据: {stats.get('invalid_count', 0)}")
        print(f" 提取效率: {stats.get('extraction_rate', 0)}%")
        print(f" 验证通过率: {stats.get('validation_rate', 0)}%")

        # `or 0` also covers a rate that is present but null.
        extraction_rate = stats.get('extraction_rate') or 0
        validation_rate = stats.get('validation_rate') or 0
        overall_success = (extraction_rate + validation_rate) / 2
        print(f" 整体成功率: {overall_success:.1f}%")
    except Exception as e:
        print(f" ✗ 统计信息检查失败: {e}")
        return False

    # 5. Produce the final assessment from the numbers gathered above.
    print("\n" + "="*60)
    print("🏆 AI处理逻辑优化 - 最终评估")
    print("="*60)
    try:
        # Without both counts the ratios below are undefined; report that
        # explicitly instead of failing with a ZeroDivisionError.
        if not original_count or not processed_count:
            print("⚠️ 缺少原始数据或处理后数据,无法计算评估分数")
            return False

        extraction_success = min(processed_count / original_count * 100, 100)
        quality_score = (_percent(valid_call_signs, processed_count) +
                         _percent(valid_behaviors, processed_count)) / 2
        print(f"📈 核心指标:")
        print(f" 提取成功率: {extraction_success:.1f}%")
        print(f" 数据质量评分: {quality_score:.1f}%")
        print(f" 验证通过率: {stats.get('validation_rate', 0)}%")

        # Weighted score: 30% extraction, 40% quality, 30% validation.
        final_score = (extraction_success * 0.3 + quality_score * 0.4 +
                       validation_rate * 0.3)
        print(f"\n🎯 综合评分: {final_score:.1f}/100")

        # (minimum score, message, grade), checked from best to worst.
        grade_table = (
            (85, "🏆 优秀AI处理逻辑已达到生产级别", "A+"),
            (75, "✅ 良好AI处理逻辑显著改善", "A"),
            (65, "📈 改善明显,还有优化空间", "B+"),
            (50, "⚠️ 有改善,但需要进一步优化", "B"),
        )
        for threshold, message, grade in grade_table:
            if final_score >= threshold:
                print(message)
                break
        else:
            print("❌ 需要重新设计AI处理逻辑")
            grade = "C"
        print(f"🎖️ 综合等级: {grade}")

        # Improvement suggestions.
        print(f"\n💡 优化建议:")
        if quality_score < 80:
            print(" 1. 进一步优化AI提示词模板")
            print(" 2. 增加更多航空领域的上下文信息")
        if validation_rate < 90:
            print(" 3. 放宽呼号验证规则,适应实际数据")
            print(" 4. 增加更多航空公司数据库条目")
        # NOTE(review): extraction_success is capped at 100 above, so this
        # condition is always true and suggestion 5 is always printed.
        # Kept as-is because the intended threshold is unclear; confirm.
        if extraction_success < 120:
            print(" 5. 优化正则表达式备用提取逻辑")

        print(f"\n🚀 部署建议:")
        if final_score >= 75:
            print(" ✅ 可以部署到生产环境")
            print(" ✅ 建议定期监控数据质量")
        else:
            print(" ⚠️ 建议在测试环境进一步优化")
            print(" ⚠️ 收集更多真实数据进行训练")
        return True
    except Exception as e:
        print(f"评估过程出错: {e}")
        return False
if __name__ == "__main__":
    # Run the verification and report the outcome through the exit code
    # (0 on success, 1 on failure).
    success = run_final_verification()
    print("\n" + "="*60)
    if success:
        print("✅ AI处理逻辑优化验证完成")
    else:
        print("❌ 验证过程遇到问题")
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # site module for interactive use and is absent under `python -S` or in
    # frozen builds.
    raise SystemExit(0 if success else 1)