You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
87 lines
2.6 KiB
87 lines
2.6 KiB
"""
|
|
数据处理工具示例
|
|
符合ToolInterface接口规范
|
|
"""
|
|
from typing import Dict, Any
|
|
import pandas as pd
|
|
import numpy as np
|
|
from ..core.orchestrator import ToolInterface, LogMixin
|
|
|
|
class DataProcessorTool(ToolInterface, LogMixin):
|
|
"""数据处理工具"""
|
|
|
|
def __init__(self):
|
|
self._name = "data_processor"
|
|
self._version = "1.0.0"
|
|
|
|
@property
|
|
def name(self) -> str:
|
|
return self._name
|
|
|
|
@property
|
|
def version(self) -> str:
|
|
return self._version
|
|
|
|
def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""执行数据处理"""
|
|
try:
|
|
self.logger.info(f"开始数据处理,参数: {params}")
|
|
|
|
# 输入验证
|
|
data = params.get("data", [])
|
|
operation = params.get("operation", "clean")
|
|
|
|
if not data:
|
|
raise ValueError("输入数据为空")
|
|
|
|
# 转换为DataFrame
|
|
df = pd.DataFrame(data)
|
|
|
|
# 执行不同操作
|
|
if operation == "clean":
|
|
result = self._clean_data(df)
|
|
elif operation == "analyze":
|
|
result = self._analyze_data(df)
|
|
elif operation == "transform":
|
|
result = self._transform_data(df)
|
|
else:
|
|
raise ValueError(f"不支持的操作: {operation}")
|
|
|
|
self.logger.info("数据处理完成")
|
|
return {
|
|
"status": "success",
|
|
"operation": operation,
|
|
"result": result,
|
|
"rows_processed": len(df)
|
|
}
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"数据处理失败: {str(e)}")
|
|
raise
|
|
|
|
def _clean_data(self, df: pd.DataFrame) -> Dict:
|
|
"""数据清洗"""
|
|
# 移除空值
|
|
df_clean = df.dropna()
|
|
# 重置索引
|
|
df_clean = df_clean.reset_index(drop=True)
|
|
|
|
return {
|
|
"cleaned_data": df_clean.to_dict(orient="records"),
|
|
"removed_rows": len(df) - len(df_clean)
|
|
}
|
|
|
|
def _analyze_data(self, df: pd.DataFrame) -> Dict:
|
|
"""数据分析"""
|
|
numeric_cols = df.select_dtypes(include=[np.number]).columns
|
|
|
|
analysis = {}
|
|
for col in numeric_cols:
|
|
analysis[col] = {
|
|
"mean": float(df[col].mean()),
|
|
"std": float(df[col].std()),
|
|
"min": float(df[col].min()),
|
|
"max": float(df[col].max())
|
|
}
|
|
|
|
return {"analysis": analysis} |