From 1722592fe4c91db414f2811cd2f79a07f9c5c9f3 Mon Sep 17 00:00:00 2001
From: ay0w55p <1036713672@qq.com>
Date: Tue, 15 Apr 2025 19:04:43 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86=E7=8F=AD=E9=95=BF?=
 =?UTF-8?q?=E5=86=99=E7=9A=84=E5=90=8E=E7=AB=AF=E6=96=87=E4=BB=B6=EF=BC=8C?=
 =?UTF-8?q?=E5=90=8E=E7=AB=AF=E5=85=84=E5=BC=9F=E4=BB=AC=E5=8F=AF=E4=BB=A5?=
 =?UTF-8?q?=E5=9F=BA=E4=BA=8E=E8=BF=99=E4=B8=AA=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 班长后端/data_prewashing.py | 96 +++++++++++++++++++++++++++++++++
 班长后端/data_processing.py |  0
 班长后端/data_request.py    | 25 +++++++++
 班长后端/data_tosql.py      | 35 ++++++++++++
 班长后端/main.py            | 13 ++++
 5 files changed, 169 insertions(+)
 create mode 100644 班长后端/data_prewashing.py
 create mode 100644 班长后端/data_processing.py
 create mode 100644 班长后端/data_request.py
 create mode 100644 班长后端/data_tosql.py
 create mode 100644 班长后端/main.py

diff --git a/班长后端/data_prewashing.py b/班长后端/data_prewashing.py
new file mode 100644
index 0000000..76ca936
--- /dev/null
+++ b/班长后端/data_prewashing.py
@@ -0,0 +1,96 @@
+"""Pre-washing: merge the utterance rows of each category into one row."""
+from pathlib import Path
+
+import pandas as pd
+
+from data_request import get_data_from_mysql
+
+
+def aggregate_texts(group):
+    """Concatenate every utterance of one category into a single text.
+
+    Each row contributes one line "<key>: <text>", where <key> is the last
+    '_'-separated field of the row's 'type' value (AT / PI / PIAT).  Rows are
+    emitted in the order they appear in ``group``.
+    """
+    # str.join avoids the quadratic cost of repeated string concatenation.
+    return ''.join(
+        f"{kind.split('_')[-1]}: {words}\n"
+        for kind, words in zip(group['type'], group['text'])
+    )
+
+
+def construct_new_type(group):
+    """Build the merged 'type' value "<category>_<start>_<end>" for a group.
+
+    <start> is the smallest third field and <end> the largest fourth field
+    of the group's 'type' values.
+    """
+    # NOTE(review): the fields are compared as strings, so this presumably
+    # relies on them being zero-padded to equal width -- confirm.
+    start_time = group['type'].apply(lambda x: x.split('_')[2]).min()
+    end_time = group['type'].apply(lambda x: x.split('_')[3]).max()
+    return f"{group['category'].iloc[0]}_{start_time}_{end_time}"
+
+
+def reshape_data(df):
+    """Collapse the utterance rows of ``df`` into one row per category.
+
+    The category is the first two '_'-separated fields of 'type'
+    (e.g. uwb-atcc_ACCU-0agmXf).  Returns a new DataFrame with the columns
+    type, text, segment_start_time, segment_end_time and duration; ``df``
+    itself is left unmodified.
+    """
+    # assign() returns a new frame, so the caller's ``df`` does not gain a
+    # 'category' column and slices such as df.head(n) are never written to.
+    categorized = df.assign(
+        category=df['type'].apply(lambda x: '_'.join(x.split('_')[:2]))
+    )
+    rows = []
+    for _, group in categorized.groupby('category'):
+        start = group['segment_start_time'].min()
+        end = group['segment_end_time'].max()
+        rows.append({
+            'type': construct_new_type(group),
+            'text': aggregate_texts(group),
+            'segment_start_time': start,
+            'segment_end_time': end,
+            'duration': end - start,
+        })
+    # Explicit columns keep the schema stable when there are no rows.
+    columns = [
+        'type', 'text', 'segment_start_time', 'segment_end_time', 'duration',
+    ]
+    return pd.DataFrame(rows, columns=columns)
+
+
+def recognize_callsign(df):
+    """Identify aircraft callsigns (placeholder: returns ``df`` unchanged)."""
+    return df
+
+
+def summarize_callsign(df):
+    """Summarize callsign info (placeholder: returns ``df`` unchanged)."""
+    return df
+
+
+def wash_pipeline(df):
+    """Run the washing steps in order and return the resulting DataFrame."""
+    # Merge the utterance texts.
+    df = reshape_data(df)
+    # Identify aircraft callsigns.
+    df = recognize_callsign(df)
+    # Summarize the information related to each callsign.
+    df = summarize_callsign(df)
+    return df
+
+
+if __name__ == "__main__":
+    # NOTE(review): credentials are hard-coded; move them to configuration.
+    df = get_data_from_mysql('root', '1234', 'atc', 'origin_table')
+    # Only take the first 1000 rows.
+    df = df.head(1000)
+    df = wash_pipeline(df)
+    # Path joins with the platform separator; the literal '..\\data\\...'
+    # is only a relative path on Windows.
+    df.to_csv(Path('..') / 'data' / 'wash_result.csv', index=False)
diff --git a/班长后端/data_processing.py b/班长后端/data_processing.py
new file mode 100644
index 0000000..e69de29
diff --git a/班长后端/data_request.py b/班长后端/data_request.py
new file mode 100644
index 0000000..1963709
--- /dev/null
+++ b/班长后端/data_request.py
@@ -0,0 +1,25 @@
+# Fetch data from the MySQL database.
+from urllib.parse import quote
+
+import pandas as pd
+from sqlalchemy import create_engine
+
+
+def get_data_from_mysql(user, password, database, table):
+    """Return every row of ``table`` in ``database`` as a DataFrame.
+
+    Connects to MySQL on localhost:3306 through the pymysql driver.
+    """
+    # Percent-encode the credentials so characters such as '@', ':' or '/'
+    # in them cannot corrupt the connection URL.
+    engine = create_engine(
+        f"mysql+pymysql://{quote(user, safe='')}:{quote(password, safe='')}"
+        f"@localhost:3306/{database}"
+    )
+    try:
+        # NOTE(review): ``table`` is interpolated into the SQL text, so only
+        # trusted table names may be passed.
+        return pd.read_sql(f"SELECT * FROM {table}", con=engine)
+    finally:
+        # Release the pooled connections once the rows are loaded.
+        engine.dispose()
diff --git a/班长后端/data_tosql.py b/班长后端/data_tosql.py
new file mode 100644
index 0000000..4172536
--- /dev/null
+++ b/班长后端/data_tosql.py
@@ -0,0 +1,35 @@
+from pathlib import Path
+from urllib.parse import quote
+
+import pandas as pd
+from sqlalchemy import create_engine, text
+
+
+def save_data_to_mysql(df, user, password, database, table):
+    """Append the rows of ``df`` to ``table`` in ``database``.
+
+    Connects to MySQL on localhost:3306 through the pymysql driver; the
+    DataFrame index is not written.
+    """
+    # Percent-encode the credentials so characters such as '@', ':' or '/'
+    # in them cannot corrupt the connection URL.
+    engine = create_engine(
+        f"mysql+pymysql://{quote(user, safe='')}:{quote(password, safe='')}"
+        f"@localhost:3306/{database}"
+    )
+    try:
+        df.to_sql(table, con=engine, if_exists='append', index=False)
+    finally:
+        # Release the pooled connections once the rows are written.
+        engine.dispose()
+
+
+if __name__ == '__main__':
+    # Read the CSV and store it in the database.
+    # Path joins with the platform separator; the literal '..\\data\\...'
+    # is only a relative path on Windows.
+    csv_path = Path('..') / 'data' / 'train2.csv'
+    df = pd.read_csv(csv_path).rename(columns={'id': 'type'})
+    # NOTE(review): credentials are hard-coded; move them to configuration.
+    save_data_to_mysql(df, 'root', '1234', 'atc', 'origin_table')
+    print("数据导入完成,共插入{}条记录".format(len(df)))
diff --git a/班长后端/main.py b/班长后端/main.py
new file mode 100644
index 0000000..18fe81d
--- /dev/null
+++ b/班长后端/main.py
@@ -0,0 +1,13 @@
+import pandas as pd
+from data_request import get_data_from_mysql
+from data_prewashing import wash_pipeline
+
+if __name__ == "__main__":
+    # Fetch the data from the MySQL database.
+    # NOTE(review): credentials are hard-coded; move them to configuration.
+    df = get_data_from_mysql('root', '1234', 'atc', 'origin_table')
+    # data_tosql.py stores the CSV without an 'id' column, so only drop it
+    # when the table actually has one.
+    df = df.drop(columns=['id'], errors='ignore')
+    # Pre-process the data.
+    df = wash_pipeline(df)