From 1722592fe4c91db414f2811cd2f79a07f9c5c9f3 Mon Sep 17 00:00:00 2001
From: ay0w55p <1036713672@qq.com>
Date: Tue, 15 Apr 2025 19:04:43 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86=E7=8F=AD=E9=95=BF?=
 =?UTF-8?q?=E5=86=99=E7=9A=84=E5=90=8E=E7=AB=AF=E6=96=87=E4=BB=B6=EF=BC=8C?=
 =?UTF-8?q?=E5=90=8E=E7=AB=AF=E5=85=84=E5=BC=9F=E4=BB=AC=E5=8F=AF=E4=BB=A5?=
 =?UTF-8?q?=E5=9F=BA=E4=BA=8E=E8=BF=99=E4=B8=AA=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 班长后端/data_prewashing.py | 58 +++++++++++++++++++++++++++++++++
 班长后端/data_processing.py |  0
 班长后端/data_request.py    |  9 +++++
 班长后端/data_tosql.py      | 14 ++++++++
 班长后端/main.py            | 10 ++++++
 5 files changed, 91 insertions(+)
 create mode 100644 班长后端/data_prewashing.py
 create mode 100644 班长后端/data_processing.py
 create mode 100644 班长后端/data_request.py
 create mode 100644 班长后端/data_tosql.py
 create mode 100644 班长后端/main.py

diff --git a/班长后端/data_prewashing.py b/班长后端/data_prewashing.py
new file mode 100644
index 0000000..76ca936
--- /dev/null
+++ b/班长后端/data_prewashing.py
@@ -0,0 +1,58 @@
+import pandas as pd
+from data_request import get_data_from_mysql
+
+# Concatenate every utterance of one group into a single "KEY: words" text block.
+def aggregate_texts(group):
+    """Return one 'KEY: words\\n' line per row, where KEY is the last '_' field of `type`."""
+    # The last '_'-separated field of `type` is the tag to extract, e.g. AT/PI/PIAT.
+    return ''.join(
+        f"{kind.split('_')[-1]}: {words}\n"
+        for kind, words in zip(group['type'], group['text'])
+    )
+
+# Build the merged label "<category>_<smallest field 2>_<largest field 3>"; NOTE(review): min/max compare strings, presumably zero-padded times - confirm.
+def construct_new_type(group):
+    pieces = group['type'].map(lambda label: label.split('_'))
+    first, last = pieces.map(lambda p: p[2]).min(), pieces.map(lambda p: p[3]).max()
+    return f"{group['category'].iloc[0]}_{first}_{last}"
+
+# Merge all rows of one category (first two '_' fields of `type`, e.g. uwb-atcc_ACCU-0agmXf) into one row.
+def reshape_data(df):
+    """Return one summary row per category; the caller's DataFrame is not modified."""
+    # assign() builds a copy, so a slice such as df.head(n) is never written to.
+    keyed = df.assign(category=df['type'].apply(lambda x: '_'.join(x.split('_')[:2])))
+    rows = []
+    for _, group in keyed.groupby('category'):
+        start = group['segment_start_time'].min()
+        end = group['segment_end_time'].max()
+        rows.append({
+            'type': construct_new_type(group),
+            'text': aggregate_texts(group),
+            'segment_start_time': start, 'segment_end_time': end, 'duration': end - start,
+        })
+    # Explicit columns keep the output schema stable even when df has no rows.
+    return pd.DataFrame(rows, columns=['type', 'text', 'segment_start_time', 'segment_end_time', 'duration'])
+
+# Recognise aircraft callsigns (placeholder: currently returns df unchanged).
+def recognize_callsign(df):
+    return df
+
+# Summarise the information related to each callsign (placeholder: currently returns df unchanged).
+def summarize_callsign(df):
+    return df
+
+def wash_pipeline(df):
+    """Run the pre-washing stages in order and return the final DataFrame."""
+    # 1. reshape_data: merge the text rows of each category into one row.
+    # 2. recognize_callsign: recognise aircraft callsigns.
+    # 3. summarize_callsign: summarise the information for each callsign.
+    for stage in (reshape_data, recognize_callsign, summarize_callsign):
+        df = stage(df)
+    return df
+
+if __name__ == "__main__":
+    # NOTE(review): database credentials are hard-coded here; move them to config/env.
+    df = get_data_from_mysql('root', '1234', 'atc', 'origin_table')
+    # Only the first 1000 rows are processed; copy() detaches them from the full frame.
+    df = wash_pipeline(df.head(1000).copy())
+    df.to_csv('../data/wash_result.csv', index=False)  # '/' works on Windows and POSIX alike
\ No newline at end of file
diff --git a/班长后端/data_processing.py b/班长后端/data_processing.py
new file mode 100644
index 0000000..e69de29
diff --git a/班长后端/data_request.py b/班长后端/data_request.py
new file mode 100644
index 0000000..1963709
--- /dev/null
+++ b/班长后端/data_request.py
@@ -0,0 +1,9 @@
+#从mysql数据库中获取数据
+import pandas as pd
+from sqlalchemy import create_engine
+
+def get_data_from_mysql(user, password, database, table):
+    """Read every row of `table` from the MySQL `database` on localhost:3306 into a DataFrame."""
+    from urllib.parse import quote_plus  # '@', ':' or '/' in credentials would corrupt the URL
+    engine = create_engine(f'mysql+pymysql://{quote_plus(user)}:{quote_plus(password)}@localhost:3306/{database}')
+    return pd.read_sql(f"SELECT * FROM {table}", con=engine)  # NOTE(review): `table` is interpolated unescaped; pass trusted names only
\ No newline at end of file
diff --git a/班长后端/data_tosql.py b/班长后端/data_tosql.py
new file mode 100644
index 0000000..4172536
--- /dev/null
+++ b/班长后端/data_tosql.py
@@ -0,0 +1,14 @@
+import pandas as pd
+from sqlalchemy import create_engine, text
+
+def save_data_to_mysql(df, user, password, database, table):
+    """Append `df` to `table` in the MySQL `database` on localhost:3306, without its index."""
+    from urllib.parse import quote_plus  # '@', ':' or '/' in credentials would corrupt the URL
+    engine = create_engine(f'mysql+pymysql://{quote_plus(user)}:{quote_plus(password)}@localhost:3306/{database}')
+    df.to_sql(table, con=engine, if_exists='append', index=False)
+
+if __name__ == '__main__':
+    # Load the CSV (its `id` column is renamed to `type`) and append it to origin_table.
+    df = pd.read_csv('../data/train2.csv').rename(columns={'id': 'type'})  # '/' works on Windows and POSIX alike
+    save_data_to_mysql(df, 'root', '1234', 'atc', 'origin_table')
+    print("数据导入完成，共插入{}条记录".format(len(df)))
\ No newline at end of file
diff --git a/班长后端/main.py b/班长后端/main.py
new file mode 100644
index 0000000..18fe81d
--- /dev/null
+++ b/班长后端/main.py
@@ -0,0 +1,10 @@
+import pandas as pd
+from data_request import get_data_from_mysql
+from data_prewashing import wash_pipeline
+if __name__ == "__main__":
+    # Fetch the raw rows from the MySQL database.
+    df = get_data_from_mysql('root', '1234', 'atc', 'origin_table')
+    # data_tosql.py renames `id` to `type` before inserting, so `id` may be absent:
+    # errors='ignore' drops the column when present instead of raising KeyError.
+    df = df.drop(columns=['id'], errors='ignore')
+    df = wash_pipeline(df)  # run the pre-washing pipeline on the fetched rows
\ No newline at end of file