添加了班长写的后端文件,后端兄弟们可以基于这个修改

main
ay0w55p 5 months ago
parent f62eb3b8da
commit 1722592fe4

@ -0,0 +1,58 @@
import pandas as pd
from data_request import get_data_from_mysql
def aggregate_texts(group):
    """Concatenate the utterances of one recording into a single transcript.

    Every row contributes one line made of the last ``_``-separated field of
    its ``type`` value (AT / PI / PIAT according to the original author's
    note), a colon and a space, the row's ``text``, and a trailing newline.

    Args:
        group: DataFrame with at least the columns ``type`` and ``text``.

    Returns:
        str: the lines joined in the rows' existing order; an empty string
        when ``group`` has no rows.
    """
    # Walk the two columns directly rather than DataFrame.iterrows(): no
    # per-row Series is built, and str.join avoids the repeated copying that
    # ``+=`` on a growing string causes.
    return ''.join(
        f"{kind.rsplit('_', 1)[-1]}: {words}\n"
        for kind, words in zip(group['type'], group['text'])
    )
def construct_new_type(group):
    """Build the merged ``type`` label for one category group.

    The label is ``<category>_<earliest start>_<latest end>``, where start
    and end are the third and fourth ``_``-separated fields of each row's
    ``type`` value and the category is read from the group's first row.

    Args:
        group: DataFrame with the columns ``type`` and ``category``.

    Returns:
        str: the combined label. The start/end fields are emitted exactly as
        they appear in the input (no reformatting).

    Raises:
        IndexError: if ``group`` has no rows, or a ``type`` value has fewer
            than four ``_``-separated fields.
    """
    def _time_key(token):
        # Order numeric tokens by value so that fields of different widths
        # compare correctly ('900' < '1000'); plain string comparison would
        # put '1000' first. Non-numeric tokens keep the lexicographic order.
        try:
            value = float(token)
        except ValueError:
            return (1, 0.0, token)
        if value != value:  # NaN cannot be ordered; treat it as text
            return (1, 0.0, token)
        return (0, value, token)

    # Read the category first so an empty group fails with IndexError.
    category = group['category'].iloc[0]
    fields = [label.split('_') for label in group['type']]
    start_time = min((parts[2] for parts in fields), key=_time_key)
    end_time = max((parts[3] for parts in fields), key=_time_key)
    return f"{category}_{start_time}_{end_time}"
def reshape_data(df):
    """Merge all rows that belong to the same recording into one row.

    Rows are grouped by the first two ``_``-separated fields of ``type``
    (e.g. ``uwb-atcc_ACCU-0agmXf``). For every group the result holds the
    combined ``type`` label, the joined ``text``, the earliest
    ``segment_start_time``, the latest ``segment_end_time`` and their
    difference as ``duration``.

    Args:
        df: DataFrame with the columns ``type``, ``text``,
            ``segment_start_time`` and ``segment_end_time``. It is not
            modified.

    Returns:
        pandas.DataFrame: one row per category, in groupby's default sorted
        key order, with the columns ``type``, ``text``,
        ``segment_start_time``, ``segment_end_time``, ``duration``. The
        columns are present even when ``df`` has no rows.
    """
    output_columns = [
        'type',
        'text',
        'segment_start_time',
        'segment_end_time',
        'duration',
    ]
    # assign() works on a copy, so the caller's frame does not silently gain
    # a 'category' column (and no SettingWithCopyWarning is raised when the
    # caller passes a slice).
    tagged = df.assign(
        category=df['type'].apply(lambda x: '_'.join(x.split('_')[:2]))
    )
    rows = []
    for _, group in tagged.groupby('category'):
        first_start = group['segment_start_time'].min()
        last_end = group['segment_end_time'].max()
        rows.append({
            'type': construct_new_type(group),
            'text': aggregate_texts(group),
            'segment_start_time': first_start,
            'segment_end_time': last_end,
            'duration': last_end - first_start,
        })
    # Naming the columns keeps the schema stable for an empty result.
    return pd.DataFrame(rows, columns=output_columns)
def recognize_callsign(df):
    """Identify aircraft callsigns (placeholder stage).

    No recognition logic is implemented yet; the argument is handed back
    unchanged.

    Args:
        df: the data produced by the previous pipeline stage.

    Returns:
        The same object that was passed in.
    """
    return df
def summarize_callsign(df):
    """Summarise the information related to each callsign (placeholder stage).

    No summarising logic is implemented yet; the argument is handed back
    unchanged.

    Args:
        df: the data produced by the previous pipeline stage.

    Returns:
        The same object that was passed in.
    """
    return df
def wash_pipeline(df):
    """Run the cleaning stages over ``df`` and return the final result.

    Args:
        df: the raw data handed to the first stage.

    Returns:
        Whatever the last stage returns.
    """
    # Stage order matters: merge the text rows first, then recognise
    # callsigns, then summarise the information for each callsign.
    stages = (reshape_data, recognize_callsign, summarize_callsign)
    for stage in stages:
        df = stage(df)
    return df
if __name__ == "__main__":
    from pathlib import Path

    # NOTE(review): the connection credentials are hard-coded here; consider
    # moving them to configuration before this leaves a development machine.
    df = get_data_from_mysql('root', '1234', 'atc', 'origin_table')
    # Only the first 1000 rows are processed. The copy detaches the slice
    # from the full table so later stages can add columns to it without
    # pandas warning about writing to a view.
    df = df.head(1000).copy()
    df = wash_pipeline(df)
    # Build the path from components: a literal '..\\data\\...' only works on
    # Windows, elsewhere the backslashes become part of the file name.
    df.to_csv(Path('..') / 'data' / 'wash_result.csv', index=False)

@ -0,0 +1,9 @@
#从mysql数据库中获取数据
import pandas as pd
from sqlalchemy import create_engine
def get_data_from_mysql(user, password, database, table):
    """Load every row of a MySQL table into a DataFrame.

    Connects to the MySQL server on ``localhost:3306`` through the
    ``pymysql`` driver.

    Args:
        user: database user name.
        password: the user's plain (not URL-encoded) password.
        database: name of the database (schema) to connect to.
        table: name of the table to read. It is interpolated into the SQL
            text as-is, so it must come from trusted code, never from user
            input.

    Returns:
        pandas.DataFrame: the result of ``SELECT * FROM <table>``.
    """
    from urllib.parse import quote_plus

    # Characters such as '@', ':' or '/' in the credentials would otherwise
    # be parsed as URL delimiters and corrupt the connection string.
    engine = create_engine(
        f'mysql+pymysql://{quote_plus(str(user))}:{quote_plus(str(password))}'
        f'@localhost:3306/{database}'
    )
    try:
        return pd.read_sql(f"SELECT * FROM {table}", con=engine)
    finally:
        # A fresh engine (and connection pool) is created on every call;
        # release its connections instead of waiting for garbage collection.
        engine.dispose()

@ -0,0 +1,14 @@
import pandas as pd
from sqlalchemy import create_engine, text
def save_data_to_mysql(df, user, password, database, table):
    """Append the rows of a DataFrame to a MySQL table.

    Connects to the MySQL server on ``localhost:3306`` through the
    ``pymysql`` driver. The DataFrame index is not written.

    Args:
        df: the data to store.
        user: database user name.
        password: the user's plain (not URL-encoded) password.
        database: name of the database (schema) to connect to.
        table: name of the target table; rows are appended if it already
            exists.

    Returns:
        None.
    """
    from urllib.parse import quote_plus

    # Characters such as '@', ':' or '/' in the credentials would otherwise
    # be parsed as URL delimiters and corrupt the connection string.
    engine = create_engine(
        f'mysql+pymysql://{quote_plus(str(user))}:{quote_plus(str(password))}'
        f'@localhost:3306/{database}'
    )
    try:
        df.to_sql(str(table), con=engine, if_exists='append', index=False)
    finally:
        # A fresh engine (and connection pool) is created on every call;
        # release its connections instead of waiting for garbage collection.
        engine.dispose()
if __name__ == '__main__':
    from pathlib import Path

    # Read the CSV and store it in the database. The CSV's 'id' column is
    # stored under the name 'type'.
    # Build the path from components: a literal '..\\data\\...' only works on
    # Windows, elsewhere the backslashes become part of the file name.
    source_csv = Path('..') / 'data' / 'train2.csv'
    df = pd.read_csv(source_csv).rename(columns={'id': 'type'})
    # NOTE(review): the connection credentials are hard-coded here; consider
    # moving them to configuration before this leaves a development machine.
    save_data_to_mysql(df, 'root', '1234', 'atc', 'origin_table')
    print(f"数据导入完成,共插入{len(df)}条记录")

@ -0,0 +1,10 @@
import pandas as pd
from data_request import get_data_from_mysql
from data_prewashing import wash_pipeline
if __name__ == "__main__":
    # Fetch the raw rows from the MySQL database.
    # NOTE(review): the connection credentials are hard-coded here; consider
    # moving them to configuration before this leaves a development machine.
    df = get_data_from_mysql('root', '1234', 'atc', 'origin_table')
    # The table only has an 'id' column when it was created with one; a table
    # created by the CSV import has none. errors='ignore' lets the script run
    # in both cases instead of failing with KeyError.
    df = df.drop(columns=['id'], errors='ignore')
    # Pre-process the data.
    # NOTE(review): the cleaned frame is not stored or returned anywhere yet.
    df = wash_pipeline(df)
Loading…
Cancel
Save