import random

import numpy as np
import pandas as pd
import torch
from torch_geometric.data import Data

import config


def _build_window_edges(num_nodes, neighbor_span):
    """Return a ``(2, E)`` long tensor linking each node to its next neighbours.

    Node ``i`` is connected to nodes ``i + 1 .. i + neighbor_span`` (clipped to
    the last node), and every connection is stored in both directions as the
    consecutive pair ``(i, j)``, ``(j, i)``.
    """
    pairs = [
        edge
        for i in range(num_nodes)
        for j in range(i + 1, min(i + neighbor_span + 1, num_nodes))
        for edge in ((i, j), (j, i))
    ]
    if not pairs:
        # Happens for single-node windows or a non-positive span; keep the
        # (2, 0) shape so the tensor is still a well-formed edge index.
        return torch.empty((2, 0), dtype=torch.long)
    return torch.tensor(pairs, dtype=torch.long).t().contiguous()


def build_graphs_from_dataframe(
    df: pd.DataFrame,
    feature_cols,
    label_col,
    *,
    min_nodes=3,
    neighbor_span=5,
    max_graphs=15000,
    train_ratio=0.7,
):
    """Turn a DataFrame into per-time-window graphs and split them train/test.

    Rows are bucketed by ``df["time"] // config.TIME_WINDOW``; every bucket with
    at least ``min_nodes`` rows becomes one ``Data`` graph whose nodes are the
    rows of that bucket, in DataFrame order. The graphs are then shuffled with
    the global ``random`` module and split by ``train_ratio``.

    Side effect: a ``"time_window"`` column is written (or overwritten) on the
    caller's ``df``.

    Args:
        df: Source table. Must contain a ``"time"`` column plus the columns
            named by ``feature_cols`` and ``label_col``.
        feature_cols: Column name(s) used as node features (converted to a
            float tensor).
        label_col: Column name used as node labels (converted to a long
            tensor).
        min_nodes: Windows with fewer rows than this are skipped.
        neighbor_span: Each node is linked, in both directions, to this many
            following nodes within its window.
        max_graphs: Stop building once this many graphs exist; ``None``
            disables the cap. Windows are visited in the order ``groupby``
            yields them.
        train_ratio: Fraction of the shuffled graphs placed in the training
            list; the remainder forms the test list.

    Returns:
        ``(train_graphs, test_graphs)`` -- two lists of ``Data`` objects.
        Training graphs have ``train_mask`` all True; test graphs have
        ``test_mask`` all True and ``train_mask`` all False. ``val_mask`` is
        all False everywhere.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio!r}")

    print("Building graphs... FINAL PAPER-LEVEL VERSION")

    # Bucket rows into fixed-width time windows; a smaller config.TIME_WINDOW
    # yields more (and smaller) graphs.
    df["time_window"] = (df["time"] // config.TIME_WINDOW).astype(int)

    graphs = []
    for _window_id, window_df in df.groupby("time_window"):
        if max_graphs is not None and len(graphs) >= max_graphs:
            break

        num_nodes = len(window_df)
        # Only a lower bound on window size is enforced; there is no upper bound.
        if num_nodes < min_nodes:
            continue

        features = torch.tensor(window_df[feature_cols].values, dtype=torch.float)
        labels = torch.tensor(window_df[label_col].values, dtype=torch.long)

        # Cheap local connectivity: each node is linked to the next few nodes.
        edge_index = _build_window_edges(num_nodes, neighbor_span)

        # Every graph starts as "train"; the test split is flipped after the
        # global shuffle below.
        graphs.append(
            Data(
                x=features,
                edge_index=edge_index,
                y=labels,
                train_mask=torch.ones(num_nodes, dtype=torch.bool),
                val_mask=torch.zeros(num_nodes, dtype=torch.bool),
                test_mask=torch.zeros(num_nodes, dtype=torch.bool),
            )
        )

    # Shuffle across all windows so the split is not ordered by time window.
    print(f"Shuffling {len(graphs)} graphs...")
    # NOTE: one shuffle is statistically sufficient; the second is kept so the
    # resulting order under a fixed random.seed() matches earlier runs.
    random.shuffle(graphs)
    random.shuffle(graphs)

    split_at = int(len(graphs) * train_ratio)
    train_graphs = graphs[:split_at]
    test_graphs = graphs[split_at:]

    # Switch the held-out graphs from train to test, in place.
    for graph in test_graphs:
        graph.train_mask[:] = False
        graph.test_mask[:] = True

    print(f"Done! Train: {len(train_graphs)}, Test: {len(test_graphs)}")
    return train_graphs, test_graphs