# ADSB/graph_builder.py

import torch
import pandas as pd
import numpy as np
from torch_geometric.data import Data
import config
import random

def _sequential_edge_index(num_nodes, span):
    """Link every row to the ``span`` rows that follow it, in both directions.

    Columns are emitted in the order ``(i, j), (j, i)`` for ``i`` ascending and
    ``j = i + 1 .. i + span`` (clipped to ``num_nodes - 1``).

    Args:
        num_nodes: Number of nodes (rows) in the graph.
        span: How many following rows each node is connected to.

    Returns:
        A ``torch.long`` tensor of shape ``(2, E)``; ``E`` is 0 when no pair
        exists (``num_nodes < 2`` or ``span == 0``).
    """
    sources = np.repeat(np.arange(num_nodes, dtype=np.int64), span)
    targets = sources + np.tile(np.arange(1, span + 1, dtype=np.int64), num_nodes)

    # Drop pairs that would point past the last row of the window.
    in_range = targets < num_nodes
    sources, targets = sources[in_range], targets[in_range]

    # Interleave each forward edge with its reverse: even columns hold
    # (i, j), odd columns hold (j, i).
    edges = np.empty((2, 2 * sources.size), dtype=np.int64)
    edges[0, 0::2] = sources
    edges[1, 0::2] = targets
    edges[0, 1::2] = targets
    edges[1, 1::2] = sources
    return torch.from_numpy(edges)


def build_graphs_from_dataframe(df, feature_cols, label_col, *,
                                min_nodes=3, neighbor_span=5,
                                max_graphs=15000, train_ratio=0.7):
    """Turn a DataFrame into per-time-window PyG graphs and split them.

    Rows are bucketed by ``df["time"] // config.TIME_WINDOW``; each bucket with
    at least ``min_nodes`` rows becomes one ``Data`` graph whose nodes are the
    bucket's rows (in DataFrame order) and whose edges connect every row to
    the ``neighbor_span`` rows after it, in both directions. The resulting
    graphs are shuffled and split into a train list and a test list.

    Side effects:
        * Adds (or overwrites) a ``"time_window"`` column on ``df`` in place.
        * Shuffles with the global ``random`` module state; seed it
          beforehand for a reproducible split.
        * Prints progress messages to stdout.

    Args:
        df: DataFrame containing a ``"time"`` column plus the feature and
            label columns.
        feature_cols: Column label(s) used as node features (converted to
            ``torch.float``).
        label_col: Column label used as node targets (converted to
            ``torch.long``).
        min_nodes: Windows with fewer rows than this are skipped.
        neighbor_span: Number of following rows each node is linked to.
        max_graphs: Stop building once this many graphs exist; ``None``
            disables the cap.
        train_ratio: Fraction of graphs (rounded down) placed in the train
            list; the remainder forms the test list.

    Returns:
        ``(train_graphs, test_graphs)``: two lists of ``Data`` objects. Train
        graphs have ``train_mask`` all True; test graphs have ``test_mask``
        all True and ``train_mask`` all False. ``val_mask`` is all False on
        every graph.

    Raises:
        ValueError: If ``neighbor_span`` is negative or ``train_ratio`` is
            outside ``[0, 1]``.
    """
    if neighbor_span < 0:
        raise ValueError("neighbor_span must be non-negative")
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError("train_ratio must be within [0, 1]")

    print("Building graphs... FINAL PAPER-LEVEL VERSION")

    # Bucket rows into fixed-width time windows. NOTE: this writes the bucket
    # id back onto the caller's DataFrame, exactly as before.
    df["time_window"] = (df["time"] // config.TIME_WINDOW).astype(int)

    graphs = []
    for _, window_df in df.groupby("time_window"):
        num_nodes = len(window_df)
        # Only a lower bound on window size; large windows are kept.
        if num_nodes < min_nodes:
            continue

        features = torch.tensor(window_df[feature_cols].values, dtype=torch.float)
        labels = torch.tensor(window_df[label_col].values, dtype=torch.long)

        # Every graph starts out as "train"; the masks of the graphs that end
        # up in the test list are flipped after the global split below.
        graphs.append(Data(
            x=features,
            edge_index=_sequential_edge_index(num_nodes, neighbor_span),
            y=labels,
            train_mask=torch.ones(num_nodes, dtype=torch.bool),
            val_mask=torch.zeros(num_nodes, dtype=torch.bool),
            test_mask=torch.zeros(num_nodes, dtype=torch.bool),
        ))

        # Cap the total number of graphs (earliest windows win, since groupby
        # iterates window ids in sorted order).
        if max_graphs is not None and len(graphs) >= max_graphs:
            break

    # Shuffle globally so both lists mix windows from the whole time range.
    # This makes it likely, but does not guarantee, that each list contains
    # every label class. The second pass adds no extra randomness; it is kept
    # so that seeded runs reproduce the same split as before.
    print(f"Shuffling {len(graphs)} graphs...")
    random.shuffle(graphs)
    random.shuffle(graphs)

    split = int(len(graphs) * train_ratio)
    train_graphs = graphs[:split]
    test_graphs = graphs[split:]

    # Switch the held-out graphs from train to test.
    for graph in test_graphs:
        graph.train_mask[:] = False
        graph.test_mask[:] = True

    print(f"Done! Train: {len(train_graphs)}, Test: {len(test_graphs)}")
    return train_graphs, test_graphs