|
|
|
|
|
import pandas as pd
|
|
|
import networkx as nx
|
|
|
import numpy as np
|
|
|
import pickle
|
|
|
from tqdm import tqdm
|
|
|
import os
|
|
|
|
|
|
def load_network_data(csv_file):
    """Read the network-flow CSV into a DataFrame and report what was loaded.

    Prints the path being read, then the number of records and the list of
    column names once parsing has finished.

    Args:
        csv_file (str): Path of the CSV file to read.

    Returns:
        pd.DataFrame: The parsed contents of the file.
    """
    print(f"正在加载数据文件: {csv_file}")
    flows = pd.read_csv(csv_file)

    record_count = len(flows)
    column_names = list(flows.columns)
    print(f"数据加载完成,共 {record_count} 条记录")
    print(f"数据字段: {column_names}")
    return flows
|
|
|
|
|
|
def build_network_graph(df):
    """Build an undirected weighted graph from flow records.

    Every row contributes one edge between ``source_ip`` and ``target_ip``
    (both converted to ``str``). Because the graph is undirected, rows that
    refer to the same pair in either direction are merged and their integer
    weights are summed.

    Args:
        df (pd.DataFrame): Flow records; must provide the 'source_ip',
            'target_ip' and 'weight' columns when it contains any rows.

    Returns:
        nx.Graph: Graph whose edges carry the accumulated 'weight' attribute.

    Raises:
        KeyError: If a required column is missing from a non-empty frame.
    """
    print("开始构建网络图结构...")

    G = nx.Graph()

    row_total = len(df)
    if row_total:
        required_columns = ('source_ip', 'target_ip', 'weight')
        missing = [name for name in required_columns if name not in df.columns]
        if missing:
            # Fail with a readable message instead of a bare KeyError('source_ip').
            raise KeyError(f"数据缺少必要的列: {missing}")
        # Iterate the columns directly: iterrows() creates a Series per row,
        # which is slow and can upcast values to a common dtype before the
        # str()/int() conversions below see them.
        records = zip(df['source_ip'], df['target_ip'], df['weight'])
    else:
        records = ()

    for raw_source, raw_target, raw_weight in tqdm(
        records, total=row_total, desc="构建图中"
    ):
        source_ip = str(raw_source)
        target_ip = str(raw_target)
        weight = int(raw_weight)

        # add_edge creates missing endpoints itself, so no explicit add_node
        # calls are needed. Repeated pairs accumulate their weight.
        if G.has_edge(source_ip, target_ip):
            G[source_ip][target_ip]['weight'] += weight
        else:
            G.add_edge(source_ip, target_ip, weight=weight)

    print("图构建完成!")
    print(f"节点数量: {G.number_of_nodes()}")
    print(f"边数量: {G.number_of_edges()}")

    return G
|
|
|
|
|
|
def analyze_graph_properties(G):
    """Print basic structural statistics of the graph to stdout.

    Reports node count, edge count and density, then connectivity (average
    shortest path length and diameter when the graph is connected, otherwise
    the number of connected components and the size of the largest one),
    degree statistics and edge-weight statistics.

    Sections that are undefined for the given graph are skipped with a
    notice: connectivity and degree statistics need at least one node, and
    weight statistics need at least one edge.

    Args:
        G (nx.Graph): Graph whose edges carry a 'weight' attribute.
    """
    print("\n=== 图结构分析 ===")

    # Basic counts
    node_total = G.number_of_nodes()
    print(f"节点数: {node_total}")
    print(f"边数: {G.number_of_edges()}")
    print(f"图密度: {nx.density(G):.4f}")

    if node_total == 0:
        # nx.is_connected raises NetworkXPointlessConcept on the null graph,
        # and min()/max() fail on empty sequences, so stop here.
        print("图为空,跳过连通性、度分布和权重统计")
        return

    # Connectivity
    if nx.is_connected(G):
        print("图是连通的")
        print(f"平均最短路径长度: {nx.average_shortest_path_length(G):.4f}")
        print(f"图的直径: {nx.diameter(G)}")
    else:
        print("图不连通")
        components = list(nx.connected_components(G))
        print(f"连通分量数量: {len(components)}")
        largest_component = max(components, key=len)
        print(f"最大连通分量大小: {len(largest_component)}")

    # Degree distribution
    degrees = [degree for _, degree in G.degree()]
    print(f"平均度: {np.mean(degrees):.2f}")
    print(f"最大度: {max(degrees)}")
    print(f"最小度: {min(degrees)}")

    # Edge weights
    weights = [data['weight'] for _, _, data in G.edges(data=True)]
    print("边权重统计:")
    if not weights:
        # A graph made only of isolated nodes has no weights to summarize.
        print(" 图中没有边")
        return
    print(f" 平均权重: {np.mean(weights):.2f}")
    print(f" 最大权重: {max(weights)}")
    print(f" 最小权重: {min(weights)}")
|
|
|
|
|
|
def _graph_statistics_lines(G):
    """Return the text lines of the statistics report for ``G``.

    Connectivity and degree lines are omitted for a graph without nodes, and
    weight lines are omitted for a graph without edges, because those values
    are undefined there.
    """
    node_total = G.number_of_nodes()
    lines = [
        "网络图统计信息\n",
        "=" * 50 + "\n",
        f"节点数量: {node_total}\n",
        f"边数量: {G.number_of_edges()}\n",
        f"图密度: {nx.density(G):.4f}\n",
    ]
    if node_total == 0:
        # nx.is_connected raises on the null graph; nothing more to report.
        return lines

    if nx.is_connected(G):
        lines.append(f"平均最短路径长度: {nx.average_shortest_path_length(G):.4f}\n")
        lines.append(f"图的直径: {nx.diameter(G)}\n")
    else:
        components = list(nx.connected_components(G))
        lines.append(f"连通分量数量: {len(components)}\n")
        largest_component = max(components, key=len)
        lines.append(f"最大连通分量大小: {len(largest_component)}\n")

    degrees = [degree for _, degree in G.degree()]
    lines.append(f"平均度: {np.mean(degrees):.2f}\n")
    lines.append(f"最大度: {max(degrees)}\n")
    lines.append(f"最小度: {min(degrees)}\n")

    weights = [data['weight'] for _, _, data in G.edges(data=True)]
    if weights:
        lines.append(f"平均边权重: {np.mean(weights):.2f}\n")
        lines.append(f"最大边权重: {max(weights)}\n")
        lines.append(f"最小边权重: {min(weights)}\n")
    return lines


def save_graph(G, output_dir="output"):
    """Persist the graph and a plain-text statistics report to ``output_dir``.

    Three files are written, in this order:
      * ``network_graph.pkl``    - pickle of the graph object
      * ``network_graph.gexf``   - GEXF export (e.g. for Gephi)
      * ``graph_statistics.txt`` - UTF-8 text report of basic statistics

    Args:
        G (nx.Graph): Graph whose edges carry a 'weight' attribute.
        output_dir (str): Directory to write into; created if missing.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    # Pickle keeps the graph object with all of its attributes.
    graph_file = os.path.join(output_dir, "network_graph.pkl")
    with open(graph_file, 'wb') as f:
        pickle.dump(G, f)
    print(f"图结构已保存到: {graph_file}")

    # GEXF export for visualization tools.
    gexf_file = os.path.join(output_dir, "network_graph.gexf")
    nx.write_gexf(G, gexf_file)
    print(f"GEXF格式已保存到: {gexf_file}")

    # Compute the report before opening the file so that a failure while
    # computing statistics cannot leave a truncated report on disk.
    stats_file = os.path.join(output_dir, "graph_statistics.txt")
    report_lines = _graph_statistics_lines(G)
    with open(stats_file, 'w', encoding='utf-8') as f:
        f.writelines(report_lines)

    print(f"统计信息已保存到: {stats_file}")
|
|
|
|
|
|
def main(csv_file="源ip 目的ip weight ip_pair.csv", output_dir="output"):
    """Run the pipeline: load the CSV, build the graph, analyze it, save it.

    Errors are reported on stdout instead of being propagated, so the
    function always returns normally.

    Args:
        csv_file (str): Path of the input CSV file.
        output_dir (str): Directory that receives the saved graph files.
    """
    try:
        # 1. Load the data. Only a failure here means the input CSV is
        #    missing; a FileNotFoundError from a later step concerns some
        #    other path and is reported by the generic handler below.
        try:
            df = load_network_data(csv_file)
        except FileNotFoundError:
            print(f"❌ 错误: 找不到文件 {csv_file}")
            return

        # 2. Build the graph structure
        G = build_network_graph(df)

        # 3. Analyze graph properties
        analyze_graph_properties(G)

        # 4. Save the graph
        save_graph(G, output_dir)

        print("\n✅ 图构建完成!")

    except Exception as e:
        # Top-level boundary: report any remaining failure to the user.
        print(f"❌ 发生错误: {e}")
|
|
|
|
|
|
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|