You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

180 lines
5.5 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import pandas as pd
import networkx as nx
import numpy as np
import pickle
from tqdm import tqdm
import os
def load_network_data(csv_file):
    """Read the network-flow CSV into a DataFrame and report what was loaded.

    Args:
        csv_file (str): Path of the CSV file to read (passed straight to
            ``pd.read_csv``).

    Returns:
        pd.DataFrame: The parsed network-flow records.
    """
    print(f"正在加载数据文件: {csv_file}")
    frame = pd.read_csv(csv_file)

    # Summarise the load so the operator can sanity-check size and schema.
    record_count = len(frame)
    column_names = list(frame.columns)
    print(f"数据加载完成,共 {record_count} 条记录")
    print(f"数据字段: {column_names}")
    return frame
def build_network_graph(df):
    """Build an undirected, weighted graph from network-flow records.

    Every row contributes one edge between its ``source_ip`` and
    ``target_ip``. Rows that repeat a pair (in either direction, because the
    graph is undirected) have their weights summed onto the existing edge.

    Args:
        df (pd.DataFrame): Flow records with ``source_ip``, ``target_ip`` and
            ``weight`` columns.

    Returns:
        nx.Graph: Graph whose nodes are the stringified endpoint values and
        whose edges carry an integer ``weight`` attribute.

    Raises:
        KeyError: If a required column is missing (raised up front, even when
            the frame has no rows).
        ValueError: If a weight cannot be converted to ``int`` (e.g. NaN).
    """
    print("开始构建网络图结构...")
    # Undirected graph: (a, b) and (b, a) refer to the same edge.
    G = nx.Graph()

    # Iterate column-wise rather than with DataFrame.iterrows(): iterrows
    # builds a Series per row (slow) and coerces each row to a single dtype,
    # which can turn integer endpoint ids into floats ("1" -> "1.0") when
    # another column is float.
    records = zip(df['source_ip'], df['target_ip'], df['weight'])
    for source, target, raw_weight in tqdm(records, total=len(df), desc="构建图中"):
        source_ip = str(source)
        target_ip = str(target)
        weight = int(raw_weight)

        # add_edge creates missing endpoint nodes itself, so no explicit
        # add_node calls are needed. Accumulate when the edge already exists.
        if G.has_edge(source_ip, target_ip):
            G[source_ip][target_ip]['weight'] += weight
        else:
            G.add_edge(source_ip, target_ip, weight=weight)

    print("图构建完成!")
    print(f"节点数量: {G.number_of_nodes()}")
    print(f"边数量: {G.number_of_edges()}")
    return G
def analyze_graph_properties(G):
    """Print basic structural statistics of the graph to stdout.

    Reports node/edge counts and density, then connectivity (average shortest
    path length and diameter when connected, otherwise component count and
    largest component size), degree statistics and edge-weight statistics.

    An empty graph, or a graph without edges, is reported as such instead of
    raising: the connectivity test is undefined on a graph with no nodes and
    ``max``/``min`` fail on empty sequences.

    Args:
        G (nx.Graph): Graph whose edges carry a ``weight`` attribute.

    Raises:
        KeyError: If an edge has no ``weight`` attribute.
    """
    print("\n=== 图结构分析 ===")

    # Basic counts.
    node_count = G.number_of_nodes()
    print(f"节点数: {node_count}")
    print(f"边数: {G.number_of_edges()}")
    print(f"图密度: {nx.density(G):.4f}")

    if node_count == 0:
        # nx.is_connected raises on the null graph and there are no degrees
        # or weights to summarise, so stop here.
        print("图为空: 跳过连通性、度分布和权重分析")
        return

    # Connectivity.
    if nx.is_connected(G):
        print("图是连通的")
        # NOTE: both calls compute shortest paths between all node pairs and
        # can be slow on large graphs.
        print(f"平均最短路径长度: {nx.average_shortest_path_length(G):.4f}")
        print(f"图的直径: {nx.diameter(G)}")
    else:
        print("图不连通")
        components = list(nx.connected_components(G))
        print(f"连通分量数量: {len(components)}")
        largest_component = max(components, key=len)
        print(f"最大连通分量大小: {len(largest_component)}")

    # Degree distribution.
    degrees = [degree for _, degree in G.degree()]
    print(f"平均度: {np.mean(degrees):.2f}")
    print(f"最大度: {max(degrees)}")
    print(f"最小度: {min(degrees)}")

    # Edge-weight statistics (only meaningful when at least one edge exists).
    weights = [data['weight'] for _, _, data in G.edges(data=True)]
    if not weights:
        print("图中没有边: 跳过权重统计")
        return
    print("边权重统计:")
    print(f" 平均权重: {np.mean(weights):.2f}")
    print(f" 最大权重: {max(weights)}")
    print(f" 最小权重: {min(weights)}")
def save_graph(G, output_dir="output"):
    """Persist the graph and a text summary of its statistics.

    Three files are written into ``output_dir`` (created if missing):

    * ``network_graph.pkl`` - pickle of the graph object, keeps all attributes.
    * ``network_graph.gexf`` - GEXF export, e.g. for visualisation in Gephi.
    * ``graph_statistics.txt`` - UTF-8 text summary of the graph statistics.

    Statistics that are undefined for the given graph are omitted from the
    summary rather than raising: connectivity and degree figures when the
    graph has no nodes, weight figures when it has no edges.

    Args:
        G (nx.Graph): Graph whose edges carry a ``weight`` attribute.
        output_dir (str): Directory that receives the output files.

    Raises:
        KeyError: If an edge has no ``weight`` attribute.
        OSError: If the directory or one of the files cannot be written.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    # Pickle keeps every node/edge attribute as-is.
    # NOTE: only unpickle this file from a trusted location.
    graph_file = os.path.join(output_dir, "network_graph.pkl")
    with open(graph_file, 'wb') as f:
        pickle.dump(G, f)
    print(f"图结构已保存到: {graph_file}")

    # GEXF export for Gephi.
    gexf_file = os.path.join(output_dir, "network_graph.gexf")
    nx.write_gexf(G, gexf_file)
    print(f"GEXF格式已保存到: {gexf_file}")

    # Assemble the whole summary first so that a failure while computing a
    # statistic does not leave a half-written file behind.
    node_count = G.number_of_nodes()
    lines = [
        "网络图统计信息\n",
        "=" * 50 + "\n",
        f"节点数量: {node_count}\n",
        f"边数量: {G.number_of_edges()}\n",
        f"图密度: {nx.density(G):.4f}\n",
    ]

    if node_count > 0:
        # Connectivity is undefined (nx.is_connected raises) on a null graph.
        if nx.is_connected(G):
            lines.append(f"平均最短路径长度: {nx.average_shortest_path_length(G):.4f}\n")
            lines.append(f"图的直径: {nx.diameter(G)}\n")
        else:
            components = list(nx.connected_components(G))
            largest_component = max(components, key=len)
            lines.append(f"连通分量数量: {len(components)}\n")
            lines.append(f"最大连通分量大小: {len(largest_component)}\n")

        degrees = [degree for _, degree in G.degree()]
        weights = [data['weight'] for _, _, data in G.edges(data=True)]
        lines.append(f"平均度: {np.mean(degrees):.2f}\n")
        lines.append(f"最大度: {max(degrees)}\n")
        lines.append(f"最小度: {min(degrees)}\n")

        if weights:
            # max()/min() would raise on a graph that has nodes but no edges.
            lines.append(f"平均边权重: {np.mean(weights):.2f}\n")
            lines.append(f"最大边权重: {max(weights)}\n")
            lines.append(f"最小边权重: {min(weights)}\n")

    stats_file = os.path.join(output_dir, "graph_statistics.txt")
    with open(stats_file, 'w', encoding='utf-8') as f:
        f.writelines(lines)
    print(f"统计信息已保存到: {stats_file}")
def main(csv_file="源ip 目的ip weight ip_pair.csv", output_dir="output"):
    """Run the full pipeline: load the CSV, build, analyse and save the graph.

    Errors are reported on stdout instead of propagating, so the function
    always returns normally.

    Args:
        csv_file (str): Path of the input CSV with the flow records. Defaults
            to the file name this script was written for.
        output_dir (str): Directory that receives the saved graph files.
    """
    try:
        # 1. Load the data.
        df = load_network_data(csv_file)
        # 2. Build the graph.
        G = build_network_graph(df)
        # 3. Analyse graph properties.
        analyze_graph_properties(G)
        # 4. Save the graph.
        save_graph(G, output_dir)
        print("\n✅ 图构建完成!")
    except FileNotFoundError as e:
        # Report the path that was actually missing; it is not necessarily
        # the input CSV (a later step may raise this as well).
        missing = e.filename if e.filename is not None else csv_file
        print(f"❌ 错误: 找不到文件 {missing}")
    except Exception as e:
        # Top-level boundary: surface the message rather than a traceback.
        print(f"❌ 发生错误: {str(e)}")
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()