You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
434 lines
16 KiB
434 lines
16 KiB
|
|
import pickle
|
|
import pandas as pd
|
|
import networkx as nx
|
|
import numpy as np
|
|
from tqdm import tqdm
|
|
import os
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
from collections import defaultdict
|
|
|
|
def load_graph(graph_file):
    """Load a previously saved graph from a pickle file.

    Args:
        graph_file (str): Path of the pickle file to read.

    Returns:
        The unpickled graph object. It must provide ``number_of_nodes()``
        and ``number_of_edges()``, which are used for the summary message.
    """
    print(f"正在加载图结构: {graph_file}")

    # NOTE(review): unpickling can execute arbitrary code; this assumes the
    # file was produced by a trusted step of this project -- confirm.
    with open(graph_file, 'rb') as handle:
        graph = pickle.load(handle)

    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(f"图加载完成 - 节点数: {node_count}, 边数: {edge_count}")
    return graph
|
|
|
|
def calculate_degree_centrality(G):
    """Compute plain and weight-based degree centrality for every node.

    Args:
        G (nx.Graph): Graph whose edges all carry a ``'weight'`` attribute.

    Returns:
        tuple[dict, dict]: ``(degree_centrality, weighted_degree_centrality)``.
            The first element is the result of ``nx.degree_centrality(G)``.
            The second maps each node to the sum of the weights of the edges
            to its neighbors divided by the total weight of all edges in the
            graph (0 when that total is not positive).

    Raises:
        KeyError: If an edge has no ``'weight'`` attribute.
    """
    print("计算度中心性...")

    # Unweighted degree centrality.
    degree_centrality = nx.degree_centrality(G)

    # The normaliser is identical for every node, so compute it once instead
    # of re-summing all edge weights inside the per-node loop.
    total_edge_weight = sum(data['weight'] for _, _, data in G.edges(data=True))

    # Weighted degree centrality (takes edge weights into account).
    weighted_degree_centrality = {}
    for node in G.nodes():
        node_weight = sum(G[node][neighbor]['weight'] for neighbor in G.neighbors(node))
        weighted_degree_centrality[node] = (
            node_weight / total_edge_weight if total_edge_weight > 0 else 0
        )

    print("度中心性计算完成")
    return degree_centrality, weighted_degree_centrality
|
|
|
|
def calculate_betweenness_centrality(G):
    """Compute betweenness centrality, falling back to the largest component.

    When the graph is not connected, betweenness is computed only on the
    largest connected component and every node outside it is assigned 0.0.

    Args:
        G (nx.Graph): Undirected graph to analyse.

    Returns:
        dict: Betweenness centrality for every node of ``G`` (empty when the
            graph has no nodes).
    """
    print("计算介数中心性...")

    # nx.is_connected raises NetworkXPointlessConcept on a graph without
    # nodes, so an empty graph is answered directly.
    if G.number_of_nodes() == 0:
        print("介数中心性计算完成")
        return {}

    # Check whether the graph is connected.
    if nx.is_connected(G):
        G_largest = G
    else:
        print("警告: 图不连通,将使用最大连通分量计算介数中心性")
        # Locate the largest connected component.
        largest_cc = max(nx.connected_components(G), key=len)
        G_largest = G.subgraph(largest_cc)
        print(f"最大连通分量包含 {len(largest_cc)} 个节点")

    # NOTE(review): networkx treats `weight` as a path cost here; confirm the
    # stored edge weights are meant as distances rather than tie strengths.
    betweenness_centrality = nx.betweenness_centrality(G_largest, weight='weight')

    # Nodes outside the analysed component get a betweenness of 0.0.
    full_betweenness_centrality = {
        node: betweenness_centrality.get(node, 0.0) for node in G.nodes()
    }

    print("介数中心性计算完成")
    return full_betweenness_centrality
|
|
|
|
def calculate_additional_centralities(G):
    """Compute eigenvector, closeness and PageRank centrality.

    Each measure is computed on a best-effort basis: if its computation
    raises, the failure is printed and every node receives 0.0 for that
    measure.

    Args:
        G (nx.Graph): Graph to analyse.

    Returns:
        dict: Maps ``'eigenvector'``, ``'closeness'`` and ``'pagerank'`` to a
            node -> score dictionary.
    """
    print("计算其他中心性指标...")

    def zero_scores():
        # Fallback used when a measure cannot be computed.
        return {node: 0.0 for node in G.nodes()}

    results = {}

    # Eigenvector centrality.
    try:
        results['eigenvector'] = nx.eigenvector_centrality(G, max_iter=1000, weight='weight')
        print("特征向量中心性计算完成")
    except Exception as exc:
        print(f"特征向量中心性计算失败: {exc}")
        results['eigenvector'] = zero_scores()

    # Closeness centrality (computed within each connected component).
    try:
        if not nx.is_connected(G):
            per_component = {}
            for members in nx.connected_components(G):
                if len(members) <= 1:
                    # An isolated node has a closeness of 0.
                    per_component.update(dict.fromkeys(members, 0.0))
                    continue
                component_scores = nx.closeness_centrality(
                    G.subgraph(members), distance='weight'
                )
                per_component.update(component_scores)

            results['closeness'] = per_component
            print("接近中心性计算完成(分连通分量)")
        else:
            results['closeness'] = nx.closeness_centrality(G, distance='weight')
            print("接近中心性计算完成")
    except Exception as exc:
        print(f"接近中心性计算失败: {exc}")
        results['closeness'] = zero_scores()

    # PageRank centrality.
    try:
        results['pagerank'] = nx.pagerank(G, weight='weight')
        print("PageRank中心性计算完成")
    except Exception as exc:
        print(f"PageRank中心性计算失败: {exc}")
        results['pagerank'] = zero_scores()

    return results
|
|
|
|
def analyze_centrality_results(degree_cent, weighted_degree_cent, betweenness_cent, additional_cent):
    """Summarise each centrality measure and rank its top nodes.

    Prints and returns, for every measure, the mean, standard deviation,
    minimum, maximum and median of its values, plus the ten highest-scoring
    nodes.

    Args:
        degree_cent (dict): Degree centrality per node.
        weighted_degree_cent (dict): Weighted degree centrality per node.
        betweenness_cent (dict): Betweenness centrality per node.
        additional_cent (dict): May contain ``'eigenvector'``,
            ``'closeness'`` and ``'pagerank'`` node -> score dictionaries;
            a missing key is treated as an empty measure.

    Returns:
        dict: ``{'centrality_stats': {...}, 'top_nodes': {...}}``.
            ``centrality_stats[name]`` holds the keys ``mean``, ``std``,
            ``min``, ``max`` and ``median`` (all NaN for an empty measure).
            ``top_nodes[name]`` is a list of up to ten ``(node, value)``
            pairs in descending order of value.
    """
    print("\n=== 中心性分析结果 ===")

    # Single list of the measures reported, in display order, shared by the
    # statistics pass and the ranking pass.
    measures = [
        ('degree', degree_cent),
        ('weighted_degree', weighted_degree_cent),
        ('betweenness', betweenness_cent),
        ('eigenvector', additional_cent.get('eigenvector', {})),
        ('closeness', additional_cent.get('closeness', {})),
        ('pagerank', additional_cent.get('pagerank', {})),
    ]

    # Basic statistics.
    centrality_stats = {}
    for name, centrality in measures:
        values = list(centrality.values())
        if values:
            stats = {
                'mean': np.mean(values),
                'std': np.std(values),
                'min': np.min(values),
                'max': np.max(values),
                'median': np.median(values),
            }
        else:
            # np.min / np.max raise ValueError on empty input; report NaN so
            # that a missing measure does not abort the whole analysis.
            stats = dict.fromkeys(('mean', 'std', 'min', 'max', 'median'), float('nan'))
        centrality_stats[name] = stats

        print(f"\n{name.upper()}中心性统计:")
        print(f" 平均值: {stats['mean']:.6f}")
        print(f" 标准差: {stats['std']:.6f}")
        print(f" 最小值: {stats['min']:.6f}")
        print(f" 最大值: {stats['max']:.6f}")
        print(f" 中位数: {stats['median']:.6f}")

    # Identify the key nodes of every measure.
    top_nodes = {}
    for name, centrality in measures:
        top_nodes[name] = sorted(centrality.items(), key=lambda x: x[1], reverse=True)[:10]
        print(f"\n{name.upper()}中心性前10节点:")
        for i, (node, value) in enumerate(top_nodes[name], 1):
            print(f" {i:2d}. {node}: {value:.6f}")

    return {
        'centrality_stats': centrality_stats,
        'top_nodes': top_nodes,
    }
|
|
|
|
def _plot_centrality_distributions(centralities, output_dir):
    """Save a 2x3 grid of histograms, one per centrality measure."""
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    fig.suptitle('各种中心性分布', fontsize=16)

    # axes.flat walks the grid row by row, matching the measure order.
    for ax, (_, centrality, title) in zip(axes.flat, centralities):
        values = list(centrality.values())

        ax.hist(values, bins=50, alpha=0.7, edgecolor='black')
        ax.set_title(title)
        ax.set_xlabel('中心性值')
        ax.set_ylabel('频次')
        ax.grid(True, alpha=0.3)

        # Mark the mean; skipped for an empty measure, whose mean is undefined.
        if values:
            mean_val = np.mean(values)
            ax.axvline(mean_val, color='red', linestyle='--', alpha=0.7, label=f'平均值: {mean_val:.4f}')
            ax.legend()

    plt.tight_layout()
    fig.savefig(os.path.join(output_dir, 'centrality_distributions.png'), dpi=300, bbox_inches='tight')
    plt.close(fig)


def _plot_top_nodes(top_nodes, titled_names, output_dir):
    """Save a 2x3 grid of bar charts showing the top-ranked nodes per measure."""
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    fig.suptitle('各种中心性前10节点', fontsize=16)

    for ax, (name, title) in zip(axes.flat, titled_names):
        ranked = top_nodes.get(name)
        if not ranked:
            continue

        # str() lets non-string node identifiers be truncated too.
        labels = []
        for node, _ in ranked:
            text = str(node)
            labels.append(text[:15] + '...' if len(text) > 15 else text)
        values = [value for _, value in ranked]

        bars = ax.bar(range(len(labels)), values, alpha=0.7, color='lightblue', edgecolor='black')
        ax.set_title(title)
        ax.set_xlabel('节点')
        ax.set_ylabel('中心性值')
        ax.set_xticks(range(len(labels)))
        ax.set_xticklabels(labels, rotation=45, ha='right')
        ax.grid(True, alpha=0.3)

        # Put the numeric value slightly above each bar.
        label_offset = max(values) * 0.01
        for bar, value in zip(bars, values):
            ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + label_offset,
                    f'{value:.3f}', ha='center', va='bottom', fontsize=8)

    plt.tight_layout()
    fig.savefig(os.path.join(output_dir, 'top_nodes_comparison.png'), dpi=300, bbox_inches='tight')
    plt.close(fig)


def _plot_centrality_correlation(centrality_data, output_dir):
    """Save a heatmap of the pairwise correlation between centrality measures."""
    df_centrality = pd.DataFrame(centrality_data)
    correlation_matrix = df_centrality.corr()

    fig = plt.figure(figsize=(10, 8))
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,
                square=True, fmt='.3f', cbar_kws={'label': '相关系数'})
    plt.title('中心性指标相关性热力图')
    plt.tight_layout()
    fig.savefig(os.path.join(output_dir, 'centrality_correlation.png'), dpi=300, bbox_inches='tight')
    plt.close(fig)


def visualize_centrality_results(degree_cent, weighted_degree_cent, betweenness_cent, additional_cent, analysis_results, output_dir="output"):
    """Render the centrality charts and save them as PNG files.

    Three files are written into ``output_dir``:
    ``centrality_distributions.png`` (histograms),
    ``top_nodes_comparison.png`` (top-node bar charts) and
    ``centrality_correlation.png`` (correlation heatmap).

    Args:
        degree_cent (dict): Degree centrality per node.
        weighted_degree_cent (dict): Weighted degree centrality per node.
        betweenness_cent (dict): Betweenness centrality per node.
        additional_cent (dict): May contain ``'eigenvector'``,
            ``'closeness'`` and ``'pagerank'`` node -> score dictionaries.
        analysis_results (dict): Must contain ``'top_nodes'``, mapping a
            measure name to a list of ``(node, value)`` pairs.
        output_dir (str): Directory the images are written to; created if
            it does not exist.
    """
    print("生成中心性可视化图表...")

    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(output_dir, exist_ok=True)

    # Configure a font list for the Chinese labels used in the charts.
    plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']
    plt.rcParams['axes.unicode_minus'] = False

    eigenvector = additional_cent.get('eigenvector', {})
    closeness = additional_cent.get('closeness', {})
    pagerank = additional_cent.get('pagerank', {})

    centralities = [
        ('degree', degree_cent, '度中心性'),
        ('weighted_degree', weighted_degree_cent, '加权度中心性'),
        ('betweenness', betweenness_cent, '介数中心性'),
        ('eigenvector', eigenvector, '特征向量中心性'),
        ('closeness', closeness, '接近中心性'),
        ('pagerank', pagerank, 'PageRank中心性'),
    ]

    # 1. Histogram of every centrality distribution.
    _plot_centrality_distributions(centralities, output_dir)

    # 2. Bar charts comparing the top nodes of every measure.
    _plot_top_nodes(
        analysis_results['top_nodes'],
        [(name, title) for name, _, title in centralities],
        output_dir,
    )

    # 3. Correlation heatmap across the measures.
    _plot_centrality_correlation(
        {
            'Degree': degree_cent,
            'Weighted_Degree': weighted_degree_cent,
            'Betweenness': betweenness_cent,
            'Eigenvector': eigenvector,
            'Closeness': closeness,
            'PageRank': pagerank,
        },
        output_dir,
    )

    print("中心性可视化图表已保存")
|
|
|
|
def save_centrality_results(degree_cent, weighted_degree_cent, betweenness_cent, additional_cent, analysis_results, output_dir="output"):
    """Persist the centrality results as CSV, text summary and pickle.

    Three files are written into ``output_dir``:
    ``centrality_analysis.csv`` (one row per node of ``degree_cent``),
    ``centrality_statistics.txt`` (statistics and top nodes) and
    ``centrality_results.pkl`` (all inputs bundled in one dictionary).

    Args:
        degree_cent (dict): Degree centrality per node; its keys define the
            rows of the CSV file.
        weighted_degree_cent (dict): Weighted degree centrality per node.
        betweenness_cent (dict): Betweenness centrality per node.
        additional_cent (dict): May contain ``'eigenvector'``,
            ``'closeness'`` and ``'pagerank'`` node -> score dictionaries.
        analysis_results (dict): Must contain ``'centrality_stats'`` and
            ``'top_nodes'``.
        output_dir (str): Destination directory; created if missing.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # 1. Save every centrality value per node.
    centrality_file = os.path.join(output_dir, "centrality_analysis.csv")

    eigenvector = additional_cent.get('eigenvector', {})
    closeness = additional_cent.get('closeness', {})
    pagerank = additional_cent.get('pagerank', {})

    # Iterate the dict itself (insertion order) rather than a set of its keys
    # so that the CSV row order is the same on every run.
    centrality_data = [
        {
            'node': node,
            'degree_centrality': degree_cent.get(node, 0),
            'weighted_degree_centrality': weighted_degree_cent.get(node, 0),
            'betweenness_centrality': betweenness_cent.get(node, 0),
            'eigenvector_centrality': eigenvector.get(node, 0),
            'closeness_centrality': closeness.get(node, 0),
            'pagerank_centrality': pagerank.get(node, 0),
        }
        for node in degree_cent
    ]

    # Explicit columns keep the header row even when there are no nodes.
    columns = [
        'node',
        'degree_centrality',
        'weighted_degree_centrality',
        'betweenness_centrality',
        'eigenvector_centrality',
        'closeness_centrality',
        'pagerank_centrality',
    ]
    df_centrality = pd.DataFrame(centrality_data, columns=columns)
    df_centrality.to_csv(centrality_file, index=False, encoding='utf-8')
    print(f"中心性分析结果已保存到: {centrality_file}")

    # 2. Save the statistics summary.
    stats_file = os.path.join(output_dir, "centrality_statistics.txt")
    with open(stats_file, 'w', encoding='utf-8') as f:
        f.write("中心性分析统计结果\n")
        f.write("=" * 60 + "\n")

        centrality_stats = analysis_results['centrality_stats']
        for name, stats in centrality_stats.items():
            f.write(f"\n{name.upper()}中心性统计:\n")
            f.write(f" 平均值: {stats['mean']:.6f}\n")
            f.write(f" 标准差: {stats['std']:.6f}\n")
            f.write(f" 最小值: {stats['min']:.6f}\n")
            f.write(f" 最大值: {stats['max']:.6f}\n")
            f.write(f" 中位数: {stats['median']:.6f}\n")

        f.write("\n关键节点识别:\n")
        top_nodes = analysis_results['top_nodes']
        for name, top_list in top_nodes.items():
            f.write(f"\n{name.upper()}中心性前10节点:\n")
            for i, (node, value) in enumerate(top_list, 1):
                f.write(f" {i:2d}. {node}: {value:.6f}\n")

    print(f"中心性统计信息已保存到: {stats_file}")

    # 3. Save everything in pickle format.
    pickle_file = os.path.join(output_dir, "centrality_results.pkl")
    results = {
        'degree_centrality': degree_cent,
        'weighted_degree_centrality': weighted_degree_cent,
        'betweenness_centrality': betweenness_cent,
        'additional_centralities': additional_cent,
        'analysis_results': analysis_results,
    }
    with open(pickle_file, 'wb') as f:
        pickle.dump(results, f)
    print(f"完整中心性结果已保存到: {pickle_file}")
|
|
|
|
def main():
    """Run the full centrality pipeline: load, compute, analyse, plot, save.

    Reads the graph from ``output/network_graph.pkl`` and writes all charts
    and result files into ``output``. Errors are reported on stdout instead
    of being propagated.
    """
    # File locations.
    graph_file = "output/network_graph.pkl"
    output_dir = "output"

    try:
        # 1. Load the graph. Only a failure here means the graph file is
        #    missing; a FileNotFoundError raised by a later step must not
        #    trigger the "build the graph first" hint.
        try:
            G = load_graph(graph_file)
        except FileNotFoundError:
            print(f"❌ 错误: 找不到图文件 {graph_file}")
            print("请先运行 1_build_graph.py 构建图结构")
            return

        # 2. Degree centrality.
        degree_cent, weighted_degree_cent = calculate_degree_centrality(G)

        # 3. Betweenness centrality.
        betweenness_cent = calculate_betweenness_centrality(G)

        # 4. Remaining centrality measures.
        additional_cent = calculate_additional_centralities(G)

        # 5. Analyse the results.
        analysis_results = analyze_centrality_results(
            degree_cent, weighted_degree_cent, betweenness_cent, additional_cent
        )

        # 6. Visualise the results.
        visualize_centrality_results(
            degree_cent, weighted_degree_cent, betweenness_cent, additional_cent,
            analysis_results, output_dir
        )

        # 7. Save the results.
        save_centrality_results(
            degree_cent, weighted_degree_cent, betweenness_cent, additional_cent,
            analysis_results, output_dir
        )

        print("\n✅ 中心性分析完成!")

    except Exception as e:
        # Top-level boundary of the script: report the failure and stop.
        print(f"❌ 发生错误: {str(e)}")
|
|
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()
|