"""Visualize server failure rates at several time granularities.

Builds a small sample of server health checks, derives calendar features
(quarter, month, weekday, day of month, hour), computes the share of
non-``Normal`` checks per bucket and renders one heatmap per granularity.
"""

import pandas as pd

# Sample health-check records used to demonstrate the analysis.
SAMPLE_DATA = {
    'server_name': ['Server1', 'Server1', 'Server2', 'Server2',
                    'Server1', 'Server1', 'Server2', 'Server2'],
    'check_time': ['2024-03-01 10:00', '2024-03-01 11:00',
                   '2024-03-31 12:00', '2024-04-15 13:00',
                   '2024-06-01 14:00', '2024-06-30 15:00',
                   '2024-12-25 16:00', '2024-12-31 17:00'],
    'status': ['Normal', 'Error', 'Normal', 'Warning',
               'Normal', 'Error', 'Warning', 'Normal'],
}

# Finest-grained bucket: one row per server and calendar slot.
GROUP_KEYS = ['server_name', 'season', 'month', 'day_of_week', 'day', 'hour']

# (plot title, row index of the heatmap, column axis of the heatmap)
HEATMAP_VIEWS = [
    ('Seasonal Failure Rates by Server and Hour',
     ['server_name', 'season'], 'hour'),
    ('Monthly Failure Rates by Server and Day of Week',
     ['server_name', 'month'], 'day_of_week'),
    ('Weekly Failure Rates by Server and Day',
     ['server_name', 'day_of_week'], 'day'),
    ('Daily Failure Rates by Server and Hour',
     ['server_name', 'day'], 'hour'),
]


def build_sample_frame() -> pd.DataFrame:
    """Return the sample health-check records as a DataFrame."""
    return pd.DataFrame(SAMPLE_DATA)


def add_time_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with calendar features and a failure flag.

    Expects ``check_time`` and ``status`` columns. Adds ``season`` (calendar
    quarter as a string such as ``'2024Q1'``), ``month``, ``day_of_week``,
    ``day``, ``hour`` and ``is_failure`` (1 for any status other than
    ``'Normal'``, else 0). The input frame is left untouched.
    """
    out = df.copy()
    out['check_time'] = pd.to_datetime(out['check_time'])
    stamp = out['check_time'].dt
    out['season'] = stamp.to_period('Q').astype(str)
    out['month'] = stamp.month_name()
    out['day_of_week'] = stamp.day_name()
    out['day'] = stamp.day
    out['hour'] = stamp.hour
    # Every status other than 'Normal' counts as a failure.
    out['is_failure'] = (out['status'] != 'Normal').astype(int)
    return out


def summarize_failures(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate checks per bucket into failure and total counts.

    Returns one row per combination of ``GROUP_KEYS`` with the columns
    ``failures`` (number of failed checks), ``total`` (number of checks)
    and ``failure_rate`` (``failures / total``, always within [0, 1]).
    """
    grouped = (
        df.groupby(GROUP_KEYS)['is_failure']
        .agg(failures='sum', total='count')
        .reset_index()
    )
    # Divide by the real number of checks. The earlier formula divided by
    # cumcount() (0 for every already-unique group) and then added 1 to the
    # quotient, which yielded inf/NaN instead of a rate.
    grouped['failure_rate'] = grouped['failures'] / grouped['total']
    return grouped


def failure_rate_pivot(grouped: pd.DataFrame, index: list, columns: str) -> pd.DataFrame:
    """Reshape bucket counts into a heatmap-ready failure-rate table.

    Counts are re-aggregated over ``index + [columns]`` before dividing, so
    each cell is failures / checks for that cell rather than an unweighted
    mean of finer-grained rates. Cells without any check are filled with 0.
    """
    counts = grouped.groupby([*index, columns])[['failures', 'total']].sum()
    rate = counts['failures'] / counts['total']
    return rate.unstack(columns, fill_value=0.0)


def plot_heatmap(pivot: pd.DataFrame, title: str) -> None:
    """Show *pivot* as an annotated heatmap titled *title*."""
    # Imported lazily so the data-preparation helpers stay importable (and
    # testable) on machines without a plotting stack.
    import matplotlib.pyplot as plt
    import seaborn as sns

    # Fresh figure per heatmap so successive plots never share axes.
    plt.figure()
    sns.heatmap(pivot, annot=True, cmap='YlGnBu')
    plt.title(title)
    plt.show()


def main() -> None:
    """Run the analysis on the sample data and display all heatmaps."""
    grouped = summarize_failures(add_time_features(build_sample_frame()))
    for title, index, columns in HEATMAP_VIEWS:
        plot_heatmap(failure_rate_pivot(grouped, index, columns), title)


if __name__ == '__main__':
    main()