import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


# Sample data used to exercise the analysis.
data = {
    'server_name': ['Server1', 'Server1', 'Server2', 'Server2',
                    'Server1', 'Server1', 'Server2', 'Server2'],
    'check_time': ['2024-03-01 10:00', '2024-03-01 11:00', '2024-03-31 12:00',
                   '2024-04-15 13:00', '2024-06-01 14:00', '2024-06-30 15:00',
                   '2024-12-25 16:00', '2024-12-31 17:00'],
    'status': ['Normal', 'Error', 'Normal', 'Warning',
               'Normal', 'Error', 'Warning', 'Normal'],
}

# Time granularities (plus the server) that define one aggregation bucket.
GROUP_KEYS = ['server_name', 'season', 'month', 'day_of_week', 'day', 'hour']


def load_checks(records) -> pd.DataFrame:
    """Build the health-check DataFrame and parse ``check_time`` as datetimes.

    ``records`` is anything ``pd.DataFrame`` accepts that yields the columns
    ``server_name``, ``check_time`` and ``status``.
    """
    df = pd.DataFrame(records)
    df['check_time'] = pd.to_datetime(df['check_time'])
    return df


def add_time_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with calendar features and an ``is_failure`` flag.

    Added columns: ``season`` (calendar quarter as a string, e.g. ``2024Q1``),
    ``month`` (month name), ``day_of_week`` (day name), ``day`` (day of month),
    ``hour`` and ``is_failure`` (1 for any status other than ``'Normal'``).
    """
    df = df.copy()
    check_time = df['check_time'].dt
    df['season'] = check_time.to_period('Q').astype(str)
    df['month'] = check_time.month_name()
    df['day_of_week'] = check_time.day_name()
    df['day'] = check_time.day
    df['hour'] = check_time.hour
    # Every status other than 'Normal' counts as a failure.
    df['is_failure'] = (df['status'] != 'Normal').astype(int)
    return df


def compute_failure_rates(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate checks per ``GROUP_KEYS`` bucket and compute the failure rate.

    Returns one row per bucket with ``Failures`` (number of failed checks),
    ``Total`` (number of checks) and ``failure_rate`` (``Failures / Total``).
    """
    grouped = (
        df.groupby(GROUP_KEYS)['is_failure']
        .agg(Failures='sum', Total='count')
        .reset_index()
    )
    # The previous expression `Total / cumcount() + 1` divided the failure
    # sum by a per-group running index (always 0 after aggregation) and then
    # added 1, so it never produced a ratio. Every bucket has at least one
    # check, so this division cannot hit zero.
    grouped['failure_rate'] = grouped['Failures'] / grouped['Total']
    return grouped


def build_pivots(grouped: pd.DataFrame) -> dict:
    """Reshape the aggregated rates into the four tables that get plotted.

    Keys are ``'season'``, ``'month'``, ``'week'`` and ``'day'``. Each table
    holds the mean ``failure_rate`` per cell (``pivot_table`` default), with
    empty cells filled with 0.
    """
    layouts = {
        'season': (['server_name', 'season'], 'hour'),
        'month': (['server_name', 'month'], 'day_of_week'),
        'week': (['server_name', 'day_of_week'], 'day'),
        'day': (['server_name', 'day'], 'hour'),
    }
    return {
        name: grouped.pivot_table(index=index, columns=columns,
                                  values='failure_rate', fill_value=0)
        for name, (index, columns) in layouts.items()
    }


def plot_heatmap(pivot: pd.DataFrame, title: str) -> None:
    """Draw one annotated heatmap on its own figure and show it."""
    # A fresh figure keeps successive heatmaps from being drawn on top of
    # each other when plt.show() does not block or clear the current figure.
    plt.figure()
    sns.heatmap(pivot, annot=True, cmap='YlGnBu')
    plt.title(title)
    plt.show()


def main() -> None:
    """Run the analysis on the sample data and display the four heatmaps."""
    df = add_time_features(load_checks(data))
    pivots = build_pivots(compute_failure_rates(df))

    plot_heatmap(pivots['season'], 'Seasonal Failure Rates by Server and Hour')
    plot_heatmap(pivots['month'], 'Monthly Failure Rates by Server and Day of Week')
    plot_heatmap(pivots['week'], 'Weekly Failure Rates by Server and Day')
    plot_heatmap(pivots['day'], 'Daily Failure Rates by Server and Hour')


if __name__ == "__main__":
    main()