You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

51 lines
1.4 KiB

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Location of the exported Douban movie CSV analysed by this script.
CSV_PATH = '/mnt/豆瓣电影_20250510_173909.csv'
# Name of the CSV column holding the cast list.
ACTOR_COLUMN = '主演'
# Cast names inside one cell are joined with this separator.
ACTOR_SEPARATOR = ' / '
# How many of the most frequent actors to report.
TOP_N = 10


def count_actor_appearances(cast, top_n=TOP_N):
    """Return the ``top_n`` actors that appear most often in a cast column.

    Args:
        cast: pandas Series whose cells hold cast names joined by
            ``ACTOR_SEPARATOR``. Missing and blank cells are ignored.
        top_n: Maximum number of rows to return.

    Returns:
        DataFrame with columns ``actor`` and ``count``, sorted by ``count``
        in descending order. Ties keep alphabetical actor order, so the
        result is deterministic. Empty when no actor name is present.
    """
    # Drop missing cells instead of filling them with '', otherwise the empty
    # string would be counted as if it were an actor.
    names = cast.dropna().astype(str).str.split(ACTOR_SEPARATOR).explode()
    # Stray whitespace would make the same actor count as two different names.
    names = names.str.strip()
    names = names[names != '']

    # Count with NumPy; np.unique returns the names already sorted.
    unique_actors, counts = np.unique(names.to_numpy(), return_counts=True)
    actor_df = pd.DataFrame({'actor': unique_actors, 'count': counts})
    # A stable sort keeps np.unique's alphabetical order among equal counts.
    ranked = actor_df.sort_values('count', ascending=False, kind='stable')
    return ranked.head(top_n)


def plot_top_actors(top_actors):
    """Show a donut chart of each actor's share among the given top actors.

    Args:
        top_actors: DataFrame with ``actor`` and ``count`` columns, as
            returned by ``count_actor_appearances``. Nothing is drawn when
            it is empty.
    """
    if top_actors.empty:
        # Nothing to draw; a pie chart of zero wedges is meaningless.
        return

    # Render at high resolution.
    plt.rcParams['figure.dpi'] = 300
    # Use a font that contains CJK glyphs so the Chinese labels render.
    plt.rcParams['font.sans-serif'] = ['WenQuanYi Zen Hei']

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.pie(
        top_actors['count'],
        labels=top_actors['actor'],
        autopct='%1.1f%%',
        startangle=90,
        pctdistance=0.85,
    )
    # A white disc over the centre turns the pie into a donut chart.
    ax.add_artist(plt.Circle((0, 0), 0.70, fc='white'))
    ax.set_title('演员参演数量前十名占比', fontsize=14)
    # Equal aspect ratio keeps the pie circular.
    ax.axis('equal')
    fig.tight_layout()
    plt.show()


def main():
    """Load the movie CSV, chart the top actors and print their counts."""
    df = pd.read_csv(CSV_PATH)
    top_ten = count_actor_appearances(df[ACTOR_COLUMN])

    plot_top_actors(top_ten)

    # Print the same statistics as text.
    print('演员参演数量前十名:')
    for actor, count in zip(top_ten['actor'], top_ten['count']):
        print(f"{actor}: {count}")


if __name__ == "__main__":
    main()