You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
51 lines
1.4 KiB
51 lines
1.4 KiB
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
import pandas as pd
|
|
|
|
# Input CSV and the layout of its cast column.
DATA_PATH = '/mnt/豆瓣电影_20250510_173909.csv'
ACTOR_COLUMN = '主演'
ACTOR_SEPARATOR = ' / '
TOP_N = 10


def count_actors(df, column=ACTOR_COLUMN, separator=ACTOR_SEPARATOR):
    """Count how many times each actor name occurs in a cast column.

    Each cell of ``df[column]`` is split on ``separator`` and every resulting
    name is counted once per occurrence.

    Args:
        df: DataFrame that contains ``column``. It is not modified.
        column: Name of the column holding the separated cast list.
        separator: Literal string that separates actor names within a cell.

    Returns:
        A DataFrame with columns ``actor`` (unique names, in the sorted order
        produced by ``np.unique``) and ``count`` (occurrences of that name).
        It has zero rows when the column holds no names.
    """
    names = (
        df[column]
        .fillna('')
        .astype(str)
        .str.split(separator)
        .explode()
        .str.strip()
    )
    # Missing or blank cast cells become '' after the split; they are not
    # actors and must not be counted as one.
    names = names[names != '']

    unique_actors, counts = np.unique(
        names.to_numpy(dtype=object), return_counts=True
    )
    return pd.DataFrame({'actor': unique_actors, 'count': counts})


def top_actors(actor_counts, n=TOP_N):
    """Return the ``n`` rows of ``actor_counts`` with the highest ``count``.

    Ties on ``count`` are broken by ``actor`` in ascending order so the
    result is the same on every run.
    """
    ranked = actor_counts.sort_values(
        ['count', 'actor'], ascending=[False, True]
    )
    return ranked.head(n)


def plot_top_actors(top_ten):
    """Show a ring (donut) chart of each actor's share of the listed counts.

    Args:
        top_ten: DataFrame with ``actor`` and ``count`` columns.
    """
    # Raise the figure resolution.
    plt.rcParams['figure.dpi'] = 300
    # Use a font with CJK glyphs so the Chinese labels and title render.
    plt.rcParams['font.sans-serif'] = ['WenQuanYi Zen Hei']

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.pie(
        top_ten['count'],
        labels=top_ten['actor'],
        autopct='%1.1f%%',
        startangle=90,
        pctdistance=0.85,
    )

    # A white disc over the centre turns the pie into a ring chart.
    ax.add_artist(plt.Circle((0, 0), 0.70, fc='white'))

    ax.set_title('演员参演数量前十名占比', fontsize=14)

    # Equal axis scaling keeps the pie circular.
    ax.axis('equal')
    fig.tight_layout()
    plt.show()


def main():
    """Load the movie CSV, chart the most frequent actors and print them."""
    df = pd.read_csv(DATA_PATH)
    top_ten = top_actors(count_actors(df))

    plot_top_actors(top_ten)

    # Print the ranking that the chart shows.
    print('演员参演数量前十名:')
    for actor, count in zip(top_ten['actor'], top_ten['count']):
        print(f"{actor}: {count}部")


if __name__ == '__main__':
    main()