parent
0b3031cd82
commit
9daba3a9aa
@ -0,0 +1,51 @@
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
|
||||
# Path of the Douban movie CSV export analysed by this script.
CSV_PATH = '/mnt/豆瓣电影_20250510_173909.csv'


def count_top_actors(cast, top_n=10):
    """Return the most frequently appearing actors in a cast column.

    Args:
        cast: pandas Series in which each entry lists the actors of one
            movie separated by ' / '. Missing values are allowed.
        top_n: Maximum number of actors to return.

    Returns:
        DataFrame with the columns ``actor`` and ``count``, ordered by
        count (descending) and then by actor name (ascending) so that
        ties are resolved deterministically.
    """
    names = (
        cast.fillna('')
        .astype(str)
        .str.split(' / ')
        .explode()
        .str.strip()
    )
    # Movies without a cast yield '' after the split; counting it would
    # make the empty string show up as an "actor" in the ranking.
    names = names[names != '']

    # Count appearances per distinct actor with NumPy.
    unique_actors, counts = np.unique(
        names.to_numpy(dtype=object), return_counts=True
    )
    actor_df = pd.DataFrame({'actor': unique_actors, 'count': counts})
    ranked = actor_df.sort_values(['count', 'actor'], ascending=[False, True])
    return ranked.head(top_n).reset_index(drop=True)


def plot_top_actors(top_actors):
    """Draw a donut chart of each actor's share among ``top_actors``.

    Args:
        top_actors: DataFrame with ``actor`` and ``count`` columns, as
            returned by ``count_top_actors``.
    """
    # Raise the figure resolution.
    plt.rcParams['figure.dpi'] = 300
    # Use a font that can render the Chinese labels and title.
    plt.rcParams['font.sans-serif'] = ['WenQuanYi Zen Hei']

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.pie(
        top_actors['count'],
        labels=top_actors['actor'],
        autopct='%1.1f%%',
        startangle=90,
        pctdistance=0.85,
    )

    # A white circle in the centre turns the pie into a donut chart.
    centre_circle = plt.Circle((0, 0), 0.70, fc='white')
    ax.add_artist(centre_circle)

    ax.set_title('演员参演数量前十名占比', fontsize=14)

    # Equal aspect ratio keeps the pie circular.
    ax.axis('equal')
    fig.tight_layout()
    plt.show()


def main(csv_path=CSV_PATH):
    """Load the movie CSV, chart the ten busiest actors and print them.

    Args:
        csv_path: Location of the CSV file; it must contain a '主演' column.
    """
    df = pd.read_csv(csv_path)
    top_ten = count_top_actors(df['主演'])

    # Nothing to draw when no movie lists any actor.
    if not top_ten.empty:
        plot_top_actors(top_ten)

    # Print the ranking.
    print('演员参演数量前十名:')
    for actor, count in zip(top_ten['actor'], top_ten['count']):
        print(f"{actor}: {count}部")


if __name__ == '__main__':
    main()
|
Loading…
Reference in new issue