|
|
|
@ -53,4 +53,63 @@ special
|
|
|
|
|
所以事情还是很紧急的
|
|
|
|
|
PPT汇报的具体要求可以在4月11日的课上问清楚,同时也可以问一下能否延期
|
|
|
|
|
现在急需将其完善,可以在功能上进一步简化,而图形界面能看即可,确保能交差
|
|
|
|
|
如果可以延期,就再做美化;如果不能延期,能交差即可
|
|
|
|
|
如果可以延期,就再做美化;如果不能延期,能交差即可
|
|
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import numpy as np
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
# Global matplotlib text settings, applied before any figure is created:
#   * 'font.sans-serif' -> SimHei, so the Chinese chart titles used below
#     are drawn with a font that is expected to contain those glyphs;
#   * 'axes.unicode_minus' -> False, so minus signs on the axes use the
#     ASCII hyphen instead of the Unicode minus character.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Movie metadata table that every chart below is built from.
movies_df_new = pd.read_csv("/data/bigfiles/movie_metadata_new.csv")
|
|
|
|
|
################begin########################
# Relationship between box-office gross and budget.
# Keep only the rows whose release year is 2014 or later.
df_2014_2016 = movies_df_new.loc[movies_df_new['title_year'] >= 2014]

# Order the rows by ascending budget.
df_sorted = df_2014_2016.sort_values(by='budget')

# Scatter plot: movie_title is the x coordinate, and the 'budget' and
# 'gross' columns are drawn as two separately coloured series.
fig, ax = plt.subplots(figsize=(10, 10))
for column, colour in (('budget', 'red'), ('gross', 'green')):
    ax.scatter(df_sorted['movie_title'], df_sorted[column],
               color=colour, s=20, label=column)
# Remove every x tick, so no movie titles are printed along the axis.
ax.set_xticks([])

# Chart title and axis labels.
ax.set_title('电影票房与预算的关系', fontsize=20)
ax.set_xlabel('movie_title')
ax.set_ylabel('budget/gross')

# No x-label rotation is applied here: the ticks were removed above.
ax.legend()
#################end#########################
fig.savefig("step5/result/pic1.png")
# Close the current figure so the next chart starts from a clean state.
plt.close()
|
|
|
|
|
|
|
|
|
|
################begin########################
# Normalised comparison of cast popularity and box-office gross.
# Keep only the rows released after 2014 and no later than 2016
# (2014 itself is excluded).
released_2015_2016 = (
    (movies_df_new['title_year'] > 2014)
    & (movies_df_new['title_year'] <= 2016)
)
# .copy() makes the subset an independent DataFrame.  Without it, the two
# column assignments below write into a selection of movies_df_new, which
# is the pattern that triggers pandas' SettingWithCopyWarning.
df_2015_2016 = movies_df_new[released_2015_2016].copy()

# Normalise each column by dividing it by its own maximum within the
# subset, so both series can share one y axis.
df_2015_2016['cast_total_likes_norm'] = (
    df_2015_2016['cast_total_likes'] / df_2015_2016['cast_total_likes'].max()
)
df_2015_2016['gross_norm'] = (
    df_2015_2016['gross'] / df_2015_2016['gross'].max()
)

# Scatter plot of the two normalised series, with movie_title as the
# x coordinate.
plt.figure(figsize=(10, 10))
plt.scatter(df_2015_2016['movie_title'], df_2015_2016['cast_total_likes_norm'],
            color='red', s=20, label='cast_total_likes')
plt.scatter(df_2015_2016['movie_title'], df_2015_2016['gross_norm'],
            color='green', s=20, label='gross')
# Remove every x tick, so no movie titles are printed along the axis.
plt.xticks([])

# Chart title and axis labels.
plt.title('演员的获赞总数与票房的关系', fontsize=10)
plt.xlabel('movie_title')
plt.ylabel('cast_total_likes/gross')
plt.legend()
# No x-label rotation is applied here: the ticks were removed above.

#################end#########################
plt.savefig("step5/result/pic2.png")
# Close the current figure so it does not leak into later plotting code.
plt.close()
|