parent
d77a97762d
commit
5306b4ef0f
@ -0,0 +1,42 @@
|
||||
import re
|
||||
import requests
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as mp
|
||||
# Scrape the "hot animated films" listing pages of manmankan.com and draw a
# pie chart showing the share of films per production region.

# Listing URL template; the placeholder receives one of PAGE_SUFFIXES.
LIST_URL = 'http://www.manmankan.com/dy2013/dianying/donghua/index{}.shtml'
PAGE_SUFFIXES = ('', '_2', '_3', '_4')

# Browser-like User-Agent sent with every request.
REQUEST_HEADERS = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62'}

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# Compiled once at import time rather than once per downloaded page.
# Captures, per film entry: name, director, country and release time.
MOVIE_PATTERN = re.compile(r'<dd style="display:none.*?<div class="title">(?P<name>.*?)</di.*?演:<span class="name">(?P<director>.*?)</span></d.*?电影">(?P<country>.*?)</a><a.*?上映:<span class="year">(?P<time>.*?)</spa', re.S)

COLUMNS = ['电影名称', '导演', '地区', '上映时间']

# Regions that get their own pie slice; everything else is grouped as OTHER_LABEL.
MAIN_REGIONS = ['大陆', '美国', '日本']
OTHER_LABEL = '其他'


def fetch_page(suffix):
    """Download one listing page and return its text decoded as UTF-8.

    Args:
        suffix: Page suffix inserted into LIST_URL (one of PAGE_SUFFIXES).

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status.
    """
    response = requests.get(
        LIST_URL.format(suffix),
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly instead of silently parsing an error page into zero rows.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text


def parse_movies(html):
    """Extract film records from the HTML of one listing page.

    Args:
        html: Page markup as a string.

    Returns:
        A list of (name, director, country, time) tuples, one per match of
        MOVIE_PATTERN, in document order. Empty when nothing matches.
    """
    return [
        match.group('name', 'director', 'country', 'time')
        for match in MOVIE_PATTERN.finditer(html)
    ]


def build_frame(rows):
    """Build the film DataFrame from parsed rows in a single constructor call.

    ``DataFrame.append`` was removed in pandas 2.0 (and copied the whole frame
    on every call), so the frame is created from the complete row list.

    Args:
        rows: Iterable of 4-item records ordered like COLUMNS.

    Returns:
        A DataFrame with the columns listed in COLUMNS.
    """
    return pd.DataFrame(list(rows), columns=COLUMNS)


def region_counts(df):
    """Count films per region.

    Args:
        df: DataFrame containing a '地区' column.

    Returns:
        A list of ints: one count per entry of MAIN_REGIONS, followed by the
        number of rows whose region is none of those.
    """
    counts = [int((df['地区'] == region).sum()) for region in MAIN_REGIONS]
    # Whatever is not one of the main regions falls into the "other" slice.
    counts.append(len(df) - sum(counts))
    return counts


def plot_region_share(counts):
    """Show a pie chart of the given per-region counts.

    Args:
        counts: Four numbers ordered as MAIN_REGIONS followed by "other".
    """
    # SimHei is configured so the Chinese labels and title can be rendered.
    mp.rcParams['font.sans-serif'] = ['SimHei']
    # The previous 100x100 inch canvas meant ~10000x10000 px at default DPI;
    # 8x8 inches is ample for a single pie chart.
    mp.figure('fig', figsize=(8, 8))
    mp.title('热门动画电影中各地区电影占比')
    mp.pie(
        counts,
        labels=MAIN_REGIONS + [OTHER_LABEL],
        explode=[0, 0, 0, 0],
        colors=['red', 'green', 'purple', 'orange'],
        autopct='%.1f%%',
    )
    mp.show()


def main():
    """Scrape every listing page, tabulate the films, and plot the region share."""
    rows = []
    for suffix in PAGE_SUFFIXES:
        rows.extend(parse_movies(fetch_page(suffix)))
    if not rows:
        # An all-zero pie is meaningless; most likely the page layout changed.
        raise SystemExit('No films were parsed from the listing pages.')
    plot_region_share(region_counts(build_frame(rows)))


if __name__ == '__main__':
    main()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in new issue