You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
43 lines
1.6 KiB
43 lines
1.6 KiB
import re
|
|
import requests
|
|
import pandas as pd
|
|
import matplotlib.pyplot as mp
|
|
# Scrape the animated-movie listing pages of manmankan.com and draw a pie
# chart of how many listed movies come from each region.

# Suffixes substituted into URL_TEMPLATE, one per listing page to fetch.
PAGE_SUFFIXES = ['', '_2', '_3', '_4']
URL_TEMPLATE = 'http://www.manmankan.com/dy2013/dianying/donghua/index{}.shtml'

# Browser-style User-Agent sent with every request.
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62'
}

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# One match per movie entry; compiled once because it is the same for every
# page.  re.S lets ".*?" span the line breaks inside an entry.
MOVIE_PATTERN = re.compile(
    '<dd style="display:none.*?<div class="title">(?P<name>.*?)</di'
    '.*?演:<span class="name">(?P<director>.*?)</span></d'
    '.*?电影">(?P<country>.*?)</a><a'
    '.*?上映:<span class="year">(?P<time>.*?)</spa',
    re.S,
)

# Column order matches the group order extracted by parse_movies().
COLUMNS = ['电影名称', '导演', '地区', '上映时间']

# Regions that get their own pie slice; everything else is pooled as '其他'.
MAJOR_REGIONS = ['大陆', '美国', '日本']
OTHER_LABEL = '其他'

# NOTE(review): figsize is in inches, so (100, 100) is an enormous canvas.
# Kept as in the original script -- confirm whether something like (10, 10)
# was intended.
FIGURE_SIZE = (100, 100)


def fetch_page(url):
    """Download *url* and return its body decoded as UTF-8 text."""
    response = requests.get(url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT)
    # Force UTF-8 rather than relying on the encoding requests guesses.
    response.encoding = 'utf-8'
    return response.text


def parse_movies(html):
    """Return one ``[name, director, country, time]`` list per movie in *html*.

    Returns an empty list when nothing in *html* matches MOVIE_PATTERN.
    """
    return [
        list(match.group('name', 'director', 'country', 'time'))
        for match in MOVIE_PATTERN.finditer(html)
    ]


def build_frame(rows):
    """Build the movie DataFrame from the rows produced by parse_movies().

    The frame is created in a single call: ``DataFrame.append`` was removed
    in pandas 2.0 and copied the whole frame on every row before that.
    """
    return pd.DataFrame(rows, columns=COLUMNS)


def count_by_region(df):
    """Return movie counts as ``[大陆, 美国, 日本, other]`` for *df*.

    "other" is every row whose '地区' value is none of the MAJOR_REGIONS.
    """
    regions = df['地区']
    counts = [int((regions == region).sum()) for region in MAJOR_REGIONS]
    counts.append(len(df) - sum(counts))
    return counts


def plot_region_share(counts):
    """Show a pie chart of *counts* (ordered as returned by count_by_region)."""
    # SimHei is selected so the Chinese title and labels can be rendered;
    # presumably the font must be installed on the machine -- TODO confirm.
    mp.rcParams['font.sans-serif'] = ['SimHei']
    mp.figure('fig', figsize=FIGURE_SIZE)
    mp.title('热门动画电影中各地区电影占比')
    mp.pie(
        counts,
        labels=MAJOR_REGIONS + [OTHER_LABEL],
        explode=[0, 0, 0, 0],
        colors=['red', 'green', 'purple', 'orange'],
        autopct='%.1f%%',
    )
    mp.show()


def main():
    """Scrape every listing page, tally movies per region and plot the result."""
    rows = []
    for suffix in PAGE_SUFFIXES:
        rows.extend(parse_movies(fetch_page(URL_TEMPLATE.format(suffix))))

    # A pie chart of all-zero counts is meaningless, so stop with a clear
    # message when the pages yielded no matches (e.g. the markup changed).
    if not rows:
        raise SystemExit('No movies were parsed from the listing pages; nothing to plot.')

    plot_region_share(count_by_region(build_frame(rows)))


if __name__ == '__main__':
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|