parent
56f026812f
commit
4006830f05
@ -0,0 +1,41 @@
|
||||
import urllib.request
|
||||
from lxml import etree
|
||||
import requests
|
||||
import re
|
||||
|
||||
# Page that the script downloads first; a later step extracts a link from it
# with XPath.
# NOTE(review): the host is "ibilibili.com", not "bilibili.com" — presumably
# an intentional third-party helper page; confirm before changing it.
url = 'https://www.ibilibili.com/video/BV1o24y1F7wV/?spm_id_from=333.999.0.0&vd_source=59fecc30e7f4791084968599ca1f8b82'

# Browser-like User-Agent header sent with every HTTP request in this script.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
}
|
||||
|
||||
# Download the page at `url` and pull the danmaku link out of its HTML.
request = urllib.request.Request(url=url, headers=headers)

# The context manager closes the HTTP response even if reading/decoding
# fails; the timeout keeps the script from hanging on a stalled connection.
with urllib.request.urlopen(request, timeout=30) as response:
    content = response.read().decode('utf-8')

tree = etree.HTML(content)

# Takes the href of the third <a> inside <div class="btn-group">.
# NOTE(review): presumably that button links to the danmaku XML — this is
# tied to the page's current layout; verify if the site changes.
danmuurl = tree.xpath('//div[@class="btn-group"]/a[3]/@href')

# xpath() returns a list; fail here with a clear message instead of letting
# the later `danmuurl[0]` raise a bare IndexError.
if not danmuurl:
    raise RuntimeError(
        'danmaku link not found: no third <a href> under div.btn-group in ' + url
    )
|
||||
|
||||
# Download the danmaku document and append every comment to 弹幕.txt,
# one comment per line.
response2 = requests.get(url=danmuurl[0], headers=headers, timeout=30)
# Stop on an HTTP error instead of silently parsing an error page.
response2.raise_for_status()
# Force UTF-8 so `.text` does not depend on requests' charset guess.
response2.encoding = 'utf-8'

# Each comment is the text of a <d p="..."> element; capture only the text.
content_list = re.findall(r'<d p=".*?">(.*?)</d>', response2.text)

# Open the output file once rather than once per comment.
# NOTE(review): append mode is kept from the original, so re-running the
# script adds the same comments again — confirm whether 'w' was intended.
with open('弹幕.txt', mode='a', encoding='utf-8') as f:
    f.writelines(danmu + '\n' for danmu in content_list)
|
||||
|
||||
import jieba
|
||||
from wordcloud import WordCloud
|
||||
# Build a word-cloud image from the comments saved in 弹幕.txt.

# Read the whole file; the context manager closes the handle, which the
# original bare open() never did.
with open('弹幕.txt', mode='r', encoding='UTF-8') as fp:
    txt = fp.read()

# Segment the text with jieba, then join the tokens with spaces to form the
# single string passed to WordCloud.generate().
words = jieba.lcut(txt)
new_txt = " ".join(words)

# NOTE(review): 'simkai.ttf' must be resolvable from where the script runs;
# presumably chosen because it contains CJK glyphs — confirm.
wordcloud = WordCloud(
    font_path='simkai.ttf',
    background_color='white',
    height=400,
    width=600,
    max_font_size=100,
    max_words=200,
).generate(new_txt)

wordcloud.to_file('弹幕词云.jpg')
|
||||
|
Loading…
Reference in new issue