parent
56f026812f
commit
4006830f05
@ -0,0 +1,41 @@
|
|||||||
|
import html
import re
import urllib.request

import jieba
import requests
from lxml import etree
from wordcloud import WordCloud
|
||||||
|
|
||||||
|
# Video page to scrape. The third link of its "btn-group" div is expected to
# point at the danmaku (bullet-comment) XML -- NOTE(review): this depends on
# the page layout; confirm if the site markup changes.
url = 'https://www.ibilibili.com/video/BV1o24y1F7wV/?spm_id_from=333.999.0.0&vd_source=59fecc30e7f4791084968599ca1f8b82'

# Browser-like user agent sent with both HTTP requests.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
}

DANMAKU_FILE = '弹幕.txt'
WORDCLOUD_FILE = '弹幕词云.jpg'
REQUEST_TIMEOUT = 10  # seconds; keeps a stalled connection from hanging the script

# Each danmaku is the text of a <d p="..."> element in the downloaded XML.
_DANMAKU_RE = re.compile(r'<d p=".*?">(.*?)</d>')


def fetch_page(page_url):
    """Download *page_url* and return its body decoded as UTF-8."""
    request = urllib.request.Request(url=page_url, headers=headers)
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as response:
        return response.read().decode('utf-8')


def find_danmaku_url(page_html):
    """Return the danmaku download link found in *page_html*.

    Raises:
        RuntimeError: if the page cannot be parsed or the link is missing.
    """
    tree = etree.HTML(page_html)
    links = [] if tree is None else tree.xpath('//div[@class="btn-group"]/a[3]/@href')
    if not links:
        # Fail with a clear message instead of an IndexError on links[0].
        raise RuntimeError('danmaku download link not found in the page')
    return links[0]


def fetch_danmaku_xml(danmaku_url):
    """Download the danmaku document and return it as UTF-8 text."""
    response = requests.get(url=danmaku_url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Surface HTTP errors rather than silently parsing an error page.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text


def extract_danmaku(xml_text):
    """Return the text of every ``<d p="...">`` element in *xml_text*.

    XML character entities (``&amp;``, ``&lt;`` ...) are decoded so the
    saved text matches what viewers typed.
    """
    return [html.unescape(text) for text in _DANMAKU_RE.findall(xml_text)]


def save_danmaku(comments, path=DANMAKU_FILE):
    """Append *comments* to *path*, one per line.

    The file is opened once for the whole batch. Append mode is kept, so
    repeated runs accumulate comments in the same file. Nothing is written
    (and no file is created) when *comments* is empty.
    """
    if not comments:
        return
    with open(path, mode='a', encoding='utf-8') as f:
        f.writelines(f'{comment}\n' for comment in comments)


def build_wordcloud(text_path=DANMAKU_FILE, image_path=WORDCLOUD_FILE):
    """Segment the text in *text_path* with jieba and render a word cloud image."""
    with open(text_path, mode='r', encoding='UTF-8') as fp:
        txt = fp.read()

    # WordCloud splits on whitespace, so join the jieba tokens with spaces.
    new_txt = ' '.join(jieba.lcut(txt))

    cloud = WordCloud(
        font_path='simkai.ttf',  # font file must be resolvable at runtime
        background_color='white',
        height=400,
        width=600,
        max_font_size=100,
        max_words=200,
    ).generate(new_txt)
    cloud.to_file(image_path)


def main():
    """Scrape the danmaku for ``url``, save them, and draw the word cloud."""
    page_html = fetch_page(url)
    danmaku_url = find_danmaku_url(page_html)
    comments = extract_danmaku(fetch_danmaku_xml(danmaku_url))
    save_danmaku(comments)
    build_wordcloud()


if __name__ == '__main__':
    main()
|
||||||
|
|
Loading…
Reference in new issue