用词云的方法做可视化处理

master
posql3f6g 2 years ago
parent 56f026812f
commit 4006830f05

@ -0,0 +1,41 @@
import urllib.request
from lxml import etree
import requests
import re
# Download the page at `url`, follow the link taken from it to the danmaku
# data, and append every extracted danmaku text to 弹幕.txt, one per line.
url = 'https://www.ibilibili.com/video/BV1o24y1F7wV/?spm_id_from=333.999.0.0&vd_source=59fecc30e7f4791084968599ca1f8b82'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
}
# Seconds to wait on each network call; without it a stalled connection
# would block the script forever.
REQUEST_TIMEOUT = 10

request = urllib.request.Request(url=url, headers=headers)
# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as response:
    content = response.read().decode('utf-8')

tree = etree.HTML(content)
# The href of the third <a> inside the "btn-group" div is used as the danmaku
# URL. NOTE(review): this depends on the page layout -- confirm if the site
# markup changes.
danmuurl = tree.xpath('//div[@class="btn-group"]/a[3]/@href')
if not danmuurl:
    # Fail with a clear message instead of an opaque IndexError below.
    raise RuntimeError('danmaku link not found in page: ' + url)

response2 = requests.get(url=danmuurl[0], headers=headers, timeout=REQUEST_TIMEOUT)
response2.encoding = 'utf-8'
# Each danmaku is captured as the text between <d p="..."> and </d>.
content_list = re.findall(r'<d p=".*?">(.*?)</d>', response2.text)

# Open the output file once rather than once per danmaku. Append mode is kept
# from the original script, so re-running adds to any existing file.
with open('弹幕.txt', mode='a', encoding='utf-8') as f:
    for danmu in content_list:
        f.write(danmu)
        f.write('\n')
import jieba
from wordcloud import WordCloud
# Read the collected danmaku text, segment it with jieba, and render the
# result as a word cloud image saved to 弹幕词云.jpg.

# The context manager closes the file handle as soon as the text is read.
with open('弹幕.txt', mode='r', encoding='UTF-8') as fp:
    txt = fp.read()

# jieba.lcut returns the segmented words as a list; joining them with spaces
# makes the word boundaries explicit in the text handed to WordCloud.
words = jieba.lcut(txt)
new_txt = " ".join(words)

# NOTE(review): 'simkai.ttf' must be resolvable at runtime; presumably chosen
# so that Chinese glyphs render -- confirm the font is available.
wordcloud = WordCloud(
    font_path='simkai.ttf',
    background_color='white',
    height=400,
    width=600,
    max_font_size=100,
    max_words=200,
).generate(new_txt)
wordcloud.to_file('弹幕词云.jpg')
Loading…
Cancel
Save