You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
import requests
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
import re
|
|
|
|
|
import time
|
|
|
|
|
import jieba
|
|
|
|
|
import wordcloud
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
import pandas as pd
|
|
|
|
|
from pandas import ExcelWriter
|
|
|
|
|
from collections import Counter
|
|
|
|
|
|
|
|
|
|
def chuli(etxt):
    """Build a word cloud from an iterable of strings and display it.

    Entries whose length is 1 or less are dropped (this filters out
    single characters such as "?" and one-character interjections). The
    remaining entries are joined with spaces, rendered with
    ``wordcloud.WordCloud`` and shown through matplotlib.

    Args:
        etxt: Iterable of strings. NOTE(review): presumably words that
            were already segmented upstream (e.g. by jieba) -- confirm
            against the caller.

    Returns:
        None. When no entry survives the length filter, a short message
        is printed and nothing is rendered.
    """
    words = [word for word in etxt if len(word) > 1]
    if not words:
        # WordCloud.generate raises ValueError when it receives no words,
        # so stop here instead of crashing on empty or all-short input.
        print('No words longer than one character; skipping word cloud.')
        return

    # Image settings: white 1000x1000 canvas; 'simsun.ttc' is the font file
    # used to draw the words (presumably chosen so Chinese text renders).
    wc = wordcloud.WordCloud(
        background_color='white',
        height=1000,
        width=1000,
        font_path='simsun.ttc',
    )
    wc.generate(' '.join(words))  # lay out the cloud from the joined words
    print(wc)
    plt.imshow(wc)
    plt.show()
|