You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
42 lines
1.4 KiB
42 lines
1.4 KiB
|
|
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
from wordcloud import STOPWORDS
from wordcloud import WordCloud
|
|
def getTxt(path=r'C:\jieba-python\英文\xxx.txt', encoding=None):
    """Read a text file and return it lower-cased with punctuation blanked out.

    Args:
        path: File to read. Defaults to the text file this script was
            written against.
        encoding: Text encoding handed to ``open``. ``None`` keeps the
            platform default, which is what the script relied on before
            this parameter existed.

    Returns:
        The file contents in lower case, with every character of the
        punctuation set replaced by a single space. Apostrophes are not in
        the set, so contractions such as "don't" stay intact.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    punctuation = '!"@#$%^&*()+,-./:;<=>?@[\\]_`~{|}'
    # A single translate() pass replaces the chain of str.replace() calls.
    to_space = str.maketrans(punctuation, ' ' * len(punctuation))
    # The context manager closes the handle even if read() raises.
    with open(path, encoding=encoding) as source:
        txt = source.read()
    return txt.lower().translate(to_space)
|
|
# ---- Word-frequency bar chart ----------------------------------------------
hamletTxt = getTxt()
words = hamletTxt.split()

# Tally every word except the stop words shipped with the wordcloud package.
word_counts = Counter(word for word in words if word not in STOPWORDS)

# most_common() returns at most 10 entries, so a text with fewer than 10
# distinct words no longer raises IndexError. Ties keep first-seen order,
# matching the stable descending sort used previously.
items = word_counts.most_common(10)
infos = [word for word, _ in items]
counts = [count for _, count in items]
for word, count in items:
    print('{0:<10}{1:>5}'.format(word, count))

# Tick positions follow the number of bars actually drawn instead of a
# hard-coded 10, so labels and bars always line up.
positions = list(range(len(infos)))
plt.bar(positions, counts, width=0.8)
plt.xticks(positions, infos, fontsize=12)
# Write each count just above its bar.
for a, b in enumerate(counts):
    plt.text(a, b, '%d' % b, ha='center', va='bottom', fontsize=12)
plt.savefig(r'C:\jieba-python\英文\tb1.png', dpi=300, bbox_inches='tight')

# ---- Word cloud ------------------------------------------------------------
# The cloud is built from the raw file text (not the cleaned string above);
# the context manager makes sure the handle is closed.
with open(r"C:\jieba-python\英文\xxx.txt") as source:
    text_y = source.read()

wordcloud = WordCloud(width=800, height=600, background_color="white", max_words=2000)
wordcloud.generate(text_y)
# Save before plt.show(): show() blocks until the windows are closed, and the
# image should exist on disk even if the session is interrupted there.
# Raw string: the previous non-raw literal relied on invalid escape sequences
# passing through unchanged; the resulting path is identical.
wordcloud.to_file(r"C:\jieba-python\英文\xxx.png")

width, height = 24, 14
# One sized figure only; the extra bare plt.figure() call opened an empty
# window alongside it.
plt.figure(figsize=(width, height))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()
|