You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

42 lines
1.4 KiB

import numpy as np
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from wordcloud import STOPWORDS
def getTxt(path=r'C:\jieba-python\英文\xxx.txt'):
    """Return the contents of a text file, lower-cased, with punctuation blanked.

    Each character in the punctuation set below is replaced by a single
    space, so the result can be tokenised with ``str.split()``. Apostrophes
    are not in the set and are left untouched.

    Args:
        path: File to read. Defaults to the input file this script analyses.

    Returns:
        The normalised text as one string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    punctuation = '!"@#$%^&*()+,-./:;<=>?@[\\]_`~{|}'
    # The context manager guarantees the handle is closed, even if read()
    # fails. The file is opened with the platform default encoding.
    with open(path) as source:
        txt = source.read().lower()
    # One translate() pass instead of one replace() call per character.
    return txt.translate(str.maketrans(dict.fromkeys(punctuation, ' ')))
# Count how often each word occurs in the normalised text.
hamletTxt = getTxt()
words = hamletTxt.split()
word_counts = {}
for word in words:
    word_counts[word] = word_counts.get(word, 0) + 1
# Discard the stop words supplied by the wordcloud package; pop() with a
# default silently skips any that never occurred in the text.
for stopword in STOPWORDS:
    word_counts.pop(stopword, None)
# Rank by frequency, highest first. The sort is stable, so words with equal
# counts keep their first-seen order.
items = sorted(word_counts.items(), key=lambda pair: pair[1], reverse=True)
# Keep at most the ten most frequent words. Slicing, unlike indexing with
# range(10), does not raise IndexError when fewer distinct words remain.
infos, counts = [], []
for word, count in items[:10]:
    infos.append(word)
    counts.append(count)
    print('{0:<10}{1:>5}'.format(word, count))
# Bar chart of the ranked words: one bar per entry in `infos`, with the
# matching value from `counts` as its height.
positions = list(range(len(infos)))
plt.bar(positions, counts, width=0.8)
# Tick positions are derived from the data rather than a hard-coded 10, so
# the number of ticks always equals the number of labels.
plt.xticks(positions, infos, fontsize=12)
# Print each count just above the top of its bar.
for a, b in zip(positions, counts):
    plt.text(a, b, '%d' % b, ha='center', va='bottom', fontsize=12)
plt.savefig(r'C:\jieba-python\英文\tb1.png', dpi=300, bbox_inches='tight')
# Build a word cloud from the raw (un-normalised) text of the same input file.
# The context manager closes the file as soon as it has been read.
with open(r"C:\jieba-python\英文\xxx.txt") as source_file:
    text_y = source_file.read()
wordcloud = WordCloud(
    width=800,
    height=600,
    background_color="white",
    max_words=2000,
)
ax = wordcloud.generate(text_y)
width, height = 24, 14
# Create only the sized figure; an extra bare plt.figure() call would leave
# an additional empty figure open when plt.show() runs.
plt.figure(figsize=(width, height))
plt.imshow(ax, interpolation='bilinear')
plt.axis('off')
plt.show()
# Raw string: same path as before, without relying on unrecognised escape
# sequences such as "\j" being passed through unchanged.
wordcloud.to_file(r"C:\jieba-python\英文\xxx.png")