You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
41 lines
1.5 KiB
41 lines
1.5 KiB
3 years ago
|
# -*- coding: utf-8 -*-
|
||
|
"""
|
||
|
Created on Tue Dec 21 08:29:04 2021
|
||
|
|
||
|
@author: 123
|
||
|
"""
|
||
|
|
||
|
|
||
|
import jieba
|
||
|
import wordcloud
|
||
|
|
||
|
|
||
|
|
||
|
# Count how often each author name appears in the corpus and print the
# tally, most frequent first.

# Read the whole corpus through a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open("D:/计算机与人工智能/Python/课件/宋词.txt", "r", encoding='utf-8') as corpus_file:
    txt = corpus_file.read()

# Tokenise the text; `words` is reused later for the word cloud.
words = jieba.lcut(txt)

# Author names to tally. Only tokens that match an entry exactly are counted.
# NOTE(review): a few entries (e.g. '刘一生', '间子翚', '陆壑') may be
# misspellings of the intended names -- verify against the corpus.
A = {
    '赵佶', '钱惟演', '范仲淹', '张先', '晏殊', '韩缜', '宋祁', '欧阳修',
    '柳永', '王安石', '晏几道', '苏轼', '秦观', '晁元礼', '赵令畤', '晁补之',
    '周邦彦', '贺铸', '张元干', '叶梦得', '刘一生', '韩疁', '李邴', '陈与义',
    '蔡伸', '周紫芝', '李甲', '李重元', '万俟咏', '徐伸', '田为', '曹组',
    '间子翚', '李玉', '廖世美', '吕滨老', '鲁逸仲', '岳飞', '程垓', '张孝祥',
    '韩元吉', '袁去华', '陆淞', '陆游', '陈亮', '范成大', '辛弃疾', '姜夔',
    '刘过', '严仁', '章良能', '俞国宝', '张镃', '史达祖', '刘克庄', '卢祖皋',
    '潘牥', '陆壑', '吴文英',
}

# Every name in A has at least two characters, so no separate filter for
# single-character tokens is needed.
counts = {}
for word in words:
    if word in A:
        counts[word] = counts.get(word, 0) + 1

# sorted() is stable, so names with equal counts keep first-seen order.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
for word, count in items:
    print('{0:<10}{1:>5}'.format(word, count))
|
||
|
|
||
|
|
||
|
|
||
|
# Generate the word cloud image.
# The tokens are joined with spaces into a single string for generate().
txt1 = " ".join(words)

# font_path names the font file used to render the words.
w = wordcloud.WordCloud(
    width=1000,
    height=700,
    background_color="white",
    font_path="msyh.ttc",
)
w.generate(txt1)
w.to_file("grwordcloud.png")
|