From cf8e8663859add007aa15b0e0df6cca9668eea59 Mon Sep 17 00:00:00 2001
From: 李子祥
Date: Fri, 1 Apr 2022 16:11:43 +0000
Subject: [PATCH] Delete file fruit.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fruit.py | 168 -------------------------------------------------------
 1 file changed, 168 deletions(-)
 delete mode 100644 fruit.py

diff --git a/fruit.py b/fruit.py
deleted file mode 100644
index 1680f04..0000000
--- a/fruit.py
+++ /dev/null
@@ -1,168 +0,0 @@
-import glob
-import os
-import jieba
-import wordcloud
-from wordcloud import STOPWORDS
-from matplotlib import pyplot as plt
-
-'''
-Chinese-only word cloud
-'''
-def word_cloud_Chinese(file):
-    fb = open(file, 'r', encoding="utf-8")
-    t = fb.read()
-    fb.close()
-
-    stopwords = set()
-    content = [line.strip() for line in open('cn_stopwords.txt', 'r', encoding="utf-8").readlines()]
-    stopwords.update(content)
-
-    ls = jieba.lcut(t)
-    txt = " ".join(ls)
-    w = wordcloud.WordCloud(font_path="STSONG.TTF",
-                            width=700,
-                            height=700,
-                            background_color="white",
-                            stopwords=stopwords)
-    w.generate(txt)
-    w.to_file("123.png")
-
-    plt.imshow(w, interpolation='bilinear')
-    plt.axis('off')
-    plt.tight_layout()
-    plt.show()
-
-
-'''
-English-only word cloud
-'''
-
-
-def word_cloud_English(file):
-    fb = open(file, 'r', encoding="utf-8")
-    t = fb.read()
-    fb.close()
-    w = wordcloud.WordCloud(font_path="arial.ttf",
-                            width=1000,
-                            height=700,
-                            background_color="white",
-                            stopwords=STOPWORDS)
-    w.generate(t)
-    w.to_file("123.png")
-
-    plt.imshow(w, interpolation='bilinear')
-    plt.axis('off')
-    plt.tight_layout()
-    plt.show()
-
-
-'''
-Mixed Chinese/English word cloud
-'''
-
-
-def word_cloud_English_and_Chinese(file):
-    fb = open(file, 'r', encoding="utf-8")
-    t = fb.read()
-    fb.close()
-    stopwords = set()
-    content = [line.strip() for line in open('cn_stopwords.txt', 'r', encoding="utf-8").readlines()]
-    stopwords.update(content)
-    w = wordcloud.WordCloud(font_path="STSONG.TTF",
-                            width=1000,
-                            height=700,
-                            background_color="white",
-                            stopwords=stopwords,
-                            collocations=False
-                            )
-    ls = jieba.lcut(t)
-    t = " ".join(ls)
-    w.generate(t)
-    w.to_file("123.png")
-
-    plt.imshow(w, interpolation='bilinear')
-
-
-'''
-Chinese-only word frequency count
-'''
-
-
-def Chineseword(file):
-    txt = open(file, "r", encoding='utf-8').read()
-    counts = {}  # store each word and its occurrence count as key-value pairs
-    for ch in " ,。:;,《》!?“\”' ''\n'":
-        txt = txt.replace(ch, "")  # strip punctuation and other special characters from the text
-    words = jieba.lcut(txt)  # segment the text with jieba in accurate mode
-
-    for word in words:
-        if len(word) == 1:
-            continue
-        else:
-            counts[word] = counts.get(word, 0) + 1  # add 1 to a word's count each time it appears
-
-    items = list(counts.items())
-    items.sort(key=lambda x: x[1], reverse=True)  # sort words by occurrence count, descending
-    for i in range(len(items)):
-        print(items[i])
-
-
-'''
-English-only word frequency count
-'''
-
-
-def Englishword(file):
-    fb = open(file, 'r', encoding="utf-8")
-    wordfile = {}
-    for line in fb:
-        line = line.lower()
-        sword = line.strip().split()
-        for word in sword:
-            if word in wordfile:
-                wordfile[word] += 1
-            else:
-                wordfile[word] = 1
-    wordfrehigh = []
-    for wd, fy in wordfile.items():
-        wordfrehigh.append((fy, wd))
-    wordfrehigh.sort(reverse=True)
-    for wd in wordfrehigh:
-        print(wd)
-    fb.close()
-
-
-'''
-Mixed Chinese/English word frequency count
-'''
-
-
-def English_and_Chinese(file):
-    fb = open(file, 'r', encoding="utf-8")
-    t = fb.read()
-    ls = jieba.lcut(t)
-    t = " ".join(ls)
-    t = t.lower()
-    for ch in ",。?:;’“!——、~,《》.--?;:'\"!~' ''\n'":
-        t = t.replace(ch, " ")
-    t = t.split(" ")
-
-    wordfile = {}
-    for line in t:
-        sword = line.split()
-        for word in sword:
-            if word in wordfile:
-                wordfile[word] += 1
-            else:
-                wordfile[word] = 1
-    wordfrehigh = []
-    for wd, fy in wordfile.items():
-        wordfrehigh.append((fy, wd))
-    wordfrehigh.sort(reverse=True)
-    for wd in wordfrehigh:
-        print(wd)
-    fb.close()
-
-
-English_and_Chinese("file.txt")
-word_cloud_English_and_Chinese("file.txt")
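
Note for anyone who still needs the mixed-language counting after this removal: the sketch below reproduces what the deleted English_and_Chinese() did, in a more compact form. It is a minimal sketch, assuming jieba is installed and an input file exists at the illustrative path "file.txt"; count_mixed is a hypothetical name, not part of the deleted module.

from collections import Counter
import string

import jieba


def count_mixed(path):
    # Hypothetical stand-in for the deleted English_and_Chinese():
    # jieba segments both CJK and Latin runs, so one pass covers mixed text.
    text = open(path, encoding="utf-8").read().lower()
    tokens = jieba.lcut(text)
    # Drop whitespace-only tokens and tokens that are a single punctuation mark
    # (ASCII punctuation plus the common full-width CJK marks).
    punct = set(string.punctuation) | set(",。?:;’“”!—、~《》")
    words = [tok for tok in (t.strip() for t in tokens) if tok and tok not in punct]
    return Counter(words)


if __name__ == "__main__":
    # Print the 20 most frequent tokens, analogous to the sorted output
    # the old script produced.
    for word, freq in count_mixed("file.txt").most_common(20):
        print(word, freq)

Counter replaces the hand-rolled dict-and-sort bookkeeping of the old functions, and filtering jieba's tokens avoids the old character-by-character str.replace() loop over a hard-coded punctuation string.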