删除文件 fruit.py

3 years ago · cf8e866385
parent 6d8cdefd28
commit cf8e866385
1 changed files with 0 additions and 168 deletions
--- a/fruit.py
+++ b/fruit.py
@@ -1,168 +0,0 @@
 import glob
 import os
 import jieba
 import wordcloud
 from wordcloud import STOPWORDS
 from matplotlib import pyplot as plt
 '''
 纯中文词云
 '''
 def word_cloud_Chinese(file):
    """Build a word cloud from a Chinese text file, save it, and display it.

    The text is segmented with ``jieba.lcut`` and the resulting tokens are
    joined with single spaces before being handed to ``WordCloud.generate``.

    Args:
        file: Path of the UTF-8 encoded text file to visualise.

    Side effects:
        * Reads stop words (one per line) from ``cn_stopwords.txt`` using a
          relative path, i.e. resolved against the current working directory.
        * Writes the rendered image to ``123.png`` (relative path).
        * Shows the image through ``matplotlib.pyplot``.
    """
    # Context managers guarantee both files are closed even if reading fails.
    with open(file, 'r', encoding="utf-8") as text_file:
        text = text_file.read()

    with open('cn_stopwords.txt', 'r', encoding="utf-8") as stopword_file:
        stopwords = {line.strip() for line in stopword_file}

    segmented = " ".join(jieba.lcut(text))

    cloud = wordcloud.WordCloud(
        font_path="STSONG.TTF",
        width=700,
        height=700,
        background_color="white",
        stopwords=stopwords,
    )
    cloud.generate(segmented)
    cloud.to_file("123.png")

    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
 '''
 纯英文词云
 '''
 def word_cloud_English(file):
    """Build a word cloud from an English text file, save it, and display it.

    The raw file content is passed to ``WordCloud.generate`` unchanged, and
    the ``STOPWORDS`` set shipped with ``wordcloud`` is used for filtering.

    Args:
        file: Path of the UTF-8 encoded text file to visualise.

    Side effects:
        * Writes the rendered image to ``123.png`` (relative path).
        * Shows the image through ``matplotlib.pyplot``.
    """
    # The context manager closes the file even if reading raises.
    with open(file, 'r', encoding="utf-8") as text_file:
        text = text_file.read()

    cloud = wordcloud.WordCloud(
        font_path="arial.ttf",
        width=1000,
        height=700,
        background_color="white",
        stopwords=STOPWORDS,
    )
    cloud.generate(text)
    cloud.to_file("123.png")

    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
 '''
 中英混合词云
 '''
 def word_cloud_English_and_Chinese(file):
    """Build a word cloud from mixed Chinese/English text, save and show it.

    The text is segmented with ``jieba.lcut`` and the tokens are joined with
    single spaces before being handed to ``WordCloud.generate``. The cloud is
    created with ``collocations=False``.

    Args:
        file: Path of the UTF-8 encoded text file to visualise.

    Side effects:
        * Reads stop words (one per line) from ``cn_stopwords.txt`` using a
          relative path, i.e. resolved against the current working directory.
        * Writes the rendered image to ``123.png`` (relative path).
        * Shows the image through ``matplotlib.pyplot``.
    """
    # Context managers guarantee both files are closed even if reading fails.
    with open(file, 'r', encoding="utf-8") as text_file:
        text = text_file.read()

    with open('cn_stopwords.txt', 'r', encoding="utf-8") as stopword_file:
        stopwords = {line.strip() for line in stopword_file}

    cloud = wordcloud.WordCloud(
        font_path="STSONG.TTF",
        width=1000,
        height=700,
        background_color="white",
        stopwords=stopwords,
        collocations=False,
    )
    cloud.generate(" ".join(jieba.lcut(text)))
    cloud.to_file("123.png")

    plt.imshow(cloud, interpolation='bilinear')
    # Finish the figure the same way the other word-cloud functions do;
    # without plt.show() the image drawn by imshow is never displayed.
    plt.axis('off')
    plt.tight_layout()
    plt.show()
 '''
 纯中文词频计数
 '''
 def Chineseword(file):
    """Print how often each multi-character word occurs in a Chinese text file.

    Spaces, newlines and a fixed set of punctuation marks are deleted from the
    text, which is then segmented with ``jieba.lcut``. Tokens of length 1 are
    ignored. Each remaining word is printed as a ``(word, count)`` tuple, one
    per line, ordered by descending count; words with equal counts keep the
    order in which they first appeared.

    Args:
        file: Path of the UTF-8 encoded text file to analyse.
    """
    from collections import Counter

    # The context manager closes the file even if reading raises.
    with open(file, "r", encoding='utf-8') as text_file:
        txt = text_file.read()

    # Characters removed outright (not replaced by a space) before
    # segmentation. The backslash is written as an explicit escape so the
    # literal contains no invalid escape sequence.
    removed = " ，。：；,《》！？“\\”'\n"
    txt = txt.translate(str.maketrans("", "", removed))

    words = jieba.lcut(txt)  # jieba's default segmentation mode
    counts = Counter(word for word in words if len(word) != 1)

    # most_common() sorts by count, descending, and is stable for ties.
    for item in counts.most_common():
        print(item)
 '''
 纯英文词频计数
 '''
 def Englishword(file):
    """Print how often each whitespace-separated word occurs in a text file.

    Every line is lower-cased and split on whitespace; punctuation is not
    stripped, so ``"end."`` and ``"end"`` count as different words. Each word
    is printed as a ``(count, word)`` tuple, one per line, sorted in reverse
    tuple order: descending count, ties broken by descending word.

    Args:
        file: Path of the UTF-8 encoded text file to analyse.
    """
    from collections import Counter

    counts = Counter()
    # The context manager closes the file even if reading raises.
    with open(file, 'r', encoding="utf-8") as text_file:
        for line in text_file:
            counts.update(line.lower().split())

    ranked = sorted(((freq, word) for word, freq in counts.items()),
                    reverse=True)
    for entry in ranked:
        print(entry)
 '''
 中英混合词频计数
 '''
 def English_and_Chinese(file):
    """Print word frequencies for a text file mixing Chinese and English.

    The text is segmented with ``jieba.lcut``, the tokens are joined with
    spaces and lower-cased, a fixed set of punctuation marks is replaced by
    spaces, and the result is split on whitespace. Each word is printed as a
    ``(count, word)`` tuple, one per line, sorted in reverse tuple order:
    descending count, ties broken by descending word.

    Args:
        file: Path of the UTF-8 encoded text file to analyse.
    """
    from collections import Counter

    # The context manager closes the file even if reading raises.
    with open(file, 'r', encoding="utf-8") as text_file:
        text = text_file.read()

    text = " ".join(jieba.lcut(text)).lower()

    # Punctuation treated as a word separator. Both opening and closing
    # curly quotes are listed so that neither half of a quote pair ends up
    # being counted as a word.
    separators = "，。？：；‘’“”！—、~,《》.-?;:'\"!\n"
    text = text.translate(str.maketrans(dict.fromkeys(separators, " ")))

    # split() with no argument splits on any run of whitespace and never
    # yields empty strings.
    counts = Counter(text.split())

    ranked = sorted(((freq, word) for word, freq in counts.items()),
                    reverse=True)
    for entry in ranked:
        print(entry)
 # Script driver: print the mixed-language word frequencies for "file.txt",
 # then render its word cloud. Runs whenever this module is executed or imported.
 for _analysis in (English_and_Chinese, word_cloud_English_and_Chinese):
    _analysis("file.txt")