mirror of https://gitee.com/hikerxiang/ciyun
branch: master
commit 6d8cdefd28 (parent ae85c54a75)
@@ -0,0 +1,161 @@
import jieba
import wordcloud
from wordcloud import STOPWORDS
from matplotlib import pyplot as plt


'''
Word cloud for Chinese-only text
'''
def word_cloud_Chinese(file):
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()

    # Load the Chinese stop-word list, one word per line.
    stopwords = set()
    with open('cn_stopwords.txt', 'r', encoding="utf-8") as f:
        stopwords.update(line.strip() for line in f)

    # Segment with jieba, then re-join on spaces so that WordCloud's
    # built-in tokenizer can split the text back into words.
    ls = jieba.lcut(t)
    txt = " ".join(ls)
    w = wordcloud.WordCloud(font_path="STSONG.TTF",
                            width=700,
                            height=700,
                            background_color="white",
                            stopwords=stopwords)
    w.generate(txt)
    w.to_file("123.png")

    plt.imshow(w, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
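
The space-join above exists only so that WordCloud's default tokenizer can re-split jieba's output. An alternative, sketched here as a hypothetical variant rather than part of the commit, is to count the tokens yourself and hand the counts to WordCloud's generate_from_frequencies, which skips the join/re-split round trip:

from collections import Counter

import jieba
import wordcloud

def word_freq_cloud(file, font="STSONG.TTF"):
    # Tokenize once with jieba and keep tokens longer than one character.
    with open(file, encoding="utf-8") as fb:
        words = jieba.lcut(fb.read())
    freq = Counter(w for w in words if len(w) > 1)
    # Feed the counts straight in, bypassing WordCloud's own tokenizer.
    w = wordcloud.WordCloud(font_path=font, width=700, height=700,
                            background_color="white")
    w.generate_from_frequencies(freq)
    w.to_file("freq_cloud.png")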


'''
Word cloud for English-only text
'''
def word_cloud_English(file):
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()
    # English needs no segmentation; use wordcloud's built-in STOPWORDS.
    w = wordcloud.WordCloud(font_path="arial.ttf",
                            width=1000,
                            height=700,
                            background_color="white",
                            stopwords=STOPWORDS)
    w.generate(t)
    w.to_file("123.png")

    plt.imshow(w, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.show()


'''
Word cloud for mixed Chinese and English text
'''
def word_cloud_English_and_Chinese(file):
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()
    stopwords = set()
    with open('cn_stopwords.txt', 'r', encoding="utf-8") as f:
        stopwords.update(line.strip() for line in f)
    # collocations=False keeps WordCloud from pairing adjacent jieba
    # tokens into spurious bigrams.
    w = wordcloud.WordCloud(font_path="STSONG.TTF",
                            width=1000,
                            height=700,
                            background_color="white",
                            stopwords=stopwords,
                            collocations=False)
    ls = jieba.lcut(t)
    t = " ".join(ls)
    w.generate(t)
    w.to_file("123.png")

    plt.imshow(w, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
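
One caveat: wordcloud's STOPWORDS set is English-only and cn_stopwords.txt is Chinese-only, so the mixed routine above filters only Chinese function words. If both languages should be filtered, the two lists can simply be unioned; a minimal sketch under the same file layout:

stopwords = set(STOPWORDS)  # English stop words shipped with wordcloud
with open('cn_stopwords.txt', encoding="utf-8") as f:
    stopwords |= {line.strip() for line in f}  # add the Chinese list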

'''
Word-frequency count for Chinese-only text
'''
def Chineseword(file):
    with open(file, "r", encoding='utf-8') as f:
        txt = f.read()
    counts = {}  # map each word to the number of times it occurs
    for ch in ",。:;,《》!?“”'\n ":
        txt = txt.replace(ch, "")  # strip punctuation and whitespace
    words = jieba.lcut(txt)  # segment the text with jieba's exact mode

    for word in words:
        if len(word) == 1:  # skip single characters
            continue
        counts[word] = counts.get(word, 0) + 1  # count every occurrence

    items = list(counts.items())
    items.sort(key=lambda x: x[1], reverse=True)  # most frequent first
    for item in items:
        print(item)


'''
Word-frequency count for English-only text
'''
def Englishword(file):
    fb = open(file, 'r', encoding="utf-8")
    wordfile = {}
    for line in fb:
        line = line.lower()
        sword = line.strip().split()
        for word in sword:
            if word in wordfile:
                wordfile[word] += 1
            else:
                wordfile[word] = 1
    fb.close()
    # Sort (frequency, word) tuples so the highest counts print first.
    wordfrehigh = []
    for wd, fy in wordfile.items():
        wordfrehigh.append((fy, wd))
    wordfrehigh.sort(reverse=True)
    for wd in wordfrehigh:
        print(wd)


'''
Word-frequency count for mixed Chinese and English text
'''
def English_and_Chinese(file):
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()
    # Segment first, then lower-case and strip punctuation from both
    # languages before splitting on whitespace.
    ls = jieba.lcut(t)
    t = " ".join(ls)
    t = t.lower()
    for ch in ",。?:;’“!——、~,《》.--?;:'\"!~\n":
        t = t.replace(ch, " ")

    wordfile = {}
    for word in t.split():
        if word in wordfile:
            wordfile[word] += 1
        else:
            wordfile[word] = 1
    wordfrehigh = []
    for wd, fy in wordfile.items():
        wordfrehigh.append((fy, wd))
    wordfrehigh.sort(reverse=True)
    for wd in wordfrehigh:
        print(wd)


English_and_Chinese("file.txt")
word_cloud_English_and_Chinese("file.txt")
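
The two calls above run whenever the module is imported, not just when the script is executed directly; a conventional guard, offered as a suggestion rather than as part of the commit:

if __name__ == "__main__":
    English_and_Chinese("file.txt")
    word_cloud_English_and_Chinese("file.txt")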
@ -0,0 +1,4 @@
|
||||
Are these people so mean and powerful? Maybe it's because they showed the most humble smile in front of them three years ago, so now they want to get it back." With a bitter smile, Xiao Yan turned around in a lonely way and quietly returned to the team In the last row, a lonely figure, somewhat out of tune with the world around him. "Next, Xiao Mei" Hearing the tester's shout, a girl quickly ran out of the crowd. The girl had just appeared, and the nearby The voice of discussion was much smaller, and a pair of slightly fiery eyes firmly locked on the girl's cheeks. The girl was only about fourteen years old. Although it was not stunning, her childish little face was full of meaning. With a touch of charm, pure and charming, and contradictory, she has successfully become the focus of the audience.
|
||||
The girl stepped forward quickly, touching the dark magic stone tablet with small hands,
|
||||
and then slowly closed her eyes.
|
||||
萨达萨达是发生的故事大概十点多擦拭发我
|
Binary file not shown.
@@ -0,0 +1,91 @@
# -*- coding=utf-8 -*-
import os


# Check whether a string consists entirely of Chinese characters
def is_all_chinese(strs):
    for _char in strs:
        if not '\u4e00' <= _char <= '\u9fa5':
            return False
    return True


# Check whether a string contains at least one Chinese character
def is_chinese(strs):
    for ch in strs:
        if u'\u4e00' <= ch <= u'\u9fff':
            return True
    return False
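
A quick sanity check of the two predicates (illustrative, not part of the commit):

print(is_all_chinese("词云"))        # True: every character is CJK
print(is_all_chinese("词云2022"))    # False: digits fail the range test
print(is_chinese("word cloud 词"))   # True: one Chinese character suffices
print(is_chinese("word cloud"))      # False: no Chinese characters at all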


# Print a language classification for every file in the folder
def result():
    path = "D:/Hiker/Ku/Python_ku/Python_ku_one/file"  # folder to scan; change this before running!
    files = os.listdir(path)  # every file name in the folder
    print(files[0])

    i = 1
    for file in files:
        position = os.path.join(path, file)  # build the file's full path
        print(i, '--- ', end='')
        print(file, end='')
        i = i + 1
        with open(position, "r", encoding='utf-8') as f:
            data = f.read()
        if is_all_chinese(data):
            print(" (pure Chinese)")
        elif is_chinese(data):
            print(" (mixed Chinese and English)")
        else:
            print(" (pure English)")


# Return the b-th file name in the folder
def getf(b):
    path = "D:/Hiker/Ku/Python_ku/Python_ku_one/file"  # folder to scan; change this before running!
    files = os.listdir(path)
    return files[b]


# Classify the b-th file: 'z' = pure Chinese, 'zy' = mixed, 'y' = pure English
def chuli(b):
    path = "D:/Hiker/Ku/Python_ku/Python_ku_one/file"  # folder to scan; change this before running!
    files = os.listdir(path)
    position = os.path.join(path, files[b])

    with open(position, "r", encoding='utf-8') as f:
        data = f.read()
    if is_all_chinese(data):
        return 'z'
    elif is_chinese(data):
        return 'zy'
    else:
        return 'y'
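
The letter codes returned by chuli map onto the three word-cloud routines in the script above; a hypothetical dispatcher tying the two files together (make_cloud and the cross-file imports are assumptions, not part of the commit):

# Assumes word_cloud_Chinese, word_cloud_English and
# word_cloud_English_and_Chinese are importable from the first script.
def make_cloud(b):
    path = "D:/Hiker/Ku/Python_ku/Python_ku_one/file"  # same folder as above
    position = os.path.join(path, getf(b))
    kind = chuli(b)
    if kind == 'z':
        word_cloud_Chinese(position)
    elif kind == 'zy':
        word_cloud_English_and_Chinese(position)
    else:
        word_cloud_English(position)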
Binary file not shown.