李子祥 3 years ago committed by Gitee
parent ae85c54a75
commit 6d8cdefd28
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F

Binary file not shown.

After

Width:  |  Height:  |  Size: 292 KiB

161
123.py

@ -0,0 +1,161 @@
import glob
import os
import jieba
import wordcloud
from wordcloud import STOPWORDS
from matplotlib import pyplot as plt
'''
纯中文词云
'''
def word_cloud_Chinese(file):
    """Generate a word cloud image (123.png) from a pure-Chinese text file.

    Reads *file*, segments it with jieba, filters stop words loaded from
    cn_stopwords.txt, writes the cloud to 123.png and shows it with
    matplotlib.
    """
    # `with` guarantees both handles are closed; the original leaked the
    # stop-word file handle entirely.
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()
    with open('cn_stopwords.txt', 'r', encoding="utf-8") as sf:
        stopwords = {line.strip() for line in sf}
    # jieba segments Chinese text; WordCloud expects space-separated tokens.
    txt = " ".join(jieba.lcut(t))
    w = wordcloud.WordCloud(font_path="STSONG.TTF",
                            width=700,
                            height=700,
                            background_color="white",
                            stopwords=stopwords)
    w.generate(txt)
    w.to_file("123.png")
    plt.imshow(w, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
'''
纯英文词云
'''
def word_cloud_English(file):
    """Generate a word cloud image (123.png) from a pure-English text file.

    Uses wordcloud's built-in English STOPWORDS and displays the result
    with matplotlib.
    """
    # `with` closes the handle even if read() raises (original leaked then).
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()
    w = wordcloud.WordCloud(font_path="arial.ttf",
                            width=1000,
                            height=700,
                            background_color="white",
                            stopwords=STOPWORDS)
    w.generate(t)
    w.to_file("123.png")
    plt.imshow(w, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
'''
中英混合词云
'''
def word_cloud_English_and_Chinese(file):
    """Generate a word cloud image (123.png) from mixed Chinese/English text.

    Segments with jieba and filters Chinese stop words; collocations=False
    stops WordCloud from pairing adjacent tokens into bigrams.
    """
    # `with` closes both handles; the original never closed the
    # stop-word file at all.
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()
    with open('cn_stopwords.txt', 'r', encoding="utf-8") as sf:
        stopwords = {line.strip() for line in sf}
    w = wordcloud.WordCloud(font_path="STSONG.TTF",
                            width=1000,
                            height=700,
                            background_color="white",
                            stopwords=stopwords,
                            collocations=False)
    t = " ".join(jieba.lcut(t))
    w.generate(t)
    w.to_file("123.png")
    plt.imshow(w, interpolation='bilinear')
'''
纯中文词频计数
'''
def Chineseword(file):
    """Print (word, count) pairs for a Chinese text, most frequent first.

    Punctuation is removed before jieba segmentation; single-character
    tokens are skipped as noise. The original never closed the input file.
    """
    with open(file, "r", encoding='utf-8') as f:
        txt = f.read()
    # Remove punctuation/whitespace before segmenting.
    for ch in " ,。:;,《》!?“\' ''\n'":
        txt = txt.replace(ch, "")
    counts = {}  # word -> occurrence count
    for word in jieba.lcut(txt):  # precise-mode segmentation
        if len(word) == 1:  # skip single characters
            continue
        counts[word] = counts.get(word, 0) + 1
    # Print pairs sorted by frequency, descending.
    for item in sorted(counts.items(), key=lambda x: x[1], reverse=True):
        print(item)
'''
纯英文词频计数
'''
def Englishword(file):
    """Print (count, word) pairs for an English text, most frequent first.

    Lower-cases each line and splits on whitespace. Ties on count sort by
    word, descending, because tuples compare element-wise.
    """
    from collections import Counter
    wordfile = Counter()
    # `with` replaces the manual open/close (leaked on exception before).
    with open(file, 'r', encoding="utf-8") as fb:
        for line in fb:
            wordfile.update(line.lower().strip().split())
    # (count, word) tuples sorted descending, exactly as the original printed.
    wordfrehigh = sorted(((fy, wd) for wd, fy in wordfile.items()),
                         reverse=True)
    for wd in wordfrehigh:
        print(wd)
'''
中英混合词频计数
'''
def English_and_Chinese(file):
    """Print (count, word) pairs for mixed Chinese/English text.

    jieba-segments the text so Chinese words count as units, lower-cases
    everything, replaces punctuation with spaces, then counts
    whitespace-separated tokens, most frequent first.
    """
    # `with` closes the handle even if read() raises.
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()
    t = " ".join(jieba.lcut(t)).lower()
    # Replace Chinese and English punctuation with spaces before splitting.
    for ch in ",。?:;’“!——、~,《》.--?;:'\"!~' ''\n'":
        t = t.replace(ch, " ")
    # split() with no argument collapses runs of whitespace, matching the
    # original's split-then-split-again behaviour.
    wordfile = {}
    for word in t.split():
        wordfile[word] = wordfile.get(word, 0) + 1
    wordfrehigh = sorted(((fy, wd) for wd, fy in wordfile.items()),
                         reverse=True)
    for wd in wordfrehigh:
        print(wd)
if __name__ == "__main__":
    # Guard the demo calls so importing this module (main.py does
    # `from fruit import ...`) no longer runs them at import time.
    English_and_Chinese("file.txt")
    word_cloud_English_and_Chinese("file.txt")

@ -0,0 +1,4 @@
Are these people so mean and powerful? Maybe it's because they showed the most humble smile in front of them three years ago, so now they want to get it back." With a bitter smile, Xiao Yan turned around in a lonely way and quietly returned to the team In the last row, a lonely figure, somewhat out of tune with the world around him. "Next, Xiao Mei" Hearing the tester's shout, a girl quickly ran out of the crowd. The girl had just appeared, and the nearby The voice of discussion was much smaller, and a pair of slightly fiery eyes firmly locked on the girl's cheeks. The girl was only about fourteen years old. Although it was not stunning, her childish little face was full of meaning. With a touch of charm, pure and charming, and contradictory, she has successfully become the focus of the audience.
The girl stepped forward quickly, touching the dark magic stone tablet with small hands,
and then slowly closed her eyes.
萨达萨达是发生的故事大概十点多擦拭发我

Binary file not shown.

@ -0,0 +1,91 @@
# -*- coding=utf-8 -*-
import os
# 检验是否全是中文字符
def is_all_chinese(strs):
    """Return True when every character of *strs* lies in U+4E00..U+9FA5.

    An empty string vacuously returns True, as in the original loop form.
    """
    return all('\u4e00' <= ch <= '\u9fa5' for ch in strs)
# 检验是否包含中文字符
def is_chinese(strs):
    """Return True when *strs* contains at least one char in U+4E00..U+9FFF."""
    return any(u'\u4e00' <= ch <= u'\u9fff' for ch in strs)
#
def result():
    """List files in the fixed folder and print each one's language type.

    For every file in the folder: prints its index and name, then whether
    its content is pure Chinese, mixed, or pure English (as judged by
    is_all_chinese / is_chinese).
    """
    path = "D:/Hiker/Ku/Python_ku/Python_ku_one/file"  # folder to scan; edit before running
    files = os.listdir(path)
    print(files[0])
    # enumerate replaces the manual counter; the unused `txts` list is gone.
    for i, file in enumerate(files, start=1):
        position = path + '\\' + file  # build the absolute path ('\\' is an escaped backslash)
        print(i, '--- ', end='')
        print(file, end='')
        with open(position, "r", encoding='utf-8') as f:
            data = f.read()
        if is_all_chinese(data):
            print(" (纯中文)")
        elif is_chinese(data):
            print(" (有英文有中文)")
        else:
            print(" (纯英文)")
def getf(b):
    """Return the name of the b-th file (0-based) in the fixed folder."""
    path = "D:/Hiker/Ku/Python_ku/Python_ku_one/file"  # folder to scan; edit before running
    return os.listdir(path)[b]
def chuli(b):
    """Classify the b-th file (0-based) in the fixed folder by language.

    Returns 'z' for pure Chinese, 'zy' for mixed Chinese/English,
    'y' for pure English.
    """
    path = "D:/Hiker/Ku/Python_ku/Python_ku_one/file"  # folder to scan; edit before running
    files = os.listdir(path)
    position = path + '\\' + files[b]  # absolute path ('\\' is an escaped backslash)
    with open(position, "r", encoding='utf-8') as f:
        data = f.read()
    # Early returns replace the nested if/else; unused `txts`/`i` removed.
    if is_all_chinese(data):
        return 'z'
    if is_chinese(data):
        return 'zy'
    return 'y'

Binary file not shown.

@ -0,0 +1,168 @@
import glob
import os
import jieba
import wordcloud
from wordcloud import STOPWORDS
from matplotlib import pyplot as plt
'''
纯中文词云
'''
def word_cloud_Chinese(file):
    """Generate a word cloud image (123.png) from a pure-Chinese text file.

    Reads *file*, segments it with jieba, filters stop words loaded from
    cn_stopwords.txt, writes the cloud to 123.png and shows it with
    matplotlib.
    """
    # `with` guarantees both handles are closed; the original leaked the
    # stop-word file handle entirely.
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()
    with open('cn_stopwords.txt', 'r', encoding="utf-8") as sf:
        stopwords = {line.strip() for line in sf}
    # jieba segments Chinese text; WordCloud expects space-separated tokens.
    txt = " ".join(jieba.lcut(t))
    w = wordcloud.WordCloud(font_path="STSONG.TTF",
                            width=700,
                            height=700,
                            background_color="white",
                            stopwords=stopwords)
    w.generate(txt)
    w.to_file("123.png")
    plt.imshow(w, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
'''
纯英文词云
'''
def word_cloud_English(file):
    """Generate a word cloud image (123.png) from a pure-English text file.

    Uses wordcloud's built-in English STOPWORDS and displays the result
    with matplotlib.
    """
    # `with` closes the handle even if read() raises (original leaked then).
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()
    w = wordcloud.WordCloud(font_path="arial.ttf",
                            width=1000,
                            height=700,
                            background_color="white",
                            stopwords=STOPWORDS)
    w.generate(t)
    w.to_file("123.png")
    plt.imshow(w, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
'''
中英混合词云
'''
def word_cloud_English_and_Chinese(file):
    """Generate a word cloud image (123.png) from mixed Chinese/English text.

    Segments with jieba and filters Chinese stop words; collocations=False
    stops WordCloud from pairing adjacent tokens into bigrams.
    """
    # `with` closes both handles; the original never closed the
    # stop-word file at all.
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()
    with open('cn_stopwords.txt', 'r', encoding="utf-8") as sf:
        stopwords = {line.strip() for line in sf}
    w = wordcloud.WordCloud(font_path="STSONG.TTF",
                            width=1000,
                            height=700,
                            background_color="white",
                            stopwords=stopwords,
                            collocations=False)
    t = " ".join(jieba.lcut(t))
    w.generate(t)
    w.to_file("123.png")
    plt.imshow(w, interpolation='bilinear')
'''
纯中文词频计数
'''
def Chineseword(file):
    """Print (word, count) pairs for a Chinese text, most frequent first.

    Punctuation is removed before jieba segmentation; single-character
    tokens are skipped as noise. The original never closed the input file.
    """
    with open(file, "r", encoding='utf-8') as f:
        txt = f.read()
    # Remove punctuation/whitespace before segmenting.
    for ch in " ,。:;,《》!?“\' ''\n'":
        txt = txt.replace(ch, "")
    counts = {}  # word -> occurrence count
    for word in jieba.lcut(txt):  # precise-mode segmentation
        if len(word) == 1:  # skip single characters
            continue
        counts[word] = counts.get(word, 0) + 1
    # Print pairs sorted by frequency, descending.
    for item in sorted(counts.items(), key=lambda x: x[1], reverse=True):
        print(item)
'''
纯英文词频计数
'''
def Englishword(file):
    """Print (count, word) pairs for an English text, most frequent first.

    Lower-cases each line and splits on whitespace. Ties on count sort by
    word, descending, because tuples compare element-wise.
    """
    from collections import Counter
    wordfile = Counter()
    # `with` replaces the manual open/close (leaked on exception before).
    with open(file, 'r', encoding="utf-8") as fb:
        for line in fb:
            wordfile.update(line.lower().strip().split())
    # (count, word) tuples sorted descending, exactly as the original printed.
    wordfrehigh = sorted(((fy, wd) for wd, fy in wordfile.items()),
                         reverse=True)
    for wd in wordfrehigh:
        print(wd)
'''
中英混合词频计数
'''
def English_and_Chinese(file):
    """Print (count, word) pairs for mixed Chinese/English text.

    jieba-segments the text so Chinese words count as units, lower-cases
    everything, replaces punctuation with spaces, then counts
    whitespace-separated tokens, most frequent first.
    """
    # `with` closes the handle even if read() raises.
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()
    t = " ".join(jieba.lcut(t)).lower()
    # Replace Chinese and English punctuation with spaces before splitting.
    for ch in ",。?:;’“!——、~,《》.--?;:'\"!~' ''\n'":
        t = t.replace(ch, " ")
    # split() with no argument collapses runs of whitespace, matching the
    # original's split-then-split-again behaviour.
    wordfile = {}
    for word in t.split():
        wordfile[word] = wordfile.get(word, 0) + 1
    wordfrehigh = sorted(((fy, wd) for wd, fy in wordfile.items()),
                         reverse=True)
    for wd in wordfrehigh:
        print(wd)
if __name__ == "__main__":
    # Guard the demo calls so importing this module (main.py does
    # `from fruit import ...`) no longer runs them at import time.
    English_and_Chinese("file.txt")
    word_cloud_English_and_Chinese("file.txt")

@ -0,0 +1,54 @@
# -*- coding=utf-8 -*-
'''
from file_read import result
from file_read import chuli
from fruit import word_cloud_English
from fruit import word_cloud_Chinese
from fruit import word_cloud_English_and_Chinese
from fruit import Chineseword
from fruit import Englishword
from fruit import English_and_Chinese
'''
if __name__ == '__main__':
    print("是否已将文本导入指定文件夹")
    a = input()
    while True:
        if a == '':
            # Empty answer means "yes": list the folder, let the user pick a
            # file, detect its language, then run the matching cloud/count pair.
            from file_read import result
            result()
            print('你想选择哪一个文件进行分词:')
            b = int(input())  # 1-based index shown by result()
            from file_read import getf
            name = getf(b - 1)
            print(name)
            from file_read import chuli
            dd = chuli(b - 1)  # 'y' English, 'z' Chinese, 'zy' mixed
            print(dd)
            if dd == 'y':
                from fruit import word_cloud_English
                from fruit import Englishword
                word_cloud_English(name)
                Englishword(name)
            elif dd == 'z':
                from fruit import word_cloud_Chinese
                from fruit import Chineseword
                word_cloud_Chinese(name)
                Chineseword(name)
            else:
                from fruit import word_cloud_English_and_Chinese
                from fruit import English_and_Chinese
                word_cloud_English_and_Chinese(name)
                English_and_Chinese(name)
        else:
            print("请将文本导入指定文件夹")
        print('是否退出:')
        answer = input()
        if answer == '':
            break
        # BUG FIX: re-ask the initial question. In the original, `a` was
        # never updated inside the loop, so a non-empty first answer made
        # every subsequent iteration print the warning on stale input.
        print("是否已将文本导入指定文件夹")
        a = input()

@ -0,0 +1,7 @@
Are these people so mean and powerful? Maybe it's because they showed the most humble smile in front of them three years ago, so now they want to get it back." With a bitter smile, Xiao Yan turned around in a lonely way and quietly returned to the team In the last row, a lonely figure, somewhat out of tune with the world around him. "Next, Xiao Mei" Hearing the tester's shout, a girl quickly ran out of the crowd. The girl had just appeared, and the nearby The voice of discussion was much smaller, and a pair of slightly fiery eyes firmly locked on the girl's cheeks. The girl was only about fourteen years old. Although it was not stunning, her childish little face was full of meaning. With a touch of charm, pure and charming, and contradictory, she has successfully become the focus of the audience.
The girl stepped forward quickly, touching the dark magic stone tablet with small hands,
and then slowly closed her eyes.
Are these people so mean and powerful? Maybe it's because they showed the most humble smile in front of them three years ago, so now they want to get it back." With a bitter smile, Xiao Yan turned around in a lonely way and quietly returned to the team In the last row, a lonely figure, somewhat out of tune with the world around him. "Next, Xiao Mei" Hearing the tester's shout, a girl quickly ran out of the crowd. The girl had just appeared, and the nearby The voice of discussion was much smaller, and a pair of slightly fiery eyes firmly locked on the girl's cheeks. The girl was only about fourteen years old. Although it was not stunning, her childish little face was full of meaning. With a touch of charm, pure and charming, and contradictory, she has successfully become the focus of the audience.
The girl stepped forward quickly, touching the dark magic stone tablet with small hands,
and then slowly closed her eyes.
萨达萨达是发生的故事大概十点多擦拭发我
Loading…
Cancel
Save