mirror of https://gitee.com/hikerxiang/ciyun
branch: master
commit 6d8cdefd28 (parent ae85c54a75)
@@ -0,0 +1,161 @@
import jieba
import wordcloud
from wordcloud import STOPWORDS
from matplotlib import pyplot as plt


'''
Word cloud for Chinese-only text
'''
def word_cloud_Chinese(file):
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()

    # Load the Chinese stop-word list, one word per line.
    stopwords = set()
    with open('cn_stopwords.txt', 'r', encoding="utf-8") as f:
        stopwords.update(line.strip() for line in f)

    # Segment with jieba, then re-join on spaces so that WordCloud's
    # built-in tokenizer can split the text back into words.
    ls = jieba.lcut(t)
    txt = " ".join(ls)
    w = wordcloud.WordCloud(font_path="STSONG.TTF",
                            width=700,
                            height=700,
                            background_color="white",
                            stopwords=stopwords)
    w.generate(txt)
    w.to_file("123.png")

    plt.imshow(w, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
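
The space-join above exists only so that WordCloud's default tokenizer can re-split jieba's output. An alternative, sketched here as a hypothetical variant rather than part of the commit, is to count the tokens yourself and hand the counts to WordCloud's generate_from_frequencies, which skips the join/re-split round trip:

from collections import Counter

import jieba
import wordcloud

def word_freq_cloud(file, font="STSONG.TTF"):
    # Tokenize once with jieba and keep tokens longer than one character.
    with open(file, encoding="utf-8") as fb:
        words = jieba.lcut(fb.read())
    freq = Counter(w for w in words if len(w) > 1)
    # Feed the counts straight in, bypassing WordCloud's own tokenizer.
    w = wordcloud.WordCloud(font_path=font, width=700, height=700,
                            background_color="white")
    w.generate_from_frequencies(freq)
    w.to_file("freq_cloud.png")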


'''
Word cloud for English-only text
'''
def word_cloud_English(file):
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()
    # English needs no segmentation; use wordcloud's built-in STOPWORDS.
    w = wordcloud.WordCloud(font_path="arial.ttf",
                            width=1000,
                            height=700,
                            background_color="white",
                            stopwords=STOPWORDS)
    w.generate(t)
    w.to_file("123.png")

    plt.imshow(w, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.show()


'''
Word cloud for mixed Chinese and English text
'''
def word_cloud_English_and_Chinese(file):
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()
    stopwords = set()
    with open('cn_stopwords.txt', 'r', encoding="utf-8") as f:
        stopwords.update(line.strip() for line in f)
    # collocations=False keeps WordCloud from pairing adjacent jieba
    # tokens into spurious bigrams.
    w = wordcloud.WordCloud(font_path="STSONG.TTF",
                            width=1000,
                            height=700,
                            background_color="white",
                            stopwords=stopwords,
                            collocations=False)
    ls = jieba.lcut(t)
    t = " ".join(ls)
    w.generate(t)
    w.to_file("123.png")

    plt.imshow(w, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
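
One caveat: wordcloud's STOPWORDS set is English-only and cn_stopwords.txt is Chinese-only, so the mixed routine above filters only Chinese function words. If both languages should be filtered, the two lists can simply be unioned; a minimal sketch under the same file layout:

stopwords = set(STOPWORDS)  # English stop words shipped with wordcloud
with open('cn_stopwords.txt', encoding="utf-8") as f:
    stopwords |= {line.strip() for line in f}  # add the Chinese list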

'''
Word-frequency count for Chinese-only text
'''
def Chineseword(file):
    with open(file, "r", encoding='utf-8') as f:
        txt = f.read()
    counts = {}  # map each word to the number of times it occurs
    for ch in ",。:;,《》!?“”'\n ":
        txt = txt.replace(ch, "")  # strip punctuation and whitespace
    words = jieba.lcut(txt)  # segment the text with jieba's exact mode

    for word in words:
        if len(word) == 1:  # skip single characters
            continue
        counts[word] = counts.get(word, 0) + 1  # count every occurrence

    items = list(counts.items())
    items.sort(key=lambda x: x[1], reverse=True)  # most frequent first
    for item in items:
        print(item)


'''
Word-frequency count for English-only text
'''
def Englishword(file):
    fb = open(file, 'r', encoding="utf-8")
    wordfile = {}
    for line in fb:
        line = line.lower()
        sword = line.strip().split()
        for word in sword:
            if word in wordfile:
                wordfile[word] += 1
            else:
                wordfile[word] = 1
    fb.close()
    # Sort (frequency, word) tuples so the highest counts print first.
    wordfrehigh = []
    for wd, fy in wordfile.items():
        wordfrehigh.append((fy, wd))
    wordfrehigh.sort(reverse=True)
    for wd in wordfrehigh:
        print(wd)


'''
Word-frequency count for mixed Chinese and English text
'''
def English_and_Chinese(file):
    with open(file, 'r', encoding="utf-8") as fb:
        t = fb.read()
    # Segment first, then lower-case and strip punctuation from both
    # languages before splitting on whitespace.
    ls = jieba.lcut(t)
    t = " ".join(ls)
    t = t.lower()
    for ch in ",。?:;’“!——、~,《》.--?;:'\"!~\n":
        t = t.replace(ch, " ")

    wordfile = {}
    for word in t.split():
        if word in wordfile:
            wordfile[word] += 1
        else:
            wordfile[word] = 1
    wordfrehigh = []
    for wd, fy in wordfile.items():
        wordfrehigh.append((fy, wd))
    wordfrehigh.sort(reverse=True)
    for wd in wordfrehigh:
        print(wd)


English_and_Chinese("file.txt")
word_cloud_English_and_Chinese("file.txt")
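
The two calls above run whenever the module is imported, not just when the script is executed directly; a conventional guard, offered as a suggestion rather than as part of the commit:

if __name__ == "__main__":
    English_and_Chinese("file.txt")
    word_cloud_English_and_Chinese("file.txt")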
@ -0,0 +1,4 @@
|
||||
Are these people so mean and powerful? Maybe it's because they showed the most humble smile in front of them three years ago, so now they want to get it back." With a bitter smile, Xiao Yan turned around in a lonely way and quietly returned to the team In the last row, a lonely figure, somewhat out of tune with the world around him. "Next, Xiao Mei" Hearing the tester's shout, a girl quickly ran out of the crowd. The girl had just appeared, and the nearby The voice of discussion was much smaller, and a pair of slightly fiery eyes firmly locked on the girl's cheeks. The girl was only about fourteen years old. Although it was not stunning, her childish little face was full of meaning. With a touch of charm, pure and charming, and contradictory, she has successfully become the focus of the audience.
|
||||
The girl stepped forward quickly, touching the dark magic stone tablet with small hands,
|
||||
and then slowly closed her eyes.
|
||||
萨达萨达是发生的故事大概十点多擦拭发我
|
Binary file not shown.
@@ -0,0 +1,91 @@
# -*- coding=utf-8 -*-
import os


# Check whether a string consists entirely of Chinese characters
def is_all_chinese(strs):
    for _char in strs:
        if not '\u4e00' <= _char <= '\u9fa5':
            return False
    return True


# Check whether a string contains at least one Chinese character
def is_chinese(strs):
    for ch in strs:
        if u'\u4e00' <= ch <= u'\u9fff':
            return True
    return False
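
A quick sanity check of the two predicates (illustrative, not part of the commit):

print(is_all_chinese("词云"))        # True: every character is CJK
print(is_all_chinese("词云2022"))    # False: digits fail the range test
print(is_chinese("word cloud 词"))   # True: one Chinese character suffices
print(is_chinese("word cloud"))      # False: no Chinese characters at all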


# Print a language classification for every file in the folder
def result():
    path = "D:/Hiker/Ku/Python_ku/Python_ku_one/file"  # folder to scan; change this before running!
    files = os.listdir(path)  # every file name in the folder
    print(files[0])

    i = 1
    for file in files:
        position = os.path.join(path, file)  # build the file's full path
        print(i, '--- ', end='')
        print(file, end='')
        i = i + 1
        with open(position, "r", encoding='utf-8') as f:
            data = f.read()
        if is_all_chinese(data):
            print(" (pure Chinese)")
        elif is_chinese(data):
            print(" (mixed Chinese and English)")
        else:
            print(" (pure English)")


# Return the b-th file name in the folder
def getf(b):
    path = "D:/Hiker/Ku/Python_ku/Python_ku_one/file"  # folder to scan; change this before running!
    files = os.listdir(path)
    return files[b]


# Classify the b-th file: 'z' = pure Chinese, 'zy' = mixed, 'y' = pure English
def chuli(b):
    path = "D:/Hiker/Ku/Python_ku/Python_ku_one/file"  # folder to scan; change this before running!
    files = os.listdir(path)
    position = os.path.join(path, files[b])

    with open(position, "r", encoding='utf-8') as f:
        data = f.read()
    if is_all_chinese(data):
        return 'z'
    elif is_chinese(data):
        return 'zy'
    else:
        return 'y'
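
The letter codes returned by chuli map onto the three word-cloud routines in the script above; a hypothetical dispatcher tying the two files together (make_cloud and the cross-file imports are assumptions, not part of the commit):

# Assumes word_cloud_Chinese, word_cloud_English and
# word_cloud_English_and_Chinese are importable from the first script.
def make_cloud(b):
    path = "D:/Hiker/Ku/Python_ku/Python_ku_one/file"  # same folder as above
    position = os.path.join(path, getf(b))
    kind = chuli(b)
    if kind == 'z':
        word_cloud_Chinese(position)
    elif kind == 'zy':
        word_cloud_English_and_Chinese(position)
    else:
        word_cloud_English(position)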
Binary file not shown.