parent
cd8186dd68
commit
5099345721
@ -0,0 +1,30 @@
|
|||||||
|
from flask import Flask, render_template, request, redirect, url_for
|
||||||
|
from collections import Counter
|
||||||
|
from cppy.cp_util import *
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Flask application object; the route handlers below register themselves on it.
app = Flask(__name__)
|
||||||
|
|
||||||
|
@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the upload form and, on POST, show the uploaded file's word counts.

    GET renders ``index.html``.

    POST reads the ``file`` form field, stores it in a private temporary
    file, counts its words and renders ``result.html`` with ``word_counts``
    set to a list of ``(word, count)`` pairs, most frequent first. If the
    form was submitted without selecting a file, the client is redirected
    back to the upload form.
    """
    import tempfile  # local import: only the upload path needs it

    if request.method != 'POST':
        return render_template('index.html')

    # Fetch the uploaded file; a request without the field is answered by
    # Flask with a 400 response.
    upload = request.files['file']
    if not upload.filename:
        # The browser sends an empty filename when no file was chosen.
        return redirect(url_for('index'))

    # Save to a server-chosen temporary path. The client-supplied filename is
    # deliberately not used to build the path: it is untrusted input and could
    # contain directory components such as "../".
    fd, filename = tempfile.mkstemp(suffix='.txt')
    os.close(fd)
    try:
        upload.save(filename)

        # Compute word frequencies.
        words = extract_file_words(filename)
        word_counts = Counter(words)
    finally:
        # Always delete the temporary file, even if extraction failed.
        os.remove(filename)

    return render_template('result.html', word_counts=word_counts.most_common())
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Start Flask's built-in server when this file is run as a script.
    # NOTE(review): debug=True turns on the interactive debugger and the
    # reloader; keep it for local development only.
    app.run(debug=True)
|
@ -0,0 +1,14 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Upload Text File</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Upload a Text File to Count Word Frequencies</h1>
|
||||||
|
<form action="/" method="post" enctype="multipart/form-data">
|
||||||
|
<input type="file" name="file">
|
||||||
|
<input type="submit" value="Submit">
|
||||||
|
</form>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -0,0 +1,16 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Word Frequencies</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Top Word Frequencies:</h1>
|
||||||
|
<ul>
|
||||||
|
{% for word, count in word_counts %}
|
||||||
|
<li>{{ word }}: {{ count }}</li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
<a href="{{ url_for('index') }}">Back to Upload</a>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -0,0 +1,25 @@
|
|||||||
|
import requests
|
||||||
|
from cppy.cp_util import *
|
||||||
|
|
||||||
|
# Upper bound, in seconds, for each service call so a stalled service cannot
# hang the client forever.
_REQUEST_TIMEOUT = 30


def _call_service(url, payload, result_key):
    """POST ``payload`` as JSON to ``url`` and return ``result_key`` from the JSON reply.

    Raises ``requests.HTTPError`` when the service answers with an error
    status, instead of failing later with a ``KeyError`` on the error body.
    """
    response = requests.post(url, json=payload, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()[result_key]


def main():
    """Run the word-frequency pipeline through the three local HTTP services.

    The text is tokenized by the service on port 7770, counted by the one on
    port 7771 and ranked by the one on port 7772; the ten top entries are
    then printed.
    """
    # Read the content of the test file (helper from cppy.cp_util).
    content = read_file()

    # Extract words.
    words = _call_service(
        "http://localhost:7770/tokenize", {"text": content}, "words"
    )

    # Count word frequencies.
    word_count = _call_service(
        "http://localhost:7771/count", {"words": words}, "word_count"
    )

    # Sort and keep the top entries.
    top_10_words = _call_service(
        "http://localhost:7772/sort", {"word_count": word_count}, "top_10_words"
    )

    print("Top 10 words:")
    print_word_freqs(top_10_words)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the client pipeline only when executed directly.
if __name__ == "__main__":
    main()
|
@ -0,0 +1,14 @@
|
|||||||
|
from fastapi import FastAPI
|
||||||
|
from collections import Counter
|
||||||
|
from cppy.cp_util import *
|
||||||
|
import uvicorn
|
||||||
|
|
||||||
|
# FastAPI application object; the /count endpoint below is registered on it.
app = FastAPI()
|
||||||
|
|
||||||
|
@app.post("/count")
async def count(words_list: dict):  # {"words": ["word1", "word2", ...]}
    """Tally how often each entry of the posted ``words`` list occurs.

    Returns ``{"word_count": {word: occurrences, ...}}``.
    """
    tally = Counter()
    tally.update(words_list["words"])
    frequencies = dict(tally)
    return {"word_count": frequencies}
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Serve the counting API on the loopback interface when run as a script.
    uvicorn.run(app, port=7771, host="127.0.0.1")
|
@ -0,0 +1,13 @@
|
|||||||
|
from fastapi import FastAPI
|
||||||
|
import uvicorn
|
||||||
|
|
||||||
|
# FastAPI application object; the /sort endpoint below is registered on it.
app = FastAPI()
|
||||||
|
|
||||||
|
@app.post("/sort")
async def sort(word_count_dict: dict):
    """Rank the posted ``word_count`` mapping and return its ten largest entries.

    Entries are ``(word, count)`` pairs ordered by count, highest first.
    Returns ``{"top_10_words": [...]}`` with at most ten pairs.
    """
    pairs = list(word_count_dict["word_count"].items())
    pairs.sort(key=lambda pair: pair[1], reverse=True)
    return {"top_10_words": pairs[:10]}
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Serve the sorting API on the loopback interface when run as a script.
    uvicorn.run(app, port=7772, host="127.0.0.1")
|
@ -0,0 +1,13 @@
|
|||||||
|
from fastapi import FastAPI
|
||||||
|
from cppy.cp_util import *
|
||||||
|
import uvicorn
|
||||||
|
|
||||||
|
# FastAPI application object; the /tokenize endpoint below is registered on it.
app = FastAPI()
|
||||||
|
|
||||||
|
@app.post("/tokenize")
async def tokenize(text: dict):  # {"text": "raw text to split into words"}
    """Split the posted text into words.

    The request body is a JSON object whose ``text`` entry holds the raw
    string, mirroring the dict bodies taken by the /count and /sort
    services. A bare ``str`` parameter would be read by FastAPI from the
    query string, so a client posting ``{"text": ...}`` as JSON would be
    rejected with a validation error.

    Returns ``{"words": ...}`` with whatever ``extract_str_words`` produces
    for the text.
    """
    words = extract_str_words(text["text"])
    return {"words": words}
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Serve the tokenizing API on the loopback interface when run as a script.
    uvicorn.run(app, port=7770, host="127.0.0.1")
|
Loading…
Reference in new issue