You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

32 lines
837 B

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import asyncio
import aiofiles
from collections import Counter
from cppy.cp_util import *
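#
# Coroutines: somewhat complex; local file-read I/O is too fast to show much benefit (a web crawler would be a better showcase)
#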
async def read_file(file_path):
    """Asynchronously read the file at ``file_path`` and return its full text.

    The file is opened through ``aiofiles`` in text mode with UTF-8 decoding.
    """
    async with aiofiles.open(file_path, 'r', encoding='utf-8') as handle:
        return await handle.read()
async def count_words(text):
    """Return a ``Counter`` of the words found in ``text``.

    The text is lower-cased first, then split into words by
    ``extract_str_words`` (provided by ``cppy.cp_util``).
    """
    lowered = text.lower()
    return Counter(extract_str_words(lowered))
async def main():
    """Count words over repeated reads of the test file and print the top 10.

    The same file (``testfilepath``) is read ``repeats`` times so that the
    I/O part of the work is more prominent. All reads are started together
    with ``asyncio.gather`` so they can overlap; awaiting each read inside a
    loop would run them strictly one after another. Each printed count is
    integer-divided by ``repeats`` so it reflects a single pass over the file.
    """
    repeats = 10
    files = [testfilepath] * repeats

    # gather() returns results in the same order as its arguments, so the
    # counters are accumulated in the order of ``files``.
    texts = await asyncio.gather(*(read_file(path) for path in files))

    wordfreqs = Counter()
    for text in texts:
        wordfreqs += await count_words(text)

    for word, count in wordfreqs.most_common(10):
        # Divide by the number of repeated reads to report per-file counts.
        print(f"{word}: {count // repeats}")
# Run the asynchronous main function on a new event loop.
asyncio.run(main())