|
|
|
@ -1,22 +1,30 @@
|
|
|
|
|
from cppy.cp_util import *
|
|
|
|
|
from collections import Counter
|
|
|
|
|
from cppy.cp_util import *
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
class Pipe:
    """Wrap a callable as a pipeline stage that can be chained with ``|``.

    A stage stores ``func`` together with optional extra positional and
    keyword arguments. Calling the stage with a value evaluates
    ``func(value, *args, **kwargs)``. ``a | b`` builds a new stage that
    feeds the result of ``a`` into ``b``, so
    ``(Pipe(f) | Pipe(g, 3))(x)`` is ``g(f(x), 3)``.
    """

    def __init__(self, func, *args, **kwargs):
        """Store the callable and the extra arguments bound to this stage.

        Args:
            func: Callable whose first positional parameter receives the
                value flowing through the pipeline.
            *args: Extra positional arguments passed after that value.
            **kwargs: Extra keyword arguments passed to ``func``.
        """
        self.func = func
        # Always set both attributes (possibly empty) so that __call__
        # never fails with AttributeError on a stage without extras.
        self.args = args
        self.kwargs = kwargs

    def __or__(self, other):
        """Return a new stage that runs this stage, then ``other``.

        ``other`` may be another ``Pipe`` or any callable taking a single
        argument. Nothing is executed until the resulting stage is called.
        """
        if not callable(other):
            return NotImplemented

        def composed(data):
            # Go through __call__ on both sides so each stage applies
            # its own bound arguments.
            return other(self(data))

        return Pipe(composed)

    def __call__(self, data):
        """Apply the wrapped callable to ``data`` plus the bound arguments."""
        return self.func(data, *self.args, **self.kwargs)
|
|
|
|
|
|
|
|
|
|
def read_file(filename):
    """Return the entire contents of ``filename`` as a string.

    The file is opened in text mode and decoded as UTF-8 explicitly, so
    the result does not depend on the platform's default encoding.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        return f.read()
|
|
|
|
|
|
|
|
|
|
def split_words(text):
|
|
|
|
@ -28,8 +36,7 @@ def count_words(words):
|
|
|
|
|
def top_n_words(word_counts, n):
    """Return the ``n`` most frequent entries of ``word_counts``.

    Args:
        word_counts: Object providing ``most_common`` (for example a
            ``collections.Counter``).
        n: Number of entries to return.

    Returns:
        Whatever ``word_counts.most_common(n)`` returns; for a
        ``Counter`` this is a list of ``(word, count)`` pairs ordered
        from most to least frequent.
    """
    return word_counts.most_common(n)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Use the pipeline: chain Pipe stages with `|`, call the result on
# `testfilepath`, and print what it returns.
# NOTE(review): two alternative `pipe = ...` assignments and two
# `print(result)` lines appear below; this looks like leftover diff
# residue -- confirm which variant is intended and drop the other.
# `testfilepath` and the stage functions of the first variant are not
# defined in the visible source; presumably they come from
# `cppy.cp_util` -- TODO confirm.
|
|
|
|
|
pipe = Pipe(extract_file_words) | Pipe(get_frequencies) | Pipe(sort_dict) | Pipe(print_word_freqs, 5)
|
|
|
|
|
pipe = Pipe(read_file) | Pipe(split_words) | Pipe(count_words) | Pipe(top_n_words, 10)
|
|
|
|
|
result = pipe(testfilepath)
|
|
|
|
|
print(result)
|
|
|
|
|
print(result)
|