pull/1/head
			
			
		
		
							parent
							
								
									2bfeabe429
								
							
						
					
					
						commit
						6edef230ac
					
				| @ -0,0 +1,70 @@ | |||||||
|  | 
 | ||||||
|  | import site | ||||||
|  | import os,re | ||||||
|  | import string,operator | ||||||
|  | 
 | ||||||
|  | ################################################################################ | ||||||
|  | #  Variables | ||||||
|  | ################################################################################ | ||||||
# Sample text used by default. Earlier candidates tried here were 'test.txt'
# and 'pride-and-prejudice.txt'; only the last assignment ever took effect.
testfilename = 'Prey.txt'

# Pick the site directory that holds the installed data files. The substring
# test is presumably meant to match "site-packages" / "dist-packages".
site_packages = site.getsitepackages()
for package in site_packages:
    if 'package' not in package:
        continue
    basePath = package  # no break: the last matching entry wins

# NOTE(review): basePath is never bound when no entry matches, so the next
# line raises NameError in that case.
_data_dir = os.path.join(basePath, 'cppy', 'data')
stopwordfilepath = os.path.join(_data_dir, 'stop_words.txt')
testfilepath = os.path.join(_data_dir, testfilename)
|  | 
 | ||||||
|  | 
 | ||||||
|  | ################################################################################ | ||||||
|  | #  Functions | ||||||
|  | ################################################################################ | ||||||
def read_file(path_to_file):
    """Return the entire content of ``path_to_file`` decoded as UTF-8.

    Args:
        path_to_file: Path of the text file to read.

    Returns:
        The file content as a single string.
    """
    with open(path_to_file, encoding='utf-8') as handle:
        return handle.read()
|  | 
 | ||||||
def re_split(data):
    """Split ``data`` into lower-case word tokens.

    Every run of non-word characters and underscores is replaced by a single
    space, the result is lower-cased, and the text is split on whitespace.

    Args:
        data: The text to tokenize.

    Returns:
        A list of lower-case tokens; empty when ``data`` holds no word
        characters.
    """
    # Raw string: "\W" in a plain literal is an invalid escape sequence and
    # triggers a warning on current Python versions.
    normalized = re.sub(r'[\W_]+', ' ', data).lower()
    return normalized.split()
|  | 
 | ||||||
def get_stopwords(path_to_file=None):
    """Load the stop-word list and append every single lower-case letter.

    Args:
        path_to_file: Path of a UTF-8 file holding comma-separated stop
            words. When omitted (or ``None``), the module-level
            ``stopwordfilepath`` is used, looked up at call time.

    Returns:
        A list with the stop words from the file, in file order, followed by
        the 26 letters of ``string.ascii_lowercase``.
    """
    if path_to_file is None:
        path_to_file = stopwordfilepath
    with open(path_to_file, encoding='utf-8') as f:
        raw_entries = f.read().split(',')
    # Strip surrounding whitespace so a trailing newline in the file does not
    # leave the last entry as e.g. "your\n", and drop empty entries.
    data = [entry.strip() for entry in raw_entries]
    data = [entry for entry in data if entry]
    data.extend(string.ascii_lowercase)
    return data
|  | 
 | ||||||
def extract_file_words(path_to_file):
    """Return the meaningful words found in the file at ``path_to_file``.

    The file is read with ``read_file`` and then filtered exactly like a
    string passed to ``extract_str_words``: tokens are lower-cased, stop
    words are removed, and only tokens of at least three characters are kept.

    Args:
        path_to_file: Path of the UTF-8 text file to analyse.

    Returns:
        The list of kept tokens, in order of appearance.
    """
    # Delegate instead of duplicating the filtering rule in two places.
    return extract_str_words(read_file(path_to_file))
|  | 
 | ||||||
def extract_str_words(data_str):
    """Return the meaningful words found in ``data_str``.

    The text is tokenized with ``re_split``; tokens that appear in the list
    returned by ``get_stopwords`` or that are shorter than three characters
    are discarded.

    Args:
        data_str: The text to analyse.

    Returns:
        The list of kept tokens, in order of appearance.
    """
    word_list = re_split(data_str)
    # Build the set once: membership is then O(1) per token instead of a
    # linear scan over the stop-word list for every token.
    stop_words = set(get_stopwords())
    return [w for w in word_list if w not in stop_words and len(w) >= 3]
|  | 
 | ||||||
def count_word(word, word_freqs, stopwords):
    """Increment the count of ``word`` in ``word_freqs`` unless it is a stop word.

    Args:
        word: The token to count.
        word_freqs: Mapping of token to count; updated in place.
        stopwords: Container of tokens that must not be counted.
    """
    if word in stopwords:
        return
    previous = word_freqs.get(word, 0)
    word_freqs[word] = previous + 1
|  | 
 | ||||||
def get_frequencies(word_list):
    """Count how many times each item occurs in ``word_list``.

    Args:
        word_list: Iterable of hashable items (typically word tokens).

    Returns:
        A dict mapping each distinct item to its number of occurrences, keyed
        in order of first appearance.
    """
    counts = {}
    for token in word_list:
        if token in counts:
            counts[token] += 1
        else:
            counts[token] = 1
    return counts
|  | 
 | ||||||
def sort_dict(word_freq):
    """Return the items of ``word_freq`` ordered by value, largest first.

    Args:
        word_freq: Mapping of word to count.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count; items
        with equal counts keep the mapping's iteration order.
    """
    pairs = list(word_freq.items())
    pairs.sort(key=lambda pair: pair[1], reverse=True)
    return pairs
|  | 
 | ||||||
def print_word_freqs(word_freqs, n=10):
    """Print the first ``n`` entries of ``word_freqs`` as ``word - count`` lines.

    Args:
        word_freqs: Sliceable sequence of ``(word, count)`` pairs, e.g. the
            list returned by ``sort_dict``.
        n: Maximum number of entries to print.
    """
    leading = word_freqs[:n]
    for entry in leading:
        word, count = entry
        print(word, '-', count)
|  | 
 | ||||||
|  | 
 | ||||||
def test():
    """Print a fixed greeting line to standard output."""
    greeting = 'cppy welcome'
    print(greeting)
											
												
													File diff suppressed because it is too large
													Load Diff
												
											
										
									
								
											
												
													File diff suppressed because it is too large
													Load Diff
												
											
										
									
								| @ -0,0 +1 @@ | |||||||
|  | a,able,about,across,after,all,almost,also,am,among,an,and,any,are,as,at,be,because,been,but,by,can,cannot,could,dear,did,do,does,either,else,ever,every,for,from,get,got,had,has,have,he,her,hers,him,his,how,however,i,if,in,into,is,it,its,just,least,let,like,likely,may,me,might,most,must,my,neither,no,nor,not,of,off,often,on,only,or,other,our,own,rather,said,say,says,she,should,since,so,some,than,that,the,their,them,then,there,these,they,this,tis,to,too,twas,us,wants,was,we,were,what,when,where,which,while,who,whom,why,will,with,would,yet,you,your | ||||||
| @ -0,0 +1,2 @@ | |||||||
|  | "Some acquaintance or other, my dear, I suppose; I am sure I do not | ||||||
|  | know." | ||||||
					Loading…
					
					
				
		Reference in new issue