You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
38 lines
1.1 KiB
38 lines
1.1 KiB
import os
|
|
import sys
|
|
import pandas as pd
|
|
import json
|
|
import time
|
|
from tqdm import tqdm
|
|
|
|
class HandleNaN(object):
    """Scan per-stock day-line CSV files and report those containing NaN.

    The location of the CSV files is read from ``const.json`` under the
    ``["day_line_file_prefix"]["netease"]`` key. That value is used as a
    plain string prefix: a file path is built as ``prefix + code + ".csv"``.
    """

    def __init__(self):
        """Load ``const.json`` and remember the netease day-line prefix.

        Raises:
            OSError: if ``const.json`` cannot be opened.
            ValueError: if ``const.json`` is not valid JSON.
            KeyError: if the expected configuration keys are missing.
        """
        # const.json is looked up in the script directory (sys.path[0]) with
        # any "<sep>clean_data" component stripped out. os.sep keeps the
        # original Windows behaviour ("\\") and also works on other systems.
        const_path = sys.path[0].replace(os.sep + "clean_data", "")
        # The context manager guarantees the handle is closed after reading.
        with open(const_path + os.sep + "const.json", "r", encoding="utf8") as f:
            self.consts = json.load(f)

        self.dayline_file_prefix = self.consts["day_line_file_prefix"]['netease']

    def handle_one(self, code):
        """Check one stock's day-line file for missing values.

        Args:
            code: stock code; the file read is
                ``self.dayline_file_prefix + code + ".csv"`` (GBK encoded).

        Returns:
            A truthy value if any cell in the rows whose ``日期`` value is
            greater than ``"20140101"`` is NaN, a falsy value if none is,
            or ``None`` if the file could not be read or parsed.
        """
        path = "%s%s.csv" % (self.dayline_file_prefix, code)
        try:
            df = pd.read_csv(path, encoding="gbk")
        except (OSError, ValueError) as exc:
            # Best effort: a missing, undecodable or malformed file is
            # reported and skipped. pandas' parser errors and
            # UnicodeDecodeError are ValueError subclasses. Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
            print("error: cannot read %s (%s)" % (path, exc))
            return None

        # NOTE(review): this is a string comparison when the column holds
        # text. If dates are stored as "YYYY-MM-DD", every 2014 row compares
        # *less* than "20140101" ('-' sorts before '0') and is dropped --
        # confirm the date format used in the CSV files.
        df = df[df['日期'] > "20140101"]
        return df.isna().any().any()

    def handle_all(self):
        """Run :meth:`handle_one` on every directory entry and print hits.

        Every name returned by ``os.listdir(self.dayline_file_prefix)`` is
        processed; its first six characters are taken as the stock code.
        Names of files found to contain NaN are printed, followed by the
        total elapsed time in seconds.
        """
        time_start = time.time()
        file_list = os.listdir(self.dayline_file_prefix)
        for file_name in tqdm(file_list):
            if self.handle_one(file_name[0:6]):
                print(file_name)
        time_c = time.time() - time_start
        print("Finished! Take %s Seconds" % time_c)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the checker (which loads const.json) and
    # scan every day-line file for missing values.
    HandleNaN().handle_all()