"""Convert Synapse abdominal CT volumes into training slices and test volumes.

For every ``*.nii.gz`` image found in a split's ``img`` folder, the matching
label volume is loaded from the ``label`` folder, the image intensities are
clipped to ``[lower, upper]`` and rescaled to ``[0, 1]``, and both arrays are
transposed so that the original last axis becomes the first one.

* ``train`` volumes are written as one ``.npz`` file per index along that
  first axis (keys ``image`` and ``label``).
* ``test`` volumes are written whole as a single ``.npy.h5`` file (datasets
  ``image`` and ``label``).
"""
import os
import shutil
from time import time

import numpy as np
import SimpleITK as sitk
import nibabel as nib
import scipy.ndimage as ndimage
import h5py

# Dataset splits to convert; each one has its own input and output folders.
splits = ['train', 'test']

# Intensity window applied to every CT volume before rescaling to [0, 1].
upper = 275
lower = -125

# NOTE: the paths below assume that data/synapse/Abdomen/RawData/TrainSet
# (and TestSet) each contain an `img` and a `label` sub-folder. If all files
# sit directly in the TrainSet root instead, move the images into `img` and
# the labels into `label` by hand before running this script.
for split in splits:
    if split == 'train':
        ct_path = './data/synapse/Abdomen/RawData/TrainSet/img'
        seg_path = './data/synapse/Abdomen/RawData/TrainSet/label'
        save_path = './data/synapse/train_npz/'  # standard output location
    else:
        ct_path = './data/synapse/Abdomen/RawData/TestSet/img'
        seg_path = './data/synapse/Abdomen/RawData/TestSet/label'
        save_path = './data/synapse/test_vol_h5/'  # standard output location

    # makedirs (rather than mkdir) so missing parent directories are created;
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_path, exist_ok=True)

    start_time = time()

    if not os.path.exists(ct_path):
        print(f"Error: 找不到路径 {ct_path},请检查你的文件夹结构是否包含 img 子文件夹")
        continue

    # Sorted so the processing order (and the log) is reproducible;
    # os.listdir returns entries in arbitrary order.
    for ct_file in sorted(os.listdir(ct_path)):
        # Skip anything that is not a compressed NIfTI file.
        if not ct_file.endswith('.nii.gz'):
            continue

        # The label file shares the image's name with 'img' replaced by 'label'.
        seg_file = os.path.join(seg_path, ct_file.replace('img', 'label'))
        if not os.path.exists(seg_file):
            # Report and move on instead of aborting the whole run half-way.
            print(f"Warning: label file {seg_file} not found, skipping {ct_file}")
            continue

        ct = nib.load(os.path.join(ct_path, ct_file))
        seg = nib.load(seg_file)

        # Convert both volumes to numpy arrays.
        ct_array = ct.get_fdata()
        seg_array = seg.get_fdata()

        # Clip to the intensity window, then normalize each 3D image to [0, 1].
        ct_array = np.clip(ct_array, lower, upper)
        ct_array = (ct_array - lower) / (upper - lower)

        # Move the last axis to the front so slices are indexed on axis 0.
        ct_array = np.transpose(ct_array, (2, 0, 1))
        seg_array = np.transpose(seg_array, (2, 0, 1))

        print('Processing:', ct_file, 'Shape:', ct_array.shape)

        # File name up to the first dot, with the 'img' prefix renamed to 'case'.
        ct_number = ct_file.split('.')[0]
        case_name = ct_number.replace('img', 'case')

        if split == 'test':
            # Test volumes are stored whole; the context manager guarantees
            # the HDF5 file is closed even if writing a dataset fails.
            new_ct_name = case_name + '.npy.h5'
            with h5py.File(os.path.join(save_path, new_ct_name), 'w') as hf:
                hf.create_dataset('image', data=ct_array)
                hf.create_dataset('label', data=seg_array)
            continue

        # Training volumes are stored one slice per .npz file.
        # Filtering out all-black slices would save space (optional); the
        # original behaviour of keeping every slice is retained here.
        for s_idx in range(ct_array.shape[0]):
            ct_array_s = ct_array[s_idx, :, :]
            seg_array_s = seg_array[s_idx, :, :]
            new_ct_name = f"{case_name}_slice{s_idx:03d}"
            np.savez(os.path.join(save_path, new_ct_name),
                     image=ct_array_s, label=seg_array_s)

        # Time elapsed since this split started, reported after each volume.
        print('Already used {:.3f} min'.format((time() - start_time) / 60))
        print('-----------')