# Source: utils/preprocess_synapse_data.py
# (added by commit fbc2cd3, "ADD file via upload").
"""Convert Synapse abdominal CT volumes into training slices and test volumes.

For every ``*.nii.gz`` image in a split's ``img`` folder the script loads the
image and the matching label volume (same file name with ``img`` replaced by
``label``), clips the image intensities to ``[LOWER, UPPER]``, rescales them
to ``[0, 1]`` and moves the third array axis to the front.

* ``train`` split: each slice along the first axis is written to
  ``<case>_slice<NNN>.npz`` with the arrays ``image`` and ``label``.
* ``test`` split: each whole volume is written to ``<case>.npy.h5`` with the
  datasets ``image`` and ``label``.

The input layout is expected to be
``data/synapse/Abdomen/RawData/{TrainSet,TestSet}/{img,label}``.  If all
files sit directly in ``TrainSet``/``TestSet``, move the images into ``img``
and the labels into ``label`` by hand before running this script.
"""
import os
import shutil
from time import time

import numpy as np
import SimpleITK as sitk
import nibabel as nib
import scipy.ndimage as ndimage
import h5py

splits = ['train', 'test']

# Intensity window applied to the raw CT values before scaling to [0, 1].
LOWER = -125
UPPER = 275

# split name -> (image folder, label folder, output folder).
# The output folders follow the standard train_npz / test_vol_h5 layout.
SPLIT_PATHS = {
    'train': (
        './data/synapse/Abdomen/RawData/TrainSet/img',
        './data/synapse/Abdomen/RawData/TrainSet/label',
        './data/synapse/train_npz/',
    ),
    'test': (
        './data/synapse/Abdomen/RawData/TestSet/img',
        './data/synapse/Abdomen/RawData/TestSet/label',
        './data/synapse/test_vol_h5/',
    ),
}


def _normalize_ct(volume):
    """Clip *volume* to ``[LOWER, UPPER]`` and rescale it linearly to [0, 1]."""
    clipped = np.clip(volume, LOWER, UPPER)
    return (clipped - LOWER) / (UPPER - LOWER)


def _load_case(ct_path, seg_path, ct_file):
    """Load one image/label pair and return them as slice-first arrays.

    The label file is looked up in *seg_path* under the image's file name
    with ``img`` replaced by ``label``.  Both arrays are transposed with
    ``(2, 0, 1)`` so that the original third axis becomes the first one.
    """
    ct = nib.load(os.path.join(ct_path, ct_file))
    seg = nib.load(os.path.join(seg_path, ct_file.replace('img', 'label')))

    ct_array = _normalize_ct(ct.get_fdata())
    seg_array = seg.get_fdata()

    ct_array = np.transpose(ct_array, (2, 0, 1))
    seg_array = np.transpose(seg_array, (2, 0, 1))
    return ct_array, seg_array


def _save_test_volume(save_path, case_name, ct_array, seg_array):
    """Write a whole volume to ``<case_name>.npy.h5`` in *save_path*."""
    target = os.path.join(save_path, case_name + '.npy.h5')
    # The context manager closes the file even if a dataset write fails.
    with h5py.File(target, 'w') as hf:
        hf.create_dataset('image', data=ct_array)
        hf.create_dataset('label', data=seg_array)


def _save_train_slices(save_path, case_name, ct_array, seg_array):
    """Write every slice along axis 0 to ``<case_name>_slice<NNN>.npz``.

    All slices are kept; empty (all-black) slices are not filtered out.
    """
    for s_idx in range(ct_array.shape[0]):
        slice_name = case_name + '_slice' + "{:03d}".format(s_idx)
        np.savez(
            os.path.join(save_path, slice_name),
            image=ct_array[s_idx, :, :],
            label=seg_array[s_idx, :, :],
        )


def _process_split(split):
    """Preprocess every ``.nii.gz`` image of one split (``train`` or ``test``)."""
    if split == 'train':
        ct_path, seg_path, save_path = SPLIT_PATHS['train']
    else:
        ct_path, seg_path, save_path = SPLIT_PATHS['test']

    start_time = time()

    if not os.path.exists(ct_path):
        print(f"Error: 找不到路径 {ct_path},请检查你的文件夹结构是否包含 img 子文件夹")
        return

    # Created only after the input check so a failed run leaves no empty
    # output folder behind; makedirs also creates missing parent folders.
    os.makedirs(save_path, exist_ok=True)

    # Sorted so the processing order (and the log) is reproducible.
    for ct_file in sorted(os.listdir(ct_path)):
        # Skip anything that is not a .nii.gz file.
        if not ct_file.endswith('.nii.gz'):
            continue

        ct_array, seg_array = _load_case(ct_path, seg_path, ct_file)
        print('Processing:', ct_file, 'Shape:', ct_array.shape)

        case_name = ct_file.split('.')[0].replace('img', 'case')
        if split == 'test':
            _save_test_volume(save_path, case_name, ct_array, seg_array)
        else:
            _save_train_slices(save_path, case_name, ct_array, seg_array)

        print('Already used {:.3f} min'.format((time() - start_time) / 60))
        print('-----------')


def main():
    """Run the preprocessing for every split listed in ``splits``."""
    for split in splits:
        _process_split(split)


if __name__ == '__main__':
    main()