You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
EMCAD/utils/preprocess_synapse_data.py

78 lines
2.9 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import os
import shutil
from time import time
import numpy as np
import SimpleITK as sitk
import nibabel as nib
import scipy.ndimage as ndimage
import h5py
# Convert the Synapse abdominal CT volumes (NIfTI) into the on-disk layout
# used for training and testing:
#   * train split -> one .npz file per axial slice (keys: 'image', 'label')
#   * test split  -> one .npy.h5 file per volume   (datasets: 'image', 'label')
splits = ['train', 'test']

# Intensity window: voxel values are clipped to [lower, upper] and then
# rescaled linearly to [0, 1]. The values do not depend on the split, so they
# are defined once here.
upper = 275
lower = -125

# The paths below were adapted to the local folder layout.
# Make sure data/synapse/Abdomen/RawData/TrainSet contains "img" and "label"
# sub-folders. If all files sit directly in the TrainSet root instead, move
# the images into "img" and the labels into "label" by hand.
for split in splits:
    if split == 'train':
        ct_path = './data/synapse/Abdomen/RawData/TrainSet/img'
        seg_path = './data/synapse/Abdomen/RawData/TrainSet/label'
        save_path = './data/synapse/train_npz/'  # standard output location
    else:
        ct_path = './data/synapse/Abdomen/RawData/TestSet/img'
        seg_path = './data/synapse/Abdomen/RawData/TestSet/label'
        save_path = './data/synapse/test_vol_h5/'  # standard output location

    # exist_ok=True replaces the check-then-create pattern (no race if the
    # folder appears in between) and makedirs also creates missing parents.
    os.makedirs(save_path, exist_ok=True)

    start_time = time()

    if not os.path.exists(ct_path):
        print(f"Error: 找不到路径 {ct_path},请检查你的文件夹结构是否包含 img 子文件夹")
        continue

    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order (and therefore the log output) reproducible.
    for ct_file in sorted(os.listdir(ct_path)):
        # Skip anything that is not a compressed NIfTI file.
        if not ct_file.endswith('.nii.gz'):
            continue

        # The label file is expected to carry the same name with 'img'
        # replaced by 'label'.
        ct = nib.load(os.path.join(ct_path, ct_file))
        seg = nib.load(os.path.join(seg_path, ct_file.replace('img', 'label')))

        # Convert both volumes to numpy arrays.
        ct_array = ct.get_fdata()
        seg_array = seg.get_fdata()

        # Clip to the intensity window, then normalize each 3D image to [0, 1].
        ct_array = np.clip(ct_array, lower, upper)
        ct_array = (ct_array - lower) / (upper - lower)

        # Move the last axis to the front so that axis 0 indexes the slices
        # written out below.
        ct_array = np.transpose(ct_array, (2, 0, 1))
        seg_array = np.transpose(seg_array, (2, 0, 1))

        print('Processing:', ct_file, 'Shape:', ct_array.shape)

        # 'imgXXXX.nii.gz' -> 'imgXXXX' -> 'caseXXXX'; computed once per
        # volume instead of once per slice.
        ct_number = ct_file.split('.')[0]
        case_name = ct_number.replace('img', 'case')

        if split == 'test':
            # Test volumes are stored whole. The context manager guarantees
            # the HDF5 file is closed even if writing a dataset fails.
            new_ct_name = case_name + '.npy.h5'
            with h5py.File(os.path.join(save_path, new_ct_name), 'w') as hf:
                hf.create_dataset('image', data=ct_array)
                hf.create_dataset('label', data=seg_array)
            continue

        # Training volumes are stored slice by slice. Empty (all-black)
        # slices could be dropped here to save space; that is optional and
        # every slice is kept, as in the original logic.
        for s_idx in range(ct_array.shape[0]):
            ct_array_s = ct_array[s_idx, :, :]
            seg_array_s = seg_array[s_idx, :, :]
            new_ct_name = f'{case_name}_slice{s_idx:03d}'
            # np.savez appends the '.npz' extension itself.
            np.savez(os.path.join(save_path, new_ct_name), image=ct_array_s, label=seg_array_s)

        print('Already used {:.3f} min'.format((time() - start_time) / 60))
        print('-----------')