|
|
import os
|
|
|
import shutil
|
|
|
from time import time
|
|
|
|
|
|
import numpy as np
|
|
|
import SimpleITK as sitk
|
|
|
import nibabel as nib
|
|
|
import scipy.ndimage as ndimage
|
|
|
import h5py
|
|
|
|
|
|
# Convert the raw Synapse abdomen NIfTI volumes into the on-disk layout used
# downstream:
#   * train split -> one .npz file per slice   (keys: 'image', 'label')
#   * test split  -> one .npy.h5 file per volume (datasets: 'image', 'label')
splits = ['train', 'test']

# Intensity window: CT values are clipped to [lower, upper] and then rescaled
# linearly to [0, 1]. The window is the same for every split, so it is defined
# once here instead of on every loop iteration.
upper = 275
lower = -125

# The paths below were adjusted to match the directory layout in your
# screenshot.
# Make sure data/synapse/Abdomen/RawData/TrainSet contains `img` and `label`
# sub-folders.
# If it does not, and all files sit directly under TrainSet, move the images
# into `img` and the labels into `label` by hand.
for split in splits:
    if split == 'train':
        ct_path = './data/synapse/Abdomen/RawData/TrainSet/img'
        seg_path = './data/synapse/Abdomen/RawData/TrainSet/label'
        save_path = './data/synapse/train_npz/'  # standard output path
    else:
        ct_path = './data/synapse/Abdomen/RawData/TestSet/img'
        seg_path = './data/synapse/Abdomen/RawData/TestSet/label'
        save_path = './data/synapse/test_vol_h5/'  # standard output path

    # makedirs (not mkdir) so missing parent directories are created too;
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_path, exist_ok=True)

    start_time = time()

    if not os.path.exists(ct_path):
        print(f"Error: 找不到路径 {ct_path},请检查你的文件夹结构是否包含 img 子文件夹")
        continue

    # Sorted so the processing order (and the log output) is reproducible;
    # os.listdir returns entries in arbitrary order.
    for ct_file in sorted(os.listdir(ct_path)):
        # Skip anything that is not a compressed NIfTI file.
        if not ct_file.endswith('.nii.gz'):
            continue

        # The label file is expected to carry the same name as the image file
        # with 'img' replaced by 'label'.
        ct = nib.load(os.path.join(ct_path, ct_file))
        seg = nib.load(os.path.join(seg_path, ct_file.replace('img', 'label')))

        # Convert them to numpy format.
        ct_array = ct.get_fdata()
        seg_array = seg.get_fdata()

        # Clip to the intensity window, then normalize each 3D image to [0, 1].
        ct_array = np.clip(ct_array, lower, upper)
        ct_array = (ct_array - lower) / (upper - lower)

        # Move the last axis to the front so that axis 0 is the one iterated
        # over when slicing below.
        ct_array = np.transpose(ct_array, (2, 0, 1))
        seg_array = np.transpose(seg_array, (2, 0, 1))

        print('Processing:', ct_file, 'Shape:', ct_array.shape)

        # 'img0001.nii.gz' -> 'img0001'; the 'img' prefix becomes 'case' in
        # the output file names.
        ct_number = ct_file.split('.')[0]

        if split == 'test':
            # Test volumes are stored whole, one HDF5 file per volume.
            new_ct_name = ct_number.replace('img', 'case') + '.npy.h5'
            # Context manager guarantees the file is flushed and closed even
            # if writing one of the datasets raises.
            with h5py.File(os.path.join(save_path, new_ct_name), 'w') as hf:
                hf.create_dataset('image', data=ct_array)
                hf.create_dataset('label', data=seg_array)
            continue

        # Training volumes are stored slice by slice. Filtering out all-black
        # slices to save space would be optional; the original logic (keep
        # every slice) is preserved here.
        for s_idx in range(ct_array.shape[0]):
            ct_array_s = ct_array[s_idx, :, :]
            seg_array_s = seg_array[s_idx, :, :]
            slice_no = "{:03d}".format(s_idx)
            new_ct_name = ct_number.replace('img', 'case') + '_slice' + slice_no
            # np.savez appends the '.npz' extension itself.
            np.savez(os.path.join(save_path, new_ct_name),
                     image=ct_array_s, label=seg_array_s)

        # NOTE(review): the original indentation was lost, so the nesting of
        # these two prints is reconstructed. They are placed inside the
        # per-file loop (progress report after each training volume); the
        # `continue` above means they are not reached for test volumes.
        # Confirm against the intended behaviour.
        print('Already used {:.3f} min'.format((time() - start_time) / 60))
        print('-----------')