From fbc2cd3ad42b96d8da2b4cf871ea03bcc896a67a Mon Sep 17 00:00:00 2001
From: pfargwc8q <1547953171@qq.com>
Date: Tue, 30 Dec 2025 14:21:50 +0800
Subject: [PATCH] ADD file via upload

---
 utils/preprocess_synapse_data.py | 78 ++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 utils/preprocess_synapse_data.py

diff --git a/utils/preprocess_synapse_data.py b/utils/preprocess_synapse_data.py
new file mode 100644
index 0000000..376f318
--- /dev/null
+++ b/utils/preprocess_synapse_data.py
@@ -0,0 +1,78 @@
+import os
+import shutil
+from time import time
+
+import numpy as np
+import SimpleITK as sitk
+import nibabel as nib
+import scipy.ndimage as ndimage
+import h5py
+
+splits = ['train', 'test']
+
+# Intensity window: CT values are clipped to [lower, upper], then scaled to [0, 1].
+upper = 275
+lower = -125
+
+# Expected layout: RawData/TrainSet and RawData/TestSet each hold an `img` and a
+# `label` sub-folder; if the files sit directly in the set folder, sort them
+# into those two sub-folders by hand before running this script.
+for split in splits:
+    if split == 'train':
+        ct_path = './data/synapse/Abdomen/RawData/TrainSet/img'
+        seg_path = './data/synapse/Abdomen/RawData/TrainSet/label'
+        save_path = './data/synapse/train_npz/'  # one .npz per slice
+    else:
+        ct_path = './data/synapse/Abdomen/RawData/TestSet/img'
+        seg_path = './data/synapse/Abdomen/RawData/TestSet/label'
+        save_path = './data/synapse/test_vol_h5/'  # one .h5 per volume
+
+    # makedirs also creates missing parents; exist_ok makes reruns safe.
+    os.makedirs(save_path, exist_ok=True)
+
+    start_time = time()
+
+    if not os.path.exists(ct_path):
+        print(f"Error: 找不到路径 {ct_path}，请检查你的文件夹结构是否包含 img 子文件夹")
+        continue
+
+    # Sorted so the processing order and the log are reproducible.
+    for ct_file in sorted(os.listdir(ct_path)):
+        # Skip anything that is not a .nii.gz volume.
+        if not ct_file.endswith('.nii.gz'):
+            continue
+
+        # The label file is named like the image with 'img' -> 'label'.
+        seg_file = os.path.join(seg_path, ct_file.replace('img', 'label'))
+        if not os.path.exists(seg_file):
+            print('Skipping', ct_file, '- label not found:', seg_file)
+            continue
+
+        ct_array = nib.load(os.path.join(ct_path, ct_file)).get_fdata()
+        seg_array = nib.load(seg_file).get_fdata()
+
+        # Clip to the window and normalize each 3D image to [0, 1].
+        ct_array = (np.clip(ct_array, lower, upper) - lower) / (upper - lower)
+
+        # Move the last axis first so that axis 0 indexes the slices.
+        ct_array = np.transpose(ct_array, (2, 0, 1))
+        seg_array = np.transpose(seg_array, (2, 0, 1))
+
+        print('Processing:', ct_file, 'Shape:', ct_array.shape)
+
+        case_name = ct_file.split('.')[0].replace('img', 'case')
+        if split == 'test':
+            # Whole volume in one HDF5 file; `with` closes it even on error.
+            h5_path = os.path.join(save_path, case_name + '.npy.h5')
+            with h5py.File(h5_path, 'w') as hf:
+                hf.create_dataset('image', data=ct_array)
+                hf.create_dataset('label', data=seg_array)
+            continue
+
+        # Every slice is written, including empty ones (no filtering).
+        for s_idx in range(ct_array.shape[0]):
+            npz_path = os.path.join(save_path, f'{case_name}_slice{s_idx:03d}')
+            np.savez(npz_path, image=ct_array[s_idx], label=seg_array[s_idx])
+
+        print('Already used {:.3f} min'.format((time() - start_time) / 60))
+        print('-----------')
\ No newline at end of file