From da68d58ad2092fb38142cac061457878be5e654f Mon Sep 17 00:00:00 2001
From: p752m8wit <2053833939@qq.com>
Date: Tue, 9 Nov 2021 17:36:55 +0800
Subject: [PATCH] ADD file via upload

---
 code/python/batchdealing.py | 147 ++++++++++++++++++++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100644 code/python/batchdealing.py

diff --git a/code/python/batchdealing.py b/code/python/batchdealing.py
new file mode 100644
index 0000000..ffe9dd3
--- /dev/null
+++ b/code/python/batchdealing.py
@@ -0,0 +1,147 @@
+# Input image preprocessing
+
+import os
+import math
+import numpy as np
+import tensorflow as tf
+import matplotlib.pyplot as plt
+
+# ----------------- Build the lists of image paths and labels -----------------
+# Storage location of the generated images: /Users/leixinhong/PycharmProjects/classification/teethimg/Re_train/
+# NOTE(review): machine-specific absolute path; presumably handed to
+# get_files() as file_dir by the training script -- confirm.
+train_dir = '/Users/leixinhong/PycharmProjects/classification/teethimg/Re_train/'
+
+# Module-level accumulators used by get_files(). Each class folder name owns
+# a pair of parallel lists: "<name>" receives the image paths found in that
+# folder and "label_<name>" receives one integer label per path. All of them
+# start out empty.
+
+one, label_one = [], []
+two, label_two = [], []
+three, label_three = [], []
+
+four, label_four = [], []
+five, label_five = [], []
+six, label_six = [], []
+
+seven, label_seven = [], []
+eight, label_eight = [], []
+nine, label_nine = [], []
+
+
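+# step1: collect the paths of all images under '/Users/leixinhong/PycharmProjects/classification/teethimg/Re_train'
+# into the per-class lists and append the matching label to the corresponding label list.
+# ratio is the proportion of the test set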
+def get_files(file_dir, ratio):
+    """Collect image paths per class, shuffle them and split train/validation.
+
+    Each of the nine sub-directories of ``file_dir`` ('one' ... 'nine') is
+    listed with ``os.listdir``; every entry becomes one sample whose path is
+    ``file_dir + '/<class>/' + entry``. The module-level lists ``one`` /
+    ``label_one`` ... ``nine`` / ``label_nine`` are emptied and refilled on
+    every call, so they describe the most recent listing only.
+
+    Args:
+        file_dir: Directory that contains the nine class sub-directories.
+        ratio: Fraction of all samples reserved for validation; the
+            validation count is ``ceil(n_sample * ratio)``.
+
+    Returns:
+        ``(tra_images, tra_labels, val_images, val_labels)``: two lists of
+        path strings and two lists of ``int`` labels.
+
+    Raises:
+        OSError: If a class sub-directory cannot be listed.
+    """
+    # (sub-directory, path list, label list, class id).
+    # NOTE(review): the ids jump from 3 to 5, so id 4 is never produced. The
+    # mapping is kept as-is because code outside this file may depend on it;
+    # confirm whether 'five'..'nine' were meant to be 4..8.
+    classes = (
+        ('one', one, label_one, 0),
+        ('two', two, label_two, 1),
+        ('three', three, label_three, 2),
+        ('four', four, label_four, 3),
+        ('five', five, label_five, 5),
+        ('six', six, label_six, 6),
+        ('seven', seven, label_seven, 7),
+        ('eight', eight, label_eight, 8),
+        ('nine', nine, label_nine, 9),
+    )
+    for name, paths, labels, class_id in classes:
+        # Clear in place first: appending to the shared module-level lists
+        # without a reset duplicated every sample on a repeated call.
+        del paths[:]
+        del labels[:]
+        for file in os.listdir(file_dir + '/' + name):
+            paths.append(file_dir + '/' + name + '/' + file)
+            labels.append(class_id)
+
+    # Flatten the per-class lists into one path array and one label array.
+    image_list = np.hstack((one, two, three, four, five, six, seven, eight, nine))
+    label_list = np.hstack((label_one, label_two, label_three, label_four, label_five, label_six, label_seven, label_eight, label_nine))
+
+    # One row per (path, label) pair, shuffled row-wise so pairs stay intact.
+    # Sharing one array with the paths turns the labels into strings.
+    temp = np.array([image_list, label_list]).transpose()
+    np.random.shuffle(temp)
+    all_image_list = list(temp[:, 0])
+    all_label_list = list(temp[:, 1])
+
+    # The last ceil(n_sample * ratio) shuffled samples form the validation set.
+    n_sample = len(all_label_list)
+    n_val = int(math.ceil(n_sample * ratio))
+    n_train = n_sample - n_val
+
+    # Labels are strings here; int(float(...)) also accepts forms like '3.0'.
+    tra_images = all_image_list[0:n_train]
+    tra_labels = [int(float(i)) for i in all_label_list[0:n_train]]
+
+    # Slice to the end of the list: the former ``[n_train:-1]`` silently
+    # dropped the last sample from the validation set.
+    val_images = all_image_list[n_train:]
+    val_labels = [int(float(i)) for i in all_label_list[n_train:]]
+
+    return tra_images, tra_labels, val_images, val_labels
+
+
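+# -------------------- Build batches ----------------------------------------
+
+# step1: pass the lists produced above to get_batch(), cast them and build an input queue; images and labels
+# are separate lists, hence tf.train.slice_input_producer(); tf.io.read_file() then reads each image file.
+#   image_W, image_H: the fixed image width and height
+#   batch_size: how many images go into each batch
+#   capacity: the maximum number of elements in the queue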
+def get_batch(image, label, image_W, image_H, batch_size, capacity):
+    """Build queue-based TF1 input ops that yield standardized image batches.
+
+    Args:
+        image: Image file paths; each file is decoded as a 3-channel JPEG.
+        label: Labels, one per path; cast to int32.
+        image_W, image_H: Crop/pad target size, forwarded in this order as
+            (target_height, target_width). NOTE(review): image_W therefore
+            acts as the height for non-square sizes -- confirm with callers.
+        batch_size: Number of samples per batch.
+        capacity: Maximum number of elements held by the batching queue.
+
+    Returns:
+        (image_batch, label_batch): float32 images, labels of shape [batch_size].
+    """
+    image = tf.cast(image, tf.string)
+    label = tf.cast(label, tf.int32)
+    # Slice both tensors together so each path stays paired with its label.
+    input_queue = tf.compat.v1.train.slice_input_producer([image, label])
+    label = input_queue[1]
+    image_contents = tf.io.read_file(input_queue[0])
+    image = tf.image.decode_jpeg(image_contents, channels=3)
+    # Use the compat.v1 alias like the queue ops above: the bare
+    # tf.image.resize_image_with_crop_or_pad name does not exist in TF 2.x.
+    image = tf.compat.v1.image.resize_image_with_crop_or_pad(image, image_W, image_H)
+    image = tf.image.per_image_standardization(image)
+    image_batch, label_batch = tf.compat.v1.train.batch(
+        [image, label], batch_size=batch_size, num_threads=32, capacity=capacity)
+    label_batch = tf.reshape(label_batch, [batch_size])
+    image_batch = tf.cast(image_batch, tf.float32)
+    return image_batch, label_batch
+