From da68d58ad2092fb38142cac061457878be5e654f Mon Sep 17 00:00:00 2001 From: p752m8wit <2053833939@qq.com> Date: Tue, 9 Nov 2021 17:36:55 +0800 Subject: [PATCH] ADD file via upload --- code/python/batchdealing.py | 147 ++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 code/python/batchdealing.py diff --git a/code/python/batchdealing.py b/code/python/batchdealing.py new file mode 100644 index 0000000..ffe9dd3 --- /dev/null +++ b/code/python/batchdealing.py @@ -0,0 +1,147 @@ +# 输入图片处理 + +import os +import math +import numpy as np +import tensorflow as tf +import matplotlib.pyplot as plt + +# -----------------生成图片路径和标签的List------------------------------------ +# 生成图片的存储位置 /Users/leixinhong/PycharmProjects/classification/teethimg/Re_train/ +train_dir = '/Users/leixinhong/PycharmProjects/classification/teethimg/Re_train/' + +one = [] +label_one = [] +two = [] +label_two = [] +three = [] +label_three = [] +four = [] +label_four = [] +five = [] +label_five = [] +six = [] +label_six = [] +seven = [] +label_seven = [] +eight = [] +label_eight = [] +nine = [] +label_nine = [] + + +# step1:获取'/Users/leixinhong/PycharmProjects/classification/teethimg/Re_train'下所有的图片路径名,存放到 +# 对应的列表中,同时贴上标签,存放到label列表中。 +# ratio是测试集的比例 +def get_files(file_dir, ratio): + for file in os.listdir(file_dir + '/one'): + one.append(file_dir + '/one' + '/' + file) + label_one.append(0) + for file in os.listdir(file_dir + '/two'): + two.append(file_dir + '/two' + '/' + file) + label_two.append(1) + for file in os.listdir(file_dir + '/three'): + three.append(file_dir + '/three' + '/' + file) + label_three.append(2) + for file in os.listdir(file_dir + '/four'): + four.append(file_dir + '/four' + '/' + file) + label_four.append(3) + for file in os.listdir(file_dir + '/five'): + five.append(file_dir + '/five' + '/' + file) + label_five.append(5) + for file in os.listdir(file_dir + '/six'): + six.append(file_dir + '/six' + '/' + file) + label_six.append(6) + for file in os.listdir(file_dir + '/seven'): + seven.append(file_dir + '/seven' + '/' + file) + label_seven.append(7) + for file in os.listdir(file_dir + '/eight'): + eight.append(file_dir + '/eight' + '/' + file) + label_eight.append(8) + for file in os.listdir(file_dir + '/nine'): + nine.append(file_dir + '/nine' + '/' + file) + label_nine.append(9) + + # step2:对生成的图片路径和标签List做打乱处理把所有的数据合起来组成一个list(img和lab) + # np.hstack水平(按列)按顺序堆叠数组。 + # >>> a = np.array((1,2,3)) + # >>> b = np.array((2,3,4)) + # >>> np.hstack((a,b)) + # array([1, 2, 3, 2, 3, 4]) + image_list = np.hstack((one, two, three, four, five, six, seven, eight, nine)) + label_list = np.hstack((label_one, label_two, label_three, label_four, label_five, label_six, label_seven, label_eight, label_nine)) + + # 利用shuffle打乱顺序 + temp = np.array([image_list, label_list]) + temp = temp.transpose() + np.random.shuffle(temp) + + # 从打乱的temp中再取出list(img和lab) + # image_list = list(temp[:, 0]) + # label_list = list(temp[:, 1]) + # label_list = [int(i) for i in label_list] + # return image_list, label_list + + # 将所有的img和lab转换成list + all_image_list = list(temp[:, 0]) + all_label_list = list(temp[:, 1]) + + # 将所得List分为两部分,一部分用来训练tra,一部分用来测试val + # ratio是测试集的比例 + # n_sample全部样本数 + n_sample = len(all_label_list) + n_val = int(math.ceil(n_sample * ratio)) # 测试样本数 + n_train = n_sample - n_val # 训练样本数 + + # 训练的图片和标签 + tra_images = all_image_list[0:n_train] + tra_labels = all_label_list[0:n_train] + tra_labels = [int(float(i)) for i in tra_labels] + + # 测试图片和标签 + val_images = all_image_list[n_train:-1] + val_labels = all_label_list[n_train:-1] + val_labels = [int(float(i)) for i in val_labels] + + return tra_images, tra_labels, val_images, val_labels + + +# --------------------生成Batch---------------------------------------------- + +# step1:将上面生成的List传入get_batch() ,转换类型,产生一个输入队列queue,因为img和lab +# 是分开的,所以使用tf.train.slice_input_producer(),然后用tf.read_file()从队列中读取图像 +# image_W, image_H, :设置好固定的图像高度和宽度 +# 设置batch_size:每个batch要放多少张图片 +# capacity:一个队列最大多少 +def get_batch(image, label, image_W, image_H, batch_size, capacity): + # 转换类型 + image = tf.cast(image, tf.string) + label = tf.cast(label, tf.int32) + + # make an input queue + # tf.train.slice_input_producer是一个tensor生成器,作用是按照设定, + # 每次从一个tensor列表中按顺序或者随机抽取出一个tensor放入文件名队列。 + input_queue = tf.compat.v1.train.slice_input_producer([image, label]) + + label = input_queue[1] + image_contents = tf.io.read_file(input_queue[0]) # read img from a queue + + # step2:将图像解码,不同类型的图像不能混在一起,要么只用jpeg,要么只用png等。 + image = tf.image.decode_jpeg(image_contents, channels=3) + + # step3:数据预处理,对图像进行旋转、缩放、裁剪、归一化等操作,让计算出的模型更健壮。 + image = tf.image.resize_image_with_crop_or_pad(image, image_W, image_H) + image = tf.image.per_image_standardization(image) + + # step4:生成batch + # image_batch: 4D tensor [batch_size, width, height, 3],dtype=tf.float32 + # label_batch: 1D tensor [batch_size], dtype=tf.int32 + image_batch, label_batch = tf.compat.v1.train.batch([image, label], + batch_size=batch_size, + num_threads=32, + capacity=capacity) + # 重新排列label,行数为[batch_size] + label_batch = tf.reshape(label_batch, [batch_size]) + image_batch = tf.cast(image_batch, tf.float32) + return image_batch, label_batch +