diff --git a/EdgeDetection/code/边缘检测.py b/EdgeDetection/code/边缘检测.py new file mode 100644 index 0000000..ea48b98 --- /dev/null +++ b/EdgeDetection/code/边缘检测.py @@ -0,0 +1,67 @@ +import cv2 +import numpy as np +import matplotlib.pyplot as plt + +# Read the image +img = cv2.imread('D:/Python/EdgeDetection/img/person.jpg') +img_RGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # convert to RGB for display later + +# Convert the image to grayscale +grayImage = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + +# Thresholding +ret, binary = cv2.threshold(grayImage, 127, 255, cv2.THRESH_BINARY) + +# Roberts operator +kernelx = np.array([[-1, 0], [0, 1]], dtype=int) +kernely = np.array([[0, -1], [1, 0]], dtype=int) +x = cv2.filter2D(binary, cv2.CV_16S, kernelx) # convolve with the kernel +y = cv2.filter2D(binary, cv2.CV_16S, kernely) +absX = cv2.convertScaleAbs(x) +absY = cv2.convertScaleAbs(y) +Roberts = cv2.addWeighted(absX, 0.5, absY, 0.5, 0) # combine the two gradient components + +# Prewitt operator +kernelx = np.array([[1, 1, 1], [0, 0, 0], [-1, -1, -1]], dtype=int) +kernely = np.array([[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]], dtype=int) +x = cv2.filter2D(binary, cv2.CV_16S, kernelx) +y = cv2.filter2D(binary, cv2.CV_16S, kernely) +absX = cv2.convertScaleAbs(x) +absY = cv2.convertScaleAbs(y) +Prewitt = cv2.addWeighted(absX, 0.5, absY, 0.5, 0) + +# Sobel operator +x = cv2.Sobel(binary, cv2.CV_16S, 1, 0) +y = cv2.Sobel(binary, cv2.CV_16S, 0, 1) +absX = cv2.convertScaleAbs(x) +absY = cv2.convertScaleAbs(y) +Sobel = cv2.addWeighted(absX, 0.5, absY, 0.5, 0) + +# Laplacian operator +dst = cv2.Laplacian(binary, cv2.CV_16S, ksize=3) +Laplacian = cv2.convertScaleAbs(dst) + +# Gaussian blur (denoising) before LoG and Canny +gaussianBlur = cv2.GaussianBlur(grayImage, (3, 3), 0) +ret, binary = cv2.threshold(gaussianBlur, 127, 255, cv2.THRESH_BINARY) + +# LoG operator +dst = cv2.Laplacian(binary, cv2.CV_16S, ksize=3) +LOG = cv2.convertScaleAbs(dst) + +# Canny operator +Canny = cv2.Canny(gaussianBlur, 50, 150) + +# Needed so the Chinese labels render correctly +plt.rcParams['font.sans-serif'] = ['SimHei'] + +# Display the results +plt.subplot(241), plt.imshow(img_RGB), plt.title('原始图像'), plt.axis('off') # turn the axes off +plt.subplot(242), plt.imshow(binary, cmap=plt.cm.gray), plt.title('二值图'), plt.axis('off') +plt.subplot(243), plt.imshow(Roberts, cmap=plt.cm.gray), plt.title('Roberts算子'), plt.axis('off') +plt.subplot(244), plt.imshow(Prewitt, cmap=plt.cm.gray), plt.title('Prewitt算子'), plt.axis('off') +plt.subplot(245), plt.imshow(Sobel, cmap=plt.cm.gray), plt.title('Sobel算子'), plt.axis('off') +plt.subplot(246), plt.imshow(Laplacian, cmap=plt.cm.gray), plt.title('Laplacian算子'), plt.axis('off') +plt.subplot(247), plt.imshow(LOG, cmap=plt.cm.gray), plt.title('LOG算子'), plt.axis('off') +plt.subplot(248), plt.imshow(Canny, cmap=plt.cm.gray), plt.title('Canny算子'), plt.axis('off') +plt.show() diff --git a/EdgeDetection/img/person.jpg b/EdgeDetection/img/person.jpg new file mode 100644 index 0000000..727e73f Binary files /dev/null and b/EdgeDetection/img/person.jpg differ diff --git a/EdgeDetection/result/result1.png b/EdgeDetection/result/result1.png new file mode 100644 index 0000000..8c9488b Binary files /dev/null and b/EdgeDetection/result/result1.png differ diff --git a/FrequencyDomainProcessing/code/低通滤波器.py b/FrequencyDomainProcessing/code/低通滤波器.py new file mode 100644 index 0000000..cbd5f3b --- /dev/null +++ b/FrequencyDomainProcessing/code/低通滤波器.py @@ -0,0 +1,83 @@ +import numpy as np +import cv2 as cv +import matplotlib.pyplot as plt + + +def frequency_filter(image, filter): + """ + :param image: input grayscale image + :param filter: frequency-domain transfer function H + :return: filtered image + """ + fftImg = np.fft.fft2(image) # Fourier transform of the image + fftImgShift = np.fft.fftshift(fftImg) # shift the zero-frequency component to the centre + handle_fftImgShift1 = 
fftImgShift*filter # apply the filter H in the frequency domain + + handle_fftImgShift2 = np.fft.ifftshift(handle_fftImgShift1) + handle_fftImgShift3 = np.fft.ifft2(handle_fftImgShift2) + handle_fftImgShift4 = np.real(handle_fftImgShift3) # take the real part after the inverse transform + return np.uint8(handle_fftImgShift4) + + +# Ideal low-pass filter +def ILPF(image, d0, n): + H = np.empty_like(image, dtype=float) + M, N = image.shape + mid_x = int(N/2) # column (x) centre of the spectrum + mid_y = int(M/2) # row (y) centre of the spectrum + for y in range(0, M): + for x in range(0, N): + d = np.sqrt((x - mid_x) ** 2 + (y - mid_y) ** 2) + if d <= d0: + H[y, x] = 1**n + else: + H[y, x] = 0**n + return H + + +# Butterworth low-pass filter +def BLPF(image, d0, n): + H = np.empty_like(image, float) + M, N = image.shape + mid_x = int(N/2) # column (x) centre of the spectrum + mid_y = int(M/2) # row (y) centre of the spectrum + for y in range(0, M): + for x in range(0, N): + d = np.sqrt((x - mid_x) ** 2 + (y - mid_y) ** 2) + H[y, x] = 1/(1+(d/d0)**(n)) + return H + + +# Gaussian low-pass filter +def GLPF(image, d0, n): + H = np.empty_like(image, float) + M, N = image.shape + mid_x = M/2 + mid_y = N/2 + for x in range(0, M): + for y in range(0, N): + d = np.sqrt((x - mid_x)**2 + (y - mid_y) ** 2) + H[x, y] = np.exp(-d**n/(2*d0**n)) + return H + + +# Read the image +image = cv.imread('D:/Python/FrequencyDomainProcessing/img/moon.jpg') +img_RGB = cv.cvtColor(image, cv.COLOR_BGR2RGB) # convert to RGB for display later +grayImage = cv.cvtColor(img_RGB, cv.COLOR_RGB2GRAY) + +result2 = frequency_filter(grayImage, ILPF(grayImage, 60, n=1)) +result3 = frequency_filter(grayImage, BLPF(grayImage, 60, n=2)) +result4 = frequency_filter(grayImage, BLPF(grayImage, 90, n=2)) +result5 = frequency_filter(grayImage, GLPF(grayImage, 60, n=2)) +result6 = frequency_filter(grayImage, GLPF(grayImage, 90, n=2)) + +# Display the results +plt.rcParams['font.sans-serif'] = ['SimHei'] +plt.subplot(321), plt.imshow(grayImage, cmap=plt.cm.gray), plt.title('原始图像'), plt.axis('off') # turn the axes off +plt.subplot(322), plt.imshow(result2, cmap=plt.cm.gray), plt.title('理想低通滤波(D0=60)'), plt.axis('off') +plt.subplot(323), plt.imshow(result3, cmap=plt.cm.gray), plt.title('Butterworth低通(D0=60,n=2)'), plt.axis('off') +plt.subplot(324), plt.imshow(result4, cmap=plt.cm.gray), plt.title('Butterworth低通(D0=90,n=2)'), plt.axis('off') +plt.subplot(325), plt.imshow(result5, cmap=plt.cm.gray), plt.title('Gauss低通(D0=60,n=2)'), plt.axis('off') +plt.subplot(326), plt.imshow(result6, cmap=plt.cm.gray), plt.title('Gauss低通(D0=90,n=2)'), plt.axis('off') +plt.show() diff --git a/FrequencyDomainProcessing/img/moon.jpg b/FrequencyDomainProcessing/img/moon.jpg new file mode 100644 index 0000000..4f4dec8 Binary files /dev/null and b/FrequencyDomainProcessing/img/moon.jpg differ diff --git a/FrequencyDomainProcessing/result/result2.png b/FrequencyDomainProcessing/result/result2.png new file mode 100644 index 0000000..6f09992 Binary files /dev/null and b/FrequencyDomainProcessing/result/result2.png differ diff --git a/GeometricTransformation/code/几何变换.py b/GeometricTransformation/code/几何变换.py new file mode 100644 index 0000000..bec743d --- /dev/null +++ b/GeometricTransformation/code/几何变换.py @@ -0,0 +1,43 @@ +import cv2 +import numpy as np +from matplotlib import pyplot as plt + +# Read the image +img = cv2.imread("D:/Python/GeometricTransformation/img/person.jpg", cv2.IMREAD_UNCHANGED) +src = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) +rows, cols = src.shape[:2] + +# Image scaling +result1 = cv2.resize(src, None, fx=0.1, fy=0.1) +result2 = cv2.resize(src, (int(cols*10), int(rows*10))) + +# Image rotation +M1 = cv2.getRotationMatrix2D((cols / 2, rows / 2), 45, 1) +result3 = cv2.warpAffine(src, M1, (cols, rows)) +M2 = cv2.getRotationMatrix2D((cols / 2, rows / 2), 90, 1) +result4 = 
cv2.warpAffine(src, M2, (cols, rows)) +M3 = cv2.getRotationMatrix2D((cols / 2, rows / 2), 225, 1) +result5 = cv2.warpAffine(src, M3, (cols, rows)) + +# Image flipping +result6 = cv2.flip(src, 0) +result7 = cv2.flip(src, 1) +result8 = cv2.flip(src, -1) + +# Image translation +M = np.float32([[1, 0, 0], [0, 1, 300]]) +result9 = cv2.warpAffine(src, M, (cols, rows)) +M = np.float32([[1, 0, 0], [0, 1, -300]]) +result10 = cv2.warpAffine(src, M, (cols, rows)) +M = np.float32([[1, 0, 300], [0, 1, 0]]) +result11 = cv2.warpAffine(src, M, (cols, rows)) + +# Display the results +titles = ['原始图像', '图像缩小(10倍)', '图像放大(10倍)', '图像旋转(45°)', '图像旋转(90°)', '图像旋转(225°)', '图像翻转(X轴)', '图像翻转(Y轴)', '图像翻转(原点)', '图像平移(向下)', '图像平移(向上)', '图像平移(向右)'] +images = [src, result1, result2, result3, result4, result5, result6, result7, result8, result9, result10, result11] +plt.rcParams['font.sans-serif'] = ['SimHei'] +for i in range(12): + plt.subplot(4, 3, i + 1), plt.imshow(images[i], 'gray') + plt.title(titles[i], fontsize=8) + plt.xticks([]), plt.yticks([]) +plt.show() diff --git a/GeometricTransformation/img/person.jpg b/GeometricTransformation/img/person.jpg new file mode 100644 index 0000000..eccebd4 Binary files /dev/null and b/GeometricTransformation/img/person.jpg differ diff --git a/GeometricTransformation/result/result3.png b/GeometricTransformation/result/result3.png new file mode 100644 index 0000000..56ae6c4 Binary files /dev/null and b/GeometricTransformation/result/result3.png differ diff --git a/ImageNoise/code/图像噪声.py b/ImageNoise/code/图像噪声.py new file mode 100644 index 0000000..38b1bc6 --- /dev/null +++ b/ImageNoise/code/图像噪声.py @@ -0,0 +1,57 @@ +import numpy as np +import random +import cv2 +from matplotlib import pyplot as plt + + +# Salt-and-pepper noise: each pixel becomes 0 or 255 with probability prob +def sp_noise(image, prob): + output = np.zeros(image.shape, np.uint8) + thres = 1 - prob + for i in range(image.shape[0]): + for j in range(image.shape[1]): + rdn = random.random() + if rdn < prob: + output[i][j] = 0 + elif rdn > thres: + output[i][j] = 255 + else: + output[i][j] = image[i][j] + return output + + +# Additive Gaussian noise applied to the image normalised to [0, 1] +def gauss_noise(image, mean=0, var=0.001): + image = np.array(image/255, dtype=float) + noise = np.random.normal(mean, var ** 0.5, image.shape) + out = image + noise + if out.min() < 0: + low_clip = -1. + else: + low_clip = 0. 
+ out = np.clip(out, low_clip, 1.0) + out = np.uint8(out*255) + return out + + +# Read the image +img = cv2.imread('D:/Python/ImageNoise/img/pig.jpg', cv2.IMREAD_UNCHANGED) +img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + +# Add salt-and-pepper noise with a noise ratio of 0.1 +out1 = sp_noise(img, prob=0.1) + +# Add Gaussian noise with mean 0 and variance 0.01 +out2 = gauss_noise(img, mean=0, var=0.01) + +# Add Poisson noise +noise_type = np.random.poisson(lam=1, size=img.shape).astype(dtype='uint8') # lam >= 0; the smaller lam is, the less noise; size matches the image shape +out3 = noise_type+img # add the noise to the original image + +# Show the results +plt.rcParams['font.sans-serif'] = ['SimHei'] +titles = ['原始图像', '椒盐噪声(prob=0.1)', '高斯噪声(mean=0,var=0.01)', '泊松噪声(lam=1.0)'] +images = [img, out1, out2, out3] +for i in range(4): + plt.subplot(2, 2, i+1), plt.imshow(images[i], 'gray') + plt.title(titles[i]) + plt.xticks([]), plt.yticks([]) +plt.show() diff --git a/ImageNoise/img/pig.jpg b/ImageNoise/img/pig.jpg new file mode 100644 index 0000000..e8cdfc8 Binary files /dev/null and b/ImageNoise/img/pig.jpg differ diff --git a/ImageNoise/result/result4.png b/ImageNoise/result/result4.png new file mode 100644 index 0000000..8276551 Binary files /dev/null and b/ImageNoise/result/result4.png differ diff --git a/ImageOperation/code/图像运算.py b/ImageOperation/code/图像运算.py new file mode 100644 index 0000000..fe9b6e9 --- /dev/null +++ b/ImageOperation/code/图像运算.py @@ -0,0 +1,50 @@ +import cv2 +import numpy as np +from matplotlib import pyplot as plt + +# Image addition +img = cv2.imread('D:/Python/ImageOperation/img/boy.jpg', 1) +img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) +m = np.ones(img.shape, dtype="uint8")*100 +result_add = cv2.add(img, m) +# Perspective transformation +height, width = img.shape[:2] +pts1 = np.float32([[119, 189], [1700, 189], [400, 1747], [1541, 1747]]) # produces a big-head caricature effect +pts2 = np.float32([[0, 0], [width, 0], [0, height], [width, height]]) +matrix = cv2.getPerspectiveTransform(pts1, pts2) +result_warp = cv2.warpPerspective(img, matrix, (width, height)) + +# Image blending +tmp1 = cv2.imread('D:/Python/1.2-Image operation/img/beauty.jpg', 1) +img1 = cv2.cvtColor(tmp1, cv2.COLOR_BGR2RGB) +tmp2 = cv2.imread('D:/Python/1.2-Image operation/img/rose.jpg', 1) +img2 = cv2.cvtColor(tmp2, cv2.COLOR_BGR2RGB) +h, w, _ = img1.shape +temp = cv2.resize(img2, (w, h), interpolation=cv2.INTER_AREA) # the x axis corresponds to the width w; the default interpolation is bilinear +result_addW = cv2.addWeighted(img1, 0.6, temp, 0.4, 0) + +# Bitwise AND +tmp3 = cv2.imread('D:/Python/1.2-Image operation/img/castle.jpg', cv2.IMREAD_GRAYSCALE) +rows, cols = tmp3.shape[:2] +circle = np.zeros((rows, cols), dtype="uint8") +cv2.circle(circle, (int(cols/2), int(rows/2)), 600, 255, -1) # the centre is given as (x, y), i.e. (column, row) +result_and = cv2.bitwise_and(tmp3, circle) + +# Bitwise OR +result_or = cv2.bitwise_or(tmp3, circle) + +# Bitwise NOT +result_not = cv2.bitwise_not(tmp3) + +# Bitwise XOR +result_xor = cv2.bitwise_xor(tmp3, circle) + +# Display the results +plt.rcParams['font.sans-serif'] = ['SimHei'] +titles = ['原图1', '图像加法(像素+100)', '透视变化', '原图2', '原图3', '图像融合', '原图4', '原图5', '与运算', '或运算', '非运算', '异或运算'] +images = [img, result_add, result_warp, img1, img2, result_addW, tmp3, circle, result_and, result_or, result_not, result_xor] +for i in range(12): + plt.subplot(4, 3, i+1), plt.imshow(images[i], 'gray') + plt.title(titles[i], fontsize=10) + plt.xticks([]), plt.yticks([]) +plt.show() diff --git a/ImageOperation/img/beauty.jpg b/ImageOperation/img/beauty.jpg new file mode 100644 index 0000000..71569bf Binary files /dev/null and b/ImageOperation/img/beauty.jpg differ diff --git a/ImageOperation/img/boy.jpg b/ImageOperation/img/boy.jpg new file mode 100644 index 0000000..d25f582 Binary files /dev/null and b/ImageOperation/img/boy.jpg differ 
diff --git a/ImageOperation/img/castle.jpg b/ImageOperation/img/castle.jpg new file mode 100644 index 0000000..6a65810 Binary files /dev/null and b/ImageOperation/img/castle.jpg differ diff --git a/ImageOperation/img/rose.jpg b/ImageOperation/img/rose.jpg new file mode 100644 index 0000000..eccfde8 Binary files /dev/null and b/ImageOperation/img/rose.jpg differ diff --git a/ImageOperation/result/result5.png b/ImageOperation/result/result5.png new file mode 100644 index 0000000..6d98377 Binary files /dev/null and b/ImageOperation/result/result5.png differ diff --git a/MorphologicalProcessing/code/形态学处理.py b/MorphologicalProcessing/code/形态学处理.py new file mode 100644 index 0000000..ed18ecc --- /dev/null +++ b/MorphologicalProcessing/code/形态学处理.py @@ -0,0 +1,40 @@ +import cv2 +import numpy as np +from matplotlib import pyplot as plt +# Read the image +src = cv2.imread("D:/Python/MorphologicalProcessing/img/CHA.jpg", cv2.IMREAD_UNCHANGED) + +# Set the structuring element (kernel) +kernel = np.ones((3, 3), np.uint8) + +# Erosion +result_erode = cv2.erode(src, kernel) + +# Dilation +result_dilate = cv2.dilate(src, kernel) + +# Opening +result_open = cv2.morphologyEx(src, cv2.MORPH_OPEN, kernel) + +# Closing +result_close = cv2.morphologyEx(src, cv2.MORPH_CLOSE, kernel) + +# Morphological gradient +result_gradient = cv2.morphologyEx(src, cv2.MORPH_GRADIENT, kernel) + +# Top-hat +result_tophat = cv2.morphologyEx(src, cv2.MORPH_TOPHAT, kernel) + +# Black-hat +result_blackhat = cv2.morphologyEx(src, cv2.MORPH_BLACKHAT, kernel) + +# Display the results (the image order matches the titles below) +images = [src, result_erode, result_dilate, result_open, result_close, result_gradient, result_tophat, result_blackhat] +titles = ['原始图像', '腐蚀', '膨胀', '开运算', '闭运算', '梯度运算', '顶帽运算', '黑帽运算'] +plt.rcParams['font.sans-serif'] = ['SimHei'] +for i in range(2): + plt.subplot(2, 4, i*4+1), plt.imshow(images[i*4], 'gray'), plt.title(titles[i*4]), plt.axis('off') + plt.subplot(2, 4, i*4+2), plt.imshow(images[i*4+1], 'gray'), plt.title(titles[i*4+1]), plt.axis('off') + plt.subplot(2, 4, i*4+3), plt.imshow(images[i*4+2], 'gray'), plt.title(titles[i*4+2]), plt.axis('off') + plt.subplot(2, 4, i*4+4), plt.imshow(images[i*4+3], 'gray'), plt.title(titles[i*4+3]), plt.axis('off') +plt.show() diff --git a/MorphologicalProcessing/img/CHA.jpg b/MorphologicalProcessing/img/CHA.jpg new file mode 100644 index 0000000..776773c Binary files /dev/null and b/MorphologicalProcessing/img/CHA.jpg differ diff --git a/MorphologicalProcessing/result/result6.png b/MorphologicalProcessing/result/result6.png new file mode 100644 index 0000000..1fec43c Binary files /dev/null and b/MorphologicalProcessing/result/result6.png differ diff --git a/Output_Image/res_style_denoised_starry.jpg b/Output_Image/res_style_denoised_starry.jpg new file mode 100644 index 0000000..243344b Binary files /dev/null and b/Output_Image/res_style_denoised_starry.jpg differ diff --git a/Output_Image/res_style_scream.jpg b/Output_Image/res_style_scream.jpg new file mode 100644 index 0000000..6cfbf8b Binary files /dev/null and b/Output_Image/res_style_scream.jpg differ diff --git a/Output_Image/res_style_wave.jpg b/Output_Image/res_style_wave.jpg new file mode 100644 index 0000000..59c56fc Binary files /dev/null and b/Output_Image/res_style_wave.jpg differ diff --git a/Output_Image/result1.png b/Output_Image/result1.png new file mode 100644 index 0000000..8c9488b Binary files /dev/null and b/Output_Image/result1.png differ diff --git a/Output_Image/result10.png b/Output_Image/result10.png new file mode 100644 index 0000000..b72671d Binary files /dev/null and b/Output_Image/result10.png differ diff --git 
a/Output_Image/result11.png b/Output_Image/result11.png new file mode 100644 index 0000000..60b9fdc Binary files /dev/null and b/Output_Image/result11.png differ diff --git a/Output_Image/result12.png b/Output_Image/result12.png new file mode 100644 index 0000000..05bb2ab Binary files /dev/null and b/Output_Image/result12.png differ diff --git a/Output_Image/result2.png b/Output_Image/result2.png new file mode 100644 index 0000000..6f09992 Binary files /dev/null and b/Output_Image/result2.png differ diff --git a/Output_Image/result3.png b/Output_Image/result3.png new file mode 100644 index 0000000..56ae6c4 Binary files /dev/null and b/Output_Image/result3.png differ diff --git a/Output_Image/result4.png b/Output_Image/result4.png new file mode 100644 index 0000000..8276551 Binary files /dev/null and b/Output_Image/result4.png differ diff --git a/Output_Image/result5.png b/Output_Image/result5.png new file mode 100644 index 0000000..6d98377 Binary files /dev/null and b/Output_Image/result5.png differ diff --git a/Output_Image/result6.png b/Output_Image/result6.png new file mode 100644 index 0000000..1fec43c Binary files /dev/null and b/Output_Image/result6.png differ diff --git a/Output_Image/result7.png b/Output_Image/result7.png new file mode 100644 index 0000000..420a611 Binary files /dev/null and b/Output_Image/result7.png differ diff --git a/Output_Image/result8.png b/Output_Image/result8.png new file mode 100644 index 0000000..b64edb3 Binary files /dev/null and b/Output_Image/result8.png differ diff --git a/Output_Image/result9.png b/Output_Image/result9.png new file mode 100644 index 0000000..7eebc2d Binary files /dev/null and b/Output_Image/result9.png differ diff --git a/SpatialProcessing/Gray-scaleModification/code/灰度变化.py b/SpatialProcessing/Gray-scaleModification/code/灰度变化.py new file mode 100644 index 0000000..38140e1 --- /dev/null +++ b/SpatialProcessing/Gray-scaleModification/code/灰度变化.py @@ -0,0 +1,49 @@ +import cv2 +import numpy as np +import matplotlib.pyplot as plt + +# Linear gray-level transform -- image negative +img1 = cv2.imread('D:/Python/SpatialProcessing/Gray-scaleModification/img/castle.jpg', cv2.IMREAD_UNCHANGED) +src = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB) +grayImage = cv2.cvtColor(src, cv2.COLOR_RGB2GRAY) +height = grayImage.shape[0] +width = grayImage.shape[1] +result_rev = np.zeros((height, width), np.uint8) +for i in range(height): + for j in range(width): + gray = 255 - grayImage[i, j] + result_rev[i, j] = np.uint8(gray) + + +# Non-linear gray-level transform -- logarithmic transform +def log(c, img): + output = c * np.log(1.0 + img) + output = np.uint8(output + 0.5) + return output + + +result_log1 = log(25, src) +result_log2 = log(45, src) + + +# Non-linear gray-level transform -- gamma transform +def gamma(img, c, v): + lut = np.zeros(256, dtype=np.float32) + for i in range(256): + lut[i] = c * i ** v + output_img = cv2.LUT(img, lut) + output_img = np.uint8(np.clip(output_img + 0.5, 0, 255)) # clip before casting to avoid uint8 wrap-around + return output_img + + +result_gamma1 = gamma(src, 1, 0.8) +result_gamma2 = gamma(src, 1, 1.2) + +# Display the results +images = [src, result_rev, result_log1, result_log2, result_gamma1, result_gamma2] +titles = ['原始图像', '图像反转', '对数变化(c=25)', '对数变化(c=45)', '伽马变换(v=0.8)', '伽马变换(v=1.2)'] +plt.rcParams['font.sans-serif'] = ['SimHei'] +for i in range(3): + plt.subplot(3, 2, i*2+1), plt.imshow(images[i*2], 'gray'), plt.title(titles[i*2]), plt.axis('off') + plt.subplot(3, 2, i*2+2), plt.imshow(images[i*2+1], 'gray'), plt.title(titles[i*2+1]), plt.axis('off') +plt.show() diff --git a/SpatialProcessing/Gray-scaleModification/img/castle.jpg b/SpatialProcessing/Gray-scaleModification/img/castle.jpg new file mode 
100644 index 0000000..63d53ce Binary files /dev/null and b/SpatialProcessing/Gray-scaleModification/img/castle.jpg differ diff --git a/SpatialProcessing/Gray-scaleModification/result/result7.png b/SpatialProcessing/Gray-scaleModification/result/result7.png new file mode 100644 index 0000000..420a611 Binary files /dev/null and b/SpatialProcessing/Gray-scaleModification/result/result7.png differ diff --git a/SpatialProcessing/HistogramModification/code/直方图修正.py b/SpatialProcessing/HistogramModification/code/直方图修正.py new file mode 100644 index 0000000..79b88f3 --- /dev/null +++ b/SpatialProcessing/HistogramModification/code/直方图修正.py @@ -0,0 +1,15 @@ +import cv2 +import matplotlib.pyplot as plt +# Original image +img_gray = cv2.imread('D:/Python/SpatialProcessing/HistogramModification/img/cake.jpg', cv2.IMREAD_GRAYSCALE) + +# Histogram equalization +equ = cv2.equalizeHist(img_gray) + +# Display the results +plt.rcParams['font.sans-serif'] = ['SimHei'] +plt.subplot(221), plt.imshow(img_gray, cmap=plt.cm.gray), plt.title('原始图像'), plt.axis('off') +plt.subplot(222), plt.hist(img_gray.ravel(), 256), plt.title('灰度直方图') +plt.subplot(223), plt.imshow(equ, cmap=plt.cm.gray), plt.title('修正图像'), plt.axis('off') +plt.subplot(224), plt.hist(equ.ravel(), 256), plt.title('修正直方图') +plt.show() diff --git a/SpatialProcessing/HistogramModification/img/cake.jpg b/SpatialProcessing/HistogramModification/img/cake.jpg new file mode 100644 index 0000000..478ce55 Binary files /dev/null and b/SpatialProcessing/HistogramModification/img/cake.jpg differ diff --git a/SpatialProcessing/HistogramModification/result/result8.png b/SpatialProcessing/HistogramModification/result/result8.png new file mode 100644 index 0000000..b64edb3 Binary files /dev/null and b/SpatialProcessing/HistogramModification/result/result8.png differ diff --git a/SpatialProcessing/ImageSmoothing/code/图像平滑.py b/SpatialProcessing/ImageSmoothing/code/图像平滑.py new file mode 100644 index 0000000..0f69293 --- /dev/null +++ b/SpatialProcessing/ImageSmoothing/code/图像平滑.py @@ -0,0 +1,25 @@ +import cv2 +import matplotlib.pyplot as plt + +# Read the image +source = cv2.imread('D:/Python/SpatialProcessing/ImageSmoothing/img/moon.jpg', cv2.IMREAD_UNCHANGED) +source = cv2.cvtColor(source, cv2.COLOR_BGR2RGB) # convert to RGB for display later + +# Mean (box) filter +result1 = cv2.blur(source, (7, 7)) + +# Median filter +result2 = cv2.medianBlur(source, 5) + +# Gaussian filter +result3 = cv2.GaussianBlur(source, (5, 5), 0) + +# Display the results +titles = ['原始图片', '均值滤波', '中值滤波', '高斯滤波'] +images = [source, result1, result2, result3] +plt.rcParams['font.sans-serif'] = ['SimHei'] +for i in range(4): + plt.subplot(2, 2, i + 1), plt.imshow(images[i], 'gray') + plt.title(titles[i]) + plt.xticks([]), plt.yticks([]) +plt.show() diff --git a/SpatialProcessing/ImageSmoothing/img/moon.jpg b/SpatialProcessing/ImageSmoothing/img/moon.jpg new file mode 100644 index 0000000..4f4dec8 Binary files /dev/null and b/SpatialProcessing/ImageSmoothing/img/moon.jpg differ diff --git a/SpatialProcessing/ImageSmoothing/result/result9.png b/SpatialProcessing/ImageSmoothing/result/result9.png new file mode 100644 index 0000000..7eebc2d Binary files /dev/null and b/SpatialProcessing/ImageSmoothing/result/result9.png differ diff --git a/StyleMigration/.gitignore b/StyleMigration/.gitignore new file mode 100644 index 0000000..6fa1618 --- /dev/null +++ b/StyleMigration/.gitignore @@ -0,0 +1,99 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ 
+parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# IPython Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# dotenv +.env + +# virtualenv +venv/ +ENV/ + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject + + +slim-official/ +train2014 +generated/ +models/ +tensorboard/ +result/ +pretrained/ + diff --git a/StyleMigration/README.md b/StyleMigration/README.md new file mode 100644 index 0000000..5aeefeb --- /dev/null +++ b/StyleMigration/README.md @@ -0,0 +1,68 @@ +# fast-neural-style-tensorflow + +A tensorflow implementation for [Perceptual Losses for Real-Time Style Transfer and Super-Resolution](https://arxiv.org/abs/1603.08155). + +This code is based on [Tensorflow-Slim](https://github.com/tensorflow/models/tree/master/slim) and [OlavHN/fast-neural-style](https://github.com/OlavHN/fast-neural-style). + +## Samples: + +| configuration | style | sample | +| :---: | :----: | :----: | +| [wave.yml](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/wave.yml) |![](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/img/results/style_wave.jpg)| ![](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/img/results/wave.jpg) | +| [cubist.yml](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/cubist.yml) |![](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/img/results/style_cubist.jpg)| ![](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/img/results/cubist.jpg) | +| [denoised_starry.yml](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/denoised_starry.yml) |![](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/img/results/style_denoised_starry.jpg)| ![](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/img/results/denoised_starry.jpg) | +| [mosaic.yml](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/mosaic.yml) |![](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/img/results/style_mosaic.jpg)| ![](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/img/results/mosaic.jpg) | +| [scream.yml](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/scream.yml) |![](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/img/results/style_scream.jpg)| ![](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/img/results/scream.jpg) | +| [feathers.yml](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/feathers.yml) |![](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/img/results/style_feathers.jpg)| ![](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/img/results/feathers.jpg) | +| [udnie.yml](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/udnie.yml) 
|![](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/img/results/style_udnie.jpg)| ![](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/img/results/udnie.jpg) | + +## Requirements and Prerequisites: +- Python 2.7.x +- Now supports Tensorflow >= 1.0 + +Attention: This code also supports Tensorflow == 0.11. If that is your version, use commit 5309a2a (git reset --hard 5309a2a). + +Also make sure you have installed pyyaml: +``` +pip install pyyaml +``` + +## Use Trained Models: + +You can download all 7 trained models from [Baidu Drive](https://pan.baidu.com/s/1i4GTS4d). + +To generate a sample from the model "wave.ckpt-done", run: + +``` +python eval.py --model_file <path to wave.ckpt-done> --image_file img/test.jpg +``` + +Then check out result/res.jpg. + +## Train a Model: +To train a model from scratch, you should first download the [VGG16 model](http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz) from Tensorflow Slim. Extract the file vgg_16.ckpt, then copy it to the pretrained/ folder: +``` +cd <this repo> +mkdir pretrained +cp <path to vgg_16.ckpt> pretrained/ +``` + +Then download the [COCO dataset](http://msvocds.blob.core.windows.net/coco2014/train2014.zip). Please unzip it, and you will have a folder named "train2014" with many raw images in it. Then create a symbolic link to it: +``` +cd <this repo> +ln -s <path to the unzipped train2014 folder> train2014 +``` + +Train the model of "wave": +``` +python train.py -c conf/wave.yml +``` + +(Optional) Use tensorboard: +``` +tensorboard --logdir models/wave/ +``` + +Checkpoints will be written to "models/wave/". + +View the [configuration file](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/wave.yml) for details. diff --git a/StyleMigration/conf/candy.yml b/StyleMigration/conf/candy.yml new file mode 100644 index 0000000..1a48c22 --- /dev/null +++ b/StyleMigration/conf/candy.yml @@ -0,0 +1,26 @@ +## Basic configuration +style_image: img/candy.jpg # targeted style image +naming: "candy" # the name of this model. Determine the path to save checkpoint and events file. +model_path: models # root path to save checkpoint and events file. The final path would be / + +## Weight of the loss +content_weight: 1.0 # weight for content features loss +style_weight: 50.0 # weight for style features loss +tv_weight: 0.0 # weight for total variation loss + +## The size, the iter number to run +image_size: 256 +batch_size: 4 +epoch: 2 + +## Loss Network +loss_model: "vgg_16" +content_layers: # use these layers for content loss + - "vgg_16/conv3/conv3_3" +style_layers: # use these layers for style loss + - "vgg_16/conv1/conv1_2" + - "vgg_16/conv2/conv2_2" + - "vgg_16/conv3/conv3_3" + - "vgg_16/conv4/conv4_3" +checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers. +loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint diff --git a/StyleMigration/conf/cubist.yml b/StyleMigration/conf/cubist.yml new file mode 100644 index 0000000..a57bd61 --- /dev/null +++ b/StyleMigration/conf/cubist.yml @@ -0,0 +1,26 @@ +## Basic configuration +style_image: img/cubist.jpg # targeted style image +naming: "cubist" # the name of this model. Determine the path to save checkpoint and events file. +model_path: models # root path to save checkpoint and events file. 
The final path would be / + +## Weight of the loss +content_weight: 1.0 # weight for content features loss +style_weight: 180.0 # weight for style features loss +tv_weight: 0.0 # weight for total variation loss + +## The size, the iter number to run +image_size: 256 +batch_size: 4 +epoch: 2 + +## Loss Network +loss_model: "vgg_16" +content_layers: # use these layers for content loss + - "vgg_16/conv3/conv3_3" +style_layers: # use these layers for style loss + - "vgg_16/conv1/conv1_2" + - "vgg_16/conv2/conv2_2" + - "vgg_16/conv3/conv3_3" + - "vgg_16/conv4/conv4_3" +checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers. +loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint \ No newline at end of file diff --git a/StyleMigration/conf/denoised_starry.yml b/StyleMigration/conf/denoised_starry.yml new file mode 100644 index 0000000..fe3d35b --- /dev/null +++ b/StyleMigration/conf/denoised_starry.yml @@ -0,0 +1,26 @@ +## Basic configuration +style_image: img/denoised_starry.jpg # targeted style image +naming: "denoised_starry" # the name of this model. Determine the path to save checkpoint and events file. +model_path: models # root path to save checkpoint and events file. The final path would be / + +## Weight of the loss +content_weight: 1.0 # weight for content features loss +style_weight: 250 # weight for style features loss +tv_weight: 0.0 # weight for total variation loss + +## The size, the iter number to run +image_size: 256 +batch_size: 4 +epoch: 2 + +## Loss Network +loss_model: "vgg_16" +content_layers: # use these layers for content loss + - "vgg_16/conv3/conv3_3" +style_layers: # use these layers for style loss + - "vgg_16/conv1/conv1_2" + - "vgg_16/conv2/conv2_2" + - "vgg_16/conv3/conv3_3" + - "vgg_16/conv4/conv4_3" +checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers. +loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint \ No newline at end of file diff --git a/StyleMigration/conf/feathers.yml b/StyleMigration/conf/feathers.yml new file mode 100644 index 0000000..950a0be --- /dev/null +++ b/StyleMigration/conf/feathers.yml @@ -0,0 +1,26 @@ +## Basic configuration +style_image: img/feathers.jpg # targeted style image +naming: "feathers" # the name of this model. Determine the path to save checkpoint and events file. +model_path: models # root path to save checkpoint and events file. The final path would be / + +## Weight of the loss +content_weight: 1.0 # weight for content features loss +style_weight: 220.0 # weight for style features loss +tv_weight: 0.0 # weight for total variation loss + +## The size, the iter number to run +image_size: 256 +batch_size: 4 +epoch: 2 + +## Loss Network +loss_model: "vgg_16" +content_layers: # use these layers for content loss + - "vgg_16/conv3/conv3_3" +style_layers: # use these layers for style loss + - "vgg_16/conv1/conv1_2" + - "vgg_16/conv2/conv2_2" + - "vgg_16/conv3/conv3_3" + - "vgg_16/conv4/conv4_3" +checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers. +loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint \ No newline at end of file diff --git a/StyleMigration/conf/mosaic.yml b/StyleMigration/conf/mosaic.yml new file mode 100644 index 0000000..bcfa8cd --- /dev/null +++ b/StyleMigration/conf/mosaic.yml @@ -0,0 +1,26 @@ +## Basic configuration +style_image: img/mosaic.jpg # targeted style image +naming: "mosaic" # the name of this model. 
Determine the path to save checkpoint and events file. +model_path: models # root path to save checkpoint and events file. The final path would be / + +## Weight of the loss +content_weight: 1.0 # weight for content features loss +style_weight: 100.0 # weight for style features loss +tv_weight: 0.0 # weight for total variation loss + +## The size, the iter number to run +image_size: 256 +batch_size: 4 +epoch: 2 + +## Loss Network +loss_model: "vgg_16" +content_layers: # use these layers for content loss + - "vgg_16/conv3/conv3_3" +style_layers: # use these layers for style loss + - "vgg_16/conv1/conv1_2" + - "vgg_16/conv2/conv2_2" + - "vgg_16/conv3/conv3_3" + - "vgg_16/conv4/conv4_3" +checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers. +loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint \ No newline at end of file diff --git a/StyleMigration/conf/scream.yml b/StyleMigration/conf/scream.yml new file mode 100644 index 0000000..9b24526 --- /dev/null +++ b/StyleMigration/conf/scream.yml @@ -0,0 +1,26 @@ +## Basic configuration +style_image: img/scream.jpg # targeted style image +naming: "scream" # the name of this model. Determine the path to save checkpoint and events file. +model_path: models # root path to save checkpoint and events file. The final path would be / + +## Weight of the loss +content_weight: 1.0 # weight for content features loss +style_weight: 250.0 # weight for style features loss +tv_weight: 0.0 # weight for total variation loss + +## The size, the iter number to run +image_size: 256 +batch_size: 4 +epoch: 2 + +## Loss Network +loss_model: "vgg_16" +content_layers: # use these layers for content loss + - "vgg_16/conv3/conv3_3" +style_layers: # use these layers for style loss + - "vgg_16/conv1/conv1_2" + - "vgg_16/conv2/conv2_2" + - "vgg_16/conv3/conv3_3" + - "vgg_16/conv4/conv4_3" +checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers. +loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint \ No newline at end of file diff --git a/StyleMigration/conf/udnie.yml b/StyleMigration/conf/udnie.yml new file mode 100644 index 0000000..a5541eb --- /dev/null +++ b/StyleMigration/conf/udnie.yml @@ -0,0 +1,26 @@ +## Basic configuration +style_image: img/udnie.jpg # targeted style image +naming: "udnie" # the name of this model. Determine the path to save checkpoint and events file. +model_path: models # root path to save checkpoint and events file. The final path would be / + +## Weight of the loss +content_weight: 1.0 # weight for content features loss +style_weight: 200.0 # weight for style features loss +tv_weight: 0.0 # weight for total variation loss + +## The size, the iter number to run +image_size: 256 +batch_size: 4 +epoch: 2 + +## Loss Network +loss_model: "vgg_16" +content_layers: # use these layers for content loss + - "vgg_16/conv3/conv3_3" +style_layers: # use these layers for style loss + - "vgg_16/conv1/conv1_2" + - "vgg_16/conv2/conv2_2" + - "vgg_16/conv3/conv3_3" + - "vgg_16/conv4/conv4_3" +checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers. 
+loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint \ No newline at end of file diff --git a/StyleMigration/conf/wave.yml b/StyleMigration/conf/wave.yml new file mode 100644 index 0000000..c593a8f --- /dev/null +++ b/StyleMigration/conf/wave.yml @@ -0,0 +1,26 @@ +## Basic configuration +style_image: img/wave.jpg # targeted style image +naming: "wave" # the name of this model. Determine the path to save checkpoint and events file. +model_path: models # root path to save checkpoint and events file. The final path would be / + +## Weight of the loss +content_weight: 1.0 # weight for content features loss +style_weight: 220.0 # weight for style features loss +tv_weight: 0.0 # weight for total variation loss + +## The size, the iter number to run +image_size: 256 +batch_size: 4 +epoch: 2 + +## Loss Network +loss_model: "vgg_16" +content_layers: # use these layers for content loss + - "vgg_16/conv3/conv3_3" +style_layers: # use these layers for style loss + - "vgg_16/conv1/conv1_2" + - "vgg_16/conv2/conv2_2" + - "vgg_16/conv3/conv3_3" + - "vgg_16/conv4/conv4_3" +checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers. +loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint diff --git a/StyleMigration/eval.py b/StyleMigration/eval.py new file mode 100644 index 0000000..225d420 --- /dev/null +++ b/StyleMigration/eval.py @@ -0,0 +1,77 @@ +# coding: utf-8 +from __future__ import print_function +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() +from preprocessing import preprocessing_factory +import reader +import model +import time +import os + +tf.app.flags.DEFINE_string('loss_model', 'vgg_16', 'The name of the architecture to evaluate. ' + 'You can view all the support models in nets/nets_factory.py') +tf.app.flags.DEFINE_integer('image_size', 256, 'Image size to train.') +tf.app.flags.DEFINE_string("model_file", "models.ckpt", "") +tf.app.flags.DEFINE_string("image_file", "a.jpg", "") + +FLAGS = tf.app.flags.FLAGS + + +def main(_): + + # Get image's height and width. + height = 0 + width = 0 + with open(FLAGS.image_file, 'rb') as img: + with tf.Session().as_default() as sess: + if FLAGS.image_file.lower().endswith('png'): + image = sess.run(tf.image.decode_png(img.read())) + else: + image = sess.run(tf.image.decode_jpeg(img.read())) + height = image.shape[0] + width = image.shape[1] + tf.logging.info('Image size: %dx%d' % (width, height)) + + with tf.Graph().as_default(): + with tf.Session().as_default() as sess: + + # Read image data. + image_preprocessing_fn, _ = preprocessing_factory.get_preprocessing( + FLAGS.loss_model, + is_training=False) + image = reader.get_image(FLAGS.image_file, height, width, image_preprocessing_fn) + + # Add batch dimension + image = tf.expand_dims(image, 0) + + generated = model.net(image, training=False) + generated = tf.cast(generated, tf.uint8) + + # Remove batch dimension + generated = tf.squeeze(generated, [0]) + + # Restore model variables. + saver = tf.train.Saver(tf.global_variables(), write_version=tf.train.SaverDef.V1) + sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()]) + # Use absolute path + FLAGS.model_file = os.path.abspath(FLAGS.model_file) + saver.restore(sess, FLAGS.model_file) + + # Make sure 'result' directory exists. + generated_file = 'result/res.jpg' + if os.path.exists('result') is False: + os.makedirs('result') + + # Generate and write image data to file. 
+ with open(generated_file, 'wb') as img: + start_time = time.time() + img.write(sess.run(tf.image.encode_jpeg(generated))) + end_time = time.time() + tf.logging.info('Elapsed time: %fs' % (end_time - start_time)) + + tf.logging.info('Done. Please check %s.' % generated_file) + + +if __name__ == '__main__': + tf.logging.set_verbosity(tf.logging.INFO) + tf.app.run() diff --git a/StyleMigration/export.py b/StyleMigration/export.py new file mode 100644 index 0000000..2b37d58 --- /dev/null +++ b/StyleMigration/export.py @@ -0,0 +1,84 @@ +# coding: utf-8 +from __future__ import print_function +import tensorflow as tf +import argparse +import time +import os + +import model +import utils + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument('-m', '--model_file', help='the path to the model file') + parser.add_argument('-n', '--model_name', default='transfer', help='the name of the model') + parser.add_argument('-d', dest='is_debug', action='store_true') + parser.set_defaults(is_debug=False) + return parser.parse_args() + + +def main(args): + g = tf.Graph() # A new graph + with g.as_default(): + with tf.Session() as sess: + # Building graph. + image_data = tf.placeholder(tf.int32, name='input_image') + height = tf.placeholder(tf.int32, name='height') + width = tf.placeholder(tf.int32, name='width') + + # Reshape data + image = tf.reshape(image_data, [height, width, 3]) + + processed_image = utils.mean_image_subtraction( + image, [123.68, 116.779, 103.939]) # Preprocessing image + batched_image = tf.expand_dims(processed_image, 0) # Add batch dimension + generated_image = model.net(batched_image, training=False) + casted_image = tf.cast(generated_image, tf.int32) + # Remove batch dimension + squeezed_image = tf.squeeze(casted_image, [0]) + cropped_image = tf.slice(squeezed_image, [0, 0, 0], [height, width, 3]) + # stylized_image = tf.image.encode_jpeg(squeezed_image, name='output_image') + stylized_image_data = tf.reshape(cropped_image, [-1], name='output_image') + + # Restore model variables. + saver = tf.train.Saver(tf.global_variables(), write_version=tf.train.SaverDef.V1) + sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()]) + # Use absolute path. 
+ model_file = os.path.abspath(args.model_file) + saver.restore(sess, model_file) + + if args.is_debug: + content_file = '/Users/Lex/Desktop/t.jpg' + generated_file = '/Users/Lex/Desktop/xwz-stylized.jpg' + + with open(generated_file, 'wb') as img: + image_bytes = tf.read_file(content_file) + input_array, decoded_image = sess.run([ + tf.reshape(tf.image.decode_jpeg(image_bytes, channels=3), [-1]), + tf.image.decode_jpeg(image_bytes, channels=3)]) + + start_time = time.time() + img.write(sess.run(tf.image.encode_jpeg(tf.cast(cropped_image, tf.uint8)), feed_dict={ + image_data: input_array, + height: decoded_image.shape[0], + width: decoded_image.shape[1]})) + end_time = time.time() + + tf.logging.info('Elapsed time: %fs' % (end_time - start_time)) + else: + output_graph_def = tf.graph_util.convert_variables_to_constants( + sess, sess.graph_def, output_node_names=['output_image']) + + with tf.gfile.FastGFile('/Users/Lex/Desktop/' + args.model_name + '.pb', mode='wb') as f: + f.write(output_graph_def.SerializeToString()) + + # tf.train.write_graph(g.as_graph_def(), '/Users/Lex/Desktop', + # args.model_name + '.pb', as_text=False) + + +if __name__ == '__main__': + tf.logging.set_verbosity(tf.logging.INFO) + args = parse_args() + print(args) + main(args) diff --git a/StyleMigration/img/denoised_starry.jpg b/StyleMigration/img/denoised_starry.jpg new file mode 100644 index 0000000..b43b6e2 Binary files /dev/null and b/StyleMigration/img/denoised_starry.jpg differ diff --git a/StyleMigration/img/ecnu.jpg b/StyleMigration/img/ecnu.jpg new file mode 100644 index 0000000..c76330f Binary files /dev/null and b/StyleMigration/img/ecnu.jpg differ diff --git a/StyleMigration/img/scream.jpg b/StyleMigration/img/scream.jpg new file mode 100644 index 0000000..ab54066 Binary files /dev/null and b/StyleMigration/img/scream.jpg differ diff --git a/StyleMigration/img/wave.jpg b/StyleMigration/img/wave.jpg new file mode 100644 index 0000000..44be8bf Binary files /dev/null and b/StyleMigration/img/wave.jpg differ diff --git a/StyleMigration/losses.py b/StyleMigration/losses.py new file mode 100644 index 0000000..fbf9bf1 --- /dev/null +++ b/StyleMigration/losses.py @@ -0,0 +1,108 @@ +# coding: utf-8 +from __future__ import print_function +import tensorflow as tf +from nets import nets_factory +from preprocessing import preprocessing_factory +import utils +import os + +slim = tf.contrib.slim + + +def gram(layer): + shape = tf.shape(layer) + num_images = shape[0] + width = shape[1] + height = shape[2] + num_filters = shape[3] + filters = tf.reshape(layer, tf.stack([num_images, -1, num_filters])) + grams = tf.matmul(filters, filters, transpose_a=True) / tf.to_float(width * height * num_filters) + + return grams + + +def get_style_features(FLAGS): + """ + For the "style_image", the preprocessing step is: + 1. Resize the shorter side to FLAGS.image_size + 2. 
Apply central crop + """ + with tf.Graph().as_default(): + network_fn = nets_factory.get_network_fn( + FLAGS.loss_model, + num_classes=1, + is_training=False) + image_preprocessing_fn, image_unprocessing_fn = preprocessing_factory.get_preprocessing( + FLAGS.loss_model, + is_training=False) + + # Get the style image data + size = FLAGS.image_size + img_bytes = tf.read_file(FLAGS.style_image) + if FLAGS.style_image.lower().endswith('png'): + image = tf.image.decode_png(img_bytes) + else: + image = tf.image.decode_jpeg(img_bytes) + # image = _aspect_preserving_resize(image, size) + + # Add the batch dimension + images = tf.expand_dims(image_preprocessing_fn(image, size, size), 0) + # images = tf.stack([image_preprocessing_fn(image, size, size)]) + + _, endpoints_dict = network_fn(images, spatial_squeeze=False) + features = [] + for layer in FLAGS.style_layers: + feature = endpoints_dict[layer] + feature = tf.squeeze(gram(feature), [0]) # remove the batch dimension + features.append(feature) + + with tf.Session() as sess: + # Restore variables for loss network. + init_func = utils._get_init_fn(FLAGS) + init_func(sess) + + # Make sure the 'generated' directory is exists. + if os.path.exists('generated') is False: + os.makedirs('generated') + # Indicate cropped style image path + save_file = 'generated/target_style_' + FLAGS.naming + '.jpg' + # Write preprocessed style image to indicated path + with open(save_file, 'wb') as f: + target_image = image_unprocessing_fn(images[0, :]) + value = tf.image.encode_jpeg(tf.cast(target_image, tf.uint8)) + f.write(sess.run(value)) + tf.logging.info('Target style pattern is saved to: %s.' % save_file) + + # Return the features those layers are use for measuring style loss. + return sess.run(features) + + +def style_loss(endpoints_dict, style_features_t, style_layers): + style_loss = 0 + style_loss_summary = {} + for style_gram, layer in zip(style_features_t, style_layers): + generated_images, _ = tf.split(endpoints_dict[layer], 2, 0) + size = tf.size(generated_images) + layer_style_loss = tf.nn.l2_loss(gram(generated_images) - style_gram) * 2 / tf.to_float(size) + style_loss_summary[layer] = layer_style_loss + style_loss += layer_style_loss + return style_loss, style_loss_summary + + +def content_loss(endpoints_dict, content_layers): + content_loss = 0 + for layer in content_layers: + generated_images, content_images = tf.split(endpoints_dict[layer], 2, 0) + size = tf.size(generated_images) + content_loss += tf.nn.l2_loss(generated_images - content_images) * 2 / tf.to_float(size) # remain the same as in the paper + return content_loss + + +def total_variation_loss(layer): + shape = tf.shape(layer) + height = shape[1] + width = shape[2] + y = tf.slice(layer, [0, 0, 0, 0], tf.stack([-1, height - 1, -1, -1])) - tf.slice(layer, [0, 1, 0, 0], [-1, -1, -1, -1]) + x = tf.slice(layer, [0, 0, 0, 0], tf.stack([-1, -1, width - 1, -1])) - tf.slice(layer, [0, 0, 1, 0], [-1, -1, -1, -1]) + loss = tf.nn.l2_loss(x) / tf.to_float(tf.size(x)) + tf.nn.l2_loss(y) / tf.to_float(tf.size(y)) + return loss diff --git a/StyleMigration/model.py b/StyleMigration/model.py new file mode 100644 index 0000000..5985231 --- /dev/null +++ b/StyleMigration/model.py @@ -0,0 +1,134 @@ +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() + + +def conv2d(x, input_filters, output_filters, kernel, strides, mode='REFLECT'): + with tf.variable_scope('conv'): + + shape = [kernel, kernel, input_filters, output_filters] + weight = tf.Variable(tf.truncated_normal(shape, stddev=0.1), name='weight') 
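+ # Added note: reflect-pad the height and width by kernel // 2 so that the VALID convolution below does not shrink the feature map at the borders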
+ x_padded = tf.pad(x, [[0, 0], [int(kernel / 2), int(kernel / 2)], [int(kernel / 2), int(kernel / 2)], [0, 0]], mode=mode) + return tf.nn.conv2d(x_padded, weight, strides=[1, strides, strides, 1], padding='VALID', name='conv') + + +def conv2d_transpose(x, input_filters, output_filters, kernel, strides): + with tf.variable_scope('conv_transpose'): + + shape = [kernel, kernel, output_filters, input_filters] + weight = tf.Variable(tf.truncated_normal(shape, stddev=0.1), name='weight') + + batch_size = tf.shape(x)[0] + height = tf.shape(x)[1] * strides + width = tf.shape(x)[2] * strides + output_shape = tf.stack([batch_size, height, width, output_filters]) + return tf.nn.conv2d_transpose(x, weight, output_shape, strides=[1, strides, strides, 1], name='conv_transpose') + + +def resize_conv2d(x, input_filters, output_filters, kernel, strides, training): + ''' + An alternative to transposed convolution where we first resize, then convolve. + See http://distill.pub/2016/deconv-checkerboard/ + + For some reason the shape needs to be statically known for gradient propagation + through tf.image.resize_images, but we only know that for fixed image size, so we + plumb through a "training" argument + ''' + with tf.variable_scope('conv_transpose'): + height = x.get_shape()[1].value if training else tf.shape(x)[1] + width = x.get_shape()[2].value if training else tf.shape(x)[2] + + new_height = height * strides * 2 + new_width = width * strides * 2 + + x_resized = tf.image.resize_images(x, [new_height, new_width], tf.image.ResizeMethod.NEAREST_NEIGHBOR) + + # shape = [kernel, kernel, input_filters, output_filters] + # weight = tf.Variable(tf.truncated_normal(shape, stddev=0.1), name='weight') + return conv2d(x_resized, input_filters, output_filters, kernel, strides) + + +def instance_norm(x): + epsilon = 1e-9 + + mean, var = tf.nn.moments(x, [1, 2], keep_dims=True) + + return tf.div(tf.subtract(x, mean), tf.sqrt(tf.add(var, epsilon))) + + +def batch_norm(x, size, training, decay=0.999): + beta = tf.Variable(tf.zeros([size]), name='beta') + scale = tf.Variable(tf.ones([size]), name='scale') + pop_mean = tf.Variable(tf.zeros([size])) + pop_var = tf.Variable(tf.ones([size])) + epsilon = 1e-3 + + batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2]) + train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay)) + train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay)) + + def batch_statistics(): + with tf.control_dependencies([train_mean, train_var]): + return tf.nn.batch_normalization(x, batch_mean, batch_var, beta, scale, epsilon, name='batch_norm') + + def population_statistics(): + return tf.nn.batch_normalization(x, pop_mean, pop_var, beta, scale, epsilon, name='batch_norm') + + return tf.cond(training, batch_statistics, population_statistics) + + +def relu(input): + relu = tf.nn.relu(input) + # convert nan to zero (nan != nan) + nan_to_zero = tf.where(tf.equal(relu, relu), relu, tf.zeros_like(relu)) + return nan_to_zero + + +def residual(x, filters, kernel, strides): + with tf.variable_scope('residual'): + conv1 = conv2d(x, filters, filters, kernel, strides) + conv2 = conv2d(relu(conv1), filters, filters, kernel, strides) + + residual = x + conv2 + + return residual + + +def net(image, training): + # Less border effects when padding a little before passing through .. 
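+ # Added note: the 10-pixel reflected border added here is cropped off again by the tf.slice at the end of net()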
+ image = tf.pad(image, [[0, 0], [10, 10], [10, 10], [0, 0]], mode='REFLECT') + + with tf.variable_scope('conv1'): + conv1 = relu(instance_norm(conv2d(image, 3, 32, 9, 1))) + with tf.variable_scope('conv2'): + conv2 = relu(instance_norm(conv2d(conv1, 32, 64, 3, 2))) + with tf.variable_scope('conv3'): + conv3 = relu(instance_norm(conv2d(conv2, 64, 128, 3, 2))) + with tf.variable_scope('res1'): + res1 = residual(conv3, 128, 3, 1) + with tf.variable_scope('res2'): + res2 = residual(res1, 128, 3, 1) + with tf.variable_scope('res3'): + res3 = residual(res2, 128, 3, 1) + with tf.variable_scope('res4'): + res4 = residual(res3, 128, 3, 1) + with tf.variable_scope('res5'): + res5 = residual(res4, 128, 3, 1) + # print(res5.get_shape()) + with tf.variable_scope('deconv1'): + # deconv1 = relu(instance_norm(conv2d_transpose(res5, 128, 64, 3, 2))) + deconv1 = relu(instance_norm(resize_conv2d(res5, 128, 64, 3, 2, training))) + with tf.variable_scope('deconv2'): + # deconv2 = relu(instance_norm(conv2d_transpose(deconv1, 64, 32, 3, 2))) + deconv2 = relu(instance_norm(resize_conv2d(deconv1, 64, 32, 3, 2, training))) + with tf.variable_scope('deconv3'): + # deconv_test = relu(instance_norm(conv2d(deconv2, 32, 32, 2, 1))) + deconv3 = tf.nn.tanh(instance_norm(conv2d(deconv2, 32, 3, 9, 1))) + + y = (deconv3 + 1) * 127.5 + + # Remove border effect reducing padding. + height = tf.shape(y)[1] + width = tf.shape(y)[2] + y = tf.slice(y, [0, 10, 10, 0], tf.stack([-1, height - 20, width - 20, -1])) + + return y diff --git a/StyleMigration/nets/__init__.py b/StyleMigration/nets/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/StyleMigration/nets/__init__.py @@ -0,0 +1 @@ + diff --git a/StyleMigration/nets/alexnet.py b/StyleMigration/nets/alexnet.py new file mode 100644 index 0000000..a6b93de --- /dev/null +++ b/StyleMigration/nets/alexnet.py @@ -0,0 +1,125 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains a model definition for AlexNet. + +This work was first described in: + ImageNet Classification with Deep Convolutional Neural Networks + Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton + +and later refined in: + One weird trick for parallelizing convolutional neural networks + Alex Krizhevsky, 2014 + +Here we provide the implementation proposed in "One weird trick" and not +"ImageNet Classification", as per the paper, the LRN layers have been removed. 
+ +Usage: + with slim.arg_scope(alexnet.alexnet_v2_arg_scope()): + outputs, end_points = alexnet.alexnet_v2(inputs) + +@@alexnet_v2 +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +slim = tf.contrib.slim +trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) + + +def alexnet_v2_arg_scope(weight_decay=0.0005): + with slim.arg_scope([slim.conv2d, slim.fully_connected], + activation_fn=tf.nn.relu, + biases_initializer=tf.constant_initializer(0.1), + weights_regularizer=slim.l2_regularizer(weight_decay)): + with slim.arg_scope([slim.conv2d], padding='SAME'): + with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: + return arg_sc + + +def alexnet_v2(inputs, + num_classes=1000, + is_training=True, + dropout_keep_prob=0.5, + spatial_squeeze=True, + scope='alexnet_v2'): + """AlexNet version 2. + + Described in: http://arxiv.org/pdf/1404.5997v2.pdf + Parameters from: + github.com/akrizhevsky/cuda-convnet2/blob/master/layers/ + layers-imagenet-1gpu.cfg + + Note: All the fully_connected layers have been transformed to conv2d layers. + To use in classification mode, resize input to 224x224. To use in fully + convolutional mode, set spatial_squeeze to false. + The LRN layers have been removed and change the initializers from + random_normal_initializer to xavier_initializer. + + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + num_classes: number of predicted classes. + is_training: whether or not the model is being trained. + dropout_keep_prob: the probability that activations are kept in the dropout + layers during training. + spatial_squeeze: whether or not should squeeze the spatial dimensions of the + outputs. Useful to remove unnecessary dimensions for classification. + scope: Optional scope for the variables. + + Returns: + the last op containing the log predictions and end_points dict. + """ + with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc: + end_points_collection = sc.name + '_end_points' + # Collect outputs for conv2d, fully_connected and max_pool2d. + with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], + outputs_collections=[end_points_collection]): + net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', + scope='conv1') + net = slim.max_pool2d(net, [3, 3], 2, scope='pool1') + net = slim.conv2d(net, 192, [5, 5], scope='conv2') + net = slim.max_pool2d(net, [3, 3], 2, scope='pool2') + net = slim.conv2d(net, 384, [3, 3], scope='conv3') + net = slim.conv2d(net, 384, [3, 3], scope='conv4') + net = slim.conv2d(net, 256, [3, 3], scope='conv5') + net = slim.max_pool2d(net, [3, 3], 2, scope='pool5') + + # Use conv2d instead of fully_connected layers. + with slim.arg_scope([slim.conv2d], + weights_initializer=trunc_normal(0.005), + biases_initializer=tf.constant_initializer(0.1)): + net = slim.conv2d(net, 4096, [5, 5], padding='VALID', + scope='fc6') + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='dropout6') + net = slim.conv2d(net, 4096, [1, 1], scope='fc7') + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='dropout7') + net = slim.conv2d(net, num_classes, [1, 1], + activation_fn=None, + normalizer_fn=None, + biases_initializer=tf.zeros_initializer, + scope='fc8') + + # Convert end_points_collection into a end_point dict. 
+ end_points = slim.utils.convert_collection_to_dict(end_points_collection) + if spatial_squeeze: + net = tf.squeeze(net, [1, 2], name='fc8/squeezed') + end_points[sc.name + '/fc8'] = net + return net, end_points +alexnet_v2.default_image_size = 224 diff --git a/StyleMigration/nets/alexnet_test.py b/StyleMigration/nets/alexnet_test.py new file mode 100644 index 0000000..493c460 --- /dev/null +++ b/StyleMigration/nets/alexnet_test.py @@ -0,0 +1,145 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for slim.nets.alexnet.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from nets import alexnet + +slim = tf.contrib.slim + + +class AlexnetV2Test(tf.test.TestCase): + + def testBuild(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = alexnet.alexnet_v2(inputs, num_classes) + self.assertEquals(logits.op.name, 'alexnet_v2/fc8/squeezed') + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + + def testFullyConvolutional(self): + batch_size = 1 + height, width = 300, 400 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False) + self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd') + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, 4, 7, num_classes]) + + def testEndPoints(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + _, end_points = alexnet.alexnet_v2(inputs, num_classes) + expected_names = ['alexnet_v2/conv1', + 'alexnet_v2/pool1', + 'alexnet_v2/conv2', + 'alexnet_v2/pool2', + 'alexnet_v2/conv3', + 'alexnet_v2/conv4', + 'alexnet_v2/conv5', + 'alexnet_v2/pool5', + 'alexnet_v2/fc6', + 'alexnet_v2/fc7', + 'alexnet_v2/fc8' + ] + self.assertSetEqual(set(end_points.keys()), set(expected_names)) + + def testModelVariables(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + alexnet.alexnet_v2(inputs, num_classes) + expected_names = ['alexnet_v2/conv1/weights', + 'alexnet_v2/conv1/biases', + 'alexnet_v2/conv2/weights', + 'alexnet_v2/conv2/biases', + 'alexnet_v2/conv3/weights', + 'alexnet_v2/conv3/biases', + 'alexnet_v2/conv4/weights', + 'alexnet_v2/conv4/biases', + 'alexnet_v2/conv5/weights', + 'alexnet_v2/conv5/biases', + 'alexnet_v2/fc6/weights', + 'alexnet_v2/fc6/biases', + 'alexnet_v2/fc7/weights', + 'alexnet_v2/fc7/biases', + 'alexnet_v2/fc8/weights', + 'alexnet_v2/fc8/biases', + ] + model_variables = [v.op.name for v in 
slim.get_model_variables()] + self.assertSetEqual(set(model_variables), set(expected_names)) + + def testEvaluation(self): + batch_size = 2 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + eval_inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + predictions = tf.argmax(logits, 1) + self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) + + def testTrainEvalWithReuse(self): + train_batch_size = 2 + eval_batch_size = 1 + train_height, train_width = 224, 224 + eval_height, eval_width = 300, 400 + num_classes = 1000 + with self.test_session(): + train_inputs = tf.random_uniform( + (train_batch_size, train_height, train_width, 3)) + logits, _ = alexnet.alexnet_v2(train_inputs) + self.assertListEqual(logits.get_shape().as_list(), + [train_batch_size, num_classes]) + tf.get_variable_scope().reuse_variables() + eval_inputs = tf.random_uniform( + (eval_batch_size, eval_height, eval_width, 3)) + logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False, + spatial_squeeze=False) + self.assertListEqual(logits.get_shape().as_list(), + [eval_batch_size, 4, 7, num_classes]) + logits = tf.reduce_mean(logits, [1, 2]) + predictions = tf.argmax(logits, 1) + self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) + + def testForward(self): + batch_size = 1 + height, width = 224, 224 + with self.test_session() as sess: + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = alexnet.alexnet_v2(inputs) + sess.run(tf.initialize_all_variables()) + output = sess.run(logits) + self.assertTrue(output.any()) + +if __name__ == '__main__': + tf.test.main() diff --git a/StyleMigration/nets/cifarnet.py b/StyleMigration/nets/cifarnet.py new file mode 100644 index 0000000..371a9cb --- /dev/null +++ b/StyleMigration/nets/cifarnet.py @@ -0,0 +1,112 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains a variant of the CIFAR-10 model definition.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +slim = tf.contrib.slim + +trunc_normal = lambda stddev: tf.truncated_normal_initializer(stddev=stddev) + + +def cifarnet(images, num_classes=10, is_training=False, + dropout_keep_prob=0.5, + prediction_fn=slim.softmax, + scope='CifarNet'): + """Creates a variant of the CifarNet model. + + Note that since the output is a set of 'logits', the values fall in the + interval of (-infinity, infinity). 
Consequently, to convert the outputs to a + probability distribution over the characters, one will need to convert them + using the softmax function: + + logits = cifarnet.cifarnet(images, is_training=False) + probabilities = tf.nn.softmax(logits) + predictions = tf.argmax(logits, 1) + + Args: + images: A batch of `Tensors` of size [batch_size, height, width, channels]. + num_classes: the number of classes in the dataset. + is_training: specifies whether or not we're currently training the model. + This variable will determine the behaviour of the dropout layer. + dropout_keep_prob: the percentage of activation values that are retained. + prediction_fn: a function to get predictions out of logits. + scope: Optional variable_scope. + + Returns: + logits: the pre-softmax activations, a tensor of size + [batch_size, `num_classes`] + end_points: a dictionary from components of the network to the corresponding + activation. + """ + end_points = {} + + with tf.variable_scope(scope, 'CifarNet', [images, num_classes]): + net = slim.conv2d(images, 64, [5, 5], scope='conv1') + end_points['conv1'] = net + net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') + end_points['pool1'] = net + net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1') + net = slim.conv2d(net, 64, [5, 5], scope='conv2') + end_points['conv2'] = net + net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2') + net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') + end_points['pool2'] = net + net = slim.flatten(net) + end_points['Flatten'] = net + net = slim.fully_connected(net, 384, scope='fc3') + end_points['fc3'] = net + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='dropout3') + net = slim.fully_connected(net, 192, scope='fc4') + end_points['fc4'] = net + logits = slim.fully_connected(net, num_classes, + biases_initializer=tf.zeros_initializer, + weights_initializer=trunc_normal(1/192.0), + weights_regularizer=None, + activation_fn=None, + scope='logits') + + end_points['Logits'] = logits + end_points['Predictions'] = prediction_fn(logits, scope='Predictions') + + return logits, end_points +cifarnet.default_image_size = 32 + + +def cifarnet_arg_scope(weight_decay=0.004): + """Defines the default cifarnet argument scope. + + Args: + weight_decay: The weight decay to use for regularizing the model. + + Returns: + An `arg_scope` to use for the inception v3 model. + """ + with slim.arg_scope( + [slim.conv2d], + weights_initializer=tf.truncated_normal_initializer(stddev=5e-2), + activation_fn=tf.nn.relu): + with slim.arg_scope( + [slim.fully_connected], + biases_initializer=tf.constant_initializer(0.1), + weights_initializer=trunc_normal(0.04), + weights_regularizer=slim.l2_regularizer(weight_decay), + activation_fn=tf.nn.relu) as sc: + return sc diff --git a/StyleMigration/nets/inception.py b/StyleMigration/nets/inception.py new file mode 100644 index 0000000..806c30b --- /dev/null +++ b/StyleMigration/nets/inception.py @@ -0,0 +1,36 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Brings all inception models under one namespace.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# pylint: disable=unused-import +from nets.inception_resnet_v2 import inception_resnet_v2 +from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope +from nets.inception_v1 import inception_v1 +from nets.inception_v1 import inception_v1_arg_scope +from nets.inception_v1 import inception_v1_base +from nets.inception_v2 import inception_v2 +from nets.inception_v2 import inception_v2_arg_scope +from nets.inception_v2 import inception_v2_base +from nets.inception_v3 import inception_v3 +from nets.inception_v3 import inception_v3_arg_scope +from nets.inception_v3 import inception_v3_base +from nets.inception_v4 import inception_v4 +from nets.inception_v4 import inception_v4_arg_scope +from nets.inception_v4 import inception_v4_base +# pylint: enable=unused-import diff --git a/StyleMigration/nets/inception_resnet_v2.py b/StyleMigration/nets/inception_resnet_v2.py new file mode 100644 index 0000000..4b3c5bd --- /dev/null +++ b/StyleMigration/nets/inception_resnet_v2.py @@ -0,0 +1,280 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains the definition of the Inception Resnet V2 architecture. + +As described in http://arxiv.org/abs/1602.07261. 
+ + Inception-v4, Inception-ResNet and the Impact of Residual Connections + on Learning + Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +import tensorflow as tf + +slim = tf.contrib.slim + + +def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): + """Builds the 35x35 resnet block.""" + with tf.variable_scope(scope, 'Block35', [net], reuse=reuse): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') + tower_conv2_1 = slim.conv2d(tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3') + tower_conv2_2 = slim.conv2d(tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3') + mixed = tf.concat(3, [tower_conv, tower_conv1_1, tower_conv2_2]) + up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, + activation_fn=None, scope='Conv2d_1x1') + net += scale * up + if activation_fn: + net = activation_fn(net) + return net + + +def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): + """Builds the 17x17 resnet block.""" + with tf.variable_scope(scope, 'Block17', [net], reuse=reuse): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7], + scope='Conv2d_0b_1x7') + tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1], + scope='Conv2d_0c_7x1') + mixed = tf.concat(3, [tower_conv, tower_conv1_2]) + up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, + activation_fn=None, scope='Conv2d_1x1') + net += scale * up + if activation_fn: + net = activation_fn(net) + return net + + +def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): + """Builds the 8x8 resnet block.""" + with tf.variable_scope(scope, 'Block8', [net], reuse=reuse): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3], + scope='Conv2d_0b_1x3') + tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1], + scope='Conv2d_0c_3x1') + mixed = tf.concat(3, [tower_conv, tower_conv1_2]) + up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, + activation_fn=None, scope='Conv2d_1x1') + net += scale * up + if activation_fn: + net = activation_fn(net) + return net + + +def inception_resnet_v2(inputs, num_classes=1001, is_training=True, + dropout_keep_prob=0.8, + reuse=None, + scope='InceptionResnetV2'): + """Creates the Inception Resnet V2 model. + + Args: + inputs: a 4-D tensor of size [batch_size, height, width, 3]. + num_classes: number of predicted classes. + is_training: whether is training or not. + dropout_keep_prob: float, the fraction to keep before final layer. + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + + Returns: + logits: the logits outputs of the model. 
+ end_points: the set of end_points from the inception model. + """ + end_points = {} + + with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse): + with slim.arg_scope([slim.batch_norm, slim.dropout], + is_training=is_training): + with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], + stride=1, padding='SAME'): + + # 149 x 149 x 32 + net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', + scope='Conv2d_1a_3x3') + end_points['Conv2d_1a_3x3'] = net + # 147 x 147 x 32 + net = slim.conv2d(net, 32, 3, padding='VALID', + scope='Conv2d_2a_3x3') + end_points['Conv2d_2a_3x3'] = net + # 147 x 147 x 64 + net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') + end_points['Conv2d_2b_3x3'] = net + # 73 x 73 x 64 + net = slim.max_pool2d(net, 3, stride=2, padding='VALID', + scope='MaxPool_3a_3x3') + end_points['MaxPool_3a_3x3'] = net + # 73 x 73 x 80 + net = slim.conv2d(net, 80, 1, padding='VALID', + scope='Conv2d_3b_1x1') + end_points['Conv2d_3b_1x1'] = net + # 71 x 71 x 192 + net = slim.conv2d(net, 192, 3, padding='VALID', + scope='Conv2d_4a_3x3') + end_points['Conv2d_4a_3x3'] = net + # 35 x 35 x 192 + net = slim.max_pool2d(net, 3, stride=2, padding='VALID', + scope='MaxPool_5a_3x3') + end_points['MaxPool_5a_3x3'] = net + + # 35 x 35 x 320 + with tf.variable_scope('Mixed_5b'): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5, + scope='Conv2d_0b_5x5') + with tf.variable_scope('Branch_2'): + tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1') + tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3, + scope='Conv2d_0b_3x3') + tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3, + scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME', + scope='AvgPool_0a_3x3') + tower_pool_1 = slim.conv2d(tower_pool, 64, 1, + scope='Conv2d_0b_1x1') + net = tf.concat(3, [tower_conv, tower_conv1_1, + tower_conv2_2, tower_pool_1]) + + end_points['Mixed_5b'] = net + net = slim.repeat(net, 10, block35, scale=0.17) + + # 17 x 17 x 1024 + with tf.variable_scope('Mixed_6a'): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 384, 3, stride=2, padding='VALID', + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, + scope='Conv2d_0b_3x3') + tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, + stride=2, padding='VALID', + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + net = tf.concat(3, [tower_conv, tower_conv1_2, tower_pool]) + + end_points['Mixed_6a'] = net + net = slim.repeat(net, 20, block17, scale=0.10) + + # Auxillary tower + with tf.variable_scope('AuxLogits'): + aux = slim.avg_pool2d(net, 5, stride=3, padding='VALID', + scope='Conv2d_1a_3x3') + aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1') + aux = slim.conv2d(aux, 768, aux.get_shape()[1:3], + padding='VALID', scope='Conv2d_2a_5x5') + aux = slim.flatten(aux) + aux = slim.fully_connected(aux, num_classes, activation_fn=None, + scope='Logits') + end_points['AuxLogits'] = aux + + with tf.variable_scope('Mixed_7a'): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 256, 1, 
scope='Conv2d_0a_1x1') + tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') + tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, + scope='Conv2d_0b_3x3') + tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_3'): + tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + net = tf.concat(3, [tower_conv_1, tower_conv1_1, + tower_conv2_2, tower_pool]) + + end_points['Mixed_7a'] = net + + net = slim.repeat(net, 9, block8, scale=0.20) + net = block8(net, activation_fn=None) + + net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1') + end_points['Conv2d_7b_1x1'] = net + + with tf.variable_scope('Logits'): + end_points['PrePool'] = net + net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', + scope='AvgPool_1a_8x8') + net = slim.flatten(net) + + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='Dropout') + + end_points['PreLogitsFlatten'] = net + logits = slim.fully_connected(net, num_classes, activation_fn=None, + scope='Logits') + end_points['Logits'] = logits + end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions') + + return logits, end_points +inception_resnet_v2.default_image_size = 299 + + +def inception_resnet_v2_arg_scope(weight_decay=0.00004, + batch_norm_decay=0.9997, + batch_norm_epsilon=0.001): + """Yields the scope with the default parameters for inception_resnet_v2. + + Args: + weight_decay: the weight decay for weights variables. + batch_norm_decay: decay for the moving average of batch_norm momentums. + batch_norm_epsilon: small float added to variance to avoid dividing by zero. + + Returns: + a arg_scope with the parameters needed for inception_resnet_v2. + """ + # Set weight_decay for weights in conv2d and fully_connected layers. + with slim.arg_scope([slim.conv2d, slim.fully_connected], + weights_regularizer=slim.l2_regularizer(weight_decay), + biases_regularizer=slim.l2_regularizer(weight_decay)): + + batch_norm_params = { + 'decay': batch_norm_decay, + 'epsilon': batch_norm_epsilon, + } + # Set activation_fn and parameters for batch_norm. + with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, + normalizer_fn=slim.batch_norm, + normalizer_params=batch_norm_params) as scope: + return scope diff --git a/StyleMigration/nets/inception_resnet_v2_test.py b/StyleMigration/nets/inception_resnet_v2_test.py new file mode 100644 index 0000000..b74756a --- /dev/null +++ b/StyleMigration/nets/inception_resnet_v2_test.py @@ -0,0 +1,136 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for slim.inception_resnet_v2.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from nets import inception + + +class InceptionTest(tf.test.TestCase): + + def testBuildLogits(self): + batch_size = 5 + height, width = 299, 299 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = inception.inception_resnet_v2(inputs, num_classes) + self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + + def testBuildEndPoints(self): + batch_size = 5 + height, width = 299, 299 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + _, end_points = inception.inception_resnet_v2(inputs, num_classes) + self.assertTrue('Logits' in end_points) + logits = end_points['Logits'] + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + self.assertTrue('AuxLogits' in end_points) + aux_logits = end_points['AuxLogits'] + self.assertListEqual(aux_logits.get_shape().as_list(), + [batch_size, num_classes]) + pre_pool = end_points['PrePool'] + self.assertListEqual(pre_pool.get_shape().as_list(), + [batch_size, 8, 8, 1536]) + + def testVariablesSetDevice(self): + batch_size = 5 + height, width = 299, 299 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + # Force all Variables to reside on the device. + with tf.variable_scope('on_cpu'), tf.device('/cpu:0'): + inception.inception_resnet_v2(inputs, num_classes) + with tf.variable_scope('on_gpu'), tf.device('/gpu:0'): + inception.inception_resnet_v2(inputs, num_classes) + for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_cpu'): + self.assertDeviceEqual(v.device, '/cpu:0') + for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_gpu'): + self.assertDeviceEqual(v.device, '/gpu:0') + + def testHalfSizeImages(self): + batch_size = 5 + height, width = 150, 150 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, end_points = inception.inception_resnet_v2(inputs, num_classes) + self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + pre_pool = end_points['PrePool'] + self.assertListEqual(pre_pool.get_shape().as_list(), + [batch_size, 3, 3, 1536]) + + def testUnknownBatchSize(self): + batch_size = 1 + height, width = 299, 299 + num_classes = 1000 + with self.test_session() as sess: + inputs = tf.placeholder(tf.float32, (None, height, width, 3)) + logits, _ = inception.inception_resnet_v2(inputs, num_classes) + self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [None, num_classes]) + images = tf.random_uniform((batch_size, height, width, 3)) + sess.run(tf.initialize_all_variables()) + output = sess.run(logits, {inputs: images.eval()}) + self.assertEquals(output.shape, (batch_size, num_classes)) + + def testEvaluation(self): + batch_size = 2 + height, width = 299, 299 + num_classes = 1000 + with self.test_session() as sess: + eval_inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = 
inception.inception_resnet_v2(eval_inputs, + num_classes, + is_training=False) + predictions = tf.argmax(logits, 1) + sess.run(tf.initialize_all_variables()) + output = sess.run(predictions) + self.assertEquals(output.shape, (batch_size,)) + + def testTrainEvalWithReuse(self): + train_batch_size = 5 + eval_batch_size = 2 + height, width = 150, 150 + num_classes = 1000 + with self.test_session() as sess: + train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) + inception.inception_resnet_v2(train_inputs, num_classes) + eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) + logits, _ = inception.inception_resnet_v2(eval_inputs, + num_classes, + is_training=False, + reuse=True) + predictions = tf.argmax(logits, 1) + sess.run(tf.initialize_all_variables()) + output = sess.run(predictions) + self.assertEquals(output.shape, (eval_batch_size,)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/StyleMigration/nets/inception_utils.py b/StyleMigration/nets/inception_utils.py new file mode 100644 index 0000000..66ee41f --- /dev/null +++ b/StyleMigration/nets/inception_utils.py @@ -0,0 +1,71 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains common code shared by all inception models. + +Usage of arg scope: + with slim.arg_scope(inception_arg_scope()): + logits, end_points = inception.inception_v3(images, num_classes, + is_training=is_training) + +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +slim = tf.contrib.slim + + +def inception_arg_scope(weight_decay=0.00004, + use_batch_norm=True, + batch_norm_decay=0.9997, + batch_norm_epsilon=0.001): + """Defines the default arg scope for inception models. + + Args: + weight_decay: The weight decay to use for regularizing the model. + use_batch_norm: "If `True`, batch_norm is applied after each convolution. + batch_norm_decay: Decay for batch norm moving average. + batch_norm_epsilon: Small float added to variance to avoid dividing by zero + in batch norm. + + Returns: + An `arg_scope` to use for the inception models. + """ + batch_norm_params = { + # Decay for the moving averages. + 'decay': batch_norm_decay, + # epsilon to prevent 0s in variance. + 'epsilon': batch_norm_epsilon, + # collection containing update_ops. + 'updates_collections': tf.GraphKeys.UPDATE_OPS, + } + if use_batch_norm: + normalizer_fn = slim.batch_norm + normalizer_params = batch_norm_params + else: + normalizer_fn = None + normalizer_params = {} + # Set weight_decay for weights in Conv and FC layers. 
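+  # The nested arg_scope below adds the variance-scaling initializer, ReLU and
+  # (when enabled) batch norm to conv2d only; fully_connected layers pick up
+  # just the L2 weight regularizer from this scope.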
+ with slim.arg_scope([slim.conv2d, slim.fully_connected], + weights_regularizer=slim.l2_regularizer(weight_decay)): + with slim.arg_scope( + [slim.conv2d], + weights_initializer=slim.variance_scaling_initializer(), + activation_fn=tf.nn.relu, + normalizer_fn=normalizer_fn, + normalizer_params=normalizer_params) as sc: + return sc diff --git a/StyleMigration/nets/inception_v1.py b/StyleMigration/nets/inception_v1.py new file mode 100644 index 0000000..8f64479 --- /dev/null +++ b/StyleMigration/nets/inception_v1.py @@ -0,0 +1,305 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains the definition for inception v1 classification network.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from nets import inception_utils + +slim = tf.contrib.slim +trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) + + +def inception_v1_base(inputs, + final_endpoint='Mixed_5c', + scope='InceptionV1'): + """Defines the Inception V1 base architecture. + + This architecture is defined in: + Going deeper with convolutions + Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, + Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich. + http://arxiv.org/pdf/1409.4842v1.pdf. + + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + final_endpoint: specifies the endpoint to construct the network up to. It + can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', + 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', + 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', + 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c'] + scope: Optional variable_scope. + + Returns: + A dictionary from components of the network to the corresponding activation. + + Raises: + ValueError: if final_endpoint is not set to one of the predefined values. 
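+
+  Example (a minimal sketch; `images` is an assumed float tensor of shape
+  [batch_size, 224, 224, 3]):
+
+    net, end_points = inception_v1_base(images, final_endpoint='Mixed_4f')
+    # `net` is the 'Mixed_4f' activation; earlier activations stay in `end_points`.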
+ """ + end_points = {} + with tf.variable_scope(scope, 'InceptionV1', [inputs]): + with slim.arg_scope( + [slim.conv2d, slim.fully_connected], + weights_initializer=trunc_normal(0.01)): + with slim.arg_scope([slim.conv2d, slim.max_pool2d], + stride=1, padding='SAME'): + end_point = 'Conv2d_1a_7x7' + net = slim.conv2d(inputs, 64, [7, 7], stride=2, scope=end_point) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + end_point = 'MaxPool_2a_3x3' + net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + end_point = 'Conv2d_2b_1x1' + net = slim.conv2d(net, 64, [1, 1], scope=end_point) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + end_point = 'Conv2d_2c_3x3' + net = slim.conv2d(net, 192, [3, 3], scope=end_point) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + end_point = 'MaxPool_3a_3x3' + net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + + end_point = 'Mixed_3b' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 128, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 32, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 32, [1, 1], scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + + end_point = 'Mixed_3c' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 192, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + + end_point = 'MaxPool_4a_3x3' + net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + + end_point = 'Mixed_4b' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 208, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 48, [3, 3], scope='Conv2d_0b_3x3') + with 
tf.variable_scope('Branch_3'): + branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + + end_point = 'Mixed_4c' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + + end_point = 'Mixed_4d' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 256, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + + end_point = 'Mixed_4e' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, 144, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 288, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + + end_point = 'Mixed_4f' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1') + net = tf.concat(3, 
[branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + + end_point = 'MaxPool_5a_2x2' + net = slim.max_pool2d(net, [2, 2], stride=2, scope=end_point) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + + end_point = 'Mixed_5b' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0a_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + + end_point = 'Mixed_5c' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, 384, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 384, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if final_endpoint == end_point: return net, end_points + raise ValueError('Unknown final endpoint %s' % final_endpoint) + + +def inception_v1(inputs, + num_classes=1000, + is_training=True, + dropout_keep_prob=0.8, + prediction_fn=slim.softmax, + spatial_squeeze=True, + reuse=None, + scope='InceptionV1'): + """Defines the Inception V1 architecture. + + This architecture is defined in: + + Going deeper with convolutions + Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, + Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich. + http://arxiv.org/pdf/1409.4842v1.pdf. + + The default image size used to train this network is 224x224. + + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + num_classes: number of predicted classes. + is_training: whether is training or not. + dropout_keep_prob: the percentage of activation values that are retained. + prediction_fn: a function to get predictions out of logits. + spatial_squeeze: if True, logits is of shape is [B, C], if false logits is + of shape [B, 1, 1, C], where B is batch_size and C is number of classes. + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + + Returns: + logits: the pre-softmax activations, a tensor of size + [batch_size, num_classes] + end_points: a dictionary from components of the network to the corresponding + activation. 
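+
+  Example (a minimal sketch; `images` is an assumed [batch_size, 224, 224, 3]
+  tensor, and `inception_v1_arg_scope` is the alias defined at the bottom of
+  this file):
+
+    with slim.arg_scope(inception_v1_arg_scope()):
+      logits, end_points = inception_v1(images, num_classes=1000,
+                                         is_training=False)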
+ """ + # Final pooling and prediction + with tf.variable_scope(scope, 'InceptionV1', [inputs, num_classes], + reuse=reuse) as scope: + with slim.arg_scope([slim.batch_norm, slim.dropout], + is_training=is_training): + net, end_points = inception_v1_base(inputs, scope=scope) + with tf.variable_scope('Logits'): + net = slim.avg_pool2d(net, [7, 7], stride=1, scope='MaxPool_0a_7x7') + net = slim.dropout(net, + dropout_keep_prob, scope='Dropout_0b') + logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, + normalizer_fn=None, scope='Conv2d_0c_1x1') + if spatial_squeeze: + logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze') + + end_points['Logits'] = logits + end_points['Predictions'] = prediction_fn(logits, scope='Predictions') + return logits, end_points +inception_v1.default_image_size = 224 + +inception_v1_arg_scope = inception_utils.inception_arg_scope diff --git a/StyleMigration/nets/inception_v1_test.py b/StyleMigration/nets/inception_v1_test.py new file mode 100644 index 0000000..a769538 --- /dev/null +++ b/StyleMigration/nets/inception_v1_test.py @@ -0,0 +1,210 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for nets.inception_v1.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf + +from nets import inception + +slim = tf.contrib.slim + + +class InceptionV1Test(tf.test.TestCase): + + def testBuildClassificationNetwork(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, end_points = inception.inception_v1(inputs, num_classes) + self.assertTrue(logits.op.name.startswith('InceptionV1/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + self.assertTrue('Predictions' in end_points) + self.assertListEqual(end_points['Predictions'].get_shape().as_list(), + [batch_size, num_classes]) + + def testBuildBaseNetwork(self): + batch_size = 5 + height, width = 224, 224 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + mixed_6c, end_points = inception.inception_v1_base(inputs) + self.assertTrue(mixed_6c.op.name.startswith('InceptionV1/Mixed_5c')) + self.assertListEqual(mixed_6c.get_shape().as_list(), + [batch_size, 7, 7, 1024]) + expected_endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', + 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', + 'Mixed_3c', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', + 'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', + 'Mixed_5b', 'Mixed_5c'] + self.assertItemsEqual(end_points.keys(), expected_endpoints) + + def testBuildOnlyUptoFinalEndpoint(self): + batch_size = 5 + height, width = 224, 224 + endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', + 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', + 
'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', + 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', + 'Mixed_5c'] + for index, endpoint in enumerate(endpoints): + with tf.Graph().as_default(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + out_tensor, end_points = inception.inception_v1_base( + inputs, final_endpoint=endpoint) + self.assertTrue(out_tensor.op.name.startswith( + 'InceptionV1/' + endpoint)) + self.assertItemsEqual(endpoints[:index+1], end_points) + + def testBuildAndCheckAllEndPointsUptoMixed5c(self): + batch_size = 5 + height, width = 224, 224 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + _, end_points = inception.inception_v1_base(inputs, + final_endpoint='Mixed_5c') + endpoints_shapes = {'Conv2d_1a_7x7': [5, 112, 112, 64], + 'MaxPool_2a_3x3': [5, 56, 56, 64], + 'Conv2d_2b_1x1': [5, 56, 56, 64], + 'Conv2d_2c_3x3': [5, 56, 56, 192], + 'MaxPool_3a_3x3': [5, 28, 28, 192], + 'Mixed_3b': [5, 28, 28, 256], + 'Mixed_3c': [5, 28, 28, 480], + 'MaxPool_4a_3x3': [5, 14, 14, 480], + 'Mixed_4b': [5, 14, 14, 512], + 'Mixed_4c': [5, 14, 14, 512], + 'Mixed_4d': [5, 14, 14, 512], + 'Mixed_4e': [5, 14, 14, 528], + 'Mixed_4f': [5, 14, 14, 832], + 'MaxPool_5a_2x2': [5, 7, 7, 832], + 'Mixed_5b': [5, 7, 7, 832], + 'Mixed_5c': [5, 7, 7, 1024]} + + self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys()) + for endpoint_name in endpoints_shapes: + expected_shape = endpoints_shapes[endpoint_name] + self.assertTrue(endpoint_name in end_points) + self.assertListEqual(end_points[endpoint_name].get_shape().as_list(), + expected_shape) + + def testModelHasExpectedNumberOfParameters(self): + batch_size = 5 + height, width = 224, 224 + inputs = tf.random_uniform((batch_size, height, width, 3)) + with slim.arg_scope(inception.inception_v1_arg_scope()): + inception.inception_v1_base(inputs) + total_params, _ = slim.model_analyzer.analyze_vars( + slim.get_model_variables()) + self.assertAlmostEqual(5607184, total_params) + + def testHalfSizeImages(self): + batch_size = 5 + height, width = 112, 112 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + mixed_5c, _ = inception.inception_v1_base(inputs) + self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c')) + self.assertListEqual(mixed_5c.get_shape().as_list(), + [batch_size, 4, 4, 1024]) + + def testUnknownImageShape(self): + tf.reset_default_graph() + batch_size = 2 + height, width = 224, 224 + num_classes = 1000 + input_np = np.random.uniform(0, 1, (batch_size, height, width, 3)) + with self.test_session() as sess: + inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3)) + logits, end_points = inception.inception_v1(inputs, num_classes) + self.assertTrue(logits.op.name.startswith('InceptionV1/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + pre_pool = end_points['Mixed_5c'] + feed_dict = {inputs: input_np} + tf.initialize_all_variables().run() + pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict) + self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024]) + + def testUnknowBatchSize(self): + batch_size = 1 + height, width = 224, 224 + num_classes = 1000 + + inputs = tf.placeholder(tf.float32, (None, height, width, 3)) + logits, _ = inception.inception_v1(inputs, num_classes) + self.assertTrue(logits.op.name.startswith('InceptionV1/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [None, num_classes]) + images = tf.random_uniform((batch_size, height, width, 3)) + + with self.test_session() 
as sess: + sess.run(tf.initialize_all_variables()) + output = sess.run(logits, {inputs: images.eval()}) + self.assertEquals(output.shape, (batch_size, num_classes)) + + def testEvaluation(self): + batch_size = 2 + height, width = 224, 224 + num_classes = 1000 + + eval_inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = inception.inception_v1(eval_inputs, num_classes, + is_training=False) + predictions = tf.argmax(logits, 1) + + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + output = sess.run(predictions) + self.assertEquals(output.shape, (batch_size,)) + + def testTrainEvalWithReuse(self): + train_batch_size = 5 + eval_batch_size = 2 + height, width = 224, 224 + num_classes = 1000 + + train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) + inception.inception_v1(train_inputs, num_classes) + eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) + logits, _ = inception.inception_v1(eval_inputs, num_classes, reuse=True) + predictions = tf.argmax(logits, 1) + + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + output = sess.run(predictions) + self.assertEquals(output.shape, (eval_batch_size,)) + + def testLogitsNotSqueezed(self): + num_classes = 25 + images = tf.random_uniform([1, 224, 224, 3]) + logits, _ = inception.inception_v1(images, + num_classes=num_classes, + spatial_squeeze=False) + + with self.test_session() as sess: + tf.initialize_all_variables().run() + logits_out = sess.run(logits) + self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/StyleMigration/nets/inception_v2.py b/StyleMigration/nets/inception_v2.py new file mode 100644 index 0000000..6c9f100 --- /dev/null +++ b/StyleMigration/nets/inception_v2.py @@ -0,0 +1,520 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains the definition for inception v2 classification network.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from nets import inception_utils + +slim = tf.contrib.slim +trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) + + +def inception_v2_base(inputs, + final_endpoint='Mixed_5c', + min_depth=16, + depth_multiplier=1.0, + scope=None): + """Inception v2 (6a2). + + Constructs an Inception v2 network from inputs to the given final endpoint. + This method can construct the network up to the layer inception(5b) as + described in http://arxiv.org/abs/1502.03167. + + Args: + inputs: a tensor of shape [batch_size, height, width, channels]. + final_endpoint: specifies the endpoint to construct the network up to. 
It + can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', + 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a', + 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b', + 'Mixed_5c']. + min_depth: Minimum depth value (number of channels) for all convolution ops. + Enforced when depth_multiplier < 1, and not an active constraint when + depth_multiplier >= 1. + depth_multiplier: Float multiplier for the depth (number of channels) + for all convolution ops. The value must be greater than zero. Typical + usage will be to set this value in (0, 1) to reduce the number of + parameters or computation cost of the model. + scope: Optional variable_scope. + + Returns: + tensor_out: output tensor corresponding to the final_endpoint. + end_points: a set of activations for external use, for example summaries or + losses. + + Raises: + ValueError: if final_endpoint is not set to one of the predefined values, + or depth_multiplier <= 0 + """ + + # end_points will collect relevant activations for external use, for example + # summaries or losses. + end_points = {} + + # Used to find thinned depths for each layer. + if depth_multiplier <= 0: + raise ValueError('depth_multiplier is not greater than zero.') + depth = lambda d: max(int(d * depth_multiplier), min_depth) + + with tf.variable_scope(scope, 'InceptionV2', [inputs]): + with slim.arg_scope( + [slim.conv2d, slim.max_pool2d, slim.avg_pool2d, slim.separable_conv2d], + stride=1, padding='SAME'): + + # Note that sizes in the comments below assume an input spatial size of + # 224x224, however, the inputs can be of any size greater 32x32. + + # 224 x 224 x 3 + end_point = 'Conv2d_1a_7x7' + # depthwise_multiplier here is different from depth_multiplier. + # depthwise_multiplier determines the output channels of the initial + # depthwise conv (see docs for tf.nn.separable_conv2d), while + # depth_multiplier controls the # channels of the subsequent 1x1 + # convolution. Must have + # in_channels * depthwise_multipler <= out_channels + # so that the separable convolution is not overparameterized. + depthwise_multiplier = min(int(depth(64) / 3), 8) + net = slim.separable_conv2d( + inputs, depth(64), [7, 7], depth_multiplier=depthwise_multiplier, + stride=2, weights_initializer=trunc_normal(1.0), + scope=end_point) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 112 x 112 x 64 + end_point = 'MaxPool_2a_3x3' + net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 56 x 56 x 64 + end_point = 'Conv2d_2b_1x1' + net = slim.conv2d(net, depth(64), [1, 1], scope=end_point, + weights_initializer=trunc_normal(0.1)) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 56 x 56 x 64 + end_point = 'Conv2d_2c_3x3' + net = slim.conv2d(net, depth(192), [3, 3], scope=end_point) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 56 x 56 x 192 + end_point = 'MaxPool_3a_3x3' + net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 28 x 28 x 192 + # Inception module. 
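+      # Most Mixed_* blocks below concatenate four parallel branches along the
+      # channel axis (axis 3 in this older tf.concat argument order): a 1x1
+      # conv, a 1x1 -> 3x3 stack, a 1x1 -> double 3x3 stack, and a pooling
+      # branch with a 1x1 projection. Grid-reduction blocks (e.g. Mixed_4a)
+      # instead use stride-2 branches plus a parallel max pool.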
+ end_point = 'Mixed_3b' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d( + net, depth(64), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(64), [3, 3], + scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d( + net, depth(64), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], + scope='Conv2d_0b_3x3') + branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], + scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d( + branch_3, depth(32), [1, 1], + weights_initializer=trunc_normal(0.1), + scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 28 x 28 x 256 + end_point = 'Mixed_3c' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d( + net, depth(64), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], + scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d( + net, depth(64), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], + scope='Conv2d_0b_3x3') + branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], + scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d( + branch_3, depth(64), [1, 1], + weights_initializer=trunc_normal(0.1), + scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 28 x 28 x 320 + end_point = 'Mixed_4a' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d( + net, depth(128), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_0 = slim.conv2d(branch_0, depth(160), [3, 3], stride=2, + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d( + net, depth(64), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d( + branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3') + branch_1 = slim.conv2d( + branch_1, depth(96), [3, 3], stride=2, scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.max_pool2d( + net, [3, 3], stride=2, scope='MaxPool_1a_3x3') + net = tf.concat(3, [branch_0, branch_1, branch_2]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 14 x 14 x 576 + end_point = 'Mixed_4b' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(224), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d( + net, depth(64), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d( + branch_1, depth(96), [3, 3], 
scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d( + net, depth(96), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, depth(128), [3, 3], + scope='Conv2d_0b_3x3') + branch_2 = slim.conv2d(branch_2, depth(128), [3, 3], + scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d( + branch_3, depth(128), [1, 1], + weights_initializer=trunc_normal(0.1), + scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 14 x 14 x 576 + end_point = 'Mixed_4c' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d( + net, depth(96), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(128), [3, 3], + scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d( + net, depth(96), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, depth(128), [3, 3], + scope='Conv2d_0b_3x3') + branch_2 = slim.conv2d(branch_2, depth(128), [3, 3], + scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d( + branch_3, depth(128), [1, 1], + weights_initializer=trunc_normal(0.1), + scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 14 x 14 x 576 + end_point = 'Mixed_4d' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d( + net, depth(128), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(160), [3, 3], + scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d( + net, depth(128), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, depth(160), [3, 3], + scope='Conv2d_0b_3x3') + branch_2 = slim.conv2d(branch_2, depth(160), [3, 3], + scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d( + branch_3, depth(96), [1, 1], + weights_initializer=trunc_normal(0.1), + scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + + # 14 x 14 x 576 + end_point = 'Mixed_4e' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(96), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d( + net, depth(128), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], + scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d( + net, depth(160), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + 
branch_2 = slim.conv2d(branch_2, depth(192), [3, 3], + scope='Conv2d_0b_3x3') + branch_2 = slim.conv2d(branch_2, depth(192), [3, 3], + scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d( + branch_3, depth(96), [1, 1], + weights_initializer=trunc_normal(0.1), + scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 14 x 14 x 576 + end_point = 'Mixed_5a' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d( + net, depth(128), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2, + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d( + net, depth(192), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], + scope='Conv2d_0b_3x3') + branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2, + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.max_pool2d(net, [3, 3], stride=2, + scope='MaxPool_1a_3x3') + net = tf.concat(3, [branch_0, branch_1, branch_2]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 7 x 7 x 1024 + end_point = 'Mixed_5b' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d( + net, depth(192), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(320), [3, 3], + scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d( + net, depth(160), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], + scope='Conv2d_0b_3x3') + branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], + scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d( + branch_3, depth(128), [1, 1], + weights_initializer=trunc_normal(0.1), + scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + + # 7 x 7 x 1024 + end_point = 'Mixed_5c' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d( + net, depth(192), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(320), [3, 3], + scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d( + net, depth(192), [1, 1], + weights_initializer=trunc_normal(0.09), + scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], + scope='Conv2d_0b_3x3') + branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], + scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') + branch_3 = slim.conv2d( + branch_3, depth(128), [1, 1], + weights_initializer=trunc_normal(0.1), + scope='Conv2d_0b_1x1') + net = 
tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + raise ValueError('Unknown final endpoint %s' % final_endpoint) + + +def inception_v2(inputs, + num_classes=1000, + is_training=True, + dropout_keep_prob=0.8, + min_depth=16, + depth_multiplier=1.0, + prediction_fn=slim.softmax, + spatial_squeeze=True, + reuse=None, + scope='InceptionV2'): + """Inception v2 model for classification. + + Constructs an Inception v2 network for classification as described in + http://arxiv.org/abs/1502.03167. + + The default image size used to train this network is 224x224. + + Args: + inputs: a tensor of shape [batch_size, height, width, channels]. + num_classes: number of predicted classes. + is_training: whether is training or not. + dropout_keep_prob: the percentage of activation values that are retained. + min_depth: Minimum depth value (number of channels) for all convolution ops. + Enforced when depth_multiplier < 1, and not an active constraint when + depth_multiplier >= 1. + depth_multiplier: Float multiplier for the depth (number of channels) + for all convolution ops. The value must be greater than zero. Typical + usage will be to set this value in (0, 1) to reduce the number of + parameters or computation cost of the model. + prediction_fn: a function to get predictions out of logits. + spatial_squeeze: if True, logits is of shape is [B, C], if false logits is + of shape [B, 1, 1, C], where B is batch_size and C is number of classes. + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + + Returns: + logits: the pre-softmax activations, a tensor of size + [batch_size, num_classes] + end_points: a dictionary from components of the network to the corresponding + activation. + + Raises: + ValueError: if final_endpoint is not set to one of the predefined values, + or depth_multiplier <= 0 + """ + if depth_multiplier <= 0: + raise ValueError('depth_multiplier is not greater than zero.') + + # Final pooling and prediction + with tf.variable_scope(scope, 'InceptionV2', [inputs, num_classes], + reuse=reuse) as scope: + with slim.arg_scope([slim.batch_norm, slim.dropout], + is_training=is_training): + net, end_points = inception_v2_base( + inputs, scope=scope, min_depth=min_depth, + depth_multiplier=depth_multiplier) + with tf.variable_scope('Logits'): + kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7]) + net = slim.avg_pool2d(net, kernel_size, padding='VALID', + scope='AvgPool_1a_{}x{}'.format(*kernel_size)) + # 1 x 1 x 1024 + net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b') + logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, + normalizer_fn=None, scope='Conv2d_1c_1x1') + if spatial_squeeze: + logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze') + end_points['Logits'] = logits + end_points['Predictions'] = prediction_fn(logits, scope='Predictions') + return logits, end_points +inception_v2.default_image_size = 224 + + +def _reduced_kernel_size_for_small_input(input_tensor, kernel_size): + """Define kernel size which is automatically reduced for small input. + + If the shape of the input images is unknown at graph construction time this + function assumes that the input images are is large enough. + + Args: + input_tensor: input tensor of size [batch_size, height, width, channels]. 
+ kernel_size: desired kernel size of length 2: [kernel_height, kernel_width] + + Returns: + a tensor with the kernel size. + + TODO(jrru): Make this function work with unknown shapes. Theoretically, this + can be done with the code below. Problems are two-fold: (1) If the shape was + known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot + handle tensors that define the kernel size. + shape = tf.shape(input_tensor) + return = tf.pack([tf.minimum(shape[1], kernel_size[0]), + tf.minimum(shape[2], kernel_size[1])]) + + """ + shape = input_tensor.get_shape().as_list() + if shape[1] is None or shape[2] is None: + kernel_size_out = kernel_size + else: + kernel_size_out = [min(shape[1], kernel_size[0]), + min(shape[2], kernel_size[1])] + return kernel_size_out + + +inception_v2_arg_scope = inception_utils.inception_arg_scope diff --git a/StyleMigration/nets/inception_v2_test.py b/StyleMigration/nets/inception_v2_test.py new file mode 100644 index 0000000..0ff850c --- /dev/null +++ b/StyleMigration/nets/inception_v2_test.py @@ -0,0 +1,262 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for nets.inception_v2.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf + +from nets import inception + +slim = tf.contrib.slim + + +class InceptionV2Test(tf.test.TestCase): + + def testBuildClassificationNetwork(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, end_points = inception.inception_v2(inputs, num_classes) + self.assertTrue(logits.op.name.startswith('InceptionV2/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + self.assertTrue('Predictions' in end_points) + self.assertListEqual(end_points['Predictions'].get_shape().as_list(), + [batch_size, num_classes]) + + def testBuildBaseNetwork(self): + batch_size = 5 + height, width = 224, 224 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + mixed_5c, end_points = inception.inception_v2_base(inputs) + self.assertTrue(mixed_5c.op.name.startswith('InceptionV2/Mixed_5c')) + self.assertListEqual(mixed_5c.get_shape().as_list(), + [batch_size, 7, 7, 1024]) + expected_endpoints = ['Mixed_3b', 'Mixed_3c', 'Mixed_4a', 'Mixed_4b', + 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', + 'Mixed_5b', 'Mixed_5c', 'Conv2d_1a_7x7', + 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3', + 'MaxPool_3a_3x3'] + self.assertItemsEqual(end_points.keys(), expected_endpoints) + + def testBuildOnlyUptoFinalEndpoint(self): + batch_size = 5 + height, width = 224, 224 + endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', + 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', + 'Mixed_4a', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', + 
'Mixed_5a', 'Mixed_5b', 'Mixed_5c'] + for index, endpoint in enumerate(endpoints): + with tf.Graph().as_default(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + out_tensor, end_points = inception.inception_v2_base( + inputs, final_endpoint=endpoint) + self.assertTrue(out_tensor.op.name.startswith( + 'InceptionV2/' + endpoint)) + self.assertItemsEqual(endpoints[:index+1], end_points) + + def testBuildAndCheckAllEndPointsUptoMixed5c(self): + batch_size = 5 + height, width = 224, 224 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + _, end_points = inception.inception_v2_base(inputs, + final_endpoint='Mixed_5c') + endpoints_shapes = {'Mixed_3b': [batch_size, 28, 28, 256], + 'Mixed_3c': [batch_size, 28, 28, 320], + 'Mixed_4a': [batch_size, 14, 14, 576], + 'Mixed_4b': [batch_size, 14, 14, 576], + 'Mixed_4c': [batch_size, 14, 14, 576], + 'Mixed_4d': [batch_size, 14, 14, 576], + 'Mixed_4e': [batch_size, 14, 14, 576], + 'Mixed_5a': [batch_size, 7, 7, 1024], + 'Mixed_5b': [batch_size, 7, 7, 1024], + 'Mixed_5c': [batch_size, 7, 7, 1024], + 'Conv2d_1a_7x7': [batch_size, 112, 112, 64], + 'MaxPool_2a_3x3': [batch_size, 56, 56, 64], + 'Conv2d_2b_1x1': [batch_size, 56, 56, 64], + 'Conv2d_2c_3x3': [batch_size, 56, 56, 192], + 'MaxPool_3a_3x3': [batch_size, 28, 28, 192]} + self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys()) + for endpoint_name in endpoints_shapes: + expected_shape = endpoints_shapes[endpoint_name] + self.assertTrue(endpoint_name in end_points) + self.assertListEqual(end_points[endpoint_name].get_shape().as_list(), + expected_shape) + + def testModelHasExpectedNumberOfParameters(self): + batch_size = 5 + height, width = 224, 224 + inputs = tf.random_uniform((batch_size, height, width, 3)) + with slim.arg_scope(inception.inception_v2_arg_scope()): + inception.inception_v2_base(inputs) + total_params, _ = slim.model_analyzer.analyze_vars( + slim.get_model_variables()) + self.assertAlmostEqual(10173112, total_params) + + def testBuildEndPointsWithDepthMultiplierLessThanOne(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + _, end_points = inception.inception_v2(inputs, num_classes) + + endpoint_keys = [key for key in end_points.keys() + if key.startswith('Mixed') or key.startswith('Conv')] + + _, end_points_with_multiplier = inception.inception_v2( + inputs, num_classes, scope='depth_multiplied_net', + depth_multiplier=0.5) + + for key in endpoint_keys: + original_depth = end_points[key].get_shape().as_list()[3] + new_depth = end_points_with_multiplier[key].get_shape().as_list()[3] + self.assertEqual(0.5 * original_depth, new_depth) + + def testBuildEndPointsWithDepthMultiplierGreaterThanOne(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + _, end_points = inception.inception_v2(inputs, num_classes) + + endpoint_keys = [key for key in end_points.keys() + if key.startswith('Mixed') or key.startswith('Conv')] + + _, end_points_with_multiplier = inception.inception_v2( + inputs, num_classes, scope='depth_multiplied_net', + depth_multiplier=2.0) + + for key in endpoint_keys: + original_depth = end_points[key].get_shape().as_list()[3] + new_depth = end_points_with_multiplier[key].get_shape().as_list()[3] + self.assertEqual(2.0 * original_depth, new_depth) + + def testRaiseValueErrorWithInvalidDepthMultiplier(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + + 
inputs = tf.random_uniform((batch_size, height, width, 3)) + with self.assertRaises(ValueError): + _ = inception.inception_v2(inputs, num_classes, depth_multiplier=-0.1) + with self.assertRaises(ValueError): + _ = inception.inception_v2(inputs, num_classes, depth_multiplier=0.0) + + def testHalfSizeImages(self): + batch_size = 5 + height, width = 112, 112 + num_classes = 1000 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, end_points = inception.inception_v2(inputs, num_classes) + self.assertTrue(logits.op.name.startswith('InceptionV2/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + pre_pool = end_points['Mixed_5c'] + self.assertListEqual(pre_pool.get_shape().as_list(), + [batch_size, 4, 4, 1024]) + + def testUnknownImageShape(self): + tf.reset_default_graph() + batch_size = 2 + height, width = 224, 224 + num_classes = 1000 + input_np = np.random.uniform(0, 1, (batch_size, height, width, 3)) + with self.test_session() as sess: + inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3)) + logits, end_points = inception.inception_v2(inputs, num_classes) + self.assertTrue(logits.op.name.startswith('InceptionV2/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + pre_pool = end_points['Mixed_5c'] + feed_dict = {inputs: input_np} + tf.initialize_all_variables().run() + pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict) + self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024]) + + def testUnknowBatchSize(self): + batch_size = 1 + height, width = 224, 224 + num_classes = 1000 + + inputs = tf.placeholder(tf.float32, (None, height, width, 3)) + logits, _ = inception.inception_v2(inputs, num_classes) + self.assertTrue(logits.op.name.startswith('InceptionV2/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [None, num_classes]) + images = tf.random_uniform((batch_size, height, width, 3)) + + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + output = sess.run(logits, {inputs: images.eval()}) + self.assertEquals(output.shape, (batch_size, num_classes)) + + def testEvaluation(self): + batch_size = 2 + height, width = 224, 224 + num_classes = 1000 + + eval_inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = inception.inception_v2(eval_inputs, num_classes, + is_training=False) + predictions = tf.argmax(logits, 1) + + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + output = sess.run(predictions) + self.assertEquals(output.shape, (batch_size,)) + + def testTrainEvalWithReuse(self): + train_batch_size = 5 + eval_batch_size = 2 + height, width = 150, 150 + num_classes = 1000 + + train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) + inception.inception_v2(train_inputs, num_classes) + eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) + logits, _ = inception.inception_v2(eval_inputs, num_classes, reuse=True) + predictions = tf.argmax(logits, 1) + + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + output = sess.run(predictions) + self.assertEquals(output.shape, (eval_batch_size,)) + + def testLogitsNotSqueezed(self): + num_classes = 25 + images = tf.random_uniform([1, 224, 224, 3]) + logits, _ = inception.inception_v2(images, + num_classes=num_classes, + spatial_squeeze=False) + + with self.test_session() as sess: + tf.initialize_all_variables().run() + logits_out = sess.run(logits) + 
self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/StyleMigration/nets/inception_v3.py b/StyleMigration/nets/inception_v3.py new file mode 100644 index 0000000..5c5f965 --- /dev/null +++ b/StyleMigration/nets/inception_v3.py @@ -0,0 +1,560 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains the definition for inception v3 classification network.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from nets import inception_utils + +slim = tf.contrib.slim +trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) + + +def inception_v3_base(inputs, + final_endpoint='Mixed_7c', + min_depth=16, + depth_multiplier=1.0, + scope=None): + """Inception model from http://arxiv.org/abs/1512.00567. + + Constructs an Inception v3 network from inputs to the given final endpoint. + This method can construct the network up to the final inception block + Mixed_7c. + + Note that the names of the layers in the paper do not correspond to the names + of the endpoints registered by this function although they build the same + network. + + Here is a mapping from the old_names to the new names: + Old name | New name + ======================================= + conv0 | Conv2d_1a_3x3 + conv1 | Conv2d_2a_3x3 + conv2 | Conv2d_2b_3x3 + pool1 | MaxPool_3a_3x3 + conv3 | Conv2d_3b_1x1 + conv4 | Conv2d_4a_3x3 + pool2 | MaxPool_5a_3x3 + mixed_35x35x256a | Mixed_5b + mixed_35x35x288a | Mixed_5c + mixed_35x35x288b | Mixed_5d + mixed_17x17x768a | Mixed_6a + mixed_17x17x768b | Mixed_6b + mixed_17x17x768c | Mixed_6c + mixed_17x17x768d | Mixed_6d + mixed_17x17x768e | Mixed_6e + mixed_8x8x1280a | Mixed_7a + mixed_8x8x2048a | Mixed_7b + mixed_8x8x2048b | Mixed_7c + + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + final_endpoint: specifies the endpoint to construct the network up to. It + can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', + 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', + 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', + 'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c']. + min_depth: Minimum depth value (number of channels) for all convolution ops. + Enforced when depth_multiplier < 1, and not an active constraint when + depth_multiplier >= 1. + depth_multiplier: Float multiplier for the depth (number of channels) + for all convolution ops. The value must be greater than zero. Typical + usage will be to set this value in (0, 1) to reduce the number of + parameters or computation cost of the model. + scope: Optional variable_scope. + + Returns: + tensor_out: output tensor corresponding to the final_endpoint. 
+ end_points: a set of activations for external use, for example summaries or + losses. + + Raises: + ValueError: if final_endpoint is not set to one of the predefined values, + or depth_multiplier <= 0 + """ + # end_points will collect relevant activations for external use, for example + # summaries or losses. + end_points = {} + + if depth_multiplier <= 0: + raise ValueError('depth_multiplier is not greater than zero.') + depth = lambda d: max(int(d * depth_multiplier), min_depth) + + with tf.variable_scope(scope, 'InceptionV3', [inputs]): + with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], + stride=1, padding='VALID'): + # 299 x 299 x 3 + end_point = 'Conv2d_1a_3x3' + net = slim.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 149 x 149 x 32 + end_point = 'Conv2d_2a_3x3' + net = slim.conv2d(net, depth(32), [3, 3], scope=end_point) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 147 x 147 x 32 + end_point = 'Conv2d_2b_3x3' + net = slim.conv2d(net, depth(64), [3, 3], padding='SAME', scope=end_point) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 147 x 147 x 64 + end_point = 'MaxPool_3a_3x3' + net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 73 x 73 x 64 + end_point = 'Conv2d_3b_1x1' + net = slim.conv2d(net, depth(80), [1, 1], scope=end_point) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 73 x 73 x 80. + end_point = 'Conv2d_4a_3x3' + net = slim.conv2d(net, depth(192), [3, 3], scope=end_point) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 71 x 71 x 192. + end_point = 'MaxPool_5a_3x3' + net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # 35 x 35 x 192. + + # Inception blocks + with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], + stride=1, padding='SAME'): + # mixed: 35 x 35 x 256. + end_point = 'Mixed_5b' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(64), [5, 5], + scope='Conv2d_0b_5x5') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], + scope='Conv2d_0b_3x3') + branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], + scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, depth(32), [1, 1], + scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + + # mixed_1: 35 x 35 x 288. 
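The spatial sizes in the stem comments above (299 → 149 → 147 → 147 → 73 → 73 → 71 → 35) follow from the usual output-size rule for VALID and SAME padding; a small sanity-check sketch (the `out_size` helper is just for illustration):

def out_size(n, k, s, padding):
    # Edge length after a conv/pool layer, following TF's convention.
    if padding == 'VALID':
        return (n - k) // s + 1
    return -(-n // s)  # SAME: ceil(n / s)

n = 299
n = out_size(n, 3, 2, 'VALID')   # Conv2d_1a_3x3  -> 149
n = out_size(n, 3, 1, 'VALID')   # Conv2d_2a_3x3  -> 147
n = out_size(n, 3, 1, 'SAME')    # Conv2d_2b_3x3  -> 147
n = out_size(n, 3, 2, 'VALID')   # MaxPool_3a_3x3 -> 73
n = out_size(n, 1, 1, 'VALID')   # Conv2d_3b_1x1  -> 73
n = out_size(n, 3, 1, 'VALID')   # Conv2d_4a_3x3  -> 71
n = out_size(n, 3, 2, 'VALID')   # MaxPool_5a_3x3 -> 35
assert n == 35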
+ end_point = 'Mixed_5c' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0b_1x1') + branch_1 = slim.conv2d(branch_1, depth(64), [5, 5], + scope='Conv_1_0c_5x5') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, depth(64), [1, 1], + scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], + scope='Conv2d_0b_3x3') + branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], + scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, depth(64), [1, 1], + scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + + # mixed_2: 35 x 35 x 288. + end_point = 'Mixed_5d' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(64), [5, 5], + scope='Conv2d_0b_5x5') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], + scope='Conv2d_0b_3x3') + branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], + scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, depth(64), [1, 1], + scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + + # mixed_3: 17 x 17 x 768. + end_point = 'Mixed_6a' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(384), [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], + scope='Conv2d_0b_3x3') + branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_1x1') + with tf.variable_scope('Branch_2'): + branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + net = tf.concat(3, [branch_0, branch_1, branch_2]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + + # mixed4: 17 x 17 x 768. 
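As a quick check of the 768 channels reported after the Mixed_6a grid reduction above (assuming depth_multiplier=1.0, so depth(d) == d): the two convolutional branches contribute 384 and 96 channels, and the stride-2 max pool passes the 288 input channels through unchanged.

in_channels = 288            # Mixed_5d output, "35 x 35 x 288"
branch_0 = 384               # 3x3 stride-2 conv
branch_1 = 96                # 1x1 -> 3x3 -> 3x3 (stride 2) chain
branch_2 = in_channels       # max pool leaves the channel count alone
assert branch_0 + branch_1 + branch_2 == 768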
+ end_point = 'Mixed_6b' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(128), [1, 7], + scope='Conv2d_0b_1x7') + branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], + scope='Conv2d_0c_7x1') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, depth(128), [7, 1], + scope='Conv2d_0b_7x1') + branch_2 = slim.conv2d(branch_2, depth(128), [1, 7], + scope='Conv2d_0c_1x7') + branch_2 = slim.conv2d(branch_2, depth(128), [7, 1], + scope='Conv2d_0d_7x1') + branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], + scope='Conv2d_0e_1x7') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], + scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + + # mixed_5: 17 x 17 x 768. + end_point = 'Mixed_6c' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(160), [1, 7], + scope='Conv2d_0b_1x7') + branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], + scope='Conv2d_0c_7x1') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], + scope='Conv2d_0b_7x1') + branch_2 = slim.conv2d(branch_2, depth(160), [1, 7], + scope='Conv2d_0c_1x7') + branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], + scope='Conv2d_0d_7x1') + branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], + scope='Conv2d_0e_1x7') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], + scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # mixed_6: 17 x 17 x 768. 
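Mixed_6b and Mixed_6c above replace a 7x7 convolution with a 1x7 followed by a 7x1. A rough, illustrative weight count using Branch_1's channel sizes from Mixed_6b (biases ignored) shows why the factorization pays off:

c_in, c_mid, c_out = 128, 128, 192                   # Branch_1 of Mixed_6b
full_7x7 = 7 * 7 * c_in * c_out                      # 1,204,224 weights
factorized = 7 * c_in * c_mid + 7 * c_mid * c_out    # 286,720 weights
assert factorized < full_7x7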
+ end_point = 'Mixed_6d' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(160), [1, 7], + scope='Conv2d_0b_1x7') + branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], + scope='Conv2d_0c_7x1') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], + scope='Conv2d_0b_7x1') + branch_2 = slim.conv2d(branch_2, depth(160), [1, 7], + scope='Conv2d_0c_1x7') + branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], + scope='Conv2d_0d_7x1') + branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], + scope='Conv2d_0e_1x7') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], + scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + + # mixed_7: 17 x 17 x 768. + end_point = 'Mixed_6e' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(192), [1, 7], + scope='Conv2d_0b_1x7') + branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], + scope='Conv2d_0c_7x1') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, depth(192), [7, 1], + scope='Conv2d_0b_7x1') + branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], + scope='Conv2d_0c_1x7') + branch_2 = slim.conv2d(branch_2, depth(192), [7, 1], + scope='Conv2d_0d_7x1') + branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], + scope='Conv2d_0e_1x7') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], + scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + + # mixed_8: 8 x 8 x 1280. + end_point = 'Mixed_7a' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') + branch_0 = slim.conv2d(branch_0, depth(320), [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, depth(192), [1, 7], + scope='Conv2d_0b_1x7') + branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], + scope='Conv2d_0c_7x1') + branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + net = tf.concat(3, [branch_0, branch_1, branch_2]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + # mixed_9: 8 x 8 x 2048. 
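The 2048 channels noted for the Mixed_7b and Mixed_7c blocks that follow come from two branches whose parallel 1x3 and 3x1 outputs are concatenated; a quick tally, again assuming depth_multiplier=1.0:

branch_0 = 320               # 1x1 conv
branch_1 = 384 + 384         # parallel 1x3 and 3x1 convs, concatenated
branch_2 = 384 + 384         # same expansion after an extra 3x3 conv
branch_3 = 192               # avg pool followed by a 1x1 projection
assert branch_0 + branch_1 + branch_2 + branch_3 == 2048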
+ end_point = 'Mixed_7b' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1') + branch_1 = tf.concat(3, [ + slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'), + slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0b_3x1')]) + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d( + branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3') + branch_2 = tf.concat(3, [ + slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'), + slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')]) + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d( + branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + + # mixed_10: 8 x 8 x 2048. + end_point = 'Mixed_7c' + with tf.variable_scope(end_point): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1') + branch_1 = tf.concat(3, [ + slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'), + slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0c_3x1')]) + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d( + branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3') + branch_2 = tf.concat(3, [ + slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'), + slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')]) + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d( + branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') + net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + end_points[end_point] = net + if end_point == final_endpoint: return net, end_points + raise ValueError('Unknown final endpoint %s' % final_endpoint) + + +def inception_v3(inputs, + num_classes=1000, + is_training=True, + dropout_keep_prob=0.8, + min_depth=16, + depth_multiplier=1.0, + prediction_fn=slim.softmax, + spatial_squeeze=True, + reuse=None, + scope='InceptionV3'): + """Inception model from http://arxiv.org/abs/1512.00567. + + "Rethinking the Inception Architecture for Computer Vision" + + Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, + Zbigniew Wojna. + + With the default arguments this method constructs the exact model defined in + the paper. However, one can experiment with variations of the inception_v3 + network by changing arguments dropout_keep_prob, min_depth and + depth_multiplier. + + The default image size used to train this network is 299x299. + + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + num_classes: number of predicted classes. + is_training: whether is training or not. + dropout_keep_prob: the percentage of activation values that are retained. + min_depth: Minimum depth value (number of channels) for all convolution ops. + Enforced when depth_multiplier < 1, and not an active constraint when + depth_multiplier >= 1. 
+ depth_multiplier: Float multiplier for the depth (number of channels) + for all convolution ops. The value must be greater than zero. Typical + usage will be to set this value in (0, 1) to reduce the number of + parameters or computation cost of the model. + prediction_fn: a function to get predictions out of logits. + spatial_squeeze: if True, logits is of shape is [B, C], if false logits is + of shape [B, 1, 1, C], where B is batch_size and C is number of classes. + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + + Returns: + logits: the pre-softmax activations, a tensor of size + [batch_size, num_classes] + end_points: a dictionary from components of the network to the corresponding + activation. + + Raises: + ValueError: if 'depth_multiplier' is less than or equal to zero. + """ + if depth_multiplier <= 0: + raise ValueError('depth_multiplier is not greater than zero.') + depth = lambda d: max(int(d * depth_multiplier), min_depth) + + with tf.variable_scope(scope, 'InceptionV3', [inputs, num_classes], + reuse=reuse) as scope: + with slim.arg_scope([slim.batch_norm, slim.dropout], + is_training=is_training): + net, end_points = inception_v3_base( + inputs, scope=scope, min_depth=min_depth, + depth_multiplier=depth_multiplier) + + # Auxiliary Head logits + with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], + stride=1, padding='SAME'): + aux_logits = end_points['Mixed_6e'] + with tf.variable_scope('AuxLogits'): + aux_logits = slim.avg_pool2d( + aux_logits, [5, 5], stride=3, padding='VALID', + scope='AvgPool_1a_5x5') + aux_logits = slim.conv2d(aux_logits, depth(128), [1, 1], + scope='Conv2d_1b_1x1') + + # Shape of feature map before the final layer. + kernel_size = _reduced_kernel_size_for_small_input( + aux_logits, [5, 5]) + aux_logits = slim.conv2d( + aux_logits, depth(768), kernel_size, + weights_initializer=trunc_normal(0.01), + padding='VALID', scope='Conv2d_2a_{}x{}'.format(*kernel_size)) + aux_logits = slim.conv2d( + aux_logits, num_classes, [1, 1], activation_fn=None, + normalizer_fn=None, weights_initializer=trunc_normal(0.001), + scope='Conv2d_2b_1x1') + if spatial_squeeze: + aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze') + end_points['AuxLogits'] = aux_logits + + # Final pooling and prediction + with tf.variable_scope('Logits'): + kernel_size = _reduced_kernel_size_for_small_input(net, [8, 8]) + net = slim.avg_pool2d(net, kernel_size, padding='VALID', + scope='AvgPool_1a_{}x{}'.format(*kernel_size)) + # 1 x 1 x 2048 + net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b') + end_points['PreLogits'] = net + # 2048 + logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, + normalizer_fn=None, scope='Conv2d_1c_1x1') + if spatial_squeeze: + logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze') + # 1000 + end_points['Logits'] = logits + end_points['Predictions'] = prediction_fn(logits, scope='Predictions') + return logits, end_points +inception_v3.default_image_size = 299 + + +def _reduced_kernel_size_for_small_input(input_tensor, kernel_size): + """Define kernel size which is automatically reduced for small input. + + If the shape of the input images is unknown at graph construction time this + function assumes that the input images are is large enough. + + Args: + input_tensor: input tensor of size [batch_size, height, width, channels]. 
+ kernel_size: desired kernel size of length 2: [kernel_height, kernel_width] + + Returns: + a tensor with the kernel size. + + TODO(jrru): Make this function work with unknown shapes. Theoretically, this + can be done with the code below. Problems are two-fold: (1) If the shape was + known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot + handle tensors that define the kernel size. + shape = tf.shape(input_tensor) + return = tf.pack([tf.minimum(shape[1], kernel_size[0]), + tf.minimum(shape[2], kernel_size[1])]) + + """ + shape = input_tensor.get_shape().as_list() + if shape[1] is None or shape[2] is None: + kernel_size_out = kernel_size + else: + kernel_size_out = [min(shape[1], kernel_size[0]), + min(shape[2], kernel_size[1])] + return kernel_size_out + + +inception_v3_arg_scope = inception_utils.inception_arg_scope diff --git a/StyleMigration/nets/inception_v3_test.py b/StyleMigration/nets/inception_v3_test.py new file mode 100644 index 0000000..c82b265 --- /dev/null +++ b/StyleMigration/nets/inception_v3_test.py @@ -0,0 +1,292 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for nets.inception_v1.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf + +from nets import inception + +slim = tf.contrib.slim + + +class InceptionV3Test(tf.test.TestCase): + + def testBuildClassificationNetwork(self): + batch_size = 5 + height, width = 299, 299 + num_classes = 1000 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, end_points = inception.inception_v3(inputs, num_classes) + self.assertTrue(logits.op.name.startswith('InceptionV3/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + self.assertTrue('Predictions' in end_points) + self.assertListEqual(end_points['Predictions'].get_shape().as_list(), + [batch_size, num_classes]) + + def testBuildBaseNetwork(self): + batch_size = 5 + height, width = 299, 299 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + final_endpoint, end_points = inception.inception_v3_base(inputs) + self.assertTrue(final_endpoint.op.name.startswith( + 'InceptionV3/Mixed_7c')) + self.assertListEqual(final_endpoint.get_shape().as_list(), + [batch_size, 8, 8, 2048]) + expected_endpoints = ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', + 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', + 'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', + 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', + 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'] + self.assertItemsEqual(end_points.keys(), expected_endpoints) + + def testBuildOnlyUptoFinalEndpoint(self): + batch_size = 5 + height, width = 299, 299 + endpoints = ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', + 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', + 
'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', + 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', + 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'] + + for index, endpoint in enumerate(endpoints): + with tf.Graph().as_default(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + out_tensor, end_points = inception.inception_v3_base( + inputs, final_endpoint=endpoint) + self.assertTrue(out_tensor.op.name.startswith( + 'InceptionV3/' + endpoint)) + self.assertItemsEqual(endpoints[:index+1], end_points) + + def testBuildAndCheckAllEndPointsUptoMixed7c(self): + batch_size = 5 + height, width = 299, 299 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + _, end_points = inception.inception_v3_base( + inputs, final_endpoint='Mixed_7c') + endpoints_shapes = {'Conv2d_1a_3x3': [batch_size, 149, 149, 32], + 'Conv2d_2a_3x3': [batch_size, 147, 147, 32], + 'Conv2d_2b_3x3': [batch_size, 147, 147, 64], + 'MaxPool_3a_3x3': [batch_size, 73, 73, 64], + 'Conv2d_3b_1x1': [batch_size, 73, 73, 80], + 'Conv2d_4a_3x3': [batch_size, 71, 71, 192], + 'MaxPool_5a_3x3': [batch_size, 35, 35, 192], + 'Mixed_5b': [batch_size, 35, 35, 256], + 'Mixed_5c': [batch_size, 35, 35, 288], + 'Mixed_5d': [batch_size, 35, 35, 288], + 'Mixed_6a': [batch_size, 17, 17, 768], + 'Mixed_6b': [batch_size, 17, 17, 768], + 'Mixed_6c': [batch_size, 17, 17, 768], + 'Mixed_6d': [batch_size, 17, 17, 768], + 'Mixed_6e': [batch_size, 17, 17, 768], + 'Mixed_7a': [batch_size, 8, 8, 1280], + 'Mixed_7b': [batch_size, 8, 8, 2048], + 'Mixed_7c': [batch_size, 8, 8, 2048]} + self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys()) + for endpoint_name in endpoints_shapes: + expected_shape = endpoints_shapes[endpoint_name] + self.assertTrue(endpoint_name in end_points) + self.assertListEqual(end_points[endpoint_name].get_shape().as_list(), + expected_shape) + + def testModelHasExpectedNumberOfParameters(self): + batch_size = 5 + height, width = 299, 299 + inputs = tf.random_uniform((batch_size, height, width, 3)) + with slim.arg_scope(inception.inception_v3_arg_scope()): + inception.inception_v3_base(inputs) + total_params, _ = slim.model_analyzer.analyze_vars( + slim.get_model_variables()) + self.assertAlmostEqual(21802784, total_params) + + def testBuildEndPoints(self): + batch_size = 5 + height, width = 299, 299 + num_classes = 1000 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + _, end_points = inception.inception_v3(inputs, num_classes) + self.assertTrue('Logits' in end_points) + logits = end_points['Logits'] + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + self.assertTrue('AuxLogits' in end_points) + aux_logits = end_points['AuxLogits'] + self.assertListEqual(aux_logits.get_shape().as_list(), + [batch_size, num_classes]) + self.assertTrue('Mixed_7c' in end_points) + pre_pool = end_points['Mixed_7c'] + self.assertListEqual(pre_pool.get_shape().as_list(), + [batch_size, 8, 8, 2048]) + self.assertTrue('PreLogits' in end_points) + pre_logits = end_points['PreLogits'] + self.assertListEqual(pre_logits.get_shape().as_list(), + [batch_size, 1, 1, 2048]) + + def testBuildEndPointsWithDepthMultiplierLessThanOne(self): + batch_size = 5 + height, width = 299, 299 + num_classes = 1000 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + _, end_points = inception.inception_v3(inputs, num_classes) + + endpoint_keys = [key for key in end_points.keys() + if key.startswith('Mixed') or key.startswith('Conv')] + + _, end_points_with_multiplier = inception.inception_v3( 
+ inputs, num_classes, scope='depth_multiplied_net', + depth_multiplier=0.5) + + for key in endpoint_keys: + original_depth = end_points[key].get_shape().as_list()[3] + new_depth = end_points_with_multiplier[key].get_shape().as_list()[3] + self.assertEqual(0.5 * original_depth, new_depth) + + def testBuildEndPointsWithDepthMultiplierGreaterThanOne(self): + batch_size = 5 + height, width = 299, 299 + num_classes = 1000 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + _, end_points = inception.inception_v3(inputs, num_classes) + + endpoint_keys = [key for key in end_points.keys() + if key.startswith('Mixed') or key.startswith('Conv')] + + _, end_points_with_multiplier = inception.inception_v3( + inputs, num_classes, scope='depth_multiplied_net', + depth_multiplier=2.0) + + for key in endpoint_keys: + original_depth = end_points[key].get_shape().as_list()[3] + new_depth = end_points_with_multiplier[key].get_shape().as_list()[3] + self.assertEqual(2.0 * original_depth, new_depth) + + def testRaiseValueErrorWithInvalidDepthMultiplier(self): + batch_size = 5 + height, width = 299, 299 + num_classes = 1000 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + with self.assertRaises(ValueError): + _ = inception.inception_v3(inputs, num_classes, depth_multiplier=-0.1) + with self.assertRaises(ValueError): + _ = inception.inception_v3(inputs, num_classes, depth_multiplier=0.0) + + def testHalfSizeImages(self): + batch_size = 5 + height, width = 150, 150 + num_classes = 1000 + + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, end_points = inception.inception_v3(inputs, num_classes) + self.assertTrue(logits.op.name.startswith('InceptionV3/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + pre_pool = end_points['Mixed_7c'] + self.assertListEqual(pre_pool.get_shape().as_list(), + [batch_size, 3, 3, 2048]) + + def testUnknownImageShape(self): + tf.reset_default_graph() + batch_size = 2 + height, width = 299, 299 + num_classes = 1000 + input_np = np.random.uniform(0, 1, (batch_size, height, width, 3)) + with self.test_session() as sess: + inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3)) + logits, end_points = inception.inception_v3(inputs, num_classes) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + pre_pool = end_points['Mixed_7c'] + feed_dict = {inputs: input_np} + tf.initialize_all_variables().run() + pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict) + self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 8, 2048]) + + def testUnknowBatchSize(self): + batch_size = 1 + height, width = 299, 299 + num_classes = 1000 + + inputs = tf.placeholder(tf.float32, (None, height, width, 3)) + logits, _ = inception.inception_v3(inputs, num_classes) + self.assertTrue(logits.op.name.startswith('InceptionV3/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [None, num_classes]) + images = tf.random_uniform((batch_size, height, width, 3)) + + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + output = sess.run(logits, {inputs: images.eval()}) + self.assertEquals(output.shape, (batch_size, num_classes)) + + def testEvaluation(self): + batch_size = 2 + height, width = 299, 299 + num_classes = 1000 + + eval_inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = inception.inception_v3(eval_inputs, num_classes, + is_training=False) + predictions = tf.argmax(logits, 1) + + with self.test_session() as sess: + 
sess.run(tf.initialize_all_variables()) + output = sess.run(predictions) + self.assertEquals(output.shape, (batch_size,)) + + def testTrainEvalWithReuse(self): + train_batch_size = 5 + eval_batch_size = 2 + height, width = 150, 150 + num_classes = 1000 + + train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) + inception.inception_v3(train_inputs, num_classes) + eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) + logits, _ = inception.inception_v3(eval_inputs, num_classes, + is_training=False, reuse=True) + predictions = tf.argmax(logits, 1) + + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + output = sess.run(predictions) + self.assertEquals(output.shape, (eval_batch_size,)) + + def testLogitsNotSqueezed(self): + num_classes = 25 + images = tf.random_uniform([1, 299, 299, 3]) + logits, _ = inception.inception_v3(images, + num_classes=num_classes, + spatial_squeeze=False) + + with self.test_session() as sess: + tf.initialize_all_variables().run() + logits_out = sess.run(logits) + self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/StyleMigration/nets/inception_v4.py b/StyleMigration/nets/inception_v4.py new file mode 100644 index 0000000..0c581f7 --- /dev/null +++ b/StyleMigration/nets/inception_v4.py @@ -0,0 +1,323 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains the definition of the Inception V4 architecture. + +As described in http://arxiv.org/abs/1602.07261. 
+ + Inception-v4, Inception-ResNet and the Impact of Residual Connections + on Learning + Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from nets import inception_utils + +slim = tf.contrib.slim + + +def block_inception_a(inputs, scope=None, reuse=None): + """Builds Inception-A block for Inception v4 network.""" + # By default use stride=1 and SAME padding + with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], + stride=1, padding='SAME'): + with tf.variable_scope(scope, 'BlockInceptionA', [inputs], reuse=reuse): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(inputs, 96, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3') + branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 96, [1, 1], scope='Conv2d_0b_1x1') + return tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + + +def block_reduction_a(inputs, scope=None, reuse=None): + """Builds Reduction-A block for Inception v4 network.""" + # By default use stride=1 and SAME padding + with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], + stride=1, padding='SAME'): + with tf.variable_scope(scope, 'BlockReductionA', [inputs], reuse=reuse): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(inputs, 384, [3, 3], stride=2, padding='VALID', + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3') + branch_1 = slim.conv2d(branch_1, 256, [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + return tf.concat(3, [branch_0, branch_1, branch_2]) + + +def block_inception_b(inputs, scope=None, reuse=None): + """Builds Inception-B block for Inception v4 network.""" + # By default use stride=1 and SAME padding + with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], + stride=1, padding='SAME'): + with tf.variable_scope(scope, 'BlockInceptionB', [inputs], reuse=reuse): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 224, [1, 7], scope='Conv2d_0b_1x7') + branch_1 = slim.conv2d(branch_1, 256, [7, 1], scope='Conv2d_0c_7x1') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0b_7x1') + branch_2 = slim.conv2d(branch_2, 224, [1, 7], scope='Conv2d_0c_1x7') + branch_2 = slim.conv2d(branch_2, 224, [7, 1], scope='Conv2d_0d_7x1') + branch_2 = slim.conv2d(branch_2, 256, [1, 7], scope='Conv2d_0e_1x7') + with tf.variable_scope('Branch_3'): + branch_3 = 
slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1') + return tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + + +def block_reduction_b(inputs, scope=None, reuse=None): + """Builds Reduction-B block for Inception v4 network.""" + # By default use stride=1 and SAME padding + with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], + stride=1, padding='SAME'): + with tf.variable_scope(scope, 'BlockReductionB', [inputs], reuse=reuse): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') + branch_0 = slim.conv2d(branch_0, 192, [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 256, [1, 7], scope='Conv2d_0b_1x7') + branch_1 = slim.conv2d(branch_1, 320, [7, 1], scope='Conv2d_0c_7x1') + branch_1 = slim.conv2d(branch_1, 320, [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + return tf.concat(3, [branch_0, branch_1, branch_2]) + + +def block_inception_c(inputs, scope=None, reuse=None): + """Builds Inception-C block for Inception v4 network.""" + # By default use stride=1 and SAME padding + with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], + stride=1, padding='SAME'): + with tf.variable_scope(scope, 'BlockInceptionC', [inputs], reuse=reuse): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = tf.concat(3, [ + slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'), + slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')]) + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1') + branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3') + branch_2 = tf.concat(3, [ + slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'), + slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')]) + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1') + return tf.concat(3, [branch_0, branch_1, branch_2, branch_3]) + + +def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None): + """Creates the Inception V4 network up to the given final endpoint. + + Args: + inputs: a 4-D tensor of size [batch_size, height, width, 3]. + final_endpoint: specifies the endpoint to construct the network up to. + It can be one of [ 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', + 'Mixed_3a', 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', + 'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e', + 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c', + 'Mixed_7d'] + scope: Optional variable_scope. + + Returns: + logits: the logits outputs of the model. + end_points: the set of end_points from the inception model. 
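+
+  For example (a minimal sketch; the input shape and endpoint choice are
+  illustrative, shapes per the accompanying tests):
+    inputs = tf.random_uniform((1, 299, 299, 3))
+    net, end_points = inception_v4_base(inputs, final_endpoint='Mixed_5a')
+    # net has shape [1, 35, 35, 384]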
+ + Raises: + ValueError: if final_endpoint is not set to one of the predefined values, + """ + end_points = {} + + def add_and_check_final(name, net): + end_points[name] = net + return name == final_endpoint + + with tf.variable_scope(scope, 'InceptionV4', [inputs]): + with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], + stride=1, padding='SAME'): + # 299 x 299 x 3 + net = slim.conv2d(inputs, 32, [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points + # 149 x 149 x 32 + net = slim.conv2d(net, 32, [3, 3], padding='VALID', + scope='Conv2d_2a_3x3') + if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points + # 147 x 147 x 32 + net = slim.conv2d(net, 64, [3, 3], scope='Conv2d_2b_3x3') + if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points + # 147 x 147 x 64 + with tf.variable_scope('Mixed_3a'): + with tf.variable_scope('Branch_0'): + branch_0 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', + scope='MaxPool_0a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, 96, [3, 3], stride=2, padding='VALID', + scope='Conv2d_0a_3x3') + net = tf.concat(3, [branch_0, branch_1]) + if add_and_check_final('Mixed_3a', net): return net, end_points + + # 73 x 73 x 160 + with tf.variable_scope('Mixed_4a'): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1') + branch_0 = slim.conv2d(branch_0, 96, [3, 3], padding='VALID', + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 64, [1, 7], scope='Conv2d_0b_1x7') + branch_1 = slim.conv2d(branch_1, 64, [7, 1], scope='Conv2d_0c_7x1') + branch_1 = slim.conv2d(branch_1, 96, [3, 3], padding='VALID', + scope='Conv2d_1a_3x3') + net = tf.concat(3, [branch_0, branch_1]) + if add_and_check_final('Mixed_4a', net): return net, end_points + + # 71 x 71 x 192 + with tf.variable_scope('Mixed_5a'): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, 192, [3, 3], stride=2, padding='VALID', + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + net = tf.concat(3, [branch_0, branch_1]) + if add_and_check_final('Mixed_5a', net): return net, end_points + + # 35 x 35 x 384 + # 4 x Inception-A blocks + for idx in xrange(4): + block_scope = 'Mixed_5' + chr(ord('b') + idx) + net = block_inception_a(net, block_scope) + if add_and_check_final(block_scope, net): return net, end_points + + # 35 x 35 x 384 + # Reduction-A block + net = block_reduction_a(net, 'Mixed_6a') + if add_and_check_final('Mixed_6a', net): return net, end_points + + # 17 x 17 x 1024 + # 7 x Inception-B blocks + for idx in xrange(7): + block_scope = 'Mixed_6' + chr(ord('b') + idx) + net = block_inception_b(net, block_scope) + if add_and_check_final(block_scope, net): return net, end_points + + # 17 x 17 x 1024 + # Reduction-B block + net = block_reduction_b(net, 'Mixed_7a') + if add_and_check_final('Mixed_7a', net): return net, end_points + + # 8 x 8 x 1536 + # 3 x Inception-C blocks + for idx in xrange(3): + block_scope = 'Mixed_7' + chr(ord('b') + idx) + net = block_inception_c(net, block_scope) + if add_and_check_final(block_scope, net): return net, end_points + raise ValueError('Unknown final endpoint %s' % final_endpoint) + + +def inception_v4(inputs, num_classes=1001, is_training=True, + 
dropout_keep_prob=0.8, + reuse=None, + scope='InceptionV4', + create_aux_logits=True): + """Creates the Inception V4 model. + + Args: + inputs: a 4-D tensor of size [batch_size, height, width, 3]. + num_classes: number of predicted classes. + is_training: whether is training or not. + dropout_keep_prob: float, the fraction to keep before final layer. + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + create_aux_logits: Whether to include the auxilliary logits. + + Returns: + logits: the logits outputs of the model. + end_points: the set of end_points from the inception model. + """ + end_points = {} + with tf.variable_scope(scope, 'InceptionV4', [inputs], reuse=reuse) as scope: + with slim.arg_scope([slim.batch_norm, slim.dropout], + is_training=is_training): + net, end_points = inception_v4_base(inputs, scope=scope) + + with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], + stride=1, padding='SAME'): + # Auxiliary Head logits + if create_aux_logits: + with tf.variable_scope('AuxLogits'): + # 17 x 17 x 1024 + aux_logits = end_points['Mixed_6h'] + aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3, + padding='VALID', + scope='AvgPool_1a_5x5') + aux_logits = slim.conv2d(aux_logits, 128, [1, 1], + scope='Conv2d_1b_1x1') + aux_logits = slim.conv2d(aux_logits, 768, + aux_logits.get_shape()[1:3], + padding='VALID', scope='Conv2d_2a') + aux_logits = slim.flatten(aux_logits) + aux_logits = slim.fully_connected(aux_logits, num_classes, + activation_fn=None, + scope='Aux_logits') + end_points['AuxLogits'] = aux_logits + + # Final pooling and prediction + with tf.variable_scope('Logits'): + # 8 x 8 x 1536 + net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', + scope='AvgPool_1a') + # 1 x 1 x 1536 + net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b') + net = slim.flatten(net, scope='PreLogitsFlatten') + end_points['PreLogitsFlatten'] = net + # 1536 + logits = slim.fully_connected(net, num_classes, activation_fn=None, + scope='Logits') + end_points['Logits'] = logits + end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions') + return logits, end_points +inception_v4.default_image_size = 299 + + +inception_v4_arg_scope = inception_utils.inception_arg_scope diff --git a/StyleMigration/nets/inception_v4_test.py b/StyleMigration/nets/inception_v4_test.py new file mode 100644 index 0000000..607a947 --- /dev/null +++ b/StyleMigration/nets/inception_v4_test.py @@ -0,0 +1,216 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for slim.inception_v4.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from nets import inception + + +class InceptionTest(tf.test.TestCase): + + def testBuildLogits(self): + batch_size = 5 + height, width = 299, 299 + num_classes = 1000 + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, end_points = inception.inception_v4(inputs, num_classes) + auxlogits = end_points['AuxLogits'] + predictions = end_points['Predictions'] + self.assertTrue(auxlogits.op.name.startswith('InceptionV4/AuxLogits')) + self.assertListEqual(auxlogits.get_shape().as_list(), + [batch_size, num_classes]) + self.assertTrue(logits.op.name.startswith('InceptionV4/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + self.assertTrue(predictions.op.name.startswith( + 'InceptionV4/Logits/Predictions')) + self.assertListEqual(predictions.get_shape().as_list(), + [batch_size, num_classes]) + + def testBuildWithoutAuxLogits(self): + batch_size = 5 + height, width = 299, 299 + num_classes = 1000 + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, endpoints = inception.inception_v4(inputs, num_classes, + create_aux_logits=False) + self.assertFalse('AuxLogits' in endpoints) + self.assertTrue(logits.op.name.startswith('InceptionV4/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + + def testAllEndPointsShapes(self): + batch_size = 5 + height, width = 299, 299 + num_classes = 1000 + inputs = tf.random_uniform((batch_size, height, width, 3)) + _, end_points = inception.inception_v4(inputs, num_classes) + endpoints_shapes = {'Conv2d_1a_3x3': [batch_size, 149, 149, 32], + 'Conv2d_2a_3x3': [batch_size, 147, 147, 32], + 'Conv2d_2b_3x3': [batch_size, 147, 147, 64], + 'Mixed_3a': [batch_size, 73, 73, 160], + 'Mixed_4a': [batch_size, 71, 71, 192], + 'Mixed_5a': [batch_size, 35, 35, 384], + # 4 x Inception-A blocks + 'Mixed_5b': [batch_size, 35, 35, 384], + 'Mixed_5c': [batch_size, 35, 35, 384], + 'Mixed_5d': [batch_size, 35, 35, 384], + 'Mixed_5e': [batch_size, 35, 35, 384], + # Reduction-A block + 'Mixed_6a': [batch_size, 17, 17, 1024], + # 7 x Inception-B blocks + 'Mixed_6b': [batch_size, 17, 17, 1024], + 'Mixed_6c': [batch_size, 17, 17, 1024], + 'Mixed_6d': [batch_size, 17, 17, 1024], + 'Mixed_6e': [batch_size, 17, 17, 1024], + 'Mixed_6f': [batch_size, 17, 17, 1024], + 'Mixed_6g': [batch_size, 17, 17, 1024], + 'Mixed_6h': [batch_size, 17, 17, 1024], + # Reduction-A block + 'Mixed_7a': [batch_size, 8, 8, 1536], + # 3 x Inception-C blocks + 'Mixed_7b': [batch_size, 8, 8, 1536], + 'Mixed_7c': [batch_size, 8, 8, 1536], + 'Mixed_7d': [batch_size, 8, 8, 1536], + # Logits and predictions + 'AuxLogits': [batch_size, num_classes], + 'PreLogitsFlatten': [batch_size, 1536], + 'Logits': [batch_size, num_classes], + 'Predictions': [batch_size, num_classes]} + self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys()) + for endpoint_name in endpoints_shapes: + expected_shape = endpoints_shapes[endpoint_name] + self.assertTrue(endpoint_name in end_points) + self.assertListEqual(end_points[endpoint_name].get_shape().as_list(), + expected_shape) + + def testBuildBaseNetwork(self): + batch_size = 5 + height, width = 299, 299 + inputs = tf.random_uniform((batch_size, height, width, 3)) + net, end_points = inception.inception_v4_base(inputs) + 
self.assertTrue(net.op.name.startswith( + 'InceptionV4/Mixed_7d')) + self.assertListEqual(net.get_shape().as_list(), [batch_size, 8, 8, 1536]) + expected_endpoints = [ + 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a', + 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', + 'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', + 'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', + 'Mixed_7b', 'Mixed_7c', 'Mixed_7d'] + self.assertItemsEqual(end_points.keys(), expected_endpoints) + for name, op in end_points.iteritems(): + self.assertTrue(op.name.startswith('InceptionV4/' + name)) + + def testBuildOnlyUpToFinalEndpoint(self): + batch_size = 5 + height, width = 299, 299 + all_endpoints = [ + 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a', + 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', + 'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', + 'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', + 'Mixed_7b', 'Mixed_7c', 'Mixed_7d'] + for index, endpoint in enumerate(all_endpoints): + with tf.Graph().as_default(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + out_tensor, end_points = inception.inception_v4_base( + inputs, final_endpoint=endpoint) + self.assertTrue(out_tensor.op.name.startswith( + 'InceptionV4/' + endpoint)) + self.assertItemsEqual(all_endpoints[:index+1], end_points) + + def testVariablesSetDevice(self): + batch_size = 5 + height, width = 299, 299 + num_classes = 1000 + inputs = tf.random_uniform((batch_size, height, width, 3)) + # Force all Variables to reside on the device. + with tf.variable_scope('on_cpu'), tf.device('/cpu:0'): + inception.inception_v4(inputs, num_classes) + with tf.variable_scope('on_gpu'), tf.device('/gpu:0'): + inception.inception_v4(inputs, num_classes) + for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_cpu'): + self.assertDeviceEqual(v.device, '/cpu:0') + for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_gpu'): + self.assertDeviceEqual(v.device, '/gpu:0') + + def testHalfSizeImages(self): + batch_size = 5 + height, width = 150, 150 + num_classes = 1000 + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, end_points = inception.inception_v4(inputs, num_classes) + self.assertTrue(logits.op.name.startswith('InceptionV4/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + pre_pool = end_points['Mixed_7d'] + self.assertListEqual(pre_pool.get_shape().as_list(), + [batch_size, 3, 3, 1536]) + + def testUnknownBatchSize(self): + batch_size = 1 + height, width = 299, 299 + num_classes = 1000 + with self.test_session() as sess: + inputs = tf.placeholder(tf.float32, (None, height, width, 3)) + logits, _ = inception.inception_v4(inputs, num_classes) + self.assertTrue(logits.op.name.startswith('InceptionV4/Logits')) + self.assertListEqual(logits.get_shape().as_list(), + [None, num_classes]) + images = tf.random_uniform((batch_size, height, width, 3)) + sess.run(tf.initialize_all_variables()) + output = sess.run(logits, {inputs: images.eval()}) + self.assertEquals(output.shape, (batch_size, num_classes)) + + def testEvaluation(self): + batch_size = 2 + height, width = 299, 299 + num_classes = 1000 + with self.test_session() as sess: + eval_inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = inception.inception_v4(eval_inputs, + num_classes, + is_training=False) + predictions = tf.argmax(logits, 1) + sess.run(tf.initialize_all_variables()) + output = sess.run(predictions) 
+ self.assertEquals(output.shape, (batch_size,)) + + def testTrainEvalWithReuse(self): + train_batch_size = 5 + eval_batch_size = 2 + height, width = 150, 150 + num_classes = 1000 + with self.test_session() as sess: + train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) + inception.inception_v4(train_inputs, num_classes) + eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) + logits, _ = inception.inception_v4(eval_inputs, + num_classes, + is_training=False, + reuse=True) + predictions = tf.argmax(logits, 1) + sess.run(tf.initialize_all_variables()) + output = sess.run(predictions) + self.assertEquals(output.shape, (eval_batch_size,)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/StyleMigration/nets/lenet.py b/StyleMigration/nets/lenet.py new file mode 100644 index 0000000..789d2bd --- /dev/null +++ b/StyleMigration/nets/lenet.py @@ -0,0 +1,93 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains a variant of the LeNet model definition.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +slim = tf.contrib.slim + + +def lenet(images, num_classes=10, is_training=False, + dropout_keep_prob=0.5, + prediction_fn=slim.softmax, + scope='LeNet'): + """Creates a variant of the LeNet model. + + Note that since the output is a set of 'logits', the values fall in the + interval of (-infinity, infinity). Consequently, to convert the outputs to a + probability distribution over the characters, one will need to convert them + using the softmax function: + + logits = lenet.lenet(images, is_training=False) + probabilities = tf.nn.softmax(logits) + predictions = tf.argmax(logits, 1) + + Args: + images: A batch of `Tensors` of size [batch_size, height, width, channels]. + num_classes: the number of classes in the dataset. + is_training: specifies whether or not we're currently training the model. + This variable will determine the behaviour of the dropout layer. + dropout_keep_prob: the percentage of activation values that are retained. + prediction_fn: a function to get predictions out of logits. + scope: Optional variable_scope. + + Returns: + logits: the pre-softmax activations, a tensor of size + [batch_size, `num_classes`] + end_points: a dictionary from components of the network to the corresponding + activation. 
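+
+  Example (a minimal sketch; the placeholder shape simply reflects the default
+  28x28 grayscale input size):
+    images = tf.placeholder(tf.float32, [None, 28, 28, 1])
+    logits, end_points = lenet(images, num_classes=10, is_training=False)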
+ """ + end_points = {} + + with tf.variable_scope(scope, 'LeNet', [images, num_classes]): + net = slim.conv2d(images, 32, [5, 5], scope='conv1') + net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') + net = slim.conv2d(net, 64, [5, 5], scope='conv2') + net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') + net = slim.flatten(net) + end_points['Flatten'] = net + + net = slim.fully_connected(net, 1024, scope='fc3') + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='dropout3') + logits = slim.fully_connected(net, num_classes, activation_fn=None, + scope='fc4') + + end_points['Logits'] = logits + end_points['Predictions'] = prediction_fn(logits, scope='Predictions') + + return logits, end_points +lenet.default_image_size = 28 + + +def lenet_arg_scope(weight_decay=0.0): + """Defines the default lenet argument scope. + + Args: + weight_decay: The weight decay to use for regularizing the model. + + Returns: + An `arg_scope` to use for the inception v3 model. + """ + with slim.arg_scope( + [slim.conv2d, slim.fully_connected], + weights_regularizer=slim.l2_regularizer(weight_decay), + weights_initializer=tf.truncated_normal_initializer(stddev=0.1), + activation_fn=tf.nn.relu) as sc: + return sc diff --git a/StyleMigration/nets/nets_factory.py b/StyleMigration/nets/nets_factory.py new file mode 100644 index 0000000..23d2dd6 --- /dev/null +++ b/StyleMigration/nets/nets_factory.py @@ -0,0 +1,109 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Contains a factory for building various models.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import functools + +import tensorflow as tf + +from nets import alexnet +from nets import cifarnet +from nets import inception +from nets import lenet +from nets import overfeat +from nets import resnet_v1 +from nets import resnet_v2 +from nets import vgg + +slim = tf.contrib.slim + +networks_map = {'alexnet_v2': alexnet.alexnet_v2, + 'cifarnet': cifarnet.cifarnet, + 'overfeat': overfeat.overfeat, + 'vgg_a': vgg.vgg_a, + 'vgg_16': vgg.vgg_16, + 'vgg_19': vgg.vgg_19, + 'inception_v1': inception.inception_v1, + 'inception_v2': inception.inception_v2, + 'inception_v3': inception.inception_v3, + 'inception_v4': inception.inception_v4, + 'inception_resnet_v2': inception.inception_resnet_v2, + 'lenet': lenet.lenet, + 'resnet_v1_50': resnet_v1.resnet_v1_50, + 'resnet_v1_101': resnet_v1.resnet_v1_101, + 'resnet_v1_152': resnet_v1.resnet_v1_152, + 'resnet_v1_200': resnet_v1.resnet_v1_200, + 'resnet_v2_50': resnet_v2.resnet_v2_50, + 'resnet_v2_101': resnet_v2.resnet_v2_101, + 'resnet_v2_152': resnet_v2.resnet_v2_152, + 'resnet_v2_200': resnet_v2.resnet_v2_200, + } + +arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope, + 'cifarnet': cifarnet.cifarnet_arg_scope, + 'overfeat': overfeat.overfeat_arg_scope, + 'vgg_a': vgg.vgg_arg_scope, + 'vgg_16': vgg.vgg_arg_scope, + 'vgg_19': vgg.vgg_arg_scope, + 'inception_v1': inception.inception_v3_arg_scope, + 'inception_v2': inception.inception_v3_arg_scope, + 'inception_v3': inception.inception_v3_arg_scope, + 'inception_v4': inception.inception_v4_arg_scope, + 'inception_resnet_v2': + inception.inception_resnet_v2_arg_scope, + 'lenet': lenet.lenet_arg_scope, + 'resnet_v1_50': resnet_v1.resnet_arg_scope, + 'resnet_v1_101': resnet_v1.resnet_arg_scope, + 'resnet_v1_152': resnet_v1.resnet_arg_scope, + 'resnet_v1_200': resnet_v1.resnet_arg_scope, + 'resnet_v2_50': resnet_v2.resnet_arg_scope, + 'resnet_v2_101': resnet_v2.resnet_arg_scope, + 'resnet_v2_152': resnet_v2.resnet_arg_scope, + 'resnet_v2_200': resnet_v2.resnet_arg_scope, + } + + +def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): + """Returns a network_fn such as `logits, end_points = network_fn(images)`. + + Args: + name: The name of the network. + num_classes: The number of classes to use for classification. + weight_decay: The l2 coefficient for the model weights. + is_training: `True` if the model is being used for training and `False` + otherwise. + + Returns: + network_fn: A function that applies the model to a batch of images. It has + the following signature: + logits, end_points = network_fn(images) + Raises: + ValueError: If network `name` is not recognized. 
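+
+  Example (a minimal sketch; the network name and placeholder shape are
+  illustrative):
+    network_fn = get_network_fn('vgg_16', num_classes=1000, is_training=False)
+    size = getattr(network_fn, 'default_image_size', 224)
+    images = tf.placeholder(tf.float32, [None, size, size, 3])
+    logits, end_points = network_fn(images)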
+ """ + if name not in networks_map: + raise ValueError('Name of network unknown %s' % name) + arg_scope = arg_scopes_map[name](weight_decay=weight_decay) + func = networks_map[name] + @functools.wraps(func) + def network_fn(images, **kwargs): + with slim.arg_scope(arg_scope): + return func(images, num_classes, is_training=is_training, **kwargs) + if hasattr(func, 'default_image_size'): + network_fn.default_image_size = func.default_image_size + + return network_fn diff --git a/StyleMigration/nets/nets_factory_test.py b/StyleMigration/nets/nets_factory_test.py new file mode 100644 index 0000000..6ac723b --- /dev/null +++ b/StyleMigration/nets/nets_factory_test.py @@ -0,0 +1,46 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for slim.inception.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +import tensorflow as tf + +from nets import nets_factory + + +class NetworksTest(tf.test.TestCase): + + def testGetNetworkFn(self): + batch_size = 5 + num_classes = 1000 + for net in nets_factory.networks_map: + with self.test_session(): + net_fn = nets_factory.get_network_fn(net, num_classes) + # Most networks use 224 as their default_image_size + image_size = getattr(net_fn, 'default_image_size', 224) + inputs = tf.random_uniform((batch_size, image_size, image_size, 3)) + logits, end_points = net_fn(inputs) + self.assertTrue(isinstance(logits, tf.Tensor)) + self.assertTrue(isinstance(end_points, dict)) + self.assertEqual(logits.get_shape().as_list()[0], batch_size) + self.assertEqual(logits.get_shape().as_list()[-1], num_classes) + +if __name__ == '__main__': + tf.test.main() diff --git a/StyleMigration/nets/overfeat.py b/StyleMigration/nets/overfeat.py new file mode 100644 index 0000000..0c8f45c --- /dev/null +++ b/StyleMigration/nets/overfeat.py @@ -0,0 +1,118 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains the model definition for the OverFeat network. 
+ +The definition for the network was obtained from: + OverFeat: Integrated Recognition, Localization and Detection using + Convolutional Networks + Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and + Yann LeCun, 2014 + http://arxiv.org/abs/1312.6229 + +Usage: + with slim.arg_scope(overfeat.overfeat_arg_scope()): + outputs, end_points = overfeat.overfeat(inputs) + +@@overfeat +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +slim = tf.contrib.slim +trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) + + +def overfeat_arg_scope(weight_decay=0.0005): + with slim.arg_scope([slim.conv2d, slim.fully_connected], + activation_fn=tf.nn.relu, + weights_regularizer=slim.l2_regularizer(weight_decay), + biases_initializer=tf.zeros_initializer): + with slim.arg_scope([slim.conv2d], padding='SAME'): + with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: + return arg_sc + + +def overfeat(inputs, + num_classes=1000, + is_training=True, + dropout_keep_prob=0.5, + spatial_squeeze=True, + scope='overfeat'): + """Contains the model definition for the OverFeat network. + + The definition for the network was obtained from: + OverFeat: Integrated Recognition, Localization and Detection using + Convolutional Networks + Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and + Yann LeCun, 2014 + http://arxiv.org/abs/1312.6229 + + Note: All the fully_connected layers have been transformed to conv2d layers. + To use in classification mode, resize input to 231x231. To use in fully + convolutional mode, set spatial_squeeze to false. + + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + num_classes: number of predicted classes. + is_training: whether or not the model is being trained. + dropout_keep_prob: the probability that activations are kept in the dropout + layers during training. + spatial_squeeze: whether or not should squeeze the spatial dimensions of the + outputs. Useful to remove unnecessary dimensions for classification. + scope: Optional scope for the variables. + + Returns: + the last op containing the log predictions and end_points dict. + + """ + with tf.variable_scope(scope, 'overfeat', [inputs]) as sc: + end_points_collection = sc.name + '_end_points' + # Collect outputs for conv2d, fully_connected and max_pool2d + with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], + outputs_collections=end_points_collection): + net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', + scope='conv1') + net = slim.max_pool2d(net, [2, 2], scope='pool1') + net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2') + net = slim.max_pool2d(net, [2, 2], scope='pool2') + net = slim.conv2d(net, 512, [3, 3], scope='conv3') + net = slim.conv2d(net, 1024, [3, 3], scope='conv4') + net = slim.conv2d(net, 1024, [3, 3], scope='conv5') + net = slim.max_pool2d(net, [2, 2], scope='pool5') + with slim.arg_scope([slim.conv2d], + weights_initializer=trunc_normal(0.005), + biases_initializer=tf.constant_initializer(0.1)): + # Use conv2d instead of fully_connected layers. 
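+      # For the default 231x231 input the feature map reaching this point is
+      # 6x6, so the 6x6 VALID convolution below reduces it to 1x1 and plays the
+      # role of the first fully connected layer; fc7 and fc8 then operate on
+      # that 1x1 map through 1x1 convolutions.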
+ net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6') + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='dropout6') + net = slim.conv2d(net, 4096, [1, 1], scope='fc7') + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='dropout7') + net = slim.conv2d(net, num_classes, [1, 1], + activation_fn=None, + normalizer_fn=None, + biases_initializer=tf.zeros_initializer, + scope='fc8') + # Convert end_points_collection into a end_point dict. + end_points = slim.utils.convert_collection_to_dict(end_points_collection) + if spatial_squeeze: + net = tf.squeeze(net, [1, 2], name='fc8/squeezed') + end_points[sc.name + '/fc8'] = net + return net, end_points +overfeat.default_image_size = 231 diff --git a/StyleMigration/nets/overfeat_test.py b/StyleMigration/nets/overfeat_test.py new file mode 100644 index 0000000..71dbc9a --- /dev/null +++ b/StyleMigration/nets/overfeat_test.py @@ -0,0 +1,145 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for slim.nets.overfeat.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from nets import overfeat + +slim = tf.contrib.slim + + +class OverFeatTest(tf.test.TestCase): + + def testBuild(self): + batch_size = 5 + height, width = 231, 231 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = overfeat.overfeat(inputs, num_classes) + self.assertEquals(logits.op.name, 'overfeat/fc8/squeezed') + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + + def testFullyConvolutional(self): + batch_size = 1 + height, width = 281, 281 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False) + self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd') + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, 2, 2, num_classes]) + + def testEndPoints(self): + batch_size = 5 + height, width = 231, 231 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + _, end_points = overfeat.overfeat(inputs, num_classes) + expected_names = ['overfeat/conv1', + 'overfeat/pool1', + 'overfeat/conv2', + 'overfeat/pool2', + 'overfeat/conv3', + 'overfeat/conv4', + 'overfeat/conv5', + 'overfeat/pool5', + 'overfeat/fc6', + 'overfeat/fc7', + 'overfeat/fc8' + ] + self.assertSetEqual(set(end_points.keys()), set(expected_names)) + + def testModelVariables(self): + batch_size = 5 + height, width = 231, 231 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + overfeat.overfeat(inputs, num_classes) + expected_names = ['overfeat/conv1/weights', + 
'overfeat/conv1/biases', + 'overfeat/conv2/weights', + 'overfeat/conv2/biases', + 'overfeat/conv3/weights', + 'overfeat/conv3/biases', + 'overfeat/conv4/weights', + 'overfeat/conv4/biases', + 'overfeat/conv5/weights', + 'overfeat/conv5/biases', + 'overfeat/fc6/weights', + 'overfeat/fc6/biases', + 'overfeat/fc7/weights', + 'overfeat/fc7/biases', + 'overfeat/fc8/weights', + 'overfeat/fc8/biases', + ] + model_variables = [v.op.name for v in slim.get_model_variables()] + self.assertSetEqual(set(model_variables), set(expected_names)) + + def testEvaluation(self): + batch_size = 2 + height, width = 231, 231 + num_classes = 1000 + with self.test_session(): + eval_inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = overfeat.overfeat(eval_inputs, is_training=False) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + predictions = tf.argmax(logits, 1) + self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) + + def testTrainEvalWithReuse(self): + train_batch_size = 2 + eval_batch_size = 1 + train_height, train_width = 231, 231 + eval_height, eval_width = 281, 281 + num_classes = 1000 + with self.test_session(): + train_inputs = tf.random_uniform( + (train_batch_size, train_height, train_width, 3)) + logits, _ = overfeat.overfeat(train_inputs) + self.assertListEqual(logits.get_shape().as_list(), + [train_batch_size, num_classes]) + tf.get_variable_scope().reuse_variables() + eval_inputs = tf.random_uniform( + (eval_batch_size, eval_height, eval_width, 3)) + logits, _ = overfeat.overfeat(eval_inputs, is_training=False, + spatial_squeeze=False) + self.assertListEqual(logits.get_shape().as_list(), + [eval_batch_size, 2, 2, num_classes]) + logits = tf.reduce_mean(logits, [1, 2]) + predictions = tf.argmax(logits, 1) + self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) + + def testForward(self): + batch_size = 1 + height, width = 231, 231 + with self.test_session() as sess: + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = overfeat.overfeat(inputs) + sess.run(tf.initialize_all_variables()) + output = sess.run(logits) + self.assertTrue(output.any()) + +if __name__ == '__main__': + tf.test.main() diff --git a/StyleMigration/nets/resnet_utils.py b/StyleMigration/nets/resnet_utils.py new file mode 100644 index 0000000..1e1dd82 --- /dev/null +++ b/StyleMigration/nets/resnet_utils.py @@ -0,0 +1,254 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains building blocks for various versions of Residual Networks. + +Residual networks (ResNets) were proposed in: + Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015 + +More variants were introduced in: + Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Identity Mappings in Deep Residual Networks. 
arXiv: 1603.05027, 2016 + +We can obtain different ResNet variants by changing the network depth, width, +and form of residual unit. This module implements the infrastructure for +building them. Concrete ResNet units and full ResNet networks are implemented in +the accompanying resnet_v1.py and resnet_v2.py modules. + +Compared to https://github.com/KaimingHe/deep-residual-networks, in the current +implementation we subsample the output activations in the last residual unit of +each block, instead of subsampling the input activations in the first residual +unit of each block. The two implementations give identical results but our +implementation is more memory efficient. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import tensorflow as tf + +slim = tf.contrib.slim + + +class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])): + """A named tuple describing a ResNet block. + + Its parts are: + scope: The scope of the `Block`. + unit_fn: The ResNet unit function which takes as input a `Tensor` and + returns another `Tensor` with the output of the ResNet unit. + args: A list of length equal to the number of units in the `Block`. The list + contains one (depth, depth_bottleneck, stride) tuple for each unit in the + block to serve as argument to unit_fn. + """ + + +def subsample(inputs, factor, scope=None): + """Subsamples the input along the spatial dimensions. + + Args: + inputs: A `Tensor` of size [batch, height_in, width_in, channels]. + factor: The subsampling factor. + scope: Optional variable_scope. + + Returns: + output: A `Tensor` of size [batch, height_out, width_out, channels] with the + input, either intact (if factor == 1) or subsampled (if factor > 1). + """ + if factor == 1: + return inputs + else: + return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope) + + +def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None): + """Strided 2-D convolution with 'SAME' padding. + + When stride > 1, then we do explicit zero-padding, followed by conv2d with + 'VALID' padding. + + Note that + + net = conv2d_same(inputs, num_outputs, 3, stride=stride) + + is equivalent to + + net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME') + net = subsample(net, factor=stride) + + whereas + + net = slim.conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME') + + is different when the input's height or width is even, which is why we add the + current function. For more details, see ResnetUtilsTest.testConv2DSameEven(). + + Args: + inputs: A 4-D tensor of size [batch, height_in, width_in, channels]. + num_outputs: An integer, the number of output filters. + kernel_size: An int with the kernel_size of the filters. + stride: An integer, the output stride. + rate: An integer, rate for atrous convolution. + scope: Scope. + + Returns: + output: A 4-D tensor of size [batch, height_out, width_out, channels] with + the convolution output. 
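+
+  As a concrete illustration of the padding arithmetic below: with
+  kernel_size=3, stride=2 and rate=1, pad_total = 2, so one row/column of zeros
+  is added on each side (pad_beg = 1, pad_end = 1) before the 'VALID'
+  convolution.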
+ """ + if stride == 1: + return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, rate=rate, + padding='SAME', scope=scope) + else: + kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1) + pad_total = kernel_size_effective - 1 + pad_beg = pad_total // 2 + pad_end = pad_total - pad_beg + inputs = tf.pad(inputs, + [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]) + return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride, + rate=rate, padding='VALID', scope=scope) + + +@slim.add_arg_scope +def stack_blocks_dense(net, blocks, output_stride=None, + outputs_collections=None): + """Stacks ResNet `Blocks` and controls output feature density. + + First, this function creates scopes for the ResNet in the form of + 'block_name/unit_1', 'block_name/unit_2', etc. + + Second, this function allows the user to explicitly control the ResNet + output_stride, which is the ratio of the input to output spatial resolution. + This is useful for dense prediction tasks such as semantic segmentation or + object detection. + + Most ResNets consist of 4 ResNet blocks and subsample the activations by a + factor of 2 when transitioning between consecutive ResNet blocks. This results + to a nominal ResNet output_stride equal to 8. If we set the output_stride to + half the nominal network stride (e.g., output_stride=4), then we compute + responses twice. + + Control of the output feature density is implemented by atrous convolution. + + Args: + net: A `Tensor` of size [batch, height, width, channels]. + blocks: A list of length equal to the number of ResNet `Blocks`. Each + element is a ResNet `Block` object describing the units in the `Block`. + output_stride: If `None`, then the output will be computed at the nominal + network stride. If output_stride is not `None`, it specifies the requested + ratio of input to output spatial resolution, which needs to be equal to + the product of unit strides from the start up to some level of the ResNet. + For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1, + then valid values for the output_stride are 1, 2, 6, 24 or None (which + is equivalent to output_stride=24). + outputs_collections: Collection to add the ResNet block outputs. + + Returns: + net: Output tensor with stride equal to the specified output_stride. + + Raises: + ValueError: If the target output_stride is not valid. + """ + # The current_stride variable keeps track of the effective stride of the + # activations. This allows us to invoke atrous convolution whenever applying + # the next residual unit would result in the activations having stride larger + # than the target output_stride. + current_stride = 1 + + # The atrous convolution rate parameter. + rate = 1 + + for block in blocks: + with tf.variable_scope(block.scope, 'block', [net]) as sc: + for i, unit in enumerate(block.args): + if output_stride is not None and current_stride > output_stride: + raise ValueError('The target output_stride cannot be reached.') + + with tf.variable_scope('unit_%d' % (i + 1), values=[net]): + unit_depth, unit_depth_bottleneck, unit_stride = unit + + # If we have reached the target output_stride, then we need to employ + # atrous convolution with stride=1 and multiply the atrous rate by the + # current unit's stride for use in subsequent layers. 
+ if output_stride is not None and current_stride == output_stride: + net = block.unit_fn(net, + depth=unit_depth, + depth_bottleneck=unit_depth_bottleneck, + stride=1, + rate=rate) + rate *= unit_stride + + else: + net = block.unit_fn(net, + depth=unit_depth, + depth_bottleneck=unit_depth_bottleneck, + stride=unit_stride, + rate=1) + current_stride *= unit_stride + net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net) + + if output_stride is not None and current_stride != output_stride: + raise ValueError('The target output_stride cannot be reached.') + + return net + + +def resnet_arg_scope(weight_decay=0.0001, + batch_norm_decay=0.997, + batch_norm_epsilon=1e-5, + batch_norm_scale=True): + """Defines the default ResNet arg scope. + + TODO(gpapan): The batch-normalization related default values above are + appropriate for use in conjunction with the reference ResNet models + released at https://github.com/KaimingHe/deep-residual-networks. When + training ResNets from scratch, they might need to be tuned. + + Args: + weight_decay: The weight decay to use for regularizing the model. + batch_norm_decay: The moving average decay when estimating layer activation + statistics in batch normalization. + batch_norm_epsilon: Small constant to prevent division by zero when + normalizing activations by their variance in batch normalization. + batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the + activations in the batch normalization layer. + + Returns: + An `arg_scope` to use for the resnet models. + """ + batch_norm_params = { + 'decay': batch_norm_decay, + 'epsilon': batch_norm_epsilon, + 'scale': batch_norm_scale, + 'updates_collections': tf.GraphKeys.UPDATE_OPS, + } + + with slim.arg_scope( + [slim.conv2d], + weights_regularizer=slim.l2_regularizer(weight_decay), + weights_initializer=slim.variance_scaling_initializer(), + activation_fn=tf.nn.relu, + normalizer_fn=slim.batch_norm, + normalizer_params=batch_norm_params): + with slim.arg_scope([slim.batch_norm], **batch_norm_params): + # The following implies padding='SAME' for pool1, which makes feature + # alignment easier for dense prediction tasks. This is also used in + # https://github.com/facebook/fb.resnet.torch. However the accompanying + # code of 'Deep Residual Learning for Image Recognition' uses + # padding='VALID' for pool1. You can switch to that choice by setting + # slim.arg_scope([slim.max_pool2d], padding='VALID'). + with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc: + return arg_sc diff --git a/StyleMigration/nets/resnet_v1.py b/StyleMigration/nets/resnet_v1.py new file mode 100644 index 0000000..03d49ed --- /dev/null +++ b/StyleMigration/nets/resnet_v1.py @@ -0,0 +1,295 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains definitions for the original form of Residual Networks. 
+ +The 'v1' residual networks (ResNets) implemented in this module were proposed +by: +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. arXiv:1512.03385 + +Other variants were introduced in: +[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Identity Mappings in Deep Residual Networks. arXiv: 1603.05027 + +The networks defined in this module utilize the bottleneck building block of +[1] with projection shortcuts only for increasing depths. They employ batch +normalization *after* every weight layer. This is the architecture used by +MSRA in the Imagenet and MSCOCO 2016 competition models ResNet-101 and +ResNet-152. See [2; Fig. 1a] for a comparison between the current 'v1' +architecture and the alternative 'v2' architecture of [2] which uses batch +normalization *before* every weight layer in the so-called full pre-activation +units. + +Typical use: + + from tensorflow.contrib.slim.nets import resnet_v1 + +ResNet-101 for image classification into 1000 classes: + + # inputs has shape [batch, 224, 224, 3] + with slim.arg_scope(resnet_v1.resnet_arg_scope()): + net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=False) + +ResNet-101 for semantic segmentation into 21 classes: + + # inputs has shape [batch, 513, 513, 3] + with slim.arg_scope(resnet_v1.resnet_arg_scope()): + net, end_points = resnet_v1.resnet_v1_101(inputs, + 21, + is_training=False, + global_pool=False, + output_stride=16) +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from nets import resnet_utils + + +resnet_arg_scope = resnet_utils.resnet_arg_scope +slim = tf.contrib.slim + + +@slim.add_arg_scope +def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, + outputs_collections=None, scope=None): + """Bottleneck residual unit variant with BN after convolutions. + + This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for + its definition. Note that we use here the bottleneck variant which has an + extra bottleneck layer. + + When putting together two consecutive ResNet blocks that use this unit, one + should use stride = 2 in the last unit of the first block. + + Args: + inputs: A tensor of size [batch, height, width, channels]. + depth: The depth of the ResNet unit output. + depth_bottleneck: The depth of the bottleneck layers. + stride: The ResNet unit's stride. Determines the amount of downsampling of + the units output compared to its input. + rate: An integer, rate for atrous convolution. + outputs_collections: Collection to add the ResNet unit output. + scope: Optional variable_scope. + + Returns: + The ResNet unit's output. 
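+
+  For example, the (256, 64, 1) triples used by resnet_v1_50's 'block1' build
+  units with depth=256, depth_bottleneck=64 and stride=1; when depth matches
+  the input depth the shortcut is a plain subsample, otherwise a strided 1x1
+  projection convolution is used.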
+ """ + with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc: + depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) + if depth == depth_in: + shortcut = resnet_utils.subsample(inputs, stride, 'shortcut') + else: + shortcut = slim.conv2d(inputs, depth, [1, 1], stride=stride, + activation_fn=None, scope='shortcut') + + residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1, + scope='conv1') + residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride, + rate=rate, scope='conv2') + residual = slim.conv2d(residual, depth, [1, 1], stride=1, + activation_fn=None, scope='conv3') + + output = tf.nn.relu(shortcut + residual) + + return slim.utils.collect_named_outputs(outputs_collections, + sc.original_name_scope, + output) + + +def resnet_v1(inputs, + blocks, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + include_root_block=True, + reuse=None, + scope=None): + """Generator for v1 ResNet models. + + This function generates a family of ResNet v1 models. See the resnet_v1_*() + methods for specific model instantiations, obtained by selecting different + block instantiations that produce ResNets of various depths. + + Training for image classification on Imagenet is usually done with [224, 224] + inputs, resulting in [7, 7] feature maps at the output of the last ResNet + block for the ResNets defined in [1] that have nominal stride equal to 32. + However, for dense prediction tasks we advise that one uses inputs with + spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In + this case the feature maps at the ResNet output will have spatial shape + [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] + and corners exactly aligned with the input image corners, which greatly + facilitates alignment of the features to the image. Using as input [225, 225] + images results in [8, 8] feature maps at the output of the last ResNet block. + + For dense prediction tasks, the ResNet needs to run in fully-convolutional + (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all + have nominal stride equal to 32 and a good choice in FCN mode is to use + output_stride=16 in order to increase the density of the computed features at + small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915. + + Args: + inputs: A tensor of size [batch, height_in, width_in, channels]. + blocks: A list of length equal to the number of ResNet blocks. Each element + is a resnet_utils.Block object describing the units in the block. + num_classes: Number of predicted classes for classification tasks. If None + we return the features before the logit layer. + is_training: whether is training or not. + global_pool: If True, we perform global average pooling before computing the + logits. Set to True for image classification, False for dense prediction. + output_stride: If None, then the output will be computed at the nominal + network stride. If output_stride is not None, it specifies the requested + ratio of input to output spatial resolution. + include_root_block: If True, include the initial convolution followed by + max-pooling, if False excludes it. + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + + Returns: + net: A rank-4 tensor of size [batch, height_out, width_out, channels_out]. 
+ If global_pool is False, then height_out and width_out are reduced by a + factor of output_stride compared to the respective height_in and width_in, + else both height_out and width_out equal one. If num_classes is None, then + net is the output of the last ResNet block, potentially after global + average pooling. If num_classes is not None, net contains the pre-softmax + activations. + end_points: A dictionary from components of the network to the corresponding + activation. + + Raises: + ValueError: If the target output_stride is not valid. + """ + with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc: + end_points_collection = sc.name + '_end_points' + with slim.arg_scope([slim.conv2d, bottleneck, + resnet_utils.stack_blocks_dense], + outputs_collections=end_points_collection): + with slim.arg_scope([slim.batch_norm], is_training=is_training): + net = inputs + if include_root_block: + if output_stride is not None: + if output_stride % 4 != 0: + raise ValueError('The output_stride needs to be a multiple of 4.') + output_stride /= 4 + net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') + net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') + net = resnet_utils.stack_blocks_dense(net, blocks, output_stride) + if global_pool: + # Global average pooling. + net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) + if num_classes is not None: + net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, + normalizer_fn=None, scope='logits') + # Convert end_points_collection into a dictionary of end_points. + end_points = slim.utils.convert_collection_to_dict(end_points_collection) + if num_classes is not None: + end_points['predictions'] = slim.softmax(net, scope='predictions') + return net, end_points +resnet_v1.default_image_size = 224 + + +def resnet_v1_50(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + reuse=None, + scope='resnet_v1_50'): + """ResNet-50 model of [1]. See resnet_v1() for arg and return description.""" + blocks = [ + resnet_utils.Block( + 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block( + 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), + resnet_utils.Block( + 'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]), + resnet_utils.Block( + 'block4', bottleneck, [(2048, 512, 1)] * 3) + ] + return resnet_v1(inputs, blocks, num_classes, is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, reuse=reuse, scope=scope) + + +def resnet_v1_101(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + reuse=None, + scope='resnet_v1_101'): + """ResNet-101 model of [1]. See resnet_v1() for arg and return description.""" + blocks = [ + resnet_utils.Block( + 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block( + 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), + resnet_utils.Block( + 'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]), + resnet_utils.Block( + 'block4', bottleneck, [(2048, 512, 1)] * 3) + ] + return resnet_v1(inputs, blocks, num_classes, is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, reuse=reuse, scope=scope) + + +def resnet_v1_152(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + reuse=None, + scope='resnet_v1_152'): + """ResNet-152 model of [1]. 
See resnet_v1() for arg and return description.""" + blocks = [ + resnet_utils.Block( + 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block( + 'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]), + resnet_utils.Block( + 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), + resnet_utils.Block( + 'block4', bottleneck, [(2048, 512, 1)] * 3)] + return resnet_v1(inputs, blocks, num_classes, is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, reuse=reuse, scope=scope) + + +def resnet_v1_200(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + reuse=None, + scope='resnet_v1_200'): + """ResNet-200 model of [2]. See resnet_v1() for arg and return description.""" + blocks = [ + resnet_utils.Block( + 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block( + 'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]), + resnet_utils.Block( + 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), + resnet_utils.Block( + 'block4', bottleneck, [(2048, 512, 1)] * 3)] + return resnet_v1(inputs, blocks, num_classes, is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, reuse=reuse, scope=scope) diff --git a/StyleMigration/nets/resnet_v1_test.py b/StyleMigration/nets/resnet_v1_test.py new file mode 100644 index 0000000..6198212 --- /dev/null +++ b/StyleMigration/nets/resnet_v1_test.py @@ -0,0 +1,450 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for slim.nets.resnet_v1.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf + +from nets import resnet_utils +from nets import resnet_v1 + +slim = tf.contrib.slim + + +def create_test_input(batch_size, height, width, channels): + """Create test input tensor. + + Args: + batch_size: The number of images per batch or `None` if unknown. + height: The height of each image or `None` if unknown. + width: The width of each image or `None` if unknown. + channels: The number of channels per image or `None` if unknown. + + Returns: + Either a placeholder `Tensor` of dimension + [batch_size, height, width, channels] if any of the inputs are `None` or a + constant `Tensor` with the mesh grid values along the spatial dimensions. 
+ """ + if None in [batch_size, height, width, channels]: + return tf.placeholder(tf.float32, (batch_size, height, width, channels)) + else: + return tf.to_float( + np.tile( + np.reshape( + np.reshape(np.arange(height), [height, 1]) + + np.reshape(np.arange(width), [1, width]), + [1, height, width, 1]), + [batch_size, 1, 1, channels])) + + +class ResnetUtilsTest(tf.test.TestCase): + + def testSubsampleThreeByThree(self): + x = tf.reshape(tf.to_float(tf.range(9)), [1, 3, 3, 1]) + x = resnet_utils.subsample(x, 2) + expected = tf.reshape(tf.constant([0, 2, 6, 8]), [1, 2, 2, 1]) + with self.test_session(): + self.assertAllClose(x.eval(), expected.eval()) + + def testSubsampleFourByFour(self): + x = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1]) + x = resnet_utils.subsample(x, 2) + expected = tf.reshape(tf.constant([0, 2, 8, 10]), [1, 2, 2, 1]) + with self.test_session(): + self.assertAllClose(x.eval(), expected.eval()) + + def testConv2DSameEven(self): + n, n2 = 4, 2 + + # Input image. + x = create_test_input(1, n, n, 1) + + # Convolution kernel. + w = create_test_input(1, 3, 3, 1) + w = tf.reshape(w, [3, 3, 1, 1]) + + tf.get_variable('Conv/weights', initializer=w) + tf.get_variable('Conv/biases', initializer=tf.zeros([1])) + tf.get_variable_scope().reuse_variables() + + y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv') + y1_expected = tf.to_float([[14, 28, 43, 26], + [28, 48, 66, 37], + [43, 66, 84, 46], + [26, 37, 46, 22]]) + y1_expected = tf.reshape(y1_expected, [1, n, n, 1]) + + y2 = resnet_utils.subsample(y1, 2) + y2_expected = tf.to_float([[14, 43], + [43, 84]]) + y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1]) + + y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv') + y3_expected = y2_expected + + y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv') + y4_expected = tf.to_float([[48, 37], + [37, 22]]) + y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1]) + + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + self.assertAllClose(y1.eval(), y1_expected.eval()) + self.assertAllClose(y2.eval(), y2_expected.eval()) + self.assertAllClose(y3.eval(), y3_expected.eval()) + self.assertAllClose(y4.eval(), y4_expected.eval()) + + def testConv2DSameOdd(self): + n, n2 = 5, 3 + + # Input image. + x = create_test_input(1, n, n, 1) + + # Convolution kernel. 
+ w = create_test_input(1, 3, 3, 1) + w = tf.reshape(w, [3, 3, 1, 1]) + + tf.get_variable('Conv/weights', initializer=w) + tf.get_variable('Conv/biases', initializer=tf.zeros([1])) + tf.get_variable_scope().reuse_variables() + + y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv') + y1_expected = tf.to_float([[14, 28, 43, 58, 34], + [28, 48, 66, 84, 46], + [43, 66, 84, 102, 55], + [58, 84, 102, 120, 64], + [34, 46, 55, 64, 30]]) + y1_expected = tf.reshape(y1_expected, [1, n, n, 1]) + + y2 = resnet_utils.subsample(y1, 2) + y2_expected = tf.to_float([[14, 43, 34], + [43, 84, 55], + [34, 55, 30]]) + y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1]) + + y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv') + y3_expected = y2_expected + + y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv') + y4_expected = y2_expected + + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + self.assertAllClose(y1.eval(), y1_expected.eval()) + self.assertAllClose(y2.eval(), y2_expected.eval()) + self.assertAllClose(y3.eval(), y3_expected.eval()) + self.assertAllClose(y4.eval(), y4_expected.eval()) + + def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None): + """A plain ResNet without extra layers before or after the ResNet blocks.""" + with tf.variable_scope(scope, values=[inputs]): + with slim.arg_scope([slim.conv2d], outputs_collections='end_points'): + net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride) + end_points = dict(tf.get_collection('end_points')) + return net, end_points + + def testEndPointsV1(self): + """Test the end points of a tiny v1 bottleneck network.""" + bottleneck = resnet_v1.bottleneck + blocks = [resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]), + resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])] + inputs = create_test_input(2, 32, 16, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + _, end_points = self._resnet_plain(inputs, blocks, scope='tiny') + expected = [ + 'tiny/block1/unit_1/bottleneck_v1/shortcut', + 'tiny/block1/unit_1/bottleneck_v1/conv1', + 'tiny/block1/unit_1/bottleneck_v1/conv2', + 'tiny/block1/unit_1/bottleneck_v1/conv3', + 'tiny/block1/unit_2/bottleneck_v1/conv1', + 'tiny/block1/unit_2/bottleneck_v1/conv2', + 'tiny/block1/unit_2/bottleneck_v1/conv3', + 'tiny/block2/unit_1/bottleneck_v1/shortcut', + 'tiny/block2/unit_1/bottleneck_v1/conv1', + 'tiny/block2/unit_1/bottleneck_v1/conv2', + 'tiny/block2/unit_1/bottleneck_v1/conv3', + 'tiny/block2/unit_2/bottleneck_v1/conv1', + 'tiny/block2/unit_2/bottleneck_v1/conv2', + 'tiny/block2/unit_2/bottleneck_v1/conv3'] + self.assertItemsEqual(expected, end_points) + + def _stack_blocks_nondense(self, net, blocks): + """A simplified ResNet Block stacker without output stride control.""" + for block in blocks: + with tf.variable_scope(block.scope, 'block', [net]): + for i, unit in enumerate(block.args): + depth, depth_bottleneck, stride = unit + with tf.variable_scope('unit_%d' % (i + 1), values=[net]): + net = block.unit_fn(net, + depth=depth, + depth_bottleneck=depth_bottleneck, + stride=stride, + rate=1) + return net + + def _atrousValues(self, bottleneck): + """Verify the values of dense feature extraction by atrous convolution. + + Make sure that dense feature extraction by stack_blocks_dense() followed by + subsampling gives identical results to feature extraction at the nominal + network output stride using the simple self._stack_blocks_nondense() above. + + Args: + bottleneck: The bottleneck function. 
+ """ + blocks = [ + resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]), + resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]), + resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]), + resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)]) + ] + nominal_stride = 8 + + # Test both odd and even input dimensions. + height = 30 + width = 31 + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + with slim.arg_scope([slim.batch_norm], is_training=False): + for output_stride in [1, 2, 4, 8, None]: + with tf.Graph().as_default(): + with self.test_session() as sess: + tf.set_random_seed(0) + inputs = create_test_input(1, height, width, 3) + # Dense feature extraction followed by subsampling. + output = resnet_utils.stack_blocks_dense(inputs, + blocks, + output_stride) + if output_stride is None: + factor = 1 + else: + factor = nominal_stride // output_stride + + output = resnet_utils.subsample(output, factor) + # Make the two networks use the same weights. + tf.get_variable_scope().reuse_variables() + # Feature extraction at the nominal network rate. + expected = self._stack_blocks_nondense(inputs, blocks) + sess.run(tf.initialize_all_variables()) + output, expected = sess.run([output, expected]) + self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4) + + def testAtrousValuesBottleneck(self): + self._atrousValues(resnet_v1.bottleneck) + + +class ResnetCompleteNetworkTest(tf.test.TestCase): + """Tests with complete small ResNet v1 networks.""" + + def _resnet_small(self, + inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + include_root_block=True, + reuse=None, + scope='resnet_v1_small'): + """A shallow and thin ResNet v1 for faster tests.""" + bottleneck = resnet_v1.bottleneck + blocks = [ + resnet_utils.Block( + 'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]), + resnet_utils.Block( + 'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]), + resnet_utils.Block( + 'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]), + resnet_utils.Block( + 'block4', bottleneck, [(32, 8, 1)] * 2)] + return resnet_v1.resnet_v1(inputs, blocks, num_classes, + is_training=is_training, + global_pool=global_pool, + output_stride=output_stride, + include_root_block=include_root_block, + reuse=reuse, + scope=scope) + + def testClassificationEndPoints(self): + global_pool = True + num_classes = 10 + inputs = create_test_input(2, 224, 224, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + logits, end_points = self._resnet_small(inputs, num_classes, + global_pool=global_pool, + scope='resnet') + self.assertTrue(logits.op.name.startswith('resnet/logits')) + self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes]) + self.assertTrue('predictions' in end_points) + self.assertListEqual(end_points['predictions'].get_shape().as_list(), + [2, 1, 1, num_classes]) + + def testClassificationShapes(self): + global_pool = True + num_classes = 10 + inputs = create_test_input(2, 224, 224, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + _, end_points = self._resnet_small(inputs, num_classes, + global_pool=global_pool, + scope='resnet') + endpoint_to_shape = { + 'resnet/block1': [2, 28, 28, 4], + 'resnet/block2': [2, 14, 14, 8], + 'resnet/block3': [2, 7, 7, 16], + 'resnet/block4': [2, 7, 7, 32]} + for endpoint in endpoint_to_shape: + shape = endpoint_to_shape[endpoint] + self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) + + def testFullyConvolutionalEndpointShapes(self): + 
global_pool = False + num_classes = 10 + inputs = create_test_input(2, 321, 321, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + _, end_points = self._resnet_small(inputs, num_classes, + global_pool=global_pool, + scope='resnet') + endpoint_to_shape = { + 'resnet/block1': [2, 41, 41, 4], + 'resnet/block2': [2, 21, 21, 8], + 'resnet/block3': [2, 11, 11, 16], + 'resnet/block4': [2, 11, 11, 32]} + for endpoint in endpoint_to_shape: + shape = endpoint_to_shape[endpoint] + self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) + + def testRootlessFullyConvolutionalEndpointShapes(self): + global_pool = False + num_classes = 10 + inputs = create_test_input(2, 128, 128, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + _, end_points = self._resnet_small(inputs, num_classes, + global_pool=global_pool, + include_root_block=False, + scope='resnet') + endpoint_to_shape = { + 'resnet/block1': [2, 64, 64, 4], + 'resnet/block2': [2, 32, 32, 8], + 'resnet/block3': [2, 16, 16, 16], + 'resnet/block4': [2, 16, 16, 32]} + for endpoint in endpoint_to_shape: + shape = endpoint_to_shape[endpoint] + self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) + + def testAtrousFullyConvolutionalEndpointShapes(self): + global_pool = False + num_classes = 10 + output_stride = 8 + inputs = create_test_input(2, 321, 321, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + _, end_points = self._resnet_small(inputs, + num_classes, + global_pool=global_pool, + output_stride=output_stride, + scope='resnet') + endpoint_to_shape = { + 'resnet/block1': [2, 41, 41, 4], + 'resnet/block2': [2, 41, 41, 8], + 'resnet/block3': [2, 41, 41, 16], + 'resnet/block4': [2, 41, 41, 32]} + for endpoint in endpoint_to_shape: + shape = endpoint_to_shape[endpoint] + self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) + + def testAtrousFullyConvolutionalValues(self): + """Verify dense feature extraction with atrous convolution.""" + nominal_stride = 32 + for output_stride in [4, 8, 16, 32, None]: + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + with tf.Graph().as_default(): + with self.test_session() as sess: + tf.set_random_seed(0) + inputs = create_test_input(2, 81, 81, 3) + # Dense feature extraction followed by subsampling. + output, _ = self._resnet_small(inputs, None, is_training=False, + global_pool=False, + output_stride=output_stride) + if output_stride is None: + factor = 1 + else: + factor = nominal_stride // output_stride + output = resnet_utils.subsample(output, factor) + # Make the two networks use the same weights. + tf.get_variable_scope().reuse_variables() + # Feature extraction at the nominal network rate. 
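+ # The dense (atrous) features, once subsampled by `factor` above, should be
+ # numerically identical to the features extracted at the nominal stride here.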
+ expected, _ = self._resnet_small(inputs, None, is_training=False, + global_pool=False) + sess.run(tf.initialize_all_variables()) + self.assertAllClose(output.eval(), expected.eval(), + atol=1e-4, rtol=1e-4) + + def testUnknownBatchSize(self): + batch = 2 + height, width = 65, 65 + global_pool = True + num_classes = 10 + inputs = create_test_input(None, height, width, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + logits, _ = self._resnet_small(inputs, num_classes, + global_pool=global_pool, + scope='resnet') + self.assertTrue(logits.op.name.startswith('resnet/logits')) + self.assertListEqual(logits.get_shape().as_list(), + [None, 1, 1, num_classes]) + images = create_test_input(batch, height, width, 3) + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + output = sess.run(logits, {inputs: images.eval()}) + self.assertEqual(output.shape, (batch, 1, 1, num_classes)) + + def testFullyConvolutionalUnknownHeightWidth(self): + batch = 2 + height, width = 65, 65 + global_pool = False + inputs = create_test_input(batch, None, None, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + output, _ = self._resnet_small(inputs, None, global_pool=global_pool) + self.assertListEqual(output.get_shape().as_list(), + [batch, None, None, 32]) + images = create_test_input(batch, height, width, 3) + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + output = sess.run(output, {inputs: images.eval()}) + self.assertEqual(output.shape, (batch, 3, 3, 32)) + + def testAtrousFullyConvolutionalUnknownHeightWidth(self): + batch = 2 + height, width = 65, 65 + global_pool = False + output_stride = 8 + inputs = create_test_input(batch, None, None, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + output, _ = self._resnet_small(inputs, + None, + global_pool=global_pool, + output_stride=output_stride) + self.assertListEqual(output.get_shape().as_list(), + [batch, None, None, 32]) + images = create_test_input(batch, height, width, 3) + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + output = sess.run(output, {inputs: images.eval()}) + self.assertEqual(output.shape, (batch, 9, 9, 32)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/StyleMigration/nets/resnet_v2.py b/StyleMigration/nets/resnet_v2.py new file mode 100644 index 0000000..9476db2 --- /dev/null +++ b/StyleMigration/nets/resnet_v2.py @@ -0,0 +1,302 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains definitions for the preactivation form of Residual Networks. + +Residual networks (ResNets) were originally proposed in: +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. 
arXiv:1512.03385 + +The full preactivation 'v2' ResNet variant implemented in this module was +introduced by: +[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Identity Mappings in Deep Residual Networks. arXiv: 1603.05027 + +The key difference of the full preactivation 'v2' variant compared to the +'v1' variant in [1] is the use of batch normalization before every weight layer. +Another difference is that 'v2' ResNets do not include an activation function in +the main pathway. Also see [2; Fig. 4e]. + +Typical use: + + from tensorflow.contrib.slim.nets import resnet_v2 + +ResNet-101 for image classification into 1000 classes: + + # inputs has shape [batch, 224, 224, 3] + with slim.arg_scope(resnet_v2.resnet_arg_scope()): + net, end_points = resnet_v2.resnet_v2_101(inputs, 1000, is_training=False) + +ResNet-101 for semantic segmentation into 21 classes: + + # inputs has shape [batch, 513, 513, 3] + with slim.arg_scope(resnet_v2.resnet_arg_scope(is_training)): + net, end_points = resnet_v2.resnet_v2_101(inputs, + 21, + is_training=False, + global_pool=False, + output_stride=16) +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from nets import resnet_utils + +slim = tf.contrib.slim +resnet_arg_scope = resnet_utils.resnet_arg_scope + + +@slim.add_arg_scope +def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, + outputs_collections=None, scope=None): + """Bottleneck residual unit variant with BN before convolutions. + + This is the full preactivation residual unit variant proposed in [2]. See + Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck + variant which has an extra bottleneck layer. + + When putting together two consecutive ResNet blocks that use this unit, one + should use stride = 2 in the last unit of the first block. + + Args: + inputs: A tensor of size [batch, height, width, channels]. + depth: The depth of the ResNet unit output. + depth_bottleneck: The depth of the bottleneck layers. + stride: The ResNet unit's stride. Determines the amount of downsampling of + the units output compared to its input. + rate: An integer, rate for atrous convolution. + outputs_collections: Collection to add the ResNet unit output. + scope: Optional variable_scope. + + Returns: + The ResNet unit's output. + """ + with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc: + depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) + preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact') + if depth == depth_in: + shortcut = resnet_utils.subsample(inputs, stride, 'shortcut') + else: + shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride, + normalizer_fn=None, activation_fn=None, + scope='shortcut') + + residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1, + scope='conv1') + residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride, + rate=rate, scope='conv2') + residual = slim.conv2d(residual, depth, [1, 1], stride=1, + normalizer_fn=None, activation_fn=None, + scope='conv3') + + output = shortcut + residual + + return slim.utils.collect_named_outputs(outputs_collections, + sc.original_name_scope, + output) + + +def resnet_v2(inputs, + blocks, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + include_root_block=True, + reuse=None, + scope=None): + """Generator for v2 (preactivation) ResNet models. + + This function generates a family of ResNet v2 models. 
See the resnet_v2_*() + methods for specific model instantiations, obtained by selecting different + block instantiations that produce ResNets of various depths. + + Training for image classification on Imagenet is usually done with [224, 224] + inputs, resulting in [7, 7] feature maps at the output of the last ResNet + block for the ResNets defined in [1] that have nominal stride equal to 32. + However, for dense prediction tasks we advise that one uses inputs with + spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In + this case the feature maps at the ResNet output will have spatial shape + [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] + and corners exactly aligned with the input image corners, which greatly + facilitates alignment of the features to the image. Using as input [225, 225] + images results in [8, 8] feature maps at the output of the last ResNet block. + + For dense prediction tasks, the ResNet needs to run in fully-convolutional + (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all + have nominal stride equal to 32 and a good choice in FCN mode is to use + output_stride=16 in order to increase the density of the computed features at + small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915. + + Args: + inputs: A tensor of size [batch, height_in, width_in, channels]. + blocks: A list of length equal to the number of ResNet blocks. Each element + is a resnet_utils.Block object describing the units in the block. + num_classes: Number of predicted classes for classification tasks. If None + we return the features before the logit layer. + is_training: whether is training or not. + global_pool: If True, we perform global average pooling before computing the + logits. Set to True for image classification, False for dense prediction. + output_stride: If None, then the output will be computed at the nominal + network stride. If output_stride is not None, it specifies the requested + ratio of input to output spatial resolution. + include_root_block: If True, include the initial convolution followed by + max-pooling, if False excludes it. If excluded, `inputs` should be the + results of an activation-less convolution. + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + + + Returns: + net: A rank-4 tensor of size [batch, height_out, width_out, channels_out]. + If global_pool is False, then height_out and width_out are reduced by a + factor of output_stride compared to the respective height_in and width_in, + else both height_out and width_out equal one. If num_classes is None, then + net is the output of the last ResNet block, potentially after global + average pooling. If num_classes is not None, net contains the pre-softmax + activations. + end_points: A dictionary from components of the network to the corresponding + activation. + + Raises: + ValueError: If the target output_stride is not valid. 
+ """ + with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc: + end_points_collection = sc.name + '_end_points' + with slim.arg_scope([slim.conv2d, bottleneck, + resnet_utils.stack_blocks_dense], + outputs_collections=end_points_collection): + with slim.arg_scope([slim.batch_norm], is_training=is_training): + net = inputs + if include_root_block: + if output_stride is not None: + if output_stride % 4 != 0: + raise ValueError('The output_stride needs to be a multiple of 4.') + output_stride /= 4 + # We do not include batch normalization or activation functions in + # conv1 because the first ResNet unit will perform these. Cf. + # Appendix of [2]. + with slim.arg_scope([slim.conv2d], + activation_fn=None, normalizer_fn=None): + net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') + net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') + net = resnet_utils.stack_blocks_dense(net, blocks, output_stride) + # This is needed because the pre-activation variant does not have batch + # normalization or activation functions in the residual unit output. See + # Appendix of [2]. + net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm') + if global_pool: + # Global average pooling. + net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) + if num_classes is not None: + net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, + normalizer_fn=None, scope='logits') + # Convert end_points_collection into a dictionary of end_points. + end_points = slim.utils.convert_collection_to_dict(end_points_collection) + if num_classes is not None: + end_points['predictions'] = slim.softmax(net, scope='predictions') + return net, end_points +resnet_v2.default_image_size = 224 + + +def resnet_v2_50(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + reuse=None, + scope='resnet_v2_50'): + """ResNet-50 model of [1]. See resnet_v2() for arg and return description.""" + blocks = [ + resnet_utils.Block( + 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block( + 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), + resnet_utils.Block( + 'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]), + resnet_utils.Block( + 'block4', bottleneck, [(2048, 512, 1)] * 3)] + return resnet_v2(inputs, blocks, num_classes, is_training=is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, reuse=reuse, scope=scope) + + +def resnet_v2_101(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + reuse=None, + scope='resnet_v2_101'): + """ResNet-101 model of [1]. See resnet_v2() for arg and return description.""" + blocks = [ + resnet_utils.Block( + 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block( + 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), + resnet_utils.Block( + 'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]), + resnet_utils.Block( + 'block4', bottleneck, [(2048, 512, 1)] * 3)] + return resnet_v2(inputs, blocks, num_classes, is_training=is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, reuse=reuse, scope=scope) + + +def resnet_v2_152(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + reuse=None, + scope='resnet_v2_152'): + """ResNet-152 model of [1]. 
See resnet_v2() for arg and return description.""" + blocks = [ + resnet_utils.Block( + 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block( + 'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]), + resnet_utils.Block( + 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), + resnet_utils.Block( + 'block4', bottleneck, [(2048, 512, 1)] * 3)] + return resnet_v2(inputs, blocks, num_classes, is_training=is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, reuse=reuse, scope=scope) + + +def resnet_v2_200(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + reuse=None, + scope='resnet_v2_200'): + """ResNet-200 model of [2]. See resnet_v2() for arg and return description.""" + blocks = [ + resnet_utils.Block( + 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block( + 'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]), + resnet_utils.Block( + 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), + resnet_utils.Block( + 'block4', bottleneck, [(2048, 512, 1)] * 3)] + return resnet_v2(inputs, blocks, num_classes, is_training=is_training, + global_pool=global_pool, output_stride=output_stride, + include_root_block=True, reuse=reuse, scope=scope) diff --git a/StyleMigration/nets/resnet_v2_test.py b/StyleMigration/nets/resnet_v2_test.py new file mode 100644 index 0000000..1b4fa75 --- /dev/null +++ b/StyleMigration/nets/resnet_v2_test.py @@ -0,0 +1,453 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for slim.nets.resnet_v2.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf + +from nets import resnet_utils +from nets import resnet_v2 + +slim = tf.contrib.slim + + +def create_test_input(batch_size, height, width, channels): + """Create test input tensor. + + Args: + batch_size: The number of images per batch or `None` if unknown. + height: The height of each image or `None` if unknown. + width: The width of each image or `None` if unknown. + channels: The number of channels per image or `None` if unknown. + + Returns: + Either a placeholder `Tensor` of dimension + [batch_size, height, width, channels] if any of the inputs are `None` or a + constant `Tensor` with the mesh grid values along the spatial dimensions. 
+ """ + if None in [batch_size, height, width, channels]: + return tf.placeholder(tf.float32, (batch_size, height, width, channels)) + else: + return tf.to_float( + np.tile( + np.reshape( + np.reshape(np.arange(height), [height, 1]) + + np.reshape(np.arange(width), [1, width]), + [1, height, width, 1]), + [batch_size, 1, 1, channels])) + + +class ResnetUtilsTest(tf.test.TestCase): + + def testSubsampleThreeByThree(self): + x = tf.reshape(tf.to_float(tf.range(9)), [1, 3, 3, 1]) + x = resnet_utils.subsample(x, 2) + expected = tf.reshape(tf.constant([0, 2, 6, 8]), [1, 2, 2, 1]) + with self.test_session(): + self.assertAllClose(x.eval(), expected.eval()) + + def testSubsampleFourByFour(self): + x = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1]) + x = resnet_utils.subsample(x, 2) + expected = tf.reshape(tf.constant([0, 2, 8, 10]), [1, 2, 2, 1]) + with self.test_session(): + self.assertAllClose(x.eval(), expected.eval()) + + def testConv2DSameEven(self): + n, n2 = 4, 2 + + # Input image. + x = create_test_input(1, n, n, 1) + + # Convolution kernel. + w = create_test_input(1, 3, 3, 1) + w = tf.reshape(w, [3, 3, 1, 1]) + + tf.get_variable('Conv/weights', initializer=w) + tf.get_variable('Conv/biases', initializer=tf.zeros([1])) + tf.get_variable_scope().reuse_variables() + + y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv') + y1_expected = tf.to_float([[14, 28, 43, 26], + [28, 48, 66, 37], + [43, 66, 84, 46], + [26, 37, 46, 22]]) + y1_expected = tf.reshape(y1_expected, [1, n, n, 1]) + + y2 = resnet_utils.subsample(y1, 2) + y2_expected = tf.to_float([[14, 43], + [43, 84]]) + y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1]) + + y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv') + y3_expected = y2_expected + + y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv') + y4_expected = tf.to_float([[48, 37], + [37, 22]]) + y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1]) + + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + self.assertAllClose(y1.eval(), y1_expected.eval()) + self.assertAllClose(y2.eval(), y2_expected.eval()) + self.assertAllClose(y3.eval(), y3_expected.eval()) + self.assertAllClose(y4.eval(), y4_expected.eval()) + + def testConv2DSameOdd(self): + n, n2 = 5, 3 + + # Input image. + x = create_test_input(1, n, n, 1) + + # Convolution kernel. 
+ w = create_test_input(1, 3, 3, 1) + w = tf.reshape(w, [3, 3, 1, 1]) + + tf.get_variable('Conv/weights', initializer=w) + tf.get_variable('Conv/biases', initializer=tf.zeros([1])) + tf.get_variable_scope().reuse_variables() + + y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv') + y1_expected = tf.to_float([[14, 28, 43, 58, 34], + [28, 48, 66, 84, 46], + [43, 66, 84, 102, 55], + [58, 84, 102, 120, 64], + [34, 46, 55, 64, 30]]) + y1_expected = tf.reshape(y1_expected, [1, n, n, 1]) + + y2 = resnet_utils.subsample(y1, 2) + y2_expected = tf.to_float([[14, 43, 34], + [43, 84, 55], + [34, 55, 30]]) + y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1]) + + y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv') + y3_expected = y2_expected + + y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv') + y4_expected = y2_expected + + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + self.assertAllClose(y1.eval(), y1_expected.eval()) + self.assertAllClose(y2.eval(), y2_expected.eval()) + self.assertAllClose(y3.eval(), y3_expected.eval()) + self.assertAllClose(y4.eval(), y4_expected.eval()) + + def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None): + """A plain ResNet without extra layers before or after the ResNet blocks.""" + with tf.variable_scope(scope, values=[inputs]): + with slim.arg_scope([slim.conv2d], outputs_collections='end_points'): + net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride) + end_points = dict(tf.get_collection('end_points')) + return net, end_points + + def testEndPointsV2(self): + """Test the end points of a tiny v2 bottleneck network.""" + bottleneck = resnet_v2.bottleneck + blocks = [resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]), + resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])] + inputs = create_test_input(2, 32, 16, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + _, end_points = self._resnet_plain(inputs, blocks, scope='tiny') + expected = [ + 'tiny/block1/unit_1/bottleneck_v2/shortcut', + 'tiny/block1/unit_1/bottleneck_v2/conv1', + 'tiny/block1/unit_1/bottleneck_v2/conv2', + 'tiny/block1/unit_1/bottleneck_v2/conv3', + 'tiny/block1/unit_2/bottleneck_v2/conv1', + 'tiny/block1/unit_2/bottleneck_v2/conv2', + 'tiny/block1/unit_2/bottleneck_v2/conv3', + 'tiny/block2/unit_1/bottleneck_v2/shortcut', + 'tiny/block2/unit_1/bottleneck_v2/conv1', + 'tiny/block2/unit_1/bottleneck_v2/conv2', + 'tiny/block2/unit_1/bottleneck_v2/conv3', + 'tiny/block2/unit_2/bottleneck_v2/conv1', + 'tiny/block2/unit_2/bottleneck_v2/conv2', + 'tiny/block2/unit_2/bottleneck_v2/conv3'] + self.assertItemsEqual(expected, end_points) + + def _stack_blocks_nondense(self, net, blocks): + """A simplified ResNet Block stacker without output stride control.""" + for block in blocks: + with tf.variable_scope(block.scope, 'block', [net]): + for i, unit in enumerate(block.args): + depth, depth_bottleneck, stride = unit + with tf.variable_scope('unit_%d' % (i + 1), values=[net]): + net = block.unit_fn(net, + depth=depth, + depth_bottleneck=depth_bottleneck, + stride=stride, + rate=1) + return net + + def _atrousValues(self, bottleneck): + """Verify the values of dense feature extraction by atrous convolution. + + Make sure that dense feature extraction by stack_blocks_dense() followed by + subsampling gives identical results to feature extraction at the nominal + network output stride using the simple self._stack_blocks_nondense() above. + + Args: + bottleneck: The bottleneck function. 
+ """ + blocks = [ + resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]), + resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]), + resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]), + resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)]) + ] + nominal_stride = 8 + + # Test both odd and even input dimensions. + height = 30 + width = 31 + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + with slim.arg_scope([slim.batch_norm], is_training=False): + for output_stride in [1, 2, 4, 8, None]: + with tf.Graph().as_default(): + with self.test_session() as sess: + tf.set_random_seed(0) + inputs = create_test_input(1, height, width, 3) + # Dense feature extraction followed by subsampling. + output = resnet_utils.stack_blocks_dense(inputs, + blocks, + output_stride) + if output_stride is None: + factor = 1 + else: + factor = nominal_stride // output_stride + + output = resnet_utils.subsample(output, factor) + # Make the two networks use the same weights. + tf.get_variable_scope().reuse_variables() + # Feature extraction at the nominal network rate. + expected = self._stack_blocks_nondense(inputs, blocks) + sess.run(tf.initialize_all_variables()) + output, expected = sess.run([output, expected]) + self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4) + + def testAtrousValuesBottleneck(self): + self._atrousValues(resnet_v2.bottleneck) + + +class ResnetCompleteNetworkTest(tf.test.TestCase): + """Tests with complete small ResNet v2 networks.""" + + def _resnet_small(self, + inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + include_root_block=True, + reuse=None, + scope='resnet_v2_small'): + """A shallow and thin ResNet v2 for faster tests.""" + bottleneck = resnet_v2.bottleneck + blocks = [ + resnet_utils.Block( + 'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]), + resnet_utils.Block( + 'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]), + resnet_utils.Block( + 'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]), + resnet_utils.Block( + 'block4', bottleneck, [(32, 8, 1)] * 2)] + return resnet_v2.resnet_v2(inputs, blocks, num_classes, + is_training=is_training, + global_pool=global_pool, + output_stride=output_stride, + include_root_block=include_root_block, + reuse=reuse, + scope=scope) + + def testClassificationEndPoints(self): + global_pool = True + num_classes = 10 + inputs = create_test_input(2, 224, 224, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + logits, end_points = self._resnet_small(inputs, num_classes, + global_pool=global_pool, + scope='resnet') + self.assertTrue(logits.op.name.startswith('resnet/logits')) + self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes]) + self.assertTrue('predictions' in end_points) + self.assertListEqual(end_points['predictions'].get_shape().as_list(), + [2, 1, 1, num_classes]) + + def testClassificationShapes(self): + global_pool = True + num_classes = 10 + inputs = create_test_input(2, 224, 224, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + _, end_points = self._resnet_small(inputs, num_classes, + global_pool=global_pool, + scope='resnet') + endpoint_to_shape = { + 'resnet/block1': [2, 28, 28, 4], + 'resnet/block2': [2, 14, 14, 8], + 'resnet/block3': [2, 7, 7, 16], + 'resnet/block4': [2, 7, 7, 32]} + for endpoint in endpoint_to_shape: + shape = endpoint_to_shape[endpoint] + self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) + + def testFullyConvolutionalEndpointShapes(self): + 
global_pool = False + num_classes = 10 + inputs = create_test_input(2, 321, 321, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + _, end_points = self._resnet_small(inputs, num_classes, + global_pool=global_pool, + scope='resnet') + endpoint_to_shape = { + 'resnet/block1': [2, 41, 41, 4], + 'resnet/block2': [2, 21, 21, 8], + 'resnet/block3': [2, 11, 11, 16], + 'resnet/block4': [2, 11, 11, 32]} + for endpoint in endpoint_to_shape: + shape = endpoint_to_shape[endpoint] + self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) + + def testRootlessFullyConvolutionalEndpointShapes(self): + global_pool = False + num_classes = 10 + inputs = create_test_input(2, 128, 128, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + _, end_points = self._resnet_small(inputs, num_classes, + global_pool=global_pool, + include_root_block=False, + scope='resnet') + endpoint_to_shape = { + 'resnet/block1': [2, 64, 64, 4], + 'resnet/block2': [2, 32, 32, 8], + 'resnet/block3': [2, 16, 16, 16], + 'resnet/block4': [2, 16, 16, 32]} + for endpoint in endpoint_to_shape: + shape = endpoint_to_shape[endpoint] + self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) + + def testAtrousFullyConvolutionalEndpointShapes(self): + global_pool = False + num_classes = 10 + output_stride = 8 + inputs = create_test_input(2, 321, 321, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + _, end_points = self._resnet_small(inputs, + num_classes, + global_pool=global_pool, + output_stride=output_stride, + scope='resnet') + endpoint_to_shape = { + 'resnet/block1': [2, 41, 41, 4], + 'resnet/block2': [2, 41, 41, 8], + 'resnet/block3': [2, 41, 41, 16], + 'resnet/block4': [2, 41, 41, 32]} + for endpoint in endpoint_to_shape: + shape = endpoint_to_shape[endpoint] + self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) + + def testAtrousFullyConvolutionalValues(self): + """Verify dense feature extraction with atrous convolution.""" + nominal_stride = 32 + for output_stride in [4, 8, 16, 32, None]: + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + with tf.Graph().as_default(): + with self.test_session() as sess: + tf.set_random_seed(0) + inputs = create_test_input(2, 81, 81, 3) + # Dense feature extraction followed by subsampling. + output, _ = self._resnet_small(inputs, None, + is_training=False, + global_pool=False, + output_stride=output_stride) + if output_stride is None: + factor = 1 + else: + factor = nominal_stride // output_stride + output = resnet_utils.subsample(output, factor) + # Make the two networks use the same weights. + tf.get_variable_scope().reuse_variables() + # Feature extraction at the nominal network rate. 
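+ # The dense (atrous) features, once subsampled by `factor` above, should be
+ # numerically identical to the features extracted at the nominal stride here.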
+ expected, _ = self._resnet_small(inputs, None, + is_training=False, + global_pool=False) + sess.run(tf.initialize_all_variables()) + self.assertAllClose(output.eval(), expected.eval(), + atol=1e-4, rtol=1e-4) + + def testUnknownBatchSize(self): + batch = 2 + height, width = 65, 65 + global_pool = True + num_classes = 10 + inputs = create_test_input(None, height, width, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + logits, _ = self._resnet_small(inputs, num_classes, + global_pool=global_pool, + scope='resnet') + self.assertTrue(logits.op.name.startswith('resnet/logits')) + self.assertListEqual(logits.get_shape().as_list(), + [None, 1, 1, num_classes]) + images = create_test_input(batch, height, width, 3) + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + output = sess.run(logits, {inputs: images.eval()}) + self.assertEqual(output.shape, (batch, 1, 1, num_classes)) + + def testFullyConvolutionalUnknownHeightWidth(self): + batch = 2 + height, width = 65, 65 + global_pool = False + inputs = create_test_input(batch, None, None, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + output, _ = self._resnet_small(inputs, None, + global_pool=global_pool) + self.assertListEqual(output.get_shape().as_list(), + [batch, None, None, 32]) + images = create_test_input(batch, height, width, 3) + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + output = sess.run(output, {inputs: images.eval()}) + self.assertEqual(output.shape, (batch, 3, 3, 32)) + + def testAtrousFullyConvolutionalUnknownHeightWidth(self): + batch = 2 + height, width = 65, 65 + global_pool = False + output_stride = 8 + inputs = create_test_input(batch, None, None, 3) + with slim.arg_scope(resnet_utils.resnet_arg_scope()): + output, _ = self._resnet_small(inputs, + None, + global_pool=global_pool, + output_stride=output_stride) + self.assertListEqual(output.get_shape().as_list(), + [batch, None, None, 32]) + images = create_test_input(batch, height, width, 3) + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + output = sess.run(output, {inputs: images.eval()}) + self.assertEqual(output.shape, (batch, 9, 9, 32)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/StyleMigration/nets/vgg.py b/StyleMigration/nets/vgg.py new file mode 100644 index 0000000..26c2e32 --- /dev/null +++ b/StyleMigration/nets/vgg.py @@ -0,0 +1,227 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains model definitions for versions of the Oxford VGG network. 
+These model definitions were introduced in the following technical report: + Very Deep Convolutional Networks For Large-Scale Image Recognition + Karen Simonyan and Andrew Zisserman + arXiv technical report, 2015 + PDF: http://arxiv.org/pdf/1409.1556.pdf + ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf + CC-BY-4.0 +More information can be obtained from the VGG website: +www.robots.ox.ac.uk/~vgg/research/very_deep/ +Usage: + with slim.arg_scope(vgg.vgg_arg_scope()): + outputs, end_points = vgg.vgg_a(inputs) + with slim.arg_scope(vgg.vgg_arg_scope()): + outputs, end_points = vgg.vgg_16(inputs) +@@vgg_a +@@vgg_16 +@@vgg_19 +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +slim = tf.contrib.slim + + +def vgg_arg_scope(weight_decay=0.0005): + """Defines the VGG arg scope. + Args: + weight_decay: The l2 regularization coefficient. + Returns: + An arg_scope. + """ + with slim.arg_scope([slim.conv2d, slim.fully_connected], + activation_fn=tf.nn.relu, + weights_regularizer=slim.l2_regularizer(weight_decay), + biases_initializer=tf.zeros_initializer()): + with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc: + return arg_sc + + +def vgg_a(inputs, + num_classes=1000, + is_training=True, + dropout_keep_prob=0.5, + spatial_squeeze=True, + scope='vgg_a'): + """Oxford Net VGG 11-Layers version A Example. + Note: All the fully_connected layers have been transformed to conv2d layers. + To use in classification mode, resize input to 224x224. + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + num_classes: number of predicted classes. + is_training: whether or not the model is being trained. + dropout_keep_prob: the probability that activations are kept in the dropout + layers during training. + spatial_squeeze: whether or not should squeeze the spatial dimensions of the + outputs. Useful to remove unnecessary dimensions for classification. + scope: Optional scope for the variables. + Returns: + the last op containing the log predictions and end_points dict. + """ + with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc: + end_points_collection = sc.name + '_end_points' + # Collect outputs for conv2d, fully_connected and max_pool2d. + with slim.arg_scope([slim.conv2d, slim.max_pool2d], + outputs_collections=end_points_collection): + net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1') + net = slim.max_pool2d(net, [2, 2], scope='pool1') + net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2') + net = slim.max_pool2d(net, [2, 2], scope='pool2') + net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3') + net = slim.max_pool2d(net, [2, 2], scope='pool3') + net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4') + net = slim.max_pool2d(net, [2, 2], scope='pool4') + net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5') + net = slim.max_pool2d(net, [2, 2], scope='pool5') + # Use conv2d instead of fully_connected layers. + net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='dropout6') + net = slim.conv2d(net, 4096, [1, 1], scope='fc7') + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='dropout7') + net = slim.conv2d(net, num_classes, [1, 1], + activation_fn=None, + normalizer_fn=None, + scope='fc8') + # Convert end_points_collection into a end_point dict. 
+ end_points = slim.utils.convert_collection_to_dict(end_points_collection) + if spatial_squeeze: + net = tf.squeeze(net, [1, 2], name='fc8/squeezed') + end_points[sc.name + '/fc8'] = net + return net, end_points +vgg_a.default_image_size = 224 + + +def vgg_16(inputs, + num_classes=1000, + is_training=True, + dropout_keep_prob=0.5, + spatial_squeeze=True, + scope='vgg_16'): + """Oxford Net VGG 16-Layers version D Example. + Note: All the fully_connected layers have been transformed to conv2d layers. + To use in classification mode, resize input to 224x224. + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + num_classes: number of predicted classes. + is_training: whether or not the model is being trained. + dropout_keep_prob: the probability that activations are kept in the dropout + layers during training. + spatial_squeeze: whether or not should squeeze the spatial dimensions of the + outputs. Useful to remove unnecessary dimensions for classification. + scope: Optional scope for the variables. + Returns: + the last op containing the log predictions and end_points dict. + """ + with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc: + end_points_collection = sc.name + '_end_points' + # Collect outputs for conv2d, fully_connected and max_pool2d. + with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], + outputs_collections=end_points_collection): + net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') + net = slim.max_pool2d(net, [2, 2], scope='pool1') + net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') + net = slim.max_pool2d(net, [2, 2], scope='pool2') + net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') + net = slim.max_pool2d(net, [2, 2], scope='pool3') + net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') + net = slim.max_pool2d(net, [2, 2], scope='pool4') + net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5') + net = slim.max_pool2d(net, [2, 2], scope='pool5') + # Use conv2d instead of fully_connected layers. + net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='dropout6') + net = slim.conv2d(net, 4096, [1, 1], scope='fc7') + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='dropout7') + net = slim.conv2d(net, num_classes, [1, 1], + activation_fn=None, + normalizer_fn=None, + scope='fc8') + # Convert end_points_collection into a end_point dict. + end_points = slim.utils.convert_collection_to_dict(end_points_collection) + if spatial_squeeze: + net = tf.squeeze(net, [1, 2], name='fc8/squeezed') + end_points[sc.name + '/fc8'] = net + return net, end_points +vgg_16.default_image_size = 224 + + +def vgg_19(inputs, + num_classes=1000, + is_training=True, + dropout_keep_prob=0.5, + spatial_squeeze=True, + scope='vgg_19'): + """Oxford Net VGG 19-Layers version E Example. + Note: All the fully_connected layers have been transformed to conv2d layers. + To use in classification mode, resize input to 224x224. + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + num_classes: number of predicted classes. + is_training: whether or not the model is being trained. + dropout_keep_prob: the probability that activations are kept in the dropout + layers during training. + spatial_squeeze: whether or not should squeeze the spatial dimensions of the + outputs. Useful to remove unnecessary dimensions for classification. 
+ scope: Optional scope for the variables. + Returns: + the last op containing the log predictions and end_points dict. + """ + with tf.variable_scope(scope, 'vgg_19', [inputs]) as sc: + end_points_collection = sc.name + '_end_points' + # Collect outputs for conv2d, fully_connected and max_pool2d. + with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], + outputs_collections=end_points_collection): + net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') + net = slim.max_pool2d(net, [2, 2], scope='pool1') + net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') + net = slim.max_pool2d(net, [2, 2], scope='pool2') + net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3') + net = slim.max_pool2d(net, [2, 2], scope='pool3') + net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4') + net = slim.max_pool2d(net, [2, 2], scope='pool4') + net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5') + net = slim.max_pool2d(net, [2, 2], scope='pool5') + # Use conv2d instead of fully_connected layers. + net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='dropout6') + net = slim.conv2d(net, 4096, [1, 1], scope='fc7') + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='dropout7') + net = slim.conv2d(net, num_classes, [1, 1], + activation_fn=None, + normalizer_fn=None, + scope='fc8') + # Convert end_points_collection into a end_point dict. + end_points = slim.utils.convert_collection_to_dict(end_points_collection) + if spatial_squeeze: + net = tf.squeeze(net, [1, 2], name='fc8/squeezed') + end_points[sc.name + '/fc8'] = net + return net, end_points +vgg_19.default_image_size = 224 + +# Alias +vgg_d = vgg_16 +vgg_e = vgg_19 \ No newline at end of file diff --git a/StyleMigration/nets/vgg_test.py b/StyleMigration/nets/vgg_test.py new file mode 100644 index 0000000..5927146 --- /dev/null +++ b/StyleMigration/nets/vgg_test.py @@ -0,0 +1,455 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for slim.nets.vgg.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from nets import vgg + +slim = tf.contrib.slim + + +class VGGATest(tf.test.TestCase): + + def testBuild(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = vgg.vgg_a(inputs, num_classes) + self.assertEquals(logits.op.name, 'vgg_a/fc8/squeezed') + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + + def testFullyConvolutional(self): + batch_size = 1 + height, width = 256, 256 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = vgg.vgg_a(inputs, num_classes, spatial_squeeze=False) + self.assertEquals(logits.op.name, 'vgg_a/fc8/BiasAdd') + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, 2, 2, num_classes]) + + def testEndPoints(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + _, end_points = vgg.vgg_a(inputs, num_classes) + expected_names = ['vgg_a/conv1/conv1_1', + 'vgg_a/pool1', + 'vgg_a/conv2/conv2_1', + 'vgg_a/pool2', + 'vgg_a/conv3/conv3_1', + 'vgg_a/conv3/conv3_2', + 'vgg_a/pool3', + 'vgg_a/conv4/conv4_1', + 'vgg_a/conv4/conv4_2', + 'vgg_a/pool4', + 'vgg_a/conv5/conv5_1', + 'vgg_a/conv5/conv5_2', + 'vgg_a/pool5', + 'vgg_a/fc6', + 'vgg_a/fc7', + 'vgg_a/fc8' + ] + self.assertSetEqual(set(end_points.keys()), set(expected_names)) + + def testModelVariables(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + vgg.vgg_a(inputs, num_classes) + expected_names = ['vgg_a/conv1/conv1_1/weights', + 'vgg_a/conv1/conv1_1/biases', + 'vgg_a/conv2/conv2_1/weights', + 'vgg_a/conv2/conv2_1/biases', + 'vgg_a/conv3/conv3_1/weights', + 'vgg_a/conv3/conv3_1/biases', + 'vgg_a/conv3/conv3_2/weights', + 'vgg_a/conv3/conv3_2/biases', + 'vgg_a/conv4/conv4_1/weights', + 'vgg_a/conv4/conv4_1/biases', + 'vgg_a/conv4/conv4_2/weights', + 'vgg_a/conv4/conv4_2/biases', + 'vgg_a/conv5/conv5_1/weights', + 'vgg_a/conv5/conv5_1/biases', + 'vgg_a/conv5/conv5_2/weights', + 'vgg_a/conv5/conv5_2/biases', + 'vgg_a/fc6/weights', + 'vgg_a/fc6/biases', + 'vgg_a/fc7/weights', + 'vgg_a/fc7/biases', + 'vgg_a/fc8/weights', + 'vgg_a/fc8/biases', + ] + model_variables = [v.op.name for v in slim.get_model_variables()] + self.assertSetEqual(set(model_variables), set(expected_names)) + + def testEvaluation(self): + batch_size = 2 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + eval_inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = vgg.vgg_a(eval_inputs, is_training=False) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + predictions = tf.argmax(logits, 1) + self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) + + def testTrainEvalWithReuse(self): + train_batch_size = 2 + eval_batch_size = 1 + train_height, train_width = 224, 224 + eval_height, eval_width = 256, 256 + num_classes = 1000 + with self.test_session(): + train_inputs = tf.random_uniform( + (train_batch_size, train_height, train_width, 3)) + logits, _ = 
vgg.vgg_a(train_inputs) + self.assertListEqual(logits.get_shape().as_list(), + [train_batch_size, num_classes]) + tf.get_variable_scope().reuse_variables() + eval_inputs = tf.random_uniform( + (eval_batch_size, eval_height, eval_width, 3)) + logits, _ = vgg.vgg_a(eval_inputs, is_training=False, + spatial_squeeze=False) + self.assertListEqual(logits.get_shape().as_list(), + [eval_batch_size, 2, 2, num_classes]) + logits = tf.reduce_mean(logits, [1, 2]) + predictions = tf.argmax(logits, 1) + self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) + + def testForward(self): + batch_size = 1 + height, width = 224, 224 + with self.test_session() as sess: + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = vgg.vgg_a(inputs) + sess.run(tf.initialize_all_variables()) + output = sess.run(logits) + self.assertTrue(output.any()) + + +class VGG16Test(tf.test.TestCase): + + def testBuild(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = vgg.vgg_16(inputs, num_classes) + self.assertEquals(logits.op.name, 'vgg_16/fc8/squeezed') + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + + def testFullyConvolutional(self): + batch_size = 1 + height, width = 256, 256 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = vgg.vgg_16(inputs, num_classes, spatial_squeeze=False) + self.assertEquals(logits.op.name, 'vgg_16/fc8/BiasAdd') + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, 2, 2, num_classes]) + + def testEndPoints(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + _, end_points = vgg.vgg_16(inputs, num_classes) + expected_names = ['vgg_16/conv1/conv1_1', + 'vgg_16/conv1/conv1_2', + 'vgg_16/pool1', + 'vgg_16/conv2/conv2_1', + 'vgg_16/conv2/conv2_2', + 'vgg_16/pool2', + 'vgg_16/conv3/conv3_1', + 'vgg_16/conv3/conv3_2', + 'vgg_16/conv3/conv3_3', + 'vgg_16/pool3', + 'vgg_16/conv4/conv4_1', + 'vgg_16/conv4/conv4_2', + 'vgg_16/conv4/conv4_3', + 'vgg_16/pool4', + 'vgg_16/conv5/conv5_1', + 'vgg_16/conv5/conv5_2', + 'vgg_16/conv5/conv5_3', + 'vgg_16/pool5', + 'vgg_16/fc6', + 'vgg_16/fc7', + 'vgg_16/fc8' + ] + self.assertSetEqual(set(end_points.keys()), set(expected_names)) + + def testModelVariables(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + vgg.vgg_16(inputs, num_classes) + expected_names = ['vgg_16/conv1/conv1_1/weights', + 'vgg_16/conv1/conv1_1/biases', + 'vgg_16/conv1/conv1_2/weights', + 'vgg_16/conv1/conv1_2/biases', + 'vgg_16/conv2/conv2_1/weights', + 'vgg_16/conv2/conv2_1/biases', + 'vgg_16/conv2/conv2_2/weights', + 'vgg_16/conv2/conv2_2/biases', + 'vgg_16/conv3/conv3_1/weights', + 'vgg_16/conv3/conv3_1/biases', + 'vgg_16/conv3/conv3_2/weights', + 'vgg_16/conv3/conv3_2/biases', + 'vgg_16/conv3/conv3_3/weights', + 'vgg_16/conv3/conv3_3/biases', + 'vgg_16/conv4/conv4_1/weights', + 'vgg_16/conv4/conv4_1/biases', + 'vgg_16/conv4/conv4_2/weights', + 'vgg_16/conv4/conv4_2/biases', + 'vgg_16/conv4/conv4_3/weights', + 'vgg_16/conv4/conv4_3/biases', + 'vgg_16/conv5/conv5_1/weights', + 'vgg_16/conv5/conv5_1/biases', + 'vgg_16/conv5/conv5_2/weights', + 'vgg_16/conv5/conv5_2/biases', + 
'vgg_16/conv5/conv5_3/weights', + 'vgg_16/conv5/conv5_3/biases', + 'vgg_16/fc6/weights', + 'vgg_16/fc6/biases', + 'vgg_16/fc7/weights', + 'vgg_16/fc7/biases', + 'vgg_16/fc8/weights', + 'vgg_16/fc8/biases', + ] + model_variables = [v.op.name for v in slim.get_model_variables()] + self.assertSetEqual(set(model_variables), set(expected_names)) + + def testEvaluation(self): + batch_size = 2 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + eval_inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = vgg.vgg_16(eval_inputs, is_training=False) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + predictions = tf.argmax(logits, 1) + self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) + + def testTrainEvalWithReuse(self): + train_batch_size = 2 + eval_batch_size = 1 + train_height, train_width = 224, 224 + eval_height, eval_width = 256, 256 + num_classes = 1000 + with self.test_session(): + train_inputs = tf.random_uniform( + (train_batch_size, train_height, train_width, 3)) + logits, _ = vgg.vgg_16(train_inputs) + self.assertListEqual(logits.get_shape().as_list(), + [train_batch_size, num_classes]) + tf.get_variable_scope().reuse_variables() + eval_inputs = tf.random_uniform( + (eval_batch_size, eval_height, eval_width, 3)) + logits, _ = vgg.vgg_16(eval_inputs, is_training=False, + spatial_squeeze=False) + self.assertListEqual(logits.get_shape().as_list(), + [eval_batch_size, 2, 2, num_classes]) + logits = tf.reduce_mean(logits, [1, 2]) + predictions = tf.argmax(logits, 1) + self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) + + def testForward(self): + batch_size = 1 + height, width = 224, 224 + with self.test_session() as sess: + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = vgg.vgg_16(inputs) + sess.run(tf.initialize_all_variables()) + output = sess.run(logits) + self.assertTrue(output.any()) + + +class VGG19Test(tf.test.TestCase): + + def testBuild(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = vgg.vgg_19(inputs, num_classes) + self.assertEquals(logits.op.name, 'vgg_19/fc8/squeezed') + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + + def testFullyConvolutional(self): + batch_size = 1 + height, width = 256, 256 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = vgg.vgg_19(inputs, num_classes, spatial_squeeze=False) + self.assertEquals(logits.op.name, 'vgg_19/fc8/BiasAdd') + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, 2, 2, num_classes]) + + def testEndPoints(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + _, end_points = vgg.vgg_19(inputs, num_classes) + expected_names = [ + 'vgg_19/conv1/conv1_1', + 'vgg_19/conv1/conv1_2', + 'vgg_19/pool1', + 'vgg_19/conv2/conv2_1', + 'vgg_19/conv2/conv2_2', + 'vgg_19/pool2', + 'vgg_19/conv3/conv3_1', + 'vgg_19/conv3/conv3_2', + 'vgg_19/conv3/conv3_3', + 'vgg_19/conv3/conv3_4', + 'vgg_19/pool3', + 'vgg_19/conv4/conv4_1', + 'vgg_19/conv4/conv4_2', + 'vgg_19/conv4/conv4_3', + 'vgg_19/conv4/conv4_4', + 'vgg_19/pool4', + 'vgg_19/conv5/conv5_1', + 'vgg_19/conv5/conv5_2', + 'vgg_19/conv5/conv5_3', + 'vgg_19/conv5/conv5_4', + 'vgg_19/pool5', + 'vgg_19/fc6', + 
'vgg_19/fc7', + 'vgg_19/fc8' + ] + self.assertSetEqual(set(end_points.keys()), set(expected_names)) + + def testModelVariables(self): + batch_size = 5 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + inputs = tf.random_uniform((batch_size, height, width, 3)) + vgg.vgg_19(inputs, num_classes) + expected_names = [ + 'vgg_19/conv1/conv1_1/weights', + 'vgg_19/conv1/conv1_1/biases', + 'vgg_19/conv1/conv1_2/weights', + 'vgg_19/conv1/conv1_2/biases', + 'vgg_19/conv2/conv2_1/weights', + 'vgg_19/conv2/conv2_1/biases', + 'vgg_19/conv2/conv2_2/weights', + 'vgg_19/conv2/conv2_2/biases', + 'vgg_19/conv3/conv3_1/weights', + 'vgg_19/conv3/conv3_1/biases', + 'vgg_19/conv3/conv3_2/weights', + 'vgg_19/conv3/conv3_2/biases', + 'vgg_19/conv3/conv3_3/weights', + 'vgg_19/conv3/conv3_3/biases', + 'vgg_19/conv3/conv3_4/weights', + 'vgg_19/conv3/conv3_4/biases', + 'vgg_19/conv4/conv4_1/weights', + 'vgg_19/conv4/conv4_1/biases', + 'vgg_19/conv4/conv4_2/weights', + 'vgg_19/conv4/conv4_2/biases', + 'vgg_19/conv4/conv4_3/weights', + 'vgg_19/conv4/conv4_3/biases', + 'vgg_19/conv4/conv4_4/weights', + 'vgg_19/conv4/conv4_4/biases', + 'vgg_19/conv5/conv5_1/weights', + 'vgg_19/conv5/conv5_1/biases', + 'vgg_19/conv5/conv5_2/weights', + 'vgg_19/conv5/conv5_2/biases', + 'vgg_19/conv5/conv5_3/weights', + 'vgg_19/conv5/conv5_3/biases', + 'vgg_19/conv5/conv5_4/weights', + 'vgg_19/conv5/conv5_4/biases', + 'vgg_19/fc6/weights', + 'vgg_19/fc6/biases', + 'vgg_19/fc7/weights', + 'vgg_19/fc7/biases', + 'vgg_19/fc8/weights', + 'vgg_19/fc8/biases', + ] + model_variables = [v.op.name for v in slim.get_model_variables()] + self.assertSetEqual(set(model_variables), set(expected_names)) + + def testEvaluation(self): + batch_size = 2 + height, width = 224, 224 + num_classes = 1000 + with self.test_session(): + eval_inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = vgg.vgg_19(eval_inputs, is_training=False) + self.assertListEqual(logits.get_shape().as_list(), + [batch_size, num_classes]) + predictions = tf.argmax(logits, 1) + self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) + + def testTrainEvalWithReuse(self): + train_batch_size = 2 + eval_batch_size = 1 + train_height, train_width = 224, 224 + eval_height, eval_width = 256, 256 + num_classes = 1000 + with self.test_session(): + train_inputs = tf.random_uniform( + (train_batch_size, train_height, train_width, 3)) + logits, _ = vgg.vgg_19(train_inputs) + self.assertListEqual(logits.get_shape().as_list(), + [train_batch_size, num_classes]) + tf.get_variable_scope().reuse_variables() + eval_inputs = tf.random_uniform( + (eval_batch_size, eval_height, eval_width, 3)) + logits, _ = vgg.vgg_19(eval_inputs, is_training=False, + spatial_squeeze=False) + self.assertListEqual(logits.get_shape().as_list(), + [eval_batch_size, 2, 2, num_classes]) + logits = tf.reduce_mean(logits, [1, 2]) + predictions = tf.argmax(logits, 1) + self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) + + def testForward(self): + batch_size = 1 + height, width = 224, 224 + with self.test_session() as sess: + inputs = tf.random_uniform((batch_size, height, width, 3)) + logits, _ = vgg.vgg_19(inputs) + sess.run(tf.initialize_all_variables()) + output = sess.run(logits) + self.assertTrue(output.any()) + +if __name__ == '__main__': + tf.test.main() diff --git a/StyleMigration/preprocessing/__init__.py b/StyleMigration/preprocessing/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ 
b/StyleMigration/preprocessing/__init__.py @@ -0,0 +1 @@ + diff --git a/StyleMigration/preprocessing/cifarnet_preprocessing.py b/StyleMigration/preprocessing/cifarnet_preprocessing.py new file mode 100644 index 0000000..01e11d3 --- /dev/null +++ b/StyleMigration/preprocessing/cifarnet_preprocessing.py @@ -0,0 +1,114 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Provides utilities to preprocess images in CIFAR-10. + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() +import tf_slim as slim + +_PADDING = 4 + + +def preprocess_for_train(image, + output_height, + output_width, + padding=_PADDING): + """Preprocesses the given image for training. + + Note that the actual resizing scale is sampled from + [`resize_size_min`, `resize_size_max`]. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + padding: The amound of padding before and after each dimension of the image. + + Returns: + A preprocessed image. + """ + tf.image_summary('image', tf.expand_dims(image, 0)) + + # Transform the image to floats. + image = tf.to_float(image) + if padding > 0: + image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]]) + # Randomly crop a [height, width] section of the image. + distorted_image = tf.random_crop(image, + [output_height, output_width, 3]) + + # Randomly flip the image horizontally. + distorted_image = tf.image.random_flip_left_right(distorted_image) + + tf.image_summary('distorted_image', tf.expand_dims(distorted_image, 0)) + + # Because these operations are not commutative, consider randomizing + # the order their operation. + distorted_image = tf.image.random_brightness(distorted_image, + max_delta=63) + distorted_image = tf.image.random_contrast(distorted_image, + lower=0.2, upper=1.8) + # Subtract off the mean and divide by the variance of the pixels. + return tf.image.per_image_whitening(distorted_image) + + +def preprocess_for_eval(image, output_height, output_width): + """Preprocesses the given image for evaluation. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + + Returns: + A preprocessed image. + """ + tf.image_summary('image', tf.expand_dims(image, 0)) + # Transform the image to floats. + image = tf.to_float(image) + + # Resize and crop if needed. + resized_image = tf.image.resize_image_with_crop_or_pad(image, + output_width, + output_height) + tf.image_summary('resized_image', tf.expand_dims(resized_image, 0)) + + # Subtract off the mean and divide by the variance of the pixels. 
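For reference on the comment above: tf.image.per_image_whitening (renamed per_image_standardization in later TensorFlow releases) subtracts the per-image mean and divides by the standard deviation, clamped from below so a constant image does not divide by zero. A small NumPy sketch of that operation, illustrative only and not part of this file:

import numpy as np

def per_image_standardization(image):
    """NumPy equivalent of the whitening step: zero mean, roughly unit variance."""
    image = image.astype(np.float64)
    num_elements = image.size
    mean = image.mean()
    # The stddev is clamped so a constant image does not divide by zero.
    adjusted_stddev = max(image.std(), 1.0 / np.sqrt(num_elements))
    return (image - mean) / adjusted_stddev

img = np.random.randint(0, 256, size=(32, 32, 3))
out = per_image_standardization(img)
print(out.mean(), out.std())  # approximately 0 and 1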
+ return tf.image.per_image_whitening(resized_image) + + +def preprocess_image(image, output_height, output_width, is_training=False): + """Preprocesses the given image. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + is_training: `True` if we're preprocessing the image for training and + `False` otherwise. + + Returns: + A preprocessed image. + """ + if is_training: + return preprocess_for_train(image, output_height, output_width) + else: + return preprocess_for_eval(image, output_height, output_width) diff --git a/StyleMigration/preprocessing/inception_preprocessing.py b/StyleMigration/preprocessing/inception_preprocessing.py new file mode 100644 index 0000000..2a89530 --- /dev/null +++ b/StyleMigration/preprocessing/inception_preprocessing.py @@ -0,0 +1,305 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Provides utilities to preprocess images for the Inception networks.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() + +from tensorflow.python.ops import control_flow_ops + + +def apply_with_random_selector(x, func, num_cases): + """Computes func(x, sel), with sel sampled from [0...num_cases-1]. + + Args: + x: input Tensor. + func: Python function to apply. + num_cases: Python int32, number of cases to sample sel from. + + Returns: + The result of func(x, sel), where func receives the value of the + selector as a python integer, but sel is sampled dynamically. + """ + sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) + # Pass the real x only to one of the func calls. + return control_flow_ops.merge([ + func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) + for case in range(num_cases)])[0] + + +def distort_color(image, color_ordering=0, fast_mode=True, scope=None): + """Distort the color of a Tensor image. + + Each color distortion is non-commutative and thus ordering of the color ops + matters. Ideally we would randomly permute the ordering of the color ops. + Rather then adding that level of complication, we select a distinct ordering + of color ops for each preprocessing thread. + + Args: + image: 3-D Tensor containing single image in [0, 1]. + color_ordering: Python int, a type of distortion (valid values: 0-3). + fast_mode: Avoids slower ops (random_hue and random_contrast) + scope: Optional scope for name_scope. + Returns: + 3-D Tensor color-distorted image on range [0, 1] + Raises: + ValueError: if color_ordering not in [0, 3] + """ + with tf.name_scope(scope, 'distort_color', [image]): + if fast_mode: + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) 
+ image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + else: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + else: + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + elif color_ordering == 1: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + elif color_ordering == 2: + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + elif color_ordering == 3: + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + else: + raise ValueError('color_ordering must be in [0, 3]') + + # The random_* ops do not necessarily clamp. + return tf.clip_by_value(image, 0.0, 1.0) + + +def distorted_bounding_box_crop(image, + bbox, + min_object_covered=0.1, + aspect_ratio_range=(0.75, 1.33), + area_range=(0.05, 1.0), + max_attempts=100, + scope=None): + """Generates cropped_image using a one of the bboxes randomly distorted. + + See `tf.image.sample_distorted_bounding_box` for more documentation. + + Args: + image: 3-D Tensor of image (it will be converted to floats in [0, 1]). + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole + image. + min_object_covered: An optional `float`. Defaults to `0.1`. The cropped + area of the image must contain at least this fraction of any bounding box + supplied. + aspect_ratio_range: An optional list of `floats`. The cropped area of the + image must have an aspect ratio = width / height within this range. + area_range: An optional list of `floats`. The cropped area of the image + must contain a fraction of the supplied image within in this range. + max_attempts: An optional `int`. Number of attempts at generating a cropped + region of the image of the specified constraints. After `max_attempts` + failures, return the entire image. + scope: Optional scope for name_scope. + Returns: + A tuple, a 3-D Tensor cropped_image and the distorted bbox + """ + with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + + # A large fraction of image datasets contain a human-annotated bounding + # box delineating the region of the image containing the object of interest. + # We choose to create a new bounding box for the object which is a randomly + # distorted version of the human-annotated bounding box that obeys an + # allowed range of aspect ratios, sizes and overlap with the human-annotated + # bounding box. If no box is supplied, then we assume the bounding box is + # the entire image. 
+ sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + tf.shape(image), + bounding_boxes=bbox, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=max_attempts, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box + + # Crop the image to the specified bounding box. + cropped_image = tf.slice(image, bbox_begin, bbox_size) + return cropped_image, distort_bbox + + +def preprocess_for_train(image, height, width, bbox, + fast_mode=True, + scope=None): + """Distort one image for training a network. + + Distorting images provides a useful technique for augmenting the data + set during training in order to make the network invariant to aspects + of the image that do not effect the label. + + Additionally it would create image_summaries to display the different + transformations applied to the image. + + Args: + image: 3-D Tensor of image. If dtype is tf.float32 then the range should be + [0, 1], otherwise it would converted to tf.float32 assuming that the range + is [0, MAX], where MAX is largest positive representable number for + int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). + height: integer + width: integer + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. + fast_mode: Optional boolean, if True avoids slower transformations (i.e. + bi-cubic resizing, random_hue or random_contrast). + scope: Optional scope for name_scope. + Returns: + 3-D float Tensor of distorted image used for training with range [-1, 1]. + """ + with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): + if bbox is None: + bbox = tf.constant([0.0, 0.0, 1.0, 1.0], + dtype=tf.float32, + shape=[1, 1, 4]) + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), + bbox) + tf.image_summary('image_with_bounding_boxes', image_with_box) + + distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox) + # Restore the shape since the dynamic slice based upon the bbox_size loses + # the third dimension. + distorted_image.set_shape([None, None, 3]) + image_with_distorted_box = tf.image.draw_bounding_boxes( + tf.expand_dims(image, 0), distorted_bbox) + tf.image_summary('images_with_distorted_bounding_box', + image_with_distorted_box) + + # This resizing operation may distort the images because the aspect + # ratio is not respected. We select a resize method in a round robin + # fashion based on the thread number. + # Note that ResizeMethod contains 4 enumerated resizing methods. + + # We select only 1 case for fast_mode bilinear. + num_resize_cases = 1 if fast_mode else 4 + distorted_image = apply_with_random_selector( + distorted_image, + lambda x, method: tf.image.resize_images(x, [height, width], method=method), + num_cases=num_resize_cases) + + tf.image_summary('cropped_resized_image', + tf.expand_dims(distorted_image, 0)) + + # Randomly flip the image horizontally. + distorted_image = tf.image.random_flip_left_right(distorted_image) + + # Randomly distort the colors. There are 4 ways to do it. 
+ distorted_image = apply_with_random_selector( + distorted_image, + lambda x, ordering: distort_color(x, ordering, fast_mode), + num_cases=4) + + tf.image_summary('final_distorted_image', + tf.expand_dims(distorted_image, 0)) + distorted_image = tf.sub(distorted_image, 0.5) + distorted_image = tf.mul(distorted_image, 2.0) + return distorted_image + + +def preprocess_for_eval(image, height, width, + central_fraction=0.875, scope=None): + """Prepare one image for evaluation. + + If height and width are specified it would output an image with that size by + applying resize_bilinear. + + If central_fraction is specified it would cropt the central fraction of the + input image. + + Args: + image: 3-D Tensor of image. If dtype is tf.float32 then the range should be + [0, 1], otherwise it would converted to tf.float32 assuming that the range + is [0, MAX], where MAX is largest positive representable number for + int(8/16/32) data type (see `tf.image.convert_image_dtype` for details) + height: integer + width: integer + central_fraction: Optional Float, fraction of the image to crop. + scope: Optional scope for name_scope. + Returns: + 3-D float Tensor of prepared image. + """ + with tf.name_scope(scope, 'eval_image', [image, height, width]): + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize_bilinear(image, [height, width], + align_corners=False) + image = tf.squeeze(image, [0]) + image = tf.sub(image, 0.5) + image = tf.mul(image, 2.0) + return image + + +def preprocess_image(image, height, width, + is_training=False, + bbox=None, + fast_mode=True): + """Pre-process one image for training or evaluation. + + Args: + image: 3-D Tensor [height, width, channels] with the image. + height: integer, image expected height. + width: integer, image expected width. + is_training: Boolean. If true it would transform an image for train, + otherwise it would transform it for evaluation. + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged as + [ymin, xmin, ymax, xmax]. + fast_mode: Optional boolean, if True avoids slower transformations. + + Returns: + 3-D float Tensor containing an appropriately scaled image + + Raises: + ValueError: if user does not provide bounding box + """ + if is_training: + return preprocess_for_train(image, height, width, bbox, fast_mode) + else: + return preprocess_for_eval(image, height, width) diff --git a/StyleMigration/preprocessing/lenet_preprocessing.py b/StyleMigration/preprocessing/lenet_preprocessing.py new file mode 100644 index 0000000..7732a39 --- /dev/null +++ b/StyleMigration/preprocessing/lenet_preprocessing.py @@ -0,0 +1,44 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
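The inception preprocessing defined above ends the same way for both paths: pixels are first brought into [0, 1] via convert_image_dtype, the eval path additionally keeps the central 87.5% of the image and resizes it bilinearly, and the final sub/mul pair maps the result to [-1, 1]. A minimal NumPy sketch of that scaling, illustrative only:

import numpy as np

def to_inception_range(image_uint8):
    """Map uint8 pixels to the [-1, 1] range the Inception-style preprocessing produces."""
    image = image_uint8.astype(np.float32) / 255.0   # convert_image_dtype -> [0, 1]
    image = image - 0.5                              # tf.sub(image, 0.5)
    return image * 2.0                               # tf.mul(image, 2.0)

img = np.random.randint(0, 256, size=(299, 299, 3), dtype=np.uint8)
scaled = to_inception_range(img)
print(scaled.min(), scaled.max())  # stays within [-1.0, 1.0]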
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Provides utilities for preprocessing.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() +import tf_slim as slim + + +def preprocess_image(image, output_height, output_width, is_training): + """Preprocesses the given image. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + is_training: `True` if we're preprocessing the image for training and + `False` otherwise. + + Returns: + A preprocessed image. + """ + image = tf.to_float(image) + image = tf.image.resize_image_with_crop_or_pad( + image, output_width, output_height) + image = tf.sub(image, 128.0) + image = tf.div(image, 128.0) + return image diff --git a/StyleMigration/preprocessing/preprocessing_factory.py b/StyleMigration/preprocessing/preprocessing_factory.py new file mode 100644 index 0000000..c6d3e37 --- /dev/null +++ b/StyleMigration/preprocessing/preprocessing_factory.py @@ -0,0 +1,76 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains a factory for building various models.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() +import tf_slim as slim + +from preprocessing import cifarnet_preprocessing +from preprocessing import inception_preprocessing +from preprocessing import lenet_preprocessing +from preprocessing import vgg_preprocessing + + +def get_preprocessing(name, is_training=False): + """Returns preprocessing_fn(image, height, width, **kwargs). + + Args: + name: The name of the preprocessing function. + is_training: `True` if the model is being used for training and `False` + otherwise. + + Returns: + preprocessing_fn: A function that preprocessing a single image (pre-batch). + It has the following signature: + image = preprocessing_fn(image, output_height, output_width, ...). + + Raises: + ValueError: If Preprocessing `name` is not recognized. 
+ """ + preprocessing_fn_map = { + 'cifarnet': cifarnet_preprocessing, + 'inception': inception_preprocessing, + 'inception_v1': inception_preprocessing, + 'inception_v2': inception_preprocessing, + 'inception_v3': inception_preprocessing, + 'inception_v4': inception_preprocessing, + 'inception_resnet_v2': inception_preprocessing, + 'lenet': lenet_preprocessing, + 'resnet_v1_50': vgg_preprocessing, + 'resnet_v1_101': vgg_preprocessing, + 'resnet_v1_152': vgg_preprocessing, + 'vgg': vgg_preprocessing, + 'vgg_a': vgg_preprocessing, + 'vgg_16': vgg_preprocessing, + 'vgg_19': vgg_preprocessing, + } + + if name not in preprocessing_fn_map: + raise ValueError('Preprocessing name [%s] was not recognized' % name) + + def preprocessing_fn(image, output_height, output_width, **kwargs): + return preprocessing_fn_map[name].preprocess_image( + image, output_height, output_width, is_training=is_training, **kwargs) + + def unprocessing_fn(image, **kwargs): + return preprocessing_fn_map[name].unprocess_image( + image, **kwargs) + + return preprocessing_fn, unprocessing_fn diff --git a/StyleMigration/preprocessing/vgg_preprocessing.py b/StyleMigration/preprocessing/vgg_preprocessing.py new file mode 100644 index 0000000..b067718 --- /dev/null +++ b/StyleMigration/preprocessing/vgg_preprocessing.py @@ -0,0 +1,393 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Provides utilities to preprocess images. + +The preprocessing steps for VGG were introduced in the following technical +report: + + Very Deep Convolutional Networks For Large-Scale Image Recognition + Karen Simonyan and Andrew Zisserman + arXiv technical report, 2015 + PDF: http://arxiv.org/pdf/1409.1556.pdf + ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf + CC-BY-4.0 + +More information can be obtained from the VGG website: +www.robots.ox.ac.uk/~vgg/research/very_deep/ +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() +import tf_slim as slim + +from tensorflow.python.ops import control_flow_ops + + +_R_MEAN = 123.68 +_G_MEAN = 116.78 +_B_MEAN = 103.94 + +_RESIZE_SIDE_MIN = 256 +_RESIZE_SIDE_MAX = 512 + + +def _crop(image, offset_height, offset_width, crop_height, crop_width): + """Crops the given image using the provided offsets and sizes. + + Note that the method doesn't assume we know the input image size but it does + assume we know the input image rank. + + Args: + image: an image of shape [height, width, channels]. + offset_height: a scalar tensor indicating the height offset. + offset_width: a scalar tensor indicating the width offset. + crop_height: the height of the cropped image. + crop_width: the width of the cropped image. + + Returns: + the cropped (and resized) image. 
+ + Raises: + InvalidArgumentError: if the rank is not 3 or if the image dimensions are + less than the crop size. + """ + original_shape = tf.shape(image) + + rank_assertion = tf.Assert( + tf.equal(tf.rank(image), 3), + ['Rank of image must be equal to 3.']) + cropped_shape = control_flow_ops.with_dependencies( + [rank_assertion], + tf.stack([crop_height, crop_width, original_shape[2]])) + + # print(original_shape[0], crop_height) + # print(original_shape[1], crop_width) + size_assertion = tf.Assert( + tf.logical_and( + tf.greater_equal(original_shape[0], crop_height), + tf.greater_equal(original_shape[1], crop_width)), + ['Crop size greater than the image size.']) + + offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0])) + + # Use tf.slice instead of crop_to_bounding box as it accepts tensors to + # define the crop size. + image = control_flow_ops.with_dependencies( + [size_assertion], + tf.slice(image, offsets, cropped_shape)) + return tf.reshape(image, cropped_shape) + + +def _random_crop(image_list, crop_height, crop_width): + """Crops the given list of images. + + The function applies the same crop to each image in the list. This can be + effectively applied when there are multiple image inputs of the same + dimension such as: + + image, depths, normals = _random_crop([image, depths, normals], 120, 150) + + Args: + image_list: a list of image tensors of the same dimension but possibly + varying channel. + crop_height: the new height. + crop_width: the new width. + + Returns: + the image_list with cropped images. + + Raises: + ValueError: if there are multiple image inputs provided with different size + or the images are smaller than the crop dimensions. + """ + if not image_list: + raise ValueError('Empty image_list.') + + # Compute the rank assertions. + rank_assertions = [] + for i in range(len(image_list)): + image_rank = tf.rank(image_list[i]) + rank_assert = tf.Assert( + tf.equal(image_rank, 3), + ['Wrong rank for tensor %s [expected] [actual]', + image_list[i].name, 3, image_rank]) + rank_assertions.append(rank_assert) + + image_shape = control_flow_ops.with_dependencies( + [rank_assertions[0]], + tf.shape(image_list[0])) + image_height = image_shape[0] + image_width = image_shape[1] + crop_size_assert = tf.Assert( + tf.logical_and( + tf.greater_equal(image_height, crop_height), + tf.greater_equal(image_width, crop_width)), + ['Crop size greater than the image size.']) + + asserts = [rank_assertions[0], crop_size_assert] + + for i in range(1, len(image_list)): + image = image_list[i] + asserts.append(rank_assertions[i]) + shape = control_flow_ops.with_dependencies([rank_assertions[i]], + tf.shape(image)) + height = shape[0] + width = shape[1] + + height_assert = tf.Assert( + tf.equal(height, image_height), + ['Wrong height for tensor %s [expected][actual]', + image.name, height, image_height]) + width_assert = tf.Assert( + tf.equal(width, image_width), + ['Wrong width for tensor %s [expected][actual]', + image.name, width, image_width]) + asserts.extend([height_assert, width_assert]) + + # Create a random bounding box. + # + # Use tf.random_uniform and not numpy.random.rand as doing the former would + # generate random numbers at graph eval time, unlike the latter which + # generates random numbers at graph definition time. 
+ max_offset_height = control_flow_ops.with_dependencies( + asserts, tf.reshape(image_height - crop_height + 1, [])) + max_offset_width = control_flow_ops.with_dependencies( + asserts, tf.reshape(image_width - crop_width + 1, [])) + offset_height = tf.random_uniform( + [], maxval=max_offset_height, dtype=tf.int32) + offset_width = tf.random_uniform( + [], maxval=max_offset_width, dtype=tf.int32) + + return [_crop(image, offset_height, offset_width, + crop_height, crop_width) for image in image_list] + + +def _central_crop(image_list, crop_height, crop_width): + """Performs central crops of the given image list. + + Args: + image_list: a list of image tensors of the same dimension but possibly + varying channel. + crop_height: the height of the image following the crop. + crop_width: the width of the image following the crop. + + Returns: + the list of cropped images. + """ + outputs = [] + for image in image_list: + image_height = tf.shape(image)[0] + image_width = tf.shape(image)[1] + + offset_height = (image_height - crop_height) / 2 + offset_width = (image_width - crop_width) / 2 + outputs.append(_crop(image, offset_height, offset_width, + crop_height, crop_width)) + return outputs + + +def _mean_image_subtraction(image, means): + """Subtracts the given means from each image channel. + + For example: + means = [123.68, 116.779, 103.939] + image = _mean_image_subtraction(image, means) + + Note that the rank of `image` must be known. + + Args: + image: a tensor of size [height, width, C]. + means: a C-vector of values to subtract from each channel. + + Returns: + the centered image. + + Raises: + ValueError: If the rank of `image` is unknown, if `image` has a rank other + than three or if the number of channels in `image` doesn't match the + number of values in `means`. + """ + if image.get_shape().ndims != 3: + raise ValueError('Input must be of size [height, width, C>0]') + num_channels = image.get_shape().as_list()[-1] + if len(means) != num_channels: + raise ValueError('len(means) must match the number of channels') + + channels = tf.split(image, num_channels, 2) + for i in range(num_channels): + channels[i] -= means[i] + return tf.concat(channels, 2) + + +def _mean_image_add(image, means): + if image.get_shape().ndims != 3: + raise ValueError('Input must be of size [height, width, C>0]') + num_channels = image.get_shape().as_list()[-1] + if len(means) != num_channels: + raise ValueError('len(means) must match the number of channels') + + channels = tf.split(image, num_channels, 2) + for i in range(num_channels): + channels[i] += means[i] + return tf.concat(channels, 2) + + +def _smallest_size_at_least(height, width, target_height, target_width): + """Computes new shape with the smallest side equal to `smallest_side`. + + Computes new shape with the smallest side equal to `smallest_side` while + preserving the original aspect ratio. + + Args: + height: an int32 scalar tensor indicating the current height. + width: an int32 scalar tensor indicating the current width. + smallest_side: A python integer or scalar `Tensor` indicating the size of + the smallest side after resize. + + Returns: + new_height: an int32 scalar tensor indicating the new height. + new_width: and int32 scalar tensor indicating the new width. 
+ """ + target_height = tf.convert_to_tensor(target_height, dtype=tf.int32) + target_width = tf.convert_to_tensor(target_width, dtype=tf.int32) + + height = tf.to_float(height) + width = tf.to_float(width) + target_height = tf.to_float(target_height) + target_width = tf.to_float(target_width) + + scale = tf.cond(tf.greater(target_height / height, target_width / width), + lambda: target_height / height, + lambda: target_width / width) + new_height = tf.to_int32(tf.round(height * scale)) + new_width = tf.to_int32(tf.round(width * scale)) + return new_height, new_width + + +def _aspect_preserving_resize(image, target_height, target_width): + """Resize images preserving the original aspect ratio. + + Args: + image: A 3-D image `Tensor`. + smallest_side: A python integer or scalar `Tensor` indicating the size of + the smallest side after resize. + + Returns: + resized_image: A 3-D tensor containing the resized image. + """ + target_height = tf.convert_to_tensor(target_height, dtype=tf.int32) + target_width = tf.convert_to_tensor(target_width, dtype=tf.int32) + + shape = tf.shape(image) + height = shape[0] + width = shape[1] + new_height, new_width = _smallest_size_at_least(height, width, target_height, target_width) + image = tf.expand_dims(image, 0) + resized_image = tf.image.resize_bilinear(image, [new_height, new_width], + align_corners=False) + resized_image = tf.squeeze(resized_image) + resized_image.set_shape([None, None, 3]) + return resized_image + + +def preprocess_for_train(image, + output_height, + output_width, + resize_side_min=_RESIZE_SIDE_MIN, + resize_side_max=_RESIZE_SIDE_MAX): + """Preprocesses the given image for training. + + Note that the actual resizing scale is sampled from + [`resize_size_min`, `resize_size_max`]. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + resize_side_min: The lower bound for the smallest side of the image for + aspect-preserving resizing. + resize_side_max: The upper bound for the smallest side of the image for + aspect-preserving resizing. + + Returns: + A preprocessed image. + """ + resize_side = tf.random_uniform( + [], minval=resize_side_min, maxval=resize_side_max + 1, dtype=tf.int32) + + image = _aspect_preserving_resize(image, resize_side) + image = _random_crop([image], output_height, output_width)[0] + image.set_shape([output_height, output_width, 3]) + image = tf.to_float(image) + image = tf.image.random_flip_left_right(image) + return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) + + +def preprocess_for_eval(image, output_height, output_width, resize_side): + """Preprocesses the given image for evaluation. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + + Returns: + A preprocessed image. 
+ """ + image = _aspect_preserving_resize(image, output_height, output_width) + image = _central_crop([image], output_height, output_width)[0] + # image = tf.image.resize_image_with_crop_or_pad(image, output_height, output_width) + image.set_shape([output_height, output_width, 3]) + image = tf.to_float(image) + return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) + + +def preprocess_image(image, output_height, output_width, is_training=False, + resize_side_min=_RESIZE_SIDE_MIN, + resize_side_max=_RESIZE_SIDE_MAX, + ): + """Preprocesses the given image. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + is_training: `True` if we're preprocessing the image for training and + `False` otherwise. + resize_side_min: The lower bound for the smallest side of the image for + aspect-preserving resizing. If `is_training` is `False`, then this value + is used for rescaling. + resize_side_max: The upper bound for the smallest side of the image for + aspect-preserving resizing. If `is_training` is `False`, this value is + ignored. Otherwise, the resize side is sampled from + [resize_size_min, resize_size_max]. + + Returns: + A preprocessed image. + """ + if is_training: + return preprocess_for_train(image, output_height, output_width, + resize_side_min, resize_side_max) + else: + return preprocess_for_eval(image, output_height, output_width, + resize_side_min) + + +def unprocess_image(image): + return _mean_image_add(image, [_R_MEAN, _G_MEAN, _B_MEAN]) diff --git a/StyleMigration/python b/StyleMigration/python new file mode 100644 index 0000000..e69de29 diff --git a/StyleMigration/reader.py b/StyleMigration/reader.py new file mode 100644 index 0000000..e1efee1 --- /dev/null +++ b/StyleMigration/reader.py @@ -0,0 +1,27 @@ +from os import listdir +from os.path import isfile, join +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() + + +def get_image(path, height, width, preprocess_fn): + png = path.lower().endswith('png') + img_bytes = tf.read_file(path) + image = tf.image.decode_png(img_bytes, channels=3) if png else tf.image.decode_jpeg(img_bytes, channels=3) + return preprocess_fn(image, height, width) + + +def image(batch_size, height, width, path, preprocess_fn, epochs=2, shuffle=True): + filenames = [join(path, f) for f in listdir(path) if isfile(join(path, f))] + if not shuffle: + filenames = sorted(filenames) + + png = filenames[0].lower().endswith('png') # If first file is a png, assume they all are + + filename_queue = tf.train.string_input_producer(filenames, shuffle=shuffle, num_epochs=epochs) + reader = tf.WholeFileReader() + _, img_bytes = reader.read(filename_queue) + image = tf.image.decode_png(img_bytes, channels=3) if png else tf.image.decode_jpeg(img_bytes, channels=3) + + processed_image = preprocess_fn(image, height, width) + return tf.train.batch([processed_image], batch_size, dynamic_pad=True) diff --git a/StyleMigration/train.py b/StyleMigration/train.py new file mode 100644 index 0000000..ad05fb6 --- /dev/null +++ b/StyleMigration/train.py @@ -0,0 +1,146 @@ +# coding: utf-8 +from __future__ import print_function +from __future__ import division +import tensorflow as tf +from nets import nets_factory +from preprocessing import preprocessing_factory +import reader +import model +import time +import losses +import utils +import os +import argparse + +slim = tf.contrib.slim + + +def parse_args(): + parser = 
argparse.ArgumentParser() + parser.add_argument('-c', '--conf', default='conf/mosaic.yml', help='the path to the conf file') + return parser.parse_args() + + +def main(FLAGS): + style_features_t = losses.get_style_features(FLAGS) + + # Make sure the training path exists. + training_path = os.path.join(FLAGS.model_path, FLAGS.naming) + if not(os.path.exists(training_path)): + os.makedirs(training_path) + + with tf.Graph().as_default(): + with tf.Session() as sess: + """Build Network""" + network_fn = nets_factory.get_network_fn( + FLAGS.loss_model, + num_classes=1, + is_training=False) + + image_preprocessing_fn, image_unprocessing_fn = preprocessing_factory.get_preprocessing( + FLAGS.loss_model, + is_training=False) + processed_images = reader.image(FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size, + 'train2014/', image_preprocessing_fn, epochs=FLAGS.epoch) + generated = model.net(processed_images, training=True) + processed_generated = [image_preprocessing_fn(image, FLAGS.image_size, FLAGS.image_size) + for image in tf.unstack(generated, axis=0, num=FLAGS.batch_size) + ] + processed_generated = tf.stack(processed_generated) + _, endpoints_dict = network_fn(tf.concat([processed_generated, processed_images], 0), spatial_squeeze=False) + + # Log the structure of loss network + tf.logging.info('Loss network layers(You can define them in "content_layers" and "style_layers"):') + for key in endpoints_dict: + tf.logging.info(key) + + """Build Losses""" + content_loss = losses.content_loss(endpoints_dict, FLAGS.content_layers) + style_loss, style_loss_summary = losses.style_loss(endpoints_dict, style_features_t, FLAGS.style_layers) + tv_loss = losses.total_variation_loss(generated) # use the unprocessed image + + loss = FLAGS.style_weight * style_loss + FLAGS.content_weight * content_loss + FLAGS.tv_weight * tv_loss + + # Add Summary for visualization in tensorboard. + """Add Summary""" + tf.summary.scalar('losses/content_loss', content_loss) + tf.summary.scalar('losses/style_loss', style_loss) + tf.summary.scalar('losses/regularizer_loss', tv_loss) + + tf.summary.scalar('weighted_losses/weighted_content_loss', content_loss * FLAGS.content_weight) + tf.summary.scalar('weighted_losses/weighted_style_loss', style_loss * FLAGS.style_weight) + tf.summary.scalar('weighted_losses/weighted_regularizer_loss', tv_loss * FLAGS.tv_weight) + tf.summary.scalar('total_loss', loss) + + for layer in FLAGS.style_layers: + tf.summary.scalar('style_losses/' + layer, style_loss_summary[layer]) + tf.summary.image('generated', generated) + # tf.image_summary('processed_generated', processed_generated) # May be better? 
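losses.py is not part of this section of the diff, so the exact loss definitions are not shown; in fast style transfer the style term is conventionally a squared distance between Gram matrices of the loss-network activations, combined with the content and total-variation terms through the style_weight/content_weight/tv_weight sum built a few lines above. A NumPy sketch of that Gram-matrix convention, stated as an assumption about what losses.style_loss computes rather than a quote of it:

import numpy as np

def gram_matrix(feature_map):
    """Gram matrix of an [height, width, channels] activation, normalised by its size."""
    h, w, c = feature_map.shape
    flat = feature_map.reshape(h * w, c)
    return flat.T @ flat / (h * w * c)

# Style loss for one layer: mean squared difference between Gram matrices of the
# generated image's activations and the style image's activations.
generated = np.random.rand(32, 32, 64).astype(np.float32)
style_ref = np.random.rand(32, 32, 64).astype(np.float32)
layer_style_loss = np.mean((gram_matrix(generated) - gram_matrix(style_ref)) ** 2)
print(layer_style_loss)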
+ tf.summary.image('origin', tf.stack([ + image_unprocessing_fn(image) for image in tf.unstack(processed_images, axis=0, num=FLAGS.batch_size) + ])) + summary = tf.summary.merge_all() + writer = tf.summary.FileWriter(training_path) + + """Prepare to Train""" + global_step = tf.Variable(0, name="global_step", trainable=False) + + variable_to_train = [] + for variable in tf.trainable_variables(): + if not(variable.name.startswith(FLAGS.loss_model)): + variable_to_train.append(variable) + train_op = tf.train.AdamOptimizer(1e-3).minimize(loss, global_step=global_step, var_list=variable_to_train) + + variables_to_restore = [] + for v in tf.global_variables(): + if not(v.name.startswith(FLAGS.loss_model)): + variables_to_restore.append(v) + saver = tf.train.Saver(variables_to_restore, write_version=tf.train.SaverDef.V1) + + sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()]) + + # Restore variables for loss network. + init_func = utils._get_init_fn(FLAGS) + init_func(sess) + + # Restore variables for training model if the checkpoint file exists. + last_file = tf.train.latest_checkpoint(training_path) + if last_file: + tf.logging.info('Restoring model from {}'.format(last_file)) + saver.restore(sess, last_file) + + """Start Training""" + coord = tf.train.Coordinator() + threads = tf.train.start_queue_runners(coord=coord) + start_time = time.time() + try: + while not coord.should_stop(): + _, loss_t, step = sess.run([train_op, loss, global_step]) + elapsed_time = time.time() - start_time + start_time = time.time() + """logging""" + # print(step) + if step % 10 == 0: + tf.logging.info('step: %d, total Loss %f, secs/step: %f' % (step, loss_t, elapsed_time)) + """summary""" + if step % 25 == 0: + tf.logging.info('adding summary...') + summary_str = sess.run(summary) + writer.add_summary(summary_str, step) + writer.flush() + """checkpoint""" + if step % 1000 == 0: + saver.save(sess, os.path.join(training_path, 'fast-style-model.ckpt'), global_step=step) + except tf.errors.OutOfRangeError: + saver.save(sess, os.path.join(training_path, 'fast-style-model.ckpt-done')) + tf.logging.info('Done training -- epoch limit reached') + finally: + coord.request_stop() + coord.join(threads) + + +if __name__ == '__main__': + tf.logging.set_verbosity(tf.logging.INFO) + args = parse_args() + FLAGS = utils.read_conf_file(args.conf) + main(FLAGS) diff --git a/StyleMigration/utils.py b/StyleMigration/utils.py new file mode 100644 index 0000000..845ec3f --- /dev/null +++ b/StyleMigration/utils.py @@ -0,0 +1,66 @@ +import tensorflow as tf +import yaml + +slim = tf.contrib.slim + + +def _get_init_fn(FLAGS): + """ + This function is copied from TF slim. + + Returns a function run by the chief worker to warm-start the training. + + Note that the init_fn is only run when initializing the model during the very + first global step. + + Returns: + An init function run by the supervisor. 
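train.py and _get_init_fn read all of their settings from a YAML file (conf/mosaic.yml, not included in this diff) that utils.read_conf_file wraps in a Flag object. A hypothetical stand-in with entirely made-up values, limited to the attribute names the code in this diff actually dereferences:

from types import SimpleNamespace  # stand-in for utils.Flag in this sketch

# Every value below is invented for illustration; only the attribute names are
# taken from the FLAGS fields referenced in train.py and utils.py.
FLAGS = SimpleNamespace(
    naming='mosaic',
    model_path='models',
    loss_model='vgg_16',
    loss_model_file='pretrained/vgg_16.ckpt',
    checkpoint_exclude_scopes='vgg_16/fc',
    image_size=256,
    batch_size=4,
    epoch=2,
    content_weight=1.0,
    style_weight=100.0,
    tv_weight=0.0,
    content_layers=['vgg_16/conv3/conv3_3'],
    style_layers=['vgg_16/conv1/conv1_2', 'vgg_16/conv2/conv2_2',
                  'vgg_16/conv3/conv3_3', 'vgg_16/conv4/conv4_3'],
)
print(FLAGS.loss_model, FLAGS.style_weight)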
+ """ + tf.logging.info('Use pretrained model %s' % FLAGS.loss_model_file) + + exclusions = [] + if FLAGS.checkpoint_exclude_scopes: + exclusions = [scope.strip() + for scope in FLAGS.checkpoint_exclude_scopes.split(',')] + + # TODO(sguada) variables.filter_variables() + variables_to_restore = [] + for var in slim.get_model_variables(): + excluded = False + for exclusion in exclusions: + if var.op.name.startswith(exclusion): + excluded = True + break + if not excluded: + variables_to_restore.append(var) + + return slim.assign_from_checkpoint_fn( + FLAGS.loss_model_file, + variables_to_restore, + ignore_missing_vars=True) + + +class Flag(object): + def __init__(self, **entries): + self.__dict__.update(entries) + + +def read_conf_file(conf_file): + with open(conf_file) as f: + FLAGS = Flag(**yaml.load(f)) + return FLAGS + + +def mean_image_subtraction(image, means): + image = tf.to_float(image) + + num_channels = 3 + channels = tf.split(image, num_channels, 2) + for i in range(num_channels): + channels[i] -= means[i] + return tf.concat(channels, 2) + + +if __name__ == '__main__': + f = read_conf_file('conf/mosaic.yml') + print(f.loss_model_file) diff --git a/ThresholdSegmentation/AdaptiveThresholding/code/自适应阈值.py b/ThresholdSegmentation/AdaptiveThresholding/code/自适应阈值.py new file mode 100644 index 0000000..5653a25 --- /dev/null +++ b/ThresholdSegmentation/AdaptiveThresholding/code/自适应阈值.py @@ -0,0 +1,20 @@ +import cv2 +from matplotlib import pyplot as plt +# 载入原图 +img = cv2.imread('D:/Python/ThresholdSegmentation/AdaptiveThresholding/img/book.jpg', 0) + +# 全局阈值分割 +retval, img_global = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY) + +# 自适应阈值分割 +img_ada_mean = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15, 3) +img_ada_gaussian = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, 3) + +# 显示图像 +titles = ['Original Image', 'Global Thresholding (v = 127)', 'Adaptive Mean Thresholding', 'Adaptive Gaussian Thresholding'] +images = [img, img_global, img_ada_mean, img_ada_gaussian] +for i in range(4): + plt.subplot(2, 2, i+1), plt.imshow(images[i], 'gray') + plt.title(titles[i]) + plt.xticks([]), plt.yticks([]) +plt.show() diff --git a/ThresholdSegmentation/AdaptiveThresholding/img/book.jpg b/ThresholdSegmentation/AdaptiveThresholding/img/book.jpg new file mode 100644 index 0000000..375da1e Binary files /dev/null and b/ThresholdSegmentation/AdaptiveThresholding/img/book.jpg differ diff --git a/ThresholdSegmentation/AdaptiveThresholding/result/result10.png b/ThresholdSegmentation/AdaptiveThresholding/result/result10.png new file mode 100644 index 0000000..b72671d Binary files /dev/null and b/ThresholdSegmentation/AdaptiveThresholding/result/result10.png differ diff --git a/ThresholdSegmentation/GlobalThresholding/code/简单全局阈值.py b/ThresholdSegmentation/GlobalThresholding/code/简单全局阈值.py new file mode 100644 index 0000000..aac84d8 --- /dev/null +++ b/ThresholdSegmentation/GlobalThresholding/code/简单全局阈值.py @@ -0,0 +1,20 @@ +import cv2 +from matplotlib import pyplot as plt +# 载入原图 +img = cv2.imread('D:/Python/ThresholdSegmentation/GlobalThresholding/img/castle.jpg', 0) + +# 全局阈值分割 +ret, thresh1 = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY) +ret, thresh2 = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV) +ret, thresh3 = cv2.threshold(img, 127, 255, cv2.THRESH_TRUNC) +ret, thresh4 = cv2.threshold(img, 127, 255, cv2.THRESH_TOZERO) +ret, thresh5 = cv2.threshold(img, 127, 255, cv2.THRESH_TOZERO_INV) + +# 显示图像 +titles = ['Original 
Image', 'BINARY', 'BINARY_INV', 'TRUNC', 'TOZERO', 'TOZERO_INV'] +images = [img, thresh1, thresh2, thresh3, thresh4, thresh5] +for i in range(6): + plt.subplot(2, 3, i+1), plt.imshow(images[i], 'gray') + plt.title(titles[i]) + plt.xticks([]), plt.yticks([]) +plt.show() diff --git a/ThresholdSegmentation/GlobalThresholding/img/castle.jpg b/ThresholdSegmentation/GlobalThresholding/img/castle.jpg new file mode 100644 index 0000000..edf7318 Binary files /dev/null and b/ThresholdSegmentation/GlobalThresholding/img/castle.jpg differ diff --git a/ThresholdSegmentation/GlobalThresholding/result/result11.png b/ThresholdSegmentation/GlobalThresholding/result/result11.png new file mode 100644 index 0000000..60b9fdc Binary files /dev/null and b/ThresholdSegmentation/GlobalThresholding/result/result11.png differ diff --git a/ThresholdSegmentation/OtsuThresholding/code/Otsu.py b/ThresholdSegmentation/OtsuThresholding/code/Otsu.py new file mode 100644 index 0000000..2962d5c --- /dev/null +++ b/ThresholdSegmentation/OtsuThresholding/code/Otsu.py @@ -0,0 +1,24 @@ +import cv2 +from matplotlib import pyplot as plt +# 载入原图 +img = cv2.imread('D:/Python/ThresholdSegmentation/OtsuThresholding/img/person.jpg', 0) + +# 全局阈值分割 +ret, img_global = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY) + +# 大津法 +blur = cv2.GaussianBlur(img, (5, 5), 0) +ret, img_Otsu = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU) + +# 显示图片 +images = [img, 0, img_global, blur, 0, img_Otsu] +titles = ['原始图像', '直方图', '全局阈值分割(127)', '原始图像', '直方图(降噪处理)', 'OTSU分割'] +plt.rcParams['font.sans-serif'] = ['SimHei'] +for i in range(2): + plt.subplot(2, 3, i*3+1), plt.imshow(images[i*3], 'gray') + plt.title(titles[i*3]), plt.xticks([]), plt.yticks([]) + plt.subplot(2, 3, i*3+2), plt.hist(images[i*3].ravel(), 256) + plt.title(titles[i*3+1]), plt.xticks([]), plt.yticks([]) + plt.subplot(2, 3, i*3+3), plt.imshow(images[i*3+2], 'gray') + plt.title(titles[i*3+2]), plt.xticks([]), plt.yticks([]) +plt.show() diff --git a/ThresholdSegmentation/OtsuThresholding/img/person.jpg b/ThresholdSegmentation/OtsuThresholding/img/person.jpg new file mode 100644 index 0000000..59b4375 Binary files /dev/null and b/ThresholdSegmentation/OtsuThresholding/img/person.jpg differ diff --git a/ThresholdSegmentation/OtsuThresholding/result/result12.png b/ThresholdSegmentation/OtsuThresholding/result/result12.png new file mode 100644 index 0000000..05bb2ab Binary files /dev/null and b/ThresholdSegmentation/OtsuThresholding/result/result12.png differ
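For reference on Otsu.py above: passing cv2.THRESH_OTSU makes OpenCV ignore the supplied threshold of 0 and instead pick the value that maximises the between-class variance of the grayscale histogram, which is also why the script smooths with a Gaussian blur first. A NumPy sketch of that search, illustrative only and not OpenCV's exact implementation:

import numpy as np

def otsu_threshold(gray):
    """Return the threshold that maximises between-class variance (Otsu's method)."""
    hist = np.bincount(gray.ravel(), minlength=256).astype(np.float64)
    prob = hist / hist.sum()
    best_t, best_var = 0, 0.0
    for t in range(1, 256):
        w0, w1 = prob[:t].sum(), prob[t:].sum()   # class probabilities below/above t
        if w0 == 0 or w1 == 0:
            continue
        mu0 = (np.arange(t) * prob[:t]).sum() / w0        # mean of the dark class
        mu1 = (np.arange(t, 256) * prob[t:]).sum() / w1   # mean of the bright class
        between = w0 * w1 * (mu0 - mu1) ** 2
        if between > best_var:
            best_t, best_var = t, between
    return best_t

gray = (np.random.rand(64, 64) * 255).astype(np.uint8)
print(otsu_threshold(gray))  # comparable to the `ret` value cv2.threshold returns with THRESH_OTSU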