@ -0,0 +1,67 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# 读取图像
|
||||
img = cv2.imread('D:/Python/EdgeDetection/img/person.jpg')
|
||||
img_RGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # 转成RGB 方便后面显示
|
||||
|
||||
# 灰度化处理图像
|
||||
grayImage = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
# 阈值处理
|
||||
ret, binary = cv2.threshold(grayImage, 127, 255, cv2.THRESH_BINARY)
|
||||
|
||||
# Roberts算子
|
||||
kernelx = np.array([[-1, 0], [0, 1]], dtype=int)
|
||||
kernely = np.array([[0, -1], [1, 0]], dtype=int)
|
||||
x = cv2.filter2D(binary, cv2.CV_16S, kernelx) # 掩模运算
|
||||
y = cv2.filter2D(binary, cv2.CV_16S, kernely)
|
||||
absX = cv2.convertScaleAbs(x)
|
||||
absY = cv2.convertScaleAbs(y)
|
||||
Roberts = cv2.addWeighted(absX, 0.5, absY, 0.5, 0) # 图像融合
|
||||
|
||||
# Prewitt算子
|
||||
kernelx = np.array([[1, 1, 1], [0, 0, 0], [-1, -1, -1]], dtype=int)
|
||||
kernely = np.array([[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]], dtype=int)
|
||||
x = cv2.filter2D(binary, cv2.CV_16S, kernelx)
|
||||
y = cv2.filter2D(binary, cv2.CV_16S, kernely)
|
||||
absX = cv2.convertScaleAbs(x)
|
||||
absY = cv2.convertScaleAbs(y)
|
||||
Prewitt = cv2.addWeighted(absX, 0.5, absY, 0.5, 0)
|
||||
|
||||
# Sobel算子
|
||||
x = cv2.Sobel(binary, cv2.CV_16S, 1, 0)
|
||||
y = cv2.Sobel(binary, cv2.CV_16S, 0, 1)
|
||||
absX = cv2.convertScaleAbs(x)
|
||||
absY = cv2.convertScaleAbs(y)
|
||||
Sobel = cv2.addWeighted(absX, 0.5, absY, 0.5, 0)
|
||||
|
||||
# Laplacian算子
|
||||
dst = cv2.Laplacian(binary, cv2.CV_16S, ksize=3)
|
||||
Laplacian = cv2.convertScaleAbs(dst)
|
||||
|
||||
# 高斯滤波(去噪)
|
||||
gaussianBlur = cv2.GaussianBlur(grayImage, (3, 3), 0)
|
||||
ret, binary = cv2.threshold(gaussianBlur, 127, 255, cv2.THRESH_BINARY)
|
||||
|
||||
# LOG算子
|
||||
dst = cv2.Laplacian(binary, cv2.CV_16S, ksize=3)
|
||||
LOG = cv2.convertScaleAbs(dst)
|
||||
|
||||
# Canny算子
|
||||
Canny = cv2.Canny(gaussianBlur, 50, 150)
|
||||
|
||||
# 用来正常显示中文标签
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
||||
|
||||
# 显示图形
|
||||
plt.subplot(241), plt.imshow(img_RGB), plt.title('原始图像'), plt.axis('off') # 坐标轴关闭
|
||||
plt.subplot(242), plt.imshow(binary, cmap=plt.cm.gray), plt.title('二值图'), plt.axis('off')
|
||||
plt.subplot(243), plt.imshow(Roberts, cmap=plt.cm.gray), plt.title('Roberts算子'), plt.axis('off')
|
||||
plt.subplot(244), plt.imshow(Prewitt, cmap=plt.cm.gray), plt.title('Prewitt算子'), plt.axis('off')
|
||||
plt.subplot(245), plt.imshow(Sobel, cmap=plt.cm.gray), plt.title('Sobel算子'), plt.axis('off')
|
||||
plt.subplot(246), plt.imshow(Laplacian, cmap=plt.cm.gray), plt.title('Laplacian算子'), plt.axis('off')
|
||||
plt.subplot(247), plt.imshow(LOG, cmap=plt.cm.gray), plt.title('LOG算子'), plt.axis('off')
|
||||
plt.subplot(248), plt.imshow(Canny, cmap=plt.cm.gray), plt.title('Canny算子'), plt.axis('off')
|
||||
plt.show()
|
||||
|
After Width: | Height: | Size: 304 KiB |
|
After Width: | Height: | Size: 287 KiB |
@ -0,0 +1,83 @@
|
||||
import numpy as np
|
||||
import cv2 as cv
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
def frequency_filter(image, filter):
|
||||
"""
|
||||
:param image:
|
||||
:param filter: 频域变换函数
|
||||
:return:
|
||||
"""
|
||||
fftImg = np.fft.fft2(image) # 对图像进行傅里叶变换
|
||||
fftImgShift = np.fft.fftshift(fftImg) # 傅里叶变换后坐标移动到图像中心
|
||||
handle_fftImgShift1 = fftImgShift*filter # 对傅里叶变换后的图像进行频域变换
|
||||
|
||||
handle_fftImgShift2 = np.fft.ifftshift(handle_fftImgShift1)
|
||||
handle_fftImgShift3 = np.fft.ifft2(handle_fftImgShift2)
|
||||
handle_fftImgShift4 = np.real(handle_fftImgShift3) # 傅里叶反变换后取频域
|
||||
return np.uint8(handle_fftImgShift4)
|
||||
|
||||
|
||||
# 理想低通滤波器
|
||||
def ILPF(image, d0, n):
|
||||
H = np.empty_like(image, dtype=float)
|
||||
M, N = image.shape
|
||||
mid_x = int(M/2)
|
||||
mid_y = int(N/2)
|
||||
for y in range(0, M):
|
||||
for x in range(0, N):
|
||||
d = np.sqrt((x - mid_x) ** 2 + (y - mid_y) ** 2)
|
||||
if d <= d0:
|
||||
H[y, x] = 1**n
|
||||
else:
|
||||
H[y, x] = 0**n
|
||||
return H
|
||||
|
||||
|
||||
# 巴特沃斯低通滤波器
|
||||
def BLPF(image, d0, n):
|
||||
H = np.empty_like(image, float)
|
||||
M, N = image.shape
|
||||
mid_x = int(M/2)
|
||||
mid_y = int(N/2)
|
||||
for y in range(0, M):
|
||||
for x in range(0, N):
|
||||
d = np.sqrt((x - mid_x) ** 2 + (y - mid_y) ** 2)
|
||||
H[y, x] = 1/(1+(d/d0)**(n))
|
||||
return H
|
||||
|
||||
|
||||
# 高斯低通滤波器
|
||||
def GLPF(image, d0, n):
|
||||
H = np.empty_like(image, float)
|
||||
M, N = image.shape
|
||||
mid_x = M/2
|
||||
mid_y = N/2
|
||||
for x in range(0, M):
|
||||
for y in range(0, N):
|
||||
d = np.sqrt((x - mid_x)**2 + (y - mid_y) ** 2)
|
||||
H[x, y] = np.exp(-d**n/(2*d0**n))
|
||||
return H
|
||||
|
||||
|
||||
# 读取图像
|
||||
image = cv.imread('D:/Python/FrequencyDomainProcessing/img/moon.jpg')
|
||||
img_RGB = cv.cvtColor(image, cv.COLOR_BGR2RGB) # 转成RGB 方便后面显示
|
||||
grayImage = cv.cvtColor(img_RGB, cv.COLOR_BGR2GRAY)
|
||||
|
||||
result2 = frequency_filter(grayImage, ILPF(grayImage, 60, n=1))
|
||||
result3 = frequency_filter(grayImage, BLPF(grayImage, 60, n=2))
|
||||
result4 = frequency_filter(grayImage, BLPF(grayImage, 90, n=2))
|
||||
result5 = frequency_filter(grayImage, GLPF(grayImage, 60, n=2))
|
||||
result6 = frequency_filter(grayImage, GLPF(grayImage, 90, n=2))
|
||||
|
||||
# 显示图形
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
||||
plt.subplot(321), plt.imshow(grayImage, cmap=plt.cm.gray), plt.title('原始图像'), plt.axis('off') # 坐标轴关闭
|
||||
plt.subplot(322), plt.imshow(result2, cmap=plt.cm.gray), plt.title('理想低通滤波(D0=60)'), plt.axis('off')
|
||||
plt.subplot(323), plt.imshow(result3, cmap=plt.cm.gray), plt.title('Butterwoth低通(D0=60,n=2)'), plt.axis('off')
|
||||
plt.subplot(324), plt.imshow(result4, cmap=plt.cm.gray), plt.title('Butterwoth低通(D0=90,n=2)'), plt.axis('off')
|
||||
plt.subplot(325), plt.imshow(result5, cmap=plt.cm.gray), plt.title('Gauss低通(D0=60,n=2)'), plt.axis('off')
|
||||
plt.subplot(326), plt.imshow(result6, cmap=plt.cm.gray), plt.title('Gauss低通(D0=90,n=2)'), plt.axis('off')
|
||||
plt.show()
|
||||
|
After Width: | Height: | Size: 248 KiB |
|
After Width: | Height: | Size: 313 KiB |
@ -0,0 +1,43 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
# 读取图片
|
||||
img = cv2.imread("D:/Python/GeometricTransformation/img/person.jpg", cv2.IMREAD_UNCHANGED)
|
||||
src = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
||||
rows, cols = src.shape[:2]
|
||||
|
||||
# 图像缩放
|
||||
result1 = cv2.resize(src, None, fx=0.1, fy=0.1)
|
||||
result2 = cv2.resize(src, (int(cols*10), int(rows*10)))
|
||||
|
||||
# 图像旋转
|
||||
M1 = cv2.getRotationMatrix2D((cols / 2, rows / 2), 45, 1)
|
||||
result3 = cv2.warpAffine(src, M1, (cols, rows))
|
||||
M2 = cv2.getRotationMatrix2D((cols / 2, rows / 2), 90, 1)
|
||||
result4 = cv2.warpAffine(src, M2, (cols, rows))
|
||||
M3 = cv2.getRotationMatrix2D((cols / 2, rows / 2), 225, 1)
|
||||
result5 = cv2.warpAffine(src, M3, (cols, rows))
|
||||
|
||||
# 图像翻转
|
||||
result6 = cv2.flip(src, 0)
|
||||
result7 = cv2.flip(src, 1)
|
||||
result8 = cv2.flip(src, -1)
|
||||
|
||||
# 图像平移
|
||||
M = np.float32([[1, 0, 0], [0, 1, 300]])
|
||||
result9 = cv2.warpAffine(src, M, (cols, rows))
|
||||
M = np.float32([[1, 0, 0], [0, 1, -300]])
|
||||
result10 = cv2.warpAffine(src, M, (cols, rows))
|
||||
M = np.float32([[1, 0, 300], [0, 1, 0]])
|
||||
result11 = cv2.warpAffine(src, M, (cols, rows))
|
||||
|
||||
# 显示图形
|
||||
titles = ['原始图像', '图像缩小(10倍)', '图像放大(10倍)', '图像旋转(45°)', '图像旋转(90°)', '图像旋转(225°)', '图像翻转(X轴)', '图像翻转(Y轴)', '图像翻转(原点)', '图像平移(向下)', '图像平移(向上)', '图像平移(向右)']
|
||||
images = [src, result1, result2, result3, result4, result5, result6, result7, result8, result9, result10, result11]
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
||||
for i in range(12):
|
||||
plt.subplot(4, 3, i + 1), plt.imshow(images[i], 'gray')
|
||||
plt.title(titles[i], fontsize=8)
|
||||
plt.xticks([]), plt.yticks([])
|
||||
plt.show()
|
||||
|
After Width: | Height: | Size: 1.2 MiB |
|
After Width: | Height: | Size: 634 KiB |
|
After Width: | Height: | Size: 222 KiB |
|
After Width: | Height: | Size: 825 KiB |
@ -0,0 +1,50 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
# 图像加法
|
||||
img = cv2.imread('D:/Python/ImageOperation/img/boy.jpg', 1)
|
||||
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
||||
m = np.ones(img.shape, dtype="uint8")*100
|
||||
result_add = cv2.add(img, m)
|
||||
# 图像透视变换
|
||||
height, width = img.shape[:2]
|
||||
pts1 = np.float32([[119, 189], [1700, 189], [400, 1747], [1541, 1747]]) # 变成一个大头娃娃
|
||||
pts2 = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
|
||||
matrix = cv2.getPerspectiveTransform(pts1, pts2)
|
||||
result_warp = cv2.warpPerspective(img, matrix, (width, height))
|
||||
|
||||
# 图像融合
|
||||
tmp1 = cv2.imread('D:/Python/1.2-Image operation/img/beauty.jpg', 1)
|
||||
img1 = cv2.cvtColor(tmp1, cv2.COLOR_BGR2RGB)
|
||||
tmp2 = cv2.imread('D:/Python/1.2-Image operation/img/rose.jpg', 1)
|
||||
img2 = cv2.cvtColor(tmp2, cv2.COLOR_BGR2RGB)
|
||||
h, w, _ = img1.shape
|
||||
temp = cv2.resize(img2, (w, h), interpolation=cv2.INTER_AREA) # x轴对应的是宽度w,插值方法默认为双线性插值
|
||||
result_addW = cv2.addWeighted(img1, 0.6, temp, 0.4, 0)
|
||||
|
||||
# 图像与运算
|
||||
tmp3 = cv2.imread('D:/Python/1.2-Image operation/img/castle.jpg', cv2.IMREAD_GRAYSCALE)
|
||||
rows, cols = tmp3.shape[:2]
|
||||
circle = np.zeros((rows, cols), dtype="uint8")
|
||||
cv2.circle(circle, (int(rows/2), int(cols/2)), 600, 255, -1)
|
||||
result_and = cv2.bitwise_and(tmp3, circle)
|
||||
|
||||
# 图像或运算
|
||||
result_or = cv2.bitwise_or(tmp3, circle)
|
||||
|
||||
# 图像非运算
|
||||
result_not = cv2.bitwise_not(tmp3)
|
||||
|
||||
# 图像异或运算
|
||||
result_xor = cv2.bitwise_xor(tmp3, circle)
|
||||
|
||||
# 显示图像
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
||||
titles = ['原图1', '图像加法(像素+100)', '透视变化', '原图2', '原图3', '图像融合', '原图4', '原图5', '与运算', '或运算', '非运算', '异或运算']
|
||||
images = [img, result_add, result_warp, img1, img2, result_addW, tmp3, circle, result_and, result_or, result_not, result_xor]
|
||||
for i in range(12):
|
||||
plt.subplot(4, 3, i+1), plt.imshow(images[i], 'gray')
|
||||
plt.title(titles[i], fontsize=10)
|
||||
plt.xticks([]), plt.yticks([])
|
||||
plt.show()
|
||||
|
After Width: | Height: | Size: 256 KiB |
|
After Width: | Height: | Size: 387 KiB |
|
After Width: | Height: | Size: 3.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 303 KiB |
@ -0,0 +1,40 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
from matplotlib import pyplot as plt
|
||||
# 读取图片
|
||||
src = cv2.imread("D:/Python/MorphologicalProcessing/img/CHA.jpg", cv2.IMREAD_UNCHANGED)
|
||||
|
||||
# 设置卷积核
|
||||
kernel = np.ones((3, 3), np.uint8)
|
||||
|
||||
# 腐蚀处理
|
||||
resutl_erode = cv2.erode(src, kernel)
|
||||
|
||||
# 膨胀处理
|
||||
result_dilate = cv2.dilate(src, kernel)
|
||||
|
||||
# 开运算
|
||||
result_open = cv2.morphologyEx(src, cv2.MORPH_OPEN, kernel)
|
||||
|
||||
# 闭运算
|
||||
result_close = cv2.morphologyEx(src, cv2.MORPH_CLOSE, kernel)
|
||||
|
||||
# 梯度运算
|
||||
result_gradient = cv2.morphologyEx(src, cv2.MORPH_GRADIENT, kernel)
|
||||
|
||||
# 顶帽运算
|
||||
result_tophat = cv2.morphologyEx(src, cv2.MORPH_TOPHAT, kernel)
|
||||
|
||||
# 黑帽运算
|
||||
result_blackhat = cv2.morphologyEx(src, cv2.MORPH_BLACKHAT, kernel)
|
||||
|
||||
# 显示图片
|
||||
images = [src, result_open, resutl_erode, result_dilate, result_close, result_gradient, result_tophat, result_blackhat]
|
||||
titles = ['原始图像', '腐蚀', '膨胀', '开运算', '闭运算', '梯度运算', '顶帽运算', '黑帽运算']
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
||||
for i in range(2):
|
||||
plt.subplot(2, 4, i*4+1), plt.imshow(images[i*4], 'gray'), plt.title(titles[i*4]), plt.axis('off')
|
||||
plt.subplot(2, 4, i*4+2), plt.imshow(images[i*4+1], 'gray'), plt.title(titles[i*4+1]), plt.axis('off')
|
||||
plt.subplot(2, 4, i*4+3), plt.imshow(images[i*4+2], 'gray'), plt.title(titles[i*4+2]), plt.axis('off')
|
||||
plt.subplot(2, 4, i*4+4), plt.imshow(images[i*4+3], 'gray'), plt.title(titles[i*4+3]), plt.axis('off')
|
||||
plt.show()
|
||||
|
After Width: | Height: | Size: 38 KiB |
|
After Width: | Height: | Size: 110 KiB |
|
After Width: | Height: | Size: 1.9 MiB |
|
After Width: | Height: | Size: 1.5 MiB |
|
After Width: | Height: | Size: 1.7 MiB |
|
After Width: | Height: | Size: 287 KiB |
|
After Width: | Height: | Size: 401 KiB |
|
After Width: | Height: | Size: 371 KiB |
|
After Width: | Height: | Size: 407 KiB |
|
After Width: | Height: | Size: 313 KiB |
|
After Width: | Height: | Size: 634 KiB |
|
After Width: | Height: | Size: 825 KiB |
|
After Width: | Height: | Size: 303 KiB |
|
After Width: | Height: | Size: 110 KiB |
|
After Width: | Height: | Size: 390 KiB |
|
After Width: | Height: | Size: 277 KiB |
|
After Width: | Height: | Size: 506 KiB |
@ -0,0 +1,49 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# 线性灰度变换--图像反转
|
||||
img1 = cv2.imread('D:/Python/SpatialProcessing/Gray-scaleModification/img/castle.jpg', cv2.IMREAD_UNCHANGED)
|
||||
src = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)
|
||||
grayImage = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
|
||||
height = grayImage.shape[0]
|
||||
width = grayImage.shape[1]
|
||||
result_rev = np.zeros((height, width), np.uint8)
|
||||
for i in range(height):
|
||||
for j in range(width):
|
||||
gray = 255 - grayImage[i, j]
|
||||
result_rev[i, j] = np.uint8(gray)
|
||||
|
||||
|
||||
# 非线性灰度变换--对数变换
|
||||
def log(c, img):
|
||||
output = c * np.log(1.0 + img)
|
||||
output = np.uint8(output + 0.5)
|
||||
return output
|
||||
|
||||
|
||||
result_log1 = log(25, src)
|
||||
result_log2 = log(45, src)
|
||||
|
||||
|
||||
# 非线性灰度变换--伽马变换
|
||||
def gamma(img, c, v):
|
||||
lut = np.zeros(256, dtype=np.float32)
|
||||
for i in range(256):
|
||||
lut[i] = c * i ** v
|
||||
output_img = cv2.LUT(img, lut)
|
||||
output_img = np.uint8(output_img+0.5)
|
||||
return output_img
|
||||
|
||||
|
||||
result_gamma1 = gamma(src, 1, 0.8)
|
||||
result_gamma2 = gamma(src, 1, 1.2)
|
||||
|
||||
# 显示图片
|
||||
images = [src, result_rev, result_log1, result_log2, result_gamma1, result_gamma2]
|
||||
titles = ['原始图像', '图像反转', '对数变化(c=25)', '对数变化(c=45)', '伽马变换(v=0.8)', '伽马变换(v=1.2)']
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
||||
for i in range(3):
|
||||
plt.subplot(3, 2, i*2+1), plt.imshow(images[i*2], 'gray'), plt.title(titles[i*2]), plt.axis('off')
|
||||
plt.subplot(3, 2, i*2+2), plt.imshow(images[i*2+1], 'gray'), plt.title(titles[i*2+1]), plt.axis('off')
|
||||
plt.show()
|
||||
|
After Width: | Height: | Size: 1.6 MiB |
|
After Width: | Height: | Size: 390 KiB |
@ -0,0 +1,15 @@
|
||||
import cv2
|
||||
import matplotlib.pyplot as plt
|
||||
# 原始图像
|
||||
img_gray = cv2.imread('D:/Python/SpatialProcessing/HistogramModification/img/cake.jpg', cv2.IMREAD_GRAYSCALE)
|
||||
|
||||
# 直方图修正
|
||||
equ = cv2.equalizeHist(img_gray)
|
||||
|
||||
# 显示图像
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
||||
plt.subplot(221), plt.imshow(img_gray, cmap=plt.cm.gray), plt.title('原始图像'), plt.axis('off')
|
||||
plt.subplot(222), plt.hist(img_gray.ravel(), 256), plt.title('灰度直方图')
|
||||
plt.subplot(223), plt.imshow(equ, cmap=plt.cm.gray), plt.title('修正图像'), plt.axis('off')
|
||||
plt.subplot(224), plt.hist(equ.ravel(), 256), plt.title('修正直方图')
|
||||
plt.show()
|
||||
|
After Width: | Height: | Size: 1.4 MiB |
|
After Width: | Height: | Size: 277 KiB |
@ -0,0 +1,25 @@
|
||||
import cv2
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# 读取图片
|
||||
source = cv2.imread('D:/Python/SpatialProcessing/ImageSmoothing/img/moon.jpg', cv2.IMREAD_UNCHANGED)
|
||||
source = cv2.cvtColor(source, cv2.COLOR_BGR2RGB) # 转成RGB 方便后面显示
|
||||
|
||||
# 均值滤波
|
||||
result1 = cv2.blur(source, (7, 7))
|
||||
|
||||
# 中值滤波
|
||||
result2 = cv2.medianBlur(source, 5)
|
||||
|
||||
# 高斯滤波
|
||||
result3 = cv2.GaussianBlur(source, (5, 5), 0)
|
||||
|
||||
# 显示图形
|
||||
titles = ['原始图片', '均值滤波', '中值滤波', '高斯滤波']
|
||||
images = [source, result1, result2, result3]
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
||||
for i in range(4):
|
||||
plt.subplot(2, 2, i + 1), plt.imshow(images[i], 'gray')
|
||||
plt.title(titles[i])
|
||||
plt.xticks([]), plt.yticks([])
|
||||
plt.show()
|
||||
|
After Width: | Height: | Size: 248 KiB |
|
After Width: | Height: | Size: 506 KiB |
@ -0,0 +1,99 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
env/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*,cover
|
||||
.hypothesis/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# IPython Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# celery beat schedule file
|
||||
celerybeat-schedule
|
||||
|
||||
# dotenv
|
||||
.env
|
||||
|
||||
# virtualenv
|
||||
venv/
|
||||
ENV/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
|
||||
slim-official/
|
||||
train2014
|
||||
generated/
|
||||
models/
|
||||
tensorboard/
|
||||
result/
|
||||
pretrained/
|
||||
|
||||
@ -0,0 +1,68 @@
|
||||
# fast-neural-style-tensorflow
|
||||
|
||||
A tensorflow implementation for [Perceptual Losses for Real-Time Style Transfer and Super-Resolution](https://arxiv.org/abs/1603.08155).
|
||||
|
||||
This code is based on [Tensorflow-Slim](https://github.com/tensorflow/models/tree/master/slim) and [OlavHN/fast-neural-style](https://github.com/OlavHN/fast-neural-style).
|
||||
|
||||
## Samples:
|
||||
|
||||
| configuration | style | sample |
|
||||
| :---: | :----: | :----: |
|
||||
| [wave.yml](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/wave.yml) ||  |
|
||||
| [cubist.yml](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/cubist.yml) ||  |
|
||||
| [denoised_starry.yml](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/denoised_starry.yml) ||  |
|
||||
| [mosaic.yml](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/mosaic.yml) ||  |
|
||||
| [scream.yml](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/scream.yml) ||  |
|
||||
| [feathers.yml](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/feathers.yml) ||  |
|
||||
| [udnie.yml](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/udnie.yml) ||  |
|
||||
|
||||
## Requirements and Prerequisites:
|
||||
- Python 2.7.x
|
||||
- <b>Now support Tensorflow >= 1.0</b>
|
||||
|
||||
<b>Attention: This code also supports Tensorflow == 0.11. If it is your version, use the commit 5309a2a (git reset --hard 5309a2a).</b>
|
||||
|
||||
And make sure you installed pyyaml:
|
||||
```
|
||||
pip install pyyaml
|
||||
```
|
||||
|
||||
## Use Trained Models:
|
||||
|
||||
You can download all the 7 trained models from [Baidu Drive](https://pan.baidu.com/s/1i4GTS4d).
|
||||
|
||||
To generate a sample from the model "wave.ckpt-done", run:
|
||||
|
||||
```
|
||||
python eval.py --model_file <your path to wave.ckpt-done> --image_file img/test.jpg
|
||||
```
|
||||
|
||||
Then check out generated/res.jpg.
|
||||
|
||||
## Train a Model:
|
||||
To train a model from scratch, you should first download [VGG16 model](http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz) from Tensorflow Slim. Extract the file vgg_16.ckpt. Then copy it to the folder pretrained/ :
|
||||
```
|
||||
cd <this repo>
|
||||
mkdir pretrained
|
||||
cp <your path to vgg_16.ckpt> pretrained/
|
||||
```
|
||||
|
||||
Then download the [COCO dataset](http://msvocds.blob.core.windows.net/coco2014/train2014.zip). Please unzip it, and you will have a folder named "train2014" with many raw images in it. Then create a symbol link to it:
|
||||
```
|
||||
cd <this repo>
|
||||
ln -s <your path to the folder "train2014"> train2014
|
||||
```
|
||||
|
||||
Train the model of "wave":
|
||||
```
|
||||
python train.py -c conf/wave.yml
|
||||
```
|
||||
|
||||
(Optional) Use tensorboard:
|
||||
```
|
||||
tensorboard --logdir models/wave/
|
||||
```
|
||||
|
||||
Checkpoints will be written to "models/wave/".
|
||||
|
||||
View the [configuration file](https://github.com/hzy46/fast-neural-style-tensorflow/blob/master/conf/wave.yml) for details.
|
||||
@ -0,0 +1,26 @@
|
||||
## Basic configuration
|
||||
style_image: img/candy.jpg # targeted style image
|
||||
naming: "candy" # the name of this model. Determine the path to save checkpoint and events file.
|
||||
model_path: models # root path to save checkpoint and events file. The final path would be <model_path>/<naming>
|
||||
|
||||
## Weight of the loss
|
||||
content_weight: 1.0 # weight for content features loss
|
||||
style_weight: 50.0 # weight for style features loss
|
||||
tv_weight: 0.0 # weight for total variation loss
|
||||
|
||||
## The size, the iter number to run
|
||||
image_size: 256
|
||||
batch_size: 4
|
||||
epoch: 2
|
||||
|
||||
## Loss Network
|
||||
loss_model: "vgg_16"
|
||||
content_layers: # use these layers for content loss
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
style_layers: # use these layers for style loss
|
||||
- "vgg_16/conv1/conv1_2"
|
||||
- "vgg_16/conv2/conv2_2"
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
- "vgg_16/conv4/conv4_3"
|
||||
checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers.
|
||||
loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint
|
||||
@ -0,0 +1,26 @@
|
||||
## Basic configuration
|
||||
style_image: img/cubist.jpg # targeted style image
|
||||
naming: "cubist" # the name of this model. Determine the path to save checkpoint and events file.
|
||||
model_path: models # root path to save checkpoint and events file. The final path would be <model_path>/<naming>
|
||||
|
||||
## Weight of the loss
|
||||
content_weight: 1.0 # weight for content features loss
|
||||
style_weight: 180.0 # weight for style features loss
|
||||
tv_weight: 0.0 # weight for total variation loss
|
||||
|
||||
## The size, the iter number to run
|
||||
image_size: 256
|
||||
batch_size: 4
|
||||
epoch: 2
|
||||
|
||||
## Loss Network
|
||||
loss_model: "vgg_16"
|
||||
content_layers: # use these layers for content loss
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
style_layers: # use these layers for style loss
|
||||
- "vgg_16/conv1/conv1_2"
|
||||
- "vgg_16/conv2/conv2_2"
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
- "vgg_16/conv4/conv4_3"
|
||||
checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers.
|
||||
loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint
|
||||
@ -0,0 +1,26 @@
|
||||
## Basic configuration
|
||||
style_image: img/denoised_starry.jpg # targeted style image
|
||||
naming: "denoised_starry" # the name of this model. Determine the path to save checkpoint and events file.
|
||||
model_path: models # root path to save checkpoint and events file. The final path would be <model_path>/<naming>
|
||||
|
||||
## Weight of the loss
|
||||
content_weight: 1.0 # weight for content features loss
|
||||
style_weight: 250 # weight for style features loss
|
||||
tv_weight: 0.0 # weight for total variation loss
|
||||
|
||||
## The size, the iter number to run
|
||||
image_size: 256
|
||||
batch_size: 4
|
||||
epoch: 2
|
||||
|
||||
## Loss Network
|
||||
loss_model: "vgg_16"
|
||||
content_layers: # use these layers for content loss
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
style_layers: # use these layers for style loss
|
||||
- "vgg_16/conv1/conv1_2"
|
||||
- "vgg_16/conv2/conv2_2"
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
- "vgg_16/conv4/conv4_3"
|
||||
checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers.
|
||||
loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint
|
||||
@ -0,0 +1,26 @@
|
||||
## Basic configuration
|
||||
style_image: img/feathers.jpg # targeted style image
|
||||
naming: "feathers" # the name of this model. Determine the path to save checkpoint and events file.
|
||||
model_path: models # root path to save checkpoint and events file. The final path would be <model_path>/<naming>
|
||||
|
||||
## Weight of the loss
|
||||
content_weight: 1.0 # weight for content features loss
|
||||
style_weight: 220.0 # weight for style features loss
|
||||
tv_weight: 0.0 # weight for total variation loss
|
||||
|
||||
## The size, the iter number to run
|
||||
image_size: 256
|
||||
batch_size: 4
|
||||
epoch: 2
|
||||
|
||||
## Loss Network
|
||||
loss_model: "vgg_16"
|
||||
content_layers: # use these layers for content loss
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
style_layers: # use these layers for style loss
|
||||
- "vgg_16/conv1/conv1_2"
|
||||
- "vgg_16/conv2/conv2_2"
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
- "vgg_16/conv4/conv4_3"
|
||||
checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers.
|
||||
loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint
|
||||
@ -0,0 +1,26 @@
|
||||
## Basic configuration
|
||||
style_image: img/mosaic.jpg # targeted style image
|
||||
naming: "mosaic" # the name of this model. Determine the path to save checkpoint and events file.
|
||||
model_path: models # root path to save checkpoint and events file. The final path would be <model_path>/<naming>
|
||||
|
||||
## Weight of the loss
|
||||
content_weight: 1.0 # weight for content features loss
|
||||
style_weight: 100.0 # weight for style features loss
|
||||
tv_weight: 0.0 # weight for total variation loss
|
||||
|
||||
## The size, the iter number to run
|
||||
image_size: 256
|
||||
batch_size: 4
|
||||
epoch: 2
|
||||
|
||||
## Loss Network
|
||||
loss_model: "vgg_16"
|
||||
content_layers: # use these layers for content loss
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
style_layers: # use these layers for style loss
|
||||
- "vgg_16/conv1/conv1_2"
|
||||
- "vgg_16/conv2/conv2_2"
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
- "vgg_16/conv4/conv4_3"
|
||||
checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers.
|
||||
loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint
|
||||
@ -0,0 +1,26 @@
|
||||
## Basic configuration
|
||||
style_image: img/scream.jpg # targeted style image
|
||||
naming: "scream" # the name of this model. Determine the path to save checkpoint and events file.
|
||||
model_path: models # root path to save checkpoint and events file. The final path would be <model_path>/<naming>
|
||||
|
||||
## Weight of the loss
|
||||
content_weight: 1.0 # weight for content features loss
|
||||
style_weight: 250.0 # weight for style features loss
|
||||
tv_weight: 0.0 # weight for total variation loss
|
||||
|
||||
## The size, the iter number to run
|
||||
image_size: 256
|
||||
batch_size: 4
|
||||
epoch: 2
|
||||
|
||||
## Loss Network
|
||||
loss_model: "vgg_16"
|
||||
content_layers: # use these layers for content loss
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
style_layers: # use these layers for style loss
|
||||
- "vgg_16/conv1/conv1_2"
|
||||
- "vgg_16/conv2/conv2_2"
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
- "vgg_16/conv4/conv4_3"
|
||||
checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers.
|
||||
loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint
|
||||
@ -0,0 +1,26 @@
|
||||
## Basic configuration
|
||||
style_image: img/udnie.jpg # targeted style image
|
||||
naming: "udnie" # the name of this model. Determine the path to save checkpoint and events file.
|
||||
model_path: models # root path to save checkpoint and events file. The final path would be <model_path>/<naming>
|
||||
|
||||
## Weight of the loss
|
||||
content_weight: 1.0 # weight for content features loss
|
||||
style_weight: 200.0 # weight for style features loss
|
||||
tv_weight: 0.0 # weight for total variation loss
|
||||
|
||||
## The size, the iter number to run
|
||||
image_size: 256
|
||||
batch_size: 4
|
||||
epoch: 2
|
||||
|
||||
## Loss Network
|
||||
loss_model: "vgg_16"
|
||||
content_layers: # use these layers for content loss
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
style_layers: # use these layers for style loss
|
||||
- "vgg_16/conv1/conv1_2"
|
||||
- "vgg_16/conv2/conv2_2"
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
- "vgg_16/conv4/conv4_3"
|
||||
checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers.
|
||||
loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint
|
||||
@ -0,0 +1,26 @@
|
||||
## Basic configuration
|
||||
style_image: img/wave.jpg # targeted style image
|
||||
naming: "wave" # the name of this model. Determine the path to save checkpoint and events file.
|
||||
model_path: models # root path to save checkpoint and events file. The final path would be <model_path>/<naming>
|
||||
|
||||
## Weight of the loss
|
||||
content_weight: 1.0 # weight for content features loss
|
||||
style_weight: 220.0 # weight for style features loss
|
||||
tv_weight: 0.0 # weight for total variation loss
|
||||
|
||||
## The size, the iter number to run
|
||||
image_size: 256
|
||||
batch_size: 4
|
||||
epoch: 2
|
||||
|
||||
## Loss Network
|
||||
loss_model: "vgg_16"
|
||||
content_layers: # use these layers for content loss
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
style_layers: # use these layers for style loss
|
||||
- "vgg_16/conv1/conv1_2"
|
||||
- "vgg_16/conv2/conv2_2"
|
||||
- "vgg_16/conv3/conv3_3"
|
||||
- "vgg_16/conv4/conv4_3"
|
||||
checkpoint_exclude_scopes: "vgg_16/fc" # we only use the convolution layers, so ignore fc layers.
|
||||
loss_model_file: "pretrained/vgg_16.ckpt" # the path to the checkpoint
|
||||
@ -0,0 +1,77 @@
|
||||
# coding: utf-8
|
||||
from __future__ import print_function
|
||||
import tensorflow.compat.v1 as tf
|
||||
tf.disable_v2_behavior()
|
||||
from preprocessing import preprocessing_factory
|
||||
import reader
|
||||
import model
|
||||
import time
|
||||
import os
|
||||
|
||||
tf.app.flags.DEFINE_string('loss_model', 'vgg_16', 'The name of the architecture to evaluate. '
|
||||
'You can view all the support models in nets/nets_factory.py')
|
||||
tf.app.flags.DEFINE_integer('image_size', 256, 'Image size to train.')
|
||||
tf.app.flags.DEFINE_string("model_file", "models.ckpt", "")
|
||||
tf.app.flags.DEFINE_string("image_file", "a.jpg", "")
|
||||
|
||||
FLAGS = tf.app.flags.FLAGS
|
||||
|
||||
|
||||
def main(_):
|
||||
|
||||
# Get image's height and width.
|
||||
height = 0
|
||||
width = 0
|
||||
with open(FLAGS.image_file, 'rb') as img:
|
||||
with tf.Session().as_default() as sess:
|
||||
if FLAGS.image_file.lower().endswith('png'):
|
||||
image = sess.run(tf.image.decode_png(img.read()))
|
||||
else:
|
||||
image = sess.run(tf.image.decode_jpeg(img.read()))
|
||||
height = image.shape[0]
|
||||
width = image.shape[1]
|
||||
tf.logging.info('Image size: %dx%d' % (width, height))
|
||||
|
||||
with tf.Graph().as_default():
|
||||
with tf.Session().as_default() as sess:
|
||||
|
||||
# Read image data.
|
||||
image_preprocessing_fn, _ = preprocessing_factory.get_preprocessing(
|
||||
FLAGS.loss_model,
|
||||
is_training=False)
|
||||
image = reader.get_image(FLAGS.image_file, height, width, image_preprocessing_fn)
|
||||
|
||||
# Add batch dimension
|
||||
image = tf.expand_dims(image, 0)
|
||||
|
||||
generated = model.net(image, training=False)
|
||||
generated = tf.cast(generated, tf.uint8)
|
||||
|
||||
# Remove batch dimension
|
||||
generated = tf.squeeze(generated, [0])
|
||||
|
||||
# Restore model variables.
|
||||
saver = tf.train.Saver(tf.global_variables(), write_version=tf.train.SaverDef.V1)
|
||||
sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
|
||||
# Use absolute path
|
||||
FLAGS.model_file = os.path.abspath(FLAGS.model_file)
|
||||
saver.restore(sess, FLAGS.model_file)
|
||||
|
||||
# Make sure 'result' directory exists.
|
||||
generated_file = 'result/res.jpg'
|
||||
if os.path.exists('result') is False:
|
||||
os.makedirs('result')
|
||||
|
||||
# Generate and write image data to file.
|
||||
with open(generated_file, 'wb') as img:
|
||||
start_time = time.time()
|
||||
img.write(sess.run(tf.image.encode_jpeg(generated)))
|
||||
end_time = time.time()
|
||||
tf.logging.info('Elapsed time: %fs' % (end_time - start_time))
|
||||
|
||||
tf.logging.info('Done. Please check %s.' % generated_file)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.logging.set_verbosity(tf.logging.INFO)
|
||||
tf.app.run()
|
||||
@ -0,0 +1,84 @@
|
||||
# coding: utf-8
|
||||
from __future__ import print_function
|
||||
import tensorflow as tf
|
||||
import argparse
|
||||
import time
|
||||
import os
|
||||
|
||||
import model
|
||||
import utils
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-m', '--model_file', help='the path to the model file')
|
||||
parser.add_argument('-n', '--model_name', default='transfer', help='the name of the model')
|
||||
parser.add_argument('-d', dest='is_debug', action='store_true')
|
||||
parser.set_defaults(is_debug=False)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main(args):
|
||||
g = tf.Graph() # A new graph
|
||||
with g.as_default():
|
||||
with tf.Session() as sess:
|
||||
# Building graph.
|
||||
image_data = tf.placeholder(tf.int32, name='input_image')
|
||||
height = tf.placeholder(tf.int32, name='height')
|
||||
width = tf.placeholder(tf.int32, name='width')
|
||||
|
||||
# Reshape data
|
||||
image = tf.reshape(image_data, [height, width, 3])
|
||||
|
||||
processed_image = utils.mean_image_subtraction(
|
||||
image, [123.68, 116.779, 103.939]) # Preprocessing image
|
||||
batched_image = tf.expand_dims(processed_image, 0) # Add batch dimension
|
||||
generated_image = model.net(batched_image, training=False)
|
||||
casted_image = tf.cast(generated_image, tf.int32)
|
||||
# Remove batch dimension
|
||||
squeezed_image = tf.squeeze(casted_image, [0])
|
||||
cropped_image = tf.slice(squeezed_image, [0, 0, 0], [height, width, 3])
|
||||
# stylized_image = tf.image.encode_jpeg(squeezed_image, name='output_image')
|
||||
stylized_image_data = tf.reshape(cropped_image, [-1], name='output_image')
|
||||
|
||||
# Restore model variables.
|
||||
saver = tf.train.Saver(tf.global_variables(), write_version=tf.train.SaverDef.V1)
|
||||
sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
|
||||
# Use absolute path.
|
||||
model_file = os.path.abspath(args.model_file)
|
||||
saver.restore(sess, model_file)
|
||||
|
||||
if args.is_debug:
|
||||
content_file = '/Users/Lex/Desktop/t.jpg'
|
||||
generated_file = '/Users/Lex/Desktop/xwz-stylized.jpg'
|
||||
|
||||
with open(generated_file, 'wb') as img:
|
||||
image_bytes = tf.read_file(content_file)
|
||||
input_array, decoded_image = sess.run([
|
||||
tf.reshape(tf.image.decode_jpeg(image_bytes, channels=3), [-1]),
|
||||
tf.image.decode_jpeg(image_bytes, channels=3)])
|
||||
|
||||
start_time = time.time()
|
||||
img.write(sess.run(tf.image.encode_jpeg(tf.cast(cropped_image, tf.uint8)), feed_dict={
|
||||
image_data: input_array,
|
||||
height: decoded_image.shape[0],
|
||||
width: decoded_image.shape[1]}))
|
||||
end_time = time.time()
|
||||
|
||||
tf.logging.info('Elapsed time: %fs' % (end_time - start_time))
|
||||
else:
|
||||
output_graph_def = tf.graph_util.convert_variables_to_constants(
|
||||
sess, sess.graph_def, output_node_names=['output_image'])
|
||||
|
||||
with tf.gfile.FastGFile('/Users/Lex/Desktop/' + args.model_name + '.pb', mode='wb') as f:
|
||||
f.write(output_graph_def.SerializeToString())
|
||||
|
||||
# tf.train.write_graph(g.as_graph_def(), '/Users/Lex/Desktop',
|
||||
# args.model_name + '.pb', as_text=False)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.logging.set_verbosity(tf.logging.INFO)
|
||||
args = parse_args()
|
||||
print(args)
|
||||
main(args)
|
||||
|
After Width: | Height: | Size: 715 KiB |
|
After Width: | Height: | Size: 614 KiB |
|
After Width: | Height: | Size: 212 KiB |
|
After Width: | Height: | Size: 145 KiB |
@ -0,0 +1,108 @@
|
||||
# coding: utf-8
|
||||
from __future__ import print_function
|
||||
import tensorflow as tf
|
||||
from nets import nets_factory
|
||||
from preprocessing import preprocessing_factory
|
||||
import utils
|
||||
import os
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
def gram(layer):
|
||||
shape = tf.shape(layer)
|
||||
num_images = shape[0]
|
||||
width = shape[1]
|
||||
height = shape[2]
|
||||
num_filters = shape[3]
|
||||
filters = tf.reshape(layer, tf.stack([num_images, -1, num_filters]))
|
||||
grams = tf.matmul(filters, filters, transpose_a=True) / tf.to_float(width * height * num_filters)
|
||||
|
||||
return grams
|
||||
|
||||
|
||||
def get_style_features(FLAGS):
|
||||
"""
|
||||
For the "style_image", the preprocessing step is:
|
||||
1. Resize the shorter side to FLAGS.image_size
|
||||
2. Apply central crop
|
||||
"""
|
||||
with tf.Graph().as_default():
|
||||
network_fn = nets_factory.get_network_fn(
|
||||
FLAGS.loss_model,
|
||||
num_classes=1,
|
||||
is_training=False)
|
||||
image_preprocessing_fn, image_unprocessing_fn = preprocessing_factory.get_preprocessing(
|
||||
FLAGS.loss_model,
|
||||
is_training=False)
|
||||
|
||||
# Get the style image data
|
||||
size = FLAGS.image_size
|
||||
img_bytes = tf.read_file(FLAGS.style_image)
|
||||
if FLAGS.style_image.lower().endswith('png'):
|
||||
image = tf.image.decode_png(img_bytes)
|
||||
else:
|
||||
image = tf.image.decode_jpeg(img_bytes)
|
||||
# image = _aspect_preserving_resize(image, size)
|
||||
|
||||
# Add the batch dimension
|
||||
images = tf.expand_dims(image_preprocessing_fn(image, size, size), 0)
|
||||
# images = tf.stack([image_preprocessing_fn(image, size, size)])
|
||||
|
||||
_, endpoints_dict = network_fn(images, spatial_squeeze=False)
|
||||
features = []
|
||||
for layer in FLAGS.style_layers:
|
||||
feature = endpoints_dict[layer]
|
||||
feature = tf.squeeze(gram(feature), [0]) # remove the batch dimension
|
||||
features.append(feature)
|
||||
|
||||
with tf.Session() as sess:
|
||||
# Restore variables for loss network.
|
||||
init_func = utils._get_init_fn(FLAGS)
|
||||
init_func(sess)
|
||||
|
||||
# Make sure the 'generated' directory is exists.
|
||||
if os.path.exists('generated') is False:
|
||||
os.makedirs('generated')
|
||||
# Indicate cropped style image path
|
||||
save_file = 'generated/target_style_' + FLAGS.naming + '.jpg'
|
||||
# Write preprocessed style image to indicated path
|
||||
with open(save_file, 'wb') as f:
|
||||
target_image = image_unprocessing_fn(images[0, :])
|
||||
value = tf.image.encode_jpeg(tf.cast(target_image, tf.uint8))
|
||||
f.write(sess.run(value))
|
||||
tf.logging.info('Target style pattern is saved to: %s.' % save_file)
|
||||
|
||||
# Return the features those layers are use for measuring style loss.
|
||||
return sess.run(features)
|
||||
|
||||
|
||||
def style_loss(endpoints_dict, style_features_t, style_layers):
|
||||
style_loss = 0
|
||||
style_loss_summary = {}
|
||||
for style_gram, layer in zip(style_features_t, style_layers):
|
||||
generated_images, _ = tf.split(endpoints_dict[layer], 2, 0)
|
||||
size = tf.size(generated_images)
|
||||
layer_style_loss = tf.nn.l2_loss(gram(generated_images) - style_gram) * 2 / tf.to_float(size)
|
||||
style_loss_summary[layer] = layer_style_loss
|
||||
style_loss += layer_style_loss
|
||||
return style_loss, style_loss_summary
|
||||
|
||||
|
||||
def content_loss(endpoints_dict, content_layers):
|
||||
content_loss = 0
|
||||
for layer in content_layers:
|
||||
generated_images, content_images = tf.split(endpoints_dict[layer], 2, 0)
|
||||
size = tf.size(generated_images)
|
||||
content_loss += tf.nn.l2_loss(generated_images - content_images) * 2 / tf.to_float(size) # remain the same as in the paper
|
||||
return content_loss
|
||||
|
||||
|
||||
def total_variation_loss(layer):
|
||||
shape = tf.shape(layer)
|
||||
height = shape[1]
|
||||
width = shape[2]
|
||||
y = tf.slice(layer, [0, 0, 0, 0], tf.stack([-1, height - 1, -1, -1])) - tf.slice(layer, [0, 1, 0, 0], [-1, -1, -1, -1])
|
||||
x = tf.slice(layer, [0, 0, 0, 0], tf.stack([-1, -1, width - 1, -1])) - tf.slice(layer, [0, 0, 1, 0], [-1, -1, -1, -1])
|
||||
loss = tf.nn.l2_loss(x) / tf.to_float(tf.size(x)) + tf.nn.l2_loss(y) / tf.to_float(tf.size(y))
|
||||
return loss
|
||||
@ -0,0 +1,134 @@
|
||||
import tensorflow.compat.v1 as tf
|
||||
tf.disable_v2_behavior()
|
||||
|
||||
|
||||
def conv2d(x, input_filters, output_filters, kernel, strides, mode='REFLECT'):
|
||||
with tf.variable_scope('conv'):
|
||||
|
||||
shape = [kernel, kernel, input_filters, output_filters]
|
||||
weight = tf.Variable(tf.truncated_normal(shape, stddev=0.1), name='weight')
|
||||
x_padded = tf.pad(x, [[0, 0], [int(kernel / 2), int(kernel / 2)], [int(kernel / 2), int(kernel / 2)], [0, 0]], mode=mode)
|
||||
return tf.nn.conv2d(x_padded, weight, strides=[1, strides, strides, 1], padding='VALID', name='conv')
|
||||
|
||||
|
||||
def conv2d_transpose(x, input_filters, output_filters, kernel, strides):
|
||||
with tf.variable_scope('conv_transpose'):
|
||||
|
||||
shape = [kernel, kernel, output_filters, input_filters]
|
||||
weight = tf.Variable(tf.truncated_normal(shape, stddev=0.1), name='weight')
|
||||
|
||||
batch_size = tf.shape(x)[0]
|
||||
height = tf.shape(x)[1] * strides
|
||||
width = tf.shape(x)[2] * strides
|
||||
output_shape = tf.stack([batch_size, height, width, output_filters])
|
||||
return tf.nn.conv2d_transpose(x, weight, output_shape, strides=[1, strides, strides, 1], name='conv_transpose')
|
||||
|
||||
|
||||
def resize_conv2d(x, input_filters, output_filters, kernel, strides, training):
|
||||
'''
|
||||
An alternative to transposed convolution where we first resize, then convolve.
|
||||
See http://distill.pub/2016/deconv-checkerboard/
|
||||
|
||||
For some reason the shape needs to be statically known for gradient propagation
|
||||
through tf.image.resize_images, but we only know that for fixed image size, so we
|
||||
plumb through a "training" argument
|
||||
'''
|
||||
with tf.variable_scope('conv_transpose'):
|
||||
height = x.get_shape()[1].value if training else tf.shape(x)[1]
|
||||
width = x.get_shape()[2].value if training else tf.shape(x)[2]
|
||||
|
||||
new_height = height * strides * 2
|
||||
new_width = width * strides * 2
|
||||
|
||||
x_resized = tf.image.resize_images(x, [new_height, new_width], tf.image.ResizeMethod.NEAREST_NEIGHBOR)
|
||||
|
||||
# shape = [kernel, kernel, input_filters, output_filters]
|
||||
# weight = tf.Variable(tf.truncated_normal(shape, stddev=0.1), name='weight')
|
||||
return conv2d(x_resized, input_filters, output_filters, kernel, strides)
|
||||
|
||||
|
||||
def instance_norm(x):
|
||||
epsilon = 1e-9
|
||||
|
||||
mean, var = tf.nn.moments(x, [1, 2], keep_dims=True)
|
||||
|
||||
return tf.div(tf.subtract(x, mean), tf.sqrt(tf.add(var, epsilon)))
|
||||
|
||||
|
||||
def batch_norm(x, size, training, decay=0.999):
|
||||
beta = tf.Variable(tf.zeros([size]), name='beta')
|
||||
scale = tf.Variable(tf.ones([size]), name='scale')
|
||||
pop_mean = tf.Variable(tf.zeros([size]))
|
||||
pop_var = tf.Variable(tf.ones([size]))
|
||||
epsilon = 1e-3
|
||||
|
||||
batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2])
|
||||
train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
|
||||
train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
|
||||
|
||||
def batch_statistics():
|
||||
with tf.control_dependencies([train_mean, train_var]):
|
||||
return tf.nn.batch_normalization(x, batch_mean, batch_var, beta, scale, epsilon, name='batch_norm')
|
||||
|
||||
def population_statistics():
|
||||
return tf.nn.batch_normalization(x, pop_mean, pop_var, beta, scale, epsilon, name='batch_norm')
|
||||
|
||||
return tf.cond(training, batch_statistics, population_statistics)
|
||||
|
||||
|
||||
def relu(input):
|
||||
relu = tf.nn.relu(input)
|
||||
# convert nan to zero (nan != nan)
|
||||
nan_to_zero = tf.where(tf.equal(relu, relu), relu, tf.zeros_like(relu))
|
||||
return nan_to_zero
|
||||
|
||||
|
||||
def residual(x, filters, kernel, strides):
|
||||
with tf.variable_scope('residual'):
|
||||
conv1 = conv2d(x, filters, filters, kernel, strides)
|
||||
conv2 = conv2d(relu(conv1), filters, filters, kernel, strides)
|
||||
|
||||
residual = x + conv2
|
||||
|
||||
return residual
|
||||
|
||||
|
||||
def net(image, training):
|
||||
# Less border effects when padding a little before passing through ..
|
||||
image = tf.pad(image, [[0, 0], [10, 10], [10, 10], [0, 0]], mode='REFLECT')
|
||||
|
||||
with tf.variable_scope('conv1'):
|
||||
conv1 = relu(instance_norm(conv2d(image, 3, 32, 9, 1)))
|
||||
with tf.variable_scope('conv2'):
|
||||
conv2 = relu(instance_norm(conv2d(conv1, 32, 64, 3, 2)))
|
||||
with tf.variable_scope('conv3'):
|
||||
conv3 = relu(instance_norm(conv2d(conv2, 64, 128, 3, 2)))
|
||||
with tf.variable_scope('res1'):
|
||||
res1 = residual(conv3, 128, 3, 1)
|
||||
with tf.variable_scope('res2'):
|
||||
res2 = residual(res1, 128, 3, 1)
|
||||
with tf.variable_scope('res3'):
|
||||
res3 = residual(res2, 128, 3, 1)
|
||||
with tf.variable_scope('res4'):
|
||||
res4 = residual(res3, 128, 3, 1)
|
||||
with tf.variable_scope('res5'):
|
||||
res5 = residual(res4, 128, 3, 1)
|
||||
# print(res5.get_shape())
|
||||
with tf.variable_scope('deconv1'):
|
||||
# deconv1 = relu(instance_norm(conv2d_transpose(res5, 128, 64, 3, 2)))
|
||||
deconv1 = relu(instance_norm(resize_conv2d(res5, 128, 64, 3, 2, training)))
|
||||
with tf.variable_scope('deconv2'):
|
||||
# deconv2 = relu(instance_norm(conv2d_transpose(deconv1, 64, 32, 3, 2)))
|
||||
deconv2 = relu(instance_norm(resize_conv2d(deconv1, 64, 32, 3, 2, training)))
|
||||
with tf.variable_scope('deconv3'):
|
||||
# deconv_test = relu(instance_norm(conv2d(deconv2, 32, 32, 2, 1)))
|
||||
deconv3 = tf.nn.tanh(instance_norm(conv2d(deconv2, 32, 3, 9, 1)))
|
||||
|
||||
y = (deconv3 + 1) * 127.5
|
||||
|
||||
# Remove border effect reducing padding.
|
||||
height = tf.shape(y)[1]
|
||||
width = tf.shape(y)[2]
|
||||
y = tf.slice(y, [0, 10, 10, 0], tf.stack([-1, height - 20, width - 20, -1]))
|
||||
|
||||
return y
|
||||
@ -0,0 +1 @@
|
||||
|
||||
@ -0,0 +1,125 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains a model definition for AlexNet.
|
||||
|
||||
This work was first described in:
|
||||
ImageNet Classification with Deep Convolutional Neural Networks
|
||||
Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton
|
||||
|
||||
and later refined in:
|
||||
One weird trick for parallelizing convolutional neural networks
|
||||
Alex Krizhevsky, 2014
|
||||
|
||||
Here we provide the implementation proposed in "One weird trick" and not
|
||||
"ImageNet Classification", as per the paper, the LRN layers have been removed.
|
||||
|
||||
Usage:
|
||||
with slim.arg_scope(alexnet.alexnet_v2_arg_scope()):
|
||||
outputs, end_points = alexnet.alexnet_v2(inputs)
|
||||
|
||||
@@alexnet_v2
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
slim = tf.contrib.slim
|
||||
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
|
||||
|
||||
|
||||
def alexnet_v2_arg_scope(weight_decay=0.0005):
|
||||
with slim.arg_scope([slim.conv2d, slim.fully_connected],
|
||||
activation_fn=tf.nn.relu,
|
||||
biases_initializer=tf.constant_initializer(0.1),
|
||||
weights_regularizer=slim.l2_regularizer(weight_decay)):
|
||||
with slim.arg_scope([slim.conv2d], padding='SAME'):
|
||||
with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
|
||||
return arg_sc
|
||||
|
||||
|
||||
def alexnet_v2(inputs,
|
||||
num_classes=1000,
|
||||
is_training=True,
|
||||
dropout_keep_prob=0.5,
|
||||
spatial_squeeze=True,
|
||||
scope='alexnet_v2'):
|
||||
"""AlexNet version 2.
|
||||
|
||||
Described in: http://arxiv.org/pdf/1404.5997v2.pdf
|
||||
Parameters from:
|
||||
github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
|
||||
layers-imagenet-1gpu.cfg
|
||||
|
||||
Note: All the fully_connected layers have been transformed to conv2d layers.
|
||||
To use in classification mode, resize input to 224x224. To use in fully
|
||||
convolutional mode, set spatial_squeeze to false.
|
||||
The LRN layers have been removed and change the initializers from
|
||||
random_normal_initializer to xavier_initializer.
|
||||
|
||||
Args:
|
||||
inputs: a tensor of size [batch_size, height, width, channels].
|
||||
num_classes: number of predicted classes.
|
||||
is_training: whether or not the model is being trained.
|
||||
dropout_keep_prob: the probability that activations are kept in the dropout
|
||||
layers during training.
|
||||
spatial_squeeze: whether or not should squeeze the spatial dimensions of the
|
||||
outputs. Useful to remove unnecessary dimensions for classification.
|
||||
scope: Optional scope for the variables.
|
||||
|
||||
Returns:
|
||||
the last op containing the log predictions and end_points dict.
|
||||
"""
|
||||
with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
|
||||
end_points_collection = sc.name + '_end_points'
|
||||
# Collect outputs for conv2d, fully_connected and max_pool2d.
|
||||
with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
|
||||
outputs_collections=[end_points_collection]):
|
||||
net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
|
||||
scope='conv1')
|
||||
net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
|
||||
net = slim.conv2d(net, 192, [5, 5], scope='conv2')
|
||||
net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
|
||||
net = slim.conv2d(net, 384, [3, 3], scope='conv3')
|
||||
net = slim.conv2d(net, 384, [3, 3], scope='conv4')
|
||||
net = slim.conv2d(net, 256, [3, 3], scope='conv5')
|
||||
net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')
|
||||
|
||||
# Use conv2d instead of fully_connected layers.
|
||||
with slim.arg_scope([slim.conv2d],
|
||||
weights_initializer=trunc_normal(0.005),
|
||||
biases_initializer=tf.constant_initializer(0.1)):
|
||||
net = slim.conv2d(net, 4096, [5, 5], padding='VALID',
|
||||
scope='fc6')
|
||||
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
|
||||
scope='dropout6')
|
||||
net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
|
||||
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
|
||||
scope='dropout7')
|
||||
net = slim.conv2d(net, num_classes, [1, 1],
|
||||
activation_fn=None,
|
||||
normalizer_fn=None,
|
||||
biases_initializer=tf.zeros_initializer,
|
||||
scope='fc8')
|
||||
|
||||
# Convert end_points_collection into a end_point dict.
|
||||
end_points = slim.utils.convert_collection_to_dict(end_points_collection)
|
||||
if spatial_squeeze:
|
||||
net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
|
||||
end_points[sc.name + '/fc8'] = net
|
||||
return net, end_points
|
||||
alexnet_v2.default_image_size = 224
|
||||
@ -0,0 +1,145 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for slim.nets.alexnet."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import alexnet
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
class AlexnetV2Test(tf.test.TestCase):
|
||||
|
||||
def testBuild(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = alexnet.alexnet_v2(inputs, num_classes)
|
||||
self.assertEquals(logits.op.name, 'alexnet_v2/fc8/squeezed')
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
|
||||
def testFullyConvolutional(self):
|
||||
batch_size = 1
|
||||
height, width = 300, 400
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False)
|
||||
self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd')
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, 4, 7, num_classes])
|
||||
|
||||
def testEndPoints(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
_, end_points = alexnet.alexnet_v2(inputs, num_classes)
|
||||
expected_names = ['alexnet_v2/conv1',
|
||||
'alexnet_v2/pool1',
|
||||
'alexnet_v2/conv2',
|
||||
'alexnet_v2/pool2',
|
||||
'alexnet_v2/conv3',
|
||||
'alexnet_v2/conv4',
|
||||
'alexnet_v2/conv5',
|
||||
'alexnet_v2/pool5',
|
||||
'alexnet_v2/fc6',
|
||||
'alexnet_v2/fc7',
|
||||
'alexnet_v2/fc8'
|
||||
]
|
||||
self.assertSetEqual(set(end_points.keys()), set(expected_names))
|
||||
|
||||
def testModelVariables(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
alexnet.alexnet_v2(inputs, num_classes)
|
||||
expected_names = ['alexnet_v2/conv1/weights',
|
||||
'alexnet_v2/conv1/biases',
|
||||
'alexnet_v2/conv2/weights',
|
||||
'alexnet_v2/conv2/biases',
|
||||
'alexnet_v2/conv3/weights',
|
||||
'alexnet_v2/conv3/biases',
|
||||
'alexnet_v2/conv4/weights',
|
||||
'alexnet_v2/conv4/biases',
|
||||
'alexnet_v2/conv5/weights',
|
||||
'alexnet_v2/conv5/biases',
|
||||
'alexnet_v2/fc6/weights',
|
||||
'alexnet_v2/fc6/biases',
|
||||
'alexnet_v2/fc7/weights',
|
||||
'alexnet_v2/fc7/biases',
|
||||
'alexnet_v2/fc8/weights',
|
||||
'alexnet_v2/fc8/biases',
|
||||
]
|
||||
model_variables = [v.op.name for v in slim.get_model_variables()]
|
||||
self.assertSetEqual(set(model_variables), set(expected_names))
|
||||
|
||||
def testEvaluation(self):
|
||||
batch_size = 2
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
eval_inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
predictions = tf.argmax(logits, 1)
|
||||
self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
|
||||
|
||||
def testTrainEvalWithReuse(self):
|
||||
train_batch_size = 2
|
||||
eval_batch_size = 1
|
||||
train_height, train_width = 224, 224
|
||||
eval_height, eval_width = 300, 400
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
train_inputs = tf.random_uniform(
|
||||
(train_batch_size, train_height, train_width, 3))
|
||||
logits, _ = alexnet.alexnet_v2(train_inputs)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[train_batch_size, num_classes])
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
eval_inputs = tf.random_uniform(
|
||||
(eval_batch_size, eval_height, eval_width, 3))
|
||||
logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False,
|
||||
spatial_squeeze=False)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[eval_batch_size, 4, 7, num_classes])
|
||||
logits = tf.reduce_mean(logits, [1, 2])
|
||||
predictions = tf.argmax(logits, 1)
|
||||
self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
|
||||
|
||||
def testForward(self):
|
||||
batch_size = 1
|
||||
height, width = 224, 224
|
||||
with self.test_session() as sess:
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = alexnet.alexnet_v2(inputs)
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(logits)
|
||||
self.assertTrue(output.any())
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
@ -0,0 +1,112 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains a variant of the CIFAR-10 model definition."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
trunc_normal = lambda stddev: tf.truncated_normal_initializer(stddev=stddev)
|
||||
|
||||
|
||||
def cifarnet(images, num_classes=10, is_training=False,
|
||||
dropout_keep_prob=0.5,
|
||||
prediction_fn=slim.softmax,
|
||||
scope='CifarNet'):
|
||||
"""Creates a variant of the CifarNet model.
|
||||
|
||||
Note that since the output is a set of 'logits', the values fall in the
|
||||
interval of (-infinity, infinity). Consequently, to convert the outputs to a
|
||||
probability distribution over the characters, one will need to convert them
|
||||
using the softmax function:
|
||||
|
||||
logits = cifarnet.cifarnet(images, is_training=False)
|
||||
probabilities = tf.nn.softmax(logits)
|
||||
predictions = tf.argmax(logits, 1)
|
||||
|
||||
Args:
|
||||
images: A batch of `Tensors` of size [batch_size, height, width, channels].
|
||||
num_classes: the number of classes in the dataset.
|
||||
is_training: specifies whether or not we're currently training the model.
|
||||
This variable will determine the behaviour of the dropout layer.
|
||||
dropout_keep_prob: the percentage of activation values that are retained.
|
||||
prediction_fn: a function to get predictions out of logits.
|
||||
scope: Optional variable_scope.
|
||||
|
||||
Returns:
|
||||
logits: the pre-softmax activations, a tensor of size
|
||||
[batch_size, `num_classes`]
|
||||
end_points: a dictionary from components of the network to the corresponding
|
||||
activation.
|
||||
"""
|
||||
end_points = {}
|
||||
|
||||
with tf.variable_scope(scope, 'CifarNet', [images, num_classes]):
|
||||
net = slim.conv2d(images, 64, [5, 5], scope='conv1')
|
||||
end_points['conv1'] = net
|
||||
net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
|
||||
end_points['pool1'] = net
|
||||
net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1')
|
||||
net = slim.conv2d(net, 64, [5, 5], scope='conv2')
|
||||
end_points['conv2'] = net
|
||||
net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2')
|
||||
net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
|
||||
end_points['pool2'] = net
|
||||
net = slim.flatten(net)
|
||||
end_points['Flatten'] = net
|
||||
net = slim.fully_connected(net, 384, scope='fc3')
|
||||
end_points['fc3'] = net
|
||||
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
|
||||
scope='dropout3')
|
||||
net = slim.fully_connected(net, 192, scope='fc4')
|
||||
end_points['fc4'] = net
|
||||
logits = slim.fully_connected(net, num_classes,
|
||||
biases_initializer=tf.zeros_initializer,
|
||||
weights_initializer=trunc_normal(1/192.0),
|
||||
weights_regularizer=None,
|
||||
activation_fn=None,
|
||||
scope='logits')
|
||||
|
||||
end_points['Logits'] = logits
|
||||
end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
|
||||
|
||||
return logits, end_points
|
||||
cifarnet.default_image_size = 32
|
||||
|
||||
|
||||
def cifarnet_arg_scope(weight_decay=0.004):
|
||||
"""Defines the default cifarnet argument scope.
|
||||
|
||||
Args:
|
||||
weight_decay: The weight decay to use for regularizing the model.
|
||||
|
||||
Returns:
|
||||
An `arg_scope` to use for the inception v3 model.
|
||||
"""
|
||||
with slim.arg_scope(
|
||||
[slim.conv2d],
|
||||
weights_initializer=tf.truncated_normal_initializer(stddev=5e-2),
|
||||
activation_fn=tf.nn.relu):
|
||||
with slim.arg_scope(
|
||||
[slim.fully_connected],
|
||||
biases_initializer=tf.constant_initializer(0.1),
|
||||
weights_initializer=trunc_normal(0.04),
|
||||
weights_regularizer=slim.l2_regularizer(weight_decay),
|
||||
activation_fn=tf.nn.relu) as sc:
|
||||
return sc
|
||||
@ -0,0 +1,36 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Brings all inception models under one namespace."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
# pylint: disable=unused-import
|
||||
from nets.inception_resnet_v2 import inception_resnet_v2
|
||||
from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope
|
||||
from nets.inception_v1 import inception_v1
|
||||
from nets.inception_v1 import inception_v1_arg_scope
|
||||
from nets.inception_v1 import inception_v1_base
|
||||
from nets.inception_v2 import inception_v2
|
||||
from nets.inception_v2 import inception_v2_arg_scope
|
||||
from nets.inception_v2 import inception_v2_base
|
||||
from nets.inception_v3 import inception_v3
|
||||
from nets.inception_v3 import inception_v3_arg_scope
|
||||
from nets.inception_v3 import inception_v3_base
|
||||
from nets.inception_v4 import inception_v4
|
||||
from nets.inception_v4 import inception_v4_arg_scope
|
||||
from nets.inception_v4 import inception_v4_base
|
||||
# pylint: enable=unused-import
|
||||
@ -0,0 +1,280 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains the definition of the Inception Resnet V2 architecture.
|
||||
|
||||
As described in http://arxiv.org/abs/1602.07261.
|
||||
|
||||
Inception-v4, Inception-ResNet and the Impact of Residual Connections
|
||||
on Learning
|
||||
Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
|
||||
"""Builds the 35x35 resnet block."""
|
||||
with tf.variable_scope(scope, 'Block35', [net], reuse=reuse):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
|
||||
tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
|
||||
tower_conv2_1 = slim.conv2d(tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3')
|
||||
tower_conv2_2 = slim.conv2d(tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3')
|
||||
mixed = tf.concat(3, [tower_conv, tower_conv1_1, tower_conv2_2])
|
||||
up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
|
||||
activation_fn=None, scope='Conv2d_1x1')
|
||||
net += scale * up
|
||||
if activation_fn:
|
||||
net = activation_fn(net)
|
||||
return net
|
||||
|
||||
|
||||
def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
|
||||
"""Builds the 17x17 resnet block."""
|
||||
with tf.variable_scope(scope, 'Block17', [net], reuse=reuse):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1')
|
||||
tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7],
|
||||
scope='Conv2d_0b_1x7')
|
||||
tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1],
|
||||
scope='Conv2d_0c_7x1')
|
||||
mixed = tf.concat(3, [tower_conv, tower_conv1_2])
|
||||
up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
|
||||
activation_fn=None, scope='Conv2d_1x1')
|
||||
net += scale * up
|
||||
if activation_fn:
|
||||
net = activation_fn(net)
|
||||
return net
|
||||
|
||||
|
||||
def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
|
||||
"""Builds the 8x8 resnet block."""
|
||||
with tf.variable_scope(scope, 'Block8', [net], reuse=reuse):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1')
|
||||
tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3],
|
||||
scope='Conv2d_0b_1x3')
|
||||
tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1],
|
||||
scope='Conv2d_0c_3x1')
|
||||
mixed = tf.concat(3, [tower_conv, tower_conv1_2])
|
||||
up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
|
||||
activation_fn=None, scope='Conv2d_1x1')
|
||||
net += scale * up
|
||||
if activation_fn:
|
||||
net = activation_fn(net)
|
||||
return net
|
||||
|
||||
|
||||
def inception_resnet_v2(inputs, num_classes=1001, is_training=True,
|
||||
dropout_keep_prob=0.8,
|
||||
reuse=None,
|
||||
scope='InceptionResnetV2'):
|
||||
"""Creates the Inception Resnet V2 model.
|
||||
|
||||
Args:
|
||||
inputs: a 4-D tensor of size [batch_size, height, width, 3].
|
||||
num_classes: number of predicted classes.
|
||||
is_training: whether is training or not.
|
||||
dropout_keep_prob: float, the fraction to keep before final layer.
|
||||
reuse: whether or not the network and its variables should be reused. To be
|
||||
able to reuse 'scope' must be given.
|
||||
scope: Optional variable_scope.
|
||||
|
||||
Returns:
|
||||
logits: the logits outputs of the model.
|
||||
end_points: the set of end_points from the inception model.
|
||||
"""
|
||||
end_points = {}
|
||||
|
||||
with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
|
||||
with slim.arg_scope([slim.batch_norm, slim.dropout],
|
||||
is_training=is_training):
|
||||
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
|
||||
stride=1, padding='SAME'):
|
||||
|
||||
# 149 x 149 x 32
|
||||
net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
|
||||
scope='Conv2d_1a_3x3')
|
||||
end_points['Conv2d_1a_3x3'] = net
|
||||
# 147 x 147 x 32
|
||||
net = slim.conv2d(net, 32, 3, padding='VALID',
|
||||
scope='Conv2d_2a_3x3')
|
||||
end_points['Conv2d_2a_3x3'] = net
|
||||
# 147 x 147 x 64
|
||||
net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
|
||||
end_points['Conv2d_2b_3x3'] = net
|
||||
# 73 x 73 x 64
|
||||
net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
|
||||
scope='MaxPool_3a_3x3')
|
||||
end_points['MaxPool_3a_3x3'] = net
|
||||
# 73 x 73 x 80
|
||||
net = slim.conv2d(net, 80, 1, padding='VALID',
|
||||
scope='Conv2d_3b_1x1')
|
||||
end_points['Conv2d_3b_1x1'] = net
|
||||
# 71 x 71 x 192
|
||||
net = slim.conv2d(net, 192, 3, padding='VALID',
|
||||
scope='Conv2d_4a_3x3')
|
||||
end_points['Conv2d_4a_3x3'] = net
|
||||
# 35 x 35 x 192
|
||||
net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
|
||||
scope='MaxPool_5a_3x3')
|
||||
end_points['MaxPool_5a_3x3'] = net
|
||||
|
||||
# 35 x 35 x 320
|
||||
with tf.variable_scope('Mixed_5b'):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1')
|
||||
tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5,
|
||||
scope='Conv2d_0b_5x5')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1')
|
||||
tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3,
|
||||
scope='Conv2d_0b_3x3')
|
||||
tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3,
|
||||
scope='Conv2d_0c_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME',
|
||||
scope='AvgPool_0a_3x3')
|
||||
tower_pool_1 = slim.conv2d(tower_pool, 64, 1,
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [tower_conv, tower_conv1_1,
|
||||
tower_conv2_2, tower_pool_1])
|
||||
|
||||
end_points['Mixed_5b'] = net
|
||||
net = slim.repeat(net, 10, block35, scale=0.17)
|
||||
|
||||
# 17 x 17 x 1024
|
||||
with tf.variable_scope('Mixed_6a'):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
tower_conv = slim.conv2d(net, 384, 3, stride=2, padding='VALID',
|
||||
scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
|
||||
tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3,
|
||||
scope='Conv2d_0b_3x3')
|
||||
tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3,
|
||||
stride=2, padding='VALID',
|
||||
scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
|
||||
scope='MaxPool_1a_3x3')
|
||||
net = tf.concat(3, [tower_conv, tower_conv1_2, tower_pool])
|
||||
|
||||
end_points['Mixed_6a'] = net
|
||||
net = slim.repeat(net, 20, block17, scale=0.10)
|
||||
|
||||
# Auxillary tower
|
||||
with tf.variable_scope('AuxLogits'):
|
||||
aux = slim.avg_pool2d(net, 5, stride=3, padding='VALID',
|
||||
scope='Conv2d_1a_3x3')
|
||||
aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1')
|
||||
aux = slim.conv2d(aux, 768, aux.get_shape()[1:3],
|
||||
padding='VALID', scope='Conv2d_2a_5x5')
|
||||
aux = slim.flatten(aux)
|
||||
aux = slim.fully_connected(aux, num_classes, activation_fn=None,
|
||||
scope='Logits')
|
||||
end_points['AuxLogits'] = aux
|
||||
|
||||
with tf.variable_scope('Mixed_7a'):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
|
||||
tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
|
||||
padding='VALID', scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
|
||||
tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2,
|
||||
padding='VALID', scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
|
||||
tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
|
||||
scope='Conv2d_0b_3x3')
|
||||
tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2,
|
||||
padding='VALID', scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
|
||||
scope='MaxPool_1a_3x3')
|
||||
net = tf.concat(3, [tower_conv_1, tower_conv1_1,
|
||||
tower_conv2_2, tower_pool])
|
||||
|
||||
end_points['Mixed_7a'] = net
|
||||
|
||||
net = slim.repeat(net, 9, block8, scale=0.20)
|
||||
net = block8(net, activation_fn=None)
|
||||
|
||||
net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
|
||||
end_points['Conv2d_7b_1x1'] = net
|
||||
|
||||
with tf.variable_scope('Logits'):
|
||||
end_points['PrePool'] = net
|
||||
net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
|
||||
scope='AvgPool_1a_8x8')
|
||||
net = slim.flatten(net)
|
||||
|
||||
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
|
||||
scope='Dropout')
|
||||
|
||||
end_points['PreLogitsFlatten'] = net
|
||||
logits = slim.fully_connected(net, num_classes, activation_fn=None,
|
||||
scope='Logits')
|
||||
end_points['Logits'] = logits
|
||||
end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')
|
||||
|
||||
return logits, end_points
|
||||
inception_resnet_v2.default_image_size = 299
|
||||
|
||||
|
||||
def inception_resnet_v2_arg_scope(weight_decay=0.00004,
|
||||
batch_norm_decay=0.9997,
|
||||
batch_norm_epsilon=0.001):
|
||||
"""Yields the scope with the default parameters for inception_resnet_v2.
|
||||
|
||||
Args:
|
||||
weight_decay: the weight decay for weights variables.
|
||||
batch_norm_decay: decay for the moving average of batch_norm momentums.
|
||||
batch_norm_epsilon: small float added to variance to avoid dividing by zero.
|
||||
|
||||
Returns:
|
||||
a arg_scope with the parameters needed for inception_resnet_v2.
|
||||
"""
|
||||
# Set weight_decay for weights in conv2d and fully_connected layers.
|
||||
with slim.arg_scope([slim.conv2d, slim.fully_connected],
|
||||
weights_regularizer=slim.l2_regularizer(weight_decay),
|
||||
biases_regularizer=slim.l2_regularizer(weight_decay)):
|
||||
|
||||
batch_norm_params = {
|
||||
'decay': batch_norm_decay,
|
||||
'epsilon': batch_norm_epsilon,
|
||||
}
|
||||
# Set activation_fn and parameters for batch_norm.
|
||||
with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu,
|
||||
normalizer_fn=slim.batch_norm,
|
||||
normalizer_params=batch_norm_params) as scope:
|
||||
return scope
|
||||
@ -0,0 +1,136 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for slim.inception_resnet_v2."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import inception
|
||||
|
||||
|
||||
class InceptionTest(tf.test.TestCase):
|
||||
|
||||
def testBuildLogits(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = inception.inception_resnet_v2(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
|
||||
def testBuildEndPoints(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
_, end_points = inception.inception_resnet_v2(inputs, num_classes)
|
||||
self.assertTrue('Logits' in end_points)
|
||||
logits = end_points['Logits']
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
self.assertTrue('AuxLogits' in end_points)
|
||||
aux_logits = end_points['AuxLogits']
|
||||
self.assertListEqual(aux_logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
pre_pool = end_points['PrePool']
|
||||
self.assertListEqual(pre_pool.get_shape().as_list(),
|
||||
[batch_size, 8, 8, 1536])
|
||||
|
||||
def testVariablesSetDevice(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
# Force all Variables to reside on the device.
|
||||
with tf.variable_scope('on_cpu'), tf.device('/cpu:0'):
|
||||
inception.inception_resnet_v2(inputs, num_classes)
|
||||
with tf.variable_scope('on_gpu'), tf.device('/gpu:0'):
|
||||
inception.inception_resnet_v2(inputs, num_classes)
|
||||
for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_cpu'):
|
||||
self.assertDeviceEqual(v.device, '/cpu:0')
|
||||
for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_gpu'):
|
||||
self.assertDeviceEqual(v.device, '/gpu:0')
|
||||
|
||||
def testHalfSizeImages(self):
|
||||
batch_size = 5
|
||||
height, width = 150, 150
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, end_points = inception.inception_resnet_v2(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
pre_pool = end_points['PrePool']
|
||||
self.assertListEqual(pre_pool.get_shape().as_list(),
|
||||
[batch_size, 3, 3, 1536])
|
||||
|
||||
def testUnknownBatchSize(self):
|
||||
batch_size = 1
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
with self.test_session() as sess:
|
||||
inputs = tf.placeholder(tf.float32, (None, height, width, 3))
|
||||
logits, _ = inception.inception_resnet_v2(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[None, num_classes])
|
||||
images = tf.random_uniform((batch_size, height, width, 3))
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(logits, {inputs: images.eval()})
|
||||
self.assertEquals(output.shape, (batch_size, num_classes))
|
||||
|
||||
def testEvaluation(self):
|
||||
batch_size = 2
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
with self.test_session() as sess:
|
||||
eval_inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = inception.inception_resnet_v2(eval_inputs,
|
||||
num_classes,
|
||||
is_training=False)
|
||||
predictions = tf.argmax(logits, 1)
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(predictions)
|
||||
self.assertEquals(output.shape, (batch_size,))
|
||||
|
||||
def testTrainEvalWithReuse(self):
|
||||
train_batch_size = 5
|
||||
eval_batch_size = 2
|
||||
height, width = 150, 150
|
||||
num_classes = 1000
|
||||
with self.test_session() as sess:
|
||||
train_inputs = tf.random_uniform((train_batch_size, height, width, 3))
|
||||
inception.inception_resnet_v2(train_inputs, num_classes)
|
||||
eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))
|
||||
logits, _ = inception.inception_resnet_v2(eval_inputs,
|
||||
num_classes,
|
||||
is_training=False,
|
||||
reuse=True)
|
||||
predictions = tf.argmax(logits, 1)
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(predictions)
|
||||
self.assertEquals(output.shape, (eval_batch_size,))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
@ -0,0 +1,71 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains common code shared by all inception models.
|
||||
|
||||
Usage of arg scope:
|
||||
with slim.arg_scope(inception_arg_scope()):
|
||||
logits, end_points = inception.inception_v3(images, num_classes,
|
||||
is_training=is_training)
|
||||
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
def inception_arg_scope(weight_decay=0.00004,
|
||||
use_batch_norm=True,
|
||||
batch_norm_decay=0.9997,
|
||||
batch_norm_epsilon=0.001):
|
||||
"""Defines the default arg scope for inception models.
|
||||
|
||||
Args:
|
||||
weight_decay: The weight decay to use for regularizing the model.
|
||||
use_batch_norm: "If `True`, batch_norm is applied after each convolution.
|
||||
batch_norm_decay: Decay for batch norm moving average.
|
||||
batch_norm_epsilon: Small float added to variance to avoid dividing by zero
|
||||
in batch norm.
|
||||
|
||||
Returns:
|
||||
An `arg_scope` to use for the inception models.
|
||||
"""
|
||||
batch_norm_params = {
|
||||
# Decay for the moving averages.
|
||||
'decay': batch_norm_decay,
|
||||
# epsilon to prevent 0s in variance.
|
||||
'epsilon': batch_norm_epsilon,
|
||||
# collection containing update_ops.
|
||||
'updates_collections': tf.GraphKeys.UPDATE_OPS,
|
||||
}
|
||||
if use_batch_norm:
|
||||
normalizer_fn = slim.batch_norm
|
||||
normalizer_params = batch_norm_params
|
||||
else:
|
||||
normalizer_fn = None
|
||||
normalizer_params = {}
|
||||
# Set weight_decay for weights in Conv and FC layers.
|
||||
with slim.arg_scope([slim.conv2d, slim.fully_connected],
|
||||
weights_regularizer=slim.l2_regularizer(weight_decay)):
|
||||
with slim.arg_scope(
|
||||
[slim.conv2d],
|
||||
weights_initializer=slim.variance_scaling_initializer(),
|
||||
activation_fn=tf.nn.relu,
|
||||
normalizer_fn=normalizer_fn,
|
||||
normalizer_params=normalizer_params) as sc:
|
||||
return sc
|
||||
@ -0,0 +1,305 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains the definition for inception v1 classification network."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import inception_utils
|
||||
|
||||
slim = tf.contrib.slim
|
||||
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
|
||||
|
||||
|
||||
def inception_v1_base(inputs,
|
||||
final_endpoint='Mixed_5c',
|
||||
scope='InceptionV1'):
|
||||
"""Defines the Inception V1 base architecture.
|
||||
|
||||
This architecture is defined in:
|
||||
Going deeper with convolutions
|
||||
Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
|
||||
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
|
||||
http://arxiv.org/pdf/1409.4842v1.pdf.
|
||||
|
||||
Args:
|
||||
inputs: a tensor of size [batch_size, height, width, channels].
|
||||
final_endpoint: specifies the endpoint to construct the network up to. It
|
||||
can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
|
||||
'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
|
||||
'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
|
||||
'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c']
|
||||
scope: Optional variable_scope.
|
||||
|
||||
Returns:
|
||||
A dictionary from components of the network to the corresponding activation.
|
||||
|
||||
Raises:
|
||||
ValueError: if final_endpoint is not set to one of the predefined values.
|
||||
"""
|
||||
end_points = {}
|
||||
with tf.variable_scope(scope, 'InceptionV1', [inputs]):
|
||||
with slim.arg_scope(
|
||||
[slim.conv2d, slim.fully_connected],
|
||||
weights_initializer=trunc_normal(0.01)):
|
||||
with slim.arg_scope([slim.conv2d, slim.max_pool2d],
|
||||
stride=1, padding='SAME'):
|
||||
end_point = 'Conv2d_1a_7x7'
|
||||
net = slim.conv2d(inputs, 64, [7, 7], stride=2, scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
end_point = 'MaxPool_2a_3x3'
|
||||
net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
end_point = 'Conv2d_2b_1x1'
|
||||
net = slim.conv2d(net, 64, [1, 1], scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
end_point = 'Conv2d_2c_3x3'
|
||||
net = slim.conv2d(net, 192, [3, 3], scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
end_point = 'MaxPool_3a_3x3'
|
||||
net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
|
||||
end_point = 'Mixed_3b'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, 128, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, 32, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, 32, [1, 1], scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
|
||||
end_point = 'Mixed_3c'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, 192, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
|
||||
end_point = 'MaxPool_4a_3x3'
|
||||
net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
|
||||
end_point = 'Mixed_4b'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, 208, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, 48, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
|
||||
end_point = 'Mixed_4c'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
|
||||
end_point = 'Mixed_4d'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, 256, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
|
||||
end_point = 'Mixed_4e'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, 144, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, 288, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
|
||||
end_point = 'Mixed_4f'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
|
||||
end_point = 'MaxPool_5a_2x2'
|
||||
net = slim.max_pool2d(net, [2, 2], stride=2, scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
|
||||
end_point = 'Mixed_5b'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0a_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
|
||||
end_point = 'Mixed_5c'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, 384, [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, 384, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if final_endpoint == end_point: return net, end_points
|
||||
raise ValueError('Unknown final endpoint %s' % final_endpoint)
|
||||
|
||||
|
||||
def inception_v1(inputs,
|
||||
num_classes=1000,
|
||||
is_training=True,
|
||||
dropout_keep_prob=0.8,
|
||||
prediction_fn=slim.softmax,
|
||||
spatial_squeeze=True,
|
||||
reuse=None,
|
||||
scope='InceptionV1'):
|
||||
"""Defines the Inception V1 architecture.
|
||||
|
||||
This architecture is defined in:
|
||||
|
||||
Going deeper with convolutions
|
||||
Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
|
||||
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
|
||||
http://arxiv.org/pdf/1409.4842v1.pdf.
|
||||
|
||||
The default image size used to train this network is 224x224.
|
||||
|
||||
Args:
|
||||
inputs: a tensor of size [batch_size, height, width, channels].
|
||||
num_classes: number of predicted classes.
|
||||
is_training: whether is training or not.
|
||||
dropout_keep_prob: the percentage of activation values that are retained.
|
||||
prediction_fn: a function to get predictions out of logits.
|
||||
spatial_squeeze: if True, logits is of shape is [B, C], if false logits is
|
||||
of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
|
||||
reuse: whether or not the network and its variables should be reused. To be
|
||||
able to reuse 'scope' must be given.
|
||||
scope: Optional variable_scope.
|
||||
|
||||
Returns:
|
||||
logits: the pre-softmax activations, a tensor of size
|
||||
[batch_size, num_classes]
|
||||
end_points: a dictionary from components of the network to the corresponding
|
||||
activation.
|
||||
"""
|
||||
# Final pooling and prediction
|
||||
with tf.variable_scope(scope, 'InceptionV1', [inputs, num_classes],
|
||||
reuse=reuse) as scope:
|
||||
with slim.arg_scope([slim.batch_norm, slim.dropout],
|
||||
is_training=is_training):
|
||||
net, end_points = inception_v1_base(inputs, scope=scope)
|
||||
with tf.variable_scope('Logits'):
|
||||
net = slim.avg_pool2d(net, [7, 7], stride=1, scope='MaxPool_0a_7x7')
|
||||
net = slim.dropout(net,
|
||||
dropout_keep_prob, scope='Dropout_0b')
|
||||
logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
|
||||
normalizer_fn=None, scope='Conv2d_0c_1x1')
|
||||
if spatial_squeeze:
|
||||
logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
|
||||
|
||||
end_points['Logits'] = logits
|
||||
end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
|
||||
return logits, end_points
|
||||
inception_v1.default_image_size = 224
|
||||
|
||||
inception_v1_arg_scope = inception_utils.inception_arg_scope
|
||||
@ -0,0 +1,210 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for nets.inception_v1."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import inception
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
class InceptionV1Test(tf.test.TestCase):
|
||||
|
||||
def testBuildClassificationNetwork(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, end_points = inception.inception_v1(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
self.assertTrue('Predictions' in end_points)
|
||||
self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
|
||||
def testBuildBaseNetwork(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
mixed_6c, end_points = inception.inception_v1_base(inputs)
|
||||
self.assertTrue(mixed_6c.op.name.startswith('InceptionV1/Mixed_5c'))
|
||||
self.assertListEqual(mixed_6c.get_shape().as_list(),
|
||||
[batch_size, 7, 7, 1024])
|
||||
expected_endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
|
||||
'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b',
|
||||
'Mixed_3c', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c',
|
||||
'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2',
|
||||
'Mixed_5b', 'Mixed_5c']
|
||||
self.assertItemsEqual(end_points.keys(), expected_endpoints)
|
||||
|
||||
def testBuildOnlyUptoFinalEndpoint(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
|
||||
'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
|
||||
'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d',
|
||||
'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b',
|
||||
'Mixed_5c']
|
||||
for index, endpoint in enumerate(endpoints):
|
||||
with tf.Graph().as_default():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
out_tensor, end_points = inception.inception_v1_base(
|
||||
inputs, final_endpoint=endpoint)
|
||||
self.assertTrue(out_tensor.op.name.startswith(
|
||||
'InceptionV1/' + endpoint))
|
||||
self.assertItemsEqual(endpoints[:index+1], end_points)
|
||||
|
||||
def testBuildAndCheckAllEndPointsUptoMixed5c(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
_, end_points = inception.inception_v1_base(inputs,
|
||||
final_endpoint='Mixed_5c')
|
||||
endpoints_shapes = {'Conv2d_1a_7x7': [5, 112, 112, 64],
|
||||
'MaxPool_2a_3x3': [5, 56, 56, 64],
|
||||
'Conv2d_2b_1x1': [5, 56, 56, 64],
|
||||
'Conv2d_2c_3x3': [5, 56, 56, 192],
|
||||
'MaxPool_3a_3x3': [5, 28, 28, 192],
|
||||
'Mixed_3b': [5, 28, 28, 256],
|
||||
'Mixed_3c': [5, 28, 28, 480],
|
||||
'MaxPool_4a_3x3': [5, 14, 14, 480],
|
||||
'Mixed_4b': [5, 14, 14, 512],
|
||||
'Mixed_4c': [5, 14, 14, 512],
|
||||
'Mixed_4d': [5, 14, 14, 512],
|
||||
'Mixed_4e': [5, 14, 14, 528],
|
||||
'Mixed_4f': [5, 14, 14, 832],
|
||||
'MaxPool_5a_2x2': [5, 7, 7, 832],
|
||||
'Mixed_5b': [5, 7, 7, 832],
|
||||
'Mixed_5c': [5, 7, 7, 1024]}
|
||||
|
||||
self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
|
||||
for endpoint_name in endpoints_shapes:
|
||||
expected_shape = endpoints_shapes[endpoint_name]
|
||||
self.assertTrue(endpoint_name in end_points)
|
||||
self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
|
||||
expected_shape)
|
||||
|
||||
def testModelHasExpectedNumberOfParameters(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
with slim.arg_scope(inception.inception_v1_arg_scope()):
|
||||
inception.inception_v1_base(inputs)
|
||||
total_params, _ = slim.model_analyzer.analyze_vars(
|
||||
slim.get_model_variables())
|
||||
self.assertAlmostEqual(5607184, total_params)
|
||||
|
||||
def testHalfSizeImages(self):
|
||||
batch_size = 5
|
||||
height, width = 112, 112
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
mixed_5c, _ = inception.inception_v1_base(inputs)
|
||||
self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
|
||||
self.assertListEqual(mixed_5c.get_shape().as_list(),
|
||||
[batch_size, 4, 4, 1024])
|
||||
|
||||
def testUnknownImageShape(self):
|
||||
tf.reset_default_graph()
|
||||
batch_size = 2
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
|
||||
with self.test_session() as sess:
|
||||
inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3))
|
||||
logits, end_points = inception.inception_v1(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
pre_pool = end_points['Mixed_5c']
|
||||
feed_dict = {inputs: input_np}
|
||||
tf.initialize_all_variables().run()
|
||||
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
|
||||
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024])
|
||||
|
||||
def testUnknowBatchSize(self):
|
||||
batch_size = 1
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.placeholder(tf.float32, (None, height, width, 3))
|
||||
logits, _ = inception.inception_v1(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[None, num_classes])
|
||||
images = tf.random_uniform((batch_size, height, width, 3))
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(logits, {inputs: images.eval()})
|
||||
self.assertEquals(output.shape, (batch_size, num_classes))
|
||||
|
||||
def testEvaluation(self):
|
||||
batch_size = 2
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
|
||||
eval_inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = inception.inception_v1(eval_inputs, num_classes,
|
||||
is_training=False)
|
||||
predictions = tf.argmax(logits, 1)
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(predictions)
|
||||
self.assertEquals(output.shape, (batch_size,))
|
||||
|
||||
def testTrainEvalWithReuse(self):
|
||||
train_batch_size = 5
|
||||
eval_batch_size = 2
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
|
||||
train_inputs = tf.random_uniform((train_batch_size, height, width, 3))
|
||||
inception.inception_v1(train_inputs, num_classes)
|
||||
eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))
|
||||
logits, _ = inception.inception_v1(eval_inputs, num_classes, reuse=True)
|
||||
predictions = tf.argmax(logits, 1)
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(predictions)
|
||||
self.assertEquals(output.shape, (eval_batch_size,))
|
||||
|
||||
def testLogitsNotSqueezed(self):
|
||||
num_classes = 25
|
||||
images = tf.random_uniform([1, 224, 224, 3])
|
||||
logits, _ = inception.inception_v1(images,
|
||||
num_classes=num_classes,
|
||||
spatial_squeeze=False)
|
||||
|
||||
with self.test_session() as sess:
|
||||
tf.initialize_all_variables().run()
|
||||
logits_out = sess.run(logits)
|
||||
self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
@ -0,0 +1,520 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains the definition for inception v2 classification network."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import inception_utils
|
||||
|
||||
slim = tf.contrib.slim
|
||||
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
|
||||
|
||||
|
||||
def inception_v2_base(inputs,
|
||||
final_endpoint='Mixed_5c',
|
||||
min_depth=16,
|
||||
depth_multiplier=1.0,
|
||||
scope=None):
|
||||
"""Inception v2 (6a2).
|
||||
|
||||
Constructs an Inception v2 network from inputs to the given final endpoint.
|
||||
This method can construct the network up to the layer inception(5b) as
|
||||
described in http://arxiv.org/abs/1502.03167.
|
||||
|
||||
Args:
|
||||
inputs: a tensor of shape [batch_size, height, width, channels].
|
||||
final_endpoint: specifies the endpoint to construct the network up to. It
|
||||
can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
|
||||
'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a',
|
||||
'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b',
|
||||
'Mixed_5c'].
|
||||
min_depth: Minimum depth value (number of channels) for all convolution ops.
|
||||
Enforced when depth_multiplier < 1, and not an active constraint when
|
||||
depth_multiplier >= 1.
|
||||
depth_multiplier: Float multiplier for the depth (number of channels)
|
||||
for all convolution ops. The value must be greater than zero. Typical
|
||||
usage will be to set this value in (0, 1) to reduce the number of
|
||||
parameters or computation cost of the model.
|
||||
scope: Optional variable_scope.
|
||||
|
||||
Returns:
|
||||
tensor_out: output tensor corresponding to the final_endpoint.
|
||||
end_points: a set of activations for external use, for example summaries or
|
||||
losses.
|
||||
|
||||
Raises:
|
||||
ValueError: if final_endpoint is not set to one of the predefined values,
|
||||
or depth_multiplier <= 0
|
||||
"""
|
||||
|
||||
# end_points will collect relevant activations for external use, for example
|
||||
# summaries or losses.
|
||||
end_points = {}
|
||||
|
||||
# Used to find thinned depths for each layer.
|
||||
if depth_multiplier <= 0:
|
||||
raise ValueError('depth_multiplier is not greater than zero.')
|
||||
depth = lambda d: max(int(d * depth_multiplier), min_depth)
|
||||
|
||||
with tf.variable_scope(scope, 'InceptionV2', [inputs]):
|
||||
with slim.arg_scope(
|
||||
[slim.conv2d, slim.max_pool2d, slim.avg_pool2d, slim.separable_conv2d],
|
||||
stride=1, padding='SAME'):
|
||||
|
||||
# Note that sizes in the comments below assume an input spatial size of
|
||||
# 224x224, however, the inputs can be of any size greater 32x32.
|
||||
|
||||
# 224 x 224 x 3
|
||||
end_point = 'Conv2d_1a_7x7'
|
||||
# depthwise_multiplier here is different from depth_multiplier.
|
||||
# depthwise_multiplier determines the output channels of the initial
|
||||
# depthwise conv (see docs for tf.nn.separable_conv2d), while
|
||||
# depth_multiplier controls the # channels of the subsequent 1x1
|
||||
# convolution. Must have
|
||||
# in_channels * depthwise_multipler <= out_channels
|
||||
# so that the separable convolution is not overparameterized.
|
||||
depthwise_multiplier = min(int(depth(64) / 3), 8)
|
||||
net = slim.separable_conv2d(
|
||||
inputs, depth(64), [7, 7], depth_multiplier=depthwise_multiplier,
|
||||
stride=2, weights_initializer=trunc_normal(1.0),
|
||||
scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 112 x 112 x 64
|
||||
end_point = 'MaxPool_2a_3x3'
|
||||
net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 56 x 56 x 64
|
||||
end_point = 'Conv2d_2b_1x1'
|
||||
net = slim.conv2d(net, depth(64), [1, 1], scope=end_point,
|
||||
weights_initializer=trunc_normal(0.1))
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 56 x 56 x 64
|
||||
end_point = 'Conv2d_2c_3x3'
|
||||
net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 56 x 56 x 192
|
||||
end_point = 'MaxPool_3a_3x3'
|
||||
net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 28 x 28 x 192
|
||||
# Inception module.
|
||||
end_point = 'Mixed_3b'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(
|
||||
net, depth(64), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(64), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(
|
||||
net, depth(64), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0c_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(
|
||||
branch_3, depth(32), [1, 1],
|
||||
weights_initializer=trunc_normal(0.1),
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 28 x 28 x 256
|
||||
end_point = 'Mixed_3c'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(
|
||||
net, depth(64), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(96), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(
|
||||
net, depth(64), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0c_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(
|
||||
branch_3, depth(64), [1, 1],
|
||||
weights_initializer=trunc_normal(0.1),
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 28 x 28 x 320
|
||||
end_point = 'Mixed_4a'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(
|
||||
net, depth(128), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_0 = slim.conv2d(branch_0, depth(160), [3, 3], stride=2,
|
||||
scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(
|
||||
net, depth(64), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(
|
||||
branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
|
||||
branch_1 = slim.conv2d(
|
||||
branch_1, depth(96), [3, 3], stride=2, scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.max_pool2d(
|
||||
net, [3, 3], stride=2, scope='MaxPool_1a_3x3')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 14 x 14 x 576
|
||||
end_point = 'Mixed_4b'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(224), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(
|
||||
net, depth(64), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(
|
||||
branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(
|
||||
net, depth(96), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
|
||||
scope='Conv2d_0c_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(
|
||||
branch_3, depth(128), [1, 1],
|
||||
weights_initializer=trunc_normal(0.1),
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 14 x 14 x 576
|
||||
end_point = 'Mixed_4c'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(
|
||||
net, depth(96), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(128), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(
|
||||
net, depth(96), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
|
||||
scope='Conv2d_0c_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(
|
||||
branch_3, depth(128), [1, 1],
|
||||
weights_initializer=trunc_normal(0.1),
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 14 x 14 x 576
|
||||
end_point = 'Mixed_4d'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(
|
||||
net, depth(128), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(160), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(
|
||||
net, depth(128), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(160), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_2 = slim.conv2d(branch_2, depth(160), [3, 3],
|
||||
scope='Conv2d_0c_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(
|
||||
branch_3, depth(96), [1, 1],
|
||||
weights_initializer=trunc_normal(0.1),
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# 14 x 14 x 576
|
||||
end_point = 'Mixed_4e'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(96), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(
|
||||
net, depth(128), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(
|
||||
net, depth(160), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [3, 3],
|
||||
scope='Conv2d_0c_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(
|
||||
branch_3, depth(96), [1, 1],
|
||||
weights_initializer=trunc_normal(0.1),
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 14 x 14 x 576
|
||||
end_point = 'Mixed_5a'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(
|
||||
net, depth(128), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2,
|
||||
scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(
|
||||
net, depth(192), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(256), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2,
|
||||
scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
|
||||
scope='MaxPool_1a_3x3')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 7 x 7 x 1024
|
||||
end_point = 'Mixed_5b'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(
|
||||
net, depth(192), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(
|
||||
net, depth(160), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
|
||||
scope='Conv2d_0c_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(
|
||||
branch_3, depth(128), [1, 1],
|
||||
weights_initializer=trunc_normal(0.1),
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# 7 x 7 x 1024
|
||||
end_point = 'Mixed_5c'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(
|
||||
net, depth(192), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(
|
||||
net, depth(192), [1, 1],
|
||||
weights_initializer=trunc_normal(0.09),
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
|
||||
scope='Conv2d_0c_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(
|
||||
branch_3, depth(128), [1, 1],
|
||||
weights_initializer=trunc_normal(0.1),
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
raise ValueError('Unknown final endpoint %s' % final_endpoint)
|
||||
|
||||
|
||||
def inception_v2(inputs,
|
||||
num_classes=1000,
|
||||
is_training=True,
|
||||
dropout_keep_prob=0.8,
|
||||
min_depth=16,
|
||||
depth_multiplier=1.0,
|
||||
prediction_fn=slim.softmax,
|
||||
spatial_squeeze=True,
|
||||
reuse=None,
|
||||
scope='InceptionV2'):
|
||||
"""Inception v2 model for classification.
|
||||
|
||||
Constructs an Inception v2 network for classification as described in
|
||||
http://arxiv.org/abs/1502.03167.
|
||||
|
||||
The default image size used to train this network is 224x224.
|
||||
|
||||
Args:
|
||||
inputs: a tensor of shape [batch_size, height, width, channels].
|
||||
num_classes: number of predicted classes.
|
||||
is_training: whether is training or not.
|
||||
dropout_keep_prob: the percentage of activation values that are retained.
|
||||
min_depth: Minimum depth value (number of channels) for all convolution ops.
|
||||
Enforced when depth_multiplier < 1, and not an active constraint when
|
||||
depth_multiplier >= 1.
|
||||
depth_multiplier: Float multiplier for the depth (number of channels)
|
||||
for all convolution ops. The value must be greater than zero. Typical
|
||||
usage will be to set this value in (0, 1) to reduce the number of
|
||||
parameters or computation cost of the model.
|
||||
prediction_fn: a function to get predictions out of logits.
|
||||
spatial_squeeze: if True, logits is of shape is [B, C], if false logits is
|
||||
of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
|
||||
reuse: whether or not the network and its variables should be reused. To be
|
||||
able to reuse 'scope' must be given.
|
||||
scope: Optional variable_scope.
|
||||
|
||||
Returns:
|
||||
logits: the pre-softmax activations, a tensor of size
|
||||
[batch_size, num_classes]
|
||||
end_points: a dictionary from components of the network to the corresponding
|
||||
activation.
|
||||
|
||||
Raises:
|
||||
ValueError: if final_endpoint is not set to one of the predefined values,
|
||||
or depth_multiplier <= 0
|
||||
"""
|
||||
if depth_multiplier <= 0:
|
||||
raise ValueError('depth_multiplier is not greater than zero.')
|
||||
|
||||
# Final pooling and prediction
|
||||
with tf.variable_scope(scope, 'InceptionV2', [inputs, num_classes],
|
||||
reuse=reuse) as scope:
|
||||
with slim.arg_scope([slim.batch_norm, slim.dropout],
|
||||
is_training=is_training):
|
||||
net, end_points = inception_v2_base(
|
||||
inputs, scope=scope, min_depth=min_depth,
|
||||
depth_multiplier=depth_multiplier)
|
||||
with tf.variable_scope('Logits'):
|
||||
kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7])
|
||||
net = slim.avg_pool2d(net, kernel_size, padding='VALID',
|
||||
scope='AvgPool_1a_{}x{}'.format(*kernel_size))
|
||||
# 1 x 1 x 1024
|
||||
net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
|
||||
logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
|
||||
normalizer_fn=None, scope='Conv2d_1c_1x1')
|
||||
if spatial_squeeze:
|
||||
logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
|
||||
end_points['Logits'] = logits
|
||||
end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
|
||||
return logits, end_points
|
||||
inception_v2.default_image_size = 224
|
||||
|
||||
|
||||
def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
|
||||
"""Define kernel size which is automatically reduced for small input.
|
||||
|
||||
If the shape of the input images is unknown at graph construction time this
|
||||
function assumes that the input images are is large enough.
|
||||
|
||||
Args:
|
||||
input_tensor: input tensor of size [batch_size, height, width, channels].
|
||||
kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]
|
||||
|
||||
Returns:
|
||||
a tensor with the kernel size.
|
||||
|
||||
TODO(jrru): Make this function work with unknown shapes. Theoretically, this
|
||||
can be done with the code below. Problems are two-fold: (1) If the shape was
|
||||
known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot
|
||||
handle tensors that define the kernel size.
|
||||
shape = tf.shape(input_tensor)
|
||||
return = tf.pack([tf.minimum(shape[1], kernel_size[0]),
|
||||
tf.minimum(shape[2], kernel_size[1])])
|
||||
|
||||
"""
|
||||
shape = input_tensor.get_shape().as_list()
|
||||
if shape[1] is None or shape[2] is None:
|
||||
kernel_size_out = kernel_size
|
||||
else:
|
||||
kernel_size_out = [min(shape[1], kernel_size[0]),
|
||||
min(shape[2], kernel_size[1])]
|
||||
return kernel_size_out
|
||||
|
||||
|
||||
inception_v2_arg_scope = inception_utils.inception_arg_scope
|
||||
@ -0,0 +1,262 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for nets.inception_v2."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import inception
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
class InceptionV2Test(tf.test.TestCase):
|
||||
|
||||
def testBuildClassificationNetwork(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, end_points = inception.inception_v2(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
self.assertTrue('Predictions' in end_points)
|
||||
self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
|
||||
def testBuildBaseNetwork(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
mixed_5c, end_points = inception.inception_v2_base(inputs)
|
||||
self.assertTrue(mixed_5c.op.name.startswith('InceptionV2/Mixed_5c'))
|
||||
self.assertListEqual(mixed_5c.get_shape().as_list(),
|
||||
[batch_size, 7, 7, 1024])
|
||||
expected_endpoints = ['Mixed_3b', 'Mixed_3c', 'Mixed_4a', 'Mixed_4b',
|
||||
'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a',
|
||||
'Mixed_5b', 'Mixed_5c', 'Conv2d_1a_7x7',
|
||||
'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3',
|
||||
'MaxPool_3a_3x3']
|
||||
self.assertItemsEqual(end_points.keys(), expected_endpoints)
|
||||
|
||||
def testBuildOnlyUptoFinalEndpoint(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
|
||||
'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
|
||||
'Mixed_4a', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
|
||||
'Mixed_5a', 'Mixed_5b', 'Mixed_5c']
|
||||
for index, endpoint in enumerate(endpoints):
|
||||
with tf.Graph().as_default():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
out_tensor, end_points = inception.inception_v2_base(
|
||||
inputs, final_endpoint=endpoint)
|
||||
self.assertTrue(out_tensor.op.name.startswith(
|
||||
'InceptionV2/' + endpoint))
|
||||
self.assertItemsEqual(endpoints[:index+1], end_points)
|
||||
|
||||
def testBuildAndCheckAllEndPointsUptoMixed5c(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
_, end_points = inception.inception_v2_base(inputs,
|
||||
final_endpoint='Mixed_5c')
|
||||
endpoints_shapes = {'Mixed_3b': [batch_size, 28, 28, 256],
|
||||
'Mixed_3c': [batch_size, 28, 28, 320],
|
||||
'Mixed_4a': [batch_size, 14, 14, 576],
|
||||
'Mixed_4b': [batch_size, 14, 14, 576],
|
||||
'Mixed_4c': [batch_size, 14, 14, 576],
|
||||
'Mixed_4d': [batch_size, 14, 14, 576],
|
||||
'Mixed_4e': [batch_size, 14, 14, 576],
|
||||
'Mixed_5a': [batch_size, 7, 7, 1024],
|
||||
'Mixed_5b': [batch_size, 7, 7, 1024],
|
||||
'Mixed_5c': [batch_size, 7, 7, 1024],
|
||||
'Conv2d_1a_7x7': [batch_size, 112, 112, 64],
|
||||
'MaxPool_2a_3x3': [batch_size, 56, 56, 64],
|
||||
'Conv2d_2b_1x1': [batch_size, 56, 56, 64],
|
||||
'Conv2d_2c_3x3': [batch_size, 56, 56, 192],
|
||||
'MaxPool_3a_3x3': [batch_size, 28, 28, 192]}
|
||||
self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
|
||||
for endpoint_name in endpoints_shapes:
|
||||
expected_shape = endpoints_shapes[endpoint_name]
|
||||
self.assertTrue(endpoint_name in end_points)
|
||||
self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
|
||||
expected_shape)
|
||||
|
||||
def testModelHasExpectedNumberOfParameters(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
with slim.arg_scope(inception.inception_v2_arg_scope()):
|
||||
inception.inception_v2_base(inputs)
|
||||
total_params, _ = slim.model_analyzer.analyze_vars(
|
||||
slim.get_model_variables())
|
||||
self.assertAlmostEqual(10173112, total_params)
|
||||
|
||||
def testBuildEndPointsWithDepthMultiplierLessThanOne(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
_, end_points = inception.inception_v2(inputs, num_classes)
|
||||
|
||||
endpoint_keys = [key for key in end_points.keys()
|
||||
if key.startswith('Mixed') or key.startswith('Conv')]
|
||||
|
||||
_, end_points_with_multiplier = inception.inception_v2(
|
||||
inputs, num_classes, scope='depth_multiplied_net',
|
||||
depth_multiplier=0.5)
|
||||
|
||||
for key in endpoint_keys:
|
||||
original_depth = end_points[key].get_shape().as_list()[3]
|
||||
new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
|
||||
self.assertEqual(0.5 * original_depth, new_depth)
|
||||
|
||||
def testBuildEndPointsWithDepthMultiplierGreaterThanOne(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
_, end_points = inception.inception_v2(inputs, num_classes)
|
||||
|
||||
endpoint_keys = [key for key in end_points.keys()
|
||||
if key.startswith('Mixed') or key.startswith('Conv')]
|
||||
|
||||
_, end_points_with_multiplier = inception.inception_v2(
|
||||
inputs, num_classes, scope='depth_multiplied_net',
|
||||
depth_multiplier=2.0)
|
||||
|
||||
for key in endpoint_keys:
|
||||
original_depth = end_points[key].get_shape().as_list()[3]
|
||||
new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
|
||||
self.assertEqual(2.0 * original_depth, new_depth)
|
||||
|
||||
def testRaiseValueErrorWithInvalidDepthMultiplier(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
with self.assertRaises(ValueError):
|
||||
_ = inception.inception_v2(inputs, num_classes, depth_multiplier=-0.1)
|
||||
with self.assertRaises(ValueError):
|
||||
_ = inception.inception_v2(inputs, num_classes, depth_multiplier=0.0)
|
||||
|
||||
def testHalfSizeImages(self):
|
||||
batch_size = 5
|
||||
height, width = 112, 112
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, end_points = inception.inception_v2(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
pre_pool = end_points['Mixed_5c']
|
||||
self.assertListEqual(pre_pool.get_shape().as_list(),
|
||||
[batch_size, 4, 4, 1024])
|
||||
|
||||
def testUnknownImageShape(self):
|
||||
tf.reset_default_graph()
|
||||
batch_size = 2
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
|
||||
with self.test_session() as sess:
|
||||
inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3))
|
||||
logits, end_points = inception.inception_v2(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
pre_pool = end_points['Mixed_5c']
|
||||
feed_dict = {inputs: input_np}
|
||||
tf.initialize_all_variables().run()
|
||||
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
|
||||
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024])
|
||||
|
||||
def testUnknowBatchSize(self):
|
||||
batch_size = 1
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.placeholder(tf.float32, (None, height, width, 3))
|
||||
logits, _ = inception.inception_v2(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[None, num_classes])
|
||||
images = tf.random_uniform((batch_size, height, width, 3))
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(logits, {inputs: images.eval()})
|
||||
self.assertEquals(output.shape, (batch_size, num_classes))
|
||||
|
||||
def testEvaluation(self):
|
||||
batch_size = 2
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
|
||||
eval_inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = inception.inception_v2(eval_inputs, num_classes,
|
||||
is_training=False)
|
||||
predictions = tf.argmax(logits, 1)
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(predictions)
|
||||
self.assertEquals(output.shape, (batch_size,))
|
||||
|
||||
def testTrainEvalWithReuse(self):
|
||||
train_batch_size = 5
|
||||
eval_batch_size = 2
|
||||
height, width = 150, 150
|
||||
num_classes = 1000
|
||||
|
||||
train_inputs = tf.random_uniform((train_batch_size, height, width, 3))
|
||||
inception.inception_v2(train_inputs, num_classes)
|
||||
eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))
|
||||
logits, _ = inception.inception_v2(eval_inputs, num_classes, reuse=True)
|
||||
predictions = tf.argmax(logits, 1)
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(predictions)
|
||||
self.assertEquals(output.shape, (eval_batch_size,))
|
||||
|
||||
def testLogitsNotSqueezed(self):
|
||||
num_classes = 25
|
||||
images = tf.random_uniform([1, 224, 224, 3])
|
||||
logits, _ = inception.inception_v2(images,
|
||||
num_classes=num_classes,
|
||||
spatial_squeeze=False)
|
||||
|
||||
with self.test_session() as sess:
|
||||
tf.initialize_all_variables().run()
|
||||
logits_out = sess.run(logits)
|
||||
self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
@ -0,0 +1,560 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains the definition for inception v3 classification network."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import inception_utils
|
||||
|
||||
slim = tf.contrib.slim
|
||||
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
|
||||
|
||||
|
||||
def inception_v3_base(inputs,
|
||||
final_endpoint='Mixed_7c',
|
||||
min_depth=16,
|
||||
depth_multiplier=1.0,
|
||||
scope=None):
|
||||
"""Inception model from http://arxiv.org/abs/1512.00567.
|
||||
|
||||
Constructs an Inception v3 network from inputs to the given final endpoint.
|
||||
This method can construct the network up to the final inception block
|
||||
Mixed_7c.
|
||||
|
||||
Note that the names of the layers in the paper do not correspond to the names
|
||||
of the endpoints registered by this function although they build the same
|
||||
network.
|
||||
|
||||
Here is a mapping from the old_names to the new names:
|
||||
Old name | New name
|
||||
=======================================
|
||||
conv0 | Conv2d_1a_3x3
|
||||
conv1 | Conv2d_2a_3x3
|
||||
conv2 | Conv2d_2b_3x3
|
||||
pool1 | MaxPool_3a_3x3
|
||||
conv3 | Conv2d_3b_1x1
|
||||
conv4 | Conv2d_4a_3x3
|
||||
pool2 | MaxPool_5a_3x3
|
||||
mixed_35x35x256a | Mixed_5b
|
||||
mixed_35x35x288a | Mixed_5c
|
||||
mixed_35x35x288b | Mixed_5d
|
||||
mixed_17x17x768a | Mixed_6a
|
||||
mixed_17x17x768b | Mixed_6b
|
||||
mixed_17x17x768c | Mixed_6c
|
||||
mixed_17x17x768d | Mixed_6d
|
||||
mixed_17x17x768e | Mixed_6e
|
||||
mixed_8x8x1280a | Mixed_7a
|
||||
mixed_8x8x2048a | Mixed_7b
|
||||
mixed_8x8x2048b | Mixed_7c
|
||||
|
||||
Args:
|
||||
inputs: a tensor of size [batch_size, height, width, channels].
|
||||
final_endpoint: specifies the endpoint to construct the network up to. It
|
||||
can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
|
||||
'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
|
||||
'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c',
|
||||
'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'].
|
||||
min_depth: Minimum depth value (number of channels) for all convolution ops.
|
||||
Enforced when depth_multiplier < 1, and not an active constraint when
|
||||
depth_multiplier >= 1.
|
||||
depth_multiplier: Float multiplier for the depth (number of channels)
|
||||
for all convolution ops. The value must be greater than zero. Typical
|
||||
usage will be to set this value in (0, 1) to reduce the number of
|
||||
parameters or computation cost of the model.
|
||||
scope: Optional variable_scope.
|
||||
|
||||
Returns:
|
||||
tensor_out: output tensor corresponding to the final_endpoint.
|
||||
end_points: a set of activations for external use, for example summaries or
|
||||
losses.
|
||||
|
||||
Raises:
|
||||
ValueError: if final_endpoint is not set to one of the predefined values,
|
||||
or depth_multiplier <= 0
|
||||
"""
|
||||
# end_points will collect relevant activations for external use, for example
|
||||
# summaries or losses.
|
||||
end_points = {}
|
||||
|
||||
if depth_multiplier <= 0:
|
||||
raise ValueError('depth_multiplier is not greater than zero.')
|
||||
depth = lambda d: max(int(d * depth_multiplier), min_depth)
|
||||
|
||||
with tf.variable_scope(scope, 'InceptionV3', [inputs]):
|
||||
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
|
||||
stride=1, padding='VALID'):
|
||||
# 299 x 299 x 3
|
||||
end_point = 'Conv2d_1a_3x3'
|
||||
net = slim.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 149 x 149 x 32
|
||||
end_point = 'Conv2d_2a_3x3'
|
||||
net = slim.conv2d(net, depth(32), [3, 3], scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 147 x 147 x 32
|
||||
end_point = 'Conv2d_2b_3x3'
|
||||
net = slim.conv2d(net, depth(64), [3, 3], padding='SAME', scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 147 x 147 x 64
|
||||
end_point = 'MaxPool_3a_3x3'
|
||||
net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 73 x 73 x 64
|
||||
end_point = 'Conv2d_3b_1x1'
|
||||
net = slim.conv2d(net, depth(80), [1, 1], scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 73 x 73 x 80.
|
||||
end_point = 'Conv2d_4a_3x3'
|
||||
net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 71 x 71 x 192.
|
||||
end_point = 'MaxPool_5a_3x3'
|
||||
net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 35 x 35 x 192.
|
||||
|
||||
# Inception blocks
|
||||
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
|
||||
stride=1, padding='SAME'):
|
||||
# mixed: 35 x 35 x 256.
|
||||
end_point = 'Mixed_5b'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
|
||||
scope='Conv2d_0b_5x5')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0c_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, depth(32), [1, 1],
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed_1: 35 x 35 x 288.
|
||||
end_point = 'Mixed_5c'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0b_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
|
||||
scope='Conv_1_0c_5x5')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(64), [1, 1],
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0c_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed_2: 35 x 35 x 288.
|
||||
end_point = 'Mixed_5d'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
|
||||
scope='Conv2d_0b_5x5')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0c_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed_3: 17 x 17 x 768.
|
||||
end_point = 'Mixed_6a'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(384), [3, 3], stride=2,
|
||||
padding='VALID', scope='Conv2d_1a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(96), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], stride=2,
|
||||
padding='VALID', scope='Conv2d_1a_1x1')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
|
||||
scope='MaxPool_1a_3x3')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed4: 17 x 17 x 768.
|
||||
end_point = 'Mixed_6b'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(128), [1, 7],
|
||||
scope='Conv2d_0b_1x7')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
|
||||
scope='Conv2d_0c_7x1')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],
|
||||
scope='Conv2d_0b_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(128), [1, 7],
|
||||
scope='Conv2d_0c_1x7')
|
||||
branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],
|
||||
scope='Conv2d_0d_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
|
||||
scope='Conv2d_0e_1x7')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed_5: 17 x 17 x 768.
|
||||
end_point = 'Mixed_6c'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],
|
||||
scope='Conv2d_0b_1x7')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
|
||||
scope='Conv2d_0c_7x1')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
|
||||
scope='Conv2d_0b_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],
|
||||
scope='Conv2d_0c_1x7')
|
||||
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
|
||||
scope='Conv2d_0d_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
|
||||
scope='Conv2d_0e_1x7')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# mixed_6: 17 x 17 x 768.
|
||||
end_point = 'Mixed_6d'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],
|
||||
scope='Conv2d_0b_1x7')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
|
||||
scope='Conv2d_0c_7x1')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
|
||||
scope='Conv2d_0b_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],
|
||||
scope='Conv2d_0c_1x7')
|
||||
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
|
||||
scope='Conv2d_0d_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
|
||||
scope='Conv2d_0e_1x7')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed_7: 17 x 17 x 768.
|
||||
end_point = 'Mixed_6e'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],
|
||||
scope='Conv2d_0b_1x7')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
|
||||
scope='Conv2d_0c_7x1')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],
|
||||
scope='Conv2d_0b_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
|
||||
scope='Conv2d_0c_1x7')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],
|
||||
scope='Conv2d_0d_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
|
||||
scope='Conv2d_0e_1x7')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed_8: 8 x 8 x 1280.
|
||||
end_point = 'Mixed_7a'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_0 = slim.conv2d(branch_0, depth(320), [3, 3], stride=2,
|
||||
padding='VALID', scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],
|
||||
scope='Conv2d_0b_1x7')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
|
||||
scope='Conv2d_0c_7x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], stride=2,
|
||||
padding='VALID', scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
|
||||
scope='MaxPool_1a_3x3')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# mixed_9: 8 x 8 x 2048.
|
||||
end_point = 'Mixed_7b'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = tf.concat(3, [
|
||||
slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
|
||||
slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0b_3x1')])
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(
|
||||
branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
|
||||
branch_2 = tf.concat(3, [
|
||||
slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
|
||||
slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')])
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(
|
||||
branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed_10: 8 x 8 x 2048.
|
||||
end_point = 'Mixed_7c'
|
||||
with tf.variable_scope(end_point):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = tf.concat(3, [
|
||||
slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
|
||||
slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0c_3x1')])
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(
|
||||
branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
|
||||
branch_2 = tf.concat(3, [
|
||||
slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
|
||||
slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')])
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(
|
||||
branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
raise ValueError('Unknown final endpoint %s' % final_endpoint)
|
||||
|
||||
|
||||
def inception_v3(inputs,
|
||||
num_classes=1000,
|
||||
is_training=True,
|
||||
dropout_keep_prob=0.8,
|
||||
min_depth=16,
|
||||
depth_multiplier=1.0,
|
||||
prediction_fn=slim.softmax,
|
||||
spatial_squeeze=True,
|
||||
reuse=None,
|
||||
scope='InceptionV3'):
|
||||
"""Inception model from http://arxiv.org/abs/1512.00567.
|
||||
|
||||
"Rethinking the Inception Architecture for Computer Vision"
|
||||
|
||||
Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
|
||||
Zbigniew Wojna.
|
||||
|
||||
With the default arguments this method constructs the exact model defined in
|
||||
the paper. However, one can experiment with variations of the inception_v3
|
||||
network by changing arguments dropout_keep_prob, min_depth and
|
||||
depth_multiplier.
|
||||
|
||||
The default image size used to train this network is 299x299.
|
||||
|
||||
Args:
|
||||
inputs: a tensor of size [batch_size, height, width, channels].
|
||||
num_classes: number of predicted classes.
|
||||
is_training: whether is training or not.
|
||||
dropout_keep_prob: the percentage of activation values that are retained.
|
||||
min_depth: Minimum depth value (number of channels) for all convolution ops.
|
||||
Enforced when depth_multiplier < 1, and not an active constraint when
|
||||
depth_multiplier >= 1.
|
||||
depth_multiplier: Float multiplier for the depth (number of channels)
|
||||
for all convolution ops. The value must be greater than zero. Typical
|
||||
usage will be to set this value in (0, 1) to reduce the number of
|
||||
parameters or computation cost of the model.
|
||||
prediction_fn: a function to get predictions out of logits.
|
||||
spatial_squeeze: if True, logits is of shape is [B, C], if false logits is
|
||||
of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
|
||||
reuse: whether or not the network and its variables should be reused. To be
|
||||
able to reuse 'scope' must be given.
|
||||
scope: Optional variable_scope.
|
||||
|
||||
Returns:
|
||||
logits: the pre-softmax activations, a tensor of size
|
||||
[batch_size, num_classes]
|
||||
end_points: a dictionary from components of the network to the corresponding
|
||||
activation.
|
||||
|
||||
Raises:
|
||||
ValueError: if 'depth_multiplier' is less than or equal to zero.
|
||||
"""
|
||||
if depth_multiplier <= 0:
|
||||
raise ValueError('depth_multiplier is not greater than zero.')
|
||||
depth = lambda d: max(int(d * depth_multiplier), min_depth)
|
||||
|
||||
with tf.variable_scope(scope, 'InceptionV3', [inputs, num_classes],
|
||||
reuse=reuse) as scope:
|
||||
with slim.arg_scope([slim.batch_norm, slim.dropout],
|
||||
is_training=is_training):
|
||||
net, end_points = inception_v3_base(
|
||||
inputs, scope=scope, min_depth=min_depth,
|
||||
depth_multiplier=depth_multiplier)
|
||||
|
||||
# Auxiliary Head logits
|
||||
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
|
||||
stride=1, padding='SAME'):
|
||||
aux_logits = end_points['Mixed_6e']
|
||||
with tf.variable_scope('AuxLogits'):
|
||||
aux_logits = slim.avg_pool2d(
|
||||
aux_logits, [5, 5], stride=3, padding='VALID',
|
||||
scope='AvgPool_1a_5x5')
|
||||
aux_logits = slim.conv2d(aux_logits, depth(128), [1, 1],
|
||||
scope='Conv2d_1b_1x1')
|
||||
|
||||
# Shape of feature map before the final layer.
|
||||
kernel_size = _reduced_kernel_size_for_small_input(
|
||||
aux_logits, [5, 5])
|
||||
aux_logits = slim.conv2d(
|
||||
aux_logits, depth(768), kernel_size,
|
||||
weights_initializer=trunc_normal(0.01),
|
||||
padding='VALID', scope='Conv2d_2a_{}x{}'.format(*kernel_size))
|
||||
aux_logits = slim.conv2d(
|
||||
aux_logits, num_classes, [1, 1], activation_fn=None,
|
||||
normalizer_fn=None, weights_initializer=trunc_normal(0.001),
|
||||
scope='Conv2d_2b_1x1')
|
||||
if spatial_squeeze:
|
||||
aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze')
|
||||
end_points['AuxLogits'] = aux_logits
|
||||
|
||||
# Final pooling and prediction
|
||||
with tf.variable_scope('Logits'):
|
||||
kernel_size = _reduced_kernel_size_for_small_input(net, [8, 8])
|
||||
net = slim.avg_pool2d(net, kernel_size, padding='VALID',
|
||||
scope='AvgPool_1a_{}x{}'.format(*kernel_size))
|
||||
# 1 x 1 x 2048
|
||||
net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
|
||||
end_points['PreLogits'] = net
|
||||
# 2048
|
||||
logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
|
||||
normalizer_fn=None, scope='Conv2d_1c_1x1')
|
||||
if spatial_squeeze:
|
||||
logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
|
||||
# 1000
|
||||
end_points['Logits'] = logits
|
||||
end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
|
||||
return logits, end_points
|
||||
inception_v3.default_image_size = 299
|
||||
|
||||
|
||||
def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
|
||||
"""Define kernel size which is automatically reduced for small input.
|
||||
|
||||
If the shape of the input images is unknown at graph construction time this
|
||||
function assumes that the input images are is large enough.
|
||||
|
||||
Args:
|
||||
input_tensor: input tensor of size [batch_size, height, width, channels].
|
||||
kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]
|
||||
|
||||
Returns:
|
||||
a tensor with the kernel size.
|
||||
|
||||
TODO(jrru): Make this function work with unknown shapes. Theoretically, this
|
||||
can be done with the code below. Problems are two-fold: (1) If the shape was
|
||||
known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot
|
||||
handle tensors that define the kernel size.
|
||||
shape = tf.shape(input_tensor)
|
||||
return = tf.pack([tf.minimum(shape[1], kernel_size[0]),
|
||||
tf.minimum(shape[2], kernel_size[1])])
|
||||
|
||||
"""
|
||||
shape = input_tensor.get_shape().as_list()
|
||||
if shape[1] is None or shape[2] is None:
|
||||
kernel_size_out = kernel_size
|
||||
else:
|
||||
kernel_size_out = [min(shape[1], kernel_size[0]),
|
||||
min(shape[2], kernel_size[1])]
|
||||
return kernel_size_out
|
||||
|
||||
|
||||
inception_v3_arg_scope = inception_utils.inception_arg_scope
|
||||
@ -0,0 +1,292 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for nets.inception_v1."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import inception
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
class InceptionV3Test(tf.test.TestCase):
|
||||
|
||||
def testBuildClassificationNetwork(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, end_points = inception.inception_v3(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV3/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
self.assertTrue('Predictions' in end_points)
|
||||
self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
|
||||
def testBuildBaseNetwork(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
final_endpoint, end_points = inception.inception_v3_base(inputs)
|
||||
self.assertTrue(final_endpoint.op.name.startswith(
|
||||
'InceptionV3/Mixed_7c'))
|
||||
self.assertListEqual(final_endpoint.get_shape().as_list(),
|
||||
[batch_size, 8, 8, 2048])
|
||||
expected_endpoints = ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
|
||||
'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',
|
||||
'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
|
||||
'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
|
||||
'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c']
|
||||
self.assertItemsEqual(end_points.keys(), expected_endpoints)
|
||||
|
||||
def testBuildOnlyUptoFinalEndpoint(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
endpoints = ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
|
||||
'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',
|
||||
'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
|
||||
'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
|
||||
'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c']
|
||||
|
||||
for index, endpoint in enumerate(endpoints):
|
||||
with tf.Graph().as_default():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
out_tensor, end_points = inception.inception_v3_base(
|
||||
inputs, final_endpoint=endpoint)
|
||||
self.assertTrue(out_tensor.op.name.startswith(
|
||||
'InceptionV3/' + endpoint))
|
||||
self.assertItemsEqual(endpoints[:index+1], end_points)
|
||||
|
||||
def testBuildAndCheckAllEndPointsUptoMixed7c(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
_, end_points = inception.inception_v3_base(
|
||||
inputs, final_endpoint='Mixed_7c')
|
||||
endpoints_shapes = {'Conv2d_1a_3x3': [batch_size, 149, 149, 32],
|
||||
'Conv2d_2a_3x3': [batch_size, 147, 147, 32],
|
||||
'Conv2d_2b_3x3': [batch_size, 147, 147, 64],
|
||||
'MaxPool_3a_3x3': [batch_size, 73, 73, 64],
|
||||
'Conv2d_3b_1x1': [batch_size, 73, 73, 80],
|
||||
'Conv2d_4a_3x3': [batch_size, 71, 71, 192],
|
||||
'MaxPool_5a_3x3': [batch_size, 35, 35, 192],
|
||||
'Mixed_5b': [batch_size, 35, 35, 256],
|
||||
'Mixed_5c': [batch_size, 35, 35, 288],
|
||||
'Mixed_5d': [batch_size, 35, 35, 288],
|
||||
'Mixed_6a': [batch_size, 17, 17, 768],
|
||||
'Mixed_6b': [batch_size, 17, 17, 768],
|
||||
'Mixed_6c': [batch_size, 17, 17, 768],
|
||||
'Mixed_6d': [batch_size, 17, 17, 768],
|
||||
'Mixed_6e': [batch_size, 17, 17, 768],
|
||||
'Mixed_7a': [batch_size, 8, 8, 1280],
|
||||
'Mixed_7b': [batch_size, 8, 8, 2048],
|
||||
'Mixed_7c': [batch_size, 8, 8, 2048]}
|
||||
self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
|
||||
for endpoint_name in endpoints_shapes:
|
||||
expected_shape = endpoints_shapes[endpoint_name]
|
||||
self.assertTrue(endpoint_name in end_points)
|
||||
self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
|
||||
expected_shape)
|
||||
|
||||
def testModelHasExpectedNumberOfParameters(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
with slim.arg_scope(inception.inception_v3_arg_scope()):
|
||||
inception.inception_v3_base(inputs)
|
||||
total_params, _ = slim.model_analyzer.analyze_vars(
|
||||
slim.get_model_variables())
|
||||
self.assertAlmostEqual(21802784, total_params)
|
||||
|
||||
def testBuildEndPoints(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
_, end_points = inception.inception_v3(inputs, num_classes)
|
||||
self.assertTrue('Logits' in end_points)
|
||||
logits = end_points['Logits']
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
self.assertTrue('AuxLogits' in end_points)
|
||||
aux_logits = end_points['AuxLogits']
|
||||
self.assertListEqual(aux_logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
self.assertTrue('Mixed_7c' in end_points)
|
||||
pre_pool = end_points['Mixed_7c']
|
||||
self.assertListEqual(pre_pool.get_shape().as_list(),
|
||||
[batch_size, 8, 8, 2048])
|
||||
self.assertTrue('PreLogits' in end_points)
|
||||
pre_logits = end_points['PreLogits']
|
||||
self.assertListEqual(pre_logits.get_shape().as_list(),
|
||||
[batch_size, 1, 1, 2048])
|
||||
|
||||
def testBuildEndPointsWithDepthMultiplierLessThanOne(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
_, end_points = inception.inception_v3(inputs, num_classes)
|
||||
|
||||
endpoint_keys = [key for key in end_points.keys()
|
||||
if key.startswith('Mixed') or key.startswith('Conv')]
|
||||
|
||||
_, end_points_with_multiplier = inception.inception_v3(
|
||||
inputs, num_classes, scope='depth_multiplied_net',
|
||||
depth_multiplier=0.5)
|
||||
|
||||
for key in endpoint_keys:
|
||||
original_depth = end_points[key].get_shape().as_list()[3]
|
||||
new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
|
||||
self.assertEqual(0.5 * original_depth, new_depth)
|
||||
|
||||
def testBuildEndPointsWithDepthMultiplierGreaterThanOne(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
_, end_points = inception.inception_v3(inputs, num_classes)
|
||||
|
||||
endpoint_keys = [key for key in end_points.keys()
|
||||
if key.startswith('Mixed') or key.startswith('Conv')]
|
||||
|
||||
_, end_points_with_multiplier = inception.inception_v3(
|
||||
inputs, num_classes, scope='depth_multiplied_net',
|
||||
depth_multiplier=2.0)
|
||||
|
||||
for key in endpoint_keys:
|
||||
original_depth = end_points[key].get_shape().as_list()[3]
|
||||
new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
|
||||
self.assertEqual(2.0 * original_depth, new_depth)
|
||||
|
||||
def testRaiseValueErrorWithInvalidDepthMultiplier(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
with self.assertRaises(ValueError):
|
||||
_ = inception.inception_v3(inputs, num_classes, depth_multiplier=-0.1)
|
||||
with self.assertRaises(ValueError):
|
||||
_ = inception.inception_v3(inputs, num_classes, depth_multiplier=0.0)
|
||||
|
||||
def testHalfSizeImages(self):
|
||||
batch_size = 5
|
||||
height, width = 150, 150
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, end_points = inception.inception_v3(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV3/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
pre_pool = end_points['Mixed_7c']
|
||||
self.assertListEqual(pre_pool.get_shape().as_list(),
|
||||
[batch_size, 3, 3, 2048])
|
||||
|
||||
def testUnknownImageShape(self):
|
||||
tf.reset_default_graph()
|
||||
batch_size = 2
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
|
||||
with self.test_session() as sess:
|
||||
inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3))
|
||||
logits, end_points = inception.inception_v3(inputs, num_classes)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
pre_pool = end_points['Mixed_7c']
|
||||
feed_dict = {inputs: input_np}
|
||||
tf.initialize_all_variables().run()
|
||||
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
|
||||
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 8, 2048])
|
||||
|
||||
def testUnknowBatchSize(self):
|
||||
batch_size = 1
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.placeholder(tf.float32, (None, height, width, 3))
|
||||
logits, _ = inception.inception_v3(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV3/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[None, num_classes])
|
||||
images = tf.random_uniform((batch_size, height, width, 3))
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(logits, {inputs: images.eval()})
|
||||
self.assertEquals(output.shape, (batch_size, num_classes))
|
||||
|
||||
def testEvaluation(self):
|
||||
batch_size = 2
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
|
||||
eval_inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = inception.inception_v3(eval_inputs, num_classes,
|
||||
is_training=False)
|
||||
predictions = tf.argmax(logits, 1)
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(predictions)
|
||||
self.assertEquals(output.shape, (batch_size,))
|
||||
|
||||
def testTrainEvalWithReuse(self):
|
||||
train_batch_size = 5
|
||||
eval_batch_size = 2
|
||||
height, width = 150, 150
|
||||
num_classes = 1000
|
||||
|
||||
train_inputs = tf.random_uniform((train_batch_size, height, width, 3))
|
||||
inception.inception_v3(train_inputs, num_classes)
|
||||
eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))
|
||||
logits, _ = inception.inception_v3(eval_inputs, num_classes,
|
||||
is_training=False, reuse=True)
|
||||
predictions = tf.argmax(logits, 1)
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(predictions)
|
||||
self.assertEquals(output.shape, (eval_batch_size,))
|
||||
|
||||
def testLogitsNotSqueezed(self):
|
||||
num_classes = 25
|
||||
images = tf.random_uniform([1, 299, 299, 3])
|
||||
logits, _ = inception.inception_v3(images,
|
||||
num_classes=num_classes,
|
||||
spatial_squeeze=False)
|
||||
|
||||
with self.test_session() as sess:
|
||||
tf.initialize_all_variables().run()
|
||||
logits_out = sess.run(logits)
|
||||
self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
@ -0,0 +1,323 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains the definition of the Inception V4 architecture.
|
||||
|
||||
As described in http://arxiv.org/abs/1602.07261.
|
||||
|
||||
Inception-v4, Inception-ResNet and the Impact of Residual Connections
|
||||
on Learning
|
||||
Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import inception_utils
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
def block_inception_a(inputs, scope=None, reuse=None):
|
||||
"""Builds Inception-A block for Inception v4 network."""
|
||||
# By default use stride=1 and SAME padding
|
||||
with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
|
||||
stride=1, padding='SAME'):
|
||||
with tf.variable_scope(scope, 'BlockInceptionA', [inputs], reuse=reuse):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(inputs, 96, [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
|
||||
branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, 96, [1, 1], scope='Conv2d_0b_1x1')
|
||||
return tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
|
||||
|
||||
def block_reduction_a(inputs, scope=None, reuse=None):
|
||||
"""Builds Reduction-A block for Inception v4 network."""
|
||||
# By default use stride=1 and SAME padding
|
||||
with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
|
||||
stride=1, padding='SAME'):
|
||||
with tf.variable_scope(scope, 'BlockReductionA', [inputs], reuse=reuse):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(inputs, 384, [3, 3], stride=2, padding='VALID',
|
||||
scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3')
|
||||
branch_1 = slim.conv2d(branch_1, 256, [3, 3], stride=2,
|
||||
padding='VALID', scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
|
||||
scope='MaxPool_1a_3x3')
|
||||
return tf.concat(3, [branch_0, branch_1, branch_2])
|
||||
|
||||
|
||||
def block_inception_b(inputs, scope=None, reuse=None):
|
||||
"""Builds Inception-B block for Inception v4 network."""
|
||||
# By default use stride=1 and SAME padding
|
||||
with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
|
||||
stride=1, padding='SAME'):
|
||||
with tf.variable_scope(scope, 'BlockInceptionB', [inputs], reuse=reuse):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, 224, [1, 7], scope='Conv2d_0b_1x7')
|
||||
branch_1 = slim.conv2d(branch_1, 256, [7, 1], scope='Conv2d_0c_7x1')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0b_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, 224, [1, 7], scope='Conv2d_0c_1x7')
|
||||
branch_2 = slim.conv2d(branch_2, 224, [7, 1], scope='Conv2d_0d_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, 256, [1, 7], scope='Conv2d_0e_1x7')
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
|
||||
return tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
|
||||
|
||||
def block_reduction_b(inputs, scope=None, reuse=None):
|
||||
"""Builds Reduction-B block for Inception v4 network."""
|
||||
# By default use stride=1 and SAME padding
|
||||
with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
|
||||
stride=1, padding='SAME'):
|
||||
with tf.variable_scope(scope, 'BlockReductionB', [inputs], reuse=reuse):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_0 = slim.conv2d(branch_0, 192, [3, 3], stride=2,
|
||||
padding='VALID', scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, 256, [1, 7], scope='Conv2d_0b_1x7')
|
||||
branch_1 = slim.conv2d(branch_1, 320, [7, 1], scope='Conv2d_0c_7x1')
|
||||
branch_1 = slim.conv2d(branch_1, 320, [3, 3], stride=2,
|
||||
padding='VALID', scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
|
||||
scope='MaxPool_1a_3x3')
|
||||
return tf.concat(3, [branch_0, branch_1, branch_2])
|
||||
|
||||
|
||||
def block_inception_c(inputs, scope=None, reuse=None):
|
||||
"""Builds Inception-C block for Inception v4 network."""
|
||||
# By default use stride=1 and SAME padding
|
||||
with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
|
||||
stride=1, padding='SAME'):
|
||||
with tf.variable_scope(scope, 'BlockInceptionC', [inputs], reuse=reuse):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = tf.concat(3, [
|
||||
slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'),
|
||||
slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')])
|
||||
with tf.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1')
|
||||
branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3')
|
||||
branch_2 = tf.concat(3, [
|
||||
slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'),
|
||||
slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')])
|
||||
with tf.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1')
|
||||
return tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
|
||||
|
||||
|
||||
def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None):
|
||||
"""Creates the Inception V4 network up to the given final endpoint.
|
||||
|
||||
Args:
|
||||
inputs: a 4-D tensor of size [batch_size, height, width, 3].
|
||||
final_endpoint: specifies the endpoint to construct the network up to.
|
||||
It can be one of [ 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
|
||||
'Mixed_3a', 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
|
||||
'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e',
|
||||
'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c',
|
||||
'Mixed_7d']
|
||||
scope: Optional variable_scope.
|
||||
|
||||
Returns:
|
||||
logits: the logits outputs of the model.
|
||||
end_points: the set of end_points from the inception model.
|
||||
|
||||
Raises:
|
||||
ValueError: if final_endpoint is not set to one of the predefined values,
|
||||
"""
|
||||
end_points = {}
|
||||
|
||||
def add_and_check_final(name, net):
|
||||
end_points[name] = net
|
||||
return name == final_endpoint
|
||||
|
||||
with tf.variable_scope(scope, 'InceptionV4', [inputs]):
|
||||
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
|
||||
stride=1, padding='SAME'):
|
||||
# 299 x 299 x 3
|
||||
net = slim.conv2d(inputs, 32, [3, 3], stride=2,
|
||||
padding='VALID', scope='Conv2d_1a_3x3')
|
||||
if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points
|
||||
# 149 x 149 x 32
|
||||
net = slim.conv2d(net, 32, [3, 3], padding='VALID',
|
||||
scope='Conv2d_2a_3x3')
|
||||
if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points
|
||||
# 147 x 147 x 32
|
||||
net = slim.conv2d(net, 64, [3, 3], scope='Conv2d_2b_3x3')
|
||||
if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points
|
||||
# 147 x 147 x 64
|
||||
with tf.variable_scope('Mixed_3a'):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
|
||||
scope='MaxPool_0a_3x3')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, 96, [3, 3], stride=2, padding='VALID',
|
||||
scope='Conv2d_0a_3x3')
|
||||
net = tf.concat(3, [branch_0, branch_1])
|
||||
if add_and_check_final('Mixed_3a', net): return net, end_points
|
||||
|
||||
# 73 x 73 x 160
|
||||
with tf.variable_scope('Mixed_4a'):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_0 = slim.conv2d(branch_0, 96, [3, 3], padding='VALID',
|
||||
scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, 64, [1, 7], scope='Conv2d_0b_1x7')
|
||||
branch_1 = slim.conv2d(branch_1, 64, [7, 1], scope='Conv2d_0c_7x1')
|
||||
branch_1 = slim.conv2d(branch_1, 96, [3, 3], padding='VALID',
|
||||
scope='Conv2d_1a_3x3')
|
||||
net = tf.concat(3, [branch_0, branch_1])
|
||||
if add_and_check_final('Mixed_4a', net): return net, end_points
|
||||
|
||||
# 71 x 71 x 192
|
||||
with tf.variable_scope('Mixed_5a'):
|
||||
with tf.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, 192, [3, 3], stride=2, padding='VALID',
|
||||
scope='Conv2d_1a_3x3')
|
||||
with tf.variable_scope('Branch_1'):
|
||||
branch_1 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
|
||||
scope='MaxPool_1a_3x3')
|
||||
net = tf.concat(3, [branch_0, branch_1])
|
||||
if add_and_check_final('Mixed_5a', net): return net, end_points
|
||||
|
||||
# 35 x 35 x 384
|
||||
# 4 x Inception-A blocks
|
||||
for idx in xrange(4):
|
||||
block_scope = 'Mixed_5' + chr(ord('b') + idx)
|
||||
net = block_inception_a(net, block_scope)
|
||||
if add_and_check_final(block_scope, net): return net, end_points
|
||||
|
||||
# 35 x 35 x 384
|
||||
# Reduction-A block
|
||||
net = block_reduction_a(net, 'Mixed_6a')
|
||||
if add_and_check_final('Mixed_6a', net): return net, end_points
|
||||
|
||||
# 17 x 17 x 1024
|
||||
# 7 x Inception-B blocks
|
||||
for idx in xrange(7):
|
||||
block_scope = 'Mixed_6' + chr(ord('b') + idx)
|
||||
net = block_inception_b(net, block_scope)
|
||||
if add_and_check_final(block_scope, net): return net, end_points
|
||||
|
||||
# 17 x 17 x 1024
|
||||
# Reduction-B block
|
||||
net = block_reduction_b(net, 'Mixed_7a')
|
||||
if add_and_check_final('Mixed_7a', net): return net, end_points
|
||||
|
||||
# 8 x 8 x 1536
|
||||
# 3 x Inception-C blocks
|
||||
for idx in xrange(3):
|
||||
block_scope = 'Mixed_7' + chr(ord('b') + idx)
|
||||
net = block_inception_c(net, block_scope)
|
||||
if add_and_check_final(block_scope, net): return net, end_points
|
||||
raise ValueError('Unknown final endpoint %s' % final_endpoint)
|
||||
|
||||
|
||||
def inception_v4(inputs, num_classes=1001, is_training=True,
|
||||
dropout_keep_prob=0.8,
|
||||
reuse=None,
|
||||
scope='InceptionV4',
|
||||
create_aux_logits=True):
|
||||
"""Creates the Inception V4 model.
|
||||
|
||||
Args:
|
||||
inputs: a 4-D tensor of size [batch_size, height, width, 3].
|
||||
num_classes: number of predicted classes.
|
||||
is_training: whether is training or not.
|
||||
dropout_keep_prob: float, the fraction to keep before final layer.
|
||||
reuse: whether or not the network and its variables should be reused. To be
|
||||
able to reuse 'scope' must be given.
|
||||
scope: Optional variable_scope.
|
||||
create_aux_logits: Whether to include the auxilliary logits.
|
||||
|
||||
Returns:
|
||||
logits: the logits outputs of the model.
|
||||
end_points: the set of end_points from the inception model.
|
||||
"""
|
||||
end_points = {}
|
||||
with tf.variable_scope(scope, 'InceptionV4', [inputs], reuse=reuse) as scope:
|
||||
with slim.arg_scope([slim.batch_norm, slim.dropout],
|
||||
is_training=is_training):
|
||||
net, end_points = inception_v4_base(inputs, scope=scope)
|
||||
|
||||
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
|
||||
stride=1, padding='SAME'):
|
||||
# Auxiliary Head logits
|
||||
if create_aux_logits:
|
||||
with tf.variable_scope('AuxLogits'):
|
||||
# 17 x 17 x 1024
|
||||
aux_logits = end_points['Mixed_6h']
|
||||
aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3,
|
||||
padding='VALID',
|
||||
scope='AvgPool_1a_5x5')
|
||||
aux_logits = slim.conv2d(aux_logits, 128, [1, 1],
|
||||
scope='Conv2d_1b_1x1')
|
||||
aux_logits = slim.conv2d(aux_logits, 768,
|
||||
aux_logits.get_shape()[1:3],
|
||||
padding='VALID', scope='Conv2d_2a')
|
||||
aux_logits = slim.flatten(aux_logits)
|
||||
aux_logits = slim.fully_connected(aux_logits, num_classes,
|
||||
activation_fn=None,
|
||||
scope='Aux_logits')
|
||||
end_points['AuxLogits'] = aux_logits
|
||||
|
||||
# Final pooling and prediction
|
||||
with tf.variable_scope('Logits'):
|
||||
# 8 x 8 x 1536
|
||||
net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
|
||||
scope='AvgPool_1a')
|
||||
# 1 x 1 x 1536
|
||||
net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b')
|
||||
net = slim.flatten(net, scope='PreLogitsFlatten')
|
||||
end_points['PreLogitsFlatten'] = net
|
||||
# 1536
|
||||
logits = slim.fully_connected(net, num_classes, activation_fn=None,
|
||||
scope='Logits')
|
||||
end_points['Logits'] = logits
|
||||
end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')
|
||||
return logits, end_points
|
||||
inception_v4.default_image_size = 299
|
||||
|
||||
|
||||
inception_v4_arg_scope = inception_utils.inception_arg_scope
|
||||
@ -0,0 +1,216 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for slim.inception_v4."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import inception
|
||||
|
||||
|
||||
class InceptionTest(tf.test.TestCase):
|
||||
|
||||
def testBuildLogits(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, end_points = inception.inception_v4(inputs, num_classes)
|
||||
auxlogits = end_points['AuxLogits']
|
||||
predictions = end_points['Predictions']
|
||||
self.assertTrue(auxlogits.op.name.startswith('InceptionV4/AuxLogits'))
|
||||
self.assertListEqual(auxlogits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
self.assertTrue(predictions.op.name.startswith(
|
||||
'InceptionV4/Logits/Predictions'))
|
||||
self.assertListEqual(predictions.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
|
||||
def testBuildWithoutAuxLogits(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, endpoints = inception.inception_v4(inputs, num_classes,
|
||||
create_aux_logits=False)
|
||||
self.assertFalse('AuxLogits' in endpoints)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
|
||||
def testAllEndPointsShapes(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
_, end_points = inception.inception_v4(inputs, num_classes)
|
||||
endpoints_shapes = {'Conv2d_1a_3x3': [batch_size, 149, 149, 32],
|
||||
'Conv2d_2a_3x3': [batch_size, 147, 147, 32],
|
||||
'Conv2d_2b_3x3': [batch_size, 147, 147, 64],
|
||||
'Mixed_3a': [batch_size, 73, 73, 160],
|
||||
'Mixed_4a': [batch_size, 71, 71, 192],
|
||||
'Mixed_5a': [batch_size, 35, 35, 384],
|
||||
# 4 x Inception-A blocks
|
||||
'Mixed_5b': [batch_size, 35, 35, 384],
|
||||
'Mixed_5c': [batch_size, 35, 35, 384],
|
||||
'Mixed_5d': [batch_size, 35, 35, 384],
|
||||
'Mixed_5e': [batch_size, 35, 35, 384],
|
||||
# Reduction-A block
|
||||
'Mixed_6a': [batch_size, 17, 17, 1024],
|
||||
# 7 x Inception-B blocks
|
||||
'Mixed_6b': [batch_size, 17, 17, 1024],
|
||||
'Mixed_6c': [batch_size, 17, 17, 1024],
|
||||
'Mixed_6d': [batch_size, 17, 17, 1024],
|
||||
'Mixed_6e': [batch_size, 17, 17, 1024],
|
||||
'Mixed_6f': [batch_size, 17, 17, 1024],
|
||||
'Mixed_6g': [batch_size, 17, 17, 1024],
|
||||
'Mixed_6h': [batch_size, 17, 17, 1024],
|
||||
# Reduction-A block
|
||||
'Mixed_7a': [batch_size, 8, 8, 1536],
|
||||
# 3 x Inception-C blocks
|
||||
'Mixed_7b': [batch_size, 8, 8, 1536],
|
||||
'Mixed_7c': [batch_size, 8, 8, 1536],
|
||||
'Mixed_7d': [batch_size, 8, 8, 1536],
|
||||
# Logits and predictions
|
||||
'AuxLogits': [batch_size, num_classes],
|
||||
'PreLogitsFlatten': [batch_size, 1536],
|
||||
'Logits': [batch_size, num_classes],
|
||||
'Predictions': [batch_size, num_classes]}
|
||||
self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
|
||||
for endpoint_name in endpoints_shapes:
|
||||
expected_shape = endpoints_shapes[endpoint_name]
|
||||
self.assertTrue(endpoint_name in end_points)
|
||||
self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
|
||||
expected_shape)
|
||||
|
||||
def testBuildBaseNetwork(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
net, end_points = inception.inception_v4_base(inputs)
|
||||
self.assertTrue(net.op.name.startswith(
|
||||
'InceptionV4/Mixed_7d'))
|
||||
self.assertListEqual(net.get_shape().as_list(), [batch_size, 8, 8, 1536])
|
||||
expected_endpoints = [
|
||||
'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a',
|
||||
'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
|
||||
'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
|
||||
'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a',
|
||||
'Mixed_7b', 'Mixed_7c', 'Mixed_7d']
|
||||
self.assertItemsEqual(end_points.keys(), expected_endpoints)
|
||||
for name, op in end_points.iteritems():
|
||||
self.assertTrue(op.name.startswith('InceptionV4/' + name))
|
||||
|
||||
def testBuildOnlyUpToFinalEndpoint(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
all_endpoints = [
|
||||
'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a',
|
||||
'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
|
||||
'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
|
||||
'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a',
|
||||
'Mixed_7b', 'Mixed_7c', 'Mixed_7d']
|
||||
for index, endpoint in enumerate(all_endpoints):
|
||||
with tf.Graph().as_default():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
out_tensor, end_points = inception.inception_v4_base(
|
||||
inputs, final_endpoint=endpoint)
|
||||
self.assertTrue(out_tensor.op.name.startswith(
|
||||
'InceptionV4/' + endpoint))
|
||||
self.assertItemsEqual(all_endpoints[:index+1], end_points)
|
||||
|
||||
def testVariablesSetDevice(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
# Force all Variables to reside on the device.
|
||||
with tf.variable_scope('on_cpu'), tf.device('/cpu:0'):
|
||||
inception.inception_v4(inputs, num_classes)
|
||||
with tf.variable_scope('on_gpu'), tf.device('/gpu:0'):
|
||||
inception.inception_v4(inputs, num_classes)
|
||||
for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_cpu'):
|
||||
self.assertDeviceEqual(v.device, '/cpu:0')
|
||||
for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_gpu'):
|
||||
self.assertDeviceEqual(v.device, '/gpu:0')
|
||||
|
||||
def testHalfSizeImages(self):
|
||||
batch_size = 5
|
||||
height, width = 150, 150
|
||||
num_classes = 1000
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, end_points = inception.inception_v4(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
pre_pool = end_points['Mixed_7d']
|
||||
self.assertListEqual(pre_pool.get_shape().as_list(),
|
||||
[batch_size, 3, 3, 1536])
|
||||
|
||||
def testUnknownBatchSize(self):
|
||||
batch_size = 1
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
with self.test_session() as sess:
|
||||
inputs = tf.placeholder(tf.float32, (None, height, width, 3))
|
||||
logits, _ = inception.inception_v4(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[None, num_classes])
|
||||
images = tf.random_uniform((batch_size, height, width, 3))
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(logits, {inputs: images.eval()})
|
||||
self.assertEquals(output.shape, (batch_size, num_classes))
|
||||
|
||||
def testEvaluation(self):
|
||||
batch_size = 2
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
with self.test_session() as sess:
|
||||
eval_inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = inception.inception_v4(eval_inputs,
|
||||
num_classes,
|
||||
is_training=False)
|
||||
predictions = tf.argmax(logits, 1)
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(predictions)
|
||||
self.assertEquals(output.shape, (batch_size,))
|
||||
|
||||
def testTrainEvalWithReuse(self):
|
||||
train_batch_size = 5
|
||||
eval_batch_size = 2
|
||||
height, width = 150, 150
|
||||
num_classes = 1000
|
||||
with self.test_session() as sess:
|
||||
train_inputs = tf.random_uniform((train_batch_size, height, width, 3))
|
||||
inception.inception_v4(train_inputs, num_classes)
|
||||
eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))
|
||||
logits, _ = inception.inception_v4(eval_inputs,
|
||||
num_classes,
|
||||
is_training=False,
|
||||
reuse=True)
|
||||
predictions = tf.argmax(logits, 1)
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(predictions)
|
||||
self.assertEquals(output.shape, (eval_batch_size,))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
@ -0,0 +1,93 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains a variant of the LeNet model definition."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
def lenet(images, num_classes=10, is_training=False,
|
||||
dropout_keep_prob=0.5,
|
||||
prediction_fn=slim.softmax,
|
||||
scope='LeNet'):
|
||||
"""Creates a variant of the LeNet model.
|
||||
|
||||
Note that since the output is a set of 'logits', the values fall in the
|
||||
interval of (-infinity, infinity). Consequently, to convert the outputs to a
|
||||
probability distribution over the characters, one will need to convert them
|
||||
using the softmax function:
|
||||
|
||||
logits = lenet.lenet(images, is_training=False)
|
||||
probabilities = tf.nn.softmax(logits)
|
||||
predictions = tf.argmax(logits, 1)
|
||||
|
||||
Args:
|
||||
images: A batch of `Tensors` of size [batch_size, height, width, channels].
|
||||
num_classes: the number of classes in the dataset.
|
||||
is_training: specifies whether or not we're currently training the model.
|
||||
This variable will determine the behaviour of the dropout layer.
|
||||
dropout_keep_prob: the percentage of activation values that are retained.
|
||||
prediction_fn: a function to get predictions out of logits.
|
||||
scope: Optional variable_scope.
|
||||
|
||||
Returns:
|
||||
logits: the pre-softmax activations, a tensor of size
|
||||
[batch_size, `num_classes`]
|
||||
end_points: a dictionary from components of the network to the corresponding
|
||||
activation.
|
||||
"""
|
||||
end_points = {}
|
||||
|
||||
with tf.variable_scope(scope, 'LeNet', [images, num_classes]):
|
||||
net = slim.conv2d(images, 32, [5, 5], scope='conv1')
|
||||
net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
|
||||
net = slim.conv2d(net, 64, [5, 5], scope='conv2')
|
||||
net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
|
||||
net = slim.flatten(net)
|
||||
end_points['Flatten'] = net
|
||||
|
||||
net = slim.fully_connected(net, 1024, scope='fc3')
|
||||
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
|
||||
scope='dropout3')
|
||||
logits = slim.fully_connected(net, num_classes, activation_fn=None,
|
||||
scope='fc4')
|
||||
|
||||
end_points['Logits'] = logits
|
||||
end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
|
||||
|
||||
return logits, end_points
|
||||
lenet.default_image_size = 28
|
||||
|
||||
|
||||
def lenet_arg_scope(weight_decay=0.0):
|
||||
"""Defines the default lenet argument scope.
|
||||
|
||||
Args:
|
||||
weight_decay: The weight decay to use for regularizing the model.
|
||||
|
||||
Returns:
|
||||
An `arg_scope` to use for the inception v3 model.
|
||||
"""
|
||||
with slim.arg_scope(
|
||||
[slim.conv2d, slim.fully_connected],
|
||||
weights_regularizer=slim.l2_regularizer(weight_decay),
|
||||
weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
|
||||
activation_fn=tf.nn.relu) as sc:
|
||||
return sc
|
||||
@ -0,0 +1,109 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains a factory for building various models."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import functools
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import alexnet
|
||||
from nets import cifarnet
|
||||
from nets import inception
|
||||
from nets import lenet
|
||||
from nets import overfeat
|
||||
from nets import resnet_v1
|
||||
from nets import resnet_v2
|
||||
from nets import vgg
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
networks_map = {'alexnet_v2': alexnet.alexnet_v2,
|
||||
'cifarnet': cifarnet.cifarnet,
|
||||
'overfeat': overfeat.overfeat,
|
||||
'vgg_a': vgg.vgg_a,
|
||||
'vgg_16': vgg.vgg_16,
|
||||
'vgg_19': vgg.vgg_19,
|
||||
'inception_v1': inception.inception_v1,
|
||||
'inception_v2': inception.inception_v2,
|
||||
'inception_v3': inception.inception_v3,
|
||||
'inception_v4': inception.inception_v4,
|
||||
'inception_resnet_v2': inception.inception_resnet_v2,
|
||||
'lenet': lenet.lenet,
|
||||
'resnet_v1_50': resnet_v1.resnet_v1_50,
|
||||
'resnet_v1_101': resnet_v1.resnet_v1_101,
|
||||
'resnet_v1_152': resnet_v1.resnet_v1_152,
|
||||
'resnet_v1_200': resnet_v1.resnet_v1_200,
|
||||
'resnet_v2_50': resnet_v2.resnet_v2_50,
|
||||
'resnet_v2_101': resnet_v2.resnet_v2_101,
|
||||
'resnet_v2_152': resnet_v2.resnet_v2_152,
|
||||
'resnet_v2_200': resnet_v2.resnet_v2_200,
|
||||
}
|
||||
|
||||
arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope,
|
||||
'cifarnet': cifarnet.cifarnet_arg_scope,
|
||||
'overfeat': overfeat.overfeat_arg_scope,
|
||||
'vgg_a': vgg.vgg_arg_scope,
|
||||
'vgg_16': vgg.vgg_arg_scope,
|
||||
'vgg_19': vgg.vgg_arg_scope,
|
||||
'inception_v1': inception.inception_v3_arg_scope,
|
||||
'inception_v2': inception.inception_v3_arg_scope,
|
||||
'inception_v3': inception.inception_v3_arg_scope,
|
||||
'inception_v4': inception.inception_v4_arg_scope,
|
||||
'inception_resnet_v2':
|
||||
inception.inception_resnet_v2_arg_scope,
|
||||
'lenet': lenet.lenet_arg_scope,
|
||||
'resnet_v1_50': resnet_v1.resnet_arg_scope,
|
||||
'resnet_v1_101': resnet_v1.resnet_arg_scope,
|
||||
'resnet_v1_152': resnet_v1.resnet_arg_scope,
|
||||
'resnet_v1_200': resnet_v1.resnet_arg_scope,
|
||||
'resnet_v2_50': resnet_v2.resnet_arg_scope,
|
||||
'resnet_v2_101': resnet_v2.resnet_arg_scope,
|
||||
'resnet_v2_152': resnet_v2.resnet_arg_scope,
|
||||
'resnet_v2_200': resnet_v2.resnet_arg_scope,
|
||||
}
|
||||
|
||||
|
||||
def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False):
|
||||
"""Returns a network_fn such as `logits, end_points = network_fn(images)`.
|
||||
|
||||
Args:
|
||||
name: The name of the network.
|
||||
num_classes: The number of classes to use for classification.
|
||||
weight_decay: The l2 coefficient for the model weights.
|
||||
is_training: `True` if the model is being used for training and `False`
|
||||
otherwise.
|
||||
|
||||
Returns:
|
||||
network_fn: A function that applies the model to a batch of images. It has
|
||||
the following signature:
|
||||
logits, end_points = network_fn(images)
|
||||
Raises:
|
||||
ValueError: If network `name` is not recognized.
|
||||
"""
|
||||
if name not in networks_map:
|
||||
raise ValueError('Name of network unknown %s' % name)
|
||||
arg_scope = arg_scopes_map[name](weight_decay=weight_decay)
|
||||
func = networks_map[name]
|
||||
@functools.wraps(func)
|
||||
def network_fn(images, **kwargs):
|
||||
with slim.arg_scope(arg_scope):
|
||||
return func(images, num_classes, is_training=is_training, **kwargs)
|
||||
if hasattr(func, 'default_image_size'):
|
||||
network_fn.default_image_size = func.default_image_size
|
||||
|
||||
return network_fn
|
||||
@ -0,0 +1,46 @@
|
||||
# Copyright 2016 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""Tests for slim.inception."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import nets_factory
|
||||
|
||||
|
||||
class NetworksTest(tf.test.TestCase):
|
||||
|
||||
def testGetNetworkFn(self):
|
||||
batch_size = 5
|
||||
num_classes = 1000
|
||||
for net in nets_factory.networks_map:
|
||||
with self.test_session():
|
||||
net_fn = nets_factory.get_network_fn(net, num_classes)
|
||||
# Most networks use 224 as their default_image_size
|
||||
image_size = getattr(net_fn, 'default_image_size', 224)
|
||||
inputs = tf.random_uniform((batch_size, image_size, image_size, 3))
|
||||
logits, end_points = net_fn(inputs)
|
||||
self.assertTrue(isinstance(logits, tf.Tensor))
|
||||
self.assertTrue(isinstance(end_points, dict))
|
||||
self.assertEqual(logits.get_shape().as_list()[0], batch_size)
|
||||
self.assertEqual(logits.get_shape().as_list()[-1], num_classes)
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
@ -0,0 +1,118 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains the model definition for the OverFeat network.
|
||||
|
||||
The definition for the network was obtained from:
|
||||
OverFeat: Integrated Recognition, Localization and Detection using
|
||||
Convolutional Networks
|
||||
Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and
|
||||
Yann LeCun, 2014
|
||||
http://arxiv.org/abs/1312.6229
|
||||
|
||||
Usage:
|
||||
with slim.arg_scope(overfeat.overfeat_arg_scope()):
|
||||
outputs, end_points = overfeat.overfeat(inputs)
|
||||
|
||||
@@overfeat
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
slim = tf.contrib.slim
|
||||
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
|
||||
|
||||
|
||||
def overfeat_arg_scope(weight_decay=0.0005):
|
||||
with slim.arg_scope([slim.conv2d, slim.fully_connected],
|
||||
activation_fn=tf.nn.relu,
|
||||
weights_regularizer=slim.l2_regularizer(weight_decay),
|
||||
biases_initializer=tf.zeros_initializer):
|
||||
with slim.arg_scope([slim.conv2d], padding='SAME'):
|
||||
with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
|
||||
return arg_sc
|
||||
|
||||
|
||||
def overfeat(inputs,
|
||||
num_classes=1000,
|
||||
is_training=True,
|
||||
dropout_keep_prob=0.5,
|
||||
spatial_squeeze=True,
|
||||
scope='overfeat'):
|
||||
"""Contains the model definition for the OverFeat network.
|
||||
|
||||
The definition for the network was obtained from:
|
||||
OverFeat: Integrated Recognition, Localization and Detection using
|
||||
Convolutional Networks
|
||||
Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and
|
||||
Yann LeCun, 2014
|
||||
http://arxiv.org/abs/1312.6229
|
||||
|
||||
Note: All the fully_connected layers have been transformed to conv2d layers.
|
||||
To use in classification mode, resize input to 231x231. To use in fully
|
||||
convolutional mode, set spatial_squeeze to false.
|
||||
|
||||
Args:
|
||||
inputs: a tensor of size [batch_size, height, width, channels].
|
||||
num_classes: number of predicted classes.
|
||||
is_training: whether or not the model is being trained.
|
||||
dropout_keep_prob: the probability that activations are kept in the dropout
|
||||
layers during training.
|
||||
spatial_squeeze: whether or not should squeeze the spatial dimensions of the
|
||||
outputs. Useful to remove unnecessary dimensions for classification.
|
||||
scope: Optional scope for the variables.
|
||||
|
||||
Returns:
|
||||
the last op containing the log predictions and end_points dict.
|
||||
|
||||
"""
|
||||
with tf.variable_scope(scope, 'overfeat', [inputs]) as sc:
|
||||
end_points_collection = sc.name + '_end_points'
|
||||
# Collect outputs for conv2d, fully_connected and max_pool2d
|
||||
with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
|
||||
outputs_collections=end_points_collection):
|
||||
net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
|
||||
scope='conv1')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool1')
|
||||
net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool2')
|
||||
net = slim.conv2d(net, 512, [3, 3], scope='conv3')
|
||||
net = slim.conv2d(net, 1024, [3, 3], scope='conv4')
|
||||
net = slim.conv2d(net, 1024, [3, 3], scope='conv5')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool5')
|
||||
with slim.arg_scope([slim.conv2d],
|
||||
weights_initializer=trunc_normal(0.005),
|
||||
biases_initializer=tf.constant_initializer(0.1)):
|
||||
# Use conv2d instead of fully_connected layers.
|
||||
net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6')
|
||||
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
|
||||
scope='dropout6')
|
||||
net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
|
||||
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
|
||||
scope='dropout7')
|
||||
net = slim.conv2d(net, num_classes, [1, 1],
|
||||
activation_fn=None,
|
||||
normalizer_fn=None,
|
||||
biases_initializer=tf.zeros_initializer,
|
||||
scope='fc8')
|
||||
# Convert end_points_collection into a end_point dict.
|
||||
end_points = slim.utils.convert_collection_to_dict(end_points_collection)
|
||||
if spatial_squeeze:
|
||||
net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
|
||||
end_points[sc.name + '/fc8'] = net
|
||||
return net, end_points
|
||||
overfeat.default_image_size = 231
|
||||
@ -0,0 +1,145 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for slim.nets.overfeat."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import overfeat
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
class OverFeatTest(tf.test.TestCase):
|
||||
|
||||
def testBuild(self):
|
||||
batch_size = 5
|
||||
height, width = 231, 231
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = overfeat.overfeat(inputs, num_classes)
|
||||
self.assertEquals(logits.op.name, 'overfeat/fc8/squeezed')
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
|
||||
def testFullyConvolutional(self):
|
||||
batch_size = 1
|
||||
height, width = 281, 281
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False)
|
||||
self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd')
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, 2, 2, num_classes])
|
||||
|
||||
def testEndPoints(self):
|
||||
batch_size = 5
|
||||
height, width = 231, 231
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
_, end_points = overfeat.overfeat(inputs, num_classes)
|
||||
expected_names = ['overfeat/conv1',
|
||||
'overfeat/pool1',
|
||||
'overfeat/conv2',
|
||||
'overfeat/pool2',
|
||||
'overfeat/conv3',
|
||||
'overfeat/conv4',
|
||||
'overfeat/conv5',
|
||||
'overfeat/pool5',
|
||||
'overfeat/fc6',
|
||||
'overfeat/fc7',
|
||||
'overfeat/fc8'
|
||||
]
|
||||
self.assertSetEqual(set(end_points.keys()), set(expected_names))
|
||||
|
||||
def testModelVariables(self):
|
||||
batch_size = 5
|
||||
height, width = 231, 231
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
overfeat.overfeat(inputs, num_classes)
|
||||
expected_names = ['overfeat/conv1/weights',
|
||||
'overfeat/conv1/biases',
|
||||
'overfeat/conv2/weights',
|
||||
'overfeat/conv2/biases',
|
||||
'overfeat/conv3/weights',
|
||||
'overfeat/conv3/biases',
|
||||
'overfeat/conv4/weights',
|
||||
'overfeat/conv4/biases',
|
||||
'overfeat/conv5/weights',
|
||||
'overfeat/conv5/biases',
|
||||
'overfeat/fc6/weights',
|
||||
'overfeat/fc6/biases',
|
||||
'overfeat/fc7/weights',
|
||||
'overfeat/fc7/biases',
|
||||
'overfeat/fc8/weights',
|
||||
'overfeat/fc8/biases',
|
||||
]
|
||||
model_variables = [v.op.name for v in slim.get_model_variables()]
|
||||
self.assertSetEqual(set(model_variables), set(expected_names))
|
||||
|
||||
def testEvaluation(self):
|
||||
batch_size = 2
|
||||
height, width = 231, 231
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
eval_inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = overfeat.overfeat(eval_inputs, is_training=False)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
predictions = tf.argmax(logits, 1)
|
||||
self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
|
||||
|
||||
def testTrainEvalWithReuse(self):
|
||||
train_batch_size = 2
|
||||
eval_batch_size = 1
|
||||
train_height, train_width = 231, 231
|
||||
eval_height, eval_width = 281, 281
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
train_inputs = tf.random_uniform(
|
||||
(train_batch_size, train_height, train_width, 3))
|
||||
logits, _ = overfeat.overfeat(train_inputs)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[train_batch_size, num_classes])
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
eval_inputs = tf.random_uniform(
|
||||
(eval_batch_size, eval_height, eval_width, 3))
|
||||
logits, _ = overfeat.overfeat(eval_inputs, is_training=False,
|
||||
spatial_squeeze=False)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[eval_batch_size, 2, 2, num_classes])
|
||||
logits = tf.reduce_mean(logits, [1, 2])
|
||||
predictions = tf.argmax(logits, 1)
|
||||
self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
|
||||
|
||||
def testForward(self):
|
||||
batch_size = 1
|
||||
height, width = 231, 231
|
||||
with self.test_session() as sess:
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = overfeat.overfeat(inputs)
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(logits)
|
||||
self.assertTrue(output.any())
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
@ -0,0 +1,254 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains building blocks for various versions of Residual Networks.
|
||||
|
||||
Residual networks (ResNets) were proposed in:
|
||||
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
||||
Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015
|
||||
|
||||
More variants were introduced in:
|
||||
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
||||
Identity Mappings in Deep Residual Networks. arXiv: 1603.05027, 2016
|
||||
|
||||
We can obtain different ResNet variants by changing the network depth, width,
|
||||
and form of residual unit. This module implements the infrastructure for
|
||||
building them. Concrete ResNet units and full ResNet networks are implemented in
|
||||
the accompanying resnet_v1.py and resnet_v2.py modules.
|
||||
|
||||
Compared to https://github.com/KaimingHe/deep-residual-networks, in the current
|
||||
implementation we subsample the output activations in the last residual unit of
|
||||
each block, instead of subsampling the input activations in the first residual
|
||||
unit of each block. The two implementations give identical results but our
|
||||
implementation is more memory efficient.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import collections
|
||||
import tensorflow as tf
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):
|
||||
"""A named tuple describing a ResNet block.
|
||||
|
||||
Its parts are:
|
||||
scope: The scope of the `Block`.
|
||||
unit_fn: The ResNet unit function which takes as input a `Tensor` and
|
||||
returns another `Tensor` with the output of the ResNet unit.
|
||||
args: A list of length equal to the number of units in the `Block`. The list
|
||||
contains one (depth, depth_bottleneck, stride) tuple for each unit in the
|
||||
block to serve as argument to unit_fn.
|
||||
"""
|
||||
|
||||
|
||||
def subsample(inputs, factor, scope=None):
|
||||
"""Subsamples the input along the spatial dimensions.
|
||||
|
||||
Args:
|
||||
inputs: A `Tensor` of size [batch, height_in, width_in, channels].
|
||||
factor: The subsampling factor.
|
||||
scope: Optional variable_scope.
|
||||
|
||||
Returns:
|
||||
output: A `Tensor` of size [batch, height_out, width_out, channels] with the
|
||||
input, either intact (if factor == 1) or subsampled (if factor > 1).
|
||||
"""
|
||||
if factor == 1:
|
||||
return inputs
|
||||
else:
|
||||
return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
|
||||
|
||||
|
||||
def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):
|
||||
"""Strided 2-D convolution with 'SAME' padding.
|
||||
|
||||
When stride > 1, then we do explicit zero-padding, followed by conv2d with
|
||||
'VALID' padding.
|
||||
|
||||
Note that
|
||||
|
||||
net = conv2d_same(inputs, num_outputs, 3, stride=stride)
|
||||
|
||||
is equivalent to
|
||||
|
||||
net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME')
|
||||
net = subsample(net, factor=stride)
|
||||
|
||||
whereas
|
||||
|
||||
net = slim.conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME')
|
||||
|
||||
is different when the input's height or width is even, which is why we add the
|
||||
current function. For more details, see ResnetUtilsTest.testConv2DSameEven().
|
||||
|
||||
Args:
|
||||
inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
|
||||
num_outputs: An integer, the number of output filters.
|
||||
kernel_size: An int with the kernel_size of the filters.
|
||||
stride: An integer, the output stride.
|
||||
rate: An integer, rate for atrous convolution.
|
||||
scope: Scope.
|
||||
|
||||
Returns:
|
||||
output: A 4-D tensor of size [batch, height_out, width_out, channels] with
|
||||
the convolution output.
|
||||
"""
|
||||
if stride == 1:
|
||||
return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, rate=rate,
|
||||
padding='SAME', scope=scope)
|
||||
else:
|
||||
kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
|
||||
pad_total = kernel_size_effective - 1
|
||||
pad_beg = pad_total // 2
|
||||
pad_end = pad_total - pad_beg
|
||||
inputs = tf.pad(inputs,
|
||||
[[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
|
||||
return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
|
||||
rate=rate, padding='VALID', scope=scope)
|
||||
|
||||
|
||||
@slim.add_arg_scope
|
||||
def stack_blocks_dense(net, blocks, output_stride=None,
|
||||
outputs_collections=None):
|
||||
"""Stacks ResNet `Blocks` and controls output feature density.
|
||||
|
||||
First, this function creates scopes for the ResNet in the form of
|
||||
'block_name/unit_1', 'block_name/unit_2', etc.
|
||||
|
||||
Second, this function allows the user to explicitly control the ResNet
|
||||
output_stride, which is the ratio of the input to output spatial resolution.
|
||||
This is useful for dense prediction tasks such as semantic segmentation or
|
||||
object detection.
|
||||
|
||||
Most ResNets consist of 4 ResNet blocks and subsample the activations by a
|
||||
factor of 2 when transitioning between consecutive ResNet blocks. This results
|
||||
to a nominal ResNet output_stride equal to 8. If we set the output_stride to
|
||||
half the nominal network stride (e.g., output_stride=4), then we compute
|
||||
responses twice.
|
||||
|
||||
Control of the output feature density is implemented by atrous convolution.
|
||||
|
||||
Args:
|
||||
net: A `Tensor` of size [batch, height, width, channels].
|
||||
blocks: A list of length equal to the number of ResNet `Blocks`. Each
|
||||
element is a ResNet `Block` object describing the units in the `Block`.
|
||||
output_stride: If `None`, then the output will be computed at the nominal
|
||||
network stride. If output_stride is not `None`, it specifies the requested
|
||||
ratio of input to output spatial resolution, which needs to be equal to
|
||||
the product of unit strides from the start up to some level of the ResNet.
|
||||
For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1,
|
||||
then valid values for the output_stride are 1, 2, 6, 24 or None (which
|
||||
is equivalent to output_stride=24).
|
||||
outputs_collections: Collection to add the ResNet block outputs.
|
||||
|
||||
Returns:
|
||||
net: Output tensor with stride equal to the specified output_stride.
|
||||
|
||||
Raises:
|
||||
ValueError: If the target output_stride is not valid.
|
||||
"""
|
||||
# The current_stride variable keeps track of the effective stride of the
|
||||
# activations. This allows us to invoke atrous convolution whenever applying
|
||||
# the next residual unit would result in the activations having stride larger
|
||||
# than the target output_stride.
|
||||
current_stride = 1
|
||||
|
||||
# The atrous convolution rate parameter.
|
||||
rate = 1
|
||||
|
||||
for block in blocks:
|
||||
with tf.variable_scope(block.scope, 'block', [net]) as sc:
|
||||
for i, unit in enumerate(block.args):
|
||||
if output_stride is not None and current_stride > output_stride:
|
||||
raise ValueError('The target output_stride cannot be reached.')
|
||||
|
||||
with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
|
||||
unit_depth, unit_depth_bottleneck, unit_stride = unit
|
||||
|
||||
# If we have reached the target output_stride, then we need to employ
|
||||
# atrous convolution with stride=1 and multiply the atrous rate by the
|
||||
# current unit's stride for use in subsequent layers.
|
||||
if output_stride is not None and current_stride == output_stride:
|
||||
net = block.unit_fn(net,
|
||||
depth=unit_depth,
|
||||
depth_bottleneck=unit_depth_bottleneck,
|
||||
stride=1,
|
||||
rate=rate)
|
||||
rate *= unit_stride
|
||||
|
||||
else:
|
||||
net = block.unit_fn(net,
|
||||
depth=unit_depth,
|
||||
depth_bottleneck=unit_depth_bottleneck,
|
||||
stride=unit_stride,
|
||||
rate=1)
|
||||
current_stride *= unit_stride
|
||||
net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)
|
||||
|
||||
if output_stride is not None and current_stride != output_stride:
|
||||
raise ValueError('The target output_stride cannot be reached.')
|
||||
|
||||
return net
|
||||
|
||||
|
||||
def resnet_arg_scope(weight_decay=0.0001,
|
||||
batch_norm_decay=0.997,
|
||||
batch_norm_epsilon=1e-5,
|
||||
batch_norm_scale=True):
|
||||
"""Defines the default ResNet arg scope.
|
||||
|
||||
TODO(gpapan): The batch-normalization related default values above are
|
||||
appropriate for use in conjunction with the reference ResNet models
|
||||
released at https://github.com/KaimingHe/deep-residual-networks. When
|
||||
training ResNets from scratch, they might need to be tuned.
|
||||
|
||||
Args:
|
||||
weight_decay: The weight decay to use for regularizing the model.
|
||||
batch_norm_decay: The moving average decay when estimating layer activation
|
||||
statistics in batch normalization.
|
||||
batch_norm_epsilon: Small constant to prevent division by zero when
|
||||
normalizing activations by their variance in batch normalization.
|
||||
batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
|
||||
activations in the batch normalization layer.
|
||||
|
||||
Returns:
|
||||
An `arg_scope` to use for the resnet models.
|
||||
"""
|
||||
batch_norm_params = {
|
||||
'decay': batch_norm_decay,
|
||||
'epsilon': batch_norm_epsilon,
|
||||
'scale': batch_norm_scale,
|
||||
'updates_collections': tf.GraphKeys.UPDATE_OPS,
|
||||
}
|
||||
|
||||
with slim.arg_scope(
|
||||
[slim.conv2d],
|
||||
weights_regularizer=slim.l2_regularizer(weight_decay),
|
||||
weights_initializer=slim.variance_scaling_initializer(),
|
||||
activation_fn=tf.nn.relu,
|
||||
normalizer_fn=slim.batch_norm,
|
||||
normalizer_params=batch_norm_params):
|
||||
with slim.arg_scope([slim.batch_norm], **batch_norm_params):
|
||||
# The following implies padding='SAME' for pool1, which makes feature
|
||||
# alignment easier for dense prediction tasks. This is also used in
|
||||
# https://github.com/facebook/fb.resnet.torch. However the accompanying
|
||||
# code of 'Deep Residual Learning for Image Recognition' uses
|
||||
# padding='VALID' for pool1. You can switch to that choice by setting
|
||||
# slim.arg_scope([slim.max_pool2d], padding='VALID').
|
||||
with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
|
||||
return arg_sc
|
||||
@ -0,0 +1,295 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains definitions for the original form of Residual Networks.
|
||||
|
||||
The 'v1' residual networks (ResNets) implemented in this module were proposed
|
||||
by:
|
||||
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
||||
Deep Residual Learning for Image Recognition. arXiv:1512.03385
|
||||
|
||||
Other variants were introduced in:
|
||||
[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
||||
Identity Mappings in Deep Residual Networks. arXiv: 1603.05027
|
||||
|
||||
The networks defined in this module utilize the bottleneck building block of
|
||||
[1] with projection shortcuts only for increasing depths. They employ batch
|
||||
normalization *after* every weight layer. This is the architecture used by
|
||||
MSRA in the Imagenet and MSCOCO 2016 competition models ResNet-101 and
|
||||
ResNet-152. See [2; Fig. 1a] for a comparison between the current 'v1'
|
||||
architecture and the alternative 'v2' architecture of [2] which uses batch
|
||||
normalization *before* every weight layer in the so-called full pre-activation
|
||||
units.
|
||||
|
||||
Typical use:
|
||||
|
||||
from tensorflow.contrib.slim.nets import resnet_v1
|
||||
|
||||
ResNet-101 for image classification into 1000 classes:
|
||||
|
||||
# inputs has shape [batch, 224, 224, 3]
|
||||
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
|
||||
net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=False)
|
||||
|
||||
ResNet-101 for semantic segmentation into 21 classes:
|
||||
|
||||
# inputs has shape [batch, 513, 513, 3]
|
||||
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
|
||||
net, end_points = resnet_v1.resnet_v1_101(inputs,
|
||||
21,
|
||||
is_training=False,
|
||||
global_pool=False,
|
||||
output_stride=16)
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import resnet_utils
|
||||
|
||||
|
||||
resnet_arg_scope = resnet_utils.resnet_arg_scope
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
@slim.add_arg_scope
|
||||
def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
|
||||
outputs_collections=None, scope=None):
|
||||
"""Bottleneck residual unit variant with BN after convolutions.
|
||||
|
||||
This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
|
||||
its definition. Note that we use here the bottleneck variant which has an
|
||||
extra bottleneck layer.
|
||||
|
||||
When putting together two consecutive ResNet blocks that use this unit, one
|
||||
should use stride = 2 in the last unit of the first block.
|
||||
|
||||
Args:
|
||||
inputs: A tensor of size [batch, height, width, channels].
|
||||
depth: The depth of the ResNet unit output.
|
||||
depth_bottleneck: The depth of the bottleneck layers.
|
||||
stride: The ResNet unit's stride. Determines the amount of downsampling of
|
||||
the units output compared to its input.
|
||||
rate: An integer, rate for atrous convolution.
|
||||
outputs_collections: Collection to add the ResNet unit output.
|
||||
scope: Optional variable_scope.
|
||||
|
||||
Returns:
|
||||
The ResNet unit's output.
|
||||
"""
|
||||
with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
|
||||
depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
|
||||
if depth == depth_in:
|
||||
shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
|
||||
else:
|
||||
shortcut = slim.conv2d(inputs, depth, [1, 1], stride=stride,
|
||||
activation_fn=None, scope='shortcut')
|
||||
|
||||
residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1,
|
||||
scope='conv1')
|
||||
residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride,
|
||||
rate=rate, scope='conv2')
|
||||
residual = slim.conv2d(residual, depth, [1, 1], stride=1,
|
||||
activation_fn=None, scope='conv3')
|
||||
|
||||
output = tf.nn.relu(shortcut + residual)
|
||||
|
||||
return slim.utils.collect_named_outputs(outputs_collections,
|
||||
sc.original_name_scope,
|
||||
output)
|
||||
|
||||
|
||||
def resnet_v1(inputs,
|
||||
blocks,
|
||||
num_classes=None,
|
||||
is_training=True,
|
||||
global_pool=True,
|
||||
output_stride=None,
|
||||
include_root_block=True,
|
||||
reuse=None,
|
||||
scope=None):
|
||||
"""Generator for v1 ResNet models.
|
||||
|
||||
This function generates a family of ResNet v1 models. See the resnet_v1_*()
|
||||
methods for specific model instantiations, obtained by selecting different
|
||||
block instantiations that produce ResNets of various depths.
|
||||
|
||||
Training for image classification on Imagenet is usually done with [224, 224]
|
||||
inputs, resulting in [7, 7] feature maps at the output of the last ResNet
|
||||
block for the ResNets defined in [1] that have nominal stride equal to 32.
|
||||
However, for dense prediction tasks we advise that one uses inputs with
|
||||
spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
|
||||
this case the feature maps at the ResNet output will have spatial shape
|
||||
[(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
|
||||
and corners exactly aligned with the input image corners, which greatly
|
||||
facilitates alignment of the features to the image. Using as input [225, 225]
|
||||
images results in [8, 8] feature maps at the output of the last ResNet block.
|
||||
|
||||
For dense prediction tasks, the ResNet needs to run in fully-convolutional
|
||||
(FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
|
||||
have nominal stride equal to 32 and a good choice in FCN mode is to use
|
||||
output_stride=16 in order to increase the density of the computed features at
|
||||
small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.
|
||||
|
||||
Args:
|
||||
inputs: A tensor of size [batch, height_in, width_in, channels].
|
||||
blocks: A list of length equal to the number of ResNet blocks. Each element
|
||||
is a resnet_utils.Block object describing the units in the block.
|
||||
num_classes: Number of predicted classes for classification tasks. If None
|
||||
we return the features before the logit layer.
|
||||
is_training: whether is training or not.
|
||||
global_pool: If True, we perform global average pooling before computing the
|
||||
logits. Set to True for image classification, False for dense prediction.
|
||||
output_stride: If None, then the output will be computed at the nominal
|
||||
network stride. If output_stride is not None, it specifies the requested
|
||||
ratio of input to output spatial resolution.
|
||||
include_root_block: If True, include the initial convolution followed by
|
||||
max-pooling, if False excludes it.
|
||||
reuse: whether or not the network and its variables should be reused. To be
|
||||
able to reuse 'scope' must be given.
|
||||
scope: Optional variable_scope.
|
||||
|
||||
Returns:
|
||||
net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
|
||||
If global_pool is False, then height_out and width_out are reduced by a
|
||||
factor of output_stride compared to the respective height_in and width_in,
|
||||
else both height_out and width_out equal one. If num_classes is None, then
|
||||
net is the output of the last ResNet block, potentially after global
|
||||
average pooling. If num_classes is not None, net contains the pre-softmax
|
||||
activations.
|
||||
end_points: A dictionary from components of the network to the corresponding
|
||||
activation.
|
||||
|
||||
Raises:
|
||||
ValueError: If the target output_stride is not valid.
|
||||
"""
|
||||
with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
|
||||
end_points_collection = sc.name + '_end_points'
|
||||
with slim.arg_scope([slim.conv2d, bottleneck,
|
||||
resnet_utils.stack_blocks_dense],
|
||||
outputs_collections=end_points_collection):
|
||||
with slim.arg_scope([slim.batch_norm], is_training=is_training):
|
||||
net = inputs
|
||||
if include_root_block:
|
||||
if output_stride is not None:
|
||||
if output_stride % 4 != 0:
|
||||
raise ValueError('The output_stride needs to be a multiple of 4.')
|
||||
output_stride /= 4
|
||||
net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
|
||||
net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
|
||||
net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
|
||||
if global_pool:
|
||||
# Global average pooling.
|
||||
net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
|
||||
if num_classes is not None:
|
||||
net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
|
||||
normalizer_fn=None, scope='logits')
|
||||
# Convert end_points_collection into a dictionary of end_points.
|
||||
end_points = slim.utils.convert_collection_to_dict(end_points_collection)
|
||||
if num_classes is not None:
|
||||
end_points['predictions'] = slim.softmax(net, scope='predictions')
|
||||
return net, end_points
|
||||
resnet_v1.default_image_size = 224
|
||||
|
||||
|
||||
def resnet_v1_50(inputs,
|
||||
num_classes=None,
|
||||
is_training=True,
|
||||
global_pool=True,
|
||||
output_stride=None,
|
||||
reuse=None,
|
||||
scope='resnet_v1_50'):
|
||||
"""ResNet-50 model of [1]. See resnet_v1() for arg and return description."""
|
||||
blocks = [
|
||||
resnet_utils.Block(
|
||||
'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block4', bottleneck, [(2048, 512, 1)] * 3)
|
||||
]
|
||||
return resnet_v1(inputs, blocks, num_classes, is_training,
|
||||
global_pool=global_pool, output_stride=output_stride,
|
||||
include_root_block=True, reuse=reuse, scope=scope)
|
||||
|
||||
|
||||
def resnet_v1_101(inputs,
|
||||
num_classes=None,
|
||||
is_training=True,
|
||||
global_pool=True,
|
||||
output_stride=None,
|
||||
reuse=None,
|
||||
scope='resnet_v1_101'):
|
||||
"""ResNet-101 model of [1]. See resnet_v1() for arg and return description."""
|
||||
blocks = [
|
||||
resnet_utils.Block(
|
||||
'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block4', bottleneck, [(2048, 512, 1)] * 3)
|
||||
]
|
||||
return resnet_v1(inputs, blocks, num_classes, is_training,
|
||||
global_pool=global_pool, output_stride=output_stride,
|
||||
include_root_block=True, reuse=reuse, scope=scope)
|
||||
|
||||
|
||||
def resnet_v1_152(inputs,
|
||||
num_classes=None,
|
||||
is_training=True,
|
||||
global_pool=True,
|
||||
output_stride=None,
|
||||
reuse=None,
|
||||
scope='resnet_v1_152'):
|
||||
"""ResNet-152 model of [1]. See resnet_v1() for arg and return description."""
|
||||
blocks = [
|
||||
resnet_utils.Block(
|
||||
'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block4', bottleneck, [(2048, 512, 1)] * 3)]
|
||||
return resnet_v1(inputs, blocks, num_classes, is_training,
|
||||
global_pool=global_pool, output_stride=output_stride,
|
||||
include_root_block=True, reuse=reuse, scope=scope)
|
||||
|
||||
|
||||
def resnet_v1_200(inputs,
|
||||
num_classes=None,
|
||||
is_training=True,
|
||||
global_pool=True,
|
||||
output_stride=None,
|
||||
reuse=None,
|
||||
scope='resnet_v1_200'):
|
||||
"""ResNet-200 model of [2]. See resnet_v1() for arg and return description."""
|
||||
blocks = [
|
||||
resnet_utils.Block(
|
||||
'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block4', bottleneck, [(2048, 512, 1)] * 3)]
|
||||
return resnet_v1(inputs, blocks, num_classes, is_training,
|
||||
global_pool=global_pool, output_stride=output_stride,
|
||||
include_root_block=True, reuse=reuse, scope=scope)
|
||||
@ -0,0 +1,450 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for slim.nets.resnet_v1."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import resnet_utils
|
||||
from nets import resnet_v1
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
def create_test_input(batch_size, height, width, channels):
|
||||
"""Create test input tensor.
|
||||
|
||||
Args:
|
||||
batch_size: The number of images per batch or `None` if unknown.
|
||||
height: The height of each image or `None` if unknown.
|
||||
width: The width of each image or `None` if unknown.
|
||||
channels: The number of channels per image or `None` if unknown.
|
||||
|
||||
Returns:
|
||||
Either a placeholder `Tensor` of dimension
|
||||
[batch_size, height, width, channels] if any of the inputs are `None` or a
|
||||
constant `Tensor` with the mesh grid values along the spatial dimensions.
|
||||
"""
|
||||
if None in [batch_size, height, width, channels]:
|
||||
return tf.placeholder(tf.float32, (batch_size, height, width, channels))
|
||||
else:
|
||||
return tf.to_float(
|
||||
np.tile(
|
||||
np.reshape(
|
||||
np.reshape(np.arange(height), [height, 1]) +
|
||||
np.reshape(np.arange(width), [1, width]),
|
||||
[1, height, width, 1]),
|
||||
[batch_size, 1, 1, channels]))
|
||||
|
||||
|
||||
class ResnetUtilsTest(tf.test.TestCase):
|
||||
|
||||
def testSubsampleThreeByThree(self):
|
||||
x = tf.reshape(tf.to_float(tf.range(9)), [1, 3, 3, 1])
|
||||
x = resnet_utils.subsample(x, 2)
|
||||
expected = tf.reshape(tf.constant([0, 2, 6, 8]), [1, 2, 2, 1])
|
||||
with self.test_session():
|
||||
self.assertAllClose(x.eval(), expected.eval())
|
||||
|
||||
def testSubsampleFourByFour(self):
|
||||
x = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1])
|
||||
x = resnet_utils.subsample(x, 2)
|
||||
expected = tf.reshape(tf.constant([0, 2, 8, 10]), [1, 2, 2, 1])
|
||||
with self.test_session():
|
||||
self.assertAllClose(x.eval(), expected.eval())
|
||||
|
||||
def testConv2DSameEven(self):
|
||||
n, n2 = 4, 2
|
||||
|
||||
# Input image.
|
||||
x = create_test_input(1, n, n, 1)
|
||||
|
||||
# Convolution kernel.
|
||||
w = create_test_input(1, 3, 3, 1)
|
||||
w = tf.reshape(w, [3, 3, 1, 1])
|
||||
|
||||
tf.get_variable('Conv/weights', initializer=w)
|
||||
tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
|
||||
y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
|
||||
y1_expected = tf.to_float([[14, 28, 43, 26],
|
||||
[28, 48, 66, 37],
|
||||
[43, 66, 84, 46],
|
||||
[26, 37, 46, 22]])
|
||||
y1_expected = tf.reshape(y1_expected, [1, n, n, 1])
|
||||
|
||||
y2 = resnet_utils.subsample(y1, 2)
|
||||
y2_expected = tf.to_float([[14, 43],
|
||||
[43, 84]])
|
||||
y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])
|
||||
|
||||
y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
|
||||
y3_expected = y2_expected
|
||||
|
||||
y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
|
||||
y4_expected = tf.to_float([[48, 37],
|
||||
[37, 22]])
|
||||
y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1])
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
self.assertAllClose(y1.eval(), y1_expected.eval())
|
||||
self.assertAllClose(y2.eval(), y2_expected.eval())
|
||||
self.assertAllClose(y3.eval(), y3_expected.eval())
|
||||
self.assertAllClose(y4.eval(), y4_expected.eval())
|
||||
|
||||
def testConv2DSameOdd(self):
|
||||
n, n2 = 5, 3
|
||||
|
||||
# Input image.
|
||||
x = create_test_input(1, n, n, 1)
|
||||
|
||||
# Convolution kernel.
|
||||
w = create_test_input(1, 3, 3, 1)
|
||||
w = tf.reshape(w, [3, 3, 1, 1])
|
||||
|
||||
tf.get_variable('Conv/weights', initializer=w)
|
||||
tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
|
||||
y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
|
||||
y1_expected = tf.to_float([[14, 28, 43, 58, 34],
|
||||
[28, 48, 66, 84, 46],
|
||||
[43, 66, 84, 102, 55],
|
||||
[58, 84, 102, 120, 64],
|
||||
[34, 46, 55, 64, 30]])
|
||||
y1_expected = tf.reshape(y1_expected, [1, n, n, 1])
|
||||
|
||||
y2 = resnet_utils.subsample(y1, 2)
|
||||
y2_expected = tf.to_float([[14, 43, 34],
|
||||
[43, 84, 55],
|
||||
[34, 55, 30]])
|
||||
y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])
|
||||
|
||||
y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
|
||||
y3_expected = y2_expected
|
||||
|
||||
y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
|
||||
y4_expected = y2_expected
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
self.assertAllClose(y1.eval(), y1_expected.eval())
|
||||
self.assertAllClose(y2.eval(), y2_expected.eval())
|
||||
self.assertAllClose(y3.eval(), y3_expected.eval())
|
||||
self.assertAllClose(y4.eval(), y4_expected.eval())
|
||||
|
||||
def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
|
||||
"""A plain ResNet without extra layers before or after the ResNet blocks."""
|
||||
with tf.variable_scope(scope, values=[inputs]):
|
||||
with slim.arg_scope([slim.conv2d], outputs_collections='end_points'):
|
||||
net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)
|
||||
end_points = dict(tf.get_collection('end_points'))
|
||||
return net, end_points
|
||||
|
||||
def testEndPointsV1(self):
|
||||
"""Test the end points of a tiny v1 bottleneck network."""
|
||||
bottleneck = resnet_v1.bottleneck
|
||||
blocks = [resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
|
||||
resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])]
|
||||
inputs = create_test_input(2, 32, 16, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
_, end_points = self._resnet_plain(inputs, blocks, scope='tiny')
|
||||
expected = [
|
||||
'tiny/block1/unit_1/bottleneck_v1/shortcut',
|
||||
'tiny/block1/unit_1/bottleneck_v1/conv1',
|
||||
'tiny/block1/unit_1/bottleneck_v1/conv2',
|
||||
'tiny/block1/unit_1/bottleneck_v1/conv3',
|
||||
'tiny/block1/unit_2/bottleneck_v1/conv1',
|
||||
'tiny/block1/unit_2/bottleneck_v1/conv2',
|
||||
'tiny/block1/unit_2/bottleneck_v1/conv3',
|
||||
'tiny/block2/unit_1/bottleneck_v1/shortcut',
|
||||
'tiny/block2/unit_1/bottleneck_v1/conv1',
|
||||
'tiny/block2/unit_1/bottleneck_v1/conv2',
|
||||
'tiny/block2/unit_1/bottleneck_v1/conv3',
|
||||
'tiny/block2/unit_2/bottleneck_v1/conv1',
|
||||
'tiny/block2/unit_2/bottleneck_v1/conv2',
|
||||
'tiny/block2/unit_2/bottleneck_v1/conv3']
|
||||
self.assertItemsEqual(expected, end_points)
|
||||
|
||||
def _stack_blocks_nondense(self, net, blocks):
|
||||
"""A simplified ResNet Block stacker without output stride control."""
|
||||
for block in blocks:
|
||||
with tf.variable_scope(block.scope, 'block', [net]):
|
||||
for i, unit in enumerate(block.args):
|
||||
depth, depth_bottleneck, stride = unit
|
||||
with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
|
||||
net = block.unit_fn(net,
|
||||
depth=depth,
|
||||
depth_bottleneck=depth_bottleneck,
|
||||
stride=stride,
|
||||
rate=1)
|
||||
return net
|
||||
|
||||
def _atrousValues(self, bottleneck):
|
||||
"""Verify the values of dense feature extraction by atrous convolution.
|
||||
|
||||
Make sure that dense feature extraction by stack_blocks_dense() followed by
|
||||
subsampling gives identical results to feature extraction at the nominal
|
||||
network output stride using the simple self._stack_blocks_nondense() above.
|
||||
|
||||
Args:
|
||||
bottleneck: The bottleneck function.
|
||||
"""
|
||||
blocks = [
|
||||
resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
|
||||
resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]),
|
||||
resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]),
|
||||
resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)])
|
||||
]
|
||||
nominal_stride = 8
|
||||
|
||||
# Test both odd and even input dimensions.
|
||||
height = 30
|
||||
width = 31
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
with slim.arg_scope([slim.batch_norm], is_training=False):
|
||||
for output_stride in [1, 2, 4, 8, None]:
|
||||
with tf.Graph().as_default():
|
||||
with self.test_session() as sess:
|
||||
tf.set_random_seed(0)
|
||||
inputs = create_test_input(1, height, width, 3)
|
||||
# Dense feature extraction followed by subsampling.
|
||||
output = resnet_utils.stack_blocks_dense(inputs,
|
||||
blocks,
|
||||
output_stride)
|
||||
if output_stride is None:
|
||||
factor = 1
|
||||
else:
|
||||
factor = nominal_stride // output_stride
|
||||
|
||||
output = resnet_utils.subsample(output, factor)
|
||||
# Make the two networks use the same weights.
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
# Feature extraction at the nominal network rate.
|
||||
expected = self._stack_blocks_nondense(inputs, blocks)
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output, expected = sess.run([output, expected])
|
||||
self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4)
|
||||
|
||||
def testAtrousValuesBottleneck(self):
|
||||
self._atrousValues(resnet_v1.bottleneck)
|
||||
|
||||
|
||||
class ResnetCompleteNetworkTest(tf.test.TestCase):
|
||||
"""Tests with complete small ResNet v1 networks."""
|
||||
|
||||
def _resnet_small(self,
|
||||
inputs,
|
||||
num_classes=None,
|
||||
is_training=True,
|
||||
global_pool=True,
|
||||
output_stride=None,
|
||||
include_root_block=True,
|
||||
reuse=None,
|
||||
scope='resnet_v1_small'):
|
||||
"""A shallow and thin ResNet v1 for faster tests."""
|
||||
bottleneck = resnet_v1.bottleneck
|
||||
blocks = [
|
||||
resnet_utils.Block(
|
||||
'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block4', bottleneck, [(32, 8, 1)] * 2)]
|
||||
return resnet_v1.resnet_v1(inputs, blocks, num_classes,
|
||||
is_training=is_training,
|
||||
global_pool=global_pool,
|
||||
output_stride=output_stride,
|
||||
include_root_block=include_root_block,
|
||||
reuse=reuse,
|
||||
scope=scope)
|
||||
|
||||
def testClassificationEndPoints(self):
|
||||
global_pool = True
|
||||
num_classes = 10
|
||||
inputs = create_test_input(2, 224, 224, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
logits, end_points = self._resnet_small(inputs, num_classes,
|
||||
global_pool=global_pool,
|
||||
scope='resnet')
|
||||
self.assertTrue(logits.op.name.startswith('resnet/logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes])
|
||||
self.assertTrue('predictions' in end_points)
|
||||
self.assertListEqual(end_points['predictions'].get_shape().as_list(),
|
||||
[2, 1, 1, num_classes])
|
||||
|
||||
def testClassificationShapes(self):
|
||||
global_pool = True
|
||||
num_classes = 10
|
||||
inputs = create_test_input(2, 224, 224, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
_, end_points = self._resnet_small(inputs, num_classes,
|
||||
global_pool=global_pool,
|
||||
scope='resnet')
|
||||
endpoint_to_shape = {
|
||||
'resnet/block1': [2, 28, 28, 4],
|
||||
'resnet/block2': [2, 14, 14, 8],
|
||||
'resnet/block3': [2, 7, 7, 16],
|
||||
'resnet/block4': [2, 7, 7, 32]}
|
||||
for endpoint in endpoint_to_shape:
|
||||
shape = endpoint_to_shape[endpoint]
|
||||
self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
|
||||
|
||||
def testFullyConvolutionalEndpointShapes(self):
|
||||
global_pool = False
|
||||
num_classes = 10
|
||||
inputs = create_test_input(2, 321, 321, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
_, end_points = self._resnet_small(inputs, num_classes,
|
||||
global_pool=global_pool,
|
||||
scope='resnet')
|
||||
endpoint_to_shape = {
|
||||
'resnet/block1': [2, 41, 41, 4],
|
||||
'resnet/block2': [2, 21, 21, 8],
|
||||
'resnet/block3': [2, 11, 11, 16],
|
||||
'resnet/block4': [2, 11, 11, 32]}
|
||||
for endpoint in endpoint_to_shape:
|
||||
shape = endpoint_to_shape[endpoint]
|
||||
self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
|
||||
|
||||
def testRootlessFullyConvolutionalEndpointShapes(self):
|
||||
global_pool = False
|
||||
num_classes = 10
|
||||
inputs = create_test_input(2, 128, 128, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
_, end_points = self._resnet_small(inputs, num_classes,
|
||||
global_pool=global_pool,
|
||||
include_root_block=False,
|
||||
scope='resnet')
|
||||
endpoint_to_shape = {
|
||||
'resnet/block1': [2, 64, 64, 4],
|
||||
'resnet/block2': [2, 32, 32, 8],
|
||||
'resnet/block3': [2, 16, 16, 16],
|
||||
'resnet/block4': [2, 16, 16, 32]}
|
||||
for endpoint in endpoint_to_shape:
|
||||
shape = endpoint_to_shape[endpoint]
|
||||
self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
|
||||
|
||||
def testAtrousFullyConvolutionalEndpointShapes(self):
|
||||
global_pool = False
|
||||
num_classes = 10
|
||||
output_stride = 8
|
||||
inputs = create_test_input(2, 321, 321, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
_, end_points = self._resnet_small(inputs,
|
||||
num_classes,
|
||||
global_pool=global_pool,
|
||||
output_stride=output_stride,
|
||||
scope='resnet')
|
||||
endpoint_to_shape = {
|
||||
'resnet/block1': [2, 41, 41, 4],
|
||||
'resnet/block2': [2, 41, 41, 8],
|
||||
'resnet/block3': [2, 41, 41, 16],
|
||||
'resnet/block4': [2, 41, 41, 32]}
|
||||
for endpoint in endpoint_to_shape:
|
||||
shape = endpoint_to_shape[endpoint]
|
||||
self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
|
||||
|
||||
def testAtrousFullyConvolutionalValues(self):
|
||||
"""Verify dense feature extraction with atrous convolution."""
|
||||
nominal_stride = 32
|
||||
for output_stride in [4, 8, 16, 32, None]:
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
with tf.Graph().as_default():
|
||||
with self.test_session() as sess:
|
||||
tf.set_random_seed(0)
|
||||
inputs = create_test_input(2, 81, 81, 3)
|
||||
# Dense feature extraction followed by subsampling.
|
||||
output, _ = self._resnet_small(inputs, None, is_training=False,
|
||||
global_pool=False,
|
||||
output_stride=output_stride)
|
||||
if output_stride is None:
|
||||
factor = 1
|
||||
else:
|
||||
factor = nominal_stride // output_stride
|
||||
output = resnet_utils.subsample(output, factor)
|
||||
# Make the two networks use the same weights.
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
# Feature extraction at the nominal network rate.
|
||||
expected, _ = self._resnet_small(inputs, None, is_training=False,
|
||||
global_pool=False)
|
||||
sess.run(tf.initialize_all_variables())
|
||||
self.assertAllClose(output.eval(), expected.eval(),
|
||||
atol=1e-4, rtol=1e-4)
|
||||
|
||||
def testUnknownBatchSize(self):
|
||||
batch = 2
|
||||
height, width = 65, 65
|
||||
global_pool = True
|
||||
num_classes = 10
|
||||
inputs = create_test_input(None, height, width, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
logits, _ = self._resnet_small(inputs, num_classes,
|
||||
global_pool=global_pool,
|
||||
scope='resnet')
|
||||
self.assertTrue(logits.op.name.startswith('resnet/logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[None, 1, 1, num_classes])
|
||||
images = create_test_input(batch, height, width, 3)
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(logits, {inputs: images.eval()})
|
||||
self.assertEqual(output.shape, (batch, 1, 1, num_classes))
|
||||
|
||||
def testFullyConvolutionalUnknownHeightWidth(self):
|
||||
batch = 2
|
||||
height, width = 65, 65
|
||||
global_pool = False
|
||||
inputs = create_test_input(batch, None, None, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
output, _ = self._resnet_small(inputs, None, global_pool=global_pool)
|
||||
self.assertListEqual(output.get_shape().as_list(),
|
||||
[batch, None, None, 32])
|
||||
images = create_test_input(batch, height, width, 3)
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(output, {inputs: images.eval()})
|
||||
self.assertEqual(output.shape, (batch, 3, 3, 32))
|
||||
|
||||
def testAtrousFullyConvolutionalUnknownHeightWidth(self):
|
||||
batch = 2
|
||||
height, width = 65, 65
|
||||
global_pool = False
|
||||
output_stride = 8
|
||||
inputs = create_test_input(batch, None, None, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
output, _ = self._resnet_small(inputs,
|
||||
None,
|
||||
global_pool=global_pool,
|
||||
output_stride=output_stride)
|
||||
self.assertListEqual(output.get_shape().as_list(),
|
||||
[batch, None, None, 32])
|
||||
images = create_test_input(batch, height, width, 3)
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(output, {inputs: images.eval()})
|
||||
self.assertEqual(output.shape, (batch, 9, 9, 32))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
@ -0,0 +1,302 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains definitions for the preactivation form of Residual Networks.
|
||||
|
||||
Residual networks (ResNets) were originally proposed in:
|
||||
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
||||
Deep Residual Learning for Image Recognition. arXiv:1512.03385
|
||||
|
||||
The full preactivation 'v2' ResNet variant implemented in this module was
|
||||
introduced by:
|
||||
[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
||||
Identity Mappings in Deep Residual Networks. arXiv: 1603.05027
|
||||
|
||||
The key difference of the full preactivation 'v2' variant compared to the
|
||||
'v1' variant in [1] is the use of batch normalization before every weight layer.
|
||||
Another difference is that 'v2' ResNets do not include an activation function in
|
||||
the main pathway. Also see [2; Fig. 4e].
|
||||
|
||||
Typical use:
|
||||
|
||||
from tensorflow.contrib.slim.nets import resnet_v2
|
||||
|
||||
ResNet-101 for image classification into 1000 classes:
|
||||
|
||||
# inputs has shape [batch, 224, 224, 3]
|
||||
with slim.arg_scope(resnet_v2.resnet_arg_scope()):
|
||||
net, end_points = resnet_v2.resnet_v2_101(inputs, 1000, is_training=False)
|
||||
|
||||
ResNet-101 for semantic segmentation into 21 classes:
|
||||
|
||||
# inputs has shape [batch, 513, 513, 3]
|
||||
with slim.arg_scope(resnet_v2.resnet_arg_scope(is_training)):
|
||||
net, end_points = resnet_v2.resnet_v2_101(inputs,
|
||||
21,
|
||||
is_training=False,
|
||||
global_pool=False,
|
||||
output_stride=16)
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import resnet_utils
|
||||
|
||||
slim = tf.contrib.slim
|
||||
resnet_arg_scope = resnet_utils.resnet_arg_scope
|
||||
|
||||
|
||||
@slim.add_arg_scope
|
||||
def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
|
||||
outputs_collections=None, scope=None):
|
||||
"""Bottleneck residual unit variant with BN before convolutions.
|
||||
|
||||
This is the full preactivation residual unit variant proposed in [2]. See
|
||||
Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck
|
||||
variant which has an extra bottleneck layer.
|
||||
|
||||
When putting together two consecutive ResNet blocks that use this unit, one
|
||||
should use stride = 2 in the last unit of the first block.
|
||||
|
||||
Args:
|
||||
inputs: A tensor of size [batch, height, width, channels].
|
||||
depth: The depth of the ResNet unit output.
|
||||
depth_bottleneck: The depth of the bottleneck layers.
|
||||
stride: The ResNet unit's stride. Determines the amount of downsampling of
|
||||
the units output compared to its input.
|
||||
rate: An integer, rate for atrous convolution.
|
||||
outputs_collections: Collection to add the ResNet unit output.
|
||||
scope: Optional variable_scope.
|
||||
|
||||
Returns:
|
||||
The ResNet unit's output.
|
||||
"""
|
||||
with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
|
||||
depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
|
||||
preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')
|
||||
if depth == depth_in:
|
||||
shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
|
||||
else:
|
||||
shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
|
||||
normalizer_fn=None, activation_fn=None,
|
||||
scope='shortcut')
|
||||
|
||||
residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
|
||||
scope='conv1')
|
||||
residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride,
|
||||
rate=rate, scope='conv2')
|
||||
residual = slim.conv2d(residual, depth, [1, 1], stride=1,
|
||||
normalizer_fn=None, activation_fn=None,
|
||||
scope='conv3')
|
||||
|
||||
output = shortcut + residual
|
||||
|
||||
return slim.utils.collect_named_outputs(outputs_collections,
|
||||
sc.original_name_scope,
|
||||
output)
|
||||
|
||||
|
||||
def resnet_v2(inputs,
|
||||
blocks,
|
||||
num_classes=None,
|
||||
is_training=True,
|
||||
global_pool=True,
|
||||
output_stride=None,
|
||||
include_root_block=True,
|
||||
reuse=None,
|
||||
scope=None):
|
||||
"""Generator for v2 (preactivation) ResNet models.
|
||||
|
||||
This function generates a family of ResNet v2 models. See the resnet_v2_*()
|
||||
methods for specific model instantiations, obtained by selecting different
|
||||
block instantiations that produce ResNets of various depths.
|
||||
|
||||
Training for image classification on Imagenet is usually done with [224, 224]
|
||||
inputs, resulting in [7, 7] feature maps at the output of the last ResNet
|
||||
block for the ResNets defined in [1] that have nominal stride equal to 32.
|
||||
However, for dense prediction tasks we advise that one uses inputs with
|
||||
spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
|
||||
this case the feature maps at the ResNet output will have spatial shape
|
||||
[(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
|
||||
and corners exactly aligned with the input image corners, which greatly
|
||||
facilitates alignment of the features to the image. Using as input [225, 225]
|
||||
images results in [8, 8] feature maps at the output of the last ResNet block.
|
||||
|
||||
For dense prediction tasks, the ResNet needs to run in fully-convolutional
|
||||
(FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
|
||||
have nominal stride equal to 32 and a good choice in FCN mode is to use
|
||||
output_stride=16 in order to increase the density of the computed features at
|
||||
small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.
|
||||
|
||||
Args:
|
||||
inputs: A tensor of size [batch, height_in, width_in, channels].
|
||||
blocks: A list of length equal to the number of ResNet blocks. Each element
|
||||
is a resnet_utils.Block object describing the units in the block.
|
||||
num_classes: Number of predicted classes for classification tasks. If None
|
||||
we return the features before the logit layer.
|
||||
is_training: whether is training or not.
|
||||
global_pool: If True, we perform global average pooling before computing the
|
||||
logits. Set to True for image classification, False for dense prediction.
|
||||
output_stride: If None, then the output will be computed at the nominal
|
||||
network stride. If output_stride is not None, it specifies the requested
|
||||
ratio of input to output spatial resolution.
|
||||
include_root_block: If True, include the initial convolution followed by
|
||||
max-pooling, if False excludes it. If excluded, `inputs` should be the
|
||||
results of an activation-less convolution.
|
||||
reuse: whether or not the network and its variables should be reused. To be
|
||||
able to reuse 'scope' must be given.
|
||||
scope: Optional variable_scope.
|
||||
|
||||
|
||||
Returns:
|
||||
net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
|
||||
If global_pool is False, then height_out and width_out are reduced by a
|
||||
factor of output_stride compared to the respective height_in and width_in,
|
||||
else both height_out and width_out equal one. If num_classes is None, then
|
||||
net is the output of the last ResNet block, potentially after global
|
||||
average pooling. If num_classes is not None, net contains the pre-softmax
|
||||
activations.
|
||||
end_points: A dictionary from components of the network to the corresponding
|
||||
activation.
|
||||
|
||||
Raises:
|
||||
ValueError: If the target output_stride is not valid.
|
||||
"""
|
||||
with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
|
||||
end_points_collection = sc.name + '_end_points'
|
||||
with slim.arg_scope([slim.conv2d, bottleneck,
|
||||
resnet_utils.stack_blocks_dense],
|
||||
outputs_collections=end_points_collection):
|
||||
with slim.arg_scope([slim.batch_norm], is_training=is_training):
|
||||
net = inputs
|
||||
if include_root_block:
|
||||
if output_stride is not None:
|
||||
if output_stride % 4 != 0:
|
||||
raise ValueError('The output_stride needs to be a multiple of 4.')
|
||||
output_stride /= 4
|
||||
# We do not include batch normalization or activation functions in
|
||||
# conv1 because the first ResNet unit will perform these. Cf.
|
||||
# Appendix of [2].
|
||||
with slim.arg_scope([slim.conv2d],
|
||||
activation_fn=None, normalizer_fn=None):
|
||||
net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
|
||||
net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
|
||||
net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
|
||||
# This is needed because the pre-activation variant does not have batch
|
||||
# normalization or activation functions in the residual unit output. See
|
||||
# Appendix of [2].
|
||||
net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
|
||||
if global_pool:
|
||||
# Global average pooling.
|
||||
net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
|
||||
if num_classes is not None:
|
||||
net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
|
||||
normalizer_fn=None, scope='logits')
|
||||
# Convert end_points_collection into a dictionary of end_points.
|
||||
end_points = slim.utils.convert_collection_to_dict(end_points_collection)
|
||||
if num_classes is not None:
|
||||
end_points['predictions'] = slim.softmax(net, scope='predictions')
|
||||
return net, end_points
|
||||
resnet_v2.default_image_size = 224
|
||||
|
||||
|
||||
def resnet_v2_50(inputs,
|
||||
num_classes=None,
|
||||
is_training=True,
|
||||
global_pool=True,
|
||||
output_stride=None,
|
||||
reuse=None,
|
||||
scope='resnet_v2_50'):
|
||||
"""ResNet-50 model of [1]. See resnet_v2() for arg and return description."""
|
||||
blocks = [
|
||||
resnet_utils.Block(
|
||||
'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block4', bottleneck, [(2048, 512, 1)] * 3)]
|
||||
return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
|
||||
global_pool=global_pool, output_stride=output_stride,
|
||||
include_root_block=True, reuse=reuse, scope=scope)
|
||||
|
||||
|
||||
def resnet_v2_101(inputs,
|
||||
num_classes=None,
|
||||
is_training=True,
|
||||
global_pool=True,
|
||||
output_stride=None,
|
||||
reuse=None,
|
||||
scope='resnet_v2_101'):
|
||||
"""ResNet-101 model of [1]. See resnet_v2() for arg and return description."""
|
||||
blocks = [
|
||||
resnet_utils.Block(
|
||||
'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block4', bottleneck, [(2048, 512, 1)] * 3)]
|
||||
return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
|
||||
global_pool=global_pool, output_stride=output_stride,
|
||||
include_root_block=True, reuse=reuse, scope=scope)
|
||||
|
||||
|
||||
def resnet_v2_152(inputs,
|
||||
num_classes=None,
|
||||
is_training=True,
|
||||
global_pool=True,
|
||||
output_stride=None,
|
||||
reuse=None,
|
||||
scope='resnet_v2_152'):
|
||||
"""ResNet-152 model of [1]. See resnet_v2() for arg and return description."""
|
||||
blocks = [
|
||||
resnet_utils.Block(
|
||||
'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block4', bottleneck, [(2048, 512, 1)] * 3)]
|
||||
return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
|
||||
global_pool=global_pool, output_stride=output_stride,
|
||||
include_root_block=True, reuse=reuse, scope=scope)
|
||||
|
||||
|
||||
def resnet_v2_200(inputs,
|
||||
num_classes=None,
|
||||
is_training=True,
|
||||
global_pool=True,
|
||||
output_stride=None,
|
||||
reuse=None,
|
||||
scope='resnet_v2_200'):
|
||||
"""ResNet-200 model of [2]. See resnet_v2() for arg and return description."""
|
||||
blocks = [
|
||||
resnet_utils.Block(
|
||||
'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block4', bottleneck, [(2048, 512, 1)] * 3)]
|
||||
return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
|
||||
global_pool=global_pool, output_stride=output_stride,
|
||||
include_root_block=True, reuse=reuse, scope=scope)
|
||||
@ -0,0 +1,453 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for slim.nets.resnet_v2."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import resnet_utils
|
||||
from nets import resnet_v2
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
def create_test_input(batch_size, height, width, channels):
|
||||
"""Create test input tensor.
|
||||
|
||||
Args:
|
||||
batch_size: The number of images per batch or `None` if unknown.
|
||||
height: The height of each image or `None` if unknown.
|
||||
width: The width of each image or `None` if unknown.
|
||||
channels: The number of channels per image or `None` if unknown.
|
||||
|
||||
Returns:
|
||||
Either a placeholder `Tensor` of dimension
|
||||
[batch_size, height, width, channels] if any of the inputs are `None` or a
|
||||
constant `Tensor` with the mesh grid values along the spatial dimensions.
|
||||
"""
|
||||
if None in [batch_size, height, width, channels]:
|
||||
return tf.placeholder(tf.float32, (batch_size, height, width, channels))
|
||||
else:
|
||||
return tf.to_float(
|
||||
np.tile(
|
||||
np.reshape(
|
||||
np.reshape(np.arange(height), [height, 1]) +
|
||||
np.reshape(np.arange(width), [1, width]),
|
||||
[1, height, width, 1]),
|
||||
[batch_size, 1, 1, channels]))
|
||||
|
||||
|
||||
class ResnetUtilsTest(tf.test.TestCase):
|
||||
|
||||
def testSubsampleThreeByThree(self):
|
||||
x = tf.reshape(tf.to_float(tf.range(9)), [1, 3, 3, 1])
|
||||
x = resnet_utils.subsample(x, 2)
|
||||
expected = tf.reshape(tf.constant([0, 2, 6, 8]), [1, 2, 2, 1])
|
||||
with self.test_session():
|
||||
self.assertAllClose(x.eval(), expected.eval())
|
||||
|
||||
def testSubsampleFourByFour(self):
|
||||
x = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1])
|
||||
x = resnet_utils.subsample(x, 2)
|
||||
expected = tf.reshape(tf.constant([0, 2, 8, 10]), [1, 2, 2, 1])
|
||||
with self.test_session():
|
||||
self.assertAllClose(x.eval(), expected.eval())
|
||||
|
||||
def testConv2DSameEven(self):
|
||||
n, n2 = 4, 2
|
||||
|
||||
# Input image.
|
||||
x = create_test_input(1, n, n, 1)
|
||||
|
||||
# Convolution kernel.
|
||||
w = create_test_input(1, 3, 3, 1)
|
||||
w = tf.reshape(w, [3, 3, 1, 1])
|
||||
|
||||
tf.get_variable('Conv/weights', initializer=w)
|
||||
tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
|
||||
y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
|
||||
y1_expected = tf.to_float([[14, 28, 43, 26],
|
||||
[28, 48, 66, 37],
|
||||
[43, 66, 84, 46],
|
||||
[26, 37, 46, 22]])
|
||||
y1_expected = tf.reshape(y1_expected, [1, n, n, 1])
|
||||
|
||||
y2 = resnet_utils.subsample(y1, 2)
|
||||
y2_expected = tf.to_float([[14, 43],
|
||||
[43, 84]])
|
||||
y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])
|
||||
|
||||
y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
|
||||
y3_expected = y2_expected
|
||||
|
||||
y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
|
||||
y4_expected = tf.to_float([[48, 37],
|
||||
[37, 22]])
|
||||
y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1])
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
self.assertAllClose(y1.eval(), y1_expected.eval())
|
||||
self.assertAllClose(y2.eval(), y2_expected.eval())
|
||||
self.assertAllClose(y3.eval(), y3_expected.eval())
|
||||
self.assertAllClose(y4.eval(), y4_expected.eval())
|
||||
|
||||
def testConv2DSameOdd(self):
|
||||
n, n2 = 5, 3
|
||||
|
||||
# Input image.
|
||||
x = create_test_input(1, n, n, 1)
|
||||
|
||||
# Convolution kernel.
|
||||
w = create_test_input(1, 3, 3, 1)
|
||||
w = tf.reshape(w, [3, 3, 1, 1])
|
||||
|
||||
tf.get_variable('Conv/weights', initializer=w)
|
||||
tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
|
||||
y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
|
||||
y1_expected = tf.to_float([[14, 28, 43, 58, 34],
|
||||
[28, 48, 66, 84, 46],
|
||||
[43, 66, 84, 102, 55],
|
||||
[58, 84, 102, 120, 64],
|
||||
[34, 46, 55, 64, 30]])
|
||||
y1_expected = tf.reshape(y1_expected, [1, n, n, 1])
|
||||
|
||||
y2 = resnet_utils.subsample(y1, 2)
|
||||
y2_expected = tf.to_float([[14, 43, 34],
|
||||
[43, 84, 55],
|
||||
[34, 55, 30]])
|
||||
y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])
|
||||
|
||||
y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
|
||||
y3_expected = y2_expected
|
||||
|
||||
y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
|
||||
y4_expected = y2_expected
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
self.assertAllClose(y1.eval(), y1_expected.eval())
|
||||
self.assertAllClose(y2.eval(), y2_expected.eval())
|
||||
self.assertAllClose(y3.eval(), y3_expected.eval())
|
||||
self.assertAllClose(y4.eval(), y4_expected.eval())
|
||||
|
||||
def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
|
||||
"""A plain ResNet without extra layers before or after the ResNet blocks."""
|
||||
with tf.variable_scope(scope, values=[inputs]):
|
||||
with slim.arg_scope([slim.conv2d], outputs_collections='end_points'):
|
||||
net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)
|
||||
end_points = dict(tf.get_collection('end_points'))
|
||||
return net, end_points
|
||||
|
||||
def testEndPointsV2(self):
|
||||
"""Test the end points of a tiny v2 bottleneck network."""
|
||||
bottleneck = resnet_v2.bottleneck
|
||||
blocks = [resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
|
||||
resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])]
|
||||
inputs = create_test_input(2, 32, 16, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
_, end_points = self._resnet_plain(inputs, blocks, scope='tiny')
|
||||
expected = [
|
||||
'tiny/block1/unit_1/bottleneck_v2/shortcut',
|
||||
'tiny/block1/unit_1/bottleneck_v2/conv1',
|
||||
'tiny/block1/unit_1/bottleneck_v2/conv2',
|
||||
'tiny/block1/unit_1/bottleneck_v2/conv3',
|
||||
'tiny/block1/unit_2/bottleneck_v2/conv1',
|
||||
'tiny/block1/unit_2/bottleneck_v2/conv2',
|
||||
'tiny/block1/unit_2/bottleneck_v2/conv3',
|
||||
'tiny/block2/unit_1/bottleneck_v2/shortcut',
|
||||
'tiny/block2/unit_1/bottleneck_v2/conv1',
|
||||
'tiny/block2/unit_1/bottleneck_v2/conv2',
|
||||
'tiny/block2/unit_1/bottleneck_v2/conv3',
|
||||
'tiny/block2/unit_2/bottleneck_v2/conv1',
|
||||
'tiny/block2/unit_2/bottleneck_v2/conv2',
|
||||
'tiny/block2/unit_2/bottleneck_v2/conv3']
|
||||
self.assertItemsEqual(expected, end_points)
|
||||
|
||||
def _stack_blocks_nondense(self, net, blocks):
|
||||
"""A simplified ResNet Block stacker without output stride control."""
|
||||
for block in blocks:
|
||||
with tf.variable_scope(block.scope, 'block', [net]):
|
||||
for i, unit in enumerate(block.args):
|
||||
depth, depth_bottleneck, stride = unit
|
||||
with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
|
||||
net = block.unit_fn(net,
|
||||
depth=depth,
|
||||
depth_bottleneck=depth_bottleneck,
|
||||
stride=stride,
|
||||
rate=1)
|
||||
return net
|
||||
|
||||
def _atrousValues(self, bottleneck):
|
||||
"""Verify the values of dense feature extraction by atrous convolution.
|
||||
|
||||
Make sure that dense feature extraction by stack_blocks_dense() followed by
|
||||
subsampling gives identical results to feature extraction at the nominal
|
||||
network output stride using the simple self._stack_blocks_nondense() above.
|
||||
|
||||
Args:
|
||||
bottleneck: The bottleneck function.
|
||||
"""
|
||||
blocks = [
|
||||
resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
|
||||
resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]),
|
||||
resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]),
|
||||
resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)])
|
||||
]
|
||||
nominal_stride = 8
|
||||
|
||||
# Test both odd and even input dimensions.
|
||||
height = 30
|
||||
width = 31
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
with slim.arg_scope([slim.batch_norm], is_training=False):
|
||||
for output_stride in [1, 2, 4, 8, None]:
|
||||
with tf.Graph().as_default():
|
||||
with self.test_session() as sess:
|
||||
tf.set_random_seed(0)
|
||||
inputs = create_test_input(1, height, width, 3)
|
||||
# Dense feature extraction followed by subsampling.
|
||||
output = resnet_utils.stack_blocks_dense(inputs,
|
||||
blocks,
|
||||
output_stride)
|
||||
if output_stride is None:
|
||||
factor = 1
|
||||
else:
|
||||
factor = nominal_stride // output_stride
|
||||
|
||||
output = resnet_utils.subsample(output, factor)
|
||||
# Make the two networks use the same weights.
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
# Feature extraction at the nominal network rate.
|
||||
expected = self._stack_blocks_nondense(inputs, blocks)
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output, expected = sess.run([output, expected])
|
||||
self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4)
|
||||
|
||||
def testAtrousValuesBottleneck(self):
|
||||
self._atrousValues(resnet_v2.bottleneck)
|
||||
|
||||
|
||||
class ResnetCompleteNetworkTest(tf.test.TestCase):
|
||||
"""Tests with complete small ResNet v2 networks."""
|
||||
|
||||
def _resnet_small(self,
|
||||
inputs,
|
||||
num_classes=None,
|
||||
is_training=True,
|
||||
global_pool=True,
|
||||
output_stride=None,
|
||||
include_root_block=True,
|
||||
reuse=None,
|
||||
scope='resnet_v2_small'):
|
||||
"""A shallow and thin ResNet v2 for faster tests."""
|
||||
bottleneck = resnet_v2.bottleneck
|
||||
blocks = [
|
||||
resnet_utils.Block(
|
||||
'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]),
|
||||
resnet_utils.Block(
|
||||
'block4', bottleneck, [(32, 8, 1)] * 2)]
|
||||
return resnet_v2.resnet_v2(inputs, blocks, num_classes,
|
||||
is_training=is_training,
|
||||
global_pool=global_pool,
|
||||
output_stride=output_stride,
|
||||
include_root_block=include_root_block,
|
||||
reuse=reuse,
|
||||
scope=scope)
|
||||
|
||||
def testClassificationEndPoints(self):
|
||||
global_pool = True
|
||||
num_classes = 10
|
||||
inputs = create_test_input(2, 224, 224, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
logits, end_points = self._resnet_small(inputs, num_classes,
|
||||
global_pool=global_pool,
|
||||
scope='resnet')
|
||||
self.assertTrue(logits.op.name.startswith('resnet/logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes])
|
||||
self.assertTrue('predictions' in end_points)
|
||||
self.assertListEqual(end_points['predictions'].get_shape().as_list(),
|
||||
[2, 1, 1, num_classes])
|
||||
|
||||
def testClassificationShapes(self):
|
||||
global_pool = True
|
||||
num_classes = 10
|
||||
inputs = create_test_input(2, 224, 224, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
_, end_points = self._resnet_small(inputs, num_classes,
|
||||
global_pool=global_pool,
|
||||
scope='resnet')
|
||||
endpoint_to_shape = {
|
||||
'resnet/block1': [2, 28, 28, 4],
|
||||
'resnet/block2': [2, 14, 14, 8],
|
||||
'resnet/block3': [2, 7, 7, 16],
|
||||
'resnet/block4': [2, 7, 7, 32]}
|
||||
for endpoint in endpoint_to_shape:
|
||||
shape = endpoint_to_shape[endpoint]
|
||||
self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
|
||||
|
||||
def testFullyConvolutionalEndpointShapes(self):
|
||||
global_pool = False
|
||||
num_classes = 10
|
||||
inputs = create_test_input(2, 321, 321, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
_, end_points = self._resnet_small(inputs, num_classes,
|
||||
global_pool=global_pool,
|
||||
scope='resnet')
|
||||
endpoint_to_shape = {
|
||||
'resnet/block1': [2, 41, 41, 4],
|
||||
'resnet/block2': [2, 21, 21, 8],
|
||||
'resnet/block3': [2, 11, 11, 16],
|
||||
'resnet/block4': [2, 11, 11, 32]}
|
||||
for endpoint in endpoint_to_shape:
|
||||
shape = endpoint_to_shape[endpoint]
|
||||
self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
|
||||
|
||||
def testRootlessFullyConvolutionalEndpointShapes(self):
|
||||
global_pool = False
|
||||
num_classes = 10
|
||||
inputs = create_test_input(2, 128, 128, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
_, end_points = self._resnet_small(inputs, num_classes,
|
||||
global_pool=global_pool,
|
||||
include_root_block=False,
|
||||
scope='resnet')
|
||||
endpoint_to_shape = {
|
||||
'resnet/block1': [2, 64, 64, 4],
|
||||
'resnet/block2': [2, 32, 32, 8],
|
||||
'resnet/block3': [2, 16, 16, 16],
|
||||
'resnet/block4': [2, 16, 16, 32]}
|
||||
for endpoint in endpoint_to_shape:
|
||||
shape = endpoint_to_shape[endpoint]
|
||||
self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
|
||||
|
||||
def testAtrousFullyConvolutionalEndpointShapes(self):
|
||||
global_pool = False
|
||||
num_classes = 10
|
||||
output_stride = 8
|
||||
inputs = create_test_input(2, 321, 321, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
_, end_points = self._resnet_small(inputs,
|
||||
num_classes,
|
||||
global_pool=global_pool,
|
||||
output_stride=output_stride,
|
||||
scope='resnet')
|
||||
endpoint_to_shape = {
|
||||
'resnet/block1': [2, 41, 41, 4],
|
||||
'resnet/block2': [2, 41, 41, 8],
|
||||
'resnet/block3': [2, 41, 41, 16],
|
||||
'resnet/block4': [2, 41, 41, 32]}
|
||||
for endpoint in endpoint_to_shape:
|
||||
shape = endpoint_to_shape[endpoint]
|
||||
self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
|
||||
|
||||
def testAtrousFullyConvolutionalValues(self):
|
||||
"""Verify dense feature extraction with atrous convolution."""
|
||||
nominal_stride = 32
|
||||
for output_stride in [4, 8, 16, 32, None]:
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
with tf.Graph().as_default():
|
||||
with self.test_session() as sess:
|
||||
tf.set_random_seed(0)
|
||||
inputs = create_test_input(2, 81, 81, 3)
|
||||
# Dense feature extraction followed by subsampling.
|
||||
output, _ = self._resnet_small(inputs, None,
|
||||
is_training=False,
|
||||
global_pool=False,
|
||||
output_stride=output_stride)
|
||||
if output_stride is None:
|
||||
factor = 1
|
||||
else:
|
||||
factor = nominal_stride // output_stride
|
||||
output = resnet_utils.subsample(output, factor)
|
||||
# Make the two networks use the same weights.
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
# Feature extraction at the nominal network rate.
|
||||
expected, _ = self._resnet_small(inputs, None,
|
||||
is_training=False,
|
||||
global_pool=False)
|
||||
sess.run(tf.initialize_all_variables())
|
||||
self.assertAllClose(output.eval(), expected.eval(),
|
||||
atol=1e-4, rtol=1e-4)
|
||||
|
||||
def testUnknownBatchSize(self):
|
||||
batch = 2
|
||||
height, width = 65, 65
|
||||
global_pool = True
|
||||
num_classes = 10
|
||||
inputs = create_test_input(None, height, width, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
logits, _ = self._resnet_small(inputs, num_classes,
|
||||
global_pool=global_pool,
|
||||
scope='resnet')
|
||||
self.assertTrue(logits.op.name.startswith('resnet/logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[None, 1, 1, num_classes])
|
||||
images = create_test_input(batch, height, width, 3)
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(logits, {inputs: images.eval()})
|
||||
self.assertEqual(output.shape, (batch, 1, 1, num_classes))
|
||||
|
||||
def testFullyConvolutionalUnknownHeightWidth(self):
|
||||
batch = 2
|
||||
height, width = 65, 65
|
||||
global_pool = False
|
||||
inputs = create_test_input(batch, None, None, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
output, _ = self._resnet_small(inputs, None,
|
||||
global_pool=global_pool)
|
||||
self.assertListEqual(output.get_shape().as_list(),
|
||||
[batch, None, None, 32])
|
||||
images = create_test_input(batch, height, width, 3)
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(output, {inputs: images.eval()})
|
||||
self.assertEqual(output.shape, (batch, 3, 3, 32))
|
||||
|
||||
def testAtrousFullyConvolutionalUnknownHeightWidth(self):
|
||||
batch = 2
|
||||
height, width = 65, 65
|
||||
global_pool = False
|
||||
output_stride = 8
|
||||
inputs = create_test_input(batch, None, None, 3)
|
||||
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
|
||||
output, _ = self._resnet_small(inputs,
|
||||
None,
|
||||
global_pool=global_pool,
|
||||
output_stride=output_stride)
|
||||
self.assertListEqual(output.get_shape().as_list(),
|
||||
[batch, None, None, 32])
|
||||
images = create_test_input(batch, height, width, 3)
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(output, {inputs: images.eval()})
|
||||
self.assertEqual(output.shape, (batch, 9, 9, 32))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
@ -0,0 +1,227 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains model definitions for versions of the Oxford VGG network.
|
||||
These model definitions were introduced in the following technical report:
|
||||
Very Deep Convolutional Networks For Large-Scale Image Recognition
|
||||
Karen Simonyan and Andrew Zisserman
|
||||
arXiv technical report, 2015
|
||||
PDF: http://arxiv.org/pdf/1409.1556.pdf
|
||||
ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
|
||||
CC-BY-4.0
|
||||
More information can be obtained from the VGG website:
|
||||
www.robots.ox.ac.uk/~vgg/research/very_deep/
|
||||
Usage:
|
||||
with slim.arg_scope(vgg.vgg_arg_scope()):
|
||||
outputs, end_points = vgg.vgg_a(inputs)
|
||||
with slim.arg_scope(vgg.vgg_arg_scope()):
|
||||
outputs, end_points = vgg.vgg_16(inputs)
|
||||
@@vgg_a
|
||||
@@vgg_16
|
||||
@@vgg_19
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
def vgg_arg_scope(weight_decay=0.0005):
|
||||
"""Defines the VGG arg scope.
|
||||
Args:
|
||||
weight_decay: The l2 regularization coefficient.
|
||||
Returns:
|
||||
An arg_scope.
|
||||
"""
|
||||
with slim.arg_scope([slim.conv2d, slim.fully_connected],
|
||||
activation_fn=tf.nn.relu,
|
||||
weights_regularizer=slim.l2_regularizer(weight_decay),
|
||||
biases_initializer=tf.zeros_initializer()):
|
||||
with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc:
|
||||
return arg_sc
|
||||
|
||||
|
||||
def vgg_a(inputs,
|
||||
num_classes=1000,
|
||||
is_training=True,
|
||||
dropout_keep_prob=0.5,
|
||||
spatial_squeeze=True,
|
||||
scope='vgg_a'):
|
||||
"""Oxford Net VGG 11-Layers version A Example.
|
||||
Note: All the fully_connected layers have been transformed to conv2d layers.
|
||||
To use in classification mode, resize input to 224x224.
|
||||
Args:
|
||||
inputs: a tensor of size [batch_size, height, width, channels].
|
||||
num_classes: number of predicted classes.
|
||||
is_training: whether or not the model is being trained.
|
||||
dropout_keep_prob: the probability that activations are kept in the dropout
|
||||
layers during training.
|
||||
spatial_squeeze: whether or not should squeeze the spatial dimensions of the
|
||||
outputs. Useful to remove unnecessary dimensions for classification.
|
||||
scope: Optional scope for the variables.
|
||||
Returns:
|
||||
the last op containing the log predictions and end_points dict.
|
||||
"""
|
||||
with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc:
|
||||
end_points_collection = sc.name + '_end_points'
|
||||
# Collect outputs for conv2d, fully_connected and max_pool2d.
|
||||
with slim.arg_scope([slim.conv2d, slim.max_pool2d],
|
||||
outputs_collections=end_points_collection):
|
||||
net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool1')
|
||||
net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool2')
|
||||
net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool3')
|
||||
net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool4')
|
||||
net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool5')
|
||||
# Use conv2d instead of fully_connected layers.
|
||||
net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
|
||||
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
|
||||
scope='dropout6')
|
||||
net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
|
||||
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
|
||||
scope='dropout7')
|
||||
net = slim.conv2d(net, num_classes, [1, 1],
|
||||
activation_fn=None,
|
||||
normalizer_fn=None,
|
||||
scope='fc8')
|
||||
# Convert end_points_collection into a end_point dict.
|
||||
end_points = slim.utils.convert_collection_to_dict(end_points_collection)
|
||||
if spatial_squeeze:
|
||||
net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
|
||||
end_points[sc.name + '/fc8'] = net
|
||||
return net, end_points
|
||||
vgg_a.default_image_size = 224
|
||||
|
||||
|
||||
def vgg_16(inputs,
|
||||
num_classes=1000,
|
||||
is_training=True,
|
||||
dropout_keep_prob=0.5,
|
||||
spatial_squeeze=True,
|
||||
scope='vgg_16'):
|
||||
"""Oxford Net VGG 16-Layers version D Example.
|
||||
Note: All the fully_connected layers have been transformed to conv2d layers.
|
||||
To use in classification mode, resize input to 224x224.
|
||||
Args:
|
||||
inputs: a tensor of size [batch_size, height, width, channels].
|
||||
num_classes: number of predicted classes.
|
||||
is_training: whether or not the model is being trained.
|
||||
dropout_keep_prob: the probability that activations are kept in the dropout
|
||||
layers during training.
|
||||
spatial_squeeze: whether or not should squeeze the spatial dimensions of the
|
||||
outputs. Useful to remove unnecessary dimensions for classification.
|
||||
scope: Optional scope for the variables.
|
||||
Returns:
|
||||
the last op containing the log predictions and end_points dict.
|
||||
"""
|
||||
with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
|
||||
end_points_collection = sc.name + '_end_points'
|
||||
# Collect outputs for conv2d, fully_connected and max_pool2d.
|
||||
with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
|
||||
outputs_collections=end_points_collection):
|
||||
net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool1')
|
||||
net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool2')
|
||||
net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool3')
|
||||
net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool4')
|
||||
net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool5')
|
||||
# Use conv2d instead of fully_connected layers.
|
||||
net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
|
||||
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
|
||||
scope='dropout6')
|
||||
net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
|
||||
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
|
||||
scope='dropout7')
|
||||
net = slim.conv2d(net, num_classes, [1, 1],
|
||||
activation_fn=None,
|
||||
normalizer_fn=None,
|
||||
scope='fc8')
|
||||
# Convert end_points_collection into a end_point dict.
|
||||
end_points = slim.utils.convert_collection_to_dict(end_points_collection)
|
||||
if spatial_squeeze:
|
||||
net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
|
||||
end_points[sc.name + '/fc8'] = net
|
||||
return net, end_points
|
||||
vgg_16.default_image_size = 224
|
||||
|
||||
|
||||
def vgg_19(inputs,
|
||||
num_classes=1000,
|
||||
is_training=True,
|
||||
dropout_keep_prob=0.5,
|
||||
spatial_squeeze=True,
|
||||
scope='vgg_19'):
|
||||
"""Oxford Net VGG 19-Layers version E Example.
|
||||
Note: All the fully_connected layers have been transformed to conv2d layers.
|
||||
To use in classification mode, resize input to 224x224.
|
||||
Args:
|
||||
inputs: a tensor of size [batch_size, height, width, channels].
|
||||
num_classes: number of predicted classes.
|
||||
is_training: whether or not the model is being trained.
|
||||
dropout_keep_prob: the probability that activations are kept in the dropout
|
||||
layers during training.
|
||||
spatial_squeeze: whether or not should squeeze the spatial dimensions of the
|
||||
outputs. Useful to remove unnecessary dimensions for classification.
|
||||
scope: Optional scope for the variables.
|
||||
Returns:
|
||||
the last op containing the log predictions and end_points dict.
|
||||
"""
|
||||
with tf.variable_scope(scope, 'vgg_19', [inputs]) as sc:
|
||||
end_points_collection = sc.name + '_end_points'
|
||||
# Collect outputs for conv2d, fully_connected and max_pool2d.
|
||||
with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
|
||||
outputs_collections=end_points_collection):
|
||||
net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool1')
|
||||
net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool2')
|
||||
net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool3')
|
||||
net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool4')
|
||||
net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool5')
|
||||
# Use conv2d instead of fully_connected layers.
|
||||
net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
|
||||
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
|
||||
scope='dropout6')
|
||||
net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
|
||||
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
|
||||
scope='dropout7')
|
||||
net = slim.conv2d(net, num_classes, [1, 1],
|
||||
activation_fn=None,
|
||||
normalizer_fn=None,
|
||||
scope='fc8')
|
||||
# Convert end_points_collection into a end_point dict.
|
||||
end_points = slim.utils.convert_collection_to_dict(end_points_collection)
|
||||
if spatial_squeeze:
|
||||
net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
|
||||
end_points[sc.name + '/fc8'] = net
|
||||
return net, end_points
|
||||
vgg_19.default_image_size = 224
|
||||
|
||||
# Alias
|
||||
vgg_d = vgg_16
|
||||
vgg_e = vgg_19
|
||||
@ -0,0 +1,455 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for slim.nets.vgg."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import vgg
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
class VGGATest(tf.test.TestCase):
|
||||
|
||||
def testBuild(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = vgg.vgg_a(inputs, num_classes)
|
||||
self.assertEquals(logits.op.name, 'vgg_a/fc8/squeezed')
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
|
||||
def testFullyConvolutional(self):
|
||||
batch_size = 1
|
||||
height, width = 256, 256
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = vgg.vgg_a(inputs, num_classes, spatial_squeeze=False)
|
||||
self.assertEquals(logits.op.name, 'vgg_a/fc8/BiasAdd')
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, 2, 2, num_classes])
|
||||
|
||||
def testEndPoints(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
_, end_points = vgg.vgg_a(inputs, num_classes)
|
||||
expected_names = ['vgg_a/conv1/conv1_1',
|
||||
'vgg_a/pool1',
|
||||
'vgg_a/conv2/conv2_1',
|
||||
'vgg_a/pool2',
|
||||
'vgg_a/conv3/conv3_1',
|
||||
'vgg_a/conv3/conv3_2',
|
||||
'vgg_a/pool3',
|
||||
'vgg_a/conv4/conv4_1',
|
||||
'vgg_a/conv4/conv4_2',
|
||||
'vgg_a/pool4',
|
||||
'vgg_a/conv5/conv5_1',
|
||||
'vgg_a/conv5/conv5_2',
|
||||
'vgg_a/pool5',
|
||||
'vgg_a/fc6',
|
||||
'vgg_a/fc7',
|
||||
'vgg_a/fc8'
|
||||
]
|
||||
self.assertSetEqual(set(end_points.keys()), set(expected_names))
|
||||
|
||||
def testModelVariables(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
vgg.vgg_a(inputs, num_classes)
|
||||
expected_names = ['vgg_a/conv1/conv1_1/weights',
|
||||
'vgg_a/conv1/conv1_1/biases',
|
||||
'vgg_a/conv2/conv2_1/weights',
|
||||
'vgg_a/conv2/conv2_1/biases',
|
||||
'vgg_a/conv3/conv3_1/weights',
|
||||
'vgg_a/conv3/conv3_1/biases',
|
||||
'vgg_a/conv3/conv3_2/weights',
|
||||
'vgg_a/conv3/conv3_2/biases',
|
||||
'vgg_a/conv4/conv4_1/weights',
|
||||
'vgg_a/conv4/conv4_1/biases',
|
||||
'vgg_a/conv4/conv4_2/weights',
|
||||
'vgg_a/conv4/conv4_2/biases',
|
||||
'vgg_a/conv5/conv5_1/weights',
|
||||
'vgg_a/conv5/conv5_1/biases',
|
||||
'vgg_a/conv5/conv5_2/weights',
|
||||
'vgg_a/conv5/conv5_2/biases',
|
||||
'vgg_a/fc6/weights',
|
||||
'vgg_a/fc6/biases',
|
||||
'vgg_a/fc7/weights',
|
||||
'vgg_a/fc7/biases',
|
||||
'vgg_a/fc8/weights',
|
||||
'vgg_a/fc8/biases',
|
||||
]
|
||||
model_variables = [v.op.name for v in slim.get_model_variables()]
|
||||
self.assertSetEqual(set(model_variables), set(expected_names))
|
||||
|
||||
def testEvaluation(self):
|
||||
batch_size = 2
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
eval_inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = vgg.vgg_a(eval_inputs, is_training=False)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
predictions = tf.argmax(logits, 1)
|
||||
self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
|
||||
|
||||
def testTrainEvalWithReuse(self):
|
||||
train_batch_size = 2
|
||||
eval_batch_size = 1
|
||||
train_height, train_width = 224, 224
|
||||
eval_height, eval_width = 256, 256
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
train_inputs = tf.random_uniform(
|
||||
(train_batch_size, train_height, train_width, 3))
|
||||
logits, _ = vgg.vgg_a(train_inputs)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[train_batch_size, num_classes])
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
eval_inputs = tf.random_uniform(
|
||||
(eval_batch_size, eval_height, eval_width, 3))
|
||||
logits, _ = vgg.vgg_a(eval_inputs, is_training=False,
|
||||
spatial_squeeze=False)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[eval_batch_size, 2, 2, num_classes])
|
||||
logits = tf.reduce_mean(logits, [1, 2])
|
||||
predictions = tf.argmax(logits, 1)
|
||||
self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
|
||||
|
||||
def testForward(self):
|
||||
batch_size = 1
|
||||
height, width = 224, 224
|
||||
with self.test_session() as sess:
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = vgg.vgg_a(inputs)
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(logits)
|
||||
self.assertTrue(output.any())
|
||||
|
||||
|
||||
class VGG16Test(tf.test.TestCase):
|
||||
|
||||
def testBuild(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = vgg.vgg_16(inputs, num_classes)
|
||||
self.assertEquals(logits.op.name, 'vgg_16/fc8/squeezed')
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
|
||||
def testFullyConvolutional(self):
|
||||
batch_size = 1
|
||||
height, width = 256, 256
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = vgg.vgg_16(inputs, num_classes, spatial_squeeze=False)
|
||||
self.assertEquals(logits.op.name, 'vgg_16/fc8/BiasAdd')
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, 2, 2, num_classes])
|
||||
|
||||
def testEndPoints(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
_, end_points = vgg.vgg_16(inputs, num_classes)
|
||||
expected_names = ['vgg_16/conv1/conv1_1',
|
||||
'vgg_16/conv1/conv1_2',
|
||||
'vgg_16/pool1',
|
||||
'vgg_16/conv2/conv2_1',
|
||||
'vgg_16/conv2/conv2_2',
|
||||
'vgg_16/pool2',
|
||||
'vgg_16/conv3/conv3_1',
|
||||
'vgg_16/conv3/conv3_2',
|
||||
'vgg_16/conv3/conv3_3',
|
||||
'vgg_16/pool3',
|
||||
'vgg_16/conv4/conv4_1',
|
||||
'vgg_16/conv4/conv4_2',
|
||||
'vgg_16/conv4/conv4_3',
|
||||
'vgg_16/pool4',
|
||||
'vgg_16/conv5/conv5_1',
|
||||
'vgg_16/conv5/conv5_2',
|
||||
'vgg_16/conv5/conv5_3',
|
||||
'vgg_16/pool5',
|
||||
'vgg_16/fc6',
|
||||
'vgg_16/fc7',
|
||||
'vgg_16/fc8'
|
||||
]
|
||||
self.assertSetEqual(set(end_points.keys()), set(expected_names))
|
||||
|
||||
def testModelVariables(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
vgg.vgg_16(inputs, num_classes)
|
||||
expected_names = ['vgg_16/conv1/conv1_1/weights',
|
||||
'vgg_16/conv1/conv1_1/biases',
|
||||
'vgg_16/conv1/conv1_2/weights',
|
||||
'vgg_16/conv1/conv1_2/biases',
|
||||
'vgg_16/conv2/conv2_1/weights',
|
||||
'vgg_16/conv2/conv2_1/biases',
|
||||
'vgg_16/conv2/conv2_2/weights',
|
||||
'vgg_16/conv2/conv2_2/biases',
|
||||
'vgg_16/conv3/conv3_1/weights',
|
||||
'vgg_16/conv3/conv3_1/biases',
|
||||
'vgg_16/conv3/conv3_2/weights',
|
||||
'vgg_16/conv3/conv3_2/biases',
|
||||
'vgg_16/conv3/conv3_3/weights',
|
||||
'vgg_16/conv3/conv3_3/biases',
|
||||
'vgg_16/conv4/conv4_1/weights',
|
||||
'vgg_16/conv4/conv4_1/biases',
|
||||
'vgg_16/conv4/conv4_2/weights',
|
||||
'vgg_16/conv4/conv4_2/biases',
|
||||
'vgg_16/conv4/conv4_3/weights',
|
||||
'vgg_16/conv4/conv4_3/biases',
|
||||
'vgg_16/conv5/conv5_1/weights',
|
||||
'vgg_16/conv5/conv5_1/biases',
|
||||
'vgg_16/conv5/conv5_2/weights',
|
||||
'vgg_16/conv5/conv5_2/biases',
|
||||
'vgg_16/conv5/conv5_3/weights',
|
||||
'vgg_16/conv5/conv5_3/biases',
|
||||
'vgg_16/fc6/weights',
|
||||
'vgg_16/fc6/biases',
|
||||
'vgg_16/fc7/weights',
|
||||
'vgg_16/fc7/biases',
|
||||
'vgg_16/fc8/weights',
|
||||
'vgg_16/fc8/biases',
|
||||
]
|
||||
model_variables = [v.op.name for v in slim.get_model_variables()]
|
||||
self.assertSetEqual(set(model_variables), set(expected_names))
|
||||
|
||||
def testEvaluation(self):
|
||||
batch_size = 2
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
eval_inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = vgg.vgg_16(eval_inputs, is_training=False)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
predictions = tf.argmax(logits, 1)
|
||||
self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
|
||||
|
||||
def testTrainEvalWithReuse(self):
|
||||
train_batch_size = 2
|
||||
eval_batch_size = 1
|
||||
train_height, train_width = 224, 224
|
||||
eval_height, eval_width = 256, 256
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
train_inputs = tf.random_uniform(
|
||||
(train_batch_size, train_height, train_width, 3))
|
||||
logits, _ = vgg.vgg_16(train_inputs)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[train_batch_size, num_classes])
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
eval_inputs = tf.random_uniform(
|
||||
(eval_batch_size, eval_height, eval_width, 3))
|
||||
logits, _ = vgg.vgg_16(eval_inputs, is_training=False,
|
||||
spatial_squeeze=False)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[eval_batch_size, 2, 2, num_classes])
|
||||
logits = tf.reduce_mean(logits, [1, 2])
|
||||
predictions = tf.argmax(logits, 1)
|
||||
self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
|
||||
|
||||
def testForward(self):
|
||||
batch_size = 1
|
||||
height, width = 224, 224
|
||||
with self.test_session() as sess:
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = vgg.vgg_16(inputs)
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(logits)
|
||||
self.assertTrue(output.any())
|
||||
|
||||
|
||||
class VGG19Test(tf.test.TestCase):
|
||||
|
||||
def testBuild(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = vgg.vgg_19(inputs, num_classes)
|
||||
self.assertEquals(logits.op.name, 'vgg_19/fc8/squeezed')
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
|
||||
def testFullyConvolutional(self):
|
||||
batch_size = 1
|
||||
height, width = 256, 256
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = vgg.vgg_19(inputs, num_classes, spatial_squeeze=False)
|
||||
self.assertEquals(logits.op.name, 'vgg_19/fc8/BiasAdd')
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, 2, 2, num_classes])
|
||||
|
||||
def testEndPoints(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
_, end_points = vgg.vgg_19(inputs, num_classes)
|
||||
expected_names = [
|
||||
'vgg_19/conv1/conv1_1',
|
||||
'vgg_19/conv1/conv1_2',
|
||||
'vgg_19/pool1',
|
||||
'vgg_19/conv2/conv2_1',
|
||||
'vgg_19/conv2/conv2_2',
|
||||
'vgg_19/pool2',
|
||||
'vgg_19/conv3/conv3_1',
|
||||
'vgg_19/conv3/conv3_2',
|
||||
'vgg_19/conv3/conv3_3',
|
||||
'vgg_19/conv3/conv3_4',
|
||||
'vgg_19/pool3',
|
||||
'vgg_19/conv4/conv4_1',
|
||||
'vgg_19/conv4/conv4_2',
|
||||
'vgg_19/conv4/conv4_3',
|
||||
'vgg_19/conv4/conv4_4',
|
||||
'vgg_19/pool4',
|
||||
'vgg_19/conv5/conv5_1',
|
||||
'vgg_19/conv5/conv5_2',
|
||||
'vgg_19/conv5/conv5_3',
|
||||
'vgg_19/conv5/conv5_4',
|
||||
'vgg_19/pool5',
|
||||
'vgg_19/fc6',
|
||||
'vgg_19/fc7',
|
||||
'vgg_19/fc8'
|
||||
]
|
||||
self.assertSetEqual(set(end_points.keys()), set(expected_names))
|
||||
|
||||
def testModelVariables(self):
|
||||
batch_size = 5
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
vgg.vgg_19(inputs, num_classes)
|
||||
expected_names = [
|
||||
'vgg_19/conv1/conv1_1/weights',
|
||||
'vgg_19/conv1/conv1_1/biases',
|
||||
'vgg_19/conv1/conv1_2/weights',
|
||||
'vgg_19/conv1/conv1_2/biases',
|
||||
'vgg_19/conv2/conv2_1/weights',
|
||||
'vgg_19/conv2/conv2_1/biases',
|
||||
'vgg_19/conv2/conv2_2/weights',
|
||||
'vgg_19/conv2/conv2_2/biases',
|
||||
'vgg_19/conv3/conv3_1/weights',
|
||||
'vgg_19/conv3/conv3_1/biases',
|
||||
'vgg_19/conv3/conv3_2/weights',
|
||||
'vgg_19/conv3/conv3_2/biases',
|
||||
'vgg_19/conv3/conv3_3/weights',
|
||||
'vgg_19/conv3/conv3_3/biases',
|
||||
'vgg_19/conv3/conv3_4/weights',
|
||||
'vgg_19/conv3/conv3_4/biases',
|
||||
'vgg_19/conv4/conv4_1/weights',
|
||||
'vgg_19/conv4/conv4_1/biases',
|
||||
'vgg_19/conv4/conv4_2/weights',
|
||||
'vgg_19/conv4/conv4_2/biases',
|
||||
'vgg_19/conv4/conv4_3/weights',
|
||||
'vgg_19/conv4/conv4_3/biases',
|
||||
'vgg_19/conv4/conv4_4/weights',
|
||||
'vgg_19/conv4/conv4_4/biases',
|
||||
'vgg_19/conv5/conv5_1/weights',
|
||||
'vgg_19/conv5/conv5_1/biases',
|
||||
'vgg_19/conv5/conv5_2/weights',
|
||||
'vgg_19/conv5/conv5_2/biases',
|
||||
'vgg_19/conv5/conv5_3/weights',
|
||||
'vgg_19/conv5/conv5_3/biases',
|
||||
'vgg_19/conv5/conv5_4/weights',
|
||||
'vgg_19/conv5/conv5_4/biases',
|
||||
'vgg_19/fc6/weights',
|
||||
'vgg_19/fc6/biases',
|
||||
'vgg_19/fc7/weights',
|
||||
'vgg_19/fc7/biases',
|
||||
'vgg_19/fc8/weights',
|
||||
'vgg_19/fc8/biases',
|
||||
]
|
||||
model_variables = [v.op.name for v in slim.get_model_variables()]
|
||||
self.assertSetEqual(set(model_variables), set(expected_names))
|
||||
|
||||
def testEvaluation(self):
|
||||
batch_size = 2
|
||||
height, width = 224, 224
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
eval_inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = vgg.vgg_19(eval_inputs, is_training=False)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
predictions = tf.argmax(logits, 1)
|
||||
self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
|
||||
|
||||
def testTrainEvalWithReuse(self):
|
||||
train_batch_size = 2
|
||||
eval_batch_size = 1
|
||||
train_height, train_width = 224, 224
|
||||
eval_height, eval_width = 256, 256
|
||||
num_classes = 1000
|
||||
with self.test_session():
|
||||
train_inputs = tf.random_uniform(
|
||||
(train_batch_size, train_height, train_width, 3))
|
||||
logits, _ = vgg.vgg_19(train_inputs)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[train_batch_size, num_classes])
|
||||
tf.get_variable_scope().reuse_variables()
|
||||
eval_inputs = tf.random_uniform(
|
||||
(eval_batch_size, eval_height, eval_width, 3))
|
||||
logits, _ = vgg.vgg_19(eval_inputs, is_training=False,
|
||||
spatial_squeeze=False)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[eval_batch_size, 2, 2, num_classes])
|
||||
logits = tf.reduce_mean(logits, [1, 2])
|
||||
predictions = tf.argmax(logits, 1)
|
||||
self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
|
||||
|
||||
def testForward(self):
|
||||
batch_size = 1
|
||||
height, width = 224, 224
|
||||
with self.test_session() as sess:
|
||||
inputs = tf.random_uniform((batch_size, height, width, 3))
|
||||
logits, _ = vgg.vgg_19(inputs)
|
||||
sess.run(tf.initialize_all_variables())
|
||||
output = sess.run(logits)
|
||||
self.assertTrue(output.any())
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
@ -0,0 +1 @@
|
||||
|
||||
@ -0,0 +1,114 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Provides utilities to preprocess images in CIFAR-10.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow.compat.v1 as tf
|
||||
tf.disable_v2_behavior()
|
||||
import tf_slim as slim
|
||||
|
||||
_PADDING = 4
|
||||
|
||||
|
||||
def preprocess_for_train(image,
|
||||
output_height,
|
||||
output_width,
|
||||
padding=_PADDING):
|
||||
"""Preprocesses the given image for training.
|
||||
|
||||
Note that the actual resizing scale is sampled from
|
||||
[`resize_size_min`, `resize_size_max`].
|
||||
|
||||
Args:
|
||||
image: A `Tensor` representing an image of arbitrary size.
|
||||
output_height: The height of the image after preprocessing.
|
||||
output_width: The width of the image after preprocessing.
|
||||
padding: The amound of padding before and after each dimension of the image.
|
||||
|
||||
Returns:
|
||||
A preprocessed image.
|
||||
"""
|
||||
tf.image_summary('image', tf.expand_dims(image, 0))
|
||||
|
||||
# Transform the image to floats.
|
||||
image = tf.to_float(image)
|
||||
if padding > 0:
|
||||
image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]])
|
||||
# Randomly crop a [height, width] section of the image.
|
||||
distorted_image = tf.random_crop(image,
|
||||
[output_height, output_width, 3])
|
||||
|
||||
# Randomly flip the image horizontally.
|
||||
distorted_image = tf.image.random_flip_left_right(distorted_image)
|
||||
|
||||
tf.image_summary('distorted_image', tf.expand_dims(distorted_image, 0))
|
||||
|
||||
# Because these operations are not commutative, consider randomizing
|
||||
# the order their operation.
|
||||
distorted_image = tf.image.random_brightness(distorted_image,
|
||||
max_delta=63)
|
||||
distorted_image = tf.image.random_contrast(distorted_image,
|
||||
lower=0.2, upper=1.8)
|
||||
# Subtract off the mean and divide by the variance of the pixels.
|
||||
return tf.image.per_image_whitening(distorted_image)
|
||||
|
||||
|
||||
def preprocess_for_eval(image, output_height, output_width):
|
||||
"""Preprocesses the given image for evaluation.
|
||||
|
||||
Args:
|
||||
image: A `Tensor` representing an image of arbitrary size.
|
||||
output_height: The height of the image after preprocessing.
|
||||
output_width: The width of the image after preprocessing.
|
||||
|
||||
Returns:
|
||||
A preprocessed image.
|
||||
"""
|
||||
tf.image_summary('image', tf.expand_dims(image, 0))
|
||||
# Transform the image to floats.
|
||||
image = tf.to_float(image)
|
||||
|
||||
# Resize and crop if needed.
|
||||
resized_image = tf.image.resize_image_with_crop_or_pad(image,
|
||||
output_width,
|
||||
output_height)
|
||||
tf.image_summary('resized_image', tf.expand_dims(resized_image, 0))
|
||||
|
||||
# Subtract off the mean and divide by the variance of the pixels.
|
||||
return tf.image.per_image_whitening(resized_image)
|
||||
|
||||
|
||||
def preprocess_image(image, output_height, output_width, is_training=False):
|
||||
"""Preprocesses the given image.
|
||||
|
||||
Args:
|
||||
image: A `Tensor` representing an image of arbitrary size.
|
||||
output_height: The height of the image after preprocessing.
|
||||
output_width: The width of the image after preprocessing.
|
||||
is_training: `True` if we're preprocessing the image for training and
|
||||
`False` otherwise.
|
||||
|
||||
Returns:
|
||||
A preprocessed image.
|
||||
"""
|
||||
if is_training:
|
||||
return preprocess_for_train(image, output_height, output_width)
|
||||
else:
|
||||
return preprocess_for_eval(image, output_height, output_width)
|
||||
@ -0,0 +1,305 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Provides utilities to preprocess images for the Inception networks."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow.compat.v1 as tf
|
||||
tf.disable_v2_behavior()
|
||||
|
||||
from tensorflow.python.ops import control_flow_ops
|
||||
|
||||
|
||||
def apply_with_random_selector(x, func, num_cases):
|
||||
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
|
||||
|
||||
Args:
|
||||
x: input Tensor.
|
||||
func: Python function to apply.
|
||||
num_cases: Python int32, number of cases to sample sel from.
|
||||
|
||||
Returns:
|
||||
The result of func(x, sel), where func receives the value of the
|
||||
selector as a python integer, but sel is sampled dynamically.
|
||||
"""
|
||||
sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
|
||||
# Pass the real x only to one of the func calls.
|
||||
return control_flow_ops.merge([
|
||||
func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case)
|
||||
for case in range(num_cases)])[0]
|
||||
|
||||
|
||||
def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
|
||||
"""Distort the color of a Tensor image.
|
||||
|
||||
Each color distortion is non-commutative and thus ordering of the color ops
|
||||
matters. Ideally we would randomly permute the ordering of the color ops.
|
||||
Rather then adding that level of complication, we select a distinct ordering
|
||||
of color ops for each preprocessing thread.
|
||||
|
||||
Args:
|
||||
image: 3-D Tensor containing single image in [0, 1].
|
||||
color_ordering: Python int, a type of distortion (valid values: 0-3).
|
||||
fast_mode: Avoids slower ops (random_hue and random_contrast)
|
||||
scope: Optional scope for name_scope.
|
||||
Returns:
|
||||
3-D Tensor color-distorted image on range [0, 1]
|
||||
Raises:
|
||||
ValueError: if color_ordering not in [0, 3]
|
||||
"""
|
||||
with tf.name_scope(scope, 'distort_color', [image]):
|
||||
if fast_mode:
|
||||
if color_ordering == 0:
|
||||
image = tf.image.random_brightness(image, max_delta=32. / 255.)
|
||||
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
|
||||
else:
|
||||
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
|
||||
image = tf.image.random_brightness(image, max_delta=32. / 255.)
|
||||
else:
|
||||
if color_ordering == 0:
|
||||
image = tf.image.random_brightness(image, max_delta=32. / 255.)
|
||||
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
|
||||
image = tf.image.random_hue(image, max_delta=0.2)
|
||||
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
|
||||
elif color_ordering == 1:
|
||||
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
|
||||
image = tf.image.random_brightness(image, max_delta=32. / 255.)
|
||||
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
|
||||
image = tf.image.random_hue(image, max_delta=0.2)
|
||||
elif color_ordering == 2:
|
||||
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
|
||||
image = tf.image.random_hue(image, max_delta=0.2)
|
||||
image = tf.image.random_brightness(image, max_delta=32. / 255.)
|
||||
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
|
||||
elif color_ordering == 3:
|
||||
image = tf.image.random_hue(image, max_delta=0.2)
|
||||
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
|
||||
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
|
||||
image = tf.image.random_brightness(image, max_delta=32. / 255.)
|
||||
else:
|
||||
raise ValueError('color_ordering must be in [0, 3]')
|
||||
|
||||
# The random_* ops do not necessarily clamp.
|
||||
return tf.clip_by_value(image, 0.0, 1.0)
|
||||
|
||||
|
||||
def distorted_bounding_box_crop(image,
|
||||
bbox,
|
||||
min_object_covered=0.1,
|
||||
aspect_ratio_range=(0.75, 1.33),
|
||||
area_range=(0.05, 1.0),
|
||||
max_attempts=100,
|
||||
scope=None):
|
||||
"""Generates cropped_image using a one of the bboxes randomly distorted.
|
||||
|
||||
See `tf.image.sample_distorted_bounding_box` for more documentation.
|
||||
|
||||
Args:
|
||||
image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
|
||||
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
|
||||
where each coordinate is [0, 1) and the coordinates are arranged
|
||||
as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole
|
||||
image.
|
||||
min_object_covered: An optional `float`. Defaults to `0.1`. The cropped
|
||||
area of the image must contain at least this fraction of any bounding box
|
||||
supplied.
|
||||
aspect_ratio_range: An optional list of `floats`. The cropped area of the
|
||||
image must have an aspect ratio = width / height within this range.
|
||||
area_range: An optional list of `floats`. The cropped area of the image
|
||||
must contain a fraction of the supplied image within in this range.
|
||||
max_attempts: An optional `int`. Number of attempts at generating a cropped
|
||||
region of the image of the specified constraints. After `max_attempts`
|
||||
failures, return the entire image.
|
||||
scope: Optional scope for name_scope.
|
||||
Returns:
|
||||
A tuple, a 3-D Tensor cropped_image and the distorted bbox
|
||||
"""
|
||||
with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]):
|
||||
# Each bounding box has shape [1, num_boxes, box coords] and
|
||||
# the coordinates are ordered [ymin, xmin, ymax, xmax].
|
||||
|
||||
# A large fraction of image datasets contain a human-annotated bounding
|
||||
# box delineating the region of the image containing the object of interest.
|
||||
# We choose to create a new bounding box for the object which is a randomly
|
||||
# distorted version of the human-annotated bounding box that obeys an
|
||||
# allowed range of aspect ratios, sizes and overlap with the human-annotated
|
||||
# bounding box. If no box is supplied, then we assume the bounding box is
|
||||
# the entire image.
|
||||
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
|
||||
tf.shape(image),
|
||||
bounding_boxes=bbox,
|
||||
min_object_covered=min_object_covered,
|
||||
aspect_ratio_range=aspect_ratio_range,
|
||||
area_range=area_range,
|
||||
max_attempts=max_attempts,
|
||||
use_image_if_no_bounding_boxes=True)
|
||||
bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box
|
||||
|
||||
# Crop the image to the specified bounding box.
|
||||
cropped_image = tf.slice(image, bbox_begin, bbox_size)
|
||||
return cropped_image, distort_bbox
|
||||
|
||||
|
||||
def preprocess_for_train(image, height, width, bbox,
|
||||
fast_mode=True,
|
||||
scope=None):
|
||||
"""Distort one image for training a network.
|
||||
|
||||
Distorting images provides a useful technique for augmenting the data
|
||||
set during training in order to make the network invariant to aspects
|
||||
of the image that do not effect the label.
|
||||
|
||||
Additionally it would create image_summaries to display the different
|
||||
transformations applied to the image.
|
||||
|
||||
Args:
|
||||
image: 3-D Tensor of image. If dtype is tf.float32 then the range should be
|
||||
[0, 1], otherwise it would converted to tf.float32 assuming that the range
|
||||
is [0, MAX], where MAX is largest positive representable number for
|
||||
int(8/16/32) data type (see `tf.image.convert_image_dtype` for details).
|
||||
height: integer
|
||||
width: integer
|
||||
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
|
||||
where each coordinate is [0, 1) and the coordinates are arranged
|
||||
as [ymin, xmin, ymax, xmax].
|
||||
fast_mode: Optional boolean, if True avoids slower transformations (i.e.
|
||||
bi-cubic resizing, random_hue or random_contrast).
|
||||
scope: Optional scope for name_scope.
|
||||
Returns:
|
||||
3-D float Tensor of distorted image used for training with range [-1, 1].
|
||||
"""
|
||||
with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]):
|
||||
if bbox is None:
|
||||
bbox = tf.constant([0.0, 0.0, 1.0, 1.0],
|
||||
dtype=tf.float32,
|
||||
shape=[1, 1, 4])
|
||||
if image.dtype != tf.float32:
|
||||
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
|
||||
# Each bounding box has shape [1, num_boxes, box coords] and
|
||||
# the coordinates are ordered [ymin, xmin, ymax, xmax].
|
||||
image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
|
||||
bbox)
|
||||
tf.image_summary('image_with_bounding_boxes', image_with_box)
|
||||
|
||||
distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox)
|
||||
# Restore the shape since the dynamic slice based upon the bbox_size loses
|
||||
# the third dimension.
|
||||
distorted_image.set_shape([None, None, 3])
|
||||
image_with_distorted_box = tf.image.draw_bounding_boxes(
|
||||
tf.expand_dims(image, 0), distorted_bbox)
|
||||
tf.image_summary('images_with_distorted_bounding_box',
|
||||
image_with_distorted_box)
|
||||
|
||||
# This resizing operation may distort the images because the aspect
|
||||
# ratio is not respected. We select a resize method in a round robin
|
||||
# fashion based on the thread number.
|
||||
# Note that ResizeMethod contains 4 enumerated resizing methods.
|
||||
|
||||
# We select only 1 case for fast_mode bilinear.
|
||||
num_resize_cases = 1 if fast_mode else 4
|
||||
distorted_image = apply_with_random_selector(
|
||||
distorted_image,
|
||||
lambda x, method: tf.image.resize_images(x, [height, width], method=method),
|
||||
num_cases=num_resize_cases)
|
||||
|
||||
tf.image_summary('cropped_resized_image',
|
||||
tf.expand_dims(distorted_image, 0))
|
||||
|
||||
# Randomly flip the image horizontally.
|
||||
distorted_image = tf.image.random_flip_left_right(distorted_image)
|
||||
|
||||
# Randomly distort the colors. There are 4 ways to do it.
|
||||
distorted_image = apply_with_random_selector(
|
||||
distorted_image,
|
||||
lambda x, ordering: distort_color(x, ordering, fast_mode),
|
||||
num_cases=4)
|
||||
|
||||
tf.image_summary('final_distorted_image',
|
||||
tf.expand_dims(distorted_image, 0))
|
||||
distorted_image = tf.sub(distorted_image, 0.5)
|
||||
distorted_image = tf.mul(distorted_image, 2.0)
|
||||
return distorted_image
|
||||
|
||||
|
||||
def preprocess_for_eval(image, height, width,
|
||||
central_fraction=0.875, scope=None):
|
||||
"""Prepare one image for evaluation.
|
||||
|
||||
If height and width are specified it would output an image with that size by
|
||||
applying resize_bilinear.
|
||||
|
||||
If central_fraction is specified it would cropt the central fraction of the
|
||||
input image.
|
||||
|
||||
Args:
|
||||
image: 3-D Tensor of image. If dtype is tf.float32 then the range should be
|
||||
[0, 1], otherwise it would converted to tf.float32 assuming that the range
|
||||
is [0, MAX], where MAX is largest positive representable number for
|
||||
int(8/16/32) data type (see `tf.image.convert_image_dtype` for details)
|
||||
height: integer
|
||||
width: integer
|
||||
central_fraction: Optional Float, fraction of the image to crop.
|
||||
scope: Optional scope for name_scope.
|
||||
Returns:
|
||||
3-D float Tensor of prepared image.
|
||||
"""
|
||||
with tf.name_scope(scope, 'eval_image', [image, height, width]):
|
||||
if image.dtype != tf.float32:
|
||||
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
|
||||
# Crop the central region of the image with an area containing 87.5% of
|
||||
# the original image.
|
||||
if central_fraction:
|
||||
image = tf.image.central_crop(image, central_fraction=central_fraction)
|
||||
|
||||
if height and width:
|
||||
# Resize the image to the specified height and width.
|
||||
image = tf.expand_dims(image, 0)
|
||||
image = tf.image.resize_bilinear(image, [height, width],
|
||||
align_corners=False)
|
||||
image = tf.squeeze(image, [0])
|
||||
image = tf.sub(image, 0.5)
|
||||
image = tf.mul(image, 2.0)
|
||||
return image
|
||||
|
||||
|
||||
def preprocess_image(image, height, width,
|
||||
is_training=False,
|
||||
bbox=None,
|
||||
fast_mode=True):
|
||||
"""Pre-process one image for training or evaluation.
|
||||
|
||||
Args:
|
||||
image: 3-D Tensor [height, width, channels] with the image.
|
||||
height: integer, image expected height.
|
||||
width: integer, image expected width.
|
||||
is_training: Boolean. If true it would transform an image for train,
|
||||
otherwise it would transform it for evaluation.
|
||||
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
|
||||
where each coordinate is [0, 1) and the coordinates are arranged as
|
||||
[ymin, xmin, ymax, xmax].
|
||||
fast_mode: Optional boolean, if True avoids slower transformations.
|
||||
|
||||
Returns:
|
||||
3-D float Tensor containing an appropriately scaled image
|
||||
|
||||
Raises:
|
||||
ValueError: if user does not provide bounding box
|
||||
"""
|
||||
if is_training:
|
||||
return preprocess_for_train(image, height, width, bbox, fast_mode)
|
||||
else:
|
||||
return preprocess_for_eval(image, height, width)
|
||||
@ -0,0 +1,44 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Provides utilities for preprocessing."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow.compat.v1 as tf
|
||||
tf.disable_v2_behavior()
|
||||
import tf_slim as slim
|
||||
|
||||
|
||||
def preprocess_image(image, output_height, output_width, is_training):
|
||||
"""Preprocesses the given image.
|
||||
|
||||
Args:
|
||||
image: A `Tensor` representing an image of arbitrary size.
|
||||
output_height: The height of the image after preprocessing.
|
||||
output_width: The width of the image after preprocessing.
|
||||
is_training: `True` if we're preprocessing the image for training and
|
||||
`False` otherwise.
|
||||
|
||||
Returns:
|
||||
A preprocessed image.
|
||||
"""
|
||||
image = tf.to_float(image)
|
||||
image = tf.image.resize_image_with_crop_or_pad(
|
||||
image, output_width, output_height)
|
||||
image = tf.sub(image, 128.0)
|
||||
image = tf.div(image, 128.0)
|
||||
return image
|
||||
@ -0,0 +1,76 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains a factory for building various models."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow.compat.v1 as tf
|
||||
tf.disable_v2_behavior()
|
||||
import tf_slim as slim
|
||||
|
||||
from preprocessing import cifarnet_preprocessing
|
||||
from preprocessing import inception_preprocessing
|
||||
from preprocessing import lenet_preprocessing
|
||||
from preprocessing import vgg_preprocessing
|
||||
|
||||
|
||||
def get_preprocessing(name, is_training=False):
|
||||
"""Returns preprocessing_fn(image, height, width, **kwargs).
|
||||
|
||||
Args:
|
||||
name: The name of the preprocessing function.
|
||||
is_training: `True` if the model is being used for training and `False`
|
||||
otherwise.
|
||||
|
||||
Returns:
|
||||
preprocessing_fn: A function that preprocessing a single image (pre-batch).
|
||||
It has the following signature:
|
||||
image = preprocessing_fn(image, output_height, output_width, ...).
|
||||
|
||||
Raises:
|
||||
ValueError: If Preprocessing `name` is not recognized.
|
||||
"""
|
||||
preprocessing_fn_map = {
|
||||
'cifarnet': cifarnet_preprocessing,
|
||||
'inception': inception_preprocessing,
|
||||
'inception_v1': inception_preprocessing,
|
||||
'inception_v2': inception_preprocessing,
|
||||
'inception_v3': inception_preprocessing,
|
||||
'inception_v4': inception_preprocessing,
|
||||
'inception_resnet_v2': inception_preprocessing,
|
||||
'lenet': lenet_preprocessing,
|
||||
'resnet_v1_50': vgg_preprocessing,
|
||||
'resnet_v1_101': vgg_preprocessing,
|
||||
'resnet_v1_152': vgg_preprocessing,
|
||||
'vgg': vgg_preprocessing,
|
||||
'vgg_a': vgg_preprocessing,
|
||||
'vgg_16': vgg_preprocessing,
|
||||
'vgg_19': vgg_preprocessing,
|
||||
}
|
||||
|
||||
if name not in preprocessing_fn_map:
|
||||
raise ValueError('Preprocessing name [%s] was not recognized' % name)
|
||||
|
||||
def preprocessing_fn(image, output_height, output_width, **kwargs):
|
||||
return preprocessing_fn_map[name].preprocess_image(
|
||||
image, output_height, output_width, is_training=is_training, **kwargs)
|
||||
|
||||
def unprocessing_fn(image, **kwargs):
|
||||
return preprocessing_fn_map[name].unprocess_image(
|
||||
image, **kwargs)
|
||||
|
||||
return preprocessing_fn, unprocessing_fn
|
||||
@ -0,0 +1,393 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Provides utilities to preprocess images.
|
||||
|
||||
The preprocessing steps for VGG were introduced in the following technical
|
||||
report:
|
||||
|
||||
Very Deep Convolutional Networks For Large-Scale Image Recognition
|
||||
Karen Simonyan and Andrew Zisserman
|
||||
arXiv technical report, 2015
|
||||
PDF: http://arxiv.org/pdf/1409.1556.pdf
|
||||
ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
|
||||
CC-BY-4.0
|
||||
|
||||
More information can be obtained from the VGG website:
|
||||
www.robots.ox.ac.uk/~vgg/research/very_deep/
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow.compat.v1 as tf
|
||||
tf.disable_v2_behavior()
|
||||
import tf_slim as slim
|
||||
|
||||
from tensorflow.python.ops import control_flow_ops
|
||||
|
||||
|
||||
_R_MEAN = 123.68
|
||||
_G_MEAN = 116.78
|
||||
_B_MEAN = 103.94
|
||||
|
||||
_RESIZE_SIDE_MIN = 256
|
||||
_RESIZE_SIDE_MAX = 512
|
||||
|
||||
|
||||
def _crop(image, offset_height, offset_width, crop_height, crop_width):
|
||||
"""Crops the given image using the provided offsets and sizes.
|
||||
|
||||
Note that the method doesn't assume we know the input image size but it does
|
||||
assume we know the input image rank.
|
||||
|
||||
Args:
|
||||
image: an image of shape [height, width, channels].
|
||||
offset_height: a scalar tensor indicating the height offset.
|
||||
offset_width: a scalar tensor indicating the width offset.
|
||||
crop_height: the height of the cropped image.
|
||||
crop_width: the width of the cropped image.
|
||||
|
||||
Returns:
|
||||
the cropped (and resized) image.
|
||||
|
||||
Raises:
|
||||
InvalidArgumentError: if the rank is not 3 or if the image dimensions are
|
||||
less than the crop size.
|
||||
"""
|
||||
original_shape = tf.shape(image)
|
||||
|
||||
rank_assertion = tf.Assert(
|
||||
tf.equal(tf.rank(image), 3),
|
||||
['Rank of image must be equal to 3.'])
|
||||
cropped_shape = control_flow_ops.with_dependencies(
|
||||
[rank_assertion],
|
||||
tf.stack([crop_height, crop_width, original_shape[2]]))
|
||||
|
||||
# print(original_shape[0], crop_height)
|
||||
# print(original_shape[1], crop_width)
|
||||
size_assertion = tf.Assert(
|
||||
tf.logical_and(
|
||||
tf.greater_equal(original_shape[0], crop_height),
|
||||
tf.greater_equal(original_shape[1], crop_width)),
|
||||
['Crop size greater than the image size.'])
|
||||
|
||||
offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))
|
||||
|
||||
# Use tf.slice instead of crop_to_bounding box as it accepts tensors to
|
||||
# define the crop size.
|
||||
image = control_flow_ops.with_dependencies(
|
||||
[size_assertion],
|
||||
tf.slice(image, offsets, cropped_shape))
|
||||
return tf.reshape(image, cropped_shape)
|
||||
|
||||
|
||||
def _random_crop(image_list, crop_height, crop_width):
|
||||
"""Crops the given list of images.
|
||||
|
||||
The function applies the same crop to each image in the list. This can be
|
||||
effectively applied when there are multiple image inputs of the same
|
||||
dimension such as:
|
||||
|
||||
image, depths, normals = _random_crop([image, depths, normals], 120, 150)
|
||||
|
||||
Args:
|
||||
image_list: a list of image tensors of the same dimension but possibly
|
||||
varying channel.
|
||||
crop_height: the new height.
|
||||
crop_width: the new width.
|
||||
|
||||
Returns:
|
||||
the image_list with cropped images.
|
||||
|
||||
Raises:
|
||||
ValueError: if there are multiple image inputs provided with different size
|
||||
or the images are smaller than the crop dimensions.
|
||||
"""
|
||||
if not image_list:
|
||||
raise ValueError('Empty image_list.')
|
||||
|
||||
# Compute the rank assertions.
|
||||
rank_assertions = []
|
||||
for i in range(len(image_list)):
|
||||
image_rank = tf.rank(image_list[i])
|
||||
rank_assert = tf.Assert(
|
||||
tf.equal(image_rank, 3),
|
||||
['Wrong rank for tensor %s [expected] [actual]',
|
||||
image_list[i].name, 3, image_rank])
|
||||
rank_assertions.append(rank_assert)
|
||||
|
||||
image_shape = control_flow_ops.with_dependencies(
|
||||
[rank_assertions[0]],
|
||||
tf.shape(image_list[0]))
|
||||
image_height = image_shape[0]
|
||||
image_width = image_shape[1]
|
||||
crop_size_assert = tf.Assert(
|
||||
tf.logical_and(
|
||||
tf.greater_equal(image_height, crop_height),
|
||||
tf.greater_equal(image_width, crop_width)),
|
||||
['Crop size greater than the image size.'])
|
||||
|
||||
asserts = [rank_assertions[0], crop_size_assert]
|
||||
|
||||
for i in range(1, len(image_list)):
|
||||
image = image_list[i]
|
||||
asserts.append(rank_assertions[i])
|
||||
shape = control_flow_ops.with_dependencies([rank_assertions[i]],
|
||||
tf.shape(image))
|
||||
height = shape[0]
|
||||
width = shape[1]
|
||||
|
||||
height_assert = tf.Assert(
|
||||
tf.equal(height, image_height),
|
||||
['Wrong height for tensor %s [expected][actual]',
|
||||
image.name, height, image_height])
|
||||
width_assert = tf.Assert(
|
||||
tf.equal(width, image_width),
|
||||
['Wrong width for tensor %s [expected][actual]',
|
||||
image.name, width, image_width])
|
||||
asserts.extend([height_assert, width_assert])
|
||||
|
||||
# Create a random bounding box.
|
||||
#
|
||||
# Use tf.random_uniform and not numpy.random.rand as doing the former would
|
||||
# generate random numbers at graph eval time, unlike the latter which
|
||||
# generates random numbers at graph definition time.
|
||||
max_offset_height = control_flow_ops.with_dependencies(
|
||||
asserts, tf.reshape(image_height - crop_height + 1, []))
|
||||
max_offset_width = control_flow_ops.with_dependencies(
|
||||
asserts, tf.reshape(image_width - crop_width + 1, []))
|
||||
offset_height = tf.random_uniform(
|
||||
[], maxval=max_offset_height, dtype=tf.int32)
|
||||
offset_width = tf.random_uniform(
|
||||
[], maxval=max_offset_width, dtype=tf.int32)
|
||||
|
||||
return [_crop(image, offset_height, offset_width,
|
||||
crop_height, crop_width) for image in image_list]
|
||||
|
||||
|
||||
def _central_crop(image_list, crop_height, crop_width):
|
||||
"""Performs central crops of the given image list.
|
||||
|
||||
Args:
|
||||
image_list: a list of image tensors of the same dimension but possibly
|
||||
varying channel.
|
||||
crop_height: the height of the image following the crop.
|
||||
crop_width: the width of the image following the crop.
|
||||
|
||||
Returns:
|
||||
the list of cropped images.
|
||||
"""
|
||||
outputs = []
|
||||
for image in image_list:
|
||||
image_height = tf.shape(image)[0]
|
||||
image_width = tf.shape(image)[1]
|
||||
|
||||
offset_height = (image_height - crop_height) / 2
|
||||
offset_width = (image_width - crop_width) / 2
|
||||
outputs.append(_crop(image, offset_height, offset_width,
|
||||
crop_height, crop_width))
|
||||
return outputs
|
||||
|
||||
|
||||
def _mean_image_subtraction(image, means):
|
||||
"""Subtracts the given means from each image channel.
|
||||
|
||||
For example:
|
||||
means = [123.68, 116.779, 103.939]
|
||||
image = _mean_image_subtraction(image, means)
|
||||
|
||||
Note that the rank of `image` must be known.
|
||||
|
||||
Args:
|
||||
image: a tensor of size [height, width, C].
|
||||
means: a C-vector of values to subtract from each channel.
|
||||
|
||||
Returns:
|
||||
the centered image.
|
||||
|
||||
Raises:
|
||||
ValueError: If the rank of `image` is unknown, if `image` has a rank other
|
||||
than three or if the number of channels in `image` doesn't match the
|
||||
number of values in `means`.
|
||||
"""
|
||||
if image.get_shape().ndims != 3:
|
||||
raise ValueError('Input must be of size [height, width, C>0]')
|
||||
num_channels = image.get_shape().as_list()[-1]
|
||||
if len(means) != num_channels:
|
||||
raise ValueError('len(means) must match the number of channels')
|
||||
|
||||
channels = tf.split(image, num_channels, 2)
|
||||
for i in range(num_channels):
|
||||
channels[i] -= means[i]
|
||||
return tf.concat(channels, 2)
|
||||
|
||||
|
||||
def _mean_image_add(image, means):
|
||||
if image.get_shape().ndims != 3:
|
||||
raise ValueError('Input must be of size [height, width, C>0]')
|
||||
num_channels = image.get_shape().as_list()[-1]
|
||||
if len(means) != num_channels:
|
||||
raise ValueError('len(means) must match the number of channels')
|
||||
|
||||
channels = tf.split(image, num_channels, 2)
|
||||
for i in range(num_channels):
|
||||
channels[i] += means[i]
|
||||
return tf.concat(channels, 2)
|
||||
|
||||
|
||||
def _smallest_size_at_least(height, width, target_height, target_width):
|
||||
"""Computes new shape with the smallest side equal to `smallest_side`.
|
||||
|
||||
Computes new shape with the smallest side equal to `smallest_side` while
|
||||
preserving the original aspect ratio.
|
||||
|
||||
Args:
|
||||
height: an int32 scalar tensor indicating the current height.
|
||||
width: an int32 scalar tensor indicating the current width.
|
||||
smallest_side: A python integer or scalar `Tensor` indicating the size of
|
||||
the smallest side after resize.
|
||||
|
||||
Returns:
|
||||
new_height: an int32 scalar tensor indicating the new height.
|
||||
new_width: and int32 scalar tensor indicating the new width.
|
||||
"""
|
||||
target_height = tf.convert_to_tensor(target_height, dtype=tf.int32)
|
||||
target_width = tf.convert_to_tensor(target_width, dtype=tf.int32)
|
||||
|
||||
height = tf.to_float(height)
|
||||
width = tf.to_float(width)
|
||||
target_height = tf.to_float(target_height)
|
||||
target_width = tf.to_float(target_width)
|
||||
|
||||
scale = tf.cond(tf.greater(target_height / height, target_width / width),
|
||||
lambda: target_height / height,
|
||||
lambda: target_width / width)
|
||||
new_height = tf.to_int32(tf.round(height * scale))
|
||||
new_width = tf.to_int32(tf.round(width * scale))
|
||||
return new_height, new_width
|
||||
|
||||
|
||||
def _aspect_preserving_resize(image, target_height, target_width):
|
||||
"""Resize images preserving the original aspect ratio.
|
||||
|
||||
Args:
|
||||
image: A 3-D image `Tensor`.
|
||||
smallest_side: A python integer or scalar `Tensor` indicating the size of
|
||||
the smallest side after resize.
|
||||
|
||||
Returns:
|
||||
resized_image: A 3-D tensor containing the resized image.
|
||||
"""
|
||||
target_height = tf.convert_to_tensor(target_height, dtype=tf.int32)
|
||||
target_width = tf.convert_to_tensor(target_width, dtype=tf.int32)
|
||||
|
||||
shape = tf.shape(image)
|
||||
height = shape[0]
|
||||
width = shape[1]
|
||||
new_height, new_width = _smallest_size_at_least(height, width, target_height, target_width)
|
||||
image = tf.expand_dims(image, 0)
|
||||
resized_image = tf.image.resize_bilinear(image, [new_height, new_width],
|
||||
align_corners=False)
|
||||
resized_image = tf.squeeze(resized_image)
|
||||
resized_image.set_shape([None, None, 3])
|
||||
return resized_image
|
||||
|
||||
|
||||
def preprocess_for_train(image,
|
||||
output_height,
|
||||
output_width,
|
||||
resize_side_min=_RESIZE_SIDE_MIN,
|
||||
resize_side_max=_RESIZE_SIDE_MAX):
|
||||
"""Preprocesses the given image for training.
|
||||
|
||||
Note that the actual resizing scale is sampled from
|
||||
[`resize_size_min`, `resize_size_max`].
|
||||
|
||||
Args:
|
||||
image: A `Tensor` representing an image of arbitrary size.
|
||||
output_height: The height of the image after preprocessing.
|
||||
output_width: The width of the image after preprocessing.
|
||||
resize_side_min: The lower bound for the smallest side of the image for
|
||||
aspect-preserving resizing.
|
||||
resize_side_max: The upper bound for the smallest side of the image for
|
||||
aspect-preserving resizing.
|
||||
|
||||
Returns:
|
||||
A preprocessed image.
|
||||
"""
|
||||
resize_side = tf.random_uniform(
|
||||
[], minval=resize_side_min, maxval=resize_side_max + 1, dtype=tf.int32)
|
||||
|
||||
image = _aspect_preserving_resize(image, resize_side)
|
||||
image = _random_crop([image], output_height, output_width)[0]
|
||||
image.set_shape([output_height, output_width, 3])
|
||||
image = tf.to_float(image)
|
||||
image = tf.image.random_flip_left_right(image)
|
||||
return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
|
||||
|
||||
|
||||
def preprocess_for_eval(image, output_height, output_width, resize_side):
|
||||
"""Preprocesses the given image for evaluation.
|
||||
|
||||
Args:
|
||||
image: A `Tensor` representing an image of arbitrary size.
|
||||
output_height: The height of the image after preprocessing.
|
||||
output_width: The width of the image after preprocessing.
|
||||
|
||||
Returns:
|
||||
A preprocessed image.
|
||||
"""
|
||||
image = _aspect_preserving_resize(image, output_height, output_width)
|
||||
image = _central_crop([image], output_height, output_width)[0]
|
||||
# image = tf.image.resize_image_with_crop_or_pad(image, output_height, output_width)
|
||||
image.set_shape([output_height, output_width, 3])
|
||||
image = tf.to_float(image)
|
||||
return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
|
||||
|
||||
|
||||
def preprocess_image(image, output_height, output_width, is_training=False,
|
||||
resize_side_min=_RESIZE_SIDE_MIN,
|
||||
resize_side_max=_RESIZE_SIDE_MAX,
|
||||
):
|
||||
"""Preprocesses the given image.
|
||||
|
||||
Args:
|
||||
image: A `Tensor` representing an image of arbitrary size.
|
||||
output_height: The height of the image after preprocessing.
|
||||
output_width: The width of the image after preprocessing.
|
||||
is_training: `True` if we're preprocessing the image for training and
|
||||
`False` otherwise.
|
||||
resize_side_min: The lower bound for the smallest side of the image for
|
||||
aspect-preserving resizing. If `is_training` is `False`, then this value
|
||||
is used for rescaling.
|
||||
resize_side_max: The upper bound for the smallest side of the image for
|
||||
aspect-preserving resizing. If `is_training` is `False`, this value is
|
||||
ignored. Otherwise, the resize side is sampled from
|
||||
[resize_size_min, resize_size_max].
|
||||
|
||||
Returns:
|
||||
A preprocessed image.
|
||||
"""
|
||||
if is_training:
|
||||
return preprocess_for_train(image, output_height, output_width,
|
||||
resize_side_min, resize_side_max)
|
||||
else:
|
||||
return preprocess_for_eval(image, output_height, output_width,
|
||||
resize_side_min)
|
||||
|
||||
|
||||
def unprocess_image(image):
|
||||
return _mean_image_add(image, [_R_MEAN, _G_MEAN, _B_MEAN])
|
||||
@ -0,0 +1,27 @@
|
||||
from os import listdir
|
||||
from os.path import isfile, join
|
||||
import tensorflow.compat.v1 as tf
|
||||
tf.disable_v2_behavior()
|
||||
|
||||
|
||||
def get_image(path, height, width, preprocess_fn):
|
||||
png = path.lower().endswith('png')
|
||||
img_bytes = tf.read_file(path)
|
||||
image = tf.image.decode_png(img_bytes, channels=3) if png else tf.image.decode_jpeg(img_bytes, channels=3)
|
||||
return preprocess_fn(image, height, width)
|
||||
|
||||
|
||||
def image(batch_size, height, width, path, preprocess_fn, epochs=2, shuffle=True):
|
||||
filenames = [join(path, f) for f in listdir(path) if isfile(join(path, f))]
|
||||
if not shuffle:
|
||||
filenames = sorted(filenames)
|
||||
|
||||
png = filenames[0].lower().endswith('png') # If first file is a png, assume they all are
|
||||
|
||||
filename_queue = tf.train.string_input_producer(filenames, shuffle=shuffle, num_epochs=epochs)
|
||||
reader = tf.WholeFileReader()
|
||||
_, img_bytes = reader.read(filename_queue)
|
||||
image = tf.image.decode_png(img_bytes, channels=3) if png else tf.image.decode_jpeg(img_bytes, channels=3)
|
||||
|
||||
processed_image = preprocess_fn(image, height, width)
|
||||
return tf.train.batch([processed_image], batch_size, dynamic_pad=True)
|
||||
@ -0,0 +1,146 @@
|
||||
# coding: utf-8
|
||||
from __future__ import print_function
|
||||
from __future__ import division
|
||||
import tensorflow as tf
|
||||
from nets import nets_factory
|
||||
from preprocessing import preprocessing_factory
|
||||
import reader
|
||||
import model
|
||||
import time
|
||||
import losses
|
||||
import utils
|
||||
import os
|
||||
import argparse
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-c', '--conf', default='conf/mosaic.yml', help='the path to the conf file')
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main(FLAGS):
|
||||
style_features_t = losses.get_style_features(FLAGS)
|
||||
|
||||
# Make sure the training path exists.
|
||||
training_path = os.path.join(FLAGS.model_path, FLAGS.naming)
|
||||
if not(os.path.exists(training_path)):
|
||||
os.makedirs(training_path)
|
||||
|
||||
with tf.Graph().as_default():
|
||||
with tf.Session() as sess:
|
||||
"""Build Network"""
|
||||
network_fn = nets_factory.get_network_fn(
|
||||
FLAGS.loss_model,
|
||||
num_classes=1,
|
||||
is_training=False)
|
||||
|
||||
image_preprocessing_fn, image_unprocessing_fn = preprocessing_factory.get_preprocessing(
|
||||
FLAGS.loss_model,
|
||||
is_training=False)
|
||||
processed_images = reader.image(FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size,
|
||||
'train2014/', image_preprocessing_fn, epochs=FLAGS.epoch)
|
||||
generated = model.net(processed_images, training=True)
|
||||
processed_generated = [image_preprocessing_fn(image, FLAGS.image_size, FLAGS.image_size)
|
||||
for image in tf.unstack(generated, axis=0, num=FLAGS.batch_size)
|
||||
]
|
||||
processed_generated = tf.stack(processed_generated)
|
||||
_, endpoints_dict = network_fn(tf.concat([processed_generated, processed_images], 0), spatial_squeeze=False)
|
||||
|
||||
# Log the structure of loss network
|
||||
tf.logging.info('Loss network layers(You can define them in "content_layers" and "style_layers"):')
|
||||
for key in endpoints_dict:
|
||||
tf.logging.info(key)
|
||||
|
||||
"""Build Losses"""
|
||||
content_loss = losses.content_loss(endpoints_dict, FLAGS.content_layers)
|
||||
style_loss, style_loss_summary = losses.style_loss(endpoints_dict, style_features_t, FLAGS.style_layers)
|
||||
tv_loss = losses.total_variation_loss(generated) # use the unprocessed image
|
||||
|
||||
loss = FLAGS.style_weight * style_loss + FLAGS.content_weight * content_loss + FLAGS.tv_weight * tv_loss
|
||||
|
||||
# Add Summary for visualization in tensorboard.
|
||||
"""Add Summary"""
|
||||
tf.summary.scalar('losses/content_loss', content_loss)
|
||||
tf.summary.scalar('losses/style_loss', style_loss)
|
||||
tf.summary.scalar('losses/regularizer_loss', tv_loss)
|
||||
|
||||
tf.summary.scalar('weighted_losses/weighted_content_loss', content_loss * FLAGS.content_weight)
|
||||
tf.summary.scalar('weighted_losses/weighted_style_loss', style_loss * FLAGS.style_weight)
|
||||
tf.summary.scalar('weighted_losses/weighted_regularizer_loss', tv_loss * FLAGS.tv_weight)
|
||||
tf.summary.scalar('total_loss', loss)
|
||||
|
||||
for layer in FLAGS.style_layers:
|
||||
tf.summary.scalar('style_losses/' + layer, style_loss_summary[layer])
|
||||
tf.summary.image('generated', generated)
|
||||
# tf.image_summary('processed_generated', processed_generated) # May be better?
|
||||
tf.summary.image('origin', tf.stack([
|
||||
image_unprocessing_fn(image) for image in tf.unstack(processed_images, axis=0, num=FLAGS.batch_size)
|
||||
]))
|
||||
summary = tf.summary.merge_all()
|
||||
writer = tf.summary.FileWriter(training_path)
|
||||
|
||||
"""Prepare to Train"""
|
||||
global_step = tf.Variable(0, name="global_step", trainable=False)
|
||||
|
||||
variable_to_train = []
|
||||
for variable in tf.trainable_variables():
|
||||
if not(variable.name.startswith(FLAGS.loss_model)):
|
||||
variable_to_train.append(variable)
|
||||
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss, global_step=global_step, var_list=variable_to_train)
|
||||
|
||||
variables_to_restore = []
|
||||
for v in tf.global_variables():
|
||||
if not(v.name.startswith(FLAGS.loss_model)):
|
||||
variables_to_restore.append(v)
|
||||
saver = tf.train.Saver(variables_to_restore, write_version=tf.train.SaverDef.V1)
|
||||
|
||||
sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
|
||||
|
||||
# Restore variables for loss network.
|
||||
init_func = utils._get_init_fn(FLAGS)
|
||||
init_func(sess)
|
||||
|
||||
# Restore variables for training model if the checkpoint file exists.
|
||||
last_file = tf.train.latest_checkpoint(training_path)
|
||||
if last_file:
|
||||
tf.logging.info('Restoring model from {}'.format(last_file))
|
||||
saver.restore(sess, last_file)
|
||||
|
||||
"""Start Training"""
|
||||
coord = tf.train.Coordinator()
|
||||
threads = tf.train.start_queue_runners(coord=coord)
|
||||
start_time = time.time()
|
||||
try:
|
||||
while not coord.should_stop():
|
||||
_, loss_t, step = sess.run([train_op, loss, global_step])
|
||||
elapsed_time = time.time() - start_time
|
||||
start_time = time.time()
|
||||
"""logging"""
|
||||
# print(step)
|
||||
if step % 10 == 0:
|
||||
tf.logging.info('step: %d, total Loss %f, secs/step: %f' % (step, loss_t, elapsed_time))
|
||||
"""summary"""
|
||||
if step % 25 == 0:
|
||||
tf.logging.info('adding summary...')
|
||||
summary_str = sess.run(summary)
|
||||
writer.add_summary(summary_str, step)
|
||||
writer.flush()
|
||||
"""checkpoint"""
|
||||
if step % 1000 == 0:
|
||||
saver.save(sess, os.path.join(training_path, 'fast-style-model.ckpt'), global_step=step)
|
||||
except tf.errors.OutOfRangeError:
|
||||
saver.save(sess, os.path.join(training_path, 'fast-style-model.ckpt-done'))
|
||||
tf.logging.info('Done training -- epoch limit reached')
|
||||
finally:
|
||||
coord.request_stop()
|
||||
coord.join(threads)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.logging.set_verbosity(tf.logging.INFO)
|
||||
args = parse_args()
|
||||
FLAGS = utils.read_conf_file(args.conf)
|
||||
main(FLAGS)
|
||||