# coding:utf-8
"""Image-processing helpers (OpenCV / NumPy) and a first-order-model driver.

Every image helper writes its result to ``save_path`` and returns it.
Images are assumed to be ``uint8`` arrays in OpenCV BGR channel order unless a
function says otherwise -- NOTE(review): confirm against the callers.
"""
import random
import cv2 as cv
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from numpy import fft
import math
import matplotlib

matplotlib.use('Agg')
import os
import sys
import yaml
from argparse import ArgumentParser
from tqdm import tqdm

import imageio
from skimage.transform import resize
from skimage import img_as_ubyte
import torch
from sync_batchnorm import DataParallelWithCallback

from modules.generator import OcclusionAwareGenerator
from modules.keypoint_detector import KPDetector
from animate import normalize_kp
from scipy.spatial import ConvexHull

save_path = "resources/output/change.png"

# Maps the integer selector accepted by color_space() to an OpenCV code.
_COLOR_CONVERSIONS = {
    1: cv.COLOR_BGR2HSV,
    2: cv.COLOR_BGR2GRAY,
    3: cv.COLOR_BGR2BGRA,
    4: cv.COLOR_BGR2HLS,
    5: cv.COLOR_BGR2YUV,
}


# --------------------------------------------------------------------------
# Shared private helpers
# --------------------------------------------------------------------------
def _per_channel(img, func):
    """Apply ``func`` to a 2-D image, or to channels 0-2 separately and merge.

    Any channel beyond the third (e.g. alpha) is dropped, as in the original
    per-channel code.
    """
    if img.ndim == 2:
        return func(img)
    return cv.merge([func(img[:, :, c]) for c in range(3)])


def _neighbourhood(channel, pad_value):
    """Return a float64 (9, H, W) stack of every pixel's 3x3 neighbourhood.

    Layers are ordered row-major (top-left neighbour first). Positions that
    fall outside the image hold ``pad_value`` so callers can pick a neutral
    element for their reduction (0 for sums, 1 for products, NaN for medians).
    """
    h, w = channel.shape
    padded = np.full((h + 2, w + 2), pad_value, dtype=np.float64)
    padded[1:-1, 1:-1] = channel
    return np.stack([padded[dy:dy + h, dx:dx + w]
                     for dy in range(3) for dx in range(3)])


def _in_bounds_count(shape):
    """Return, per pixel, how many of its 3x3 neighbours lie inside the image."""
    return _neighbourhood(np.ones(shape), 0.0).sum(axis=0)


# --------------------------------------------------------------------------
# Basic operations
# --------------------------------------------------------------------------
def Drawworld(img, text, p_x, p_y, font_type, font_size, bold, color):
    """Draw ``text`` on ``img`` at (p_x, p_y); ``bold`` is the line thickness.

    ``cv.putText`` draws in place, so ``img`` itself is modified.
    """
    out = cv.putText(img, text, (p_x, p_y), font_type, font_size, color, bold)
    cv.imwrite(save_path, img)
    return out


def color_space(img, c_type):
    """Convert a BGR image to another colour space.

    ``c_type``: 1=HSV, 2=gray, 3=BGRA, 4=HLS, 5=YUV.

    Raises:
        ValueError: if ``c_type`` is not one of the values above (the
            original fell through and failed with UnboundLocalError).
    """
    try:
        code = _COLOR_CONVERSIONS[c_type]
    except KeyError:
        raise ValueError("unsupported c_type: %r" % (c_type,)) from None
    out = cv.cvtColor(img, code)
    cv.imwrite(save_path, out)
    return out


def FFT(img):
    """Save and return the log-magnitude Fourier spectrum of channel 0.

    Zero-magnitude bins yield ``-inf`` (NumPy emits a RuntimeWarning); this is
    unchanged from the original.
    """
    spectrum = np.fft.fftshift(np.fft.fft2(img[:, :, 0]))
    out = 20 * np.log(np.abs(spectrum))
    cv.imwrite(save_path, out)
    return out


def DCT(img):
    """Save and return the discrete cosine transform of channel 0.

    NOTE(review): ``cv.dct`` is documented as supporting even-sized arrays
    only -- confirm inputs satisfy that.
    """
    out = cv.dct(img[:, :, 0].astype(np.float32))
    cv.imwrite(save_path, out)
    return out


def affinetransform(img):
    """Apply a fixed affine warp that pulls two corners 30 px inwards."""
    height, width = img.shape[:2]
    # Three corresponding points in the source and destination images.
    mat_src = np.float32([[0, 0], [0, height - 1], [width - 1, 0]])
    mat_dst = np.float32([[0, 0], [30, height - 30], [width - 30, 30]])
    mat_affine = cv.getAffineTransform(mat_src, mat_dst)
    out = cv.warpAffine(img, mat_affine, (width, height))
    cv.imwrite(save_path, out)
    return out


# --------------------------------------------------------------------------
# Histogram operations
# --------------------------------------------------------------------------
def Linear_hist(img):
    """Linearly stretch each channel's intensity range to [0, 255]."""

    def stretch(channel):
        # y = a * x + b, mapping [lo, hi] onto [0, 255].
        lo, hi = float(channel.min()), float(channel.max())
        if hi == lo:
            # Flat channel: nothing to stretch, and the slope would be x/0.
            return channel.astype(np.uint8)
        a = 255.0 / (hi - lo)
        b = -a * lo
        return (a * channel + b).astype(np.uint8)

    out = _per_channel(img, stretch)
    cv.imwrite(save_path, out)
    return out


def Ninear_hist(img):
    """Apply the non-linear mapping ``3 * x ** 0.8`` to every pixel.

    Unlike the original, the input image is left untouched.
    """

    def gamma(channel):
        return (3 * np.power(channel.astype(np.float64), 0.8)).astype(np.uint8)

    out = _per_channel(img, gamma)
    cv.imwrite(save_path, out)
    return out


def adaptive_equalization(img):
    """Apply CLAHE (clip limit 2.0, 8x8 tiles) to each channel."""
    clahe = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    out = _per_channel(img, clahe.apply)
    cv.imwrite(save_path, out)
    return out


def global_equalization(img):
    """Apply global histogram equalisation to each channel."""
    out = _per_channel(img, cv.equalizeHist)
    cv.imwrite(save_path, out)
    return out


# --------------------------------------------------------------------------
# Smoothing / sharpening
# --------------------------------------------------------------------------
def ave_blur(img):
    """Apply a 5x5 box (mean) blur. The input image is left untouched."""
    out = _per_channel(img, lambda ch: cv.blur(ch, (5, 5)))
    cv.imwrite(save_path, out)
    return out


def gau_blur(img):
    """Apply a 7x7 Gaussian blur with sigma 10."""
    out = _per_channel(img, lambda ch: cv.GaussianBlur(ch, (7, 7), 10))
    cv.imwrite(save_path, out)
    return out


def mid_blur(img):
    """Apply a 5x5 median blur."""
    out = _per_channel(img, lambda ch: cv.medianBlur(ch, 5))
    cv.imwrite(save_path, out)
    return out


def l_sharpen3(img):
    """Sharpen with the 4-neighbour Laplacian kernel (centre weight 5)."""
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]], np.float32)
    out = cv.filter2D(img, -1, kernel)
    cv.imwrite(save_path, out)
    return out


def makeup(image, value):
    """Skin smoothing: 5x5 median blur followed by a bilateral filter.

    ``value`` is halved and then used as the bilateral diameter; sigmaColor is
    twice that and sigmaSpace half of it.
    """
    smoothed = cv.merge([cv.medianBlur(image[:, :, c], 5) for c in range(3)])
    value = int(value / 2)
    out = cv.bilateralFilter(smoothed, value, value * 2, int(value / 2))
    cv.imwrite(save_path, out)
    return out


# --------------------------------------------------------------------------
# Geometry / intensity adjustments
# --------------------------------------------------------------------------
def rotate(image, angle):
    """Rotate ``image`` by ``angle`` degrees about its centre, same canvas size."""
    h, w = image.shape[:2]
    matrix = cv.getRotationMatrix2D((w / 2, h / 2), angle, 1.0)
    out = cv.warpAffine(image, matrix, (w, h))
    cv.imwrite(save_path, out)
    return out


def changescale(image, size):
    """Scale ``image`` by the factor ``size`` using nearest-neighbour sampling."""
    out = cv.resize(image, (0, 0), fx=size, fy=size,
                    interpolation=cv.INTER_NEAREST)
    cv.imwrite(save_path, out)
    return out


def changeflip(image, direction):
    """Flip ``image``; ``direction`` is passed straight to ``cv.flip``."""
    out = cv.flip(image, direction)
    cv.imwrite(save_path, out)
    return out


def updateContrast(image, alpha, beta):
    """Return ``clip(alpha * image + beta, 0, 255)`` as uint8."""
    out = np.uint8(np.clip(alpha * image + beta, 0, 255))
    cv.imwrite(save_path, out)
    return out


def updateBrightness(image, alpha, beta):
    """Return ``clip(alpha * image + beta - 150, 0, 255)`` as uint8."""
    out = np.uint8(np.clip(alpha * image + beta - 150, 0, 255))
    cv.imwrite(save_path, out)
    return out


def correct(image):
    """Perspective-correct a fixed quadrilateral onto a 320x200 canvas.

    The source corner coordinates are hard-coded -- presumably tuned for one
    sample image; verify before using on other inputs.
    """
    pts_src = np.float32([[158, 25], [267, 136], [58, 66], [144, 212]])
    # Destination corners: top-left, top-right, bottom-left, bottom-right.
    pts_dst = np.float32([[0, 0], [320, 0], [0, 200], [320, 200]])
    matrix = cv.getPerspectiveTransform(pts_src, pts_dst)
    out = cv.warpPerspective(image, matrix, (320, 200))
    cv.imwrite(save_path, out)
    return out


def removefuzzy(image):
    """Deblur a motion-blurred image with a Wiener filter (30-degree PSF).

    The PSF offsets (-10, +123) are hard-coded -- presumably tuned for the
    sample file ``spoe.jpg``; small images can raise IndexError.
    """

    def motion_process(image_size, motion_angle):
        # Build a normalised point-spread function: 15 taps along the blur line.
        psf = np.zeros(image_size)
        center = (image_size[0] - 1) / 2
        slope_tan = math.tan(motion_angle * math.pi / 180)
        if slope_tan <= 1:
            for i in range(15):
                offset = round(i * slope_tan)
                psf[int(center + offset - 10), int(center - offset + 123)] = 2
        else:
            # The cotangent is only needed (and only finite) on this branch.
            slope_cot = 1 / slope_tan
            for i in range(15):
                offset = round(i * slope_cot)
                psf[int(center - offset - 10), int(center + offset + 123)] = 2
        return psf / psf.sum()

    def wiener(blurred, psf, eps, K=0.02):
        # Wiener deconvolution in the frequency domain.
        blurred_fft = fft.fft2(blurred)
        psf_fft = fft.fft2(psf) + eps
        inverse = np.conj(psf_fft) / (np.abs(psf_fft) ** 2 + K)
        restored = fft.ifft2(blurred_fft * inverse)
        return np.abs(fft.fftshift(restored))

    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    psf = motion_process(gray.shape[:2], 30)
    result = wiener(gray, psf, 1e-3)
    cv.imwrite(save_path, result)
    return result


# --------------------------------------------------------------------------
# Compositing
# --------------------------------------------------------------------------
def conectImage(img1, img2):
    """Place ``img2`` (resized to ``img1``'s size) to the right of ``img1``."""
    h, w, _ = img1.shape
    img2 = cv.merge([
        cv.resize(img2[:, :, c], (w, h), interpolation=cv.INTER_CUBIC)
        for c in range(3)
    ])
    canvas = Image.new('RGBA', (2 * w, h))
    canvas.paste(Image.fromarray(cv.cvtColor(img1, cv.COLOR_BGR2RGB)), (0, 0))
    canvas.paste(Image.fromarray(cv.cvtColor(img2, cv.COLOR_BGR2RGB)), (w, 0))
    out = cv.cvtColor(np.asarray(canvas), cv.COLOR_RGB2BGR)
    cv.imwrite(save_path, out)
    return out


def back_vary(img):
    """Replace a blue background (HSV hue 100-130) with red (BGR 0, 0, 200)."""
    # cv.resize with factor 1 yields a copy, so the caller's image is untouched.
    img = cv.resize(img, None, fx=1, fy=1)
    hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)
    lower_blue = np.array([100, 100, 50])
    upper_blue = np.array([130, 255, 255])
    mask = cv.inRange(hsv, lower_blue, upper_blue)
    # Erode then dilate to remove speckles from the mask.
    mask = cv.dilate(cv.erode(mask, None, iterations=1), None, iterations=1)
    img[mask == 255] = (0, 0, 200)
    cv.imwrite(save_path, img)
    return img


def huantou(img1, img2):
    """Paste a fixed head region of ``img2`` into ``img1`` (in place).

    The regions are hard-coded (255x215 px) -- presumably tuned for the sample
    files ``people_p1/p2.jpg``; smaller images raise a shape-mismatch error.
    """
    img1[35:290, 55:270] = img2[15:270, 55:270]
    cv.imwrite(save_path, img1)
    return img1


# --------------------------------------------------------------------------
# Edge detection / enhancement
# --------------------------------------------------------------------------
def sharpImage(image):
    """Enhance edges by adding the forward-difference gradient to the gray image."""
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY).astype('float')
    gradient = np.zeros(gray.shape)
    core = gray[:-1, :-1]
    # |f(x+1, y) - f(x, y)| + |f(x, y+1) - f(x, y)|; last row/column stay 0.
    gradient[:-1, :-1] = (np.abs(gray[1:, :-1] - core)
                          + np.abs(gray[:-1, 1:] - core))
    sharp = np.clip(gray + gradient, 0, 255).astype('uint8')
    cv.imwrite(save_path, sharp)
    return sharp


def robs(image):
    """Roberts cross edge detector on the grayscale image."""
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    kernel_x = np.array([[-1, 0], [0, 1]], dtype=int)
    kernel_y = np.array([[0, -1], [1, 0]], dtype=int)
    abs_x = cv.convertScaleAbs(cv.filter2D(gray, cv.CV_16S, kernel_x))
    abs_y = cv.convertScaleAbs(cv.filter2D(gray, cv.CV_16S, kernel_y))
    out = cv.addWeighted(abs_x, 0.5, abs_y, 0.5, 0)
    cv.imwrite(save_path, out)
    return out


def prewitt(image):
    """Prewitt edge detector on the grayscale image."""
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    kernel_x = np.array([[1, 1, 1], [0, 0, 0], [-1, -1, -1]])
    kernel_y = np.array([[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]])
    abs_x = cv.convertScaleAbs(cv.filter2D(gray, cv.CV_16S, kernel_x))
    abs_y = cv.convertScaleAbs(cv.filter2D(gray, cv.CV_16S, kernel_y))
    out = cv.addWeighted(abs_x, 0.5, abs_y, 0.5, 0)
    cv.imwrite(save_path, out)
    return out


def sob(image):
    """Sobel edge detector on the grayscale image."""
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    abs_x = cv.convertScaleAbs(cv.Sobel(gray, cv.CV_16S, 1, 0))
    abs_y = cv.convertScaleAbs(cv.Sobel(gray, cv.CV_16S, 0, 1))
    out = cv.addWeighted(abs_x, 0.5, abs_y, 0.5, 0)
    cv.imwrite(save_path, out)
    return out


def lap(image):
    """Laplacian edge detector (5x5 Gaussian pre-blur, 3x3 Laplacian)."""
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    gray = cv.GaussianBlur(gray, (5, 5), 0)
    out = cv.convertScaleAbs(cv.Laplacian(gray, cv.CV_16S, ksize=3))
    cv.imwrite(save_path, out)
    return out


def _log(image):
    """Laplacian-of-Gaussian edge detector.

    NOTE: output is kept identical to the original implementation, which
    looped over channels 0 and 1 and let the second pass overwrite the first.
    The result therefore has three identical channels holding the response of
    channel 1, is 4 px larger than the input in each dimension (replicated
    border), and has a 2 px zero frame.
    """
    rgb = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    # Pad by 2 px so the 5x5 kernel can be centred on every original pixel.
    padded = cv.copyMakeBorder(rgb, 2, 2, 2, 2, borderType=cv.BORDER_REPLICATE)
    padded = cv.GaussianBlur(padded, (3, 3), 0, 0)
    kernel = np.array(
        [[0, 0, -1, 0, 0],
         [0, -1, -2, -1, 0],
         [-1, -2, 16, -2, -1],
         [0, -1, -2, -1, 0],
         [0, 0, -1, 0, 0]])
    rows, cols = padded.shape[:2]
    channel = padded[:, :, 1].astype(np.int64)
    inner = np.zeros((rows - 4, cols - 4), np.int64)
    for dy in range(5):
        for dx in range(5):
            weight = int(kernel[dy, dx])
            if weight:
                inner += weight * channel[dy:dy + rows - 4, dx:dx + cols - 4]
    response = np.zeros(padded.shape)
    response[2:rows - 2, 2:cols - 2] = inner[:, :, np.newaxis]
    out = cv.convertScaleAbs(response)
    cv.imwrite(save_path, out)
    return out


def cny(img):
    """Canny edge detector (thresholds 50 / 150) on Sobel gradients.

    Returns the edge map; the original returned the unmodified input image,
    unlike every other helper in this module.
    """
    blurred = cv.GaussianBlur(img, (3, 3), 0)
    gray = cv.cvtColor(blurred, cv.COLOR_BGR2GRAY)
    grad_x = cv.Sobel(gray, cv.CV_16SC1, 1, 0)
    grad_y = cv.Sobel(gray, cv.CV_16SC1, 0, 1)
    edge_output = cv.Canny(grad_x, grad_y, 50, 150)
    cv.imwrite(save_path, edge_output)
    return edge_output


# --------------------------------------------------------------------------
# Noise
# --------------------------------------------------------------------------
def gaussNoise(image):
    """Add zero-mean Gaussian noise (standard deviation 0.1 on a 0-1 scale)."""
    normalized = np.array(image / 255, dtype=float)
    noise = np.random.normal(0, 0.1, normalized.shape)
    # Clip to [0, 1] so the noise cannot overflow, then rescale to 0-255.
    out = np.uint8(np.clip(normalized + noise, 0.0, 1.0) * 255)
    cv.imwrite(save_path, out)
    return out


def saltNoise(image):
    """Add salt-and-pepper noise: 20% of pixels black, 20% white."""
    prob = 0.2
    thres = 1 - prob
    h, w = image.shape[:2]
    # One draw per pixel from the `random` module, in row-major order, so the
    # result under a given random.seed() matches the original pixel loop.
    draws = np.array([random.random() for _ in range(h * w)],
                     dtype=float).reshape(h, w)
    output = image.astype(np.uint8)
    output[draws < prob] = 0      # pepper
    output[draws > thres] = 255   # salt
    cv.imwrite(save_path, output)
    return output


# --------------------------------------------------------------------------
# Spatial 3x3 filters
# --------------------------------------------------------------------------
def arithFilter(image):
    """3x3 arithmetic-mean filter; the 1 px border is copied unchanged."""

    def mean3x3(channel):
        out = channel.astype(np.uint8)
        total = _neighbourhood(channel, 0.0).sum(axis=0)
        # Accumulating in float64 avoids uint8 wrap-around; floor matches the
        # original truncation.
        out[1:-1, 1:-1] = total[1:-1, 1:-1] // 9
        return out

    output = _per_channel(image, mean3x3)
    cv.imwrite(save_path, output)
    return output


def geometryFilter(image):
    """3x3 geometric-mean filter.

    Border pixels use the number of neighbours actually inside the image as
    the root (the original always used 9, which darkened the border). The
    result is truncated toward zero, as before.
    """

    def geometric3x3(channel):
        product = np.prod(_neighbourhood(channel, 1.0), axis=0)
        count = _in_bounds_count(channel.shape)
        return np.power(product, 1.0 / count).astype(np.uint8)

    output = _per_channel(image, geometric3x3)
    cv.imwrite(save_path, output)
    return output


def harmonyFilter(image):
    """3x3 harmonic-mean filter.

    Zero-valued pixels are skipped in the reciprocal sum (as in the original);
    a window with no non-zero pixel yields 0. Border pixels are normalised by
    the number of in-image neighbours, and results saturate at 255 instead of
    wrapping around.
    """

    def harmonic3x3(channel):
        values = channel.astype(np.float64)
        reciprocal = np.zeros_like(values)
        nonzero = values != 0
        reciprocal[nonzero] = 1.0 / values[nonzero]
        total = _neighbourhood(reciprocal, 0.0).sum(axis=0)
        count = _in_bounds_count(channel.shape)
        result = np.zeros_like(values)
        valid = total != 0
        result[valid] = count[valid] / total[valid]
        return np.minimum(result, 255).astype(np.uint8)

    output = _per_channel(image, harmonic3x3)
    cv.imwrite(save_path, output)
    return output


def maxSortFilter(image):
    """3x3 maximum filter (out-of-image neighbours are ignored)."""
    output = _per_channel(
        image,
        lambda ch: _neighbourhood(ch, -np.inf).max(axis=0).astype(np.uint8))
    cv.imwrite(save_path, output)
    return output


def mediumSortFilter(image):
    """3x3 median filter (out-of-image neighbours are ignored).

    With an even number of in-image neighbours the two middle values are
    averaged and truncated, as ``np.median`` did in the original loop.
    """
    output = _per_channel(
        image,
        lambda ch: np.nanmedian(_neighbourhood(ch, np.nan),
                                axis=0).astype(np.uint8))
    cv.imwrite(save_path, output)
    return output


def minSortFilter(image):
    """3x3 minimum filter (out-of-image neighbours are ignored)."""
    output = _per_channel(
        image,
        lambda ch: _neighbourhood(ch, np.inf).min(axis=0).astype(np.uint8))
    cv.imwrite(save_path, output)
    return output


# --------------------------------------------------------------------------
# Intensity "pass" filters (thresholds on pixel values, not frequencies)
# --------------------------------------------------------------------------
def HighPassFilter(image):
    """Keep pixel values above 150; set everything else to 0."""
    threshold = 150
    output = _per_channel(
        image, lambda ch: np.where(ch > threshold, ch, 0).astype(np.uint8))
    cv.imwrite(save_path, output)
    return output


def LowPassFilter(image):
    """Keep pixel values below 100; set everything else to 255."""
    threshold = 100
    output = _per_channel(
        image, lambda ch: np.where(ch < threshold, ch, 255).astype(np.uint8))
    cv.imwrite(save_path, output)
    return output


def BrandPassFilter(image):
    """Keep pixel values strictly between 100 and 200; set the rest to 0.

    Each channel is tested against its own values; the original tested the
    G and R channels against B, a copy-paste slip.
    """
    lower, upper = 100, 200
    output = _per_channel(
        image,
        lambda ch: np.where((ch > lower) & (ch < upper), ch, 0).astype(np.uint8))
    cv.imwrite(save_path, output)
    return output


# --------------------------------------------------------------------------
# Image animation driven by a video
# --------------------------------------------------------------------------
if sys.version_info[0] < 3:
    raise Exception("You must use Python 3 or higher. Recommended version is Python 3.7")


def load_checkpoints(config_path, checkpoint_path, cpu=False):
    """Build the generator and keypoint detector and load their weights.

    Args:
        config_path: YAML file providing ``model_params``.
        checkpoint_path: checkpoint with ``generator`` and ``kp_detector``
            state dicts.
        cpu: keep the models on the CPU instead of moving them to CUDA.

    Returns:
        ``(generator, kp_detector)``, both in eval mode.
    """
    with open(config_path) as f:
        # safe_load: yaml.load() without a Loader fails on PyYAML >= 6 and
        # can construct arbitrary objects on older versions.
        config = yaml.safe_load(f)

    model_params = config['model_params']
    generator = OcclusionAwareGenerator(**model_params['generator_params'],
                                        **model_params['common_params'])
    kp_detector = KPDetector(**model_params['kp_detector_params'],
                             **model_params['common_params'])
    if cpu:
        checkpoint = torch.load(checkpoint_path,
                                map_location=torch.device('cpu'))
    else:
        generator.cuda()
        kp_detector.cuda()
        checkpoint = torch.load(checkpoint_path)

    generator.load_state_dict(checkpoint['generator'])
    kp_detector.load_state_dict(checkpoint['kp_detector'])

    if not cpu:
        generator = DataParallelWithCallback(generator)
        kp_detector = DataParallelWithCallback(kp_detector)

    generator.eval()
    kp_detector.eval()
    return generator, kp_detector


def make_animation(source_image, driving_video, generator, kp_detector,
                   relative=True, adapt_movement_scale=True, cpu=False):
    """Animate ``source_image`` with the motion of ``driving_video``.

    Returns a list with one predicted frame (H, W, C array) per driving frame.
    """
    with torch.no_grad():
        predictions = []
        source = torch.tensor(
            source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
        driving = torch.tensor(
            np.array(driving_video)[np.newaxis].astype(np.float32)
        ).permute(0, 4, 1, 2, 3)
        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[:, :, 0])

        for frame_idx in tqdm(range(driving.shape[2])):
            driving_frame = driving[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source,
                                   kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            frame = out['prediction'].data.cpu().numpy()
            predictions.append(np.transpose(frame, [0, 2, 3, 1])[0])
    return predictions


def find_best_frame(source, driving, cpu=False):
    """Return the index of the driving frame whose face best matches ``source``.

    Driving frames in which no face is detected are skipped.

    Raises:
        ValueError: if no face is detected in ``source``.
    """
    import face_alignment

    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=True,
                                      device='cpu' if cpu else 'cuda')

    def normalized_landmarks(image):
        # Centre the landmarks and scale x/y by sqrt(convex-hull area).
        detections = fa.get_landmarks(255 * image)
        if not detections:
            return None
        kp = detections[0]
        kp = kp - kp.mean(axis=0, keepdims=True)
        area = np.sqrt(ConvexHull(kp[:, :2]).volume)
        kp[:, :2] = kp[:, :2] / area
        return kp

    kp_source = normalized_landmarks(source)
    if kp_source is None:
        raise ValueError("no face detected in the source image")

    norm = float('inf')
    frame_num = 0
    for i, image in tqdm(enumerate(driving)):
        kp_driving = normalized_landmarks(image)
        if kp_driving is None:
            continue
        new_norm = (np.abs(kp_source - kp_driving) ** 2).sum()
        if new_norm < norm:
            norm = new_norm
            frame_num = i
    return frame_num


def actiondrive():
    """Command-line entry point: animate a source image with a driving video.

    The CPU is used when ``--cpu`` is given or CUDA is unavailable; the same
    device choice is applied to model loading and to every inference call
    (the original mixed a hard-coded ``cpu=True`` with ``opt.cpu``).
    """
    parser = ArgumentParser()
    parser.add_argument("--config", default='config/vox-adv-256.yaml', help="path to config")
    parser.add_argument("--checkpoint", default='vox-cpk.pth.tar', help="path to checkpoint to restore")

    parser.add_argument("--source_image", default='sup-mat/5.png', help="path to source image")
    parser.add_argument("--driving_video", default='sup-mat/source.mp4', help="path to driving video")
    parser.add_argument("--result_video", default='05.mp4', help="path to output")

    parser.add_argument("--relative", dest="relative", action="store_true",
                        help="use relative or absolute keypoint coordinates")
    parser.add_argument("--adapt_scale", dest="adapt_scale", action="store_true",
                        help="adapt movement scale based on convex hull of keypoints")

    parser.add_argument("--find_best_frame", dest="find_best_frame", action="store_true",
                        help="Generate from the frame that is the most alligned with source. (Only for faces, requires face_aligment lib)")

    parser.add_argument("--best_frame", dest="best_frame", type=int, default=None,
                        help="Set frame to start from.")

    parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")

    parser.set_defaults(relative=False)
    parser.set_defaults(adapt_scale=False)

    opt = parser.parse_args()
    cpu = opt.cpu or not torch.cuda.is_available()

    source_image = imageio.imread(opt.source_image)
    reader = imageio.get_reader(opt.driving_video)
    fps = reader.get_meta_data()['fps']
    driving_video = []
    try:
        for im in reader:
            driving_video.append(im)
    except RuntimeError:
        # Best effort: keep the frames decoded before the reader failed.
        pass
    finally:
        reader.close()

    source_image = resize(source_image, (256, 256))[..., :3]
    driving_video = [resize(frame, (256, 256))[..., :3] for frame in driving_video]
    generator, kp_detector = load_checkpoints(config_path=opt.config,
                                              checkpoint_path=opt.checkpoint,
                                              cpu=cpu)

    if opt.find_best_frame or opt.best_frame is not None:
        if opt.best_frame is not None:
            i = opt.best_frame
        else:
            i = find_best_frame(source_image, driving_video, cpu=cpu)
        print("Best frame: " + str(i))
        # Animate outwards from the best frame in both directions, then stitch.
        driving_forward = driving_video[i:]
        driving_backward = driving_video[:(i + 1)][::-1]
        predictions_forward = make_animation(
            source_image, driving_forward, generator, kp_detector,
            relative=opt.relative, adapt_movement_scale=opt.adapt_scale, cpu=cpu)
        predictions_backward = make_animation(
            source_image, driving_backward, generator, kp_detector,
            relative=opt.relative, adapt_movement_scale=opt.adapt_scale, cpu=cpu)
        predictions = predictions_backward[::-1] + predictions_forward[1:]
    else:
        predictions = make_animation(
            source_image, driving_video, generator, kp_detector,
            relative=opt.relative, adapt_movement_scale=opt.adapt_scale, cpu=cpu)
    imageio.mimsave(opt.result_video,
                    [img_as_ubyte(frame) for frame in predictions], fps=fps)


if __name__ == "__main__":
    cv.waitKey(0)  # Wait for a key press; 0 means wait indefinitely.
    cv.destroyAllWindows()  # Close every OpenCV window.