You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1357 lines
40 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# coding:utf-8
import random
import cv2 as cv
import numpy as np
# from copy import deepcopy
from PIL import Image, ImageDraw, ImageFont
# from pylab import *
# import imutils
# import matplotlib.pyplot as graph
from numpy import fft
import math
import matplotlib
matplotlib.use('Agg')
import os, sys
import yaml
from argparse import ArgumentParser
from tqdm import tqdm
import imageio
import numpy as np
from skimage.transform import resize
from skimage import img_as_ubyte
import torch
from sync_batchnorm import DataParallelWithCallback
from modules.generator import OcclusionAwareGenerator
from modules.keypoint_detector import KPDetector
from animate import normalize_kp
from scipy.spatial import ConvexHull
# Output file that the image-processing functions in this module write their result to.
save_path = "resources/output/change.png"
# 图片写入文字
def Drawworld(img, text, p_x, p_y, font_type, font_size, bold, color):
    """Draw ``text`` on ``img`` at ``(p_x, p_y)`` and save the image.

    ``font_type``, ``font_size``, ``color`` and ``bold`` are forwarded to
    ``cv.putText`` as font face, font scale, colour and stroke thickness.

    Returns:
        The value returned by ``cv.putText``.
    """
    origin = (p_x, p_y)
    annotated = cv.putText(img, text, origin, font_type, font_size, color, bold)
    # The file is written from ``img`` itself, not from the return value.
    cv.imwrite(save_path, img)
    return annotated
# 空间转换
def color_space(img, c_type):
    """Convert ``img`` from BGR to another colour space and save the result.

    Args:
        img: Image handed to ``cv.cvtColor`` with a ``COLOR_BGR2*`` code.
        c_type: Target selector: 1 = HSV, 2 = grayscale, 3 = BGRA,
            4 = HLS, 5 = YUV.

    Returns:
        The converted image, which is also written to ``save_path``.

    Raises:
        ValueError: If ``c_type`` is not one of the selectors above.
    """
    conversion_codes = {
        1: cv.COLOR_BGR2HSV,
        2: cv.COLOR_BGR2GRAY,
        3: cv.COLOR_BGR2BGRA,
        4: cv.COLOR_BGR2HLS,
        5: cv.COLOR_BGR2YUV,
    }
    if c_type not in conversion_codes:
        # Fail with a clear message instead of hitting an unbound ``out``.
        raise ValueError(
            "unsupported c_type %r; expected one of %s"
            % (c_type, sorted(conversion_codes))
        )
    out = cv.cvtColor(img, conversion_codes[c_type])
    cv.imwrite(save_path, out)
    return out
# FFT变换傅里叶
def FFT(img):
    """Compute the centred log-magnitude spectrum of an image and save it.

    Args:
        img: A 2-D image, or a multi-channel image of which only the first
            channel is transformed.

    Returns:
        ``20 * log(|F|)`` of the shifted 2-D FFT, also written to
        ``save_path``.
    """
    # Accept single-channel input as well as the first channel of a colour image.
    channel = img if len(img.shape) == 2 else img[:, :, 0]
    spectrum = np.fft.fftshift(np.fft.fft2(channel))
    # NOTE: frequency bins whose magnitude is exactly zero map to -inf here.
    out = 20 * np.log(np.abs(spectrum))
    cv.imwrite(save_path, out)
    # Return the spectrum like the other operations in this module do.
    return out
# DCT变换离散余弦
def DCT(img):
    """Compute the discrete cosine transform of an image and save it.

    Args:
        img: A 2-D image, or a multi-channel image of which only the first
            channel is transformed.

    Returns:
        The float32 DCT coefficients, also written to ``save_path``.
    """
    # Accept single-channel input as well as the first channel of a colour image.
    channel = img if len(img.shape) == 2 else img[:, :, 0]
    # cv.dct operates on floating-point data.
    # NOTE(review): OpenCV documents cv.dct as supporting even-sized arrays
    # only; odd-sized images are not padded here - confirm with callers.
    out = cv.dct(channel.astype(np.float32))
    cv.imwrite(save_path, out)
    # Return the coefficients like the other operations in this module do.
    return out
# 仿射变换
def affinetransform(img):
    """Apply a fixed three-point affine warp to ``img`` and save the result.

    The top-left, bottom-left and top-right corners of the source are mapped
    to points pulled 30 pixels inwards; the output keeps the input size.
    """
    height, width = img.shape[:2]
    # Three matching reference points in the source and destination images.
    source_points = np.float32([[0, 0], [0, height - 1], [width - 1, 0]])
    target_points = np.float32([[0, 0], [30, height - 30], [width - 30, 30]])
    warp_matrix = cv.getAffineTransform(source_points, target_points)
    out = cv.warpAffine(img, warp_matrix, (width, height))
    cv.imwrite(save_path, out)
    return out
# 直方图线性拉伸
def Linear_hist(img):
    """Linearly stretch the intensity range of an image to 0-255 and save it.

    A 2-D image is stretched as a whole; otherwise the first three channels
    are stretched independently and merged again.

    Returns:
        The stretched uint8 image, also written to ``save_path``.
    """

    def stretch(channel):
        """Map the channel's [min, max] onto [0, 255] with y = a * x + b."""
        lowest, highest = cv.minMaxLoc(channel)[:2]
        if highest == lowest:
            # A constant channel has no range to stretch; avoid dividing by zero.
            return channel.astype(np.uint8)
        gain = 255.0 / (highest - lowest)
        offset = -gain * lowest
        return (gain * channel + offset).astype(np.uint8)

    if len(img.shape) == 2:
        out = stretch(img)
    else:
        out = cv.merge([stretch(img[:, :, idx]) for idx in range(3)])
    cv.imwrite(save_path, out)
    return out
# 非线性拉伸
def Ninear_hist(img):
    """Apply the non-linear stretch ``3 * x ** 0.8`` to an image and save it.

    A 2-D image is transformed as a whole; otherwise the first three channels
    are transformed independently and merged.  The input array is left
    untouched.

    Returns:
        The transformed image in the input's dtype, also written to
        ``save_path``.
    """

    def gamma(channel):
        """Return ``3 * channel ** 0.8`` truncated back to the channel's dtype."""
        stretched = 3 * np.power(channel.astype(np.float64), 0.8)
        return stretched.astype(channel.dtype)

    if len(img.shape) == 2:
        out = gamma(img)
    else:
        out = cv.merge([gamma(img[:, :, idx]) for idx in range(3)])
    cv.imwrite(save_path, out)
    return out
# 自适应均衡
def adaptive_equalization(img):
    """Equalise ``img`` with CLAHE (clip limit 2.0, 8x8 tiles) and save it.

    A 2-D image is equalised directly; otherwise the first three channels are
    equalised one by one and merged.
    """
    equalizer = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    if len(img.shape) != 2:
        channels = [img[:, :, idx] for idx in range(3)]
        out = cv.merge([equalizer.apply(channel) for channel in channels])
    else:
        out = equalizer.apply(img)
    cv.imwrite(save_path, out)
    return out
# 全局均衡
def global_equalization(img):
    """Apply global histogram equalisation to ``img`` and save the result.

    A 2-D image is equalised directly; otherwise the first three channels are
    equalised one by one and merged.
    """
    if len(img.shape) != 2:
        channels = [img[:, :, idx] for idx in range(3)]
        out = cv.merge([cv.equalizeHist(channel) for channel in channels])
    else:
        out = cv.equalizeHist(img)
    cv.imwrite(save_path, out)
    return out
# 均值平滑
def ave_blur(img):
    """Smooth ``img`` with a 5x5 mean (box) filter and save the result.

    A 2-D image is blurred directly; otherwise the first three channels are
    blurred one by one and merged.  The input array is left untouched.

    Returns:
        The blurred image, also written to ``save_path``.
    """
    kernel_size = (5, 5)
    if len(img.shape) == 2:
        out = cv.blur(img, kernel_size)
    else:
        out = cv.merge([cv.blur(img[:, :, idx], kernel_size) for idx in range(3)])
    cv.imwrite(save_path, out)
    return out
# 高斯平滑
def gau_blur(img):
    """Smooth ``img`` with a 7x7 Gaussian kernel (sigma 10) and save it.

    A 2-D image is blurred directly; otherwise the first three channels are
    blurred one by one and merged.
    """
    if len(img.shape) != 2:
        channels = [img[:, :, idx] for idx in range(3)]
        out = cv.merge([cv.GaussianBlur(channel, (7, 7), 10) for channel in channels])
    else:
        out = cv.GaussianBlur(img, (7, 7), 10)
    cv.imwrite(save_path, out)
    return out
# 中值平滑
def mid_blur(img):
    """Smooth ``img`` with a median filter of aperture 5 and save the result.

    A 2-D image is filtered directly; otherwise the first three channels are
    filtered one by one and merged.
    """
    if len(img.shape) != 2:
        channels = [img[:, :, idx] for idx in range(3)]
        out = cv.merge([cv.medianBlur(channel, 5) for channel in channels])
    else:
        out = cv.medianBlur(img, 5)
    cv.imwrite(save_path, out)
    return out
# 锐化1
# def l_sharpen1(img):
# kernel = np.array([[0,1,0], [1,-4,1], [0,1,0]])
# out = cv.filter2D(img,-1, kernel=kernel)
# cv.imwrite(save_path, out)
# return out
#
# #锐化2
# def l_sharpen2(img):
# kernel = np.array([[-1,-1,-1], [-1,8,-1], [-1,-1,-1]])
# out = cv.filter2D(img,-1, kernel=kernel)
# cv.imwrite(save_path, out)
# return out
# 锐化3
def l_sharpen3(img):
    """Sharpen ``img`` with a 3x3 kernel (centre 5, 4-neighbours -1) and save it."""
    # Boosts the centre pixel against its four direct neighbours so that
    # contours become crisper.
    sharpen_kernel = np.array(
        [[0, -1, 0],
         [-1, 5, -1],
         [0, -1, 0]],
        np.float32,
    )
    out = cv.filter2D(img, -1, sharpen_kernel)
    cv.imwrite(save_path, out)
    return out
# 美颜
def makeup(image, value):
    """Smooth skin: per-channel median blur followed by a bilateral filter.

    Args:
        image: Image with at least three channels.
        value: Strength; half of it (truncated) is used as the bilateral
            filter diameter, ``2 * diameter`` as the colour sigma and
            ``diameter / 2`` (truncated) as the spatial sigma.

    Returns:
        The filtered image, also written to ``save_path``.
    """
    channels = [image[:, :, idx] for idx in range(3)]
    smoothed = cv.merge([cv.medianBlur(channel, 5) for channel in channels])
    diameter = int(value / 2)
    sigma_color = diameter * 2
    sigma_space = int(diameter / 2)
    out = cv.bilateralFilter(smoothed, diameter, sigma_color, sigma_space)
    cv.imwrite(save_path, out)
    return out
# 图像旋转
def rotate(image, angle):
    """Rotate ``image`` by ``angle`` degrees about its centre and save it.

    The output keeps the input's width and height and the scale is 1.0.
    """
    height, width = image.shape[:2]
    pivot = (width / 2, height / 2)
    rotation = cv.getRotationMatrix2D(pivot, angle, 1.0)
    out = cv.warpAffine(image, rotation, (width, height))
    cv.imwrite(save_path, out)
    return out
# 图像缩放
def changescale(image, size):
    """Scale ``image`` by the factor ``size`` on both axes and save the result.

    Nearest-neighbour interpolation is used.
    """
    out = cv.resize(
        image,
        (0, 0),  # output size is derived from the scale factors
        fx=size,
        fy=size,
        interpolation=cv.INTER_NEAREST,
    )
    cv.imwrite(save_path, out)
    return out
# 图像翻转
def changeflip(image, direction):
    """Flip ``image`` and save the result.

    ``direction`` is passed straight through as the flip code of ``cv.flip``.
    """
    flipped = cv.flip(image, direction, dst=None)
    cv.imwrite(save_path, flipped)
    return flipped
# 手调节对比度
def updateContrast(image, alpha, beta):
    """Adjust contrast as ``alpha * image + beta`` clipped to 0-255 and save it.

    Args:
        image: Input image.
        alpha: Gain applied to every pixel.
        beta: Offset added after the gain.

    Returns:
        The adjusted uint8 image, also written to ``save_path``.
    """
    # Compute in float64 so that an integer ``alpha``/``beta`` cannot wrap
    # around in the image's own uint8 arithmetic before clipping.
    pixels = np.asarray(image, dtype=np.float64)
    out = np.uint8(np.clip(alpha * pixels + beta, 0, 255))
    cv.imwrite(save_path, out)
    return out
# 手动调节亮度
def updateBrightness(image, alpha, beta):
    """Adjust brightness as ``alpha * image + beta - 150`` clipped to 0-255.

    Args:
        image: Input image.
        alpha: Gain applied to every pixel.
        beta: Offset added after the gain; a fixed 150 is subtracted from it.

    Returns:
        The adjusted uint8 image, also written to ``save_path``.
    """
    # Compute in float64 so that an integer ``alpha``/``beta`` cannot wrap
    # around in the image's own uint8 arithmetic before clipping.
    pixels = np.asarray(image, dtype=np.float64)
    out = np.uint8(np.clip(alpha * pixels + beta - 150, 0, 255))
    cv.imwrite(save_path, out)
    return out
# 图像投影矫正
def correct(image):
    """Rectify ``image`` with a fixed perspective transform and save the result.

    Four hard-coded source points are mapped onto the top-left, top-right,
    bottom-left and bottom-right corners of a 320x200 output.
    """
    source_quad = np.float32([[158, 25], [267, 136], [58, 66], [144, 212]])
    target_quad = np.float32([[0, 0], [320, 0], [0, 200], [320, 200]])
    homography = cv.getPerspectiveTransform(source_quad, target_quad)
    out = cv.warpPerspective(image, homography, (320, 200))
    cv.imwrite(save_path, out)
    return out
# 模糊消除 spoe.jpg文件
def removefuzzy(image):
    """Deblur ``image`` with a Wiener filter for a fixed 30-degree motion PSF.

    The image is converted to grayscale, a point-spread function is
    synthesised and the Wiener-filtered result is saved and returned.
    """

    def motion_process(image_size, motion_angle):
        """Build a normalised motion-blur PSF of shape ``image_size``."""
        psf = np.zeros(image_size)
        # The row centre is used as the reference for both axes.
        centre = (image_size[0] - 1) / 2
        tangent = math.tan(motion_angle * math.pi / 180)
        cotangent = 1 / tangent
        if tangent <= 1:
            for step in range(15):
                shift = round(step * tangent)
                psf[int(centre + shift - 10), int(centre - shift + 123)] = 2
        else:
            for step in range(15):
                shift = round(step * cotangent)
                psf[int(centre - shift - 10), int(centre + shift + 123)] = 2
        # Normalise so that the PSF sums to one.
        return psf / psf.sum()

    def wiener(blurred, psf, eps, K=0.02):
        """Wiener-filter ``blurred`` with ``psf``; ``K`` is the regulariser."""
        blurred_freq = fft.fft2(blurred)
        psf_freq = fft.fft2(psf) + eps
        gain = np.conj(psf_freq) / (np.abs(psf_freq) ** 2 + K)
        restored = fft.ifft2(blurred_freq * gain)
        return np.abs(fft.fftshift(restored))

    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    psf = motion_process((gray.shape[0], gray.shape[1]), 30)
    result = wiener(gray, psf, 1e-3)
    cv.imwrite(save_path, result)
    return result
# 图像拼接
def conectImage(img1, img2):
    """Place ``img1`` and ``img2`` side by side and save the combined image.

    ``img2`` is resized channel by channel (bicubic) to the size of ``img1``;
    both are pasted onto an RGBA canvas twice as wide as ``img1`` which is
    then converted back with ``cv.COLOR_RGB2BGR``.
    """
    first_channels = [img1[:, :, idx] for idx in range(3)]  # kept for parity; unused
    height, width, _ = img1.shape
    resized_channels = [
        cv.resize(img2[:, :, idx], (width, height), interpolation=cv.INTER_CUBIC)
        for idx in range(3)
    ]
    right_bgr = cv.merge(resized_channels)

    canvas = Image.new('RGBA', (2 * width, height))
    left_tile = Image.fromarray(cv.cvtColor(img1, cv.COLOR_BGR2RGB))
    right_tile = Image.fromarray(cv.cvtColor(right_bgr, cv.COLOR_BGR2RGB))
    canvas.paste(left_tile, (0, 0))
    canvas.paste(right_tile, (width, 0))

    out = cv.cvtColor(np.asarray(canvas), cv.COLOR_RGB2BGR)
    cv.imwrite(save_path, out)
    return out
def back_vary(img):
    """Replace the blue background of ``img`` with red and save the result.

    Pixels whose HSV value lies in ``[100, 100, 50]``-``[130, 255, 255]`` are
    selected, the mask is cleaned with one erosion and one dilation, and the
    selected pixels are set to BGR ``(0, 0, 200)``.

    Returns:
        The recoloured image (a resized copy of the input), also written to
        ``save_path``.
    """
    # Resizing with scale 1 yields a new array, so the caller's image is kept.
    img = cv.resize(img, None, fx=1, fy=1)
    hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)
    lower_blue = np.array([100, 100, 50])
    upper_blue = np.array([130, 255, 255])
    mask = cv.inRange(hsv, lower_blue, upper_blue)
    # Erode then dilate to remove isolated mask pixels.
    mask = cv.erode(mask, None, iterations=1)
    mask = cv.dilate(mask, None, iterations=1)
    # 255 marks the selected (background) pixels; recolour them in one step.
    img[mask == 255] = (0, 0, 200)
    cv.imwrite(save_path, img)
    return img
def huantou(img1, img2):
    """Paste a fixed head region of ``img2`` into ``img1`` and save ``img1``.

    Rows 15-269 / columns 55-269 of ``img2`` overwrite rows 35-289 /
    columns 55-269 of ``img1``; ``img1`` is modified in place and returned.
    """
    img1[35:290, 55:270] = img2[15:270, 55:270]
    cv.imwrite(save_path, img1)
    return img1
# 图像增强
def sharpImage(image):
    """Enhance ``image`` by adding its gradient magnitude to the gray image.

    The gradient at ``(x, y)`` is ``|I[x+1, y] - I[x, y]| + |I[x, y+1] - I[x, y]|``;
    the last row and column keep a gradient of zero.

    Returns:
        The enhanced uint8 grayscale image, also written to ``save_path``.
    """
    # Work in float so that the differences and the sum cannot wrap around.
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY).astype('float')
    gradient = np.zeros(gray.shape)
    core = gray[:-1, :-1]
    vertical = np.abs(gray[1:, :-1] - core)
    horizontal = np.abs(gray[:-1, 1:] - core)
    gradient[:-1, :-1] = vertical + horizontal
    sharp = np.clip(gray + gradient, 0, 255).astype('uint8')
    cv.imwrite(save_path, sharp)
    return sharp
# Robert算子
def robs(image):
    """Detect edges with the Roberts cross operator and save the result.

    The two 2x2 diagonal kernels are applied to the grayscale image, the
    absolute responses are averaged with equal weights.
    """
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    kernels = (
        np.array([[-1, 0], [0, 1]], dtype=int),
        np.array([[0, -1], [1, 0]], dtype=int),
    )
    # Filter into 16-bit signed output, then take absolute 8-bit values.
    abs_x, abs_y = [
        cv.convertScaleAbs(cv.filter2D(gray, cv.CV_16S, kernel))
        for kernel in kernels
    ]
    Roberts = cv.addWeighted(abs_x, 0.5, abs_y, 0.5, 0)
    cv.imwrite(save_path, Roberts)
    return Roberts
# prewitt算子
def prewitt(image):
    """Detect edges with the Prewitt operator and save the result.

    The two 3x3 Prewitt kernels are applied to the grayscale image, the
    absolute responses are averaged with equal weights.
    """
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    kernels = (
        np.array([[1, 1, 1], [0, 0, 0], [-1, -1, -1]]),
        np.array([[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]]),
    )
    # Filter into 16-bit signed output, then take absolute 8-bit values.
    abs_x, abs_y = [
        cv.convertScaleAbs(cv.filter2D(gray, cv.CV_16S, kernel))
        for kernel in kernels
    ]
    edges = cv.addWeighted(abs_x, 0.5, abs_y, 0.5, 0)
    cv.imwrite(save_path, edges)
    return edges
# sobel算子
def sob(image):
    """Detect edges with the Sobel operator and save the result.

    First-order derivatives in x and y are computed on the grayscale image,
    their absolute values are averaged with equal weights.
    """
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    derivative_x = cv.Sobel(gray, cv.CV_16S, 1, 0)
    derivative_y = cv.Sobel(gray, cv.CV_16S, 0, 1)
    Sobel = cv.addWeighted(
        cv.convertScaleAbs(derivative_x), 0.5,
        cv.convertScaleAbs(derivative_y), 0.5,
        0,
    )
    cv.imwrite(save_path, Sobel)
    return Sobel
def lap(image):
    """Detect edges with the Laplacian operator and save the result.

    The grayscale image is smoothed with a 5x5 Gaussian before a Laplacian
    with aperture 3 is applied; the absolute 8-bit response is returned.
    """
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    smoothed = cv.GaussianBlur(gray, (5, 5), 0)
    response = cv.Laplacian(smoothed, cv.CV_16S, ksize=3)
    Laplacian = cv.convertScaleAbs(response)
    cv.imwrite(save_path, Laplacian)
    return Laplacian
def _log(image):
    """Detect edges with a 5x5 Laplacian-of-Gaussian kernel and save the result.

    The image is converted to grayscale, padded by two replicated pixels on
    every side, smoothed with a 3x3 Gaussian and convolved with the LoG
    kernel.  The padding is cropped again, so the result has the input's
    height and width.

    Returns:
        The absolute 8-bit single-channel response, also written to
        ``save_path``.
    """
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    # Pad so that the 5x5 kernel can be centred on every original pixel.
    padded = cv.copyMakeBorder(gray, 2, 2, 2, 2, borderType=cv.BORDER_REPLICATE)
    blurred = cv.GaussianBlur(padded, (3, 3), 0, 0)
    log_kernel = np.array(
        [[0, 0, -1, 0, 0],
         [0, -1, -2, -1, 0],
         [-1, -2, 16, -2, -1],
         [0, -1, -2, -1, 0],
         [0, 0, -1, 0, 0]],
        dtype=np.float64,
    )
    # The kernel is symmetric, so filter2D's correlation equals convolution.
    response = cv.filter2D(blurred.astype(np.float64), -1, log_kernel)
    # Drop the two-pixel padding; only pixels with a full window remain.
    cropped = np.ascontiguousarray(response[2:-2, 2:-2])
    image1 = cv.convertScaleAbs(cropped)
    cv.imwrite(save_path, image1)
    return image1
# Canny算子
def cny(img):
    """Detect edges with the Canny detector and save the edge map.

    The image is smoothed with a 3x3 Gaussian, converted to grayscale and its
    16-bit Sobel derivatives are fed to ``cv.Canny`` with thresholds 50 / 150.

    Returns:
        The edge map, also written to ``save_path``.
    """
    blurred = cv.GaussianBlur(img, (3, 3), 0)
    gray = cv.cvtColor(blurred, cv.COLOR_BGR2GRAY)
    grad_x = cv.Sobel(gray, cv.CV_16SC1, 1, 0)
    grad_y = cv.Sobel(gray, cv.CV_16SC1, 0, 1)
    edge_output = cv.Canny(grad_x, grad_y, 50, 150)
    cv.imwrite(save_path, edge_output)
    # Return the detected edges (what is saved), not the unmodified input.
    return edge_output
# 高斯噪声
def gaussNoise(image):
    """Add zero-mean Gaussian noise to ``image`` and save the result.

    Pixel values are scaled to 0-1, noise drawn with
    ``np.random.normal(0, 0.1, ...)`` is added, the sum is clipped to 0-1 and
    scaled back to 0-255.

    Returns:
        The noisy uint8 image, also written to ``save_path``.
    """
    normalized = np.array(image / 255, dtype=float)
    noise = np.random.normal(0, 0.1, normalized.shape)
    # Clip so that the added noise cannot leave the valid range.
    noisy = np.clip(normalized + noise, 0.0, 1.0)
    out = np.uint8(noisy * 255)
    cv.imwrite(save_path, out)
    return out
# 椒盐噪声
def saltNoise(image):
    """Add salt-and-pepper noise to ``image`` and save the result.

    For every pixel one ``random.random()`` value is drawn: below 0.2 the
    pixel becomes 0 (pepper), above 0.8 it becomes 255 (salt), otherwise the
    original value is kept.
    """
    pepper_below = 0.2
    salt_above = 1 - pepper_below
    noisy = np.zeros(image.shape, np.uint8)
    n_rows, n_cols = image.shape[0], image.shape[1]
    # Row-major traversal, one random draw per pixel.
    for row in range(n_rows):
        for col in range(n_cols):
            draw = random.random()
            if draw < pepper_below:
                replacement = 0
            elif draw > salt_above:
                replacement = 255
            else:
                replacement = image[row][col]
            noisy[row][col] = replacement
    cv.imwrite(save_path, noisy)
    return noisy
# 算数均值滤波器
def arithFilter(image):
    """Apply a 3x3 arithmetic-mean filter to the first three channels.

    Interior pixels are replaced by the truncated mean of their 3x3
    neighbourhood; pixels on the one-pixel border are copied unchanged.

    Returns:
        The filtered uint8 image with three channels, also written to
        ``save_path``.
    """
    src = image[:, :, :3]
    rows, cols = src.shape[:2]
    # Start from a copy so that the border keeps the original values.
    output = np.array(src, dtype=np.uint8, order='C')
    if rows >= 3 and cols >= 3:
        # Accumulate in a wide integer type: nine uint8 values do not fit
        # into uint8 and would silently wrap around.
        wide = src.astype(np.int64)
        total = np.zeros((rows - 2, cols - 2, wide.shape[2]), dtype=np.int64)
        for d_row in range(3):
            for d_col in range(3):
                total += wide[d_row:rows - 2 + d_row, d_col:cols - 2 + d_col]
        output[1:-1, 1:-1] = (total // 9).astype(np.uint8)
    cv.imwrite(save_path, output)
    return output
# 几何均值滤波器
def geometryFilter(image):
    """Apply a 3x3 geometric-mean filter to the first three channels.

    For every pixel the in-bounds neighbours of its 3x3 window are multiplied
    and the ninth root of the product (truncated) becomes the new value.

    Returns:
        The filtered uint8 image with three channels, also written to
        ``save_path``.
    """

    def filter_channel(channel):
        """Return the geometric-mean filtered copy of one 2-D channel."""
        n_rows, n_cols = channel.shape[0], channel.shape[1]
        filtered = np.zeros(channel.shape, np.uint8)
        for row in range(n_rows):
            for col in range(n_cols):
                product = 1.0
                for d_row in (-1, 0, 1):
                    for d_col in (-1, 0, 1):
                        in_rows = 0 <= row + d_row < n_rows
                        in_cols = 0 <= col + d_col < n_cols
                        if in_cols and in_rows:
                            product *= channel[row + d_row][col + d_col]
                # NOTE: the exponent stays 1/9 even when fewer than nine
                # neighbours are in bounds (border pixels).
                filtered[row][col] = (pow(product, 1 / 9)).astype(int)
        return filtered

    channels = [image[:, :, idx] for idx in range(3)]
    output = cv.merge([filter_channel(channel) for channel in channels])
    cv.imwrite(save_path, output)
    return output
# 谐波均值滤波器
def harmonyFilter(image):
    """Apply a 3x3 harmonic-mean filter to the first three channels.

    For every pixel the reciprocals of the non-zero, in-bounds neighbours of
    its 3x3 window are summed; the new value is the number of contributing
    neighbours divided by that sum (truncated).  Windows without any
    non-zero pixel yield 0.

    Returns:
        The filtered uint8 image with three channels, also written to
        ``save_path``.
    """
    src = image[:, :, :3].astype(np.float64)
    rows, cols = src.shape[:2]
    # Zero padding: zeros are skipped below, exactly like out-of-bounds pixels.
    padded = np.pad(src, ((1, 1), (1, 1), (0, 0)), mode='constant')
    contributes = padded != 0
    reciprocal = np.zeros_like(padded)
    np.divide(1.0, padded, out=reciprocal, where=contributes)

    reciprocal_sum = np.zeros_like(src)
    count = np.zeros_like(src)
    for d_row in range(3):
        for d_col in range(3):
            window = (slice(d_row, d_row + rows), slice(d_col, d_col + cols))
            reciprocal_sum += reciprocal[window]
            count += contributes[window]

    # Divide by the real number of contributing pixels rather than a fixed 9,
    # so the mean can never exceed the largest pixel and wrap around in uint8.
    mean = np.zeros_like(src)
    np.divide(count, reciprocal_sum, out=mean, where=reciprocal_sum > 0)
    # The tiny offset keeps exact means from truncating one step too low
    # because of floating-point rounding.
    output = np.ascontiguousarray((mean + 1e-9).astype(np.uint8))
    cv.imwrite(save_path, output)
    return output
# 最大值排序滤波器
def maxSortFilter(image):
    """Apply a 3x3 maximum filter to the first three channels and save it.

    Every pixel becomes the largest value among the in-bounds pixels of its
    3x3 window.

    Returns:
        The filtered uint8 image with three channels, also written to
        ``save_path``.
    """
    src = image[:, :, :3]
    rows, cols = src.shape[:2]
    # Edge replication only repeats values that already lie inside the
    # window, so it does not change the maximum at the borders.
    padded = np.pad(src, ((1, 1), (1, 1), (0, 0)), mode='edge')
    largest = padded[0:rows, 0:cols].copy()
    for d_row in range(3):
        for d_col in range(3):
            np.maximum(largest, padded[d_row:d_row + rows, d_col:d_col + cols], out=largest)
    output = np.ascontiguousarray(largest.astype(np.uint8))
    cv.imwrite(save_path, output)
    return output
# 中值排序滤波器
def mediumSortFilter(image):
    """Apply a 3x3 median filter to the first three channels and save it.

    Every pixel becomes the median of the in-bounds pixels of its 3x3
    window; a fractional median (even neighbour count at the borders) is
    truncated.

    Returns:
        The filtered uint8 image with three channels, also written to
        ``save_path``.
    """
    src = image[:, :, :3].astype(np.float64)
    rows, cols = src.shape[:2]
    # NaN padding marks out-of-bounds positions so nanmedian ignores them.
    padded = np.pad(src, ((1, 1), (1, 1), (0, 0)), mode='constant',
                    constant_values=np.nan)
    windows = np.stack([
        padded[d_row:d_row + rows, d_col:d_col + cols]
        for d_row in range(3)
        for d_col in range(3)
    ])
    median = np.nanmedian(windows, axis=0)
    output = np.ascontiguousarray(median.astype(np.uint8))
    cv.imwrite(save_path, output)
    return output
# 最小值排序滤波器
def minSortFilter(image):
    """Apply a 3x3 minimum filter to the first three channels and save it.

    Every pixel becomes the smallest value among the in-bounds pixels of its
    3x3 window.

    Returns:
        The filtered uint8 image with three channels, also written to
        ``save_path``.
    """
    src = image[:, :, :3]
    rows, cols = src.shape[:2]
    # Edge replication only repeats values that already lie inside the
    # window, so it does not change the minimum at the borders.
    padded = np.pad(src, ((1, 1), (1, 1), (0, 0)), mode='edge')
    smallest = padded[0:rows, 0:cols].copy()
    for d_row in range(3):
        for d_col in range(3):
            np.minimum(smallest, padded[d_row:d_row + rows, d_col:d_col + cols], out=smallest)
    output = np.ascontiguousarray(smallest.astype(np.uint8))
    cv.imwrite(save_path, output)
    return output
# 高通滤波器
def HighPassFilter(image):
    """Keep pixel values above 150 and zero the rest, per channel.

    This is an intensity threshold applied to the first three channels, not
    a frequency-domain filter.

    Returns:
        The thresholded uint8 image with three channels, also written to
        ``save_path``.
    """
    lower_bound = 150
    src = image[:, :, :3]
    output = np.ascontiguousarray(
        np.where(src > lower_bound, src, 0).astype(np.uint8)
    )
    cv.imwrite(save_path, output)
    return output
# 低通滤波器
def LowPassFilter(image):
    """Keep pixel values below 100 and set the rest to 255, per channel.

    This is an intensity threshold applied to the first three channels, not
    a frequency-domain filter.

    Returns:
        The thresholded uint8 image with three channels, also written to
        ``save_path``.
    """
    upper_bound = 100
    src = image[:, :, :3]
    output = np.ascontiguousarray(
        np.where(src < upper_bound, src, 255).astype(np.uint8)
    )
    cv.imwrite(save_path, output)
    return output
# 带通滤波器
def BrandPassFilter(image):
    """Keep pixel values strictly between 100 and 200, zero the rest.

    This is an intensity band threshold applied to the first three channels,
    not a frequency-domain filter.  Each channel is tested against its own
    values.

    Returns:
        The thresholded uint8 image with three channels, also written to
        ``save_path``.
    """
    lower_bound = 100
    upper_bound = 200
    src = image[:, :, :3]
    # Every channel is compared with itself; green and red must not be
    # gated by the blue channel's values.
    in_band = (src > lower_bound) & (src < upper_bound)
    output = np.ascontiguousarray(np.where(in_band, src, 0).astype(np.uint8))
    cv.imwrite(save_path, output)
    return output
#图像动作驱动
# Refuse to run on interpreters older than Python 3.
if sys.version_info < (3,):
    raise Exception("You must use Python 3 or higher. Recommended version is Python 3.7")
def load_checkpoints(config_path, checkpoint_path, cpu=False):
    """Build the generator and keypoint detector and load their weights.

    Args:
        config_path: Path to the YAML config providing ``model_params``.
        checkpoint_path: Path to a checkpoint containing ``'generator'`` and
            ``'kp_detector'`` state dicts.
        cpu: When true, keep the models on the CPU and map the checkpoint to
            it; otherwise move them to CUDA and wrap them in
            ``DataParallelWithCallback``.

    Returns:
        ``(generator, kp_detector)``, both switched to evaluation mode.
    """
    with open(config_path) as f:
        # yaml.load() without an explicit Loader fails on current PyYAML;
        # safe_load is sufficient for a plain configuration mapping.
        config = yaml.safe_load(f)
    model_params = config['model_params']

    generator = OcclusionAwareGenerator(**model_params['generator_params'],
                                        **model_params['common_params'])
    if not cpu:
        generator.cuda()

    kp_detector = KPDetector(**model_params['kp_detector_params'],
                             **model_params['common_params'])
    if not cpu:
        kp_detector.cuda()

    if cpu:
        checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
    else:
        checkpoint = torch.load(checkpoint_path)
    generator.load_state_dict(checkpoint['generator'])
    kp_detector.load_state_dict(checkpoint['kp_detector'])

    if not cpu:
        generator = DataParallelWithCallback(generator)
        kp_detector = DataParallelWithCallback(kp_detector)

    generator.eval()
    kp_detector.eval()
    return generator, kp_detector
def make_animation(source_image, driving_video, generator, kp_detector, relative=True, adapt_movement_scale=True,
                   cpu=False):
    """Animate ``source_image`` with the motion of ``driving_video``.

    Keypoints of every driving frame are normalised against the first
    driving frame and the source keypoints, then passed to ``generator``.

    Args:
        source_image: Source image array, converted to float32 with a batch
            axis added and its last axis moved to position 1.
        driving_video: Sequence of frames, stacked into one array.
        generator: Callable producing a dict with a ``'prediction'`` tensor.
        kp_detector: Callable extracting keypoints from a frame tensor.
        relative: Forwarded as ``use_relative_movement`` and
            ``use_relative_jacobian``.
        adapt_movement_scale: Forwarded to ``normalize_kp``.
        cpu: When false, the source and each driving frame are moved to CUDA.

    Returns:
        A list with one predicted frame (NumPy array) per driving frame.
    """
    frames = []
    with torch.no_grad():
        source_batch = source_image[np.newaxis].astype(np.float32)
        source = torch.tensor(source_batch).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()

        driving_batch = np.array(driving_video)[np.newaxis].astype(np.float32)
        driving = torch.tensor(driving_batch).permute(0, 4, 1, 2, 3)

        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[:, :, 0])

        frame_count = driving.shape[2]
        for frame_idx in tqdm(range(frame_count)):
            driving_frame = driving[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(
                kp_source=kp_source,
                kp_driving=kp_driving,
                kp_driving_initial=kp_driving_initial,
                use_relative_movement=relative,
                use_relative_jacobian=relative,
                adapt_movement_scale=adapt_movement_scale,
            )
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            prediction = out['prediction'].data.cpu().numpy()
            # Move the axis at position 1 to the end and drop the batch axis.
            frames.append(np.transpose(prediction, [0, 2, 3, 1])[0])
    return frames
def find_best_frame(source, driving, cpu=False):
    """Return the index of the driving frame whose face best matches ``source``.

    Facial landmarks are detected with ``face_alignment``, centred and scaled
    by the square root of their convex-hull area; the frame with the smallest
    sum of squared landmark differences wins (the first one on ties).

    Args:
        source: Source image; multiplied by 255 before landmark detection.
        driving: Iterable of driving frames, treated the same way.
        cpu: Run landmark detection on ``'cpu'`` instead of ``'cuda'``.
    """
    import face_alignment

    def centre_and_scale(landmarks):
        """Centre the landmarks and scale x/y by sqrt of the hull area."""
        landmarks = landmarks - landmarks.mean(axis=0, keepdims=True)
        # For 2-D points ConvexHull.volume is the enclosed area.
        scale = np.sqrt(ConvexHull(landmarks[:, :2]).volume)
        landmarks[:, :2] = landmarks[:, :2] / scale
        return landmarks

    device = 'cpu' if cpu else 'cuda'
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=True,
                                      device=device)
    kp_source = centre_and_scale(fa.get_landmarks(255 * source)[0])

    best_distance = float('inf')
    best_index = 0
    for index, frame in tqdm(enumerate(driving)):
        kp_driving = centre_and_scale(fa.get_landmarks(255 * frame)[0])
        distance = (np.abs(kp_source - kp_driving) ** 2).sum()
        if distance < best_distance:
            best_distance, best_index = distance, index
    return best_index
def actiondrive():
    """Animate a source image with a driving video and write the result video.

    Command-line options select the config, checkpoint, source image,
    driving video and output path.  Both inputs are resized to 256x256 and
    reduced to three channels.  With ``--find_best_frame`` or
    ``--best_frame`` the animation is generated forwards and backwards from
    that frame and stitched together.

    The models are always loaded and run on the CPU; ``--cpu`` only selects
    the device used for the landmark search in ``find_best_frame``.
    """
    parser = ArgumentParser()
    parser.add_argument("--config", default='config/vox-adv-256.yaml', help="path to config")
    parser.add_argument("--checkpoint", default='vox-cpk.pth.tar', help="path to checkpoint to restore")
    parser.add_argument("--source_image", default='sup-mat/5.png', help="path to source image")
    parser.add_argument("--driving_video", default='sup-mat/source.mp4', help="path to driving video")
    parser.add_argument("--result_video", default='05.mp4', help="path to output")
    parser.add_argument("--relative", dest="relative", action="store_true",
                        help="use relative or absolute keypoint coordinates")
    parser.add_argument("--adapt_scale", dest="adapt_scale", action="store_true",
                        help="adapt movement scale based on convex hull of keypoints")
    parser.add_argument("--find_best_frame", dest="find_best_frame", action="store_true",
                        help="Generate from the frame that is the most alligned with source. (Only for faces, requires face_aligment lib)")
    parser.add_argument("--best_frame", dest="best_frame", type=int, default=None,
                        help="Set frame to start from.")
    parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")
    parser.set_defaults(relative=False)
    parser.set_defaults(adapt_scale=False)
    opt = parser.parse_args()

    source_image = imageio.imread(opt.source_image)
    reader = imageio.get_reader(opt.driving_video)
    try:
        fps = reader.get_meta_data()['fps']
        driving_video = []
        try:
            for im in reader:
                driving_video.append(im)
        except RuntimeError:
            # Best effort: keep the frames that were decoded before the error.
            pass
    finally:
        # Release the reader even when reading the metadata fails.
        reader.close()

    source_image = resize(source_image, (256, 256))[..., :3]
    driving_video = [resize(frame, (256, 256))[..., :3] for frame in driving_video]

    # The models are loaded on the CPU, so every animation call must use the
    # CPU as well; mixing in ``opt.cpu`` would move the inputs to CUDA while
    # the models stay on the CPU.
    model_on_cpu = True
    generator, kp_detector = load_checkpoints(config_path=opt.config, checkpoint_path=opt.checkpoint,
                                              cpu=model_on_cpu)

    if opt.find_best_frame or opt.best_frame is not None:
        i = opt.best_frame if opt.best_frame is not None else find_best_frame(source_image, driving_video, cpu=opt.cpu)
        print("Best frame: " + str(i))
        driving_forward = driving_video[i:]
        driving_backward = driving_video[:(i + 1)][::-1]
        predictions_forward = make_animation(source_image, driving_forward, generator, kp_detector,
                                             relative=opt.relative, adapt_movement_scale=opt.adapt_scale,
                                             cpu=model_on_cpu)
        predictions_backward = make_animation(source_image, driving_backward, generator, kp_detector,
                                              relative=opt.relative, adapt_movement_scale=opt.adapt_scale,
                                              cpu=model_on_cpu)
        # Frame ``i`` is part of both halves; drop the duplicate when joining.
        predictions = predictions_backward[::-1] + predictions_forward[1:]
    else:
        predictions = make_animation(source_image, driving_video, generator, kp_detector, relative=opt.relative,
                                     adapt_movement_scale=opt.adapt_scale, cpu=model_on_cpu)
    imageio.mimsave(opt.result_video, [img_as_ubyte(frame) for frame in predictions], fps=fps)
# Script entry point: only waits for a key press and closes OpenCV windows.
if __name__ == "__main__":
    cv.waitKey(0)  # wait for user input; the argument is in milliseconds and 0 means wait indefinitely
    cv.destroyAllWindows()  # destroy all windows