|
|
|
|
@ -0,0 +1,98 @@
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <time.h>
|
|
|
|
|
|
|
|
|
|
/* Horizontal 3-tap pass over one image row: fills out[1 .. w-2] from row[0 .. w-1] using kx. */
static void separableBlurRow(const float *row, float *out, int w, const float *kx) {
    for (int j = 1; j < w - 1; j++) {
        out[j] = kx[0] * row[j - 1] + kx[1] * row[j] + kx[2] * row[j + 1];
    }
}

/*
 * Apply a separable 3x3 blur to `src` and store the result in `dst`.
 *
 * The effective 2-D kernel is the outer product of the two vectors:
 *     K[m][n] = ky[m] * kx[n]
 * so kx is applied along each row (horizontally) and ky down each column
 * (vertically).
 *
 * Parameters:
 *   src - input image, rows of 5 floats
 *   dst - output image, rows of 5 floats
 *   h   - number of rows to process
 *   w   - number of columns to process; values above 5 are clamped to 5
 *         because both the row type and the scratch buffer hold 5 columns
 *   kx  - horizontal taps (left, centre, right)
 *   ky  - vertical taps (top, centre, bottom)
 *
 * Only interior pixels (1 <= i <= h-2, 1 <= j <= w-2) are written; the
 * one-pixel border of dst is left untouched. Nothing is written when h or w
 * is smaller than 3.
 */
void applySeparableGaussianBlur(float src[][5], float dst[][5], int h, int w, float kx[3], float ky[3]) {
    enum { ROW_LEN = 5, TAPS = 3 };

    /* Horizontally filtered rows; slot (r % TAPS) holds the result for source row r. */
    float ring[TAPS][ROW_LEN];

    if (w > ROW_LEN) {
        w = ROW_LEN;
    }
    if (h < TAPS || w < TAPS) {
        return;
    }

    /* Prime the ring with the two rows that sit above the first output row's bottom neighbour. */
    separableBlurRow(src[0], ring[0], w, kx);
    separableBlurRow(src[1], ring[1], w, kx);

    for (int i = 1; i < h - 1; i++) {
        /*
         * Filter the row below the current one. Its slot previously held
         * row i-2, which no later output row needs.
         */
        separableBlurRow(src[i + 1], ring[(i + 1) % TAPS], w, kx);

        const float *above = ring[(i - 1) % TAPS];
        const float *centre = ring[i % TAPS];
        const float *below = ring[(i + 1) % TAPS];

        /* Vertical pass: combine the three buffered rows with ky. */
        for (int j = 1; j < w - 1; j++) {
            dst[i][j] = ky[0] * above[j] + ky[1] * centre[j] + ky[2] * below[j];
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/*
 * Reference (non-separable) 3x3 convolution, kept for comparison with the
 * separable version.
 *
 * For every interior pixel (1 <= row <= h-2, 1 <= col <= w-2) the nine
 * neighbours in src are weighted by the matching kernel entry and summed
 * into dst. Border pixels of dst are not written.
 */
void applyGaussianBlur(float src[][5], float dst[][5], int h, int w, float kernel[3][3]) {
    const int last_row = h - 1;
    const int last_col = w - 1;

    for (int row = 1; row < last_row; row++) {
        for (int col = 1; col < last_col; col++) {
            float acc = 0.0f;

            /* Visit the nine taps in row-major order (top-left to bottom-right). */
            for (int tap = 0; tap < 9; tap++) {
                const int dr = tap / 3;
                const int dc = tap % 3;
                acc += kernel[dr][dc] * src[row - 1 + dr][col - 1 + dc];
            }

            dst[row][col] = acc;
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/* Print a title line followed by a matrix with 5-float rows, one row per line, two decimals per entry. */
static void printMatrix(const char *title, float m[][5], int rows, int cols) {
    printf("%s\n", title);
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            printf("%.2f ", m[i][j]);
        }
        printf("\n");
    }
}

/*
 * Demo driver: blurs a 5x5 sample image with the full 3x3 kernel and with
 * its separable factorisation, then prints both results and the CPU time
 * each call took.
 */
int main(void) {
    enum { SIZE = 5 };

    /* Sample input image (5x5). */
    float image[SIZE][SIZE] = {
        {1, 2, 3, 4, 5},
        {6, 7, 8, 9, 10},
        {11, 12, 13, 14, 15},
        {16, 17, 18, 19, 20},
        {21, 22, 23, 24, 25}
    };

    /* Common 3x3 Gaussian kernel. */
    float kernel[3][3] = {
        {1.0f / 16, 2.0f / 16, 1.0f / 16},
        {2.0f / 16, 4.0f / 16, 2.0f / 16},
        {1.0f / 16, 2.0f / 16, 1.0f / 16}
    };

    /*
     * Zero-initialise the outputs: the blur routines write interior pixels
     * only, and every cell (border included) is printed below.
     */
    float result[SIZE][SIZE] = {{0}};
    float result_optimized[SIZE][SIZE] = {{0}};

    /*
     * Factor the kernel into a horizontal and a vertical vector such that
     * ky[m] * kx[n] reproduces kernel[m][n]. Taking the first row and the
     * first column unscaled would apply kernel[0][0] twice, so the column
     * is divided by kernel[0][0]. This requires a rank-1 kernel with a
     * non-zero top-left entry, which holds for the Gaussian above.
     */
    float kx[3] = {kernel[0][0], kernel[0][1], kernel[0][2]};
    float ky[3] = {
        kernel[0][0] / kernel[0][0],
        kernel[1][0] / kernel[0][0],
        kernel[2][0] / kernel[0][0]
    };

    /* Time the reference version. */
    clock_t start_time_original = clock();
    applyGaussianBlur(image, result, SIZE, SIZE, kernel);
    clock_t end_time_original = clock();
    double elapsed_time_original = (double)(end_time_original - start_time_original) / CLOCKS_PER_SEC;

    /* Time the separable version. */
    clock_t start_time_optimized = clock();
    applySeparableGaussianBlur(image, result_optimized, SIZE, SIZE, kx, ky);
    clock_t end_time_optimized = clock();
    double elapsed_time_optimized = (double)(end_time_optimized - start_time_optimized) / CLOCKS_PER_SEC;

    /* Report the reference result. */
    printMatrix("Original Gaussian Blur Result Matrix:", result, SIZE, SIZE);
    printf("Original Elapsed time: %.6f seconds\n", elapsed_time_original);

    /* Report the separable result. */
    printMatrix("Optimized Gaussian Blur Result Matrix:", result_optimized, SIZE, SIZE);
    printf("Optimized Elapsed time: %.6f seconds\n", elapsed_time_optimized);

    return 0;
}
|