diff --git a/cxy_opt/3.c b/cxy_opt/3.c new file mode 100644 index 0000000..4f546e7 --- /dev/null +++ b/cxy_opt/3.c @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include "render.h" +#define IDX(n) ((n) % 3) + +void applySeparableGaussianBlur(float src[][MAT_SIZE], float dst[][MAT_SIZE], + float kx[], float ky[]); +int main() { + float inputImage[MAT_SIZE][MAT_SIZE]; + Render(inputImage); + float kernel[3][3] = { + {1.0f/16, 2.0f/16, 1.0f/16}, + {2.0f/16, 4.0f/16, 2.0f/16}, + {1.0f/16, 2.0f/16, 1.0f/16} + }; + float kx[4] = {1.0f/4, 1.0f/2, 1.0f/4, 0.0f}; // 防止越界多定义一个 + float ky[4] = {1.0f/4, 1.0f/2, 1.0f/4, 0.0f}; + float outputImage[MAT_SIZE][MAT_SIZE] = {0}; + clock_t start = clock(); + applySeparableGaussianBlur(inputImage, outputImage, kx, ky); + clock_t end = clock(); + printf("Time: %lf s\n", (double)(end-start) / CLOCKS_PER_SEC); + Print(outputImage); +} + +void applySeparableGaussianBlur(float src[][MAT_SIZE], float dst[][MAT_SIZE], + float kx[], float ky[]) { + int i, j; + float buf[3][MAT_SIZE]; + float32x4_t kx_vec = vld1q_f32(kx); + float32x4_t ky_vec = vld1q_f32(ky); + // 计算前两行的行内卷积 + float32x4_t* left, *mid, *right, *result; + for(i=0; i<2; i++) + for(j=1; j