/*
 * Timing driver for applySeparableGaussianBlur().
 *
 * main() builds a 5x5 test image (values 1..25) stored in a float[5][100]
 * array, plus two 1-D tap arrays kx/ky = {0.25, 0.5, 0.25, 0.0}. Each tap
 * array has four elements and is later read with vld1q_f32() inside
 * applySeparableGaussianBlur(). main() calls applySeparableGaussianBlur()
 * with h = w = 5 between two clock() samples, prints the elapsed time in
 * seconds as "Time: ... s", and passes the output image to print().
 *
 * The local 3x3 `kernel` array in main() is initialised but never
 * referenced afterwards.
 *
 * IDX(n) reduces an index modulo 3; the blur routine declares a 3-row
 * scratch array `buf` -- presumably IDX selects a row of it, TODO confirm
 * (no use of IDX is visible here).
 *
 * NOTE(review): this text looks damaged and should be restored from the
 * original file before any code change is attempted:
 *   - the five #include directives carry no header names;
 *   - all code sits on one physical line, so each `//` comment lexically
 *     swallows everything that follows it on that line;
 *   - the definition of applySeparableGaussianBlur() is cut off partway
 *     through its inner `for` header.
 */
#include #include #include #include #include #define IDX(n) ((n) % 3) void applySeparableGaussianBlur(float src[][100], float dst[][100], int h, int w, float kx[], float ky[]); void print(float a[][100], int h, int w); int main() { float inputImage[5][100] = { {1,2,3,4,5}, {6,7,8,9,10}, {11,12,13,14,15}, {16,17,18,19,20}, {21,22,23,24,25} }; float kernel[3][3] = { {1.0f/16, 2.0f/16, 1.0f/16}, {2.0f/16, 4.0f/16, 2.0f/16}, {1.0f/16, 2.0f/16, 1.0f/16} }; float kx[4] = {0.25f, 0.5f, 0.25f, 0.0f}; // one extra element is defined to prevent out-of-bounds access float ky[4] = {0.25f, 0.5f, 0.25f, 0.0f}; float outputImage[5][100] = {0}; clock_t start = clock(); applySeparableGaussianBlur(inputImage, outputImage, 5, 5, kx, ky); clock_t end = clock(); printf("Time: %lf s\n", (double)(end-start) / CLOCKS_PER_SEC); print(outputImage, 5, 5); } void applySeparableGaussianBlur(float src[][100], float dst[][100], int h, int w, float kx[], float ky[]) { int i, j; float buf[3][101] = {0}; float32x4_t kx_vec = vld1q_f32(kx); float32x4_t ky_vec = vld1q_f32(ky); // compute the within-row convolution for the first two rows for(i=0; i<2; i++) for(j=1; j