diff --git a/task3_3.cpp b/task3_3.cpp new file mode 100644 index 0000000..3fb6b08 --- /dev/null +++ b/task3_3.cpp @@ -0,0 +1,97 @@ +#include +#include +#include +#include + +#define H 5 +#define W 500 + +float kx[3]={0.25,0.5,0.25}; +float ky[3]={0.25,0.5,0.25}; + +float32x4_t kx_0={kx[0],kx[0],kx[0],kx[0]}; +float32x4_t kx_1={kx[1],kx[1],kx[1],kx[1]}; +float32x4_t kx_2={kx[2],kx[2],kx[2],kx[2]}; + +float32x4_t ky_0={ky[0],ky[0],ky[0],ky[0]}; +float32x4_t ky_1={ky[1],ky[1],ky[1],ky[1]}; +float32x4_t ky_2={ky[2],ky[2],ky[2],ky[2]}; + +void applySeparableGaussianBlur(float src[H][W], float dst[H][W], int h, int w, float kx[3], float ky[3]) +{ + float buf[H][W-1]={0}; + + for(int k = 0; k< h; ++k) + { + for (int j = 1; j < w - 1;j+=4) + { + + float32x4_t left =vld1q_f32(&src[k][j-1]); + float32x4_t mid =vld1q_f32(&src[k][j]); + float32x4_t right =vld1q_f32(&src[k][j+1]); + + float32x4_t result =vmulq_f32(left, kx_0); + result=vmlaq_f32(result,mid,kx_1); + result=vmlaq_f32(result,right,kx_2); + + vst1q_f32(&buf[k][j],result); + } + } + //开始进行可分离卷积 + for (int i = 2; i < h ; i++) + { + // 计算当前行的行内卷积 + for (int j = 1; j < w - 1; j+=4) + { + + float32x4_t left1 =vld1q_f32(&src[i][j-1]); + float32x4_t mid1 =vld1q_f32(&src[i][j]); + float32x4_t right1 =vld1q_f32(&src[i][j+1]); + + float32x4_t result1 =vmulq_f32(left1, ky_0); + result1=vmlaq_f32(result1,mid1,ky_1); + result1=vmlaq_f32(result1,right1,ky_2); + + vst1q_f32(&dst[i-1][j],result1); + } + } + +} +int main() +{ + float inputImage[H][W]={0}; + float dst[H][W]={0}; + float a[H][W]={0}; + + for(int i=0;i