// Provenance: patch "Add task3_2.cpp", commit 34242ab741d040aab3500e0caa2a12663a260ec8,
// author pv3e4i5aj, Sat, 7 Dec 2024 02:46:35 +0800 (new file task3_2.cpp).

// The NEON header only exists on ARM targets; everywhere else the scalar
// loops below do all of the work, so the file still builds and runs.
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#define TASK3_2_HAVE_NEON 1
#else
#define TASK3_2_HAVE_NEON 0
#endif

#define H 5
#define W 500

// Horizontal 3-tap pass over one row:
//   out[j] = k[0]*in[j-1] + k[1]*in[j] + k[2]*in[j+1]   for j in [1, w-2].
// Columns 0 and w-1 of `out` are not written.
static void blurRowHorizontal(const float* in, float* out, int w, const float k[3])
{
    int j = 1;
#if TASK3_2_HAVE_NEON
    // A 4-lane step writes out[j..j+3] and reads in[j+1..j+4]; both stay
    // inside the row only while j + 4 <= w - 1.
    for (; j + 4 <= w - 1; j += 4)
    {
        float32x4_t acc = vmulq_n_f32(vld1q_f32(in + j - 1), k[0]);
        acc = vmlaq_n_f32(acc, vld1q_f32(in + j), k[1]);
        acc = vmlaq_n_f32(acc, vld1q_f32(in + j + 1), k[2]);
        vst1q_f32(out + j, acc);
    }
#endif
    // Scalar tail (and the whole row when NEON is unavailable).
    for (; j < w - 1; ++j)
    {
        out[j] = k[0] * in[j - 1] + k[1] * in[j] + k[2] * in[j + 1];
    }
}

// Vertical 3-tap pass combining three horizontally filtered rows:
//   out[j] = k[0]*above[j] + k[1]*centre[j] + k[2]*below[j]   for j in [1, w-2].
static void blendRowsVertical(const float* above, const float* centre, const float* below,
                              float* out, int w, const float k[3])
{
    int j = 1;
#if TASK3_2_HAVE_NEON
    // Same bound as the horizontal pass: never store past column w-2.
    for (; j + 4 <= w - 1; j += 4)
    {
        float32x4_t acc = vmulq_n_f32(vld1q_f32(above + j), k[0]);
        acc = vmlaq_n_f32(acc, vld1q_f32(centre + j), k[1]);
        acc = vmlaq_n_f32(acc, vld1q_f32(below + j), k[2]);
        vst1q_f32(out + j, acc);
    }
#endif
    for (; j < w - 1; ++j)
    {
        out[j] = k[0] * above[j] + k[1] * centre[j] + k[2] * below[j];
    }
}

// Separable 3x3 blur: filters every row of `src` horizontally with `kx`, then
// combines three neighbouring filtered rows vertically with `ky`.
//
//   src, dst : images with a row stride of W floats; they must not overlap.
//   h, w     : number of rows / columns actually used (w must be <= W).
//   kx, ky   : 3-tap horizontal / vertical kernels.
//
// Only interior pixels dst[1..h-2][1..w-2] are written; the one-pixel border
// of dst keeps whatever the caller stored there. Nothing is written when the
// image has no interior (h < 3 or w < 3) or when w exceeds the row stride.
void advanced_applySeparableGaussianBlur(float src[H][W], float dst[H][W], int h, int w, float kx[3], float ky[3])
{
    if (h < 3 || w < 3 || w > W)
    {
        return;
    }

    // Ring buffer holding the horizontally filtered rows i-1, i and i+1;
    // row r lives in slot r % 3.
    float ring[3][W] = {};

    // Prime the ring with the first two filtered rows.
    blurRowHorizontal(src[0], ring[0], w, kx);
    blurRowHorizontal(src[1], ring[1], w, kx);

    // Begin the separable convolution.
    for (int i = 1; i < h - 1; ++i)
    {
        // Filter the next row; it overwrites the slot of row i-2, which is no longer needed.
        blurRowHorizontal(src[i + 1], ring[(i + 1) % 3], w, kx);

        // Convolve across rows to obtain the final pixel values (uses ky, not kx).
        blendRowsVertical(ring[(i - 1) % 3], ring[i % 3], ring[(i + 1) % 3], dst[i], w, ky);
    }
}

// NOTE(review): the driver that followed this function,
//   int main() { float inputImage[H][W]={0}; float dst[H][W]={0}; float a[H][W]={0}; for(int i=0;i ...
// is cut off mid-statement in the source and cannot be reconstructed here;
// restore it from the original commit.