Add task3_3.cpp

main
pv3e4i5aj 8 months ago
parent 34242ab741
commit bf766b549b

@ -0,0 +1,97 @@
#include <stdio.h>
#include <ctime>
#include <stdlib.h>
#include<arm_neon.h>
// Fixed image dimensions (rows x columns) used to size every image array in this file.
#define H 5
#define W 500
// 3-tap 1-D filter coefficients: kx is applied along a row, ky is the second kernel
// passed to applySeparableGaussianBlur. Both hold the weights 0.25, 0.5, 0.25.
float kx[3]={0.25,0.5,0.25};
float ky[3]={0.25,0.5,0.25};
// Each kx tap replicated into all four lanes of a NEON vector so that it can be
// multiplied against four pixels at once.
float32x4_t kx_0={kx[0],kx[0],kx[0],kx[0]};
float32x4_t kx_1={kx[1],kx[1],kx[1],kx[1]};
float32x4_t kx_2={kx[2],kx[2],kx[2],kx[2]};
// Same four-lane replication for the ky taps.
float32x4_t ky_0={ky[0],ky[0],ky[0],ky[0]};
float32x4_t ky_1={ky[1],ky[1],ky[1],ky[1]};
float32x4_t ky_2={ky[2],ky[2],ky[2],ky[2]};
/**
 * Blur the interior of an image with a separable 3x3 kernel using NEON.
 *
 * The filter runs in two passes. The horizontal pass convolves every row of
 * src with kx and keeps the result in a scratch buffer; the vertical pass
 * convolves that buffer column-wise with ky. For every interior pixel
 * (1 <= r <= h-2, 1 <= c <= w-2) the result is
 *
 *     dst[r][c] = sum over a,b in 0..2 of ky[a] * kx[b] * src[r-1+a][c-1+b]
 *
 * Border pixels of dst (first/last row, first/last column) are never written,
 * so they keep whatever the caller stored there.
 *
 * @param src input image; rows 0..h-1 and columns 0..w-1 are read
 * @param dst output image; only interior pixels are written
 * @param h   number of rows to process, must satisfy 3 <= h <= H
 * @param w   number of columns to process, must satisfy 3 <= w <= W
 * @param kx  three horizontal filter taps
 * @param ky  three vertical filter taps
 *
 * If h or w is outside the ranges above the function returns without
 * touching dst.
 */
void applySeparableGaussianBlur(float src[H][W], float dst[H][W], int h, int w, float kx[3], float ky[3])
{
    // No interior pixel exists below 3x3, and anything above H x W would
    // index past the fixed-size arrays.
    if (h < 3 || w < 3 || h > H || w > W)
    {
        return;
    }

    // Broadcast each tap of the caller-supplied kernels into all four lanes.
    const float32x4_t kx0 = vdupq_n_f32(kx[0]);
    const float32x4_t kx1 = vdupq_n_f32(kx[1]);
    const float32x4_t kx2 = vdupq_n_f32(kx[2]);
    const float32x4_t ky0 = vdupq_n_f32(ky[0]);
    const float32x4_t ky1 = vdupq_n_f32(ky[1]);
    const float32x4_t ky2 = vdupq_n_f32(ky[2]);

    // Index of the right border column; interior columns are 1 .. lastCol-1.
    const int lastCol = w - 1;

    // Horizontally filtered rows. Full width W so that column indices match
    // those of src/dst and no store can run past the end of a row.
    float rowBlur[H][W] = {0};

    // Pass 1: horizontal convolution of every row (borders rows included,
    // because the vertical pass needs the rows above and below).
    for (int r = 0; r < h; ++r)
    {
        int c = 1;
        // Vector body: the right-neighbour load reads columns c+1 .. c+4, so
        // it is only legal while c+4 is still inside the row.
        for (; c + 4 <= lastCol; c += 4)
        {
            const float32x4_t left = vld1q_f32(&src[r][c - 1]);
            const float32x4_t mid = vld1q_f32(&src[r][c]);
            const float32x4_t right = vld1q_f32(&src[r][c + 1]);
            float32x4_t acc = vmulq_f32(left, kx0);
            acc = vmlaq_f32(acc, mid, kx1);
            acc = vmlaq_f32(acc, right, kx2);
            vst1q_f32(&rowBlur[r][c], acc);
        }
        // Scalar tail for the remaining (fewer than four) interior columns.
        for (; c < lastCol; ++c)
        {
            rowBlur[r][c] = kx[0] * src[r][c - 1] + kx[1] * src[r][c] + kx[2] * src[r][c + 1];
        }
    }

    // Pass 2: vertical convolution over the horizontally filtered rows.
    for (int r = 1; r < h - 1; ++r)
    {
        int c = 1;
        // Loads and the store cover columns c .. c+3, which must stay
        // strictly left of the border column.
        for (; c + 4 <= lastCol; c += 4)
        {
            const float32x4_t above = vld1q_f32(&rowBlur[r - 1][c]);
            const float32x4_t centre = vld1q_f32(&rowBlur[r][c]);
            const float32x4_t below = vld1q_f32(&rowBlur[r + 1][c]);
            float32x4_t acc = vmulq_f32(above, ky0);
            acc = vmlaq_f32(acc, centre, ky1);
            acc = vmlaq_f32(acc, below, ky2);
            vst1q_f32(&dst[r][c], acc);
        }
        // Scalar tail, same formula as the vector body.
        for (; c < lastCol; ++c)
        {
            dst[r][c] = ky[0] * rowBlur[r - 1][c] + ky[1] * rowBlur[r][c] + ky[2] * rowBlur[r + 1][c];
        }
    }
}
int main()
{
float inputImage[H][W]={0};
float dst[H][W]={0};
float a[H][W]={0};
for(int i=0;i<H;i++)
{
for(int j=0;j<W;j++)
{
inputImage[i][j]=W*i+j+1;
dst[i][j]=W*i+j+1;
a[i][j]=W*i+j+1;
}
}
clock_t start = clock();
applySeparableGaussianBlur(inputImage, dst, H, W, kx, ky);
clock_t end = clock();
double time_spent = double(end - start) / CLOCKS_PER_SEC;
printf("运行时间:%lf秒\n",time_spent);
for(int i=0;i<H;i++)
{
for(int j=0;j<W;j++)
{
if(i==0||i==H-1||j==0||j==W-1)
{
printf("%.1f ",a[i][j]);
}else
{
printf("%.1f ",dst[i][j]);
}
}
printf("\n");
}
}
Loading…
Cancel
Save