You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
113 lines
3.1 KiB
113 lines
3.1 KiB
#include <stdio.h>
|
|
#include <ctime>
|
|
#include <stdlib.h>
|
|
#include<arm_neon.h>
|
|
|
|
// Number of rows in the fixed-size image arrays.
#define H 5
// Number of columns in the fixed-size image arrays.
#define W 5
// Reduces an index modulo 3, e.g. to select one of three ring-buffer rows.
#define IDX(n) ((n)%3)
void applySeparableGaussianBlur(float src[H][W], float dst[H][W], int h, int w, float kx[3], float ky[3])
|
|
{
|
|
float buf[H][W-1]={0};
|
|
float32x4_t kx_vec=vld1q_f32(kx);
|
|
float32x4_t ky_vec=vld1q_f32(ky);
|
|
|
|
for(int k = 0; k< 2; ++k)
|
|
{
|
|
for(int j=1;j<w-1;j+=4)
|
|
{
|
|
float32x4_t left =vld1q_f32(&src[k][j-1]);
|
|
float32x4_t mid =vld1q_f32(&src[k][j]);
|
|
float32x4_t right =vld1q_f32(&src[k][j+1]);
|
|
|
|
float32x4_t result =vmulq_lane_f32(left,vget_low_f32(kx_vec),0);
|
|
result=vmlaq_lane_f32(result,mid,vget_low_f32(kx_vec),1);
|
|
result=vmlaq_lane_f32(result,right,vget_high_f32(kx_vec),0);
|
|
|
|
vst1q_f32(&buf[k][j],result);
|
|
}
|
|
}
|
|
//开始进行可分离卷积
|
|
for (int i = 1; i < h - 1; ++i)
|
|
{
|
|
//进行行间的卷积得到最终像素值
|
|
for (int j = 1; j < w - 1; j+=4)
|
|
{
|
|
float32x4_t left =vld1q_f32(&src[i+1][j-1]);
|
|
float32x4_t mid =vld1q_f32(&src[i+1][j]);
|
|
float32x4_t right =vld1q_f32(&src[i+1][j+1]);
|
|
|
|
float32x4_t result =vmulq_lane_f32(left,vget_low_f32(kx_vec),0);
|
|
result=vmlaq_lane_f32(result,mid,vget_low_f32(kx_vec),1);
|
|
result=vmlaq_lane_f32(result,right,vget_high_f32(kx_vec),0);
|
|
|
|
vst1q_f32(&buf[IDX(i + 1)][j],result);
|
|
|
|
|
|
float32x4_t left1 =vld1q_f32(&buf[IDX(i - 1)][j]);
|
|
float32x4_t mid1 =vld1q_f32(&buf[IDX(i )][j]);
|
|
float32x4_t right1 =vld1q_f32(&buf[IDX(i + 1)][j]);
|
|
|
|
float32x4_t result1 =vmulq_lane_f32(left1,vget_low_f32(kx_vec),0);
|
|
result1=vmlaq_lane_f32(result1,mid1,vget_low_f32(kx_vec),1);
|
|
result1=vmlaq_lane_f32(result1,right1,vget_high_f32(kx_vec),0);
|
|
|
|
vst1q_f32(&dst[i][j],result1);
|
|
}
|
|
}
|
|
}
|
|
int main()
|
|
{
|
|
float inputImage[H][W]=
|
|
{
|
|
{1,2,3,4,5},
|
|
{6,7,8,9,10},
|
|
{11,12,13,14,15},
|
|
{16,17,18,19,20},
|
|
{21,22,23,24,25}
|
|
};
|
|
|
|
float kx[3]={0.25,0.5,0.25};
|
|
float ky[3]={0.25,0.5,0.25};
|
|
|
|
float dst[H][W]=
|
|
{
|
|
{1,2,3,4,5},
|
|
{6,7,8,9,10},
|
|
{11,12,13,14,15},
|
|
{16,17,18,19,20},
|
|
{21,22,23,24,25}
|
|
};
|
|
|
|
float a[H][W]=
|
|
{
|
|
{1,2,3,4,5},
|
|
{6,7,8,9,10},
|
|
{11,12,13,14,15},
|
|
{16,17,18,19,20},
|
|
{21,22,23,24,25}
|
|
};
|
|
clock_t start = clock();
|
|
applySeparableGaussianBlur(inputImage, dst, H, W, kx, ky);
|
|
clock_t end = clock();
|
|
double time_spent = double(end - start) / CLOCKS_PER_SEC;
|
|
printf("运行时间:%lf秒\ndst矩阵结果为\n",time_spent);
|
|
for(int i=0;i<H;i++)
|
|
{
|
|
for(int j=0;j<W;j++)
|
|
{
|
|
if(i==0||i==H-1||j==0||j==W-1)
|
|
{
|
|
printf("%.1f ",a[i][j]);
|
|
}else
|
|
{
|
|
printf("%.1f ",dst[i][j]);
|
|
|
|
}
|
|
}
|
|
printf("\n");
|
|
}
|
|
|
|
}
|
|
|