You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
57 lines
1.6 KiB
57 lines
1.6 KiB
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <ctime>
|
|
#include <iostream>
|
|
#include <ostream>
|
|
#include <arm_neon.h>
|
|
// Demo driver: runs the separable 3x3 blur on a 5x5 ramp image, prints the
// CPU time the call took, then prints the 3x3 interior of the result.
int main(){
    // The blur is defined further down in this file; declare it so the call
    // below resolves.
    void applySeparableGaussianBlur(float src[][100],float dst[][100],int h,int w,float kx[3],float ky[3]);

    const int rows = 5;
    const int cols = 5;

    // Only the first five columns of each 100-float row carry image data;
    // the remaining elements are zero-initialised by the aggregate initialiser.
    float inputImage[5][100] = {
        { 1,  2,  3,  4,  5},
        { 6,  7,  8,  9, 10},
        {11, 12, 13, 14, 15},
        {16, 17, 18, 19, 20},
        {21, 22, 23, 24, 25}
    };

    // Zero-filled so no element is ever read uninitialised; the blur only
    // writes interior pixels.
    float outputImage[5][100] = {{0}};

    // Identical 3-tap kernels for the horizontal and vertical passes.
    float kx[3] = {0.25, 0.5, 0.25};
    float ky[3] = {0.25, 0.5, 0.25};

    const clock_t start = clock();
    applySeparableGaussianBlur(inputImage, outputImage, rows, cols, kx, ky);
    const clock_t end = clock();

    // Label reads "Step 3 computation time:" and the unit reads "seconds".
    // The original literals were mis-decoded (GBK bytes shown as Latin-1).
    // std::endl is kept so the stream is flushed before the printf calls below.
    std::cout << "步骤3运算时间：" << double(end - start) / CLOCKS_PER_SEC << "秒" << std::endl;

    // Print interior pixels only (rows 1..3, columns 1..3).
    for (int i = 1; i < rows - 1; i++) {
        for (int j = 1; j < cols - 1; j++) {
            printf("%5.2f ", outputImage[i][j]);
        }
        printf("\n");
    }

    return 0;
}
|
|
// Horizontal 3-tap pass for a single image row.
//
// Writes outRow[j] = srcRow[j-1]*kx[0] + srcRow[j]*kx[1] + srcRow[j+1]*kx[2]
// for every interior column j in [1, w-2]. Columns 0 and w-1 of outRow are
// left untouched. Both rows must hold at least w floats.
static void blurRowHorizontal(const float *srcRow, float *outRow, int w, const float kx[3])
{
    int j = 1;

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
    // Four output columns per iteration. The widest load reads srcRow[j+4],
    // so the loop only runs while that index is still inside the image
    // (j + 4 <= w - 1); the same bound keeps the 4-float store within
    // columns 1..w-2. Kernel taps are applied as scalars, so no vector load
    // is ever issued on the 3-element kernel array.
    for (; j + 4 <= w - 1; j += 4) {
        const float32x4_t left  = vld1q_f32(srcRow + j - 1);
        const float32x4_t mid   = vld1q_f32(srcRow + j);
        const float32x4_t right = vld1q_f32(srcRow + j + 1);

        float32x4_t acc = vmulq_n_f32(left, kx[0]);
        acc = vmlaq_n_f32(acc, mid, kx[1]);
        acc = vmlaq_n_f32(acc, right, kx[2]);

        vst1q_f32(outRow + j, acc);
    }
#endif

    // Scalar tail (and the whole row when NEON is not available).
    for (; j < w - 1; ++j) {
        outRow[j] = srcRow[j - 1] * kx[0] + srcRow[j] * kx[1] + srcRow[j + 1] * kx[2];
    }
}

// Separable 3x3 blur: a horizontal pass with kx followed by a vertical pass
// with ky.
//
// src  input image, h rows by w columns stored in rows of 100 floats
// dst  output image with the same layout; only interior pixels
//      (rows 1..h-2, columns 1..w-2) are written, the border is left as-is
// h,w  image height and width in pixels
// kx   horizontal kernel taps (left, centre, right)
// ky   vertical kernel taps (above, centre, below)
//
// Returns without touching dst when the image has no interior (h < 3 or
// w < 3) or when w exceeds the 100-float row capacity.
void applySeparableGaussianBlur(float src[][100],float dst[][100],int h,int w,float kx[3],float ky[3])
{
    enum { kMaxCols = 100 };

    if (h < 3 || w < 3 || w > kMaxCols) {
        return;
    }

    // Ring buffer holding the horizontally blurred versions of three
    // consecutive rows; image row r lives in slot r % 3. Each slot is a full
    // row wide so every interior column has its own storage.
    float ring[3][kMaxCols] = {{0.0f}};

    // Prime the ring with rows 0 and 1; row i + 1 is added just before
    // output row i is produced.
    blurRowHorizontal(src[0], ring[0], w, kx);
    blurRowHorizontal(src[1], ring[1], w, kx);

    for (int i = 1; i < h - 1; ++i) {
        // This overwrites the slot of row i - 2, which is no longer needed.
        blurRowHorizontal(src[i + 1], ring[(i + 1) % 3], w, kx);

        const float *above  = ring[(i - 1) % 3];
        const float *centre = ring[i % 3];
        const float *below  = ring[(i + 1) % 3];

        for (int j = 1; j < w - 1; ++j) {
            dst[i][j] = above[j] * ky[0] + centre[j] * ky[1] + below[j] * ky[2];
        }
    }
}
|