parent
e3bbb80ea6
commit
56bf719f79
@ -0,0 +1,56 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctime>
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
#include <arm_neon.h>
|
||||
// Demo driver: blurs a small test image with a 3x3 separable kernel, reports
// the CPU time spent in the blur call, and prints the blurred interior pixels.
int main(){
    // Block-scope declaration of the blur routine used below.
    void applySeparableGaussianBlur(float src[][100],float dst[][100],int h,int w,float kx[3],float ky[3]);

    const int kRows = 5;
    const int kCols = 5;

    // Only the first kCols entries of each 100-float row are given values;
    // aggregate initialization zero-fills the rest of every row.
    float inputImage[kRows][100] = {
        { 1.0f,  2.0f,  3.0f,  4.0f,  5.0f},
        { 6.0f,  7.0f,  8.0f,  9.0f, 10.0f},
        {11.0f, 12.0f, 13.0f, 14.0f, 15.0f},
        {16.0f, 17.0f, 18.0f, 19.0f, 20.0f},
        {21.0f, 22.0f, 23.0f, 24.0f, 25.0f}
    };
    float outputImage[kRows][100];

    // The same 3-tap kernel is passed for both directions.
    float kx[3] = {0.25f, 0.5f, 0.25f};
    float ky[3] = {0.25f, 0.5f, 0.25f};

    // Measure processor time consumed by the blur call only.
    const clock_t tickBefore = clock();
    applySeparableGaussianBlur(inputImage, outputImage, kRows, kCols, kx, ky);
    const clock_t tickAfter = clock();
    const double elapsedSeconds = static_cast<double>(tickAfter - tickBefore) / CLOCKS_PER_SEC;

    // NOTE(review): the label text below looks like GBK-encoded Chinese that was
    // decoded with the wrong charset (presumably "step 3 run time: ... seconds");
    // confirm the file's encoding. It is kept exactly as found.
    std::cout << "²½Öè3ÔËËãʱ¼ä£º" << elapsedSeconds << "Ãë" << std::endl;

    // Print only the interior pixels; the one-pixel border of outputImage is
    // never read here.
    for (int row = 1; row < kRows - 1; ++row) {
        for (int col = 1; col < kCols - 1; ++col) {
            printf("%5.2f ", outputImage[row][col]);
        }
        printf("\n");
    }
    return 0;
}
|
||||
/// Horizontally filters one image row with the 3-tap kernel `kx`.
///
/// For every interior column j in [1, w-2] this writes
///   out[j] = in[j-1]*kx[0] + in[j]*kx[1] + in[j+1]*kx[2].
/// out[0] and out[w-1] are not written. `in` and `out` must each hold at
/// least `w` floats.
static void blurRowHorizontal(const float *in, float *out, int w, const float *kx) {
    int j = 1;
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
    // Vector path: four output columns per iteration. The widest load touches
    // in[j+1 .. j+4], so iterate only while column j+4 still exists (<= w-1).
    // The same bound keeps the 4-float store (out[j .. j+3]) inside the
    // interior columns. Kernel taps are applied as scalars so that no 4-wide
    // load is ever issued on the 3-element kernel array.
    for (; j + 4 <= w - 1; j += 4) {
        const float32x4_t left = vld1q_f32(in + j - 1);
        const float32x4_t mid = vld1q_f32(in + j);
        const float32x4_t right = vld1q_f32(in + j + 1);
        float32x4_t acc = vmulq_n_f32(left, kx[0]);
        acc = vmlaq_n_f32(acc, mid, kx[1]);
        acc = vmlaq_n_f32(acc, right, kx[2]);
        vst1q_f32(out + j, acc);
    }
#endif
    // Scalar tail (and the complete row when NEON is unavailable).
    for (; j < w - 1; ++j) {
        out[j] = in[j - 1] * kx[0] + in[j] * kx[1] + in[j + 1] * kx[2];
    }
}

/// Applies a separable 3x3 filter (horizontal pass with `kx`, then vertical
/// pass with `ky`) to the interior of an image.
///
/// @param src  Input image; each row is an array of 100 floats, of which the
///             first `w` are used.
/// @param dst  Output image with the same row layout. Only pixels with
///             1 <= row <= h-2 and 1 <= column <= w-2 are written; the
///             one-pixel border is left untouched.
/// @param h    Number of image rows in use.
/// @param w    Number of image columns in use (at most 100, the row stride).
/// @param kx   3-tap horizontal kernel.
/// @param ky   3-tap vertical kernel.
///
/// If h < 3 or w < 3 there are no interior pixels, and if w > 100 the rows
/// cannot hold the image; in both cases the function returns without writing
/// anything.
void applySeparableGaussianBlur(float src[][100],float dst[][100],int h,int w,float kx[3],float ky[3]){
    const int kRowStride = 100;  // column capacity fixed by the parameter types

    if (h < 3 || w < 3 || w > kRowStride) {
        return;
    }

    // Three-row ring of horizontally filtered rows; image row r lives in slot
    // r % 3. Each slot is a full row wide so any valid column index fits.
    float ring[3][kRowStride] = {};

    // Prime the ring with the two rows needed above the first output row.
    blurRowHorizontal(src[0], ring[0], w, kx);
    blurRowHorizontal(src[1], ring[1], w, kx);

    for (int i = 1; i < h - 1; ++i) {
        // Filter the row below the current one; it overwrites the slot of
        // row i-2, which is no longer needed.
        blurRowHorizontal(src[i + 1], ring[(i + 1) % 3], w, kx);

        const float *above = ring[(i - 1) % 3];
        const float *centre = ring[i % 3];
        const float *below = ring[(i + 1) % 3];

        // Vertical pass across the three buffered rows.
        for (int j = 1; j < w - 1; ++j) {
            dst[i][j] = above[j] * ky[0] + centre[j] * ky[1] + below[j] * ky[2];
        }
    }
}
|
Loading…
Reference in new issue