parent
							
								
									20dbf9bdcd
								
							
						
					
					
						commit
						c2e67dd6ee
					
				| @ -0,0 +1,112 @@ | ||||
| #include <stdio.h> | ||||
| #include <ctime> | ||||
| #include <stdlib.h> | ||||
| #include<arm_neon.h> | ||||
| 
 | ||||
#define H 5
#define W 5
/* Maps a row number onto one of the three slots of the rolling row buffer. */
#define IDX(n) ((n)%3)

/*
 * Horizontal 3-tap pass over one row.
 *
 * Writes out[j] = kx[0]*in[j-1] + kx[1]*in[j] + kx[2]*in[j+1] for the
 * interior columns 1 .. w-2. out[0] and out[w-1] are not written.
 */
static void blurRowHorizontal(const float *in, float *out, int w, const float kx[3])
{
    int j = 1;
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
    /* Lanes j..j+3 read in[j-1 .. j+4]; requiring j+4 <= w-1 keeps every
       load inside the row and every stored lane on an interior column. */
    for (; j + 4 <= w - 1; j += 4)
    {
        float32x4_t acc = vmulq_n_f32(vld1q_f32(in + j - 1), kx[0]);
        acc = vmlaq_n_f32(acc, vld1q_f32(in + j), kx[1]);
        acc = vmlaq_n_f32(acc, vld1q_f32(in + j + 1), kx[2]);
        vst1q_f32(out + j, acc);
    }
#endif
    /* Scalar tail (and the whole row when fewer than 4 interior columns fit). */
    for (; j < w - 1; ++j)
    {
        out[j] = in[j - 1] * kx[0] + in[j] * kx[1] + in[j + 1] * kx[2];
    }
}

/*
 * Vertical 3-tap pass combining three horizontally filtered rows.
 *
 * Writes out[j] = ky[0]*above[j] + ky[1]*center[j] + ky[2]*below[j] for the
 * interior columns 1 .. w-2. out[0] and out[w-1] are not written.
 */
static void blurRowVertical(const float *above, const float *center, const float *below,
                            float *out, int w, const float ky[3])
{
    int j = 1;
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
    /* Only columns j..j+3 are touched; j+3 <= w-2 keeps the border intact. */
    for (; j + 4 <= w - 1; j += 4)
    {
        float32x4_t acc = vmulq_n_f32(vld1q_f32(above + j), ky[0]);
        acc = vmlaq_n_f32(acc, vld1q_f32(center + j), ky[1]);
        acc = vmlaq_n_f32(acc, vld1q_f32(below + j), ky[2]);
        vst1q_f32(out + j, acc);
    }
#endif
    for (; j < w - 1; ++j)
    {
        out[j] = above[j] * ky[0] + center[j] * ky[1] + below[j] * ky[2];
    }
}

/*
 * Separable 3x3 blur of the interior of src into dst.
 *
 * src    input image, H x W storage.
 * dst    output image, H x W storage; only rows 1..h-2, columns 1..w-2 are
 *        written. Border pixels of dst are left exactly as the caller set them.
 * h, w   number of rows / columns actually used; must satisfy
 *        3 <= h <= H and 3 <= w <= W, otherwise the call is a no-op.
 * kx     3 horizontal kernel coefficients (left, centre, right).
 * ky     3 vertical kernel coefficients (top, centre, bottom).
 *
 * Each source row is filtered horizontally once into a 3-row ring buffer;
 * the vertical pass then combines the three most recent filtered rows.
 */
void applySeparableGaussianBlur(float src[H][W], float dst[H][W], int h, int w, float kx[3], float ky[3])
{
    /* No interior pixels, or extents exceed the fixed array dimensions. */
    if (h < 3 || w < 3 || h > H || w > W)
    {
        return;
    }

    /* Ring buffer of horizontally filtered rows, indexed with IDX(row). */
    float buf[3][W] = {{0}};

    /* Prime the ring with rows 0 and 1. */
    blurRowHorizontal(src[0], buf[IDX(0)], w, kx);
    blurRowHorizontal(src[1], buf[IDX(1)], w, kx);

    for (int i = 1; i < h - 1; ++i)
    {
        /* Filter the next row; it replaces row i-2, which is no longer needed. */
        blurRowHorizontal(src[i + 1], buf[IDX(i + 1)], w, kx);

        /* Combine rows i-1, i, i+1 vertically to get the final pixel values. */
        blurRowVertical(buf[IDX(i - 1)], buf[IDX(i)], buf[IDX(i + 1)], dst[i], w, ky);
    }
}
| int main() | ||||
| { | ||||
|     float inputImage[H][W]= | ||||
|     { | ||||
|         {1,2,3,4,5}, | ||||
|         {6,7,8,9,10}, | ||||
|         {11,12,13,14,15}, | ||||
|         {16,17,18,19,20}, | ||||
|         {21,22,23,24,25} | ||||
|     }; | ||||
| 
 | ||||
|     float kx[3]={0.25,0.5,0.25}; | ||||
|     float ky[3]={0.25,0.5,0.25}; | ||||
| 
 | ||||
|      float dst[H][W]= | ||||
|     { | ||||
|         {1,2,3,4,5}, | ||||
|         {6,7,8,9,10}, | ||||
|         {11,12,13,14,15}, | ||||
|         {16,17,18,19,20}, | ||||
|         {21,22,23,24,25} | ||||
|     }; | ||||
| 
 | ||||
|      float a[H][W]= | ||||
|     { | ||||
|         {1,2,3,4,5}, | ||||
|         {6,7,8,9,10}, | ||||
|         {11,12,13,14,15}, | ||||
|         {16,17,18,19,20}, | ||||
|         {21,22,23,24,25} | ||||
|     }; | ||||
|     clock_t start = clock(); | ||||
|     applySeparableGaussianBlur(inputImage, dst, H, W, kx, ky); | ||||
|     clock_t end = clock(); | ||||
|     double time_spent = double(end - start) / CLOCKS_PER_SEC; | ||||
|     printf("运行时间:%lf秒\ndst矩阵结果为\n",time_spent); | ||||
|     for(int i=0;i<H;i++) | ||||
|     { | ||||
|         for(int j=0;j<W;j++) | ||||
|         { | ||||
|             if(i==0||i==H-1||j==0||j==W-1) | ||||
|             { | ||||
|                 printf("%.1f ",a[i][j]); | ||||
|             }else | ||||
|             { | ||||
|                 printf("%.1f ",dst[i][j]); | ||||
| 
 | ||||
|             } | ||||
|         } | ||||
|         printf("\n"); | ||||
|     } | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
					Loading…
					
					
				
		Reference in new issue