|
|
|
@ -0,0 +1,58 @@
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <math.h>
|
|
|
|
|
#include <time.h>
|
|
|
|
|
#include <arm_neon.h>
|
|
|
|
|
|
|
|
|
|
/*
 * Apply a separable 3-tap blur to the interior of a small image.
 *
 * The filter runs in two passes: a horizontal pass with kx writes into a
 * scratch buffer, then a vertical pass with ky combines the scratch rows
 * into dst.
 *
 * Parameters:
 *   src  - input image; only the top-left h x w region is read.
 *   dst  - output image; only interior pixels (rows 1..h-2, columns 1..w-2)
 *          are written, border pixels are left untouched.
 *   h, w - number of rows / columns in use. Each must be in [3, 5]; any
 *          other value makes the call a no-op (below 3 there is no interior
 *          pixel, above 5 the fixed-size rows and scratch buffer would
 *          overflow).
 *   kx   - horizontal taps applied to (left, centre, right).
 *   ky   - vertical taps applied to (top, centre, bottom).
 *
 * Implementation note: the arithmetic is deliberately scalar. A 4-lane
 * vector producing columns j..j+3 of a 3-tap filter has to read columns
 * j-1..j+4, which never fits inside a 5-column row, and a 4-lane load of a
 * 3-element kernel reads past its end.
 */
void applySeparableGaussianBlur(float src[5][5], float dst[5][5], int h, int w, float kx[3], float ky[3]) {
    enum { MAX_DIM = 5 };

    if (!src || !dst || !kx || !ky) {
        return;
    }
    if (h < 3 || w < 3 || h > MAX_DIM || w > MAX_DIM) {
        return;
    }

    float buf[MAX_DIM][MAX_DIM] = {{0}};

    /*
     * Horizontal pass. Every row is filtered, including the first and last:
     * the vertical pass below reads buf[i-1] and buf[i+1] for the interior
     * rows, so the border rows must hold real horizontal results.
     */
    for (int i = 0; i < h; i++) {
        for (int j = 1; j < w - 1; j++) {
            buf[i][j] = kx[0] * src[i][j - 1]
                      + kx[1] * src[i][j]
                      + kx[2] * src[i][j + 1];
        }
    }

    /* Vertical pass over the interior rows only. */
    for (int i = 1; i < h - 1; i++) {
        for (int j = 1; j < w - 1; j++) {
            dst[i][j] = ky[0] * buf[i - 1][j]
                      + ky[1] * buf[i][j]
                      + ky[2] * buf[i + 1][j];
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/*
 * Demo driver: blurs a fixed 5x5 test image with a (0.25, 0.5, 0.25)
 * kernel in both directions, prints the resulting matrix, and reports the
 * CPU time spent inside the blur call as measured with clock().
 */
int main() {
    float input[5][5] = {
        {0, 0, 0, 0, 0},
        {0, 1, 2, 3, 0},
        {0, 4, 5, 6, 0},
        {0, 7, 8, 9, 0},
        {0, 0, 0, 0, 0}
    };
    float output[5][5] = {0};
    float kx[3] = {0.25, 0.5, 0.25};
    float ky[3] = {0.25, 0.5, 0.25};

    /* Time only the filter itself, not the printing below. */
    clock_t t_begin = clock();
    applySeparableGaussianBlur(input, output, 5, 5, kx, ky);
    clock_t t_end = clock();
    double elapsed_sec = (double)(t_end - t_begin) / CLOCKS_PER_SEC;

    printf("矩阵结果:\n");
    for (int row = 0; row < 5; row++) {
        const float *line = output[row];
        for (int col = 0; col < 5; col++) {
            printf("%.2f ", line[col]);
        }
        printf("\n");
    }

    printf("步骤3运行时间:%f秒\n", elapsed_sec);
    return 0;
}
|