/*
 * s3.c - separable 3x3 Gaussian blur on a 5x5 float grid, with an optional
 * ARM NEON fast path for the horizontal pass.
 *
 * Recovered from the patch "ADD file via upload"
 * (commit 019016058a3c9c4b5c0c8ef79d39e0ef6f2e9b65,
 *  pypzov7ui <2137546866@qq.com>, Fri, 6 Dec 2024 22:25:32 +0800),
 * which created s3.c.
 *
 * NOTE(review): the targets of the four original #include lines were lost
 * when the patch text was extracted; the headers below are the ones this
 * code actually needs. Confirm against the original commit.
 */
#include <stdio.h>
#include <time.h>

/* Use NEON only where the compiler advertises it; otherwise fall back to
 * plain scalar code so the file builds on any target. */
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#define BLUR_HAVE_NEON 1
#else
#define BLUR_HAVE_NEON 0
#endif

enum {
    GRID_DIM = 5,   /* fixed row/column count of the image arrays */
    NEON_LANES = 4  /* floats per float32x4_t register */
};

/*
 * Applies the 3-tap kernel k along one row.
 *
 * Writes out[j] = k[0]*in[j-1] + k[1]*in[j] + k[2]*in[j+1] for every interior
 * column 1 <= j <= w - 2. Border columns of `out` are left untouched.
 * `in` and `out` must each hold at least w floats.
 */
static void blur_row_3tap(const float *in, float *out, int w, const float k[3]) {
    int j = 1;

#if BLUR_HAVE_NEON
    /* Vector body: run only while four full output lanes fit inside the
     * interior, so the widest load (in[j+1 .. j+4]) stays within the row and
     * the store never touches the border column. */
    for (; j + NEON_LANES <= w - 1; j += NEON_LANES) {
        float32x4_t left = vld1q_f32(&in[j - 1]);
        float32x4_t mid = vld1q_f32(&in[j]);
        float32x4_t right = vld1q_f32(&in[j + 1]);

        /* Multiply by scalar taps directly; loading the 3-element kernel
         * into a 4-lane register would read one float past its end. */
        float32x4_t acc = vmulq_n_f32(left, k[0]);
        acc = vmlaq_n_f32(acc, mid, k[1]);
        acc = vmlaq_n_f32(acc, right, k[2]);

        vst1q_f32(&out[j], acc);
    }
#endif

    /* Scalar tail (and the whole row when fewer than four interior columns
     * remain or NEON is unavailable). */
    for (; j < w - 1; j++) {
        out[j] = k[0] * in[j - 1] + k[1] * in[j] + k[2] * in[j + 1];
    }
}

/*
 * Separable 3x3 blur: a horizontal pass with kx into a scratch buffer,
 * followed by a vertical pass with ky into dst.
 *
 * src, dst : 5x5 grids; only the top-left h x w region is processed.
 * h, w     : number of rows / columns in use; each must be <= 5.
 * kx, ky   : 3-tap horizontal and vertical kernels.
 *
 * Only interior pixels (1 <= i <= h-2, 1 <= j <= w-2) of dst are written;
 * border pixels of dst keep whatever values the caller put there.
 * If h or w exceeds 5 the call does nothing, since the scratch buffer and
 * the array parameters cannot hold a larger region.
 */
void applySeparableGaussianBlur(float src[5][5], float dst[5][5], int h, int w, float kx[3], float ky[3]) {
    float buf[GRID_DIM][GRID_DIM] = {{0}};

    if (h > GRID_DIM || w > GRID_DIM) {
        return;
    }

    /* Horizontal pass over EVERY row, including the top and bottom border
     * rows: the vertical pass below reads buf[i-1] and buf[i+1], so those
     * rows must hold filtered data rather than zeros. */
    for (int i = 0; i < h; i++) {
        blur_row_3tap(src[i], buf[i], w, kx);
    }

    /* Vertical pass over the interior only. */
    for (int i = 1; i < h - 1; i++) {
        for (int j = 1; j < w - 1; j++) {
            dst[i][j] = ky[0] * buf[i - 1][j] + ky[1] * buf[i][j] + ky[2] * buf[i + 1][j];
        }
    }
}

/* Demo driver: blurs a fixed 5x5 sample, prints the result grid and the CPU
 * time spent in the blur call as reported by clock(). */
int main(void) {
    float src[GRID_DIM][GRID_DIM] = {
        {0, 0, 0, 0, 0},
        {0, 1, 2, 3, 0},
        {0, 4, 5, 6, 0},
        {0, 7, 8, 9, 0},
        {0, 0, 0, 0, 0}
    };
    float dst[GRID_DIM][GRID_DIM] = {{0}};
    float kx[3] = {0.25f, 0.5f, 0.25f};
    float ky[3] = {0.25f, 0.5f, 0.25f};

    clock_t start = clock();
    applySeparableGaussianBlur(src, dst, GRID_DIM, GRID_DIM, kx, ky);
    clock_t end = clock();

    printf("输出结果:\n");
    for (int i = 0; i < GRID_DIM; i++) {
        for (int j = 0; j < GRID_DIM; j++) {
            printf("%.2f ", dst[i][j]);
        }
        printf("\n");
    }
    printf("运行时间:%fs\n", (double)(end - start) / CLOCKS_PER_SEC);

    return 0;
}