/*
 * step2.c - element-wise addition of two float vectors using ARM NEON
 * intrinsics, timed once with clock().
 *
 * Provenance (recovered from the git format-patch mail this file came from):
 *   Commit:  79294d3dbd7941913a38198d5c259bcd7bbb0c7c
 *   From:    pypzov7ui <2137546866@qq.com>
 *   Date:    Fri, 29 Nov 2024 11:11:03 +0800
 *   Subject: [PATCH] ADD file via upload
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * Only pull in the NEON header when the compiler targets a NEON-capable
 * CPU; otherwise the file still builds and runs using the scalar loop.
 */
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#define STEP2_HAVE_NEON 1
#else
#define STEP2_HAVE_NEON 0
#endif

/* Number of elements in each benchmark vector. */
#define SIZE 1024

/*
 * Compute C[i] = A[i] + B[i] for i in [0, size).
 *
 * A, B  input vectors, each holding at least `size` floats (not modified).
 * C     output vector with room for at least `size` floats.
 * size  element count; zero or negative values make the call a no-op.
 *
 * When NEON is available, four floats are processed per iteration and the
 * remaining 0-3 elements are handled by the scalar tail loop. Without NEON
 * the scalar loop handles every element.
 */
void vector_add_optimized(const float *A, const float *B, float *C, int size)
{
    int i = 0;

#if STEP2_HAVE_NEON
    /*
     * `size - i >= 4` rather than `i <= size - 4`: the latter overflows
     * (undefined behavior) when size is close to INT_MIN.
     */
    for (; size - i >= 4; i += 4) {
        float32x4_t vec_a = vld1q_f32(&A[i]);
        float32x4_t vec_b = vld1q_f32(&B[i]);
        vst1q_f32(&C[i], vaddq_f32(vec_a, vec_b));
    }
#endif

    /* Scalar tail (or the whole vector when NEON is unavailable). */
    for (; i < size; i++) {
        C[i] = A[i] + B[i];
    }
}

/*
 * Fill two SIZE-element vectors with pseudo-random integers in [0, 99],
 * time a single call to vector_add_optimized(), verify the result and
 * print the elapsed CPU time.
 *
 * Returns 0 on success, EXIT_FAILURE if the computed sums are wrong.
 */
int main(void)
{
    float A[SIZE], B[SIZE], C[SIZE];

    for (int i = 0; i < SIZE; i++) {
        A[i] = (float)(rand() % 100);
        B[i] = (float)(rand() % 100);
    }

#if !STEP2_HAVE_NEON
    fputs("warning: built without NEON support; timing the scalar fallback\n",
          stderr);
#endif

    clock_t start = clock();
    vector_add_optimized(A, B, C, SIZE);
    clock_t end = clock();

    /*
     * Read the output back: this validates the kernel and also keeps the
     * optimizer from discarding the timed call as dead code. The inputs
     * are small integers, so the float sums are exact and == is safe.
     */
    for (int i = 0; i < SIZE; i++) {
        float expected = A[i] + B[i];
        if (C[i] != expected) {
            fprintf(stderr, "mismatch at index %d: got %f, expected %f\n",
                    i, (double)C[i], (double)expected);
            return EXIT_FAILURE;
        }
    }

    double time_taken = (double)(end - start) / CLOCKS_PER_SEC;
    printf("NEON 优化的向量加法: %f seconds\n", time_taken);

    return 0;
}