ADD file via upload

10 months ago · 524134c899
parent a2ad0e1ee7
commit 524134c899
1 changed files with 50 additions and 0 deletions
--- a/t2.cpp
+++ b/t2.cpp
@ -0,0 +1,50 @@
+#include <arm_neon.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#define SIZE 1024
+
+/**
+ * Element-wise addition of two float arrays: C[i] = A[i] + B[i] for every
+ * i in [0, size).
+ *
+ * Whole groups of four elements are added with NEON intrinsics; the 0-3
+ * elements left over when size is not a multiple of four are added by a
+ * scalar loop, so nothing at or beyond index `size` is read or written.
+ *
+ * @param A    first input array, holding at least `size` floats
+ * @param B    second input array, holding at least `size` floats
+ * @param C    output array, with room for at least `size` floats
+ * @param size number of elements to add; a value <= 0 does nothing
+ */
+void vector_add_optimized(float* A, float* B, float* C, int size) {
+    int i = 0;
+
+    // Vector part: only run while a full 4-element group is still in bounds.
+    // `size - i >= 4` is used instead of `i + 4 <= size` so the bound check
+    // itself cannot overflow for very large sizes.
+    for (; size - i >= 4; i += 4) {
+        // Load 4 consecutive elements of A and B into NEON registers.
+        float32x4_t a = vld1q_f32(A + i);
+        float32x4_t b = vld1q_f32(B + i);
+        // Add the four lanes at once.
+        float32x4_t c = vaddq_f32(a, b);
+        // Store the four sums back to C.
+        vst1q_f32(C + i, c);
+    }
+
+    // Scalar tail: handle the remaining size % 4 elements one by one.
+    for (; i < size; ++i) {
+        C[i] = A[i] + B[i];
+    }
+}
+
+/**
+ * Benchmark driver for vector_add_optimized.
+ *
+ * Allocates three SIZE-element float vectors, fills A and B with
+ * pseudo-random integers in [0, 99], times a single call to
+ * vector_add_optimized with clock(), and prints the elapsed time in
+ * milliseconds.
+ *
+ * @return 0 on success, EXIT_FAILURE if any allocation fails
+ */
+int main() {
+    // Allocate the two input vectors and the output vector.
+    float *A = (float*)malloc(SIZE * sizeof(float));
+    float *B = (float*)malloc(SIZE * sizeof(float));
+    float *C = (float*)malloc(SIZE * sizeof(float));
+
+    // Bail out before touching the buffers if any allocation failed.
+    if (A == NULL || B == NULL || C == NULL) {
+        perror("malloc");
+        // free(NULL) is a no-op, so all three can be released unconditionally.
+        free(A);
+        free(B);
+        free(C);
+        return EXIT_FAILURE;
+    }
+
+    // Seed the generator from the current time and fill A and B.
+    srand((unsigned int)time(NULL));
+    for (int i = 0; i < SIZE; ++i) {
+        A[i] = (float)(rand() % 100);
+        B[i] = (float)(rand() % 100);
+    }
+
+    // Start timing. clock() reports processor time, not wall-clock time.
+    clock_t start = clock();
+    // Run the NEON vector addition.
+    vector_add_optimized(A, B, C, SIZE);
+    // Stop timing.
+    clock_t end = clock();
+
+    // Print the elapsed time, converting clock ticks to milliseconds.
+    printf("NEON优化向量加法耗时: %lf 毫秒\n", 1000.0 * (end - start) / CLOCKS_PER_SEC);
+
+    // Release the buffers.
+    free(A);
+    free(B);
+    free(C);
+
+    return 0;
+}