/*
 * vector_addition.c
 *
 * Element-wise addition of two float arrays, implemented once as a plain
 * scalar loop and once with ARM NEON intrinsics, plus a small driver that
 * times one call of each version with clock().
 */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * The NEON intrinsics are only available on ARM targets with NEON enabled.
 * On any other target the "optimized" routine falls back to its scalar loop
 * so the file still builds.
 */
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#define VECTOR_ADD_HAVE_NEON 1
#else
#define VECTOR_ADD_HAVE_NEON 0
#endif

/* Number of elements in each benchmark array. */
#define SIZE 1024

/**
 * Baseline vector addition: C[i] = A[i] + B[i] for every i in [0, size).
 *
 * @param A     first input array, at least `size` elements
 * @param B     second input array, at least `size` elements
 * @param C     output array, at least `size` elements
 * @param size  number of elements to process; nothing is written if <= 0
 */
void vector_add(const float *A, const float *B, float *C, int size) {
    for (int i = 0; i < size; i++) {
        C[i] = A[i] + B[i];
    }
}

/**
 * NEON-optimized vector addition: same result as vector_add().
 *
 * When NEON is available, elements are processed four at a time with
 * vld1q_f32 / vaddq_f32 / vst1q_f32; any elements left over (size not a
 * multiple of 4) are handled by the scalar loop at the end. Without NEON
 * the scalar loop handles every element.
 *
 * @param A     first input array, at least `size` elements
 * @param B     second input array, at least `size` elements
 * @param C     output array, at least `size` elements
 * @param size  number of elements to process; nothing is written if <= 0
 */
void vector_add_optimized(const float *A, const float *B, float *C, int size) {
    int i = 0;

#if VECTOR_ADD_HAVE_NEON
    /* Main loop: one 4-lane float vector per iteration. */
    for (; i <= size - 4; i += 4) {
        float32x4_t vecA = vld1q_f32(&A[i]);
        float32x4_t vecB = vld1q_f32(&B[i]);
        float32x4_t vecC = vaddq_f32(vecA, vecB);
        vst1q_f32(&C[i], vecC);
    }
#endif

    /* Handle the remaining elements one at a time. */
    for (; i < size; i++) {
        C[i] = A[i] + B[i];
    }
}

/**
 * Benchmark driver: fills two arrays with pseudo-random values in [0, 99],
 * runs each addition routine once and prints the elapsed CPU time.
 *
 * @return 0 on success, EXIT_FAILURE if an allocation fails.
 */
int main(void) {
    float *A = malloc(SIZE * sizeof *A);
    float *B = malloc(SIZE * sizeof *B);
    float *C = malloc(SIZE * sizeof *C);

    /* Bail out before touching any buffer; free(NULL) is a no-op. */
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "内存分配失败\n");
        free(A);
        free(B);
        free(C);
        return EXIT_FAILURE;
    }

    srand((unsigned)time(NULL));
    for (int i = 0; i < SIZE; i++) {
        A[i] = (float)(rand() % 100);
        B[i] = (float)(rand() % 100);
    }

    clock_t start, end;

    /* Baseline vector addition. */
    start = clock();
    vector_add(A, B, C, SIZE);
    end = clock();
    printf("基础向量加法时间: %f 秒\n", (double)(end - start) / CLOCKS_PER_SEC);

    /* NEON-optimized vector addition. */
    start = clock();
    vector_add_optimized(A, B, C, SIZE);
    end = clock();
    printf("NEON 优化向量加法时间: %f 秒\n", (double)(end - start) / CLOCKS_PER_SEC);

    free(A);
    free(B);
    free(C);

    return 0;
}