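/*
 * NOTE: the lines below are web-page residue captured along with this file;
 * they are not part of the program.
 *
 * You cannot select more than 25 topics. Topics must start with a letter or
 * number, can include dashes ('-') and can be up to 35 characters long.
 *
 * 55 lines
 * 1.6 KiB
 */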

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <arm_neon.h>
#define SIZE 1024
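// Baseline vector-addition and matrix-multiplication function definitions...
// NEON-optimized vector-addition and matrix-multiplication function definitions...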
/*
 * Print the time elapsed between two clock() samples, in milliseconds.
 *
 * start, end   - clock() readings taken before and after the measured work.
 * description  - label printed in front of the measurement.
 */
void printTime(clock_t start, clock_t end, const char* description) {
    /* The 1000.0 factor promotes the tick difference to double before the
     * division, so the conversion to milliseconds is not truncated. */
    const double elapsed_ms = 1000.0 * (end - start) / CLOCKS_PER_SEC;

    printf("%s 耗时: %lf 毫秒\n", description, elapsed_ms);
}
/*
 * Print a labelled speedup ratio (baseline ticks / optimized ticks).
 *
 * The ratio is computed in floating point: dividing the raw clock_t
 * differences directly would truncate the result wherever clock_t is an
 * integer type, and would divide by zero when the optimized run takes no
 * measurable ticks. In that last case "N/A" is printed instead of a ratio.
 *
 * label      - text printed before the ratio.
 * baseline   - elapsed ticks of the baseline implementation.
 * optimized  - elapsed ticks of the optimized implementation.
 */
static void printSpeedup(const char *label, clock_t baseline, clock_t optimized) {
    if (optimized > 0) {
        printf("%s: %lf\n", label, (double)baseline / (double)optimized);
    } else {
        /* Optimized run finished below the resolution of clock(). */
        printf("%s: N/A\n", label);
    }
}

/*
 * Benchmark driver: times the baseline and optimized vector addition and
 * matrix multiplication with clock(), prints each duration through
 * printTime(), then prints the speedup of each optimized variant.
 *
 * Returns 0.
 */
int main(void) {
    /* Start from NULL so the pointers never hold indeterminate values before
     * the (elided) initialization below assigns them. */
    float *A = NULL, *B = NULL, *C = NULL;
    float **A_matrix = NULL, **B_matrix = NULL, **C_matrix = NULL;
    // Initialize vectors and matrices...

    // Baseline vector addition
    clock_t start_add = clock();
    vector_add(A, B, C, SIZE);
    clock_t end_add = clock();
    printTime(start_add, end_add, "基础向量加法");

    // NEON-optimized vector addition
    clock_t start_add_optimized = clock();
    vector_add_optimized(A, B, C, SIZE);
    clock_t end_add_optimized = clock();
    printTime(start_add_optimized, end_add_optimized, "NEON优化向量加法");

    // Baseline matrix multiplication
    clock_t start_mul = clock();
    matmul(A_matrix, B_matrix, C_matrix, SIZE);
    clock_t end_mul = clock();
    printTime(start_mul, end_mul, "基础矩阵乘法");

    // NEON-optimized matrix multiplication
    clock_t start_mul_optimized = clock();
    matmul_optimized(A_matrix, B_matrix, C_matrix, SIZE);
    clock_t end_mul_optimized = clock();
    printTime(start_mul_optimized, end_mul_optimized, "NEON优化矩阵乘法");

    // Report the speedup obtained by each optimized variant
    printSpeedup("向量加法加速比",
                 end_add - start_add,
                 end_add_optimized - start_add_optimized);
    printSpeedup("矩阵乘法加速比",
                 end_mul - start_mul,
                 end_mul_optimized - start_mul_optimized);

    // Free memory...
    return 0;
}