parent
07cdb8b4bb
commit
636ed7c572
@ -0,0 +1,54 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <arm_neon.h>
|
||||
|
||||
#define SIZE 1024
|
||||
|
||||
// Baseline vector_add and matmul function definitions go here (elided in this listing)...
|
||||
|
||||
// NEON-optimized vector_add_optimized and matmul_optimized function definitions go here (elided in this listing)...
|
||||
|
||||
/*
 * Print the time elapsed between two clock_t samples, in milliseconds.
 *
 * start, end   - clock_t values (e.g. as returned by clock()); the
 *                difference end - start is converted to milliseconds
 *                using CLOCKS_PER_SEC.
 * description  - label printed in front of the measurement; must be a
 *                valid NUL-terminated string.
 *
 * Writes one line to stdout; returns nothing.
 */
void printTime(clock_t start, clock_t end, const char* description) {
    /* Convert to double before dividing so the result keeps its
     * fractional part regardless of how clock_t is defined. */
    const double elapsed_ms =
        1000.0 * (double)(end - start) / (double)CLOCKS_PER_SEC;

    printf("%s 耗时: %lf 毫秒\n", description, elapsed_ms);
}
|
||||
|
||||
int main() {
|
||||
float *A, *B, *C;
|
||||
float **A_matrix, **B_matrix, **C_matrix;
|
||||
|
||||
// 初始化向量和矩阵...
|
||||
|
||||
// 基础向量加法
|
||||
clock_t start_add = clock();
|
||||
vector_add(A, B, C, SIZE);
|
||||
clock_t end_add = clock();
|
||||
printTime(start_add, end_add, "基础向量加法");
|
||||
|
||||
// NEON优化向量加法
|
||||
clock_t start_add_optimized = clock();
|
||||
vector_add_optimized(A, B, C, SIZE);
|
||||
clock_t end_add_optimized = clock();
|
||||
printTime(start_add_optimized, end_add_optimized, "NEON优化向量加法");
|
||||
|
||||
// 基础矩阵乘法
|
||||
clock_t start_mul = clock();
|
||||
matmul(A_matrix, B_matrix, C_matrix, SIZE);
|
||||
clock_t end_mul = clock();
|
||||
printTime(start_mul, end_mul, "基础矩阵乘法");
|
||||
|
||||
// NEON优化矩阵乘法
|
||||
clock_t start_mul_optimized = clock();
|
||||
matmul_optimized(A_matrix, B_matrix, C_matrix, SIZE);
|
||||
clock_t end_mul_optimized = clock();
|
||||
printTime(start_mul_optimized, end_mul_optimized, "NEON优化矩阵乘法");
|
||||
|
||||
// 分析NEON优化的加速效果
|
||||
double add_speedup = (end_add - start_add) / (end_add_optimized - start_add_optimized);
|
||||
double mul_speedup = (end_mul - start_mul) / (end_mul_optimized - start_mul_optimized);
|
||||
printf("向量加法加速比: %lf\n", add_speedup);
|
||||
printf("矩阵乘法加速比: %lf\n", mul_speedup);
|
||||
|
||||
// 清理内存...
|
||||
return 0;
|
||||
}7q
|
Loading…
Reference in new issue