You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
55 lines
1.6 KiB
55 lines
1.6 KiB
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <time.h>
|
|
#include <arm_neon.h>
|
|
|
|
#define SIZE 1024
|
|
|
|
// 基础向量加法和矩阵乘法函数定义...
|
|
|
|
// NEON优化向量加法和矩阵乘法函数定义...
|
|
|
|
/*
 * Print the time elapsed between two clock() samples, in milliseconds,
 * prefixed by a caller-supplied label.
 *
 * start, end:  clock() readings taken before and after the measured work.
 * description: label printed in front of the timing value.
 */
void printTime(clock_t start, clock_t end, const char* description) {
    /* The 1000.0 factor forces the tick difference into floating point
     * before it is divided by CLOCKS_PER_SEC. */
    const double elapsed_ms = 1000.0 * (end - start) / CLOCKS_PER_SEC;

    printf("%s 耗时: %lf 毫秒\n", description, elapsed_ms);
}
|
|
|
|
int main() {
|
|
float *A, *B, *C;
|
|
float **A_matrix, **B_matrix, **C_matrix;
|
|
|
|
// 初始化向量和矩阵...
|
|
|
|
// 基础向量加法
|
|
clock_t start_add = clock();
|
|
vector_add(A, B, C, SIZE);
|
|
clock_t end_add = clock();
|
|
printTime(start_add, end_add, "基础向量加法");
|
|
|
|
// NEON优化向量加法
|
|
clock_t start_add_optimized = clock();
|
|
vector_add_optimized(A, B, C, SIZE);
|
|
clock_t end_add_optimized = clock();
|
|
printTime(start_add_optimized, end_add_optimized, "NEON优化向量加法");
|
|
|
|
// 基础矩阵乘法
|
|
clock_t start_mul = clock();
|
|
matmul(A_matrix, B_matrix, C_matrix, SIZE);
|
|
clock_t end_mul = clock();
|
|
printTime(start_mul, end_mul, "基础矩阵乘法");
|
|
|
|
// NEON优化矩阵乘法
|
|
clock_t start_mul_optimized = clock();
|
|
matmul_optimized(A_matrix, B_matrix, C_matrix, SIZE);
|
|
clock_t end_mul_optimized = clock();
|
|
printTime(start_mul_optimized, end_mul_optimized, "NEON优化矩阵乘法");
|
|
|
|
// 分析NEON优化的加速效果
|
|
double add_speedup = (end_add - start_add) / (end_add_optimized - start_add_optimized);
|
|
double mul_speedup = (end_mul - start_mul) / (end_mul_optimized - start_mul_optimized);
|
|
printf("向量加法加速比: %lf\n", add_speedup);
|
|
printf("矩阵乘法加速比: %lf\n", mul_speedup);
|
|
|
|
// 清理内存...
|
|
return 0;
|
|
}7q
|