// csyby/t7.cpp

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <arm_neon.h>

#define SIZE 1024

// 基础向量加法和矩阵乘法函数定义...

// NEON优化向量加法和矩阵乘法函数定义...

/*
 * Print the interval between two clock() samples, converted to milliseconds,
 * prefixed by the given description.
 *
 * start, end   - clock() readings taken before and after the measured work
 * description  - label printed in front of the elapsed time
 */
void printTime(clock_t start, clock_t end, const char *description) {
    const clock_t elapsed_ticks = end - start;
    /* Scale to milliseconds first, then divide by the ticks-per-second rate. */
    const double elapsed_ms = 1000.0 * elapsed_ticks / CLOCKS_PER_SEC;

    printf("%s 耗时: %lf 毫秒\n", description, elapsed_ms);
}

/*
 * Print the ratio baseline_ticks / optimized_ticks under the given label.
 * When the optimized interval is not positive (shorter than the clock()
 * resolution), the ratio is undefined, so a notice is printed instead of
 * dividing by zero.
 */
static void print_speedup(const char *label, clock_t baseline_ticks, clock_t optimized_ticks) {
    if (optimized_ticks <= 0) {
        printf("%s: N/A (优化版本耗时低于计时精度)\n", label);
        return;
    }
    /* Convert before dividing so the quotient is not truncated when clock_t
     * is an integer type. */
    printf("%s: %lf\n", label, (double)baseline_ticks / (double)optimized_ticks);
}

/*
 * Benchmark driver: runs the baseline and NEON-optimized vector addition and
 * matrix multiplication once each, prints the elapsed time of every run, and
 * then prints the speedup ratios (baseline time / optimized time).
 *
 * Returns 0.
 */
int main(void) {
    float *A, *B, *C;
    float **A_matrix, **B_matrix, **C_matrix;

    // Initialize vectors and matrices...
    // NOTE(review): the pointers above are passed to the kernels below; the
    // setup code is elided here — confirm they are allocated and filled
    // before the timed sections run.

    // Baseline vector addition
    clock_t start_add = clock();
    vector_add(A, B, C, SIZE);
    clock_t end_add = clock();
    printTime(start_add, end_add, "基础向量加法");

    // NEON-optimized vector addition
    clock_t start_add_optimized = clock();
    vector_add_optimized(A, B, C, SIZE);
    clock_t end_add_optimized = clock();
    printTime(start_add_optimized, end_add_optimized, "NEON优化向量加法");

    // Baseline matrix multiplication
    clock_t start_mul = clock();
    matmul(A_matrix, B_matrix, C_matrix, SIZE);
    clock_t end_mul = clock();
    printTime(start_mul, end_mul, "基础矩阵乘法");

    // NEON-optimized matrix multiplication
    clock_t start_mul_optimized = clock();
    matmul_optimized(A_matrix, B_matrix, C_matrix, SIZE);
    clock_t end_mul_optimized = clock();
    printTime(start_mul_optimized, end_mul_optimized, "NEON优化矩阵乘法");

    // Speedup of the NEON versions over the baselines. The ratios are
    // computed in floating point and guarded against a zero-length optimized
    // interval (see print_speedup).
    print_speedup("向量加法加速比", end_add - start_add,
                  end_add_optimized - start_add_optimized);
    print_speedup("矩阵乘法加速比", end_mul - start_mul,
                  end_mul_optimized - start_mul_optimized);

    // Free memory...
    return 0;
}