parent
0da95cbafa
commit
f44797a958
@ -0,0 +1,34 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctime>
|
||||
#include <arm_neon.h>
|
||||
#define SIZE 1024
|
||||
|
||||
/**
 * Element-wise single-precision addition: C[i] = A[i] + B[i] for every
 * i in [0, size).
 *
 * Full groups of four floats are processed with NEON (vld1q_f32 /
 * vaddq_f32 / vst1q_f32). The remaining 0-3 elements are handled by a
 * scalar loop so that nothing beyond the first `size` elements of any
 * array is read or written.
 *
 * @param A    first input array, at least `size` floats
 * @param B    second input array, at least `size` floats
 * @param C    output array, at least `size` floats
 * @param size number of elements to add; a value <= 0 is a no-op
 */
void vector_add_optimized(float* A, float* B, float* C, int size){
    // Largest multiple of 4 that does not exceed size (0 for size <= 0).
    // Bounding the vector loop by this value keeps every 4-lane load and
    // store fully inside the arrays.
    const int vec_end = (size > 0) ? size - (size % 4) : 0;

    for (int i = 0; i < vec_end; i += 4) {
        float32x4_t vecA = vld1q_f32(&A[i]);
        float32x4_t vecB = vld1q_f32(&B[i]);
        float32x4_t vecC = vaddq_f32(vecA, vecB);
        vst1q_f32(&C[i], vecC);
    }

    // Scalar tail: the 0-3 elements that do not fill a whole NEON register.
    for (int i = vec_end; i < size; ++i) {
        C[i] = A[i] + B[i];
    }
}
|
||||
|
||||
int main(){
|
||||
float A[SIZE],B[SIZE],C[SIZE];
|
||||
|
||||
//初始化
|
||||
srand(time(NULL));
|
||||
for(int i=0;i<SIZE;i++){
|
||||
A[i]=rand()%100;
|
||||
B[i]=rand()%100;
|
||||
}
|
||||
|
||||
//计时并输出
|
||||
clock_t start=clock();
|
||||
vector_add_optimized(A,B,C,SIZE);
|
||||
clock_t end=clock();
|
||||
printf("优化向量加法时间:%f秒\n",(double)(end-start)/CLOCKS_PER_SEC);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in new issue