You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
32 lines
777 B
32 lines
777 B
#include <arm_neon.h>
|
|
#include <stdio.h>
|
|
#define SIZE 1024
|
|
#include<ctime>
|
|
#include<iostream>
|
|
#include <stdlib.h>
|
|
#include <time.h>
|
|
|
|
|
|
/*
 * Element-wise addition of two float arrays using NEON:
 *     C[i] = A[i] + B[i]   for every i in [0, size).
 *
 * Parameters:
 *   A, B  - input arrays; each must hold at least `size` floats.
 *   C     - output array; must hold at least `size` floats.
 *   size  - number of elements to process. Values <= 0 do nothing.
 *
 * The bulk of the work is done four lanes at a time. Any 1-3 leftover
 * elements (when `size` is not a multiple of 4) are handled by a scalar
 * tail loop so the function never reads or writes past index size - 1.
 */
void vector_add_optimized(float* A, float* B, float* C, int size){
    /* Largest multiple of 4 that does not exceed size (0 for size <= 0).
     * Computed by subtraction so it cannot overflow for any positive size. */
    const int vec_end = (size > 0) ? size - (size % 4) : 0;
    int i = 0;

    /* Vector part: each iteration consumes exactly four floats. */
    for (; i < vec_end; i += 4) {
        float32x4_t vecA = vld1q_f32(&A[i]);
        float32x4_t vecB = vld1q_f32(&B[i]);
        float32x4_t vecC = vaddq_f32(vecA, vecB);
        vst1q_f32(&C[i], vecC);
    }

    /* Scalar tail: the remaining size % 4 elements, if any. */
    for (; i < size; ++i) {
        C[i] = A[i] + B[i];
    }
}
|
|
int main(){
|
|
float A[SIZE];
|
|
float B[SIZE];
|
|
float C[SIZE];
|
|
for(int i=0;i<SIZE;++i){
|
|
A[i]=(float)rand() / (float)RAND_MAX * 100;
|
|
B[i]=(float)rand() / (float)RAND_MAX * 100;
|
|
}
|
|
clock_t start=clock();
|
|
vector_add_optimized(A,B,C,SIZE);
|
|
clock_t end=clock();
|
|
printf("Initial vector addition time: %lf seconds\n", (double)(end - start) / CLOCKS_PER_SEC);
|
|
return 0;
|
|
}
|