You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

68 lines
2.0 KiB

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <algorithm>
#include <iostream>
#include <ostream>
#include <vector>

#include <arm_neon.h>
/**
 * Computes the square matrix product C = A * B.
 *
 * Despite the name, all three operands are dense: each is an array of MAX row
 * pointers, and every row holds MAX floats.
 *
 * @param A_denseMatrix  left operand, MAX rows of MAX floats (read only)
 * @param B_denseMatrix  right operand, MAX rows of MAX floats (read only)
 * @param C_denseMatrix  result, MAX rows of MAX floats; every element is
 *                       overwritten. Must not share storage with A or B.
 * @param MAX            matrix dimension; values <= 0 leave C untouched
 */
void sparse_matmul_coo(float **A_denseMatrix,float **B_denseMatrix,float **C_denseMatrix,int MAX){
    for(int i=0;i<MAX;++i){
        const float *rowA=A_denseMatrix[i];
        float *rowC=C_denseMatrix[i];
        int j=0;
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
        // Vectorise across output columns: B[k][j..j+3] is contiguous in memory,
        // whereas a column of B (fixed j, varying k) is not, so four neighbouring
        // results of row i are accumulated at once.
        for(;j+4<=MAX;j+=4){
            float32x4_t acc=vdupq_n_f32(0.0f);
            for(int k=0;k<MAX;++k){
                const float32x4_t a_ik=vdupq_n_f32(rowA[k]);        // broadcast A[i][k]
                const float32x4_t b_kj=vld1q_f32(&B_denseMatrix[k][j]);
                acc=vmlaq_f32(acc,a_ik,b_kj);                       // acc += A[i][k]*B[k][j..j+3]
            }
            vst1q_f32(&rowC[j],acc);
        }
#endif
        // Scalar path: handles the columns left over when MAX is not a multiple
        // of four, and the whole row when NEON is unavailable.
        for(;j<MAX;++j){
            float sum=0.0f;
            for(int k=0;k<MAX;++k){
                sum+=rowA[k]*B_denseMatrix[k][j];
            }
            rowC[j]=sum;
        }
    }
}
/**
 * Demo driver: expands two small sparse matrices given in coordinate (COO)
 * form into zero-filled dense square matrices, multiplies them with
 * sparse_matmul_coo and prints the CPU time the multiplication took.
 */
int main(){
    // COO triplets: values[i] is stored at (rowIndex[i], colIndex[i]).
    const float A_values[]={1,2,3,4,5};
    const int A_rowIndex[]={0,0,1,2,2};
    const int A_colIndex[]={0,2,1,0,2};
    const int A_nonZeroCount=5;
    const float B_values[]={6,8,7,9};
    const int B_rowIndex[]={0,2,1,2};
    const int B_colIndex[]={0,0,1,2};
    const int B_nonZeroCount=4;

    // The dense dimension must cover the largest row/column index that occurs;
    // the number of non-zeros says nothing about how large the indices are.
    int MAX=0;
    for(int i=0;i<A_nonZeroCount;++i){
        MAX=std::max({MAX,A_rowIndex[i]+1,A_colIndex[i]+1});
    }
    for(int i=0;i<B_nonZeroCount;++i){
        MAX=std::max({MAX,B_rowIndex[i]+1,B_colIndex[i]+1});
    }

    // One zero-initialised buffer per matrix, so cells that have no COO entry
    // are exactly 0 and the memory is released automatically on return.
    const std::size_t dim=static_cast<std::size_t>(MAX);
    std::vector<float> A_storage(dim*dim,0.0f);
    std::vector<float> B_storage(dim*dim,0.0f);
    std::vector<float> C_storage(dim*dim,0.0f);

    // Row-pointer tables in the float** layout sparse_matmul_coo expects.
    std::vector<float*> A_denseMatrix(dim);
    std::vector<float*> B_denseMatrix(dim);
    std::vector<float*> C_denseMatrix(dim);
    for(std::size_t r=0;r<dim;++r){
        A_denseMatrix[r]=A_storage.data()+r*dim;
        B_denseMatrix[r]=B_storage.data()+r*dim;
        C_denseMatrix[r]=C_storage.data()+r*dim;
    }

    // Scatter the non-zero entries into the dense matrices.
    for(int i=0;i<A_nonZeroCount;++i){
        A_denseMatrix[A_rowIndex[i]][A_colIndex[i]]=A_values[i];
    }
    for(int i=0;i<B_nonZeroCount;++i){
        B_denseMatrix[B_rowIndex[i]][B_colIndex[i]]=B_values[i];
    }

    const clock_t start=clock();
    sparse_matmul_coo(A_denseMatrix.data(),B_denseMatrix.data(),C_denseMatrix.data(),MAX);
    const clock_t end=clock();
    std::cout<<"优化的稀疏矩阵乘法时间:"<< double(end-start)/CLOCKS_PER_SEC<<""<<std::endl;
}