You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
68 lines
2.0 KiB
68 lines
2.0 KiB
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <iostream>
#include <ostream>
#include <vector>

#include <arm_neon.h>
|
|
/**
 * @brief Computes the dense matrix product C = A * B for square matrices.
 *
 * Despite the name, all three operands are dense, row-pointer matrices
 * (element (r, c) is `M[r][c]`); callers are expected to expand any COO
 * triplets into dense rows before calling.
 *
 * The loops run in i-k-j order: A[i][k] is broadcast and multiplied with the
 * contiguous row B[k][*], accumulating into row C[i][*]. This keeps every
 * vector load/store contiguous, which a column walk over B would not be.
 *
 * @param A_denseMatrix Left operand; MAX row pointers, each to MAX floats.
 * @param B_denseMatrix Right operand; MAX row pointers, each to MAX floats.
 * @param C_denseMatrix Output; MAX row pointers, each to MAX floats. Every
 *                      element is overwritten. Rows of C must not overlap
 *                      rows of A or B, because C is updated while A and B
 *                      are still being read.
 * @param MAX           Side length of the matrices. Any value is accepted;
 *                      a value <= 0 makes the call a no-op.
 */
void sparse_matmul_coo(float **A_denseMatrix, float **B_denseMatrix, float **C_denseMatrix, int MAX) {
    for (int i = 0; i < MAX; ++i) {
        const float *rowA = A_denseMatrix[i];
        float *rowC = C_denseMatrix[i];

        // Start from a clean accumulator row; the caller's C may be uninitialised.
        for (int j = 0; j < MAX; ++j) {
            rowC[j] = 0.0f;
        }

        for (int k = 0; k < MAX; ++k) {
            const float a = rowA[k];
            const float *rowB = B_denseMatrix[k];
            int j = 0;
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
            // Four columns per step: C[i][j..j+3] += A[i][k] * B[k][j..j+3].
            // The bound `j + 4 <= MAX` guarantees no load or store runs past the row.
            for (; j + 4 <= MAX; j += 4) {
                float32x4_t acc = vld1q_f32(rowC + j);
                acc = vmlaq_n_f32(acc, vld1q_f32(rowB + j), a);
                vst1q_f32(rowC + j, acc);
            }
#endif
            // Scalar tail for the last MAX % 4 columns (and the whole row when
            // the NEON path is not compiled in).
            for (; j < MAX; ++j) {
                rowC[j] += a * rowB[j];
            }
        }
    }
}
|
|
int main(){
|
|
float A_values[]={1,2,3,4,5};
|
|
int A_rowIndex[]={0,0,1,2,2};
|
|
int A_colIndex[]={0,2,1,0,2};
|
|
int A_nonZeroCount=5;
|
|
|
|
float B_values[]={6,8,7,9};
|
|
int B_rowIndex[]={0,2,1,2};
|
|
int B_colIndex[]={0,0,1,2};
|
|
int B_nonZeroCount=4;
|
|
|
|
int MAX;
|
|
if(A_nonZeroCount>B_nonZeroCount)MAX=A_nonZeroCount;
|
|
else MAX=B_nonZeroCount;
|
|
|
|
float C_values[MAX];
|
|
int C_rowIndex[MAX];
|
|
int C_colIndex[MAX];
|
|
int C_nonZeroCount=0;
|
|
|
|
int ROWS=MAX,COLS=MAX;
|
|
float **A_denseMatrix=new float*[MAX];
|
|
float **B_denseMatrix=new float*[MAX];
|
|
float **C_denseMatrix=new float*[MAX];
|
|
for(int i=0;i<MAX;++i){
|
|
A_denseMatrix[i]=new float[MAX];
|
|
B_denseMatrix[i]=new float[MAX];
|
|
C_denseMatrix[i]=new float[MAX];
|
|
}
|
|
for(int i=0;i<A_nonZeroCount;i++){
|
|
int row=A_rowIndex[i];
|
|
int col=A_colIndex[i];
|
|
A_denseMatrix[row][col]=A_values[i];
|
|
}
|
|
for(int i=0;i<B_nonZeroCount;i++){
|
|
int row=B_rowIndex[i];
|
|
int col=B_colIndex[i];
|
|
B_denseMatrix[row][col]=B_values[i];
|
|
}
|
|
|
|
|
|
|
|
clock_t start=clock();
|
|
sparse_matmul_coo(A_denseMatrix,B_denseMatrix,C_denseMatrix,MAX);
|
|
|
|
clock_t end=clock();
|
|
std::cout<<"优化的稀疏矩阵乘法时间:"<< double(end-start)/CLOCKS_PER_SEC<<"秒"<<std::endl;
|
|
} |