parent
bf9731e1ec
commit
dacdbdfb17
@ -0,0 +1,68 @@
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
#include <arm_neon.h>
|
||||
/**
 * @brief Dense square matrix product C = A * B, NEON-accelerated when available.
 *
 * Despite the name, the operands are dense row-pointer matrices (the caller
 * expands its COO triplets before calling). Every C[i][j] is overwritten with
 * sum over k of A[i][k] * B[k][j].
 *
 * @param A_denseMatrix  MAX row pointers, each to MAX floats (left operand).
 * @param B_denseMatrix  MAX row pointers, each to MAX floats (right operand).
 * @param C_denseMatrix  MAX row pointers, each to MAX floats (output). Must not
 *                       alias A or B: rows of C are written while A and B are
 *                       still being read.
 * @param MAX            Matrix dimension; values <= 0 make the call a no-op.
 */
void sparse_matmul_coo(float **A_denseMatrix,float **B_denseMatrix,float **C_denseMatrix,int MAX){
    for(int i=0;i<MAX;++i){
        const float *rowA=A_denseMatrix[i];
        float *rowC=C_denseMatrix[i];
        int j=0;
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
        // Vectorise across four adjacent output columns: B[k][j..j+3] is
        // contiguous in memory, whereas a column of B is not, so this keeps
        // every load unit-stride and never reads past the end of a row.
        for(;j+4<=MAX;j+=4){
            float32x4_t acc=vdupq_n_f32(0.0f);
            for(int k=0;k<MAX;++k){
                // acc += B[k][j..j+3] * A[i][k]
                acc=vmlaq_n_f32(acc,vld1q_f32(&B_denseMatrix[k][j]),rowA[k]);
            }
            vst1q_f32(&rowC[j],acc);
        }
#endif
        // Scalar tail for the last MAX % 4 columns (and the whole row when
        // NEON is not available on the target).
        for(;j<MAX;++j){
            float sum=0.0f;
            for(int k=0;k<MAX;++k){
                sum+=rowA[k]*B_denseMatrix[k][j];
            }
            rowC[j]=sum;
        }
    }
}
|
||||
int main(){
|
||||
float A_values[]={1,2,3,4,5};
|
||||
int A_rowIndex[]={0,0,1,2,2};
|
||||
int A_colIndex[]={0,2,1,0,2};
|
||||
int A_nonZeroCount=5;
|
||||
|
||||
float B_values[]={6,8,7,9};
|
||||
int B_rowIndex[]={0,2,1,2};
|
||||
int B_colIndex[]={0,0,1,2};
|
||||
int B_nonZeroCount=4;
|
||||
|
||||
int MAX;
|
||||
if(A_nonZeroCount>B_nonZeroCount)MAX=A_nonZeroCount;
|
||||
else MAX=B_nonZeroCount;
|
||||
|
||||
float C_values[MAX];
|
||||
int C_rowIndex[MAX];
|
||||
int C_colIndex[MAX];
|
||||
int C_nonZeroCount=0;
|
||||
|
||||
int ROWS=MAX,COLS=MAX;
|
||||
float **A_denseMatrix=new float*[MAX];
|
||||
float **B_denseMatrix=new float*[MAX];
|
||||
float **C_denseMatrix=new float*[MAX];
|
||||
for(int i=0;i<MAX;++i){
|
||||
A_denseMatrix[i]=new float[MAX];
|
||||
B_denseMatrix[i]=new float[MAX];
|
||||
C_denseMatrix[i]=new float[MAX];
|
||||
}
|
||||
for(int i=0;i<A_nonZeroCount;i++){
|
||||
int row=A_rowIndex[i];
|
||||
int col=A_colIndex[i];
|
||||
A_denseMatrix[row][col]=A_values[i];
|
||||
}
|
||||
for(int i=0;i<B_nonZeroCount;i++){
|
||||
int row=B_rowIndex[i];
|
||||
int col=B_colIndex[i];
|
||||
B_denseMatrix[row][col]=B_values[i];
|
||||
}
|
||||
|
||||
|
||||
|
||||
clock_t start=clock();
|
||||
sparse_matmul_coo(A_denseMatrix,B_denseMatrix,C_denseMatrix,MAX);
|
||||
|
||||
clock_t end=clock();
|
||||
std::cout<<"优化的稀疏矩阵乘法时间:"<< double(end-start)/CLOCKS_PER_SEC<<"秒"<<std::endl;
|
||||
}
|
Loading…
Reference in new issue