ADD file via upload

1 year ago · c6d73ae4d0
parent 97a4811632
commit c6d73ae4d0
1 changed files with 76 additions and 0 deletions
--- a/6.cpp
+++ b/6.cpp
@@ -0,0 +1,76 @@
+#include <stdio.h>
+#include <ctime>
+#include <arm_neon.h>
+#define SIZE 1024
+
+/**
+ * Computes the matrix product C = A * B for square n x n float matrices,
+ * using NEON to process four output columns at a time.
+ *
+ * Despite the name, the inputs are treated as dense: every element of A and
+ * B is visited.
+ *
+ * @param A  array of n row pointers, each row holding n floats (left operand)
+ * @param B  array of n row pointers, each row holding n floats (right operand)
+ * @param C  array of n row pointers, each row holding n floats; every element
+ *           is overwritten with the product. C must not share rows with A or
+ *           B, because C rows are written while A and B are still being read.
+ * @param n  matrix dimension; any value is accepted (n <= 0 does nothing,
+ *           and n need not be a multiple of 4).
+ */
+void sparse_matmul_optimized(float** A, float** B, float** C, int n) {
+    // Columns [0, vec_end) are handled four at a time; the remaining
+    // n % 4 columns fall through to the scalar tail loop.
+    const int vec_end = n - (n % 4);
+
+    for (int i = 0; i < n; i++) {
+        float* c_row = C[i];
+        for (int j = 0; j < n; j++) {
+            c_row[j] = 0.0f;
+        }
+
+        // Row-broadcast formulation: C[i][:] += A[i][k] * B[k][:].
+        // This only ever loads contiguous elements of a single row of B,
+        // so a 4-wide load never crosses into another row or past its end.
+        for (int k = 0; k < n; k++) {
+            const float a_ik = A[i][k];
+            const float* b_row = B[k];
+            const float32x4_t vec_a = vdupq_n_f32(a_ik);
+
+            int j = 0;
+            for (; j < vec_end; j += 4) {
+                float32x4_t vec_c = vld1q_f32(&c_row[j]);
+                vec_c = vmlaq_f32(vec_c, vec_a, vld1q_f32(&b_row[j]));
+                vst1q_f32(&c_row[j], vec_c);
+            }
+            // Scalar tail for the columns that do not fill a full vector.
+            for (; j < n; j++) {
+                c_row[j] += a_ik * b_row[j];
+            }
+        }
+    }
+}
+
+/**
+ * Benchmark driver: expands two small COO-format matrices into SIZE x SIZE
+ * dense matrices, times a single call to sparse_matmul_optimized() with
+ * clock(), prints the elapsed time in seconds, and frees the matrices.
+ *
+ * @return 0 always.
+ */
+int main(){
+    // Matrix A in COO format: parallel arrays of value, row index, column index.
+    float A_values[]={1,2,3,4,5};
+    int A_rowIndex[]={0,0,1,2,2};
+    int A_colIndex[]={0,2,1,0,2};
+    int A_nonZeroCount=5;
+
+    // Matrix B in COO format.
+    float B_values[]={6,8,7,9};
+    int B_rowIndex[]={0,2,1,2};
+    int B_colIndex[]={0,0,1,2};
+    int B_nonZeroCount=4;
+
+    // Allocate the dense matrices as arrays of row pointers.
+    float** A=new float*[SIZE];
+    float** B=new float*[SIZE];
+    float** C=new float*[SIZE];
+
+    for(int i=0;i<SIZE;i++){
+        // The trailing () value-initializes the rows to 0.0f. Only the few
+        // COO entries are written below, so every other element must already
+        // be zero rather than indeterminate.
+        A[i]=new float[SIZE]();
+        B[i]=new float[SIZE]();
+        // C needs no initialization: the multiply assigns every element.
+        C[i]=new float[SIZE];
+    }
+
+    // Scatter the non-zero COO entries into the dense matrices.
+    for(int i=0;i<A_nonZeroCount;i++){
+        int row=A_rowIndex[i];
+        int col=A_colIndex[i];
+        A[row][col]=A_values[i];
+    }
+    for(int i=0;i<B_nonZeroCount;i++){
+        int row=B_rowIndex[i];
+        int col=B_colIndex[i];
+        B[row][col]=B_values[i];
+    }
+
+    // Time the multiplication and report it.
+    clock_t start=clock();
+    sparse_matmul_optimized(A,B,C,SIZE);
+    clock_t end=clock();
+    printf("优化稀疏矩阵乘法时间：%f秒\n",(double)(end-start)/CLOCKS_PER_SEC);
+
+    // Release each row, then the row-pointer arrays.
+    for(int i=0;i<SIZE;i++){
+        delete[] A[i];
+        delete[] B[i];
+        delete[] C[i];
+    }
+    delete[] A;
+    delete[] B;
+    delete[] C;
+
+    return 0;
+}