/*
 * Demo: expand two small sparse (coordinate-list) matrices into dense
 * ROWS x COLS matrices, multiply them, and report the CPU time taken.
 * The multiply uses ARM NEON when the target provides it and plain scalar
 * code otherwise.
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#define MATMUL_HAVE_NEON 1
#else
#define MATMUL_HAVE_NEON 0
#endif

#define ROWS 1024
#define COLS 1024

/**
 * Expand a coordinate-list sparse matrix into a dense ROWS x COLS matrix.
 *
 * Every element of denseMatrix is first set to zero, then each triple
 * (rowIndex[k], colIndex[k], values[k]) for k in [0, nonZeroCount) is stored.
 * If the same coordinate appears more than once, the last value wins.
 * Triples whose coordinates fall outside the matrix are skipped so they can
 * never write outside denseMatrix.
 *
 * @param values        nonZeroCount element values
 * @param rowIndex      row coordinate of each value
 * @param colIndex      column coordinate of each value
 * @param nonZeroCount  number of triples; values <= 0 leave the matrix all zero
 * @param denseMatrix   output matrix, fully overwritten
 */
void sparseToDense(const float *values, const int *rowIndex, const int *colIndex,
                   int nonZeroCount, float denseMatrix[ROWS][COLS])
{
    for (int i = 0; i < ROWS; i++) {
        for (int j = 0; j < COLS; j++) {
            denseMatrix[i][j] = 0.0f;
        }
    }

    for (int k = 0; k < nonZeroCount; k++) {
        int row = rowIndex[k];
        int col = colIndex[k];

        if (row < 0 || row >= ROWS || col < 0 || col >= COLS) {
            continue; /* out-of-range coordinate: ignore rather than corrupt memory */
        }
        denseMatrix[row][col] = values[k];
    }
}

/**
 * Compute C = A * B for n x n matrices given as arrays of row pointers.
 *
 * For each output row, four adjacent columns are accumulated at once:
 * B[k][j..j+3] is contiguous in memory, so it is loaded as one vector and
 * scaled by the scalar A[i][k]. Columns left over when n is not a multiple
 * of four (and every column on targets without NEON) go through the scalar
 * loop, so no load or store ever goes past the end of a row.
 *
 * @param A  left operand, n row pointers to n floats each
 * @param B  right operand, same layout
 * @param C  result, same layout; must not overlap A or B because rows of C
 *           are written while A and B are still being read
 * @param n  matrix dimension; n <= 0 does nothing
 */
void matmul_optimized(float **A, float **B, float **C, int n)
{
    for (int i = 0; i < n; i++) {
        const float *a_row = A[i];
        float *c_row = C[i];
        int j = 0;

#if MATMUL_HAVE_NEON
        for (; j <= n - 4; j += 4) {
            float32x4_t acc = vdupq_n_f32(0.0f);

            for (int k = 0; k < n; k++) {
                /* acc[lane] += B[k][j + lane] * A[i][k] */
                acc = vmlaq_n_f32(acc, vld1q_f32(&B[k][j]), a_row[k]);
            }
            vst1q_f32(&c_row[j], acc);
        }
#endif

        /* Scalar path: tail columns, or the whole row without NEON. */
        for (; j < n; j++) {
            float sum = 0.0f;

            for (int k = 0; k < n; k++) {
                sum += a_row[k] * B[k][j];
            }
            c_row[j] = sum;
        }
    }
}

/* Print every element of a ROWS x COLS matrix, one matrix row per line. */
static void printMatrix(float matrix[ROWS][COLS])
{
    for (int i = 0; i < ROWS; i++) {
        for (int j = 0; j < COLS; j++) {
            printf("%f ", matrix[i][j]);
        }
        printf("\n");
    }
}

/* Fill rows[i] with a pointer to the start of matrix row i. */
static void buildRowPointers(float matrix[ROWS][COLS], float *rows[ROWS])
{
    for (int i = 0; i < ROWS; i++) {
        rows[i] = matrix[i];
    }
}

int main(void)
{
    /* Sparse A: diagonal entries 1, 2, 3. */
    float A_values[] = {1, 2, 3};
    int A_rowIndex[] = {0, 1, 2};
    int A_colIndex[] = {0, 1, 2};
    int A_nonZeroCount = 3;

    /* Sparse B: diagonal entries 4, 5, 6. */
    float B_values[] = {4, 5, 6};
    int B_rowIndex[] = {0, 1, 2};
    int B_colIndex[] = {0, 1, 2};
    int B_nonZeroCount = 3;

    /*
     * Each dense matrix is ROWS * COLS * sizeof(float) = 4 MiB. Static storage
     * keeps the three of them (12 MiB) off the call stack.
     */
    static float denseMatrixA[ROWS][COLS];
    static float denseMatrixB[ROWS][COLS];
    static float resultMatrix[ROWS][COLS];

    /* Row-pointer views in the float** layout matmul_optimized expects. */
    static float *matrixAPtr[ROWS];
    static float *matrixBPtr[ROWS];
    static float *resultMatrixPtr[ROWS];

    sparseToDense(A_values, A_rowIndex, A_colIndex, A_nonZeroCount, denseMatrixA);
    printf("常规矩阵A:\n");
    printMatrix(denseMatrixA);

    sparseToDense(B_values, B_rowIndex, B_colIndex, B_nonZeroCount, denseMatrixB);
    printf("常规矩阵B:\n");
    printMatrix(denseMatrixB);

    buildRowPointers(denseMatrixA, matrixAPtr);
    buildRowPointers(denseMatrixB, matrixBPtr);
    buildRowPointers(resultMatrix, resultMatrixPtr);

    /* Time only the multiplication itself (processor time via clock()). */
    clock_t start_time = clock();
    matmul_optimized(matrixAPtr, matrixBPtr, resultMatrixPtr, ROWS);
    clock_t end_time = clock();

    double elapsed_time = ((double)(end_time - start_time)) / CLOCKS_PER_SEC;
    printf("优化的稀疏矩阵乘法(使用NEON)的运行时间:%f 秒\n", elapsed_time);

    printf("结果矩阵:\n");
    printMatrix(resultMatrix);

    return 0;
}