/*
 * vector_matmul_2.c
 *
 * Demo: multiply two ROWS x COLS matrices that are stored sparsely in
 * coordinate form (one value / row index / column index triple per entry),
 * using ARM NEON for the row updates when the target supports it.
 *
 * Define VECTOR_MATMUL_NO_MAIN to compile the routines without the demo
 * main(), e.g. to link them into another program or a test harness.
 */
#define _CRT_SECURE_NO_WARNINGS
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Only pull in the NEON intrinsics when the compiler targets NEON;
 * everywhere else the scalar fallback below is used. */
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#define SPARSE_USE_NEON 1
#else
#define SPARSE_USE_NEON 0
#endif

#define ROWS 4
#define COLS 4

/* The dense product below multiplies a ROWS x COLS matrix by another
 * ROWS x COLS matrix, which is only defined for square shapes. */
#if ROWS != COLS
#error "ROWS and COLS must be equal: the demo multiplies two ROWS x COLS matrices"
#endif

/*
 * Sparse matrix in coordinate form: entry i has value values[i] and sits at
 * (rowIndex[i], colIndex[i]). The three arrays each hold nonZeroCount items.
 */
typedef struct {
    float* values;
    int* rowIndex;
    int* colIndex;
    int nonZeroCount;
} SparseMatrix;

/*
 * Allocates a sparse matrix with room for nonZeroCount entries.
 *
 * The entry arrays are zero-initialised. At least one slot is always
 * allocated so that a zero-entry matrix still owns valid, freeable buffers.
 *
 * Returns NULL if nonZeroCount is negative or if any allocation fails.
 * The caller releases the result with freeSparseMatrix().
 */
SparseMatrix* createSparseMatrix(int nonZeroCount) {
    if (nonZeroCount < 0) {
        return NULL;
    }

    SparseMatrix* matrix = malloc(sizeof *matrix);
    if (matrix == NULL) {
        return NULL;
    }

    size_t slots = nonZeroCount > 0 ? (size_t)nonZeroCount : 1;
    matrix->values = calloc(slots, sizeof *matrix->values);
    matrix->rowIndex = calloc(slots, sizeof *matrix->rowIndex);
    matrix->colIndex = calloc(slots, sizeof *matrix->colIndex);
    matrix->nonZeroCount = nonZeroCount;

    if (matrix->values == NULL || matrix->rowIndex == NULL || matrix->colIndex == NULL) {
        free(matrix->values);
        free(matrix->rowIndex);
        free(matrix->colIndex);
        free(matrix);
        return NULL;
    }
    return matrix;
}

/* Releases a matrix obtained from createSparseMatrix(); NULL is ignored. */
void freeSparseMatrix(SparseMatrix* matrix) {
    if (matrix == NULL) {
        return;
    }
    free(matrix->values);
    free(matrix->rowIndex);
    free(matrix->colIndex);
    free(matrix);
}

/*
 * Expands sparse into the ROWS x COLS array dense.
 *
 * dense is cleared first. Entries whose indices fall outside the array are
 * skipped instead of being written out of bounds; if a position is listed
 * more than once, the last listed value wins.
 */
void sparseToDense(SparseMatrix* sparse, float dense[ROWS][COLS]) {
    for (int i = 0; i < ROWS; i++) {
        for (int j = 0; j < COLS; j++) {
            dense[i][j] = 0.0f;
        }
    }
    if (sparse == NULL) {
        return;
    }

    for (int i = 0; i < sparse->nonZeroCount; i++) {
        int row = sparse->rowIndex[i];
        int col = sparse->colIndex[i];
        if (row < 0 || row >= ROWS || col < 0 || col >= COLS) {
            continue;
        }
        dense[row][col] = sparse->values[i];
    }
}

/* Prints dense to stdout, one matrix row per line. */
void printDenseMatrix(float dense[ROWS][COLS]) {
    for (int i = 0; i < ROWS; i++) {
        for (int j = 0; j < COLS; j++) {
            printf("%5.1f ", dense[i][j]);
        }
        printf("\n");
    }
}

/*
 * Computes the sparse product A * B in coordinate form.
 *
 * Every pair (entry of A, entry of B) with A's column equal to B's row
 * contributes its product to the output entry at (A's row, B's column);
 * contributions to the same position are summed. Output entries appear in
 * the order in which their position is first produced.
 *
 * Returns a newly allocated matrix owned by the caller (release it with
 * freeSparseMatrix()), or NULL if an argument is NULL, the worst-case entry
 * count does not fit in an int, or allocation fails.
 */
SparseMatrix* sparse_matmul(SparseMatrix* A, SparseMatrix* B) {
    if (A == NULL || B == NULL || A->nonZeroCount < 0 || B->nonZeroCount < 0) {
        return NULL;
    }
    /* Worst case: every pair of entries lands on a distinct output position. */
    if (B->nonZeroCount != 0 && A->nonZeroCount > INT_MAX / B->nonZeroCount) {
        return NULL;
    }
    int maxNonZeroCount = A->nonZeroCount * B->nonZeroCount;

    SparseMatrix* C = createSparseMatrix(maxNonZeroCount);
    if (C == NULL) {
        return NULL;
    }
    int count = 0;

    for (int i = 0; i < A->nonZeroCount; i++) {
        float aValue = A->values[i];
        int aRow = A->rowIndex[i];
        int aCol = A->colIndex[i];

        for (int j = 0; j < B->nonZeroCount; j++) {
            if (aCol != B->rowIndex[j]) {
                continue;
            }
            float product = aValue * B->values[j];
            int bCol = B->colIndex[j];

            /* Merge into an existing output entry at (aRow, bCol) if any. */
            int found = 0;
            for (int k = 0; k < count; k++) {
                if (C->rowIndex[k] == aRow && C->colIndex[k] == bCol) {
                    C->values[k] += product;
                    found = 1;
                    break;
                }
            }
            if (!found) {
                C->values[count] = product;
                C->rowIndex[count] = aRow;
                C->colIndex[count] = bCol;
                count++;
            }
        }
    }

    C->nonZeroCount = count;

    /* Trim the buffers to the entries actually used. A zero-size realloc is
     * avoided, and a failed shrink simply keeps the larger, still valid
     * buffer instead of losing it. */
    if (count > 0 && count < maxNonZeroCount) {
        void* shrunk;

        shrunk = realloc(C->values, (size_t)count * sizeof *C->values);
        if (shrunk != NULL) {
            C->values = shrunk;
        }
        shrunk = realloc(C->rowIndex, (size_t)count * sizeof *C->rowIndex);
        if (shrunk != NULL) {
            C->rowIndex = shrunk;
        }
        shrunk = realloc(C->colIndex, (size_t)count * sizeof *C->colIndex);
        if (shrunk != NULL) {
            C->colIndex = shrunk;
        }
    }

    return C;
}

/* dst[0..COLS) += scale * src[0..COLS), four lanes at a time under NEON. */
static void accumulateScaledRow(float* dst, const float* src, float scale) {
    int c = 0;
#if SPARSE_USE_NEON
    for (; c + 4 <= COLS; c += 4) {
        float32x4_t acc = vld1q_f32(dst + c);
        float32x4_t row = vld1q_f32(src + c);
        acc = vmlaq_n_f32(acc, row, scale); /* acc + row * scale */
        vst1q_f32(dst + c, acc);
    }
#endif
    /* Scalar path: the whole row without NEON, or the tail when COLS is not
     * a multiple of four. */
    for (; c < COLS; c++) {
        dst[c] += src[c] * scale;
    }
}

/*
 * Computes the dense ROWS x COLS product C = A * B from sparse inputs.
 *
 * B is first expanded to a dense array (entries listed more than once are
 * summed), then every entry a at (r, k) of A adds a * B[k][*] to row r of C.
 * Each vector load/store therefore covers one complete row of C and never
 * reaches into a neighbouring row or past the end of the array.
 *
 * C is always cleared first. Entries with indices outside the matrix are
 * ignored; a NULL A or B leaves C all zeros.
 */
void neonSparseMatMul(SparseMatrix* A, SparseMatrix* B, float C[ROWS][COLS]) {
    for (int i = 0; i < ROWS; i++) {
        for (int j = 0; j < COLS; j++) {
            C[i][j] = 0.0f;
        }
    }
    if (A == NULL || B == NULL) {
        return;
    }

    float denseB[ROWS][COLS] = {{0.0f}};
    for (int j = 0; j < B->nonZeroCount; j++) {
        int bRow = B->rowIndex[j];
        int bCol = B->colIndex[j];
        if (bRow < 0 || bRow >= ROWS || bCol < 0 || bCol >= COLS) {
            continue;
        }
        denseB[bRow][bCol] += B->values[j];
    }

    for (int i = 0; i < A->nonZeroCount; i++) {
        int aRow = A->rowIndex[i];
        int aCol = A->colIndex[i];
        /* aCol selects a row of B, so it is bounded by ROWS (== COLS). */
        if (aRow < 0 || aRow >= ROWS || aCol < 0 || aCol >= ROWS) {
            continue;
        }
        accumulateScaledRow(C[aRow], denseB[aCol], A->values[i]);
    }
}

#ifndef VECTOR_MATMUL_NO_MAIN
/* Builds two small sample matrices, multiplies them, and prints the inputs,
 * the product and the elapsed CPU time. */
int main(void) {
    static const struct {
        float value;
        int row;
        int col;
    } entriesA[] = {
        {1.0f, 0, 0}, {2.0f, 0, 2}, {3.0f, 1, 1}, {4.0f, 2, 0},
    }, entriesB[] = {
        {5.0f, 0, 1}, {6.0f, 1, 2}, {7.0f, 2, 0}, {8.0f, 2, 1},
    };
    const int countA = (int)(sizeof entriesA / sizeof entriesA[0]);
    const int countB = (int)(sizeof entriesB / sizeof entriesB[0]);

    SparseMatrix* A = createSparseMatrix(countA);
    SparseMatrix* B = createSparseMatrix(countB);
    if (A == NULL || B == NULL) {
        fprintf(stderr, "out of memory\n");
        freeSparseMatrix(A);
        freeSparseMatrix(B);
        return EXIT_FAILURE;
    }

    for (int i = 0; i < countA; i++) {
        A->values[i] = entriesA[i].value;
        A->rowIndex[i] = entriesA[i].row;
        A->colIndex[i] = entriesA[i].col;
    }
    for (int i = 0; i < countB; i++) {
        B->values[i] = entriesB[i].value;
        B->rowIndex[i] = entriesB[i].row;
        B->colIndex[i] = entriesB[i].col;
    }

    float C[ROWS][COLS];

    clock_t start = clock();

    neonSparseMatMul(A, B, C);

    clock_t end = clock();
    double time_taken = (double)(end - start) / CLOCKS_PER_SEC;

    float denseA[ROWS][COLS], denseB[ROWS][COLS];
    sparseToDense(A, denseA);
    sparseToDense(B, denseB);

    printf("普通矩阵 A:\n");
    printDenseMatrix(denseA);

    printf("普通矩阵 B:\n");
    printDenseMatrix(denseB);

    printf("稀疏矩阵乘法结果:\n");
    printDenseMatrix(C);
    printf("稀疏矩阵乘法运行时间: %f 秒\n", time_taken);

    freeSparseMatrix(A);
    freeSparseMatrix(B);

    return 0;
}
#endif /* VECTOR_MATMUL_NO_MAIN */