You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

119 lines
3.5 KiB

#include <stdio.h>
#include <stdlib.h>
#include <arm_neon.h>
#include <time.h>
/*
 * Sparse matrix stored as three parallel arrays: slot k describes one stored
 * entry with value values[k] at position (rowIndex[k], colIndex[k]).
 */
typedef struct {
    int *values;       /* value of each stored entry */
    int *rowIndex;     /* row of each stored entry */
    int *colIndex;     /* column of each stored entry */
    int nonZeroCount;  /* number of stored entries (length of each array) */
    int rows;          /* number of rows in the full matrix */
    int cols;          /* number of columns in the full matrix */
} SparseMatrix;
/*
 * Set up `mat` as a rows x cols sparse matrix with room for `nonZeroCount`
 * stored entries.
 *
 * The three entry arrays are heap-allocated (zero-filled); the caller fills
 * them in and later releases each one with free().
 *
 * The function returns void, so failure is reported through the struct:
 * if `nonZeroCount` is not positive, or any allocation fails, all three
 * array pointers are NULL and mat->nonZeroCount is 0. In that state loops
 * over the entries do nothing and calling free() on the fields is still safe.
 */
void initSparseMatrix(SparseMatrix* mat, int rows, int cols, int nonZeroCount) {
    mat->rows = rows;
    mat->cols = cols;

    /* Start from the "empty" state so every early exit leaves mat consistent. */
    mat->nonZeroCount = 0;
    mat->values = NULL;
    mat->rowIndex = NULL;
    mat->colIndex = NULL;

    if (nonZeroCount <= 0) {
        return;
    }

    /* calloc rejects count * size products that would overflow size_t. */
    size_t count = (size_t)nonZeroCount;
    int *values = calloc(count, sizeof *values);
    int *rowIndex = calloc(count, sizeof *rowIndex);
    int *colIndex = calloc(count, sizeof *colIndex);

    if (values == NULL || rowIndex == NULL || colIndex == NULL) {
        /* Release whichever allocations did succeed; free(NULL) is a no-op. */
        free(values);
        free(rowIndex);
        free(colIndex);
        return;
    }

    mat->values = values;
    mat->rowIndex = rowIndex;
    mat->colIndex = colIndex;
    mat->nonZeroCount = nonZeroCount;
}
/* Write `count` integers from `items` to stdout, each followed by a space. */
static void printIntArray(const int *items, int count) {
    int k = 0;
    while (k < count) {
        printf("%d ", items[k]);
        k++;
    }
}

/*
 * Print the stored entries of `mat` to stdout as three labelled lines:
 * the values, then the row indices, then the column indices, each listing
 * mat->nonZeroCount integers. The matrix is only read, never modified.
 */
void printSparseMatrix(SparseMatrix* mat) {
    const int entryCount = mat->nonZeroCount;

    printf("Values: ");
    printIntArray(mat->values, entryCount);

    printf("\nRow indices: ");
    printIntArray(mat->rowIndex, entryCount);

    printf("\nColumn indices: ");
    printIntArray(mat->colIndex, entryCount);

    printf("\n");
}
/*
 * Multiply two sparse matrices and store the product as a dense matrix.
 *
 * A, B        operands; entry k of each is values[k] at (rowIndex[k], colIndex[k]).
 * C           output buffer of resultRows * resultCols floats, row-major.
 *             It is zeroed first, then filled with the product.
 * resultRows  number of rows of C.
 * resultCols  number of columns of C (also the row stride of C).
 *
 * For every pair of entries (A[i], B[j]) whose inner indices match
 * (column of A[i] == row of B[j]) the product valueA * valueB is added to
 * C[rowA * resultCols + colB].
 *
 * Each matching pair contributes to exactly one output element, so the
 * update is a plain scalar accumulate: a four-lane vector load/store at that
 * address would also add the product to the three following elements and
 * could touch memory past the end of C.
 *
 * Entries whose target position lies outside resultRows x resultCols are
 * skipped instead of being written out of bounds.
 */
void sparse_matmul_optimized(SparseMatrix* A, SparseMatrix* B, float* C, int resultRows, int resultCols) {
    if (resultRows <= 0 || resultCols <= 0) {
        return; /* empty result: nothing to clear or accumulate */
    }

    /* size_t arithmetic avoids int overflow in rows * cols for large results. */
    const size_t rowStride = (size_t)resultCols;
    const size_t total = (size_t)resultRows * rowStride;

    for (size_t k = 0; k < total; k++) {
        C[k] = 0.0f;
    }

    for (int i = 0; i < A->nonZeroCount; i++) {
        const int rowA = A->rowIndex[i];
        const int colA = A->colIndex[i];

        if (rowA < 0 || rowA >= resultRows) {
            continue; /* would land outside C */
        }

        const float valueA = (float)A->values[i];
        float *outRow = C + (size_t)rowA * rowStride;

        for (int j = 0; j < B->nonZeroCount; j++) {
            if (B->rowIndex[j] != colA) {
                continue; /* inner indices do not match: no contribution */
            }

            const int colB = B->colIndex[j];
            if (colB < 0 || colB >= resultCols) {
                continue; /* would land outside C */
            }

            outRow[colB] += valueA * (float)B->values[j];
        }
    }
}
/*
 * Demo driver: builds two 3x3 sparse matrices with four stored entries each,
 * prints them, multiplies them into a dense row-major float buffer, then
 * prints the result and the time the multiplication took as measured with
 * clock().
 *
 * Returns 0 on success and EXIT_FAILURE if any allocation failed.
 */
int main(void) {
    int status = EXIT_FAILURE;
    float *C = NULL;
    SparseMatrix A, B;
    const int nonZeroCountA = 4;
    const int nonZeroCountB = 4;

    /* Allocate both operands up front so one check covers every array. */
    initSparseMatrix(&A, 3, 3, nonZeroCountA);
    initSparseMatrix(&B, 3, 3, nonZeroCountB);
    if (A.values == NULL || A.rowIndex == NULL || A.colIndex == NULL ||
        B.values == NULL || B.rowIndex == NULL || B.colIndex == NULL) {
        fprintf(stderr, "Failed to allocate sparse matrix storage\n");
        goto cleanup;
    }

    A.values[0] = 5; A.rowIndex[0] = 0; A.colIndex[0] = 0;
    A.values[1] = 8; A.rowIndex[1] = 0; A.colIndex[1] = 2;
    A.values[2] = 3; A.rowIndex[2] = 1; A.colIndex[2] = 1;
    A.values[3] = 6; A.rowIndex[3] = 2; A.colIndex[3] = 0;

    B.values[0] = 2; B.rowIndex[0] = 0; B.colIndex[0] = 1;
    B.values[1] = 7; B.rowIndex[1] = 1; B.colIndex[1] = 0;
    B.values[2] = 4; B.rowIndex[2] = 2; B.colIndex[2] = 2;
    B.values[3] = 9; B.rowIndex[3] = 2; B.colIndex[3] = 1;

    printf("Matrix A:\n");
    printSparseMatrix(&A);
    printf("\nMatrix B:\n");
    printSparseMatrix(&B);

    /* The product of an (A.rows x A.cols) and a (B.rows x B.cols) matrix. */
    const int resultRows = A.rows;
    const int resultCols = B.cols;

    C = malloc((size_t)resultRows * (size_t)resultCols * sizeof *C);
    if (C == NULL) {
        fprintf(stderr, "Failed to allocate result matrix\n");
        goto cleanup;
    }

    const clock_t start = clock();
    sparse_matmul_optimized(&A, &B, C, resultRows, resultCols);
    const clock_t end = clock();

    printf("\nMatrix C (Result):\n");
    for (int i = 0; i < resultRows; i++) {
        for (int j = 0; j < resultCols; j++) {
            printf("%.2f ", C[i * resultCols + j]);
        }
        printf("\n");
    }

    const double time_taken = (double)(end - start) / CLOCKS_PER_SEC;
    printf("\nSparse Matrix Multiplication Time: %f seconds\n", time_taken);

    status = 0;

cleanup:
    /* free(NULL) is a no-op, so this path is safe after a partial setup. */
    free(A.values);
    free(A.rowIndex);
    free(A.colIndex);
    free(B.values);
    free(B.rowIndex);
    free(B.colIndex);
    free(C);
    return status;
}