diff --git a/Chen/CHEN_1.c b/Chen/CHEN_1.c new file mode 100644 index 0000000..aba197a --- /dev/null +++ b/Chen/CHEN_1.c @@ -0,0 +1,27 @@ +#include +#include +#include +#define SIZE 1024 + +void vector_add_optimized(float* A, float* B, float* C, int size); +int main() { + float A[SIZE], B[SIZE], C[SIZE]; + int i; + srand(time(0)); + for(i=0; i +#include +#include +#include +#define SIZE 1024 + +void vector_add_optimized(float* A, float* B, float* C, int size); +int main() { + float A[SIZE], B[SIZE], C[SIZE]; + int i; + srand(time(0)); + for(i=0; i +#include +#include +#define SIZE 1024 + +void matmul_optimized(float** A, float** B, float** C, int n); +int main() { + int n = SIZE; + + // 分配内存空间 + float** A = (float**)malloc(sizeof(float*) * n); + float** B = (float**)malloc(sizeof(float*) * n); + float** C = (float**)malloc(sizeof(float*) * n); + int i, j; + for(i=0; i +#include +#include +#include +#define SIZE 1024 + +void matmul_optimized(float** A, float** B, float** C, int n); +int main() { + int n = SIZE; + + // 分配内存空间 + float** A = (float**)malloc(sizeof(float*) * n); + float** B = (float**)malloc(sizeof(float*) * n); + float** C = (float**)malloc(sizeof(float*) * n); + int i, j; + for(i=0; i +#include +#include +#define SIZE 1024 + +void sparce_matmul_coo(float*, int*, int*, int, + float*, int*, int*, int, float*, int*, int*, int*); +int main() { + // 矩阵 A 的 COO 格式 + float A_values[] = {1, 2, 3, 4, 5}; + int A_rowIndex[] = {0, 0, 1, 2, 2}; + int A_colIndex[] = {0, 2, 1, 0, 2}; + int A_nonZeroCount = 5; + // 矩阵 B 的 Coo 格式 + float B_values[] = {6, 8, 7, 9}; + int B_rowIndex[] = {0, 2, 1, 2}; + int B_colIndex[] = {0, 0, 1, 2}; + int B_nonZeroCount = 4;// 结果矩阵 C 的 Coo 格式 + float C_values[SIZE]; + int C_rowIndex[SIZE]; + int C_colIndex[SIZE]; + int C_nonZeroCount = 0; + + clock_t start = clock(); + sparce_matmul_coo(A_values, A_rowIndex, A_colIndex, A_nonZeroCount, + B_values, B_rowIndex, B_colIndex, B_nonZeroCount, + C_values, C_rowIndex, C_colIndex, &C_nonZeroCount); + clock_t end = clock(); + printf("基础的稀疏矩阵乘法时间:%lf\n", (double)(end-start) / CLOCKS_PER_SEC); +} + +void sparce_matmul_coo(float* A_values, int* A_rowIndex, int* A_colIndex, int A_nonZeroCount, + float* B_values, int* B_rowIndex, int* B_colIndex, int B_nonZeroCount, + float* C_values, int* C_rowIndex, int* C_colIndex, int* C_nonZeroCount) { + int currentIndex = 0; + int i, j, k; + int rowA, colA, rowB, colB; + float valueA, valueB, product; + // 遍历 A 的非零元素 + for(i=0; i +#include +#include +#include +#include +#define SIZE 1024 +#define DENSE_MATRIX_SIZE 5 + +float** sparce_matmul_coo(float* A_values, int* A_rowIndex, int* A_colIndex, int A_nonZeroCount, + float* B_values, int* B_rowIndex, int* B_colIndex, int B_nonZeroCount); +void matmul_optimized(float** A, float** B, float** C, int n); +void print_matrix(float** m, int rows, int cols); +int main() { + // 矩阵 A 的 COO 格式 + float A_values[] = {1, 2, 3, 4, 5}; + int A_rowIndex[] = {0, 0, 1, 2, 2}; + int A_colIndex[] = {0, 2, 1, 0, 2}; + int A_nonZeroCount = 5; + // 矩阵 B 的 Coo 格式 + float B_values[] = {6, 8, 7, 9}; + int B_rowIndex[] = {0, 2, 1, 2}; + int B_colIndex[] = {0, 0, 1, 2}; + int B_nonZeroCount = 4;// 结果矩阵 C 的 Coo 格式 + float C_values[SIZE]; + int C_rowIndex[SIZE]; + int C_colIndex[SIZE]; + int C_nonZeroCount = 0; + + clock_t start = clock(); + float** ans = sparce_matmul_coo(A_values, A_rowIndex, A_colIndex, A_nonZeroCount, + B_values, B_rowIndex, B_colIndex, B_nonZeroCount); + clock_t end = clock(); + printf("优化的稀疏矩阵乘法时间:%lf\n", (double)(end-start) / CLOCKS_PER_SEC); + print_matrix(ans, DENSE_MATRIX_SIZE, DENSE_MATRIX_SIZE); +} + +float** sparce_matmul_coo(float* A_values, int* A_rowIndex, int* A_colIndex, int A_nonZeroCount, + float* B_values, int* B_rowIndex, int* B_colIndex, int B_nonZeroCount) { + // 分配内存空间 + float** A = (float**)malloc(sizeof(float*) * DENSE_MATRIX_SIZE); + float** B = (float**)malloc(sizeof(float*) * DENSE_MATRIX_SIZE); + float** C = (float**)malloc(sizeof(float*) * DENSE_MATRIX_SIZE); + int i; + for(i=0; i