From 387e354d5b28e90b1bd241cd2e9bbd04cf89a95e Mon Sep 17 00:00:00 2001 From: pv3e4i5aj Date: Sat, 23 Nov 2024 18:10:44 +0800 Subject: [PATCH] Add task6.cpp --- task6.cpp | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 task6.cpp diff --git a/task6.cpp b/task6.cpp new file mode 100644 index 0000000..9d4de34 --- /dev/null +++ b/task6.cpp @@ -0,0 +1,153 @@ +#include +#include +#include +#include + +#define M 27 +#define N 25 +#define Q 12 +// Matrix dimensions: A is M x N, B is N x Q, C is M x Q, transposed is Q x N +// transposeMatrix: writes the transpose of the N x Q array `matrix` into the Q x N array `transposed` (transposed[j][i] = matrix[i][j]); both are arrays of row pointers supplied by the caller +void transposeMatrix(float** matrix, float** transposed) { + for (int i = 0; i < N; i++) { + for (int j = 0; j < Q; j++) { + transposed[j][i] = matrix[i][j]; + } + } +} +// matmul_optimized: fills the M x Q array C, where C[i][j] is the dot product of row i of A and row j of B over N elements, so B must be passed in the transposed Q x N layout. Groups of 4 elements are multiplied as float32x4_t vectors; any tail shorter than 4 elements is handled by the scalar loop. NOTE(review): float32x4_t, vld1q_f32, vmulq_f32 and vgetq_lane_f32 look like ARM NEON intrinsics (normally from arm_neon.h), but the #include targets are missing from this copy - confirm +void matmul_optimized(float** A, float** B, float** C) { + for (int i = 0; i < M; i++) { + for (int j = 0; j < Q; j++) { + float sum = 0.0f; + for (int k = 0; k < N; k += 4) + { + float32x4_t vecA, vecB, vecC; + if (k + 4 <= N) + { + // Load 4 elements each from A and B for the vectorized computation + vecA = vld1q_f32(&A[i][k]); + vecB = vld1q_f32(&B[j][k]); + // Vectorized multiply, then add the four lanes of the product into sum + vecC = vmulq_f32(vecA, vecB); + sum += vgetq_lane_f32(vecC, 0) + vgetq_lane_f32(vecC, 1) + + vgetq_lane_f32(vecC, 2) + vgetq_lane_f32(vecC, 3); + } + else + { + // Handle the remaining elements + for (int m = k; m < N; m++) + { + sum += A[i][m] * B[j][m]; + } + } + } + C[i][j] = sum; + } + } +} +int main() +{ + // Matrix A in COO format + float A_values[] = {1, 2, 3,4,5}; + int A_rowIndex[] = {0, 0, 1, 2, 2}; + int A_colIndex[] = {0, 2, 1,0, 2}; + int A_nonZeroCount = 5; + // Matrix B in COO format + float B_values[] = {6,8,7,9}; + int B_rowIndex[] = {0,2, 1, 2}; + int B_colIndex[] ={0,0,1, 2}; + int B_nonZeroCount=4; + + + // Dynamically allocate memory (arrays of row pointers). NOTE(review): the malloc results are used unchecked and malloc does not zero the rows - confirm the dense matrices are cleared before the COO entries are written + float** denseMatrixA = (float**)malloc(M * sizeof(float*)); + float** denseMatrixB = (float**)malloc(N * sizeof(float*)); + float** C = (float**)malloc(M * sizeof(float*)); + float** transposed = (float**)malloc(Q * sizeof(float*)); + + for (int i = 0; i < M; i++) { + denseMatrixA[i] = (float*)malloc(N * sizeof(float)); + C[i] = 
(float*)malloc(Q * sizeof(float)); + } + for (int i = 0; i < N; ++i) + { + denseMatrixB[i] = (float*)malloc(Q * sizeof(float)); + } + for (int i = 0; i < Q; ++i) + { + transposed[i] = (float*)malloc(N * sizeof(float)); + } + + // Convert the sparse matrices to dense matrices + for(int i=0;i