You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
94 lines
2.6 KiB
94 lines
2.6 KiB
#include <iostream>
|
|
#include <vector>
|
|
#include <ctime>
|
|
using namespace std;
|
|
|
|
// 稀疏矩阵 COO 格式定义
|
|
struct SparseMatrix {
|
|
vector<float> values;
|
|
vector<int> rowIndex;
|
|
vector<int> colIndex;
|
|
int rows, cols;
|
|
};
|
|
|
|
// 将稀疏矩阵转换为普通矩阵
|
|
void sparseToDense(const SparseMatrix& sparse, vector<vector<float>>& dense) {
|
|
dense.assign(sparse.rows, vector<float>(sparse.cols, 0.0f));
|
|
for (size_t i = 0; i < sparse.values.size(); ++i) {
|
|
dense[sparse.rowIndex[i]][sparse.colIndex[i]] = sparse.values[i];
|
|
}
|
|
}
|
|
|
|
// 手动展开优化的稠密矩阵乘法
|
|
void matmulOptimized(float** A, float** B, float** C, int n) {
|
|
for (int i = 0; i < n; ++i) {
|
|
for (int j = 0; j < n; ++j) {
|
|
float result = 0.0f;
|
|
for (int k = 0; k < n; k += 4) {
|
|
// 模拟向量化操作:每次处理 4 个元素
|
|
result += A[i][k] * B[k][j];
|
|
if (k + 1 < n) result += A[i][k + 1] * B[k + 1][j];
|
|
if (k + 2 < n) result += A[i][k + 2] * B[k + 2][j];
|
|
if (k + 3 < n) result += A[i][k + 3] * B[k + 3][j];
|
|
}
|
|
C[i][j] = result;
|
|
}
|
|
}
|
|
}
|
|
|
|
int main() {
|
|
// 初始化稀疏矩阵 (COO 格式)
|
|
SparseMatrix sparse = {
|
|
{1, 2, 3}, // 非零值
|
|
{0, 1, 2}, // 行索引
|
|
{0, 1, 2}, // 列索引
|
|
3, 3 // 行数和列数
|
|
};
|
|
|
|
// 稀疏矩阵转换为普通矩阵
|
|
vector<vector<float>> denseMatrix;
|
|
sparseToDense(sparse, denseMatrix);
|
|
|
|
// 打印转换后的普通矩阵
|
|
cout << "Dense Matrix:" << endl;
|
|
for (const auto& row : denseMatrix) {
|
|
for (float val : row) {
|
|
cout << val << " ";
|
|
}
|
|
cout << endl;
|
|
}
|
|
|
|
// 测试手动展开的矩阵乘法
|
|
int size = 3;
|
|
float** A = new float* [size];
|
|
float** B = new float* [size];
|
|
float** C = new float* [size];
|
|
for (int i = 0; i < size; ++i) {
|
|
A[i] = new float[size];
|
|
B[i] = new float[size];
|
|
C[i] = new float[size];
|
|
for (int j = 0; j < size; ++j) {
|
|
A[i][j] = denseMatrix[i][j];
|
|
B[i][j] = denseMatrix[j][i]; // 设为转置矩阵方便验证
|
|
}
|
|
}
|
|
|
|
clock_t start = clock();
|
|
matmulOptimized(A, B, C, size);
|
|
clock_t end = clock();
|
|
|
|
cout << "手动向量化的稀疏矩阵乘法用时: "
|
|
<< (double)(end - start) / CLOCKS_PER_SEC << "秒" << endl;
|
|
|
|
// 释放动态内存
|
|
for (int i = 0; i < size; ++i) {
|
|
delete[] A[i];
|
|
delete[] B[i];
|
|
delete[] C[i];
|
|
}
|
|
delete[] A;
|
|
delete[] B;
|
|
delete[] C;
|
|
|
|
return 0;
|
|
} |