From b7dba516829e73be6b13e54982783bd89989f2a6 Mon Sep 17 00:00:00 2001 From: p8sljnpht <3178612685@qq.com> Date: Tue, 3 Dec 2024 21:30:18 +0800 Subject: [PATCH 1/5] ADD file via upload --- cxy_opt/1.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 cxy_opt/1.c diff --git a/cxy_opt/1.c b/cxy_opt/1.c new file mode 100644 index 0000000..2f912a8 --- /dev/null +++ b/cxy_opt/1.c @@ -0,0 +1,33 @@ +#include +#include +#include +#include +#include "render.h" + +void applyGaussianBlur(float src[][MAT_SIZE], float dst[][MAT_SIZE], float kernel[3][3]); +int main() { + float inputImage[MAT_SIZE][MAT_SIZE]; + Render(inputImage); + float kernel[3][3] = { + {1.0f/16, 2.0f/16, 1.0f/16}, + {2.0f/16, 4.0f/16, 2.0f/16}, + {1.0f/16, 2.0f/16, 1.0f/16} + }; + float outputImage[MAT_SIZE][MAT_SIZE]={0}; + clock_t start = clock(); + applyGaussianBlur(inputImage, outputImage, kernel); + clock_t end = clock(); + printf("Time: %lf s\n", (double)(end-start) / CLOCKS_PER_SEC); + Print(outputImage); +} + +void applyGaussianBlur(float src[][MAT_SIZE], float dst[][MAT_SIZE], float kernel[3][3]) { + int i, j; + for(i=1; i Date: Tue, 3 Dec 2024 21:30:31 +0800 Subject: [PATCH 2/5] ADD file via upload --- cxy_opt/2.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 cxy_opt/2.c diff --git a/cxy_opt/2.c b/cxy_opt/2.c new file mode 100644 index 0000000..dc0b8ed --- /dev/null +++ b/cxy_opt/2.c @@ -0,0 +1,38 @@ +#include +#include +#include +#include +#include "render.h" +#define IDX(n) ((n) % 3) + +void applySeparableGaussianBlur(float[][MAT_SIZE], float[][MAT_SIZE], float[3], float[3]); +int main() { + float inputImage[MAT_SIZE][MAT_SIZE]; + Render(inputImage); + + float kx[3] = {1.0f/4, 1.0f/2, 1.0f/4}; + float ky[3] = {1.0f/4, 1.0f/2, 1.0f/4}; + float outputImage[MAT_SIZE][MAT_SIZE] = {0}; + clock_t start = clock(); + applySeparableGaussianBlur(inputImage, outputImage, kx, ky); + clock_t end = clock(); + printf("Time: %lf s\n", (double)(end-start) / CLOCKS_PER_SEC); + Print(outputImage); +} + +void applySeparableGaussianBlur(float src[][MAT_SIZE], float dst[][MAT_SIZE], float kx[3], float ky[3]) { + float buf[3][MAT_SIZE+3]; + int i, j; + // 计算前两行的行内卷积 + for(i=0; i<2; i++) + for(j=1; j Date: Tue, 3 Dec 2024 21:30:41 +0800 Subject: [PATCH 3/5] ADD file via upload --- cxy_opt/3.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 cxy_opt/3.c diff --git a/cxy_opt/3.c b/cxy_opt/3.c new file mode 100644 index 0000000..4f546e7 --- /dev/null +++ b/cxy_opt/3.c @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include "render.h" +#define IDX(n) ((n) % 3) + +void applySeparableGaussianBlur(float src[][MAT_SIZE], float dst[][MAT_SIZE], + float kx[], float ky[]); +int main() { + float inputImage[MAT_SIZE][MAT_SIZE]; + Render(inputImage); + float kernel[3][3] = { + {1.0f/16, 2.0f/16, 1.0f/16}, + {2.0f/16, 4.0f/16, 2.0f/16}, + {1.0f/16, 2.0f/16, 1.0f/16} + }; + float kx[4] = {1.0f/4, 1.0f/2, 1.0f/4, 0.0f}; // 防止越界多定义一个 + float ky[4] = {1.0f/4, 1.0f/2, 1.0f/4, 0.0f}; + float outputImage[MAT_SIZE][MAT_SIZE] = {0}; + clock_t start = clock(); + applySeparableGaussianBlur(inputImage, outputImage, kx, ky); + clock_t end = clock(); + printf("Time: %lf s\n", (double)(end-start) / CLOCKS_PER_SEC); + Print(outputImage); +} + +void applySeparableGaussianBlur(float src[][MAT_SIZE], float dst[][MAT_SIZE], + float kx[], float ky[]) { + int i, j; + float buf[3][MAT_SIZE]; + float32x4_t kx_vec = vld1q_f32(kx); + float32x4_t ky_vec = vld1q_f32(ky); + // 计算前两行的行内卷积 + float32x4_t* left, *mid, *right, *result; + for(i=0; i<2; i++) + for(j=1; j Date: Tue, 3 Dec 2024 21:30:52 +0800 Subject: [PATCH 4/5] ADD file via upload --- cxy_opt/render.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 cxy_opt/render.h diff --git a/cxy_opt/render.h b/cxy_opt/render.h new file mode 100644 index 0000000..c7200a8 --- /dev/null +++ b/cxy_opt/render.h @@ -0,0 +1,40 @@ +#ifndef __RENDER_H +#define __RENDER_H +#pragma GCC optimize ("O3") +#include +#include + +#define MAT_SIZE 514 +#define RAND_SEED 114514 +void Render(float a[][MAT_SIZE]); +void Print(float a[][MAT_SIZE]); + +void Render(float a[][MAT_SIZE]) +{ + srand(RAND_SEED); + int i, j; + for(i=0; i Date: Tue, 3 Dec 2024 21:33:14 +0800 Subject: [PATCH 5/5] Update render.h --- cxy_opt/render.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cxy_opt/render.h b/cxy_opt/render.h index c7200a8..aed0851 100644 --- a/cxy_opt/render.h +++ b/cxy_opt/render.h @@ -1,6 +1,6 @@ #ifndef __RENDER_H #define __RENDER_H -#pragma GCC optimize ("O3") +#pragma GCC optimize ("O1") #include #include