Merge pull request 'Problem 1-3' (#1) from p8sljnpht/opcomplex:main into main

1 year ago · 75a99273b2
parent abbbe24a85 ebcc723cf3
commit 75a99273b2
3 changed files with 189 additions and 0 deletions
--- a/cxy/1.c
+++ b/cxy/1.c
@ -0,0 +1,52 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <string.h>
+
+/* Forward declarations for the helpers defined after main(). */
+void applyGaussianBlur(void* _src, void* _dst, int h, int w, float kernel[3][3]);
+void print(void* _a, int h, int w);
+/*
+ * Demo driver: blurs a fixed 5x5 ramp image with a 3x3 kernel, prints the
+ * CPU time measured with clock() around the call, then prints the result.
+ */
+int main() {
+	float kernel[3][3] = {
+		{1.0f/16, 2.0f/16, 1.0f/16},
+		{2.0f/16, 4.0f/16, 2.0f/16},
+		{1.0f/16, 2.0f/16, 1.0f/16}
+	};
+	float inputImage[5][5] = {
+		{ 1,  2,  3,  4,  5},
+		{ 6,  7,  8,  9, 10},
+		{11, 12, 13, 14, 15},
+		{16, 17, 18, 19, 20},
+		{21, 22, 23, 24, 25}
+	};
+	float outputImage[5][5] = {0};
+	clock_t start, end;
+	double elapsed;
+
+	start = clock();
+	applyGaussianBlur(inputImage, outputImage, 5, 5, kernel);
+	end = clock();
+
+	elapsed = (double)(end-start) / CLOCKS_PER_SEC;
+	printf("Time: %lf s\n", elapsed);
+	print(outputImage, 5, 5);
+	return 0;
+}
+
+/*
+ * Apply a 3x3 kernel to an h x w float image.
+ *
+ * _src   - input image: h rows of w contiguous floats (row-major).
+ * _dst   - output image with the same layout. It should not alias _src,
+ *          because pixels are written while their neighbours are still
+ *          being read.
+ * h, w   - image height and width in pixels.
+ * kernel - weights; kernel[dy+1][dx+1] multiplies src[i+dy][j+dx]
+ *          (the kernel is not flipped).
+ *
+ * Only interior pixels (1 <= i <= h-2, 1 <= j <= w-2) are written; the
+ * one-pixel border of _dst keeps whatever the caller stored there.
+ * Images smaller than 3x3 have no interior and are left untouched.
+ */
+void applyGaussianBlur(void* _src, void* _dst,
+	int h, int w, float kernel[3][3]) {
+	/* Bail out before forming the float[w] row types below: a variably
+	 * modified type with a non-positive bound is undefined behaviour. */
+	if (h < 3 || w < 3)
+		return;
+
+	const float (*src)[w] = _src;
+	float (*dst)[w] = _dst;
+
+	for (int i = 1; i < h-1; i++) {
+		for (int j = 1; j < w-1; j++) {
+			/* Terms are summed in row-major kernel order. */
+			dst[i][j] =
+			src[i-1][j-1]*kernel[0][0]+src[i-1][j  ]*kernel[0][1]+src[i-1][j+1]*kernel[0][2]+
+			src[i  ][j-1]*kernel[1][0]+src[i  ][j  ]*kernel[1][1]+src[i  ][j+1]*kernel[1][2]+
+			src[i+1][j-1]*kernel[2][0]+src[i+1][j  ]*kernel[2][1]+src[i+1][j+1]*kernel[2][2];
+		}
+	}
+}
+
+/*
+ * Print an h x w float matrix stored as contiguous rows: one text line per
+ * row, every value formatted as "%5.1f" followed by a space.
+ */
+void print(void* _a, int h, int w) {
+	const float *cell = _a;
+	for (int r = 0; r < h; r++) {
+		/* Rows are back to back, so one running pointer visits them in order. */
+		for (int c = 0; c < w; c++)
+			printf("%5.1f ", *cell++);
+		putchar('\n');
+	}
+}
--- a/cxy/2.c
+++ b/cxy/2.c
@ -0,0 +1,61 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <string.h>
+/* Slot used for image row n in a three-row rotating buffer. */
+#define IDX(n) ((n) % 3)
+
+/* Forward declarations for the helpers defined after main(). */
+void applySeparableGaussianBlur(void* _src, void* _dst, int h, int w, float kx[3], float ky[3]);
+void print(void* _a, int h, int w);
+/*
+ * Demo driver: blurs a fixed 5x5 ramp image with the separable 3-tap
+ * filter, prints the CPU time measured with clock() around the call, then
+ * prints the result.
+ */
+int main(void) {
+	float inputImage[5][5] = {
+		{1,2,3,4,5},
+		{6,7,8,9,10},
+		{11,12,13,14,15},
+		{16,17,18,19,20},
+		{21,22,23,24,25}
+	};
+	/* 1-D taps. Their outer product ky * kx^T is the 3x3 kernel
+	 * {1,2,1; 2,4,2; 1,2,1} / 16, so no 2-D table is needed here. */
+	float kx[3] = {0.25f, 0.5f, 0.25f};
+	float ky[3] = {0.25f, 0.5f, 0.25f};
+	float outputImage[5][5] = {0};
+	clock_t start = clock();
+	applySeparableGaussianBlur(inputImage, outputImage, 5, 5, kx, ky);
+	clock_t end   = clock();
+	printf("Time: %lf s\n", (double)(end-start) / CLOCKS_PER_SEC);
+	print(outputImage, 5, 5);
+	return 0;
+}
+
+/*
+ * Horizontal 3-tap pass over one image row:
+ * out[j] = row[j-1]*kx[0] + row[j]*kx[1] + row[j+1]*kx[2] for 1 <= j <= w-2.
+ * out[0] and out[w-1] are not written.
+ */
+static void blurRowHorizontal(const float *row, float *out, int w, const float kx[3]) {
+	for (int j = 1; j < w-1; j++)
+		out[j] = row[j-1]*kx[0]+row[j]*kx[1]+row[j+1]*kx[2];
+}
+
+/*
+ * Separable 3x3 blur: a horizontal 3-tap pass with kx followed by a
+ * vertical 3-tap pass with ky.
+ *
+ * _src   - input image: h rows of w contiguous floats (row-major).
+ * _dst   - output image with the same layout.
+ * h, w   - image height and width in pixels.
+ * kx, ky - horizontal and vertical taps (3 floats each).
+ *
+ * Only interior pixels (1 <= i <= h-2, 1 <= j <= w-2) are written; the
+ * border of _dst is left as the caller provided it. Images smaller than
+ * 3x3 have no interior and are left untouched.
+ *
+ * Three scratch rows of w floats are allocated for the horizontally
+ * filtered rows i-1, i and i+1. If that allocation fails, a message is
+ * written to stderr and _dst is left unmodified.
+ */
+void applySeparableGaussianBlur(void* _src, void* _dst,
+	int h, int w, float kx[3], float ky[3]) {
+	/* Without an interior there is nothing to compute; returning here also
+	 * avoids reading row 1 of a one-row image and forming float[w] types
+	 * with a non-positive bound. */
+	if (h < 3 || w < 3)
+		return;
+
+	const float (*src)[w] = _src;
+	float (*dst)[w] = _dst;
+
+	/* Scratch rows are sized from w, so any width is safe. */
+	float *scratch = calloc((size_t)w, 3 * sizeof *scratch);
+	if (scratch == NULL) {
+		fprintf(stderr, "applySeparableGaussianBlur: out of memory\n");
+		return;
+	}
+	float *above = scratch;
+	float *here  = scratch + w;
+	float *below = scratch + 2 * (size_t)w;
+
+	/* Prime the window with the horizontal pass of rows 0 and 1. */
+	blurRowHorizontal(src[0], above, w, kx);
+	blurRowHorizontal(src[1], here, w, kx);
+
+	for (int i = 1; i < h-1; i++) {
+		/* Bring in row i+1, then combine the three filtered rows vertically. */
+		blurRowHorizontal(src[i+1], below, w, kx);
+		for (int j = 1; j < w-1; j++)
+			dst[i][j] = above[j]*ky[0]+here[j]*ky[1]+below[j]*ky[2];
+
+		/* Slide the window down one row; the oldest row's storage is reused. */
+		float *recycled = above;
+		above = here;
+		here  = below;
+		below = recycled;
+	}
+
+	free(scratch);
+}
+
+/*
+ * Print an h x w float matrix stored as contiguous rows: one text line per
+ * row, every value formatted as "%5.1f" followed by a space.
+ */
+void print(void* _a, int h, int w) {
+	const float *cell = _a;
+	for (int r = 0; r < h; r++) {
+		/* Rows are back to back, so one running pointer visits them in order. */
+		for (int c = 0; c < w; c++)
+			printf("%5.1f ", *cell++);
+		putchar('\n');
+	}
+}
--- a/cxy/3.c
+++ b/cxy/3.c
@ -0,0 +1,76 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <string.h>
+#include <arm_neon.h>
+/* Slot used for image row n in a three-row rotating buffer. */
+#define IDX(n) ((n) % 3)
+
+/* Forward declarations; image rows are fixed at 100 floats wide. */
+void applySeparableGaussianBlur(float (*src)[100], float (*dst)[100], int h, int w, float *kx, float *ky);
+void print(float (*a)[100], int h, int w);
+/*
+ * Demo driver: blurs a 5x5 ramp image (stored in rows of 100 floats, the
+ * remaining columns zero) with the separable 3-tap filter, prints the CPU
+ * time measured with clock() around the call, then prints the 5x5 result.
+ */
+int main(void) {
+	float inputImage[5][100] = {
+		{1,2,3,4,5},
+		{6,7,8,9,10},
+		{11,12,13,14,15},
+		{16,17,18,19,20},
+		{21,22,23,24,25}
+	};
+	/* 1-D taps. Their outer product ky * kx^T is the 3x3 kernel
+	 * {1,2,1; 2,4,2; 1,2,1} / 16, so no 2-D table is needed here.
+	 * Each array carries one zero of padding so the taps can be read with
+	 * a single 4-float vector load without running off the array. */
+	float kx[4] = {0.25f, 0.5f, 0.25f, 0.0f};
+	float ky[4] = {0.25f, 0.5f, 0.25f, 0.0f};
+	float outputImage[5][100] = {0};
+	clock_t start = clock();
+	applySeparableGaussianBlur(inputImage, outputImage, 5, 5, kx, ky);
+	clock_t end   = clock();
+	printf("Time: %lf s\n", (double)(end-start) / CLOCKS_PER_SEC);
+	print(outputImage, 5, 5);
+	return 0;
+}
+
+/*
+ * Horizontal 3-tap pass over one image row:
+ * out[j] = row[j-1]*kx[0] + row[j]*kx[1] + row[j+1]*kx[2] for 1 <= j <= w-2.
+ * out[0] and out[w-1] are not written. Only kx[0..2] and row[0..w-1] are read.
+ */
+static void blurRowHorizontal(const float *row, float *out, int w, const float *kx) {
+	int j = 1;
+
+	/* Vector body, four outputs per step. A step at column j reads
+	 * row[j-1 .. j+4], so it may only run while j+4 <= w-1. */
+	for (; j + 4 <= w - 1; j += 4) {
+		float32x4_t left  = vld1q_f32(&row[j-1]);
+		float32x4_t mid   = vld1q_f32(&row[j]);
+		float32x4_t right = vld1q_f32(&row[j+1]);
+		float32x4_t acc = vmulq_n_f32(left, kx[0]);
+		acc = vaddq_f32(acc, vmulq_n_f32(mid,   kx[1]));
+		acc = vaddq_f32(acc, vmulq_n_f32(right, kx[2]));
+		vst1q_f32(&out[j], acc);
+	}
+
+	/* Scalar tail for the last (w-2) % 4 interior columns. */
+	for (; j < w - 1; j++)
+		out[j] = row[j-1]*kx[0]+row[j]*kx[1]+row[j+1]*kx[2];
+}
+
+/*
+ * Separable 3x3 blur using NEON for the horizontal pass: a 3-tap pass with
+ * kx along each row, followed by a scalar 3-tap pass with ky down the columns.
+ *
+ * src, dst - images stored in rows of 100 floats; the top-left h x w
+ *            region is processed.
+ * h, w     - region height and width in pixels; w must not exceed 100.
+ * kx, ky   - horizontal and vertical taps; only elements 0..2 are read.
+ *
+ * Only interior pixels (1 <= i <= h-2, 1 <= j <= w-2) of dst are written.
+ * Regions smaller than 3x3, or wider than the 100-float row, are rejected
+ * and dst is left untouched.
+ */
+void applySeparableGaussianBlur(float src[][100], float dst[][100],
+	int h, int w, float kx[], float ky[]) {
+	/* Horizontally filtered rows i-1, i and i+1. Only columns 1..w-2 are
+	 * ever written or read, so no zero fill is needed. */
+	float buf[3][100];
+	float *above = buf[0];
+	float *here  = buf[1];
+	float *below = buf[2];
+
+	if (h < 3 || w < 3 || w > 100)
+		return;
+
+	/* Prime the window with the horizontal pass of rows 0 and 1. */
+	blurRowHorizontal(src[0], above, w, kx);
+	blurRowHorizontal(src[1], here, w, kx);
+
+	for (int i = 1; i < h-1; i++) {
+		/* Bring in row i+1, then combine the three filtered rows vertically. */
+		blurRowHorizontal(src[i+1], below, w, kx);
+		for (int j = 1; j < w-1; j++)
+			dst[i][j] = above[j]*ky[0]+here[j]*ky[1]+below[j]*ky[2];
+
+		/* Slide the window down one row; the oldest row's storage is reused. */
+		float *recycled = above;
+		above = here;
+		here  = below;
+		below = recycled;
+	}
+}
+
+/*
+ * Print the top-left h x w region of an image stored in rows of 100
+ * floats: one text line per row, every value formatted as "%5.1f"
+ * followed by a space.
+ */
+void print(float a[][100], int h, int w) {
+	for (int r = 0; r < h; r++) {
+		const float *cell = a[r];
+		for (int remaining = w; remaining > 0; remaining--)
+			printf("%5.1f ", *cell++);
+		putchar('\n');
+	}
+}