feat: add source code, resources and documents

3 years ago · eac7cee6cd
parent 37b667760c
commit eac7cee6cd
25 changed files with 1087 additions and 1 deletions
--- a/README.md
+++ b/README.md
@ -1,2 +1,70 @@
-# simple_scan
+# 数字图像处理实践项目：文件扫描软件

+*--- 小组成员: 姜业恒 10205101498 ---*  
+
+## 概述  
+
+本项目为数字图像处理期末的实践项目, 包括了第一项和第二项的要求。 本项目不仅实现了《数字图像处理》课程教学中的主要内容, 还增加了实际应用的场景, 即：将使用普通照相机或手机的摄像头拍摄的文档图片转化为易于识别的类似于扫描得到的图片。
+
+## 项目结构  
+
+- readme.md --- 本文件  
+- requirement.txt --- 项目需要的依赖, 可通过pip安装  
+- img_util/ --- python模块, 实现了课程中的主要内容, 每个子模块的内容在文件的注释中都有详细解释, 其中有一些功能由于opencv已经有使用起来非常简单的实现, 所以仅在注释中标注出来  
+- resource/ --- 存放readme.md需要的资源文件  
+- main.py --- 程序入口, 运行此文件开始程序
+
+## 应用程序介绍  
+
+本项目运用课程中的各种知识, 实现了一个可以在实际场景中应用的程序——“简单扫描”。此程序可以将普通照相机或手机拍摄的文档图片转化为易于识别的、类似于扫描得到的图片。具体来说, 这个应用程序可以读取一张图片, 并根据需要切换为原图、灰度、黑白三种模式, 其中黑白模式可以选择自动或手动调整阈值; 另外, 还可以对图片中的透视关系进行矫正。最终, 可以将处理后的图片保存。
+
+## 应用程序用法  
+1. 检查运行环境  
+
+首先查看python的版本, 考虑到兼容性问题, 最好使用python 3.9以上的版本运行程序; 另外, 可以使用"`pip install -r requirement.txt`"命令, 根据requirement.txt文件自动安装本程序需要的依赖库  
+
+2. 运行程序  
+
+运行环境配置无误后, 运行main.py启动程序, 界面如下图所示  
+![img_main](resource/img_main.png)
+
+3. 选择图片与更改模式  
+
+点击界面上方的`选择图片`按钮选择一张输入的图片
+![img_select](resource/img_select.png)
+
+选择完毕后, 界面会展示图片的原图  
+![img_orig](resource/img_orig.png)
+
+点击第二排的`灰度`选项, 可以将图片切换到灰度模式  
+![img_gray](resource/img_gray.png)
+
+点击第二排的`黑白(自动)`选项, 可以将图片切换到黑白模式, 其阈值由程序自动判断得到  
+![img_bw](resource/img_bw.png)
+
+点击第二排的`黑白(手动)`选项, 同样可以将图片切换到黑白模式, 但其阈值需要通过手动调整右侧的滑动条来确定  
+![img_bw_hand](resource/img_bwhand.png)
+
+4. 视角透视矫正  
+
+某些时候, 拍摄的视角并非是正对文档的, 需要进行透视的矫正, 这时候可以点击第一排的`透视矫正`进行矫正  
+点击之后, 可以看到弹出了帮助窗口  
+![img_pers_1](resource/img_pers_1.png)
+
+点击`确定`以后, 弹出新的窗口, 需要以左上角、右上角、右下角、左下角的顺时针顺序以`Ctrl`+单击的方法确定图片中需要矫正的区域的四个角  
+![img_pers_2](resource/img_pers_2.png)
+
+接下来, 如误选了顶点, 可以使用`R`键清除; 如果不想继续矫正, 可以使用`Esc`退出; 如果已经确定, 可以使用`Enter`键确定  
+![img_pers_3](resource/img_pers_3.png)
+
+5. 保存  
+
+处理完毕后, 点击第一排的`保存`按钮即可保存文件
+![img_save](resource/img_save.png)
+
+6. 应用程序效果  
+
+处理前:
+![img_input](resource/example.jpg)
+处理后:
+![img_output](resource/output.png)
--- a/img_util/init.py
+++ b/img_util/init.py
--- a/img_util/pycache/init.cpython-39.pyc
+++ b/img_util/pycache/init.cpython-39.pyc
--- a/img_util/pycache/gui_util.cpython-39.pyc
+++ b/img_util/pycache/gui_util.cpython-39.pyc
--- a/img_util/pycache/segmentation.cpython-39.pyc
+++ b/img_util/pycache/segmentation.cpython-39.pyc
--- a/img_util/basic_methods.py
+++ b/img_util/basic_methods.py
@ -0,0 +1,62 @@
+# -*- coding:utf-8 -*-
+
+"""
+此模块包括对图像的基本操作方法, 如灰度化、平移、旋转等，对应教学中的第一章到第三章
+对于图像间的加减乘除，图像改变大小等操作，都可以通过opencv中的函数调用做到, 这里不再做出新的实现
+"""
+
+import numpy as np
+import cv2
+
+
+def my_gray(inp: np.ndarray):
+    """
+    My implementation of gray scale of a BGR image.
+    gray = 0.299 * R + 0.587 * G + 0.114 * B, with the fractional part truncated.
+    The weighted sum is evaluated for the whole array at once instead of in a
+    per-pixel Python loop.
+    @param inp: an image array of BGR type, shape (rows, cols, 3)
+    @return: gray image of inp, a 2-D np.uint8 array of shape (rows, cols)
+    """
+    bgr = np.asarray(inp, dtype=np.float64)
+    # channel order is B, G, R: index 2 is red, index 1 is green, index 0 is blue
+    weighted = 0.299 * bgr[:, :, 2] + 0.587 * bgr[:, :, 1] + 0.114 * bgr[:, :, 0]
+    # astype truncates toward zero, matching the int() conversion used before
+    return weighted.astype(np.uint8)
+
+
+# * var of img:X Y
+# plus cv2.add(X, Y)
+# minus cv2.subtract(X, Y)
+# multiply cv2.multiply(X, Y)
+# divide cv2.divide(X, Y)
+
+
+def translate(img, x, y, dsize, flags=None, borderMode=None, borderValue=None):
+    """
+    translate an image
+    @param img:input image
+    @param x:the distance of x, right if positive, left if negative
+    @param y:the distance of y, down if positive, up if negative
+    @param dsize:output size
+    @param flags:see cv2.warpAffine; OpenCV's default is used when None
+    @param borderMode:see cv2.warpAffine; OpenCV's default is used when None
+    @param borderValue:see cv2.warpAffine; OpenCV's default is used when None
+    @return:the image after translation
+    """
+    M = np.float32([[1, 0, x], [0, 1, y]])
+    # The 4th positional parameter of cv2.warpAffine is `dst`, so the optional
+    # arguments must be passed by keyword; arguments left as None are omitted.
+    options = {'flags': flags, 'borderMode': borderMode, 'borderValue': borderValue}
+    options = {key: value for key, value in options.items() if value is not None}
+    return cv2.warpAffine(img, M, dsize, **options)
+
+
+def rotate(img, center, angel, scale, dsize, flags=None, borderMode=None, borderValue=None):
+    """
+    rotate an image
+    @param img:input image
+    @param center:center of rotation
+    @param angel:angle of rotation, passed to cv2.getRotationMatrix2D
+    @param scale:scale factor after rotation
+    @param dsize:output size
+    @param flags:see cv2.warpAffine; OpenCV's default is used when None
+    @param borderMode:see cv2.warpAffine; OpenCV's default is used when None
+    @param borderValue:see cv2.warpAffine; OpenCV's default is used when None
+    @return:the image after rotation
+    """
+    M = cv2.getRotationMatrix2D(center, angel, scale)
+    # The 4th positional parameter of cv2.warpAffine is `dst`, so the optional
+    # arguments must be passed by keyword; arguments left as None are omitted.
+    options = {'flags': flags, 'borderMode': borderMode, 'borderValue': borderValue}
+    options = {key: value for key, value in options.items() if value is not None}
+    return cv2.warpAffine(img, M, dsize, **options)
--- a/img_util/contrast.py
+++ b/img_util/contrast.py
@ -0,0 +1,103 @@
+# -*- coding:utf-8 -*-
+
+"""
+此模块包括各种增强图片对比度的方法, 包括线性灰度变换、分段线性灰度变换、对数灰度变换、指数灰度变换等，对应教学中的第四章
+对于直方图均衡化等的操作, 可以直接调用opencv中的函数进行实现, 这里不再做出新的实现
+"""
+
+import numpy as np
+import cv2
+
+
+def linear_gray_trans(img: np.ndarray, a, b, c, d):
+    """
+    out_img = d if img>b; [(d-c)/(b-a)]*(img-a)+c if img in[a,b]; c if img < a;
+    The mapping is computed in float64 for the whole array, rounded to the
+    nearest integer and clipped to [0, 255] before the conversion to uint8.
+    @param img: input img
+    @param a: left bound of trans
+    @param b: right bound of trans
+    @param c: min gray scale
+    @param d: max gray scale
+    @return: out_img (np.uint8, same shape as img)
+    """
+    src = np.asarray(img, dtype=np.float64)
+    if b == a:
+        # degenerate interval: the slope is undefined, map img == a to c
+        stretched = np.full(src.shape, float(c))
+    else:
+        stretched = (d - c) * (src - a) / (b - a) + c
+    out_img = np.where(src > b, d, np.where(src < a, c, stretched))
+    return np.clip(np.around(out_img), 0, 255).astype(np.uint8)
+
+
+def piecewise_linear_gray_trans(img, a, b, c, d, m):
+    """
+    out_img = [(m-d)/(m_f-b)]*(img-b)+d   if img in [b,m_f]; \n
+     ...    = [(d-c)/(b-a)]*(img-a)+c     if img in [a,b);   \n
+     ...    = (c/a)*img                   if img in [0,a);   \n
+    * m_f is max value of input image
+    The mapping is computed in float64, rounded to the nearest integer and
+    clipped to [0, 255] before the conversion to uint8.
+    @param img: input image
+    @param a: left bound of expansion area
+    @param b: right bound of expansion area
+    @param c: min value of expansion area after trans
+    @param d: max value of expansion area after trans
+    @param m: the max value of out_img
+    @return: out_img (np.uint8, same shape as img)
+    """
+    src = np.asarray(img, dtype=np.float64)
+    if src.size == 0:
+        return np.zeros(src.shape, np.uint8)
+    mf = src.max()
+    # same branch priority as the scalar version: img >= b first, then img < a
+    high = src >= b
+    low = ~high & (src < a)
+    mid = ~(high | low)
+
+    out_img = np.empty(src.shape, dtype=np.float64)
+    if mf > b:
+        out_img[high] = (m - d) * (src[high] - b) / (mf - b) + d
+    else:
+        # every pixel in this segment equals b, so the slope is undefined
+        out_img[high] = d
+    if a != 0:
+        out_img[low] = c * src[low] / a
+    else:
+        out_img[low] = 0
+    # mid is only non-empty when a < b, so the division is safe
+    out_img[mid] = (d - c) * (src[mid] - a) / (b - a) + c
+    return np.clip(np.around(out_img), 0, 255).astype(np.uint8)
+
+
+def logarithm_gray_trans(img, a, b, c):
+    """
+    out_img = a + ln(img+1)/[b*ln(c)]
+    The result is rounded to the nearest integer and clipped to [0, 255].
+    @param img:input image
+    @param a:a
+    @param b:b
+    @param c:c
+    @return:out_img (np.uint8, same shape as img)
+    """
+    # convert to float first so that img + 1 cannot wrap around in uint8
+    src = np.asarray(img, dtype=np.float64)
+    div = b * np.log(c)
+    out_img = a + np.log(src + 1) / div
+    return np.clip(np.around(out_img), 0, 255).astype(np.uint8)
+
+
+def exponent_gray_trans(img, a, b, c):
+    """
+    out_img = b^[c*(img-a)] - 1
+    The result is rounded to the nearest integer and clipped to [0, 255].
+    @param img:input image
+    @param a:a
+    @param b:b
+    @param c:c
+    @return:out_img (np.uint8, same shape as img)
+    """
+    # convert to float first so that img - a cannot wrap around in uint8
+    src = np.asarray(img, dtype=np.float64)
+    # very large exponents overflow to inf, which the clip below maps to 255
+    with np.errstate(over='ignore'):
+        out_img = np.power(float(b), c * (src - a)) - 1
+    return np.clip(np.around(out_img), 0, 255).astype(np.uint8)
+
+
+# calculate hist: cv2.calcHist([img], [0], None, [256], [0, 256])  (the upper bound of the range is exclusive)
+# equalize hist: cv2.equalizeHist(img)
--- a/img_util/gui_util.py
+++ b/img_util/gui_util.py
@ -0,0 +1,92 @@
+# -*- coding:utf-8 -*-
+"""
+此模块包括实际应用需要用到的一些GUI特性, 如通过用户界面选择四个点进行透视变换,
+"""
+import cv2
+import math
+import tkinter as tk
+import tkinter.messagebox as msg
+import numpy as np
+
+
+SUPPORT_FILETYPE = [('PNG', '.png'), ('JPEG', '.jpg'), ('TIFF', '.tiff'),
+                    ('Bitmap', '.bmp'), ('WEBP', '.webp'), ('All', '*')]
+
+
+def showAuto(window_n, img):
+    """
+    Create a resizable OpenCV window whose size follows the aspect ratio of img
+    and move it to the top-left corner of the screen.
+    The window is 1280 px wide unless that would make it taller than the
+    screen, in which case it is shrunk to 90% of the screen height.
+    Nothing is drawn here; only the window named window_n is created.
+    @param window_n: name of the OpenCV window to create
+    @param img: image array, only img.shape[0] (rows) and img.shape[1] (cols) are read
+    """
+    WIN_W = 1280
+
+    # A throw-away Tk root is only needed to query the screen height. It is
+    # hidden and destroyed right away, also when the query raises.
+    probe = tk.Tk()
+    try:
+        probe.withdraw()
+        screen_h = probe.winfo_screenheight()
+    finally:
+        probe.destroy()
+
+    cv2.namedWindow(window_n, flags=cv2.WINDOW_NORMAL)
+
+    act_w, act_h = WIN_W, math.ceil(WIN_W * img.shape[0] / img.shape[1])
+
+    if act_h > screen_h:
+        act_w, act_h = math.ceil(act_w * screen_h * 0.9 / act_h), math.ceil(screen_h * 0.9)
+
+    cv2.resizeWindow(window_n, act_w, act_h)
+    cv2.moveWindow(window_n, 0, 0)
+
+
+def getFourPointGUI(ori_img, img_name):
+    """
+    Let the user pick four points on a copy of ori_img in an OpenCV window.
+    Ctrl + left click adds a point, R clears the points, Enter confirms once
+    exactly four points are chosen, Esc or closing the window cancels.
+    @param ori_img: image to show; it is never modified, markers are drawn on a copy
+    @param img_name: suffix appended to 'Persp' to build the window name
+    @return: np.float32 array of shape (4, 2) with the points in click order, or None when cancelled
+    """
+    def beep():
+        # winsound only exists on Windows; fall back to the terminal bell elsewhere
+        try:
+            import winsound
+        except ImportError:
+            print('\a', end='', flush=True)
+        else:
+            winsound.MessageBeep(-1)
+
+    img = ori_img.copy()
+
+    window_n = 'Persp'+img_name
+    showAuto(window_n, img)
+
+    point_list = []
+
+    def on_mouse(ev, x, y, flags, userdata):
+        # test the Ctrl bit instead of comparing for equality, so that other
+        # modifier or button bits set at the same time do not hide the click
+        if ev != cv2.EVENT_LBUTTONUP or not flags & cv2.EVENT_FLAG_CTRLKEY:
+            return
+        if len(point_list) >= 4:
+            beep()
+            return
+        point_list.append([x, y])
+        count = len(point_list)
+        # each marker gets its own colour; the channel is capped at 255
+        colour = (125, min(255, 256 * count // 4), 255 - 32 * count // 4)
+        cv2.circle(img, (x, y), 12, colour, -1)
+
+    cv2.setMouseCallback(window_n, on_mouse, img)
+
+    while True:
+        # the property drops below 1 once the user has closed the window
+        if cv2.getWindowProperty(window_n, cv2.WND_PROP_VISIBLE) < 1:
+            return None
+        cv2.imshow(window_n, img)
+        wk = cv2.waitKey(20)
+        if wk == -1:
+            continue
+        # keep only the low byte; some platforms add modifier bits above it
+        wk &= 0xFF
+        if wk == 27:
+            cv2.destroyWindow(window_n)
+            return None
+        elif wk == 13:
+            if len(point_list) != 4:
+                beep()
+                continue
+            cv2.destroyWindow(window_n)
+            return np.array(point_list, dtype='float32')
+        elif wk in (ord('r'), ord('R')):
+            # start over on a fresh copy so the old markers disappear
+            img = ori_img.copy()
+            point_list.clear()
+
+
+def getAreaSize(pl: list):
+    """
+    Estimate the size of the quadrilateral described by four points.
+    @param pl: four points, each indexable as p[0] (x) and p[1] (y)
+    @return: tuple (longer of the sides pl[0]-pl[1] and pl[2]-pl[3],
+                    longer of the sides pl[0]-pl[3] and pl[1]-pl[2])
+    """
+    def side(p, q):
+        # Euclidean distance between two points
+        return math.sqrt((p[0]-q[0])**2+(p[1]-q[1])**2)
+
+    first_pair = max(side(pl[0], pl[1]), side(pl[2], pl[3]))
+    second_pair = max(side(pl[0], pl[3]), side(pl[1], pl[2]))
+    return first_pair, second_pair
+
+
+def changePerspGui(img, img_name):
+    """
+    Interactive perspective correction: show the usage help, let the user pick
+    the four corners of a region, then warp that region to an upright rectangle.
+    @param img: input image
+    @param img_name: name used for the title of the point-selection window
+    @return: the warped image, or None when the selection was cancelled or has zero area
+    """
+    msg.showinfo('使用方法', '''在之后弹出的窗口之中, 请使用Ctrl+左键单击按顺时针顺序选择需要转换视角的区域的四个顶点
+Esc: 退出
+Enter: 确定
+R: 清除已选择的点
+Ctrl+Click: 选择顶点
+''')
+    pl = getFourPointGUI(img, img_name)
+    if pl is None:
+        return None
+
+    wt, ht = getAreaSize(pl)
+    if wt <= 0 or ht <= 0:
+        # coincident corners would give an empty output size, which warpPerspective rejects
+        msg.showerror('错误', '所选区域面积为零, 无法进行透视矫正')
+        return None
+    # destination corners in the same order as the clicks: TL, TR, BR, BL
+    dst = np.float32([[0, 0], [wt, 0], [wt, ht], [0, ht]])
+    trans = cv2.getPerspectiveTransform(pl, dst)
+    return cv2.warpPerspective(img, trans, (math.ceil(wt), math.ceil(ht)))
--- a/img_util/morphology.py
+++ b/img_util/morphology.py
@ -0,0 +1,17 @@
+import cv2
+import numpy as np
+
+"""
+此模块为形态学操作的模块, 对应课程中的第八章, 由于在opencv中已有现成的函数, 所以不再定义新的函数, opencv中的函数提示可以参考下面的注释
+"""
+
+# structure element (3*3 cross as example):
+#   kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3)) or np.array([[0,1,0], [1,1,1], [0,1,0]], dtype=np.uint8)
+# dilate:
+#   cv2.dilate(src, kernel, anchor=(-1, -1)) or cv2.morphologyEx(img, cv2.MORPH_DILATE, kernel, anchor=(-1, -1))
+# erode:
+#   cv2.erode(img, kernel) or cv2.morphologyEx(img, cv2.MORPH_ERODE, kernel)
+# open:
+#   cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
+# close:
+#   cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
--- a/img_util/segmentation.py
+++ b/img_util/segmentation.py
@ -0,0 +1,45 @@
+# -*- coding:utf-8 -*-
+
+"""
+此模块包括对图像分割的多种方法，如Roberts算子、Sobel算子、Prewitt算子、Laplacian算子等方法实现的边缘检测，对应教学中的第五章
+对于Canny算法、Hough变换等操作，都可以通过opencv中的函数调用做到, 这里不再做出新的实现
+"""
+
+import numpy as np
+import cv2
+
+
+def roberts_edge(img):
+    """
+    Edge detection with the two 2x2 Roberts cross kernels.
+    @param img: input image
+    @return: equal-weight blend of the absolute responses of both kernels
+    """
+    cross_kernels = (
+        np.array([[-1, 0], [0, 1]], dtype=int),
+        np.array([[0, -1], [1, 0]], dtype=int),
+    )
+    # filter in 16-bit signed so negative responses survive until the abs step
+    resp_x, resp_y = [cv2.convertScaleAbs(cv2.filter2D(img, cv2.CV_16S, k)) for k in cross_kernels]
+    return cv2.addWeighted(resp_x, 0.5, resp_y, 0.5, 0)
+
+
+def sobel_edge(img):
+    """
+    Edge detection with the Sobel operator.
+    @param img: input image
+    @return: equal-weight blend of the absolute x and y derivatives
+    """
+    # (dx, dy) derivative orders: first the x derivative, then the y derivative
+    resp_x, resp_y = [
+        cv2.convertScaleAbs(cv2.Sobel(img, cv2.CV_16S, dx, dy))
+        for dx, dy in ((1, 0), (0, 1))
+    ]
+    return cv2.addWeighted(resp_x, 0.5, resp_y, 0.5, 0)
+
+
+def prewitt_edge(img):
+    """
+    Edge detection with the two 3x3 Prewitt kernels.
+    @param img: input image
+    @return: equal-weight blend of the absolute responses of both kernels
+    """
+    prewitt_kernels = (
+        np.array([[1, 1, 1], [0, 0, 0], [-1, -1, -1]], dtype=int),
+        np.array([[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]], dtype=int),
+    )
+    # filter in 16-bit signed so negative responses survive until the abs step
+    resp_x, resp_y = [cv2.convertScaleAbs(cv2.filter2D(img, cv2.CV_16S, k)) for k in prewitt_kernels]
+    return cv2.addWeighted(resp_x, 0.5, resp_y, 0.5, 0)
+
+
+def laplacian_edge(img):
+    """
+    Edge detection with the 4-neighbour Laplacian kernel.
+    @param img: input image
+    @return: absolute value of the Laplacian response, converted to 8 bit
+    """
+    # The coefficients must sum to zero so that flat regions give no response;
+    # the bottom-right entry is therefore 0.
+    kernel = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=int)
+    return cv2.convertScaleAbs(cv2.filter2D(img, cv2.CV_16S, kernel))
+
+
+# Laplacian: cv2.Laplacian(img, cv2.CV_16S, ksize=3)
+# Hough: cv2.HoughLines(img, 1, np.pi/180, thresh) or cv2.HoughLinesP
+# Canny: cv2.Canny(img, thresh1, thresh2)
--- a/img_util/smooth_sharpen_recover.py
+++ b/img_util/smooth_sharpen_recover.py
@ -0,0 +1,472 @@
+# -*- coding:utf-8 -*-
+
+"""
+此模块首先包括图像平滑的多种方法, 如空域上的均值滤波、邻域平均法滤波、中值滤波，频域上的理想低通滤波、巴特沃斯低通滤波、高斯低通滤波和
+梯形低通滤波等, 对应教学中的第六章;
+同时, 此模块还包括图像锐化的多种方法, 如频域上的理想高通滤波、巴特沃斯高通滤波、高斯高通滤波和梯形高通滤波等, 以及空域上的利用典型梯度
+算法、Roberts算子、Sobel算子、Prewitt算子、Laplacian算子等算法进行梯度微分法锐化, 也包括利用反锐化掩膜法进行锐化的方法, 对应教学中
+的第七章
+另外, 此模块也包括了图像恢复需要的多种方法, 如添加椒盐噪声或高斯噪声, 以及几何均值滤波、谐波均值滤波和逆谐波均值滤波以及各种排序统计滤波器如
+最大值滤波、最小值滤波、中值滤波、中点滤波和修正的Alpha滤波等等, 对应教学中的第九章
+"""
+
+import math
+import random
+
+import numpy as np
+import cv2
+import segmentation as sgm
+
+# constant
+GRAD_ALG_TYPICAL = 0
+GRAD_ALG_ROBERTS = 1
+GRAD_ALG_SOBEL = 2
+GRAD_ALG_PREWITT = 3
+GRAD_ALG_LAPLACIAN = 4
+
+FILTER_MAX = 0
+FILTER_MIN = 1
+FILTER_MIDPOINT = 2
+FILTER_MEDIAN = 3
+FILTER_ALP_TRIMMED = 4
+
+
+# smoothing
+def mean_filter(img, ksize=3):
+    """
+    Smooth img with a ksize x ksize box kernel whose weights are all 1/ksize^2
+    @param img:input img
+    @param ksize:size of kernel, should be odd number greater than or equal 3
+    @return:output img
+    """
+    if ksize < 3 or ksize % 2 == 0:
+        raise Exception('ksize should be an odd number greater than or equal 3.\n')
+    weight = 1.0 / ksize**2
+    box = np.full((ksize, ksize), weight)
+    # ddepth -1 keeps the depth of the input image
+    return cv2.filter2D(img, -1, box)
+
+
+def neighbor_avg_filter(img, ksize=3):
+    """
+    Replace every pixel by the average of its neighbours inside a ksize x ksize
+    window, the centre pixel itself being excluded
+    @param img:input img
+    @param ksize:size of kernel, should be odd number greater than or equal 3
+    @return:output img
+    """
+    if ksize < 3 or ksize % 2 == 0:
+        raise Exception('ksize should be an odd number greater than or equal 3.\n')
+    neighbours = ksize**2 - 1
+    kernel = np.full((ksize, ksize), 1.0 / neighbours)
+    centre = ksize // 2
+    kernel[centre, centre] = 0
+    # ddepth -1 keeps the depth of the input image
+    return cv2.filter2D(img, -1, kernel)
+
+
+def median_filter(img, ksize=3):
+    """
+    Median-filter img by delegating to cv2.medianBlur
+    @param img:input img
+    @param ksize:size of kernel, should be odd number greater than or equal 3
+    @return:output img
+    """
+    if ksize < 3 or ksize % 2 == 0:
+        raise Exception('ksize should be an odd number greater than or equal 3.\n')
+    smoothed = cv2.medianBlur(img, ksize)
+    return smoothed
+
+
+def ideal_low_pass_filter(img, cutoff):
+    """
+    Ideal lowpass filtering in the frequency domain: samples whose distance to
+    the spectrum centre is at most cutoff are kept, all others are zeroed
+    @param img:input img
+    @param cutoff:the cutoff frequency of filter
+    @return:output img of type np.uint8, min-max normalized to [0, 255]
+    """
+    spectrum = np.fft.fftshift(cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT))
+    rows, cols = spectrum.shape[:2]
+    # squared distance of every sample to (rows/2, cols/2), built by broadcasting
+    row_term = (np.arange(rows) - rows/2)**2
+    col_term = (np.arange(cols) - cols/2)**2
+    keep = (row_term[:, None] + col_term[None, :]) <= cutoff ** 2
+
+    filtered = np.zeros_like(spectrum)
+    filtered[keep] = spectrum[keep]
+
+    spatial = cv2.idft(np.fft.ifftshift(filtered))
+    magnitude = cv2.magnitude(spatial[:, :, 0], spatial[:, :, 1])
+    scaled = cv2.normalize(magnitude, None, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F) * 255
+    return np.uint8(np.around(scaled))
+
+
+def butterWorth_low_pass_filter(img, cutoff, n):
+    """
+    Butterworth lowpass filtering in the frequency domain
+        H(u,v) = 1 / [1+(D/cutoff)^2n] where D is sqrt(u^2 + v^2)
+    @param img:input img
+    @param cutoff:the cutoff frequency of filter
+    @param n:the order of filter
+    @return:output img of type np.uint8, min-max normalized to [0, 255]
+    """
+    spectrum = np.fft.fftshift(cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT))
+    filtered = np.zeros_like(spectrum)
+    rows, cols = spectrum.shape[:2]
+    cutoff_sq = cutoff ** 2
+    for r in range(rows):
+        row_term = (r - rows / 2) ** 2
+        for c in range(cols):
+            dist_sq = row_term + (c - cols / 2) ** 2
+            # (D/cutoff)^(2n) is evaluated as (D^2/cutoff^2)^n
+            attenuation = 1 + pow(dist_sq/cutoff_sq, n)
+            filtered[r][c] = spectrum[r][c] / attenuation
+    spatial = cv2.idft(np.fft.ifftshift(filtered))
+    magnitude = cv2.magnitude(spatial[:, :, 0], spatial[:, :, 1])
+    scaled = cv2.normalize(magnitude, None, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F) * 255
+    return np.uint8(np.around(scaled))
+
+
+def gaussian_low_pass_filter(img, cutoff):
+    """
+    Gaussian lowpass filtering in the frequency domain
+        H(u,v) = exp(-D^2 / 2 cutoff^2)
+    @param img:input img
+    @param cutoff:the cutoff frequency of filter (should be positive); 0 raises an Exception
+    @return:output img of type np.uint8, min-max normalized to [0, 255]
+    """
+    if cutoff == 0:
+        raise Exception('cutoff of gaussian filter should be positive')
+    spectrum = np.fft.fftshift(cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT))
+    filtered = np.zeros_like(spectrum)
+    rows, cols = spectrum.shape[:2]
+    two_sigma_sq = 2 * (cutoff ** 2)
+    for r in range(rows):
+        row_term = (r - rows / 2) ** 2
+        for c in range(cols):
+            dist_sq = row_term + (c - cols / 2) ** 2
+            gain = np.exp(-dist_sq/two_sigma_sq)
+            filtered[r][c] = spectrum[r][c] * gain
+    spatial = cv2.idft(np.fft.ifftshift(filtered))
+    magnitude = cv2.magnitude(spatial[:, :, 0], spatial[:, :, 1])
+    scaled = cv2.normalize(magnitude, None, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F) * 255
+    return np.uint8(np.around(scaled))
+
+
+def trapezoidal_low_pass_filter(img, d0, d1):
+    """
+    Trapezoidal lowpass filtering in the frequency domain
+        H(u,v) = 1              if D <= d0; \n
+        ...... = 0              if D >  d1; \n
+        ...... = (D-d1)/(d0-d1) if D in (d0,d1]; \n
+    @param img:input image
+    @param d0:d0, radius up to which frequencies pass unchanged
+    @param d1:d1, radius beyond which frequencies are removed (d0 <= d1)
+    @return:output img of type np.uint8, min-max normalized to [0, 255]
+    """
+    spectrum = np.fft.fftshift(cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT))
+    filtered = np.zeros_like(spectrum)
+    rows, cols = spectrum.shape[:2]
+
+    for r in range(rows):
+        row_term = (r - rows / 2) ** 2
+        for c in range(cols):
+            dist = math.sqrt(row_term + (c - cols / 2) ** 2)
+            if dist <= d0:
+                filtered[r][c] = spectrum[r][c]
+                continue
+            if dist > d1:
+                # stop band: the sample keeps its initial value of zero
+                continue
+            # linear ramp between d0 and d1
+            filtered[r][c] = spectrum[r][c] * (dist-d1) / (d0-d1)
+
+    spatial = cv2.idft(np.fft.ifftshift(filtered))
+    magnitude = cv2.magnitude(spatial[:, :, 0], spatial[:, :, 1])
+    scaled = cv2.normalize(magnitude, None, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F) * 255
+    return np.uint8(np.around(scaled))
+
+
+# sharpening
+def ideal_high_pass_filter(img, cutoff):
+    """
+    Apply an ideal highpass filter on img
+        H(u,v) = 1 if D > cutoff; 0 otherwise, D being the distance to the spectrum centre
+    @param img:input img
+    @param cutoff:the cutoff frequency of filter (should be 0 or positive)
+    @return:output img of type np.uint8, min-max normalized to [0, 255]
+    """
+    f = np.fft.fftshift(cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT))
+    h, w = f.shape[:2]
+    # squared distance of every sample to (h/2, w/2), built by broadcasting
+    d2 = ((np.arange(h) - h / 2) ** 2)[:, None] + ((np.arange(w) - w / 2) ** 2)[None, :]
+    passed = d2 > cutoff ** 2
+
+    # blocked samples simply keep the zero they were initialised with
+    out = np.zeros_like(f)
+    out[passed] = f[passed]
+
+    rt = cv2.idft(np.fft.ifftshift(out))
+    rt = cv2.magnitude(rt[:, :, 0], rt[:, :, 1])
+    rt = np.around(cv2.normalize(rt, None, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F) * 255)
+    return np.uint8(rt)
+
+
+def butterWorth_high_pass_filter(img, cutoff, n):
+    """
+    Butterworth highpass filtering in the frequency domain
+        H(u,v) = 1 / [1+(cutoff/D)^2n] where D is sqrt(u^2 + v^2)
+    @param img:input img
+    @param cutoff:the cutoff frequency of filter (should be 0 or positive)
+    @param n:the order of filter
+    @return:output img of type np.uint8, min-max normalized to [0, 255]
+    """
+    spectrum = np.fft.fftshift(cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT))
+    filtered = np.zeros_like(spectrum)
+    rows, cols = spectrum.shape[:2]
+    cutoff_sq = cutoff ** 2
+    for r in range(rows):
+        row_term = (r - rows / 2) ** 2
+        for c in range(cols):
+            dist_sq = row_term + (c - cols / 2) ** 2
+            if dist_sq != 0:
+                # the sample with D == 0 is skipped and stays zero
+                filtered[r][c] = spectrum[r][c] / (1 + pow(cutoff_sq/dist_sq, n))
+    spatial = cv2.idft(np.fft.ifftshift(filtered))
+    magnitude = cv2.magnitude(spatial[:, :, 0], spatial[:, :, 1])
+    scaled = cv2.normalize(magnitude, None, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F) * 255
+    return np.uint8(np.around(scaled))
+
+
+def gaussian_high_pass_filter(img, cutoff):
+    """
+    Gaussian highpass filtering in the frequency domain
+        H(u,v) = 1 - exp(-D^2 / 2 cutoff^2)
+    @param img:input img
+    @param cutoff:the cutoff frequency of filter (should be positive); 0 raises an Exception
+    @return:output img of type np.uint8, min-max normalized to [0, 255]
+    """
+    if cutoff == 0:
+        raise Exception('cutoff of gaussian filter should be positive')
+    spectrum = np.fft.fftshift(cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT))
+    filtered = np.zeros_like(spectrum)
+    rows, cols = spectrum.shape[:2]
+    two_sigma_sq = 2 * (cutoff ** 2)
+    for r in range(rows):
+        row_term = (r - rows / 2) ** 2
+        for c in range(cols):
+            dist_sq = row_term + (c - cols / 2) ** 2
+            gain = 1 - np.exp(-dist_sq/two_sigma_sq)
+            filtered[r][c] = spectrum[r][c] * gain
+    spatial = cv2.idft(np.fft.ifftshift(filtered))
+    magnitude = cv2.magnitude(spatial[:, :, 0], spatial[:, :, 1])
+    scaled = cv2.normalize(magnitude, None, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F) * 255
+    return np.uint8(np.around(scaled))
+
+
+def trapezoidal_high_pass_filter(img, d0, d1):
+    """
+    Trapezoidal highpass filtering in the frequency domain
+        H(u,v) = 0              if D <= d1; \n
+        ...... = 1              if D >  d0; \n
+        ...... = (D-d1)/(d0-d1) if D in (d1,d0]; \n
+    @param img:input image
+    @param d0:d0, radius beyond which frequencies pass unchanged
+    @param d1:d1, radius up to which frequencies are removed (d1 <= d0)
+    @return:output img of type np.uint8, min-max normalized to [0, 255]
+    """
+    spectrum = np.fft.fftshift(cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT))
+    filtered = np.zeros_like(spectrum)
+    rows, cols = spectrum.shape[:2]
+
+    for r in range(rows):
+        row_term = (r - rows / 2) ** 2
+        for c in range(cols):
+            dist = math.sqrt(row_term + (c - cols / 2) ** 2)
+            if dist <= d1:
+                # stop band: the sample keeps its initial value of zero
+                continue
+            if dist > d0:
+                filtered[r][c] = spectrum[r][c]
+            else:
+                # linear ramp between d1 and d0
+                filtered[r][c] = spectrum[r][c] * (dist-d1) / (d0-d1)
+
+    spatial = cv2.idft(np.fft.ifftshift(filtered))
+    magnitude = cv2.magnitude(spatial[:, :, 0], spatial[:, :, 1])
+    scaled = cv2.normalize(magnitude, None, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F) * 255
+    return np.uint8(np.around(scaled))
+
+
+def grad_sharpen(img, method, thresh=None, lg=None, lb=None):
+    """
+    Apply gradient method to sharpen img
+    Pixels whose gradient is below thresh are treated as background, the rest as edge.
+    The thresholding is done with boolean masks on the whole array instead of
+    a per-pixel Python loop.
+    @param img: input img
+    @param method: gradient algorithm, one of the GRAD_ALG_* constants in this module
+    @param thresh: if not None, use this value to specify background and edge; if None, only show gradient image
+    @param lg:if not None, edge's gray scale will be set to lg; if None, edge pixels keep their gradient value
+    @param lb:if not None, background's gray scale will be set to lb; if None, background pixels take the value of img
+    @return:output image
+    """
+    if method == GRAD_ALG_TYPICAL:
+        # forward differences along x and y
+        kernelx = np.array([[-1, 1], [0, 0]], dtype=int)
+        kernely = np.array([[-1, 0], [1, 0]], dtype=int)
+        x = cv2.convertScaleAbs(cv2.filter2D(img, cv2.CV_16S, kernelx))
+        y = cv2.convertScaleAbs(cv2.filter2D(img, cv2.CV_16S, kernely))
+        gr = cv2.addWeighted(x, 0.5, y, 0.5, 0)
+    elif method == GRAD_ALG_SOBEL:
+        gr = sgm.sobel_edge(img)
+    elif method == GRAD_ALG_PREWITT:
+        gr = sgm.prewitt_edge(img)
+    elif method == GRAD_ALG_ROBERTS:
+        gr = sgm.roberts_edge(img)
+    elif method == GRAD_ALG_LAPLACIAN:
+        gr = sgm.laplacian_edge(img)
+    else:
+        raise Exception('Grad algorithm not exists: ' + str(method))
+
+    if thresh is None:
+        return gr
+
+    # the mask is taken before gr is modified, so every pixel is classified
+    # by its own gradient value
+    background = gr < thresh
+    if lb is None:
+        gr[background] = np.asarray(img)[background]
+    else:
+        gr[background] = lb
+    if lg is not None:
+        gr[~background] = lg
+    return gr
+
+
+def unsharp_mask(img, c, ksize=3):
+    """
+    Sharpen img with a single convolution kernel: every weight is -c/ksize^2
+    and the centre weight is additionally raised by c + 1
+    @param img: input image
+    @param c: the coefficient of the mask
+    @param ksize: kernel size, should be an odd number greater than or equal 3
+    @return: sharpened img
+    """
+    if ksize < 3 or ksize % 2 == 0:
+        raise Exception('ksize should be an odd number greater than or equal 3.\n')
+    kernel = np.ones((ksize, ksize)) * (-c) / ksize ** 2
+    centre = ksize // 2
+    kernel[centre][centre] += c + 1
+    # ddepth -1 keeps the depth of the input image
+    return cv2.filter2D(img, -1, kernel)
+
+
+# recovery
+def add_gaussian_noise(img, loc=0.0, scale=1.0):
+    """
+    Add Gaussian noise on image
+    The image is scaled to [0, 1] (divided by 255), the noise is added there,
+    the sum is clipped to [0, 1] and scaled back to 8 bit.
+    @param img: input image
+    @param loc: mean of normal distribution
+    @param scale: standard deviation of normal distribution
+    @return:the output image (np.uint8)
+    """
+    unit_img = np.array(img/255, dtype=float)
+    noisy = unit_img + np.random.normal(loc, scale, unit_img.shape)
+    clipped = np.clip(noisy, 0.0, 1.0)
+    return np.uint8(clipped * 255)
+
+
+def add_pepper_salt_noise(img, prob_p, prob_s):
+    """
+    Add pepper and salt noise on image
+    One random number in [0, 1) is drawn per pixel position: below prob_p the
+    pixel becomes 0 (pepper), above 1 - prob_s it becomes 255 (salt),
+    otherwise the input value is copied.
+    @param img: input image
+    @param prob_p: probability of pepper noise [0.0, 1.0]
+    @param prob_s: probability of salt noise [0.0, 1.0-prob_p]
+    @return: output image
+    """
+    if prob_p > 1.0 or prob_p < 0.0:
+        raise Exception('prob_p should be in [0.0, 1.0]')
+    if prob_s < 0.0 or prob_s + prob_p > 1.0:
+        raise Exception('prob_s should be in [0.0, 1.0-prob_p]')
+    salt_from = 1 - prob_s
+    noisy = np.zeros_like(img)
+    for r in range(img.shape[0]):
+        for c in range(img.shape[1]):
+            draw = random.random()
+            if draw < prob_p:
+                # pepper: the pixel keeps the zero it was initialised with
+                continue
+            if draw > salt_from:
+                noisy[r][c] = 255
+            else:
+                noisy[r][c] = img[r][c]
+    return noisy
+
+
+def geomean_filter(img, ksize=3):
+    """
+    Geometric mean filter: every interior pixel becomes the (ksize^2)-th root
+    of the product of its ksize x ksize window. The border of width ksize//2
+    is not processed and stays 0.
+    @param img: input img
+    @param ksize: size of kernel, should be odd number greater than or equal 3
+    @return: output img, same shape and dtype as img
+    """
+    if ksize < 3 or ksize % 2 == 0:
+        raise Exception('ksize should be an odd number greater than or equal 3.\n')
+    result = np.zeros_like(img)
+    half = ksize // 2
+    root = 1 / ksize**2
+    for r in range(half, result.shape[0]-half):
+        row_span = slice(r-half, r+half+1)
+        for c in range(half, result.shape[1]-half):
+            # multiply by 1.0 so the product is accumulated in floating point
+            window = img[row_span, c-half:c+half+1]*1.0
+            result[r][c] = np.power(np.prod(window), root)
+    return result
+
+
+def harmonic_mean_filter(img, ksize=3):
+    """
+    Harmonic mean filter: every interior pixel becomes ksize^2 divided by the
+    sum of the reciprocals of its ksize x ksize window. The border of width
+    ksize//2 is not processed and stays 0.
+    @param img: input img
+    @param ksize: size of kernel, should be odd number greater than or equal 3
+    @return: output img, same shape and dtype as img
+    """
+    if ksize < 3 or ksize % 2 == 0:
+        raise Exception('ksize should be an odd number greater than or equal 3.\n')
+    result = np.zeros_like(img)
+    half = ksize // 2
+    count = ksize ** 2
+    for r in range(half, result.shape[0] - half):
+        row_span = slice(r - half, r + half + 1)
+        for c in range(half, result.shape[1] - half):
+            window = img[row_span, c - half:c + half + 1]
+            reciprocal_sum = np.sum(1.0 / window)
+            result[r][c] = count / reciprocal_sum
+    return result
+
+
+def contra_harmonic_mean_filter(img, ksize=3, q=-1):
+    """
+    Contra-harmonic mean filter: every interior pixel becomes
+    sum(window^(q+1)) / sum(window^q) over its ksize x ksize window. The
+    border of width ksize//2 is not processed and stays 0.
+    @param img: input img
+    @param ksize: size of kernel, should be odd number greater than or equal 3
+    @param q: the q value of filter
+    @return: output img, same shape and dtype as the img passed in
+    """
+    if ksize < 3 or ksize % 2 == 0:
+        raise Exception('ksize should be an odd number greater than or equal 3.\n')
+    # allocate the result first so it keeps the dtype of the original input
+    result = np.zeros_like(img)
+    source = np.float32(img)
+    half = ksize // 2
+    for r in range(half, result.shape[0] - half):
+        row_span = slice(r - half, r + half + 1)
+        for c in range(half, result.shape[1] - half):
+            window = source[row_span, c - half:c + half + 1]
+            upper = np.sum(np.power(window, q+1))
+            lower = np.sum(np.power(window, q))
+            result[r][c] = upper / lower
+    return result
+
+
+def statistic_filter(img, ksize=3, method=FILTER_MEDIAN, d=None):
+    """
+    Apply order-statistic filter on img
+    Every interior pixel is replaced by a statistic of its ksize x ksize
+    window; the border of width ksize//2 is not processed and stays 0.
+    The arguments are validated once, before any pixel is processed.
+    @param img: input image
+    @param ksize: size of kernel, should be odd number greater than or equal 3
+    @param method: the method of filter, including FILTER_MEDIAN/ _MAX/ _MIN/ _MIDPOINT/ _ALP_TRIMMED
+    @param d: number of values dropped at each end of the sorted window when method is
+              FILTER_ALP_TRIMMED (alpha-trimmed method), an integer in [0, (ksize^2-1)/2]
+    @return: output image, same shape and dtype as img
+    """
+    if ksize < 3 or ksize % 2 == 0:
+        raise Exception('ksize should be an odd number greater than or equal 3.\n')
+    k2 = ksize ** 2
+    if method == FILTER_ALP_TRIMMED:
+        # d=None used to fail with a TypeError inside the comparison
+        if d is None or not (0 <= d <= (k2 - 1) / 2):
+            raise Exception('d value should in [0, ksize^2-1/2]')
+    elif method not in (FILTER_MAX, FILTER_MIN, FILTER_MIDPOINT, FILTER_MEDIAN):
+        raise Exception('method not recognized')
+
+    out = np.zeros_like(img)
+    padding = ksize // 2
+    for i in range(padding, out.shape[0] - padding):
+        for j in range(padding, out.shape[1] - padding):
+            temp = img[i - padding:i + padding + 1, j - padding:j + padding + 1]
+            if method == FILTER_MAX:
+                out[i][j] = np.max(temp)
+            elif method == FILTER_MIN:
+                out[i][j] = np.min(temp)
+            elif method == FILTER_MIDPOINT:
+                # add as Python floats: max + min of two uint8 values would wrap around
+                out[i][j] = (float(np.max(temp)) + float(np.min(temp))) / 2
+            elif method == FILTER_MEDIAN:
+                out[i][j] = np.median(temp)
+            else:
+                # alpha-trimmed mean: drop the d smallest and d largest values
+                trimmed = np.sort(temp, axis=None)[d:k2-d]
+                out[i][j] = np.average(trimmed)
+
+    return out
--- a/main.py
+++ b/main.py
@ -0,0 +1,225 @@
+#!/usr/bin/python3
+# -*- coding:utf-8 -*-
+import random
+import tkinter as tk
+import tkinter.filedialog as filedialog
+import tkinter.messagebox as msg
+import PIL.Image as Img
+import PIL.ImageTk as Imgtk
+
+import cv2
+import numpy as np
+
+import img_util.gui_util as gu
+
+# Root window of the application; it is created before the tk variables below.
+root = tk.Tk()
+# Preview label and manual-threshold slider; both are created and assigned in main().
+con: tk.Label = None
+sc_th: tk.Scale = None
+
+# start: True once selectFile() has loaded an image; the other handlers return early while it is False.
+start = False
+# trans: True after changePer() applied a perspective correction; cleared again by unPer().
+trans = False
+# isave: True while there is nothing unsaved; onclose() asks for confirmation when it is False.
+isave = True
+
+# Value of the mode radio buttons built in main(): 0 original, 1 gray, 2 automatic B/W, 3 manual B/W.
+radi_n = tk.IntVar()
+# Threshold bound to the slider and used by the manual B/W mode; starts at 125.
+thresh = tk.IntVar(value=125)
+
+# Path returned by the open-file dialog in selectFile().
+img_path = ''
+
+# img_o: PIL image scaled for the preview; img_tk: PhotoImage currently given to the label
+# (kept in a global, presumably so it is not garbage-collected while displayed -- confirm);
+# img_o_not: copy of the preview taken at load time, restored by unPer().
+img_o = None
+img_tk = None
+img_o_not = None
+
+# Screen size in pixels, fixed window width and initial window height (3/4 of the screen height).
+SCREEN_W, SCREEN_H = root.winfo_screenwidth(), root.winfo_screenheight()
+WIN_W = 1280
+INI_H = int(SCREEN_H * 3/4)
+
+# img: working image decoded with OpenCV; out_img: result of the selected mode, written by saveFile();
+# img_not: copy of the decoded image taken at load time, restored by unPer().
+img = None
+out_img = None
+img_not = None
+
+# temp_out: grayscale image cached for the manual threshold; use_temp: True while manual B/W mode is selected.
+temp_out, use_temp = None, False
+
+
+def resizeImg(w, h, wb, hb, p_image):
+    f = min(wb/w, hb/h)
+    return p_image.resize((int(f*w), int(f*h)))
+
+
+def resizeImgAuto(wb, hb, p_image: Img.Image):
+    return resizeImg(p_image.width, p_image.height, wb, hb, p_image)
+
+
+def selectFile():
+    global img_path, img_tk, img_o, img, img_not, img_o_not, out_img, con, start, isave
+    img_path = filedialog.askopenfilename()
+    if len(img_path) == 0:
+        return
+
+    img_o = resizeImgAuto(WIN_W-2, SCREEN_H*2/3-2, Img.open(img_path))
+    img_tk = Imgtk.PhotoImage(img_o)
+    img_o_not = img_o.copy()
+
+    # img = cv2.imread(img_path)
+    img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
+    out_img = img.copy()
+    img_not = img.copy()
+    con.config(image=img_tk)
+    radi_n.set(0)
+
+    start = True
+    isave = True
+
+
+def saveFile():
+    global isave
+    if not start:
+        return
+    save_path = filedialog.asksaveasfilename(filetypes=gu.SUPPORT_FILETYPE, defaultextension=gu.SUPPORT_FILETYPE)
+    if len(save_path) == 0:
+        return
+    # cv2.imwrite(save_path, out_img)
+    ext = str()
+    for i in range(len(save_path)-1, -1, -1):
+        if save_path[i] == '.':
+            ext = save_path[i:]
+    if len(ext) == 0:
+        msg.showerror('错误', '请输入文件后缀名')
+    try:
+        cv2.imencode(ext, out_img)[1].tofile(save_path)
+        isave = True
+    except:
+        msg.showerror('错误', '未受支持的文件后缀名')
+
+
+def changePer():
+    global img, img_o, img_tk, out_img, trans, isave
+    if not start:
+        return
+    newp = gu.changePerspGui(img, str(random.random()))
+    if newp is None:
+        return
+    img_o = resizeImgAuto(WIN_W-2, SCREEN_H*2/3-2, Img.fromarray(newp))
+    img_tk = Imgtk.PhotoImage(img_o)
+
+    img = newp
+    out_img = img.copy()
+    con.config(image=img_tk)
+    radi_n.set(0)
+    trans = True
+    isave = False
+
+
+def unPer():
+    global img, img_o, img_tk, out_img, img_not, img_o_not, trans, isave
+
+    if not start or not trans or not msg.askyesno('撤回', '您确定要撤销所有的透视矫正操作吗? '):
+        return
+
+    img = img_not.copy()
+    out_img = img.copy()
+
+    img_o = img_o_not.copy()
+    img_tk = Imgtk.PhotoImage(img_o)
+    con.config(image=img_tk)
+    trans = False
+    isave = False
+    radi_n.set(0)
+
+
+def changeThresh(pos):
+    global out_img, img_tk, temp_out, use_temp, isave
+    if not use_temp:
+        print('Error')
+        return
+    binary, out_img = cv2.threshold(temp_out, thresh.get(), 255, cv2.THRESH_BINARY)
+    img_tk = Imgtk.PhotoImage(resizeImgAuto(WIN_W - 2, SCREEN_H * 2 / 3 - 2, Img.fromarray(out_img)))
+    con.config(image=img_tk)
+    isave = False
+
+
+def selectRadio():
+    global img_tk, out_img, img, img_o, use_temp, temp_out, isave
+    if not start:
+        return
+    rg = radi_n.get()
+    if rg == 3:
+        temp_out = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        binary, out_img = cv2.threshold(temp_out, thresh.get(), 255, cv2.THRESH_BINARY)
+        img_tk = Imgtk.PhotoImage(resizeImgAuto(WIN_W - 2, SCREEN_H * 2 / 3 - 2, Img.fromarray(out_img)))
+    elif rg == 2:
+        out_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        binary, out_img = cv2.threshold(out_img, 0, 255, cv2.THRESH_OTSU)
+        img_tk = Imgtk.PhotoImage(resizeImgAuto(WIN_W-2, SCREEN_H*2/3-2, Img.fromarray(out_img)))
+    elif rg == 1:
+        out_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        img_tk = Imgtk.PhotoImage(resizeImgAuto(WIN_W-2, SCREEN_H*2/3-2, Img.fromarray(out_img)))
+    else:
+        out_img = img.copy()
+        img_tk = Imgtk.PhotoImage(img_o)
+
+    use_temp = (rg == 3)
+    if use_temp:
+        sc_th.config(state='normal', takefocus=True)
+    else:
+        sc_th.config(state='disabled', takefocus=False)
+
+    con.config(image=img_tk)
+    isave = False
+
+
+def versionInfo():
+    """Show the version / author information box."""
+    # The backslash continuations below are inside the string literal, so the leading
+    # spaces of the continued lines are part of the displayed text.
+    msg.showinfo('版本信息', '简单扫描 --- By 姜业恒 10205101498 \n 版本: Alpha v0.01 \n \
+ 此版本仅用于华东师范大学《数字图像处理》课程期末作业 \n\
+  (课程代码: SOFT0031132210.02) \n\
+ 暂不开放未经作者允许对此软件进行修改或售卖的权利')
+
+
+def onclose():
+    global isave
+    if isave or msg.askokcancel('简单扫描', '您还有未保存的修改, 确定退出吗?'):
+        root.destroy()
+
+
+def main():
+    global con, sc_th
+    root.title('简单扫描 -- By 姜业恒 10205101498')
+    root.geometry('x'.join([str(WIN_W), str(INI_H)])+'+10+10')
+
+    fr_top = tk.Frame(root)
+    fr_med = tk.Frame(root)
+    sep_line = tk.Canvas(root, background='#dddddd', width=WIN_W, height=1)
+    con = tk.Label(root, text='\n图像预览处\n\n\n\n\n\n')
+    bt_choose = tk.Button(fr_top, text='选择图片', command=lambda: selectFile())
+    bt_save = tk.Button(fr_top, text='保存', command=lambda: saveFile())
+    bt_changeP = tk.Button(fr_top, text='透视矫正', command=changePer)
+    bt_unP = tk.Button(fr_top, text='撤销透视矫正', command=unPer)
+    bt_info = tk.Button(fr_top, text='版本信息', command=versionInfo)
+    rd_ori = tk.Radiobutton(fr_med, text='原图', variable=radi_n, value=0, command=selectRadio)
+    rd_gr = tk.Radiobutton(fr_med, text='灰度', variable=radi_n, value=1, command=selectRadio)
+    rd_bw = tk.Radiobutton(fr_med, text='黑白(自动)', variable=radi_n, value=2, command=selectRadio)
+    rd_th = tk.Radiobutton(fr_med, text='黑白(手动)', variable=radi_n, value=3, command=selectRadio)
+    sc_th = tk.Scale(fr_med, variable=thresh, from_=0, to=255, orient='horizontal', length=int(WIN_W/2),
+                     command=changeThresh, state='disabled', takefocus=False)
+
+    fr_top.pack()
+    fr_med.pack()
+    # sc_th.pack()
+    sep_line.pack()
+    bt_choose.grid(row=0, column=0)
+    bt_save.grid(row=0, column=1)
+    bt_changeP.grid(row=0, column=2)
+    bt_unP.grid(row=0, column=3)
+    bt_info.grid(row=0, column=4)
+    rd_ori.grid(row=0, column=0)
+    rd_gr.grid(row=0, column=1)
+    rd_bw.grid(row=0, column=2)
+    rd_th.grid(row=0, column=3)
+    sc_th.grid(row=0, column=4)
+    con.pack()
+
+    root.protocol('WM_DELETE_WINDOW', onclose)
+    root.mainloop()
+
+
+# Start the GUI only when this file is run as a script.
+if __name__ == '__main__':
+    main()
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,2 @@
+numpy==1.21.0
+opencv-python==4.6.0.66
--- a/resource/example.jpg
+++ b/resource/example.jpg
--- a/resource/img_bw.png
+++ b/resource/img_bw.png
--- a/resource/img_bwhand.png
+++ b/resource/img_bwhand.png
--- a/resource/img_gray.png
+++ b/resource/img_gray.png
--- a/resource/img_main.png
+++ b/resource/img_main.png
--- a/resource/img_orig.png
+++ b/resource/img_orig.png
--- a/resource/img_pers_1.png
+++ b/resource/img_pers_1.png
--- a/resource/img_pers_2.png
+++ b/resource/img_pers_2.png
--- a/resource/img_pers_3.png
+++ b/resource/img_pers_3.png
--- a/resource/img_save.png
+++ b/resource/img_save.png
--- a/resource/img_select.png
+++ b/resource/img_select.png
--- a/resource/output.png
+++ b/resource/output.png