@ -1,2 +1,70 @@
|
||||
# simple_scan
|
||||
# 数字图像处理实践项目:文件扫描软件
|
||||
|
||||
*--- 小组成员: 姜业恒 10205101498 ---*
|
||||
|
||||
## 概述
|
||||
|
||||
本项目为数字图像处理期末的实践项目, 包括了第一项和第二项的要求。 本项目不仅实现了《数字图像处理》课程教学中的主要内容, 还增加了实际应用的场景, 即:将使用普通照相机或手机的摄像头拍摄的文档图片转化为易于识别的类似于扫描得到的图片。
|
||||
|
||||
## 项目结构
|
||||
|
||||
- readme.md --- 本文件
|
||||
- requirement.txt --- 项目需要的依赖, 可通过pip安装
|
||||
- img_util/ --- python模块, 实现了课程中的主要内容, 每个子模块的内容在文件的注释中都有详细解释, 其中有一些功能由于opencv已经有使用起来非常简单的实现, 所以仅在注释中标注出来
|
||||
- resource/ --- 存放readme.md需要的资源文件
|
||||
- main.py --- 程序入口, 运行此文件开始程序
|
||||
|
||||
## 应用程序介绍
|
||||
|
||||
本项目运用课程中的各种知识, 实现了一个可以在实际场景中应用的程序——“简单扫描”。此程序可以将普通照相机或手机拍摄的文档图片转化为易于识别的、类似于扫描得到的图片。具体来说, 这个应用程序可以读取一张图片, 并可以根据需要调整为原图、灰度、黑白三种模式, 黑白模式可以选择自动或手动调整阈值; 另外, 还可以对图片中的透视关系进行矫正。最终, 可以将操作过后的图片进行保存。
|
||||
|
||||
## 应用程序用法
|
||||
1. 检查运行环境
|
||||
|
||||
首先查看python的版本, 考虑到兼容性问题, 最好使用python 3.9以上的版本运行程序; 另外, 可以使用`pip install -r requirement.txt`命令, 根据requirement.txt文件自动安装本程序需要的依赖库。
|
||||
|
||||
2. 运行程序
|
||||
|
||||
运行环境配置无误后, 运行main.py启动程序, 界面如下图所示
|
||||
![img_main](resource/img_main.png)
|
||||
|
||||
3. 选择图片与更改模式
|
||||
|
||||
点击界面上方的`选择图片`按钮选择一张输入的图片
|
||||
![img_select](resource/img_select.png)
|
||||
|
||||
选择完毕后, 界面会展示图片的原图
|
||||
![img_orig](resource/img_orig.png)
|
||||
|
||||
点击第二排的`灰度`选项, 可以将图片切换到灰度模式
|
||||
![img_gray](resource/img_gray.png)
|
||||
|
||||
点击第二排的`黑白(自动)`选项, 可以将图片切换到黑白模式, 其阈值由程序自动判断得到
|
||||
![img_bw](resource/img_bw.png)
|
||||
|
||||
点击第二排的`黑白(手动)`选项, 同样可以将图片切换到黑白模式, 但其阈值需要通过手动调整右侧的滑动条来确定
|
||||
![img_bw_hand](resource/img_bwhand.png)
|
||||
|
||||
4. 视角透视矫正
|
||||
|
||||
某些时候, 拍摄的视角并非是正对文档的, 需要进行透视的矫正, 这时候可以点击第一排的`透视矫正`进行矫正
|
||||
点击之后, 可以看到弹出了帮助窗口
|
||||
![img_pers_1](resource/img_pers_1.png)
|
||||
|
||||
点击`确定`以后, 弹出新的窗口, 需要以左上角、右上角、右下角、左下角的顺时针顺序以`Ctrl`+单击的方法确定图片中需要矫正的区域的四个角
|
||||
![img_pers_2](resource/img_pers_2.png)
|
||||
|
||||
接下来, 如误选了顶点, 可以使用`R`键清除; 如果不想继续矫正, 可以使用`Esc`退出; 如果已经确定, 可以使用`Enter`键确定
|
||||
![img_pers_3](resource/img_pers_3.png)
|
||||
|
||||
5. 保存
|
||||
|
||||
处理完毕后, 点击第一排的`保存`按钮即可保存文件
|
||||
![img_save](resource/img_save.png)
|
||||
|
||||
6. 应用程序效果
|
||||
|
||||
处理前:
|
||||
![img_input](resource/example.jpg)
|
||||
处理后:
|
||||
![img_output](resource/output.png)
|
||||
|
@ -0,0 +1,92 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
"""
|
||||
此模块包括实际应用需要用到的一些GUI特性, 如通过用户界面选择四个点进行透视变换。
|
||||
"""
|
||||
import cv2
|
||||
import math
|
||||
import tkinter as tk
|
||||
import tkinter.messagebox as msg
|
||||
import numpy as np
|
||||
|
||||
|
||||
# (label, pattern) pairs describing the image formats offered in file dialogs.
SUPPORT_FILETYPE = [
    ('PNG', '.png'),
    ('JPEG', '.jpg'),
    ('TIFF', '.tiff'),
    ('Bitmap', '.bmp'),
    ('WEBP', '.webp'),
    ('All', '*'),
]
|
||||
|
||||
|
||||
def showAuto(window_n, img):
    """Create a resizable OpenCV window sized so that ``img`` fits on screen.

    The window is 1280 px wide with a height matching the aspect ratio of
    ``img``. If that would exceed the screen in either direction, the window
    is shrunk (keeping the aspect ratio) to 90 % of the offending screen
    dimension. The window is then moved to the top-left corner. The image
    itself is not drawn here.

    Args:
        window_n: Name of the OpenCV window to create.
        img: Image array; only ``img.shape[0]`` (height) and ``img.shape[1]``
            (width) are read.
    """
    WIN_W = 1280  # preferred window width in pixels

    # A throw-away Tk root is used only to query the screen size. It is
    # withdrawn immediately so no stray empty window flashes up, and it is
    # destroyed in ``finally`` so it cannot leak if a query fails.
    temp_root = tk.Tk()
    try:
        temp_root.withdraw()
        screen_w = temp_root.winfo_screenwidth()
        screen_h = temp_root.winfo_screenheight()
    finally:
        temp_root.destroy()

    cv2.namedWindow(window_n, flags=cv2.WINDOW_NORMAL)

    act_w, act_h = WIN_W, math.ceil(WIN_W * img.shape[0] / img.shape[1])

    # Clamp to the screen width first, then to the screen height; both steps
    # scale the other dimension by the same factor to keep the aspect ratio.
    if act_w > screen_w:
        act_w, act_h = math.ceil(screen_w * 0.9), math.ceil(act_h * screen_w * 0.9 / act_w)
    if act_h > screen_h:
        act_w, act_h = math.ceil(act_w * screen_h * 0.9 / act_h), math.ceil(screen_h * 0.9)

    cv2.resizeWindow(window_n, act_w, act_h)
    cv2.moveWindow(window_n, 0, 0)
|
||||
|
||||
|
||||
def getFourPointGUI(ori_img, img_name):
    """Let the user pick four points on an image in an OpenCV window.

    Ctrl + left click adds a point (at most four); ``R`` clears the selection;
    ``Enter`` confirms once exactly four points are chosen; ``Esc`` or closing
    the window cancels.

    Args:
        ori_img: Image to display. It is never modified; markers are drawn on
            a copy.
        img_name: Suffix appended to ``'Persp'`` to form the window name.

    Returns:
        A ``float32`` NumPy array built from the four ``[x, y]`` points in
        click order, or ``None`` when the user cancels.
    """
    def beep():
        # winsound exists only on Windows; elsewhere fall back to the
        # terminal bell instead of crashing with ImportError.
        try:
            import winsound
        except ImportError:
            print('\a', end='', flush=True)
        else:
            winsound.MessageBeep(-1)

    img = ori_img.copy()
    window_n = 'Persp' + img_name
    showAuto(window_n, img)

    point_list = []

    def on_mouse(ev, x, y, flags, userdata):
        # ``flags`` is a bitmask, so test the Ctrl bit instead of comparing
        # for equality (other modifier bits may be set at the same time).
        if ev != cv2.EVENT_LBUTTONUP or not flags & cv2.EVENT_FLAG_CTRLKEY:
            return
        if len(point_list) < 4:
            point_list.append([x, y])
            count = len(point_list)
            # Marker colour changes with the point index; the green channel
            # is clamped because 256 * 4 // 4 would exceed the 8-bit range.
            colour = (125, min(255, 256 * count // 4), 255 - 32 * count // 4)
            cv2.circle(img, (x, y), 12, colour, -1)
        else:
            beep()

    cv2.setMouseCallback(window_n, on_mouse, img)

    while True:
        # A value below 1 covers both "hidden" (0) and "no such window" (-1).
        if cv2.getWindowProperty(window_n, cv2.WND_PROP_VISIBLE) < 1:
            return None
        cv2.imshow(window_n, img)

        wk = cv2.waitKey(20)
        if wk == -1:
            continue  # no key pressed during this poll
        key = wk & 0xFF

        if key == 27:  # Esc: cancel
            cv2.destroyWindow(window_n)
            return None
        if key in (13, 10):  # Enter (CR or LF): confirm
            if len(point_list) != 4:
                beep()
                continue
            cv2.destroyWindow(window_n)
            return np.array(point_list, dtype='float32')
        if key in (ord('r'), ord('R')):  # reset the selection
            # Rebinding ``img`` is seen by on_mouse through the closure.
            img = ori_img.copy()
            point_list.clear()
|
||||
|
||||
|
||||
def getAreaSize(pl: list):
    """Return the (width, height) of the quadrilateral described by ``pl``.

    ``pl`` holds four ``(x, y)`` points. The width is the longer of the edges
    0-1 and 2-3; the height is the longer of the edges 0-3 and 1-2.
    """
    def edge(p, q):
        # Euclidean distance between two points.
        return math.sqrt((p[0] - q[0]) ** 2 + (p[1] - q[1]) ** 2)

    first, second, third, fourth = pl[0], pl[1], pl[2], pl[3]
    width = max(edge(first, second), edge(third, fourth))
    height = max(edge(first, fourth), edge(second, third))
    return width, height
|
||||
|
||||
|
||||
def changePerspGui(img, img_name):
    """Interactively rectify the perspective of ``img``.

    Shows a usage message box, lets the user pick four corners with
    ``getFourPointGUI`` and warps the selected quadrilateral onto an
    axis-aligned rectangle whose size comes from ``getAreaSize``.

    Args:
        img: Source image passed to ``cv2.warpPerspective``.
        img_name: Suffix used for the selection window name.

    Returns:
        The warped image, or ``None`` when the user cancels or the selected
        region has zero width or height.
    """
    msg.showinfo(
        '使用方法',
        '在之后弹出的窗口之中, 请使用Ctrl+左键单击按顺时针顺序选择需要转换视角的区域的四个顶点\n'
        'Esc: 退出\n'
        'Enter: 确定\n'
        'R: 清除已选择的点\n'
        'Ctrl+Click: 选择顶点\n',
    )
    pl = getFourPointGUI(img, img_name)
    if pl is None:
        return None

    wt, ht = getAreaSize(pl)
    if wt <= 0 or ht <= 0:
        # A zero-sized target would give warpPerspective an empty output
        # size; treat it like a cancelled selection.
        msg.showerror('错误', '所选区域无效, 请重新选择四个不同的顶点')
        return None

    # Destination corners follow the same order as the selected points.
    dst = np.float32([[0, 0], [wt, 0], [wt, ht], [0, ht]])
    trans = cv2.getPerspectiveTransform(pl, dst)
    return cv2.warpPerspective(img, trans, (math.ceil(wt), math.ceil(ht)))
|
@ -0,0 +1,17 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
"""
|
||||
此模块为形态学操作的模块, 对应课程中的第八章, 由于在opencv中已有现成的函数, 所以不再定义新的函数, opencv中的函数提示可以参考下面的注释
|
||||
"""
|
||||
|
||||
# structure element (3*3 cross as example):
|
||||
# kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3)) or np.array([[0,1,0], [1,1,1], [0,1,0]], dtype=np.uint8)
|
||||
# dilate:
|
||||
# cv2.dilate(src, kernel, anchor=(-1, -1)) or cv2.morphologyEx(img, cv2.MORPH_DILATE, kernel, anchor=(-1, -1))
|
||||
# erode:
|
||||
# cv2.erode(img, kernel) or cv2.morphologyEx(img, cv2.MORPH_ERODE, kernel)
|
||||
# open:
|
||||
# cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
|
||||
# close:
|
||||
# cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
|
@ -0,0 +1,225 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding:utf-8 -*-
|
||||
import random
|
||||
import tkinter as tk
|
||||
import tkinter.filedialog as filedialog
|
||||
import tkinter.messagebox as msg
|
||||
import PIL.Image as Img
|
||||
import PIL.ImageTk as Imgtk
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
import img_util.gui_util as gu
|
||||
|
||||
# ---- Tk root and the widgets that main() creates later ----
root = tk.Tk()
con: tk.Label = None     # preview label, assigned in main()
sc_th: tk.Scale = None   # manual-threshold slider, assigned in main()

# ---- Application state flags ----
start = False   # becomes True in selectFile() once an image is loaded
trans = False   # set True by changePer(), reset to False by unPer()
isave = True    # False while there are changes not yet saved; read in onclose()

# ---- Tk variables bound to the mode radio buttons and the slider ----
radi_n = tk.IntVar()
thresh = tk.IntVar(value=125)

# ---- Preview-side images (PIL / Tk) ----
img_path = ''
img_o = None       # resized PIL image used for the preview
img_tk = None      # PhotoImage currently shown in ``con``
img_o_not = None   # copy of ``img_o`` taken in selectFile(), restored by unPer()

# ---- Window geometry ----
SCREEN_W = root.winfo_screenwidth()
SCREEN_H = root.winfo_screenheight()
WIN_W = 1280
INI_H = int(SCREEN_H * 3/4)

# ---- OpenCV-side images ----
img = None       # working image decoded with cv2
out_img = None   # image written by saveFile()
img_not = None   # copy of ``img`` taken in selectFile(), restored by unPer()

# ---- Manual-threshold mode ----
temp_out = None    # grayscale source re-thresholded by changeThresh()
use_temp = False   # True only while the manual black/white mode is active
|
||||
|
||||
|
||||
def resizeImg(w, h, wb, hb, p_image):
    """Scale ``p_image`` to fit inside a ``wb`` x ``hb`` box, keeping ``w:h``.

    Args:
        w: Current image width.
        h: Current image height.
        wb: Width of the bounding box.
        hb: Height of the bounding box.
        p_image: Object with a ``resize((width, height))`` method.

    Returns:
        Whatever ``p_image.resize`` returns for the computed size.

    Raises:
        ZeroDivisionError: If ``w`` or ``h`` is 0.
    """
    f = min(wb / w, hb / h)
    # int() truncates, so a very elongated image could end up with a
    # 0-pixel side; keep each side at least 1 pixel.
    new_w = max(1, int(f * w))
    new_h = max(1, int(f * h))
    return p_image.resize((new_w, new_h))
|
||||
|
||||
|
||||
def resizeImgAuto(wb, hb, p_image: Img.Image):
    """Fit ``p_image`` into a ``wb`` x ``hb`` box using its own dimensions."""
    src_w = p_image.width
    src_h = p_image.height
    return resizeImg(src_w, src_h, wb, hb, p_image)
|
||||
|
||||
|
||||
def selectFile():
    """Ask the user for an image file and load it as the working image.

    On success the preview, the OpenCV working/output images and the
    "original" snapshots used by unPer() are replaced, the display mode is
    reset to "original" and all state flags are reset. If the dialog is
    cancelled or the file cannot be read, nothing is changed.
    """
    global img_path, img_tk, img_o, img, img_not, img_o_not, out_img, con, start, isave
    global trans, use_temp

    chosen = filedialog.askopenfilename()
    if not chosen:
        return

    # Load everything into locals first so that a file that cannot be read
    # does not leave the globals half-updated.
    try:
        preview = resizeImgAuto(WIN_W-2, SCREEN_H*2/3-2, Img.open(chosen))
        # Decoding from raw bytes instead of cv2.imread(path); presumably so
        # that paths with non-ASCII characters work -- TODO confirm.
        decoded = cv2.imdecode(np.fromfile(chosen, dtype=np.uint8), cv2.IMREAD_COLOR)
    except (OSError, ValueError, cv2.error):
        decoded = None
    if decoded is None:
        msg.showerror('错误', '无法读取所选的图片文件')
        return

    img_path = chosen
    img_o = preview
    img_tk = Imgtk.PhotoImage(img_o)
    img_o_not = img_o.copy()

    img = decoded
    out_img = img.copy()
    img_not = img.copy()
    con.config(image=img_tk)

    # Back to "original" mode: also leave manual-threshold mode, otherwise the
    # slider would stay active and re-threshold the previous image.
    radi_n.set(0)
    use_temp = False
    sc_th.config(state='disabled', takefocus=False)

    start = True
    trans = False  # a freshly loaded image has no perspective correction
    isave = True
|
||||
|
||||
|
||||
def saveFile():
    """Ask for a destination path and write ``out_img`` to it.

    The output format is chosen from the extension of the selected path.
    Does nothing when no image has been loaded or the dialog is cancelled.
    ``isave`` is set to True only after the file was written successfully.
    """
    global isave
    import os  # local import: only needed to split off the file extension

    if not start:
        return

    # ``defaultextension`` must be a single extension string, not the
    # filetype list.
    save_path = filedialog.asksaveasfilename(filetypes=gu.SUPPORT_FILETYPE, defaultextension='.png')
    if not save_path:
        return

    # Extension after the LAST dot of the file name (dots in directory names
    # or earlier in the file name must not count).
    ext = os.path.splitext(save_path)[1]
    if not ext:
        msg.showerror('错误', '请输入文件后缀名')
        return

    # Encode in memory and write the bytes ourselves instead of
    # cv2.imwrite(path); presumably so that non-ASCII paths work -- TODO confirm.
    try:
        ok, encoded = cv2.imencode(ext, out_img)
    except cv2.error:
        ok, encoded = False, None
    if not ok:
        msg.showerror('错误', '未受支持的文件后缀名')
        return

    try:
        encoded.tofile(save_path)
    except OSError:
        msg.showerror('错误', '保存文件失败')
        return

    isave = True
|
||||
|
||||
|
||||
def changePer():
    """Run the interactive perspective correction on the working image.

    When the user confirms a region, the corrected image becomes the new
    working and output image, the preview is refreshed, the display mode is
    reset to "original" and the result is marked as unsaved. Does nothing
    when no image is loaded or the correction is cancelled.
    """
    global img, img_o, img_tk, out_img, trans, isave, use_temp

    if not start:
        return

    # A random suffix gives every correction window a distinct name.
    newp = gu.changePerspGui(img, str(random.random()))
    if newp is None:
        return

    # The working image is decoded by OpenCV (BGR channel order) while PIL
    # interprets a 3-channel array as RGB; convert so the preview colours
    # are not swapped.
    preview_src = Img.fromarray(cv2.cvtColor(newp, cv2.COLOR_BGR2RGB))
    img_o = resizeImgAuto(WIN_W-2, SCREEN_H*2/3-2, preview_src)
    img_tk = Imgtk.PhotoImage(img_o)

    img = newp
    out_img = img.copy()
    con.config(image=img_tk)

    # Back to "original" mode: also leave manual-threshold mode, otherwise the
    # slider would stay active and re-threshold the pre-correction image.
    radi_n.set(0)
    use_temp = False
    sc_th.config(state='disabled', takefocus=False)

    trans = True
    isave = False
|
||||
|
||||
|
||||
def unPer():
    """Undo all perspective corrections after asking for confirmation.

    Restores the working image and the preview from the snapshots kept in
    ``img_not`` / ``img_o_not``, resets the display mode to "original" and
    marks the result as unsaved. Does nothing when no image is loaded, no
    correction has been applied, or the user declines.
    """
    global img, img_o, img_tk, out_img, img_not, img_o_not, trans, isave, use_temp

    if not start or not trans or not msg.askyesno('撤回', '您确定要撤销所有的透视矫正操作吗? '):
        return

    img = img_not.copy()
    out_img = img.copy()

    img_o = img_o_not.copy()
    img_tk = Imgtk.PhotoImage(img_o)
    con.config(image=img_tk)

    trans = False
    isave = False

    # Back to "original" mode: also leave manual-threshold mode, otherwise the
    # slider would stay active and re-threshold the corrected image.
    radi_n.set(0)
    use_temp = False
    sc_th.config(state='disabled', takefocus=False)
|
||||
|
||||
|
||||
def changeThresh(pos):
    """Slider callback: re-threshold the grayscale image and refresh the preview.

    ``pos`` is the value handed over by the slider; the threshold actually
    used is read from the ``thresh`` variable. When manual-threshold mode is
    not active, ``'Error'`` is printed and nothing else happens.
    """
    global out_img, img_tk, temp_out, use_temp, isave

    if use_temp:
        _, out_img = cv2.threshold(temp_out, thresh.get(), 255, cv2.THRESH_BINARY)
        shown = resizeImgAuto(WIN_W - 2, SCREEN_H * 2 / 3 - 2, Img.fromarray(out_img))
        img_tk = Imgtk.PhotoImage(shown)
        con.config(image=img_tk)
        isave = False
    else:
        print('Error')
|
||||
|
||||
|
||||
def selectRadio():
    """Radio-button callback: apply the selected display mode.

    Mode 0 shows the working image unchanged, 1 grayscale, 2 black/white with
    an Otsu threshold, 3 black/white with the threshold taken from the slider.
    The slider is enabled only in mode 3. Does nothing before an image is
    loaded.
    """
    global img_tk, out_img, img, img_o, use_temp, temp_out, isave

    if not start:
        return

    mode = radi_n.get()
    if mode in (1, 2, 3):
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        if mode == 3:
            # Keep the grayscale image so changeThresh() can re-threshold it.
            temp_out = gray
            out_img = cv2.threshold(temp_out, thresh.get(), 255, cv2.THRESH_BINARY)[1]
        elif mode == 2:
            out_img = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1]
        else:
            out_img = gray
        shown = resizeImgAuto(WIN_W - 2, SCREEN_H * 2 / 3 - 2, Img.fromarray(out_img))
        img_tk = Imgtk.PhotoImage(shown)
    else:
        out_img = img.copy()
        img_tk = Imgtk.PhotoImage(img_o)

    use_temp = (mode == 3)
    slider_state = 'normal' if use_temp else 'disabled'
    sc_th.config(state=slider_state, takefocus=use_temp)

    con.config(image=img_tk)
    isave = False
|
||||
|
||||
|
||||
def versionInfo():
    """Show the version / author information dialog."""
    text = (
        '简单扫描 --- By 姜业恒 10205101498 \n 版本: Alpha v0.01 \n '
        '此版本仅用于华东师范大学《数字图像处理》课程期末作业 \n'
        '(课程代码: SOFT0031132210.02) \n'
        '暂不开放未经作者允许对此软件进行修改或售卖的权利'
    )
    msg.showinfo('版本信息', text)
|
||||
|
||||
|
||||
def onclose():
    """Window-close handler: confirm before discarding unsaved changes."""
    # Ask only when there is something unsaved; keep the window open when
    # the user cancels.
    if not isave and not msg.askokcancel('简单扫描', '您还有未保存的修改, 确定退出吗?'):
        return
    root.destroy()
|
||||
|
||||
|
||||
def main():
    """Build the main window, wire up all callbacks and run the Tk main loop."""
    global con, sc_th

    root.title('简单扫描 -- By 姜业恒 10205101498')
    root.geometry(f'{WIN_W}x{INI_H}+10+10')

    # Containers: button row, mode row, separator line and preview area.
    fr_top = tk.Frame(root)
    fr_med = tk.Frame(root)
    sep_line = tk.Canvas(root, background='#dddddd', width=WIN_W, height=1)
    con = tk.Label(root, text='\n图像预览处\n\n\n\n\n\n')

    # First row: action buttons, listed in their left-to-right order.
    top_row = [
        tk.Button(fr_top, text='选择图片', command=lambda: selectFile()),
        tk.Button(fr_top, text='保存', command=lambda: saveFile()),
        tk.Button(fr_top, text='透视矫正', command=changePer),
        tk.Button(fr_top, text='撤销透视矫正', command=unPer),
        tk.Button(fr_top, text='版本信息', command=versionInfo),
    ]

    # Second row: display-mode radio buttons followed by the threshold slider.
    mode_labels = ('原图', '灰度', '黑白(自动)', '黑白(手动)')
    mid_row = [
        tk.Radiobutton(fr_med, text=label, variable=radi_n, value=value, command=selectRadio)
        for value, label in enumerate(mode_labels)
    ]
    sc_th = tk.Scale(fr_med, variable=thresh, from_=0, to=255, orient='horizontal',
                     length=int(WIN_W/2), command=changeThresh,
                     state='disabled', takefocus=False)
    mid_row.append(sc_th)

    for container in (fr_top, fr_med, sep_line):
        container.pack()
    for column, widget in enumerate(top_row):
        widget.grid(row=0, column=column)
    for column, widget in enumerate(mid_row):
        widget.grid(row=0, column=column)
    con.pack()

    root.protocol('WM_DELETE_WINDOW', onclose)
    root.mainloop()


if __name__ == '__main__':
    main()
|
@ -0,0 +1,2 @@
|
||||
numpy==1.21.0
|
||||
opencv-python==4.6.0.66
Pillow
|
After Width: | Height: | Size: 377 KiB |
After Width: | Height: | Size: 151 KiB |
After Width: | Height: | Size: 158 KiB |
After Width: | Height: | Size: 468 KiB |
After Width: | Height: | Size: 33 KiB |
After Width: | Height: | Size: 543 KiB |
After Width: | Height: | Size: 68 KiB |
After Width: | Height: | Size: 524 KiB |
After Width: | Height: | Size: 510 KiB |
After Width: | Height: | Size: 201 KiB |
After Width: | Height: | Size: 96 KiB |
After Width: | Height: | Size: 26 KiB |