You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

181 lines
4.9 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# 导入工具包
import numpy as np
import argparse
import cv2
import pytesseract
import os
from PIL import Image
# def process_image(image_path):
# try:
# # 使用Pillow库打开并显示图片
# with Image.open(image_path) as img:
# img.show()
# # 在这里添加更多处理图片的代码...
# except IOError:
# print(f"无法打开图片: {image_path}")
def main(argv=None):
    """Validate the command line and print a start-up marker.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), argparse falls back to ``sys.argv[1:]``, which
            is what the original zero-argument call did.

    Raises:
        SystemExit: Raised by argparse when the required ``-i/--image``
            option is missing or the arguments are otherwise invalid.
    """
    parser = argparse.ArgumentParser(description="处理图片的脚本")
    parser.add_argument("-i", "--image", required=True, help="指定要处理的图片的路径")
    # Only the validation side effect is wanted here; the parsed namespace is
    # not used by this function, so it is deliberately not bound to a name.
    parser.parse_args(argv)
    print("main")
def order_points(pts):
    """Order corner points as top-left, top-right, bottom-right, bottom-left.

    The corners are picked with the coordinate sum/difference heuristic:
    the smallest ``x + y`` is taken as top-left, the largest as
    bottom-right, the smallest ``y - x`` as top-right and the largest
    ``y - x`` as bottom-left.

    Args:
        pts: Array-like of ``(x, y)`` points. Anything that can be reshaped
            to ``(N, 2)`` is accepted, e.g. a ``(4, 2)`` array, a nested
            list, or an OpenCV-style ``(4, 1, 2)`` contour.

    Returns:
        A ``(4, 2)`` ``float32`` array in TL, TR, BR, BL order.

    Raises:
        ValueError: If ``pts`` is empty or cannot be reshaped to ``(N, 2)``.
    """
    # Coerce instead of assuming an ndarray so lists and raw contour output
    # work too; the dtype is left alone so sums use the caller's precision.
    points = np.asarray(pts).reshape(-1, 2)
    if points.shape[0] == 0:
        raise ValueError("order_points() needs at least one (x, y) point")

    rect = np.zeros((4, 2), dtype="float32")

    # Top-left has the smallest x + y, bottom-right the largest.
    coord_sum = points.sum(axis=1)
    rect[0] = points[np.argmin(coord_sum)]
    rect[2] = points[np.argmax(coord_sum)]

    # np.diff along axis 1 yields y - x: smallest is top-right, largest is
    # bottom-left.
    coord_diff = np.diff(points, axis=1)
    rect[1] = points[np.argmin(coord_diff)]
    rect[3] = points[np.argmax(coord_diff)]
    return rect
def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` to an upright rectangle.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        pts: Four corner points; they are put into TL, TR, BR, BL order by
            ``order_points`` before use.

    Returns:
        The perspective-corrected image. Its width and height are the
        longer of each pair of opposite edges, truncated to integers.
    """
    def _edge_length(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output width: the longer of the bottom and top edges.
    out_w = max(
        int(_edge_length(bottom_right, bottom_left)),
        int(_edge_length(top_right, top_left)),
    )
    # Output height: the longer of the right and left edges.
    out_h = max(
        int(_edge_length(top_right, bottom_right)),
        int(_edge_length(top_left, bottom_left)),
    )

    # Destination corners, in the same TL, TR, BR, BL order as `corners`.
    target = np.array(
        [
            [0, 0],
            [out_w - 1, 0],
            [out_w - 1, out_h - 1],
            [0, out_h - 1],
        ],
        dtype="float32",
    )

    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_w, out_h))
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping aspect ratio.

    Args:
        image: Image array; its first two ``shape`` entries are read as
            height and width.
        width: Target width in pixels. Takes precedence when both
            ``width`` and ``height`` are given.
        height: Target height in pixels, used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself when neither dimension is
        requested.
    """
    src_h, src_w = image.shape[:2]

    if width is None and height is None:
        return image

    if width is not None:
        # Width drives the scale; the height follows, truncated to int.
        scale = width / float(src_w)
        target = (width, int(src_h * scale))
    else:
        # Height drives the scale; the width follows, truncated to int.
        scale = height / float(src_h)
        target = (int(src_w * scale), height)

    return cv2.resize(image, target, interpolation=inter)
# Parse the command line: the path of the image to scan is required.
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
                help="Path to the image to be scanned")
args = vars(ap.parse_args())
# Debug traces kept from the original script.
print("arg")
print(__name__)
if __name__ == "__main__":
    main()

# Load the input image. cv2.imread reports an unreadable or missing file by
# returning None instead of raising, so check before touching `.shape`.
image = cv2.imread(args["image"])
if image is None:
    raise SystemExit("Could not read image: {}".format(args["image"]))

# Contours are detected on a copy scaled to 500 px high; `ratio` maps the
# coordinates found on that copy back onto the full-resolution original.
ratio = image.shape[0] / 500.0
orig = image.copy()
image = resize(orig, height=500)

# Pre-processing: grayscale, Gaussian blur, then Canny edge detection.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(gray, 75, 200)
print("STEP 1: 边缘检测")

# Contour detection. Index -2 selects the contour list on both OpenCV 3.x,
# which returns (image, contours, hierarchy), and OpenCV 4.x, which returns
# (contours, hierarchy); index 0 is only correct on 4.x.
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
# Keep the five largest contours by area.
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

# Take the first (largest) contour whose polygon approximation has exactly
# four vertices.
screenCnt = None
for c in cnts:
    peri = cv2.arcLength(c, True)
    # Epsilon (max distance from the original contour) is 10% of the
    # perimeter; True marks the curve as closed.
    approx = cv2.approxPolyDP(c, 0.10 * peri, True)
    if len(approx) == 4:
        screenCnt = approx
        break
if screenCnt is None:
    # Without this guard the code below failed with a NameError.
    raise SystemExit(
        "No four-cornered contour found in: {}".format(args["image"]))

print("STEP 2: 获取轮廓")
cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)

# Perspective transform on the full-resolution image, using the contour
# corners scaled back up by `ratio`.
warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)

# Binarise the warped page and save it.
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
ref = cv2.threshold(warped, 100, 255, cv2.THRESH_BINARY)[1]
cv2.imwrite('scan.jpg', ref)
print("STEP 3: 变换")

# OCR pre-processing, selected by the `preprocess` switch.
preprocess = "blur"
if preprocess == "thresh":
    gray = cv2.threshold(ref, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
elif preprocess == "blur":
    gray = cv2.medianBlur(ref, 3)
else:
    # Fall back to the binarised scan; otherwise `gray` would still hold the
    # blurred 500 px preview from the edge-detection step.
    gray = ref

# Hand the array to Tesseract in memory. The previous temp-PNG round-trip
# never closed the PIL file handle and left "<pid>.png" behind if OCR raised.
text = pytesseract.image_to_string(Image.fromarray(gray))

# With errors="replace" a UTF-8 write cannot fail on encoding, so the old
# chain of fallback encodings was never reached.
with open("out.txt", "w", encoding="utf-8", errors="replace") as out_file:
    out_file.write(text)
print("text is written to out.txt")