main
parent
5f4775ef2d
commit
faec49b807
Binary file not shown.
@ -0,0 +1,11 @@
|
||||
images_temp/
|
||||
output.xlsx
|
||||
result.xls
|
||||
build
|
||||
test_files/
|
||||
config.yml
|
||||
*.xlsx
|
||||
*.xls
|
||||
*.pdf
|
||||
.vscode/
|
||||
__pycache__
|
||||
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 绯末
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@ -0,0 +1,2 @@
|
||||
secret_id: AKIDd9U12121212121B2hS6YHvtg
|
||||
secret_key: jGkBUgh1212122112mgR2KahBZllZA
|
||||
@ -0,0 +1,30 @@
|
||||
import sys
|
||||
import os
|
||||
import ocr
|
||||
import draw
|
||||
import xlrd
|
||||
import xlwt
|
||||
import yaml_class
|
||||
from xlutils.copy import copy
|
||||
|
||||
# Convert a single picture into an Excel workbook through the OCR service.
# Usage: python <this script> <picture path>
if len(sys.argv) < 2:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit('usage: python ' + sys.argv[0] + ' <picture path>')

pic_path = sys.argv[1]

# File name without directory or final extension. splitext only strips the
# last suffix, so "scan.v2.jpg" yields "scan.v2" rather than "scan".
pic_name = os.path.splitext(os.path.basename(pic_path))[0]

print('doing')

# Convert using OCR
config = yaml_class.get_yaml_data("config.yml")
trans = ocr.OCR()
path_excel = trans.img_to_excel(
    pic_name,
    # Pass the actual picture path; the bare name has neither directory
    # nor extension and cannot locate the input file.
    image_path=pic_path,
    secret_id=config['secret_id'],
    secret_key=config['secret_key'],
)

# Re-save the OCR result in legacy .xls format (xlrd book -> xlwt workbook).
# NOTE(review): the PDF variant of this script opens '<name>.xlsx' here;
# confirm which file name ocr.img_to_excel really writes.
# NOTE(review): xlrd 2.0+ no longer reads .xlsx files, so this step
# presumably requires xlrd < 2.0 - confirm the pinned version.
old_excel = xlrd.open_workbook('output.xlsx')
new_excel = copy(old_excel)
new_excel.save(pic_name + '.xls')
|
||||
@ -0,0 +1,10 @@
|
||||
{
|
||||
"systemParams": "win32-x64-83",
|
||||
"modulesFolders": [],
|
||||
"flags": [],
|
||||
"linkedModules": [],
|
||||
"topLevelPatterns": [],
|
||||
"lockfileEntries": {},
|
||||
"files": [],
|
||||
"artifacts": {}
|
||||
}
|
||||
@ -0,0 +1,33 @@
|
||||
import sys
|
||||
import os
|
||||
import ocr
|
||||
import draw
|
||||
import xlrd
|
||||
import xlwt
|
||||
import yaml_class
|
||||
from xlutils.copy import copy
|
||||
|
||||
# Convert the first page of a PDF into an Excel workbook through the OCR
# service.
# Usage: python <this script> <pdf path>
if len(sys.argv) < 2:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit('usage: python ' + sys.argv[0] + ' <pdf path>')

pdf_path = sys.argv[1]

# File name without directory or final extension. splitext only strips the
# last suffix, so "report.v2.pdf" yields "report.v2" rather than "report".
pdf_name = os.path.splitext(os.path.basename(pdf_path))[0]

print('doing')

# Convert the PDF to JPG files, one per page
images_dir = './images_temp/' + pdf_name + '/'
draw.pdf2jpg(pdf_path, images_dir)

# Convert using OCR
config = yaml_class.get_yaml_data("config.yml")
# Only the first rendered page (0.jpg) is sent to OCR.
jpg_name = images_dir + '0.jpg'
trans = ocr.OCR()
path_excel = trans.img_to_excel(
    pdf_name,
    image_path=jpg_name,
    secret_id=config['secret_id'],
    secret_key=config['secret_key'],
)

# Re-save the OCR result in legacy .xls format (xlrd book -> xlwt workbook).
# NOTE(review): xlrd 2.0+ no longer reads .xlsx files, so this step
# presumably requires xlrd < 2.0 - confirm the pinned version.
old_excel = xlrd.open_workbook(pdf_name + '.xlsx')
new_excel = copy(old_excel)
new_excel.save(pdf_name + '.xls')
|
||||
Binary file not shown.
@ -0,0 +1,13 @@
|
||||
import yaml
|
||||
import os
|
||||
|
||||
|
||||
def get_yaml_data(yaml_file):
    """Read a YAML file and return its parsed content.

    Args:
        yaml_file: Path of the YAML file to read; it is decoded as UTF-8.

    Returns:
        The parsed document: a dict or list for mapping/sequence documents,
        or None when the file is empty.
    """
    # Open the yaml file; the context manager closes it even if read() fails.
    with open(yaml_file, 'r', encoding="utf-8") as file:
        file_data = file.read()

    # Convert the string to a dict or list. safe_load builds only plain
    # Python objects and, unlike a bare yaml.load(), does not need the
    # Loader argument that PyYAML 6+ makes mandatory.
    return yaml.safe_load(file_data)
|
||||
@ -0,0 +1,4 @@
|
||||
# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
|
||||
# yarn lockfile v1
|
||||
|
||||
|
||||
Loading…
Reference in new issue