|
|
|
@ -2,7 +2,7 @@ from PySide6.QtWidgets import (
|
|
|
|
|
QDialog, QVBoxLayout, QHBoxLayout, QGroupBox,
|
|
|
|
|
QPushButton, QLabel, QFileDialog, QProgressBar,
|
|
|
|
|
QComboBox, QCheckBox, QListWidget, QMessageBox,
|
|
|
|
|
QRadioButton
|
|
|
|
|
QRadioButton, QInputDialog, QLineEdit
|
|
|
|
|
)
|
|
|
|
|
from PySide6.QtCore import Qt, Signal, Slot, QThread
|
|
|
|
|
from pathlib import Path
|
|
|
|
@ -14,7 +14,7 @@ from src.utils.file_utils import FileUtils
|
|
|
|
|
|
|
|
|
|
class BatchOCRWorker(QThread):
|
|
|
|
|
"""批量OCR处理线程"""
|
|
|
|
|
progress_updated = Signal(int, int, str, bool)
|
|
|
|
|
progress_updated = Signal(int, int, str, int) # 修改为发送状态码而不是布尔值
|
|
|
|
|
file_progress_updated = Signal(int, int) # 当前文件的进度
|
|
|
|
|
finished = Signal(dict)
|
|
|
|
|
|
|
|
|
@ -26,12 +26,25 @@ class BatchOCRWorker(QThread):
|
|
|
|
|
self.options = options
|
|
|
|
|
|
|
|
|
|
def run(self):
    """Run OCR over every queued file, reporting progress after each one.

    The output directory is created if it does not exist. Each input is
    passed to ``self.engine.process_file`` together with an output path
    named ``<stem>_ocr<suffix>`` inside that directory. After every file
    ``progress_updated`` is emitted with (position, total, input path,
    result code); once all files are done ``finished`` is emitted with a
    dict mapping each input path to its result code.

    Result codes are forwarded unchanged; receivers in this file treat
    1 as success, 2 as "already OCRed" and anything else as failure.
    """
    import logging  # local import keeps this block independent of the file header

    results = {}
    total = len(self.files)

    # Make sure the output directory exists before any file is written.
    output_path = Path(self.output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    for position, input_file in enumerate(self.files, start=1):
        input_path = Path(input_file)
        output_file = output_path / f"{input_path.stem}_ocr{input_path.suffix}"

        try:
            result_code = self.engine.process_file(input_file, output_file, self.options)
        except Exception:
            # Thread boundary: one broken file must not abort the batch or
            # prevent `finished` from being emitted. Record it as a failure
            # (0 is not a success code) and keep going.
            logging.getLogger(__name__).exception("OCR failed for %s", input_file)
            result_code = 0

        results[input_file] = result_code
        self.progress_updated.emit(position, total, input_file, result_code)

    self.finished.emit(results)
|
|
|
|
|
|
|
|
|
|
class BatchDialog(QDialog):
|
|
|
|
@ -108,11 +121,62 @@ class BatchDialog(QDialog):
|
|
|
|
|
ocr_group = QGroupBox("OCR选项")
|
|
|
|
|
ocr_layout = QVBoxLayout(ocr_group)
|
|
|
|
|
|
|
|
|
|
# 语言选择
|
|
|
|
|
language_layout = QHBoxLayout()
|
|
|
|
|
language_layout.addWidget(QLabel("OCR语言:"))
|
|
|
|
|
self.language_combo = QComboBox()
|
|
|
|
|
self.language_combo.setToolTip("选择OCR识别使用的语言")
|
|
|
|
|
|
|
|
|
|
# 添加可用的语言
|
|
|
|
|
# 常用语言列表
|
|
|
|
|
common_langs = ['eng', 'chi_sim', 'chi_tra', 'jpn', 'kor']
|
|
|
|
|
|
|
|
|
|
# 首先添加常用语言
|
|
|
|
|
if self.ocr_engine.available_languages:
|
|
|
|
|
# 添加常用语言组
|
|
|
|
|
common_available = [lang for lang in common_langs if lang in self.ocr_engine.available_languages]
|
|
|
|
|
if common_available:
|
|
|
|
|
self.language_combo.addItem("--- 常用语言 ---", None)
|
|
|
|
|
for lang_code in common_available:
|
|
|
|
|
lang_name = self.ocr_engine.get_language_name(lang_code)
|
|
|
|
|
self.language_combo.addItem(lang_name, lang_code)
|
|
|
|
|
|
|
|
|
|
# 添加其他语言组
|
|
|
|
|
other_available = [lang for lang in self.ocr_engine.available_languages
|
|
|
|
|
if lang not in common_langs]
|
|
|
|
|
if other_available:
|
|
|
|
|
self.language_combo.addItem("--- 其他语言 ---", None)
|
|
|
|
|
# 按名称排序
|
|
|
|
|
other_langs_sorted = sorted(
|
|
|
|
|
other_available,
|
|
|
|
|
key=lambda x: self.ocr_engine.get_language_name(x)
|
|
|
|
|
)
|
|
|
|
|
for lang_code in other_langs_sorted:
|
|
|
|
|
lang_name = self.ocr_engine.get_language_name(lang_code)
|
|
|
|
|
self.language_combo.addItem(lang_name, lang_code)
|
|
|
|
|
else:
|
|
|
|
|
# 如果没有常用语言,直接添加所有语言
|
|
|
|
|
for lang_code in self.ocr_engine.available_languages:
|
|
|
|
|
lang_name = self.ocr_engine.get_language_name(lang_code)
|
|
|
|
|
self.language_combo.addItem(lang_name, lang_code)
|
|
|
|
|
|
|
|
|
|
# 设置默认语言
|
|
|
|
|
default_lang = self.config.get('default_options.language', 'eng')
|
|
|
|
|
index = self.language_combo.findData(default_lang)
|
|
|
|
|
if index >= 0:
|
|
|
|
|
self.language_combo.setCurrentIndex(index)
|
|
|
|
|
|
|
|
|
|
language_layout.addWidget(self.language_combo)
|
|
|
|
|
ocr_layout.addLayout(language_layout)
|
|
|
|
|
|
|
|
|
|
# 使用配置文件
|
|
|
|
|
config_layout = QHBoxLayout()
|
|
|
|
|
config_layout.addWidget(QLabel("使用配置文件:"))
|
|
|
|
|
self.config_combo = QComboBox()
|
|
|
|
|
self.config_combo.addItems(["默认配置"])
|
|
|
|
|
self.config_combo.addItem("默认配置")
|
|
|
|
|
# 添加已保存的配置
|
|
|
|
|
self.load_saved_configs()
|
|
|
|
|
self.config_combo.currentIndexChanged.connect(self.on_config_changed)
|
|
|
|
|
self.save_config_btn = QPushButton("保存当前配置")
|
|
|
|
|
self.save_config_btn.clicked.connect(self.save_current_config)
|
|
|
|
|
config_layout.addWidget(self.config_combo, 1)
|
|
|
|
@ -253,8 +317,35 @@ class BatchDialog(QDialog):
|
|
|
|
|
|
|
|
|
|
def save_current_config(self):
    """Ask for a name and store the current OCR options under it.

    The options (language plus the deskew / rotate / clean / optimize
    check boxes) are written to the ``saved_configs`` entry of
    ``self.config``. The name is then selected in the configuration combo
    box and a confirmation message is shown. Nothing is saved when the
    prompt is cancelled or the name is empty.
    """
    config_name, ok = QInputDialog.getText(
        self,
        "保存配置",
        "请输入配置名称:",
        QLineEdit.Normal,
        "我的OCR配置"
    )

    # Ignore surrounding whitespace so " foo " and "foo" are the same entry.
    config_name = config_name.strip() if config_name else ""
    if not ok or not config_name:
        return

    # Collect the options currently shown in the dialog.
    current_config = {
        "deskew": self.deskew_cb.isChecked(),
        "rotate_pages": self.rotate_cb.isChecked(),
        "clean": self.clean_cb.isChecked(),
        "optimize": self.optimize_cb.isChecked()
    }
    # Group-header rows in the language combo carry no data; leave the
    # key out in that case so loading falls back to its own default
    # instead of storing None.
    language = self.language_combo.currentData()
    if language:
        current_config["language"] = language

    # Work on a copy so the stored mapping is only replaced via set().
    saved_configs = dict(self.config.get('saved_configs', {}) or {})
    saved_configs[config_name] = current_config
    self.config.set('saved_configs', saved_configs)

    # Overwriting an existing name must not add a second combo entry.
    if self.config_combo.findText(config_name) < 0:
        self.config_combo.addItem(config_name)
    self.config_combo.setCurrentText(config_name)

    QMessageBox.information(self, "成功", f"配置 \"{config_name}\" 已保存")
|
|
|
|
|
|
|
|
|
|
def start_batch_ocr(self):
|
|
|
|
|
"""开始批量OCR处理"""
|
|
|
|
@ -273,12 +364,31 @@ class BatchDialog(QDialog):
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
# 收集OCR选项
|
|
|
|
|
options = {
|
|
|
|
|
options = {}
|
|
|
|
|
|
|
|
|
|
# 获取选中的语言代码
|
|
|
|
|
lang_index = self.language_combo.currentIndex()
|
|
|
|
|
lang_data = self.language_combo.itemData(lang_index)
|
|
|
|
|
if lang_data: # 确保不是分隔符
|
|
|
|
|
options["language"] = lang_data
|
|
|
|
|
else:
|
|
|
|
|
# 如果选中了分隔符,尝试找到下一个有效选项
|
|
|
|
|
for i in range(lang_index + 1, self.language_combo.count()):
|
|
|
|
|
next_data = self.language_combo.itemData(i)
|
|
|
|
|
if next_data:
|
|
|
|
|
self.language_combo.setCurrentIndex(i)
|
|
|
|
|
options["language"] = next_data
|
|
|
|
|
break
|
|
|
|
|
# 如果没有找到,使用默认语言
|
|
|
|
|
if "language" not in options:
|
|
|
|
|
options["language"] = "eng"
|
|
|
|
|
|
|
|
|
|
options.update({
|
|
|
|
|
"deskew": self.deskew_cb.isChecked(),
|
|
|
|
|
"rotate_pages": self.rotate_cb.isChecked(),
|
|
|
|
|
"clean": self.clean_cb.isChecked(),
|
|
|
|
|
"optimize": self.optimize_cb.isChecked()
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
# 禁用UI元素
|
|
|
|
|
self.start_btn.setEnabled(False)
|
|
|
|
@ -328,15 +438,28 @@ class BatchDialog(QDialog):
|
|
|
|
|
self.output_dir_btn.setEnabled(True)
|
|
|
|
|
self.output_dir_edit.setEnabled(True)
|
|
|
|
|
|
|
|
|
|
@Slot(int, int, str, int)
def update_progress(self, current, total, file, result_code):
    """Update the overall progress bar and the per-file status line.

    Args:
        current: 1-based count of files handled so far.
        total: Number of files in the batch.
        file: Path of the file that was just handled.
        result_code: 1 is shown as success (green), 2 as "already OCRed"
            (blue); any other value is shown as failure (red).
    """
    import html  # local import keeps this block independent of the file header

    # Guard against an empty batch so the slot can never divide by zero.
    percent = int(current * 100 / total) if total else 0
    self.total_progress_bar.setValue(percent)

    # Map the result code to its label text and colour.
    if result_code == 1:
        status, status_color = "成功", "green"
    elif result_code == 2:
        status, status_color = "已OCR过", "blue"
    else:
        status, status_color = "失败", "red"

    # The label text below is HTML, so markup characters in the file name
    # have to be escaped to be displayed literally.
    file_name = html.escape(Path(file).name, quote=False)
    status_text = f"处理 {file_name}: <span style='color: {status_color};'>{status}</span> ({current}/{total})"
    self.status_label.setText(status_text)
|
|
|
|
|
|
|
|
|
|
@Slot(int, int)
|
|
|
|
|
def update_file_progress(self, current, total):
|
|
|
|
@ -347,17 +470,80 @@ class BatchDialog(QDialog):
|
|
|
|
|
@Slot(dict)
def ocr_finished(self, results):
    """Summarise a finished batch run and restore the dialog buttons.

    Args:
        results: Mapping of input file path to integer result code.
            1 counts as success, 2 as "already OCRed", anything else as
            failure.
    """
    codes = list(results.values())
    total_count = len(codes)
    success_count = codes.count(1)
    already_ocr_count = codes.count(2)
    failed_count = total_count - success_count - already_ocr_count

    # Status line under the progress bars.
    status_msg = f"处理完成: {success_count}/{total_count} 文件成功"
    if already_ocr_count > 0:
        status_msg += f", {already_ocr_count} 文件已OCR过"
    self.status_label.setText(status_msg)

    # Restore the buttons for the idle state.
    # NOTE(review): an earlier revision called self.enable_ui() here
    # instead; confirm the remaining input widgets are re-enabled elsewhere.
    self.start_btn.setEnabled(True)
    self.cancel_btn.setEnabled(False)
    self.close_btn.setEnabled(True)

    # Summary dialog; the "already OCRed" line is only shown when relevant.
    message = f"批量OCR处理已完成\n成功: {success_count} 文件"
    if already_ocr_count > 0:
        message += f"\n已OCR过: {already_ocr_count} 文件"
    message += f"\n失败: {failed_count} 文件"

    QMessageBox.information(
        self,
        "处理完成",
        message
    )
|
|
|
|
|
|
|
|
|
|
def load_saved_configs(self):
    """Append the name of every stored configuration to the configuration combo box."""
    stored = self.config.get('saved_configs', {})
    for name in stored:
        self.config_combo.addItem(name)
|
|
|
|
|
|
|
|
|
|
def on_config_changed(self, index):
    """Apply the configuration selected in the configuration combo box.

    "默认配置" loads the ``default_options.*`` values from ``self.config``;
    any other name is looked up in the ``saved_configs`` mapping. A name
    that is not found there leaves the dialog unchanged.

    Args:
        index: Index supplied by ``currentIndexChanged``. It is not used;
            the selection is read from the combo box text.
    """
    config_name = self.config_combo.currentText()

    if config_name == "默认配置":
        read = self.config.get
        settings = {
            'deskew': read('default_options.deskew', True),
            'rotate_pages': read('default_options.rotate_pages', True),
            'clean': read('default_options.clean', False),
            'optimize': read('default_options.optimize', True),
            'language': read('default_options.language', 'eng'),
        }
    else:
        saved_configs = self.config.get('saved_configs', {})
        if config_name not in saved_configs:
            return
        saved = saved_configs[config_name]
        settings = {
            'deskew': saved.get('deskew', True),
            'rotate_pages': saved.get('rotate_pages', True),
            'clean': saved.get('clean', False),
            'optimize': saved.get('optimize', True),
            'language': saved.get('language', 'eng'),
        }

    # Push the option values into the check boxes.
    self.deskew_cb.setChecked(settings['deskew'])
    self.rotate_cb.setChecked(settings['rotate_pages'])
    self.clean_cb.setChecked(settings['clean'])
    self.optimize_cb.setChecked(settings['optimize'])

    # A configuration may have been stored without a language (None).
    # Looking that up would match the data-less group-header rows of the
    # language combo, so keep the current selection instead.
    language = settings['language']
    if not language:
        return
    lang_index = self.language_combo.findData(language)
    if lang_index >= 0:
        self.language_combo.setCurrentIndex(lang_index)
|