diff --git a/.DS_Store b/.DS_Store index 3487410..c4b63d3 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/.idea/misc.xml b/.idea/misc.xml index 35d9e0e..7199a1e 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,7 @@ + + \ No newline at end of file diff --git a/README.md b/README.md index 1041de9..a64adc3 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,68 @@ git clone https://github.com/yourusername/OCRmyPDF-GUI.git cd OCRmyPDF-GUI ``` +## 安装Tesseract语言包 + +默认情况下,OCRmyPDF只安装英语语言包。要使用其他语言进行OCR,需要安装额外的语言包: + +### macOS + +```bash +# 安装所有语言包 +brew install tesseract-lang + +# 或者手动安装特定语言包 +# 1. 下载语言包文件,例如简体中文: +# https://github.com/tesseract-ocr/tessdata/raw/main/chi_sim.traineddata +# 2. 复制到Tesseract的tessdata目录: +# sudo cp chi_sim.traineddata /opt/homebrew/share/tessdata/ +# 或 +# sudo cp chi_sim.traineddata /usr/local/share/tessdata/ +``` + +### Ubuntu/Debian + +```bash +# 安装特定语言包,例如简体中文: +sudo apt-get install tesseract-ocr-chi-sim + +# 查看所有可用语言包: +apt-cache search tesseract-ocr +``` + +### Fedora + +```bash +# 安装特定语言包,例如简体中文: +sudo dnf install tesseract-langpack-chi_sim + +# 查看所有可用语言包: +dnf search tesseract +``` + +### Windows + +1. 从以下网址下载所需语言包文件: + https://github.com/tesseract-ocr/tessdata/ + +2. 将下载的`.traineddata`文件放置在Tesseract安装目录的tessdata文件夹中,通常位于: + `C:\Program Files\Tesseract-OCR\tessdata` + +### 常用语言代码 + +- `eng` - 英语 +- `chi_sim` - 简体中文 +- `chi_tra` - 繁体中文 +- `jpn` - 日语 +- `kor` - 韩语 +- `fra` - 法语 +- `deu` - 德语 +- `rus` - 俄语 +- `spa` - 西班牙语 +- `ita` - 意大利语 + +更多信息请参考:[OCRmyPDF语言包文档](https://ocrmypdf.readthedocs.io/en/latest/languages.html) + ## 使用方法 运行启动脚本: diff --git a/docs/.DS_Store b/docs/.DS_Store new file mode 100644 index 0000000..916e4f3 Binary files /dev/null and b/docs/.DS_Store differ diff --git a/docs/ppt图片/1. 项目结构图.png b/docs/ppt图片/1. 项目结构图.png new file mode 100644 index 0000000..452de01 Binary files /dev/null and b/docs/ppt图片/1. 项目结构图.png differ diff --git a/docs/ppt图片/2. 应用程序架构图.png b/docs/ppt图片/2. 应用程序架构图.png new file mode 100644 index 0000000..e0716f9 Binary files /dev/null and b/docs/ppt图片/2. 应用程序架构图.png differ diff --git a/docs/ppt图片/3. 处理流程图.png b/docs/ppt图片/3. 处理流程图.png new file mode 100644 index 0000000..48091b5 Binary files /dev/null and b/docs/ppt图片/3. 处理流程图.png differ diff --git a/docs/ppt图片/4. 多线程处理机制.png b/docs/ppt图片/4. 多线程处理机制.png new file mode 100644 index 0000000..b8f3c3f Binary files /dev/null and b/docs/ppt图片/4. 多线程处理机制.png differ diff --git a/docs/ppt图片/5. OCR文件检测逻辑.png b/docs/ppt图片/5. OCR文件检测逻辑.png new file mode 100644 index 0000000..b30859f Binary files /dev/null and b/docs/ppt图片/5. OCR文件检测逻辑.png differ diff --git a/docs/ppt图片/6. 配置管理系统.png b/docs/ppt图片/6. 配置管理系统.png new file mode 100644 index 0000000..a07a05c Binary files /dev/null and b/docs/ppt图片/6. 配置管理系统.png differ diff --git a/docs/ppt图片/7. 项目功能概览.png b/docs/ppt图片/7. 项目功能概览.png new file mode 100644 index 0000000..a66315e Binary files /dev/null and b/docs/ppt图片/7. 项目功能概览.png differ diff --git a/docs/ppt图片/8. 状态码处理逻辑.png b/docs/ppt图片/8. 状态码处理逻辑.png new file mode 100644 index 0000000..8788615 Binary files /dev/null and b/docs/ppt图片/8. 状态码处理逻辑.png differ diff --git a/docs/开源软件维护报告文档.docx b/docs/开源软件维护报告文档.docx new file mode 100644 index 0000000..5cde4ff Binary files /dev/null and b/docs/开源软件维护报告文档.docx differ diff --git a/docs/文档模板-开源软件维护报告文档.docx b/docs/文档模板-开源软件维护报告文档.docx deleted file mode 100644 index 1d921df..0000000 Binary files a/docs/文档模板-开源软件维护报告文档.docx and /dev/null differ diff --git a/requirements.txt b/requirements.txt index 2256d12..66307f9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ PySide6>=6.5.0 -pytest>=7.0.0 \ No newline at end of file +pytest>=7.0.0 +ocrmypdf>=15.0.0 \ No newline at end of file diff --git a/resources/.DS_Store b/resources/.DS_Store new file mode 100644 index 0000000..ba1fb6b Binary files /dev/null and b/resources/.DS_Store differ diff --git a/src/.DS_Store b/src/.DS_Store new file mode 100644 index 0000000..d81da7e Binary files /dev/null and b/src/.DS_Store differ diff --git a/src/core/__pycache__/__init__.cpython-313.pyc b/src/core/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..d9ca637 Binary files /dev/null and b/src/core/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/core/__pycache__/ocr_engine.cpython-313.pyc b/src/core/__pycache__/ocr_engine.cpython-313.pyc new file mode 100644 index 0000000..d6ff2b6 Binary files /dev/null and b/src/core/__pycache__/ocr_engine.cpython-313.pyc differ diff --git a/src/core/ocr_engine.py b/src/core/ocr_engine.py index 312acbb..ff6f154 100644 --- a/src/core/ocr_engine.py +++ b/src/core/ocr_engine.py @@ -19,11 +19,76 @@ class OCREngine: ) if result.returncode == 0: self.logger.info(f"OCRmyPDF命令行工具可用: {result.stdout.strip()}") + # 获取支持的语言列表 + self.available_languages = self.get_available_languages() + self.logger.info(f"可用的OCR语言: {', '.join(self.available_languages)}") else: self.logger.warning("OCRmyPDF命令行工具返回错误") + self.available_languages = [] except FileNotFoundError: self.logger.error("OCRmyPDF命令行工具未找到") + self.available_languages = [] + def get_available_languages(self): + """获取系统中已安装的Tesseract语言包列表""" + try: + result = subprocess.run( + ["tesseract", "--list-langs"], + capture_output=True, + text=True, + check=False + ) + if result.returncode == 0: + # 解析输出,跳过第一行(标题行) + languages = result.stdout.strip().split('\n')[1:] + return [lang.strip() for lang in languages] + return [] + except Exception as e: + self.logger.error(f"获取语言列表失败: {e}") + return [] + + def get_language_name(self, lang_code): + """获取语言代码对应的显示名称""" + language_names = { + 'eng': '英语 (English)', + 'chi_sim': '简体中文 (Chinese Simplified)', + 'chi_tra': '繁体中文 (Chinese Traditional)', + 'jpn': '日语 (Japanese)', + 'kor': '韩语 (Korean)', + 'fra': '法语 (French)', + 'deu': '德语 (German)', + 'rus': '俄语 (Russian)', + 'spa': '西班牙语 (Spanish)', + 'ita': '意大利语 (Italian)', + 'por': '葡萄牙语 (Portuguese)', + 'nld': '荷兰语 (Dutch)', + 'ara': '阿拉伯语 (Arabic)', + 'hin': '印地语 (Hindi)', + 'vie': '越南语 (Vietnamese)', + 'tha': '泰语 (Thai)', + 'tur': '土耳其语 (Turkish)', + 'heb': '希伯来语 (Hebrew)', + 'swe': '瑞典语 (Swedish)', + 'fin': '芬兰语 (Finnish)', + 'dan': '丹麦语 (Danish)', + 'nor': '挪威语 (Norwegian)', + 'pol': '波兰语 (Polish)', + 'ukr': '乌克兰语 (Ukrainian)', + 'ces': '捷克语 (Czech)', + 'slk': '斯洛伐克语 (Slovak)', + 'hun': '匈牙利语 (Hungarian)', + 'ron': '罗马尼亚语 (Romanian)', + 'bul': '保加利亚语 (Bulgarian)', + 'ell': '希腊语 (Greek)', + 'ind': '印度尼西亚语 (Indonesian)', + 'msa': '马来语 (Malay)', + 'cat': '加泰罗尼亚语 (Catalan)', + 'lav': '拉脱维亚语 (Latvian)', + 'lit': '立陶宛语 (Lithuanian)', + 'est': '爱沙尼亚语 (Estonian)' + } + return language_names.get(lang_code, lang_code) + def process_file(self, input_file, output_file, options=None): """ 使用OCRmyPDF处理单个文件 @@ -34,36 +99,106 @@ class OCREngine: options (dict): OCR选项 Returns: - bool: 处理是否成功 + int: 处理结果状态码 + 0 - 失败 + 1 - 成功 + 2 - 文件已有文本层(已OCR过) """ if options is None: options = {} self.logger.info(f"处理文件: {input_file} -> {output_file}") + # 检查输入文件是否存在 + if not Path(input_file).exists(): + self.logger.error(f"输入文件不存在: {input_file}") + return 0 + + # 检查输入文件是否可读 + if not os.access(input_file, os.R_OK): + self.logger.error(f"输入文件不可读: {input_file}") + return 0 + + # 检查输出目录是否可写 + output_dir = Path(output_file).parent + if not os.access(output_dir, os.W_OK): + self.logger.error(f"输出目录不可写: {output_dir}") + return 0 + + # 处理文件 + result = self._process_file_internal(input_file, output_file, options, force_ocr=False) + + # 如果失败且错误是因为已有文本层,返回特殊状态码 + if not result and self._last_error_is_existing_text(): + self.logger.info(f"文件 {input_file} 已有文本层,无需OCR处理") + return 2 + + # 返回常规状态码 + return 1 if result else 0 + + def _last_error_is_existing_text(self): + """检查上次错误是否因为PDF已有文本层""" + if hasattr(self, 'last_error') and isinstance(self.last_error, str): + return "page already has text" in self.last_error + return False + + def _process_file_internal(self, input_file, output_file, options, force_ocr=False): + """ + 内部方法:使用OCRmyPDF处理单个文件 + + Args: + input_file (str): 输入PDF文件路径 + output_file (str): 输出PDF文件路径 + options (dict): OCR选项 + force_ocr (bool): 是否强制OCR + + Returns: + bool: 处理是否成功 + """ # 构建命令行参数 cmd = ["ocrmypdf"] - # 添加语言选项 - 默认使用英文 - cmd.extend(["-l", "eng"]) + # 添加优化选项(必须在其他选项之前) + if options.get('optimize', False): + cmd.extend(["-O", "1"]) # 使用1级优化 + self.logger.info("启用优化输出文件大小") + + # 添加语言选项 + lang = options.get('language', 'eng') + if lang in self.available_languages: + cmd.extend(["-l", lang]) + self.logger.info(f"使用语言: {lang}") + else: + self.logger.warning(f"不支持的语言: {lang},使用默认语言(eng)") + cmd.extend(["-l", "eng"]) # 添加其他选项 if options.get('deskew', False): cmd.append("--deskew") + self.logger.info("启用自动校正倾斜页面") if options.get('rotate_pages', False): cmd.append("--rotate-pages") + self.logger.info("启用自动旋转页面") if options.get('clean', False): cmd.append("--clean") + self.logger.info("启用清理图像") if 'jobs' in options: cmd.extend(["--jobs", str(options['jobs'])]) + self.logger.info(f"使用 {options['jobs']} 个处理线程") if 'output_type' in options: cmd.extend(["--output-type", options['output_type']]) + self.logger.info(f"输出类型: {options['output_type']}") + + # 添加强制OCR选项 + if force_ocr: + cmd.append("--force-ocr") + self.logger.info("启用强制OCR处理") - # 添加输入和输出文件 + # 添加输入和输出文件(必须在最后) cmd.extend([str(input_file), str(output_file)]) # 执行命令 @@ -79,9 +214,12 @@ class OCREngine: self.logger.info("OCRmyPDF命令执行成功") return True else: + self.last_error = result.stderr self.logger.error(f"OCRmyPDF命令执行失败: {result.stderr}") + self.logger.error(f"命令输出: {result.stdout}") return False except Exception as e: + self.last_error = str(e) self.logger.error(f"执行OCRmyPDF命令时出错: {e}") return False @@ -96,7 +234,7 @@ class OCREngine: progress_callback (callable): 进度回调函数,接收参数(current, total, file, success) Returns: - dict: 处理结果,键为输入文件路径,值为处理是否成功 + dict: 处理结果,键为输入文件路径,值为处理结果状态码(0-失败,1-成功,2-已OCR过) """ results = {} total = len(file_list) @@ -110,10 +248,12 @@ class OCREngine: output_file = output_path / f"{input_path.stem}_ocr{input_path.suffix}" self.logger.info(f"处理文件 {i+1}/{total}: {input_file}") - success = self.process_file(input_file, output_file, options) - results[input_file] = success + result_code = self.process_file(input_file, output_file, options) + results[input_file] = result_code if progress_callback: + # 对于回调,我们将状态码2(已OCR过)也视为"成功",只是一种特殊的成功情况 + success = result_code > 0 progress_callback(i + 1, total, input_file, success) return results \ No newline at end of file diff --git a/src/gui/.DS_Store b/src/gui/.DS_Store new file mode 100644 index 0000000..388579d Binary files /dev/null and b/src/gui/.DS_Store differ diff --git a/src/gui/batch_dialog.py b/src/gui/batch_dialog.py index 050abc6..17347f8 100644 --- a/src/gui/batch_dialog.py +++ b/src/gui/batch_dialog.py @@ -2,7 +2,7 @@ from PySide6.QtWidgets import ( QDialog, QVBoxLayout, QHBoxLayout, QGroupBox, QPushButton, QLabel, QFileDialog, QProgressBar, QComboBox, QCheckBox, QListWidget, QMessageBox, - QRadioButton + QRadioButton, QInputDialog, QLineEdit ) from PySide6.QtCore import Qt, Signal, Slot, QThread from pathlib import Path @@ -14,7 +14,7 @@ from src.utils.file_utils import FileUtils class BatchOCRWorker(QThread): """批量OCR处理线程""" - progress_updated = Signal(int, int, str, bool) + progress_updated = Signal(int, int, str, int) # 修改为发送状态码而不是布尔值 file_progress_updated = Signal(int, int) # 当前文件的进度 finished = Signal(dict) @@ -26,12 +26,25 @@ class BatchOCRWorker(QThread): self.options = options def run(self): - results = self.engine.process_batch( - self.files, - self.output_dir, - self.options, - lambda current, total, file, success: self.progress_updated.emit(current, total, file, success) - ) + results = {} + total = len(self.files) + + # 确保输出目录存在 + output_path = Path(self.output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + for i, input_file in enumerate(self.files): + input_path = Path(input_file) + output_file = output_path / f"{input_path.stem}_ocr{input_path.suffix}" + + # 处理文件并获取结果码 + result_code = self.engine.process_file(input_file, output_file, self.options) + results[input_file] = result_code + + # 发送进度更新 + success = result_code > 0 # 成功或已OCR过都视为"成功" + self.progress_updated.emit(i + 1, total, input_file, result_code) + self.finished.emit(results) class BatchDialog(QDialog): @@ -108,11 +121,62 @@ class BatchDialog(QDialog): ocr_group = QGroupBox("OCR选项") ocr_layout = QVBoxLayout(ocr_group) + # 语言选择 + language_layout = QHBoxLayout() + language_layout.addWidget(QLabel("OCR语言:")) + self.language_combo = QComboBox() + self.language_combo.setToolTip("选择OCR识别使用的语言") + + # 添加可用的语言 + # 常用语言列表 + common_langs = ['eng', 'chi_sim', 'chi_tra', 'jpn', 'kor'] + + # 首先添加常用语言 + if self.ocr_engine.available_languages: + # 添加常用语言组 + common_available = [lang for lang in common_langs if lang in self.ocr_engine.available_languages] + if common_available: + self.language_combo.addItem("--- 常用语言 ---", None) + for lang_code in common_available: + lang_name = self.ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + + # 添加其他语言组 + other_available = [lang for lang in self.ocr_engine.available_languages + if lang not in common_langs] + if other_available: + self.language_combo.addItem("--- 其他语言 ---", None) + # 按名称排序 + other_langs_sorted = sorted( + other_available, + key=lambda x: self.ocr_engine.get_language_name(x) + ) + for lang_code in other_langs_sorted: + lang_name = self.ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + else: + # 如果没有常用语言,直接添加所有语言 + for lang_code in self.ocr_engine.available_languages: + lang_name = self.ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + + # 设置默认语言 + default_lang = self.config.get('default_options.language', 'eng') + index = self.language_combo.findData(default_lang) + if index >= 0: + self.language_combo.setCurrentIndex(index) + + language_layout.addWidget(self.language_combo) + ocr_layout.addLayout(language_layout) + # 使用配置文件 config_layout = QHBoxLayout() config_layout.addWidget(QLabel("使用配置文件:")) self.config_combo = QComboBox() - self.config_combo.addItems(["默认配置"]) + self.config_combo.addItem("默认配置") + # 添加已保存的配置 + self.load_saved_configs() + self.config_combo.currentIndexChanged.connect(self.on_config_changed) self.save_config_btn = QPushButton("保存当前配置") self.save_config_btn.clicked.connect(self.save_current_config) config_layout.addWidget(self.config_combo, 1) @@ -253,8 +317,35 @@ class BatchDialog(QDialog): def save_current_config(self): """保存当前配置""" - # 这里可以实现保存当前配置的功能 - QMessageBox.information(self, "提示", "配置保存功能尚未实现") + # 获取当前配置名称 + config_name, ok = QInputDialog.getText( + self, + "保存配置", + "请输入配置名称:", + QLineEdit.Normal, + "我的OCR配置" + ) + + if ok and config_name: + # 收集当前配置 + current_config = { + "language": self.language_combo.currentData(), + "deskew": self.deskew_cb.isChecked(), + "rotate_pages": self.rotate_cb.isChecked(), + "clean": self.clean_cb.isChecked(), + "optimize": self.optimize_cb.isChecked() + } + + # 保存到配置中 + saved_configs = self.config.get('saved_configs', {}) + saved_configs[config_name] = current_config + self.config.set('saved_configs', saved_configs) + + # 更新下拉框 + self.config_combo.addItem(config_name) + self.config_combo.setCurrentText(config_name) + + QMessageBox.information(self, "成功", f"配置 \"{config_name}\" 已保存") def start_batch_ocr(self): """开始批量OCR处理""" @@ -273,12 +364,31 @@ class BatchDialog(QDialog): return # 收集OCR选项 - options = { + options = {} + + # 获取选中的语言代码 + lang_index = self.language_combo.currentIndex() + lang_data = self.language_combo.itemData(lang_index) + if lang_data: # 确保不是分隔符 + options["language"] = lang_data + else: + # 如果选中了分隔符,尝试找到下一个有效选项 + for i in range(lang_index + 1, self.language_combo.count()): + next_data = self.language_combo.itemData(i) + if next_data: + self.language_combo.setCurrentIndex(i) + options["language"] = next_data + break + # 如果没有找到,使用默认语言 + if "language" not in options: + options["language"] = "eng" + + options.update({ "deskew": self.deskew_cb.isChecked(), "rotate_pages": self.rotate_cb.isChecked(), "clean": self.clean_cb.isChecked(), "optimize": self.optimize_cb.isChecked() - } + }) # 禁用UI元素 self.start_btn.setEnabled(False) @@ -328,15 +438,28 @@ class BatchDialog(QDialog): self.output_dir_btn.setEnabled(True) self.output_dir_edit.setEnabled(True) - @Slot(int, int, str, bool) - def update_progress(self, current, total, file, success): + @Slot(int, int, str, int) + def update_progress(self, current, total, file, result_code): """更新总进度""" percent = int(current * 100 / total) self.total_progress_bar.setValue(percent) file_name = Path(file).name - status = "成功" if success else "失败" - self.status_label.setText(f"处理 {file_name}: {status} ({current}/{total})") + + # 根据状态码设置状态文本和颜色 + if result_code == 1: + status = "成功" + status_color = "green" + elif result_code == 2: + status = "已OCR过" + status_color = "blue" + else: + status = "失败" + status_color = "red" + + # 使用HTML格式化状态文本 + status_text = f"处理 {file_name}: {status} ({current}/{total})" + self.status_label.setText(status_text) @Slot(int, int) def update_file_progress(self, current, total): @@ -347,17 +470,80 @@ class BatchDialog(QDialog): @Slot(dict) def ocr_finished(self, results): """OCR处理完成""" - success_count = sum(1 for success in results.values() if success) + success_count = 0 + already_ocr_count = 0 + failed_count = 0 + + for result_code in results.values(): + if result_code == 1: # 成功 + success_count += 1 + elif result_code == 2: # 已OCR过 + already_ocr_count += 1 + else: # 失败 + failed_count += 1 + total_count = len(results) - self.status_label.setText(f"处理完成: {success_count}/{total_count} 文件成功") + # 构建状态消息 + status_msg = f"处理完成: {success_count}/{total_count} 文件成功" + if already_ocr_count > 0: + status_msg += f", {already_ocr_count} 文件已OCR过" - # 启用UI元素 - self.enable_ui() + self.status_label.setText(status_msg) + + # 启用按钮 + self.start_btn.setEnabled(True) + self.cancel_btn.setEnabled(False) + self.close_btn.setEnabled(True) + + # 构建完成消息 + message = f"批量OCR处理已完成\n成功: {success_count} 文件" + if already_ocr_count > 0: + message += f"\n已OCR过: {already_ocr_count} 文件" + message += f"\n失败: {failed_count} 文件" # 显示完成消息 QMessageBox.information( self, "处理完成", - f"批量OCR处理已完成\n成功: {success_count} 文件\n失败: {total_count - success_count} 文件" - ) \ No newline at end of file + message + ) + + def load_saved_configs(self): + """加载已保存的配置""" + saved_configs = self.config.get('saved_configs', {}) + for config_name in saved_configs.keys(): + self.config_combo.addItem(config_name) + + def on_config_changed(self, index): + """配置选择改变事件""" + config_name = self.config_combo.currentText() + if config_name == "默认配置": + # 加载默认配置 + self.deskew_cb.setChecked(self.config.get('default_options.deskew', True)) + self.rotate_cb.setChecked(self.config.get('default_options.rotate_pages', True)) + self.clean_cb.setChecked(self.config.get('default_options.clean', False)) + self.optimize_cb.setChecked(self.config.get('default_options.optimize', True)) + + # 设置默认语言 + default_lang = self.config.get('default_options.language', 'eng') + index = self.language_combo.findData(default_lang) + if index >= 0: + self.language_combo.setCurrentIndex(index) + else: + # 加载已保存的配置 + saved_configs = self.config.get('saved_configs', {}) + if config_name in saved_configs: + config = saved_configs[config_name] + + # 设置选项 + self.deskew_cb.setChecked(config.get('deskew', True)) + self.rotate_cb.setChecked(config.get('rotate_pages', True)) + self.clean_cb.setChecked(config.get('clean', False)) + self.optimize_cb.setChecked(config.get('optimize', True)) + + # 设置语言 + lang = config.get('language', 'eng') + index = self.language_combo.findData(lang) + if index >= 0: + self.language_combo.setCurrentIndex(index) \ No newline at end of file diff --git a/src/gui/main_window.py b/src/gui/main_window.py index b0a617b..4011b4c 100644 --- a/src/gui/main_window.py +++ b/src/gui/main_window.py @@ -109,6 +109,54 @@ class MainWindow(QMainWindow): options_group = QGroupBox("OCR选项") options_layout = QVBoxLayout(options_group) + # 语言选择 + language_layout = QHBoxLayout() + language_layout.addWidget(QLabel("OCR语言:")) + self.language_combo = QComboBox() + self.language_combo.setToolTip("选择OCR识别使用的语言") + + # 添加可用的语言 + # 常用语言列表 + common_langs = ['eng', 'chi_sim', 'chi_tra', 'jpn', 'kor'] + + # 首先添加常用语言 + if self.ocr_engine.available_languages: + # 添加常用语言组 + common_available = [lang for lang in common_langs if lang in self.ocr_engine.available_languages] + if common_available: + self.language_combo.addItem("--- 常用语言 ---", None) + for lang_code in common_available: + lang_name = self.ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + + # 添加其他语言组 + other_available = [lang for lang in self.ocr_engine.available_languages + if lang not in common_langs] + if other_available: + self.language_combo.addItem("--- 其他语言 ---", None) + # 按名称排序 + other_langs_sorted = sorted( + other_available, + key=lambda x: self.ocr_engine.get_language_name(x) + ) + for lang_code in other_langs_sorted: + lang_name = self.ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + else: + # 如果没有常用语言,直接添加所有语言 + for lang_code in self.ocr_engine.available_languages: + lang_name = self.ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + + # 设置默认语言 + default_lang = self.config.get('default_options.language', 'eng') + index = self.language_combo.findData(default_lang) + if index >= 0: + self.language_combo.setCurrentIndex(index) + + language_layout.addWidget(self.language_combo) + options_layout.addLayout(language_layout) + # 处理选项 self.deskew_cb = QCheckBox("自动校正倾斜页面") self.deskew_cb.setChecked(self.config.get('default_options.deskew', True)) @@ -284,14 +332,68 @@ class MainWindow(QMainWindow): return # 收集OCR选项 - options = { + options = {} + + # 获取选中的语言代码 + lang_index = self.language_combo.currentIndex() + lang_data = self.language_combo.itemData(lang_index) + if lang_data: # 确保不是分隔符 + options["language"] = lang_data + else: + # 如果选中了分隔符,尝试找到下一个有效选项 + for i in range(lang_index + 1, self.language_combo.count()): + next_data = self.language_combo.itemData(i) + if next_data: + self.language_combo.setCurrentIndex(i) + options["language"] = next_data + break + # 如果没有找到,使用默认语言 + if "language" not in options: + options["language"] = "eng" + + options.update({ "deskew": self.deskew_cb.isChecked(), "rotate_pages": self.rotate_cb.isChecked(), "clean": self.clean_cb.isChecked(), "optimize": self.optimize_cb.isChecked() - } + }) - # 禁用UI元素 + # 如果只有一个文件,先检查是否已OCR过 + if len(self.selected_files) == 1: + input_file = self.selected_files[0] + input_path = Path(input_file) + output_file = Path(output_dir) / f"{input_path.stem}_ocr{input_path.suffix}" + + # 检查是否已OCR过 + result_code = self.ocr_engine.process_file(input_file, output_file, options) + + if result_code == 2: # 已OCR过 + QMessageBox.information( + self, + "文件已OCR过", + f"文件 {input_path.name} 已有文本层,无需再次OCR处理。" + ) + return + + # 如果成功或失败,也直接显示结果并返回 + if result_code == 1: + QMessageBox.information( + self, + "处理完成", + f"文件 {input_path.name} OCR处理成功。" + ) + # 添加到最近使用的输出目录 + self.config.add_recent_output_dir(output_dir) + return + else: + QMessageBox.critical( + self, + "处理失败", + f"文件 {input_path.name} OCR处理失败,请查看日志了解详情。" + ) + return + + # 多个文件时,禁用UI元素 self.start_btn.setEnabled(False) self.cancel_btn.setEnabled(True) self.add_files_btn.setEnabled(False) @@ -342,27 +444,54 @@ class MainWindow(QMainWindow): self.progress_bar.setValue(percent) file_name = Path(file).name - status = "成功" if success else "失败" - self.status_label.setText(f"处理 {file_name}: {status} ({current}/{total})") - self.statusBar.showMessage(f"处理 {file_name}: {status} ({current}/{total})") + if success: + status = "成功" + else: + status = "失败" + + status_text = f"处理 {file_name}: {status} ({current}/{total})" + self.status_label.setText(status_text) + self.statusBar.showMessage(f"处理 {file_name}: {'成功' if success else '失败'} ({current}/{total})") @Slot(dict) def ocr_finished(self, results): """OCR处理完成""" - success_count = sum(1 for success in results.values() if success) + success_count = 0 + already_ocr_count = 0 + failed_count = 0 + + for result_code in results.values(): + if result_code == 1: # 成功 + success_count += 1 + elif result_code == 2: # 已OCR过 + already_ocr_count += 1 + else: # 失败 + failed_count += 1 + total_count = len(results) - self.status_label.setText(f"处理完成: {success_count}/{total_count} 文件成功") - self.statusBar.showMessage(f"OCR处理完成: {success_count}/{total_count} 文件成功") + # 构建状态消息 + status_msg = f"处理完成: {success_count}/{total_count} 文件成功" + if already_ocr_count > 0: + status_msg += f", {already_ocr_count} 文件已OCR过" + + self.status_label.setText(status_msg) + self.statusBar.showMessage(status_msg) # 启用UI元素 self.enable_ui() + # 构建完成消息 + message = f"OCR处理已完成\n成功: {success_count} 文件" + if already_ocr_count > 0: + message += f"\n已OCR过: {already_ocr_count} 文件" + message += f"\n失败: {failed_count} 文件" + # 显示完成消息 QMessageBox.information( self, "处理完成", - f"OCR处理已完成\n成功: {success_count} 文件\n失败: {total_count - success_count} 文件" + message ) def show_settings(self): diff --git a/src/gui/settings.py b/src/gui/settings.py index 5aeef56..373c3c7 100644 --- a/src/gui/settings.py +++ b/src/gui/settings.py @@ -1,11 +1,13 @@ from PySide6.QtWidgets import ( QDialog, QVBoxLayout, QHBoxLayout, QTabWidget, QPushButton, QLabel, QComboBox, QCheckBox, - QGroupBox, QSpinBox, QRadioButton + QGroupBox, QSpinBox, QRadioButton, QMessageBox, + QWidget ) from PySide6.QtCore import Qt from src.core.config import Config +from src.core.ocr_engine import OCREngine class SettingsDialog(QDialog): """设置对话框""" @@ -103,6 +105,62 @@ class SettingsDialog(QDialog): """设置OCR选项卡""" layout = QVBoxLayout(tab) + # 默认语言 + language_group = QGroupBox("默认OCR语言") + language_layout = QVBoxLayout(language_group) + + self.language_combo = QComboBox() + self.language_combo.setToolTip("选择默认的OCR识别语言") + + # 添加可用的语言 + ocr_engine = OCREngine() + # 常用语言列表 + common_langs = ['eng', 'chi_sim', 'chi_tra', 'jpn', 'kor'] + + # 首先添加常用语言 + if ocr_engine.available_languages: + # 添加常用语言组 + common_available = [lang for lang in common_langs if lang in ocr_engine.available_languages] + if common_available: + self.language_combo.addItem("--- 常用语言 ---", None) + for lang_code in common_available: + lang_name = ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + + # 添加其他语言组 + other_available = [lang for lang in ocr_engine.available_languages + if lang not in common_langs] + if other_available: + self.language_combo.addItem("--- 其他语言 ---", None) + # 按名称排序 + other_langs_sorted = sorted( + other_available, + key=lambda x: ocr_engine.get_language_name(x) + ) + for lang_code in other_langs_sorted: + lang_name = ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + else: + # 如果没有常用语言,直接添加所有语言 + for lang_code in ocr_engine.available_languages: + lang_name = ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + + # 设置当前默认语言 + default_lang = self.config.get('default_options.language', 'eng') + index = self.language_combo.findData(default_lang) + if index >= 0: + self.language_combo.setCurrentIndex(index) + + # 添加刷新语言列表按钮 + lang_buttons_layout = QHBoxLayout() + self.refresh_langs_btn = QPushButton("刷新语言列表") + self.refresh_langs_btn.clicked.connect(self.refresh_languages) + lang_buttons_layout.addWidget(self.refresh_langs_btn) + + language_layout.addWidget(self.language_combo) + language_layout.addLayout(lang_buttons_layout) + # 默认选项 options_group = QGroupBox("默认处理选项") options_layout = QVBoxLayout(options_group) @@ -191,6 +249,23 @@ class SettingsDialog(QDialog): self.config.set('general.max_recent_files', self.recent_files_spin.value()) # 保存OCR设置 + # 获取选中的语言代码 + lang_index = self.language_combo.currentIndex() + lang_data = self.language_combo.itemData(lang_index) + if lang_data: # 确保不是分隔符 + self.config.set('default_options.language', lang_data) + else: + # 如果选中了分隔符,尝试找到下一个有效选项 + for i in range(lang_index + 1, self.language_combo.count()): + next_data = self.language_combo.itemData(i) + if next_data: + self.language_combo.setCurrentIndex(i) + self.config.set('default_options.language', next_data) + break + # 如果没有找到,使用默认语言 + if not self.language_combo.currentData(): + self.config.set('default_options.language', 'eng') + self.config.set('default_options.deskew', self.deskew_cb.isChecked()) self.config.set('default_options.rotate_pages', self.rotate_cb.isChecked()) self.config.set('default_options.clean', self.clean_cb.isChecked()) @@ -209,3 +284,58 @@ class SettingsDialog(QDialog): self.config.set('ui.theme', 'system') super().accept() + + def refresh_languages(self): + """刷新可用语言列表""" + ocr_engine = OCREngine() + # 重新获取可用语言 + ocr_engine.available_languages = ocr_engine.get_available_languages() + + # 保存当前选择的语言 + current_lang = self.language_combo.currentData() + + # 清空并重新填充语言列表 + self.language_combo.clear() + + # 常用语言列表 + common_langs = ['eng', 'chi_sim', 'chi_tra', 'jpn', 'kor'] + + # 首先添加常用语言 + if ocr_engine.available_languages: + # 添加常用语言组 + common_available = [lang for lang in common_langs if lang in ocr_engine.available_languages] + if common_available: + self.language_combo.addItem("--- 常用语言 ---", None) + for lang_code in common_available: + lang_name = ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + + # 添加其他语言组 + other_available = [lang for lang in ocr_engine.available_languages + if lang not in common_langs] + if other_available: + self.language_combo.addItem("--- 其他语言 ---", None) + # 按名称排序 + other_langs_sorted = sorted( + other_available, + key=lambda x: ocr_engine.get_language_name(x) + ) + for lang_code in other_langs_sorted: + lang_name = ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + else: + # 如果没有常用语言,直接添加所有语言 + for lang_code in ocr_engine.available_languages: + lang_name = ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + + # 尝试恢复之前选择的语言 + index = self.language_combo.findData(current_lang) + if index >= 0: + self.language_combo.setCurrentIndex(index) + + QMessageBox.information(self, "刷新完成", f"已刷新语言列表,共找到 {len(ocr_engine.available_languages)} 种语言。") + + def download_language_pack(self): + """下载Tesseract语言包 - 已移除""" + pass diff --git a/src/utils/__pycache__/__init__.cpython-313.pyc b/src/utils/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..e1c8518 Binary files /dev/null and b/src/utils/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/utils/__pycache__/config_manager.cpython-313.pyc b/src/utils/__pycache__/config_manager.cpython-313.pyc new file mode 100644 index 0000000..a2cd7a0 Binary files /dev/null and b/src/utils/__pycache__/config_manager.cpython-313.pyc differ diff --git a/src/utils/__pycache__/file_utils.cpython-313.pyc b/src/utils/__pycache__/file_utils.cpython-313.pyc new file mode 100644 index 0000000..47baf6a Binary files /dev/null and b/src/utils/__pycache__/file_utils.cpython-313.pyc differ