commit dadabf11d1b9d173da014e0ded74fc59b91449b8 Author: Swei-xiao Date: Tue Jun 3 14:58:26 2025 +0800 测试提交 diff --git a/ocrmypdf-gui/.DS_Store b/ocrmypdf-gui/.DS_Store new file mode 100644 index 0000000..3487410 Binary files /dev/null and b/ocrmypdf-gui/.DS_Store differ diff --git a/ocrmypdf-gui/.idea/.gitignore b/ocrmypdf-gui/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/ocrmypdf-gui/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/ocrmypdf-gui/.idea/OCRmyPDF-GUI.iml b/ocrmypdf-gui/.idea/OCRmyPDF-GUI.iml new file mode 100644 index 0000000..3215f97 --- /dev/null +++ b/ocrmypdf-gui/.idea/OCRmyPDF-GUI.iml @@ -0,0 +1,14 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/ocrmypdf-gui/.idea/inspectionProfiles/profiles_settings.xml b/ocrmypdf-gui/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/ocrmypdf-gui/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/ocrmypdf-gui/.idea/misc.xml b/ocrmypdf-gui/.idea/misc.xml new file mode 100644 index 0000000..35d9e0e --- /dev/null +++ b/ocrmypdf-gui/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/ocrmypdf-gui/.idea/modules.xml b/ocrmypdf-gui/.idea/modules.xml new file mode 100644 index 0000000..6d2a594 --- /dev/null +++ b/ocrmypdf-gui/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/ocrmypdf-gui/.idea/vcs.xml b/ocrmypdf-gui/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/ocrmypdf-gui/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/ocrmypdf-gui/README.md b/ocrmypdf-gui/README.md new file mode 100644 index 0000000..1041de9 --- /dev/null +++ 
b/ocrmypdf-gui/README.md @@ -0,0 +1,79 @@ +# OCRmyPDF GUI + +OCRmyPDF的图形用户界面,让OCR处理PDF文件变得简单。 + +## 功能特点 + +- 简洁直观的图形界面 +- 批量处理PDF文件 +- 拖放支持 +- 多语言OCR支持 +- 可自定义OCR选项 +- 保存处理配置 + +## 安装要求 + +- Python 3.7+ +- OCRmyPDF +- Tesseract OCR +- PySide6 (Qt for Python) + +## 安装步骤 + +1. 安装OCRmyPDF和其依赖: + +```bash +# macOS +brew install ocrmypdf + +# Ubuntu/Debian +apt install ocrmypdf + +# 或使用pip +pip install ocrmypdf +``` + +2. 安装GUI依赖: + +```bash +pip install PySide6 +``` + +3. 克隆本仓库: + +```bash +git clone https://github.com/yourusername/OCRmyPDF-GUI.git +cd OCRmyPDF-GUI +``` + +## 使用方法 + +运行启动脚本: + +```bash +python run.py +``` + +或在Windows上双击`run.py`文件。 + +## 开发计划 + +- [ ] 高级OCR选项 +- [ ] 多语言界面 +- [ ] 暗黑模式 +- [ ] 自定义输出文件名模板 +- [ ] 处理历史记录 + +## 贡献 + +欢迎提交Pull Request或Issue。 + +## 许可证 + +本项目采用与OCRmyPDF相同的许可证。 + +## 致谢 + +- [OCRmyPDF](https://github.com/ocrmypdf/OCRmyPDF) - 强大的OCR工具 +- [Tesseract OCR](https://github.com/tesseract-ocr/tesseract) - OCR引擎 +- [Qt for Python (PySide6)](https://wiki.qt.io/Qt_for_Python) - GUI框架 \ No newline at end of file diff --git a/ocrmypdf-gui/docs/文档模板-开源软件维护报告文档.docx b/ocrmypdf-gui/docs/文档模板-开源软件维护报告文档.docx new file mode 100644 index 0000000..1d921df Binary files /dev/null and b/ocrmypdf-gui/docs/文档模板-开源软件维护报告文档.docx differ diff --git a/ocrmypdf-gui/requirements.txt b/ocrmypdf-gui/requirements.txt new file mode 100644 index 0000000..2256d12 --- /dev/null +++ b/ocrmypdf-gui/requirements.txt @@ -0,0 +1,2 @@ +PySide6>=6.5.0 +pytest>=7.0.0 \ No newline at end of file diff --git a/ocrmypdf-gui/run.py b/ocrmypdf-gui/run.py new file mode 100644 index 0000000..a06f501 --- /dev/null +++ b/ocrmypdf-gui/run.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +""" +OCRmyPDF GUI 启动脚本 +""" + +import sys +import os +from pathlib import Path + +# 添加项目根目录到Python路径 +project_root = Path(__file__).resolve().parent +sys.path.insert(0, str(project_root)) + +# 导入主模块 +from src.main import main + +if __name__ == "__main__": + main() \ No newline at end of file diff --git 
a/ocrmypdf-gui/src/__init__.py b/ocrmypdf-gui/src/__init__.py new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/ocrmypdf-gui/src/__init__.py @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/ocrmypdf-gui/src/__pycache__/__init__.cpython-313.pyc b/ocrmypdf-gui/src/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..f044463 Binary files /dev/null and b/ocrmypdf-gui/src/__pycache__/__init__.cpython-313.pyc differ diff --git a/ocrmypdf-gui/src/__pycache__/__init__.cpython-39.pyc b/ocrmypdf-gui/src/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..8f955d7 Binary files /dev/null and b/ocrmypdf-gui/src/__pycache__/__init__.cpython-39.pyc differ diff --git a/ocrmypdf-gui/src/__pycache__/main.cpython-313.pyc b/ocrmypdf-gui/src/__pycache__/main.cpython-313.pyc new file mode 100644 index 0000000..d3be1c9 Binary files /dev/null and b/ocrmypdf-gui/src/__pycache__/main.cpython-313.pyc differ diff --git a/ocrmypdf-gui/src/__pycache__/main.cpython-39.pyc b/ocrmypdf-gui/src/__pycache__/main.cpython-39.pyc new file mode 100644 index 0000000..39c92f9 Binary files /dev/null and b/ocrmypdf-gui/src/__pycache__/main.cpython-39.pyc differ diff --git a/ocrmypdf-gui/src/core/__init__.py b/ocrmypdf-gui/src/core/__init__.py new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/ocrmypdf-gui/src/core/__init__.py @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/ocrmypdf-gui/src/core/__pycache__/__init__.cpython-39.pyc b/ocrmypdf-gui/src/core/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..3b71ea3 Binary files /dev/null and b/ocrmypdf-gui/src/core/__pycache__/__init__.cpython-39.pyc differ diff --git a/ocrmypdf-gui/src/core/__pycache__/config.cpython-39.pyc b/ocrmypdf-gui/src/core/__pycache__/config.cpython-39.pyc new file mode 100644 index 0000000..41cfc2a Binary files /dev/null and b/ocrmypdf-gui/src/core/__pycache__/config.cpython-39.pyc differ diff --git 
a/ocrmypdf-gui/src/core/__pycache__/ocr_engine.cpython-39.pyc b/ocrmypdf-gui/src/core/__pycache__/ocr_engine.cpython-39.pyc new file mode 100644 index 0000000..3b1cb28 Binary files /dev/null and b/ocrmypdf-gui/src/core/__pycache__/ocr_engine.cpython-39.pyc differ diff --git a/ocrmypdf-gui/src/core/config.py b/ocrmypdf-gui/src/core/config.py new file mode 100644 index 0000000..557a2eb --- /dev/null +++ b/ocrmypdf-gui/src/core/config.py @@ -0,0 +1,129 @@ +import json +import os +from pathlib import Path +import logging + +class Config: + """配置管理类,负责加载和保存应用程序配置""" + + def __init__(self): + self.logger = logging.getLogger(__name__) + self.config_dir = Path.home() / ".ocrmypdf-gui" + self.config_file = self.config_dir / "config.json" + self.default_config = { + "recent_files": [], + "recent_output_dirs": [], + "default_options": { + "deskew": True, + "rotate_pages": True, + "clean": False, + "output_type": "pdfa", + "jobs": 4 + }, + "ui": { + "theme": "system", + "language": "zh_CN" + } + } + self.current_config = self.default_config.copy() + self.load_config() + + def load_config(self): + """加载配置文件""" + if not self.config_dir.exists(): + self.config_dir.mkdir(parents=True, exist_ok=True) + + if self.config_file.exists(): + try: + with open(self.config_file, 'r', encoding='utf-8') as f: + loaded_config = json.load(f) + # 合并配置,保留默认值 + self._merge_config(self.current_config, loaded_config) + self.logger.info("配置文件加载成功") + except Exception as e: + self.logger.error(f"加载配置文件出错: {e}") + else: + self.logger.info("配置文件不存在,使用默认配置") + self.save_config() + + def save_config(self): + """保存配置文件""" + try: + with open(self.config_file, 'w', encoding='utf-8') as f: + json.dump(self.current_config, f, indent=2, ensure_ascii=False) + self.logger.info("配置文件保存成功") + except Exception as e: + self.logger.error(f"保存配置文件出错: {e}") + + def _merge_config(self, target, source): + """递归合并配置字典""" + for key, value in source.items(): + if key in target and isinstance(target[key], dict) and 
isinstance(value, dict): + self._merge_config(target[key], value) + else: + target[key] = value + + def get(self, key, default=None): + """获取配置项 + + Args: + key: 配置项键名,支持点号分隔的多级键名,如 'ui.theme' + default: 如果配置项不存在,返回的默认值 + + Returns: + 配置项的值 + """ + keys = key.split('.') + value = self.current_config + + for k in keys: + if isinstance(value, dict) and k in value: + value = value[k] + else: + return default + + return value + + def set(self, key, value): + """设置配置项 + + Args: + key: 配置项键名,支持点号分隔的多级键名,如 'ui.theme' + value: 配置项的值 + """ + keys = key.split('.') + target = self.current_config + + for i, k in enumerate(keys[:-1]): + if k not in target: + target[k] = {} + target = target[k] + + target[keys[-1]] = value + self.save_config() + + def add_recent_file(self, file_path): + """添加最近使用的文件 + + Args: + file_path: 文件路径 + """ + recent_files = self.get('recent_files', []) + if file_path in recent_files: + recent_files.remove(file_path) + recent_files.insert(0, file_path) + # 保留最近的10个文件 + self.set('recent_files', recent_files[:10]) + + def add_recent_output_dir(self, dir_path): + """添加最近使用的输出目录 + + Args: + dir_path: 目录路径 + """ + recent_dirs = self.get('recent_output_dirs', []) + if dir_path in recent_dirs: + recent_dirs.remove(dir_path) + recent_dirs.insert(0, dir_path) + # 保留最近的10个目录 + self.set('recent_output_dirs', recent_dirs[:10]) \ No newline at end of file diff --git a/ocrmypdf-gui/src/core/ocr_engine.py b/ocrmypdf-gui/src/core/ocr_engine.py new file mode 100644 index 0000000..312acbb --- /dev/null +++ b/ocrmypdf-gui/src/core/ocr_engine.py @@ -0,0 +1,119 @@ +import logging +import subprocess +from pathlib import Path +import sys +import os + +class OCREngine: + """OCR引擎类,封装OCRmyPDF的调用""" + + def __init__(self): + self.logger = logging.getLogger(__name__) + # 检查命令行工具是否可用 + try: + result = subprocess.run( + ["ocrmypdf", "--version"], + capture_output=True, + text=True, + check=False + ) + if result.returncode == 0: + self.logger.info(f"OCRmyPDF命令行工具可用: 
{result.stdout.strip()}") + else: + self.logger.warning("OCRmyPDF命令行工具返回错误") + except FileNotFoundError: + self.logger.error("OCRmyPDF命令行工具未找到") + + def process_file(self, input_file, output_file, options=None): + """ + 使用OCRmyPDF处理单个文件 + + Args: + input_file (str): 输入PDF文件路径 + output_file (str): 输出PDF文件路径 + options (dict): OCR选项 + + Returns: + bool: 处理是否成功 + """ + if options is None: + options = {} + + self.logger.info(f"处理文件: {input_file} -> {output_file}") + + # 构建命令行参数 + cmd = ["ocrmypdf"] + + # 添加语言选项 - 默认使用英文 + cmd.extend(["-l", "eng"]) + + # 添加其他选项 + if options.get('deskew', False): + cmd.append("--deskew") + + if options.get('rotate_pages', False): + cmd.append("--rotate-pages") + + if options.get('clean', False): + cmd.append("--clean") + + if 'jobs' in options: + cmd.extend(["--jobs", str(options['jobs'])]) + + if 'output_type' in options: + cmd.extend(["--output-type", options['output_type']]) + + # 添加输入和输出文件 + cmd.extend([str(input_file), str(output_file)]) + + # 执行命令 + self.logger.debug(f"执行命令: {' '.join(cmd)}") + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=False + ) + if result.returncode == 0: + self.logger.info("OCRmyPDF命令执行成功") + return True + else: + self.logger.error(f"OCRmyPDF命令执行失败: {result.stderr}") + return False + except Exception as e: + self.logger.error(f"执行OCRmyPDF命令时出错: {e}") + return False + + def process_batch(self, file_list, output_dir, options=None, progress_callback=None): + """ + 批量处理文件 + + Args: + file_list (list): 输入文件列表 + output_dir (str): 输出目录 + options (dict): OCR选项 + progress_callback (callable): 进度回调函数,接收参数(current, total, file, success) + + Returns: + dict: 处理结果,键为输入文件路径,值为处理是否成功 + """ + results = {} + total = len(file_list) + + # 确保输出目录存在 + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + for i, input_file in enumerate(file_list): + input_path = Path(input_file) + output_file = output_path / f"{input_path.stem}_ocr{input_path.suffix}" + + 
self.logger.info(f"处理文件 {i+1}/{total}: {input_file}") + success = self.process_file(input_file, output_file, options) + results[input_file] = success + + if progress_callback: + progress_callback(i + 1, total, input_file, success) + + return results \ No newline at end of file diff --git a/ocrmypdf-gui/src/gui/__init__.py b/ocrmypdf-gui/src/gui/__init__.py new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/ocrmypdf-gui/src/gui/__init__.py @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/ocrmypdf-gui/src/gui/__pycache__/__init__.cpython-39.pyc b/ocrmypdf-gui/src/gui/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..fdb6e7f Binary files /dev/null and b/ocrmypdf-gui/src/gui/__pycache__/__init__.cpython-39.pyc differ diff --git a/ocrmypdf-gui/src/gui/__pycache__/batch_dialog.cpython-39.pyc b/ocrmypdf-gui/src/gui/__pycache__/batch_dialog.cpython-39.pyc new file mode 100644 index 0000000..a760ac9 Binary files /dev/null and b/ocrmypdf-gui/src/gui/__pycache__/batch_dialog.cpython-39.pyc differ diff --git a/ocrmypdf-gui/src/gui/__pycache__/main_window.cpython-39.pyc b/ocrmypdf-gui/src/gui/__pycache__/main_window.cpython-39.pyc new file mode 100644 index 0000000..35f0a2d Binary files /dev/null and b/ocrmypdf-gui/src/gui/__pycache__/main_window.cpython-39.pyc differ diff --git a/ocrmypdf-gui/src/gui/__pycache__/settings.cpython-39.pyc b/ocrmypdf-gui/src/gui/__pycache__/settings.cpython-39.pyc new file mode 100644 index 0000000..f4156eb Binary files /dev/null and b/ocrmypdf-gui/src/gui/__pycache__/settings.cpython-39.pyc differ diff --git a/ocrmypdf-gui/src/gui/batch_dialog.py b/ocrmypdf-gui/src/gui/batch_dialog.py new file mode 100644 index 0000000..050abc6 --- /dev/null +++ b/ocrmypdf-gui/src/gui/batch_dialog.py @@ -0,0 +1,363 @@ +from PySide6.QtWidgets import ( + QDialog, QVBoxLayout, QHBoxLayout, QGroupBox, + QPushButton, QLabel, QFileDialog, QProgressBar, + QComboBox, QCheckBox, QListWidget, QMessageBox, + QRadioButton +) +from 
PySide6.QtCore import Qt, Signal, Slot, QThread +from pathlib import Path +import os + +from src.core.ocr_engine import OCREngine +from src.core.config import Config +from src.utils.file_utils import FileUtils + +class BatchOCRWorker(QThread): + """批量OCR处理线程""" + progress_updated = Signal(int, int, str, bool) + file_progress_updated = Signal(int, int) # 当前文件的进度 + finished = Signal(dict) + + def __init__(self, engine, files, output_dir, options): + super().__init__() + self.engine = engine + self.files = files + self.output_dir = output_dir + self.options = options + + def run(self): + results = self.engine.process_batch( + self.files, + self.output_dir, + self.options, + lambda current, total, file, success: self.progress_updated.emit(current, total, file, success) + ) + self.finished.emit(results) + +class BatchDialog(QDialog): + """批量处理对话框""" + + def __init__(self, parent=None): + super().__init__(parent) + self.setWindowTitle("批量OCR处理") + self.resize(700, 500) + + self.config = Config() + self.ocr_engine = OCREngine() + self.selected_files = [] + + self.init_ui() + + def init_ui(self): + """初始化UI""" + # 主布局 + main_layout = QVBoxLayout(self) + + # 文件选择区域 + file_group = QGroupBox("文件选择") + file_layout = QVBoxLayout(file_group) + + file_buttons_layout = QHBoxLayout() + self.add_files_btn = QPushButton("添加文件") + self.add_files_btn.clicked.connect(self.add_files) + self.add_folder_btn = QPushButton("添加文件夹") + self.add_folder_btn.clicked.connect(self.add_folder) + self.clear_files_btn = QPushButton("清除") + self.clear_files_btn.clicked.connect(self.clear_files) + self.select_all_btn = QPushButton("全选") + self.select_all_btn.clicked.connect(self.select_all_files) + + file_buttons_layout.addWidget(self.add_files_btn) + file_buttons_layout.addWidget(self.add_folder_btn) + file_buttons_layout.addWidget(self.clear_files_btn) + file_buttons_layout.addWidget(self.select_all_btn) + file_buttons_layout.addStretch() + + self.file_list = QListWidget() + 
self.file_list.setSelectionMode(QListWidget.SelectionMode.ExtendedSelection) + + file_layout.addLayout(file_buttons_layout) + file_layout.addWidget(self.file_list) + + # 输出选项 + output_group = QGroupBox("输出选项") + output_layout = QVBoxLayout(output_group) + + # 输出目录 + output_dir_layout = QHBoxLayout() + output_dir_layout.addWidget(QLabel("输出目录:")) + self.output_dir_edit = QComboBox() + self.output_dir_edit.setEditable(True) + self.output_dir_edit.addItems(self.config.get('recent_output_dirs', [])) + self.output_dir_btn = QPushButton("浏览...") + self.output_dir_btn.clicked.connect(self.select_output_dir) + output_dir_layout.addWidget(self.output_dir_edit, 1) + output_dir_layout.addWidget(self.output_dir_btn) + + # 输出文件命名 + naming_layout = QHBoxLayout() + naming_layout.addWidget(QLabel("输出文件命名:")) + self.naming_combo = QComboBox() + self.naming_combo.addItems(["原文件名_ocr", "原文件名", "自定义前缀_原文件名"]) + naming_layout.addWidget(self.naming_combo, 1) + + output_layout.addLayout(output_dir_layout) + output_layout.addLayout(naming_layout) + + # OCR选项 + ocr_group = QGroupBox("OCR选项") + ocr_layout = QVBoxLayout(ocr_group) + + # 使用配置文件 + config_layout = QHBoxLayout() + config_layout.addWidget(QLabel("使用配置文件:")) + self.config_combo = QComboBox() + self.config_combo.addItems(["默认配置"]) + self.save_config_btn = QPushButton("保存当前配置") + self.save_config_btn.clicked.connect(self.save_current_config) + config_layout.addWidget(self.config_combo, 1) + config_layout.addWidget(self.save_config_btn) + + # 处理选项 + self.deskew_cb = QCheckBox("自动校正倾斜页面") + self.deskew_cb.setChecked(self.config.get('default_options.deskew', True)) + + self.rotate_cb = QCheckBox("自动旋转页面") + self.rotate_cb.setChecked(self.config.get('default_options.rotate_pages', True)) + + self.clean_cb = QCheckBox("清理图像") + self.clean_cb.setChecked(self.config.get('default_options.clean', False)) + + self.optimize_cb = QCheckBox("优化输出文件大小") + self.optimize_cb.setChecked(self.config.get('default_options.optimize', True)) + + # 添加到布局 + 
ocr_layout.addLayout(config_layout) + ocr_layout.addWidget(self.deskew_cb) + ocr_layout.addWidget(self.rotate_cb) + ocr_layout.addWidget(self.clean_cb) + ocr_layout.addWidget(self.optimize_cb) + + # 进度条 + progress_group = QGroupBox("处理进度") + progress_layout = QVBoxLayout(progress_group) + + # 总进度 + total_progress_layout = QHBoxLayout() + total_progress_layout.addWidget(QLabel("总进度:")) + self.total_progress_bar = QProgressBar() + total_progress_layout.addWidget(self.total_progress_bar) + + # 当前文件进度 + file_progress_layout = QHBoxLayout() + file_progress_layout.addWidget(QLabel("当前文件:")) + self.file_progress_bar = QProgressBar() + file_progress_layout.addWidget(self.file_progress_bar) + + self.status_label = QLabel("准备就绪") + + progress_layout.addLayout(total_progress_layout) + progress_layout.addLayout(file_progress_layout) + progress_layout.addWidget(self.status_label) + + # 操作按钮 + buttons_layout = QHBoxLayout() + self.start_btn = QPushButton("开始批量处理") + self.start_btn.clicked.connect(self.start_batch_ocr) + self.cancel_btn = QPushButton("取消") + self.cancel_btn.clicked.connect(self.cancel_batch_ocr) + self.cancel_btn.setEnabled(False) + self.close_btn = QPushButton("关闭") + self.close_btn.clicked.connect(self.reject) + + buttons_layout.addStretch() + buttons_layout.addWidget(self.start_btn) + buttons_layout.addWidget(self.cancel_btn) + buttons_layout.addWidget(self.close_btn) + + # 添加所有元素到主布局 + main_layout.addWidget(file_group) + main_layout.addWidget(output_group) + main_layout.addWidget(ocr_group) + main_layout.addWidget(progress_group) + main_layout.addLayout(buttons_layout) + + def add_files(self): + """添加文件""" + files, _ = QFileDialog.getOpenFileNames( + self, + "选择PDF文件", + "", + "PDF文件 (*.pdf);;所有文件 (*.*)" + ) + + if files: + self.add_files_to_list(files) + + def add_folder(self): + """添加文件夹""" + folder = QFileDialog.getExistingDirectory( + self, + "选择包含PDF文件的文件夹" + ) + + if folder: + pdf_files = FileUtils.get_pdf_files_in_dir(folder, recursive=True) + if 
pdf_files: + self.add_files_to_list(pdf_files) + else: + QMessageBox.information(self, "提示", "所选文件夹中未找到PDF文件") + + def add_files_to_list(self, files): + """添加文件到列表""" + # 过滤已存在的文件 + new_files = [f for f in files if f not in self.selected_files] + if not new_files: + return + + self.selected_files.extend(new_files) + + # 更新列表显示 + self.file_list.clear() + for file in self.selected_files: + self.file_list.addItem(Path(file).name) + + # 更新状态 + self.status_label.setText(f"已添加 {len(self.selected_files)} 个文件") + + # 保存最近使用的文件 + for file in new_files: + self.config.add_recent_file(file) + + def clear_files(self): + """清除文件列表""" + self.selected_files = [] + self.file_list.clear() + self.status_label.setText("文件列表已清空") + + def select_all_files(self): + """全选文件""" + self.file_list.selectAll() + + def select_output_dir(self): + """选择输出目录""" + dir_path = QFileDialog.getExistingDirectory( + self, + "选择输出目录", + "" + ) + + if dir_path: + self.output_dir_edit.setCurrentText(dir_path) + self.config.add_recent_output_dir(dir_path) + + def save_current_config(self): + """保存当前配置""" + # 这里可以实现保存当前配置的功能 + QMessageBox.information(self, "提示", "配置保存功能尚未实现") + + def start_batch_ocr(self): + """开始批量OCR处理""" + if not self.selected_files: + QMessageBox.warning(self, "警告", "未选择文件") + return + + output_dir = self.output_dir_edit.currentText() + if not output_dir: + QMessageBox.warning(self, "警告", "未选择输出目录") + return + + # 确保输出目录存在 + if not FileUtils.ensure_dir(output_dir): + QMessageBox.critical(self, "错误", f"无法创建输出目录: {output_dir}") + return + + # 收集OCR选项 + options = { + "deskew": self.deskew_cb.isChecked(), + "rotate_pages": self.rotate_cb.isChecked(), + "clean": self.clean_cb.isChecked(), + "optimize": self.optimize_cb.isChecked() + } + + # 禁用UI元素 + self.start_btn.setEnabled(False) + self.cancel_btn.setEnabled(True) + self.add_files_btn.setEnabled(False) + self.add_folder_btn.setEnabled(False) + self.clear_files_btn.setEnabled(False) + self.select_all_btn.setEnabled(False) + 
self.output_dir_btn.setEnabled(False) + self.output_dir_edit.setEnabled(False) + + # 重置进度条 + self.total_progress_bar.setValue(0) + self.file_progress_bar.setValue(0) + self.status_label.setText("处理中...") + + # 创建并启动工作线程 + self.worker = BatchOCRWorker( + self.ocr_engine, + self.selected_files, + output_dir, + options + ) + self.worker.progress_updated.connect(self.update_progress) + self.worker.file_progress_updated.connect(self.update_file_progress) + self.worker.finished.connect(self.ocr_finished) + self.worker.start() + + def cancel_batch_ocr(self): + """取消批量OCR处理""" + if hasattr(self, 'worker') and self.worker.isRunning(): + self.worker.terminate() + self.worker.wait() + self.status_label.setText("处理已取消") + + # 启用UI元素 + self.enable_ui() + + def enable_ui(self): + """启用UI元素""" + self.start_btn.setEnabled(True) + self.cancel_btn.setEnabled(False) + self.add_files_btn.setEnabled(True) + self.add_folder_btn.setEnabled(True) + self.clear_files_btn.setEnabled(True) + self.select_all_btn.setEnabled(True) + self.output_dir_btn.setEnabled(True) + self.output_dir_edit.setEnabled(True) + + @Slot(int, int, str, bool) + def update_progress(self, current, total, file, success): + """更新总进度""" + percent = int(current * 100 / total) + self.total_progress_bar.setValue(percent) + + file_name = Path(file).name + status = "成功" if success else "失败" + self.status_label.setText(f"处理 {file_name}: {status} ({current}/{total})") + + @Slot(int, int) + def update_file_progress(self, current, total): + """更新当前文件进度""" + percent = int(current * 100 / total) if total > 0 else 0 + self.file_progress_bar.setValue(percent) + + @Slot(dict) + def ocr_finished(self, results): + """OCR处理完成""" + success_count = sum(1 for success in results.values() if success) + total_count = len(results) + + self.status_label.setText(f"处理完成: {success_count}/{total_count} 文件成功") + + # 启用UI元素 + self.enable_ui() + + # 显示完成消息 + QMessageBox.information( + self, + "处理完成", + f"批量OCR处理已完成\n成功: {success_count} 文件\n失败: 
{total_count - success_count} 文件" + ) \ No newline at end of file diff --git a/ocrmypdf-gui/src/gui/main_window.py b/ocrmypdf-gui/src/gui/main_window.py new file mode 100644 index 0000000..b0a617b --- /dev/null +++ b/ocrmypdf-gui/src/gui/main_window.py @@ -0,0 +1,417 @@ +import sys +import logging +from pathlib import Path + +from PySide6.QtWidgets import ( + QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, + QPushButton, QLabel, QFileDialog, QProgressBar, + QComboBox, QCheckBox, QGroupBox, QListWidget, + QMessageBox, QStatusBar, QMenu, QMenuBar +) +from PySide6.QtCore import Qt, Signal, Slot, QThread +from PySide6.QtGui import QIcon, QDragEnterEvent, QDropEvent, QAction + +from src.core.ocr_engine import OCREngine +from src.core.config import Config +from src.utils.file_utils import FileUtils +from src.gui.settings import SettingsDialog +from src.gui.batch_dialog import BatchDialog + +class OCRWorker(QThread): + """OCR处理线程""" + progress_updated = Signal(int, int, str, bool) + finished = Signal(dict) + + def __init__(self, engine, files, output_dir, options): + super().__init__() + self.engine = engine + self.files = files + self.output_dir = output_dir + self.options = options + + def run(self): + results = self.engine.process_batch( + self.files, + self.output_dir, + self.options, + lambda current, total, file, success: self.progress_updated.emit(current, total, file, success) + ) + self.finished.emit(results) + +class MainWindow(QMainWindow): + """主窗口类""" + + def __init__(self): + super().__init__() + self.logger = logging.getLogger(__name__) + self.setWindowTitle("OCRmyPDF GUI") + self.resize(800, 600) + self.setAcceptDrops(True) # 启用拖放 + + self.config = Config() + self.ocr_engine = OCREngine() + self.selected_files = [] + + self.init_ui() + self.logger.info("主窗口初始化完成") + + def init_ui(self): + """初始化UI""" + # 创建菜单栏 + self.create_menu_bar() + + # 创建状态栏 + self.statusBar = QStatusBar() + self.setStatusBar(self.statusBar) + self.statusBar.showMessage("就绪") + + # 
创建中央部件 + central_widget = QWidget() + self.setCentralWidget(central_widget) + + # 主布局 + main_layout = QVBoxLayout(central_widget) + + # 文件选择区域 + file_group = QGroupBox("文件选择") + file_layout = QVBoxLayout(file_group) + + file_buttons_layout = QHBoxLayout() + self.add_files_btn = QPushButton("添加文件") + self.add_files_btn.clicked.connect(self.add_files) + self.add_folder_btn = QPushButton("添加文件夹") + self.add_folder_btn.clicked.connect(self.add_folder) + self.clear_files_btn = QPushButton("清除文件") + self.clear_files_btn.clicked.connect(self.clear_files) + file_buttons_layout.addWidget(self.add_files_btn) + file_buttons_layout.addWidget(self.add_folder_btn) + file_buttons_layout.addWidget(self.clear_files_btn) + file_buttons_layout.addStretch() + + self.file_list = QListWidget() + self.file_list.setSelectionMode(QListWidget.SelectionMode.ExtendedSelection) + + file_layout.addLayout(file_buttons_layout) + file_layout.addWidget(self.file_list) + + # 输出目录选择 + output_layout = QHBoxLayout() + output_layout.addWidget(QLabel("输出目录:")) + self.output_dir_edit = QComboBox() + self.output_dir_edit.setEditable(True) + self.output_dir_edit.addItems(self.config.get('recent_output_dirs', [])) + self.output_dir_btn = QPushButton("浏览...") + self.output_dir_btn.clicked.connect(self.select_output_dir) + output_layout.addWidget(self.output_dir_edit, 1) + output_layout.addWidget(self.output_dir_btn) + + # OCR选项 + options_group = QGroupBox("OCR选项") + options_layout = QVBoxLayout(options_group) + + # 处理选项 + self.deskew_cb = QCheckBox("自动校正倾斜页面") + self.deskew_cb.setChecked(self.config.get('default_options.deskew', True)) + self.rotate_cb = QCheckBox("自动旋转页面") + self.rotate_cb.setChecked(self.config.get('default_options.rotate_pages', True)) + self.clean_cb = QCheckBox("清理图像") + self.clean_cb.setChecked(self.config.get('default_options.clean', False)) + self.optimize_cb = QCheckBox("优化输出文件大小") + self.optimize_cb.setChecked(self.config.get('default_options.optimize', True)) + + 
options_layout.addWidget(self.deskew_cb) + options_layout.addWidget(self.rotate_cb) + options_layout.addWidget(self.clean_cb) + options_layout.addWidget(self.optimize_cb) + + # 进度条 + progress_layout = QVBoxLayout() + self.progress_bar = QProgressBar() + self.progress_bar.setRange(0, 100) + self.progress_bar.setValue(0) + self.status_label = QLabel("准备就绪") + progress_layout.addWidget(self.progress_bar) + progress_layout.addWidget(self.status_label) + + # 操作按钮 + buttons_layout = QHBoxLayout() + self.start_btn = QPushButton("开始OCR处理") + self.start_btn.clicked.connect(self.start_ocr) + self.cancel_btn = QPushButton("取消") + self.cancel_btn.clicked.connect(self.cancel_ocr) + self.cancel_btn.setEnabled(False) + buttons_layout.addStretch() + buttons_layout.addWidget(self.start_btn) + buttons_layout.addWidget(self.cancel_btn) + + # 添加所有元素到主布局 + main_layout.addWidget(file_group) + main_layout.addLayout(output_layout) + main_layout.addWidget(options_group) + main_layout.addLayout(progress_layout) + main_layout.addLayout(buttons_layout) + + def create_menu_bar(self): + """创建菜单栏""" + menu_bar = QMenuBar() + self.setMenuBar(menu_bar) + + # 文件菜单 + file_menu = QMenu("文件(&F)", self) + menu_bar.addMenu(file_menu) + + add_files_action = QAction("添加文件(&A)...", self) + add_files_action.triggered.connect(self.add_files) + file_menu.addAction(add_files_action) + + add_folder_action = QAction("添加文件夹(&D)...", self) + add_folder_action.triggered.connect(self.add_folder) + file_menu.addAction(add_folder_action) + + file_menu.addSeparator() + + batch_action = QAction("批量处理(&B)...", self) + batch_action.triggered.connect(self.show_batch_dialog) + file_menu.addAction(batch_action) + + file_menu.addSeparator() + + exit_action = QAction("退出(&X)", self) + exit_action.triggered.connect(self.close) + file_menu.addAction(exit_action) + + # 编辑菜单 + edit_menu = QMenu("编辑(&E)", self) + menu_bar.addMenu(edit_menu) + + clear_action = QAction("清除文件列表(&C)", self) + 
clear_action.triggered.connect(self.clear_files) + edit_menu.addAction(clear_action) + + settings_action = QAction("设置(&S)...", self) + settings_action.triggered.connect(self.show_settings) + edit_menu.addAction(settings_action) + + # 帮助菜单 + help_menu = QMenu("帮助(&H)", self) + menu_bar.addMenu(help_menu) + + about_action = QAction("关于(&A)", self) + about_action.triggered.connect(self.show_about) + help_menu.addAction(about_action) + + def add_files(self): + """添加文件""" + files, _ = QFileDialog.getOpenFileNames( + self, + "选择PDF文件", + "", + "PDF文件 (*.pdf);;所有文件 (*.*)" + ) + + if files: + self.add_files_to_list(files) + + def add_folder(self): + """添加文件夹""" + folder = QFileDialog.getExistingDirectory( + self, + "选择包含PDF文件的文件夹" + ) + + if folder: + pdf_files = FileUtils.get_pdf_files_in_dir(folder, recursive=True) + if pdf_files: + self.add_files_to_list(pdf_files) + else: + QMessageBox.information(self, "提示", "所选文件夹中未找到PDF文件") + + def add_files_to_list(self, files): + """添加文件到列表""" + # 过滤已存在的文件 + new_files = [f for f in files if f not in self.selected_files] + if not new_files: + return + + self.selected_files.extend(new_files) + + # 更新列表显示 + self.file_list.clear() + for file in self.selected_files: + self.file_list.addItem(Path(file).name) + + # 更新状态 + self.status_label.setText(f"已添加 {len(self.selected_files)} 个文件") + self.statusBar.showMessage(f"已添加 {len(self.selected_files)} 个文件") + + # 保存最近使用的文件 + for file in new_files: + self.config.add_recent_file(file) + + def clear_files(self): + """清除文件列表""" + self.selected_files = [] + self.file_list.clear() + self.status_label.setText("文件列表已清空") + self.statusBar.showMessage("文件列表已清空") + + def select_output_dir(self): + """选择输出目录""" + dir_path = QFileDialog.getExistingDirectory( + self, + "选择输出目录", + "" + ) + + if dir_path: + self.output_dir_edit.setCurrentText(dir_path) + self.config.add_recent_output_dir(dir_path) + + def start_ocr(self): + """开始OCR处理""" + if not self.selected_files: + QMessageBox.warning(self, "警告", 
"未选择文件") + return + + output_dir = self.output_dir_edit.currentText() + if not output_dir: + QMessageBox.warning(self, "警告", "未选择输出目录") + return + + # 确保输出目录存在 + if not FileUtils.ensure_dir(output_dir): + QMessageBox.critical(self, "错误", f"无法创建输出目录: {output_dir}") + return + + # 收集OCR选项 + options = { + "deskew": self.deskew_cb.isChecked(), + "rotate_pages": self.rotate_cb.isChecked(), + "clean": self.clean_cb.isChecked(), + "optimize": self.optimize_cb.isChecked() + } + + # 禁用UI元素 + self.start_btn.setEnabled(False) + self.cancel_btn.setEnabled(True) + self.add_files_btn.setEnabled(False) + self.add_folder_btn.setEnabled(False) + self.clear_files_btn.setEnabled(False) + self.output_dir_btn.setEnabled(False) + self.output_dir_edit.setEnabled(False) + self.progress_bar.setValue(0) + self.status_label.setText("处理中...") + self.statusBar.showMessage("OCR处理中...") + + # 创建并启动工作线程 + self.worker = OCRWorker( + self.ocr_engine, + self.selected_files, + output_dir, + options + ) + self.worker.progress_updated.connect(self.update_progress) + self.worker.finished.connect(self.ocr_finished) + self.worker.start() + + def cancel_ocr(self): + """取消OCR处理""" + if hasattr(self, 'worker') and self.worker.isRunning(): + self.worker.terminate() + self.worker.wait() + self.status_label.setText("处理已取消") + self.statusBar.showMessage("OCR处理已取消") + + # 启用UI元素 + self.enable_ui() + + def enable_ui(self): + """启用UI元素""" + self.start_btn.setEnabled(True) + self.cancel_btn.setEnabled(False) + self.add_files_btn.setEnabled(True) + self.add_folder_btn.setEnabled(True) + self.clear_files_btn.setEnabled(True) + self.output_dir_btn.setEnabled(True) + self.output_dir_edit.setEnabled(True) + + @Slot(int, int, str, bool) + def update_progress(self, current, total, file, success): + """更新进度""" + percent = int(current * 100 / total) + self.progress_bar.setValue(percent) + + file_name = Path(file).name + status = "成功" if success else "失败" + self.status_label.setText(f"处理 {file_name}: {status} 
@Slot(int, int, str, bool)
def update_progress(self, current, total, file, success):
    """Show the progress reported by the OCR worker.

    Args:
        current: Number of files processed so far.
        total: Total number of files in the run.
        file: Path of the file that was just processed.
        success: Whether that file was processed successfully.
    """
    # Guard against an empty run: a total of 0 would otherwise raise
    # ZeroDivisionError inside the slot.
    percent = int(current * 100 / total) if total > 0 else 0
    self.progress_bar.setValue(percent)

    file_name = Path(file).name
    status = "成功" if success else "失败"
    message = f"处理 {file_name}: {status} ({current}/{total})"
    self.status_label.setText(message)
    self.statusBar.showMessage(message)


@Slot(dict)
def ocr_finished(self, results):
    """Summarise a finished OCR run and unlock the UI.

    Args:
        results: Dict whose values are truthy for files that succeeded.
    """
    success_count = sum(1 for success in results.values() if success)
    total_count = len(results)

    self.status_label.setText(f"处理完成: {success_count}/{total_count} 文件成功")
    self.statusBar.showMessage(f"OCR处理完成: {success_count}/{total_count} 文件成功")

    # Re-enable the controls locked while the worker was running.
    self.enable_ui()

    # Show the final success/failure tally.
    QMessageBox.information(
        self,
        "处理完成",
        f"OCR处理已完成\n成功: {success_count} 文件\n失败: {total_count - success_count} 文件"
    )


def show_settings(self):
    """Open the settings dialog and apply the saved defaults when accepted."""
    dialog = SettingsDialog(self)
    if dialog.exec():
        # Refresh the option check boxes from the stored defaults.
        self.deskew_cb.setChecked(self.config.get('default_options.deskew', True))
        self.rotate_cb.setChecked(self.config.get('default_options.rotate_pages', True))
        self.clean_cb.setChecked(self.config.get('default_options.clean', False))
        self.optimize_cb.setChecked(self.config.get('default_options.optimize', True))


def show_batch_dialog(self):
    """Open the batch-processing dialog modally."""
    dialog = BatchDialog(self)
    dialog.exec()


def show_about(self):
    """Show the About box."""
    QMessageBox.about(
        self,
        "关于 OCRmyPDF GUI",
        "OCRmyPDF GUI v0.1.0\n\n"
        "OCRmyPDF的图形用户界面\n\n"
        "基于OCRmyPDF开源项目\n"
        "https://github.com/ocrmypdf/OCRmyPDF"
    )


def dragEnterEvent(self, event: QDragEnterEvent):
    """Accept a drag only when it carries URLs."""
    if event.mimeData().hasUrls():
        event.acceptProposedAction()


def dropEvent(self, event: QDropEvent):
    """Add dropped PDF files, and PDFs inside dropped folders, to the list.

    Folders are searched recursively. Plain files are added only when
    ``FileUtils.is_valid_pdf`` accepts them.
    """
    files = []

    for url in event.mimeData().urls():
        path = url.toLocalFile()
        if not path:
            # A URL with no local-file form yields an empty string, and
            # Path("") is the current directory, which would otherwise be
            # scanned recursively.
            continue
        if Path(path).is_dir():
            # A folder contributes every PDF found beneath it.
            files.extend(FileUtils.get_pdf_files_in_dir(path, recursive=True))
        elif FileUtils.is_valid_pdf(path):
            # A single PDF is added directly.
            files.append(path)

    if files:
        self.add_files_to_list(files)

    event.acceptProposedAction()
class SettingsDialog(QDialog):
    """Modal dialog for editing the application's persisted preferences.

    The dialog has three tabs (general, OCR, interface). Every widget is
    initialised from ``Config``, and the values are written back through
    ``Config.set`` when the dialog is accepted.
    """

    def __init__(self, parent=None):
        """Create the dialog, load the configuration and build the widgets."""
        super().__init__(parent)
        self.setWindowTitle("设置")
        self.resize(500, 400)

        self.config = Config()
        self.init_ui()

    def init_ui(self):
        """Build the tab widget and the OK / Cancel button row."""
        # QWidget is used for the tab pages but is missing from the module's
        # import list; import it here so opening the dialog does not raise
        # NameError.
        from PySide6.QtWidgets import QWidget

        main_layout = QVBoxLayout(self)

        tab_widget = QTabWidget()
        main_layout.addWidget(tab_widget)

        # General tab
        general_tab = QWidget()
        tab_widget.addTab(general_tab, "常规")
        self.setup_general_tab(general_tab)

        # OCR tab
        ocr_tab = QWidget()
        tab_widget.addTab(ocr_tab, "OCR")
        self.setup_ocr_tab(ocr_tab)

        # Interface tab
        ui_tab = QWidget()
        tab_widget.addTab(ui_tab, "界面")
        self.setup_ui_tab(ui_tab)

        # Button row, right-aligned by the leading stretch.
        button_layout = QHBoxLayout()
        button_layout.addStretch()

        self.ok_btn = QPushButton("确定")
        self.ok_btn.clicked.connect(self.accept)
        self.cancel_btn = QPushButton("取消")
        self.cancel_btn.clicked.connect(self.reject)

        button_layout.addWidget(self.ok_btn)
        button_layout.addWidget(self.cancel_btn)

        main_layout.addLayout(button_layout)

    def setup_general_tab(self, tab):
        """Populate the general tab: startup options and file history.

        Args:
            tab: The page widget that receives the layout.
        """
        layout = QVBoxLayout(tab)

        # Startup options
        startup_group = QGroupBox("启动选项")
        startup_layout = QVBoxLayout(startup_group)

        self.check_update_cb = QCheckBox("启动时检查更新")
        self.check_update_cb.setChecked(self.config.get('general.check_update_on_startup', False))

        self.show_welcome_cb = QCheckBox("显示欢迎页面")
        self.show_welcome_cb.setChecked(self.config.get('general.show_welcome', True))

        self.remember_window_cb = QCheckBox("记住窗口大小和位置")
        self.remember_window_cb.setChecked(self.config.get('general.remember_window_geometry', True))

        startup_layout.addWidget(self.check_update_cb)
        startup_layout.addWidget(self.show_welcome_cb)
        startup_layout.addWidget(self.remember_window_cb)

        # File history
        history_group = QGroupBox("文件历史")
        history_layout = QVBoxLayout(history_group)

        recent_files_layout = QHBoxLayout()
        recent_files_layout.addWidget(QLabel("最近文件数量:"))
        self.recent_files_spin = QSpinBox()
        self.recent_files_spin.setRange(0, 30)
        self.recent_files_spin.setValue(self.config.get('general.max_recent_files', 10))
        recent_files_layout.addWidget(self.recent_files_spin)
        recent_files_layout.addStretch()

        self.clear_history_btn = QPushButton("清除历史记录")
        self.clear_history_btn.clicked.connect(self.clear_history)

        history_layout.addLayout(recent_files_layout)
        history_layout.addWidget(self.clear_history_btn)

        layout.addWidget(startup_group)
        layout.addWidget(history_group)
        layout.addStretch()

    def setup_ocr_tab(self, tab):
        """Populate the OCR tab: default processing options and output type.

        Args:
            tab: The page widget that receives the layout.
        """
        layout = QVBoxLayout(tab)

        # Default processing options
        options_group = QGroupBox("默认处理选项")
        options_layout = QVBoxLayout(options_group)

        self.deskew_cb = QCheckBox("自动校正倾斜页面")
        self.deskew_cb.setChecked(self.config.get('default_options.deskew', True))

        self.rotate_cb = QCheckBox("自动旋转页面")
        self.rotate_cb.setChecked(self.config.get('default_options.rotate_pages', True))

        self.clean_cb = QCheckBox("清理图像")
        self.clean_cb.setChecked(self.config.get('default_options.clean', False))

        self.optimize_cb = QCheckBox("优化输出文件大小")
        self.optimize_cb.setChecked(self.config.get('default_options.optimize', True))

        options_layout.addWidget(self.deskew_cb)
        options_layout.addWidget(self.rotate_cb)
        options_layout.addWidget(self.clean_cb)
        options_layout.addWidget(self.optimize_cb)

        # Default output type
        output_group = QGroupBox("默认输出类型")
        output_layout = QVBoxLayout(output_group)

        self.output_type_combo = QComboBox()
        self.output_type_combo.addItems(["pdf", "pdfa", "pdfa-1", "pdfa-2", "pdfa-3"])
        self.output_type_combo.setCurrentText(self.config.get('default_options.output_type', 'pdfa'))

        output_layout.addWidget(self.output_type_combo)

        layout.addWidget(options_group)
        layout.addWidget(output_group)
        layout.addStretch()

    def setup_ui_tab(self, tab):
        """Populate the interface tab: UI language and colour theme.

        Args:
            tab: The page widget that receives the layout.
        """
        layout = QVBoxLayout(tab)

        # Interface language
        language_group = QGroupBox("界面语言")
        language_layout = QVBoxLayout(language_group)

        self.ui_language_combo = QComboBox()
        self.ui_language_combo.addItems(["简体中文", "English"])
        # Any stored value other than 'zh_CN' (including none) shows English.
        current_lang = "简体中文" if self.config.get('ui.language') == 'zh_CN' else "English"
        self.ui_language_combo.setCurrentText(current_lang)

        language_layout.addWidget(self.ui_language_combo)

        # Theme
        theme_group = QGroupBox("主题")
        theme_layout = QVBoxLayout(theme_group)

        self.light_theme_rb = QRadioButton("浅色")
        self.dark_theme_rb = QRadioButton("深色")
        self.system_theme_rb = QRadioButton("跟随系统")

        # Unknown or missing theme values fall back to "follow system".
        current_theme = self.config.get('ui.theme', 'system')
        if current_theme == 'light':
            self.light_theme_rb.setChecked(True)
        elif current_theme == 'dark':
            self.dark_theme_rb.setChecked(True)
        else:
            self.system_theme_rb.setChecked(True)

        theme_layout.addWidget(self.light_theme_rb)
        theme_layout.addWidget(self.dark_theme_rb)
        theme_layout.addWidget(self.system_theme_rb)

        layout.addWidget(language_group)
        layout.addWidget(theme_group)
        layout.addStretch()

    def clear_history(self):
        """Reset the stored recent-files and recent-output-directory lists."""
        self.config.set('recent_files', [])
        self.config.set('recent_output_dirs', [])

    def accept(self):
        """Write every widget value back to the configuration, then close."""
        # General settings
        self.config.set('general.check_update_on_startup', self.check_update_cb.isChecked())
        self.config.set('general.show_welcome', self.show_welcome_cb.isChecked())
        self.config.set('general.remember_window_geometry', self.remember_window_cb.isChecked())
        self.config.set('general.max_recent_files', self.recent_files_spin.value())

        # OCR defaults
        self.config.set('default_options.deskew', self.deskew_cb.isChecked())
        self.config.set('default_options.rotate_pages', self.rotate_cb.isChecked())
        self.config.set('default_options.clean', self.clean_cb.isChecked())
        self.config.set('default_options.optimize', self.optimize_cb.isChecked())
        self.config.set('default_options.output_type', self.output_type_combo.currentText())

        # Interface settings
        ui_lang = 'zh_CN' if self.ui_language_combo.currentText() == '简体中文' else 'en_US'
        self.config.set('ui.language', ui_lang)

        if self.light_theme_rb.isChecked():
            self.config.set('ui.theme', 'light')
        elif self.dark_theme_rb.isChecked():
            self.config.set('ui.theme', 'dark')
        else:
            self.config.set('ui.theme', 'system')

        super().accept()
class FileUtils:
    """Static helpers for the file-system operations used by the GUI."""

    @staticmethod
    def ensure_dir(dir_path):
        """Create ``dir_path`` (and any missing parents) if it does not exist.

        Args:
            dir_path: Directory path as a string or ``Path``.

        Returns:
            bool: True when the directory exists afterwards, False when it
            could not be created (the error is logged).
        """
        try:
            Path(dir_path).mkdir(parents=True, exist_ok=True)
        except (OSError, TypeError, ValueError) as e:
            logging.error("创建目录失败: %s", e)
            return False
        return True

    @staticmethod
    def is_valid_pdf(file_path):
        """Check whether ``file_path`` looks like a PDF file.

        A file qualifies when its name ends in ``.pdf`` (case-insensitive)
        and its first five bytes are ``%PDF-``.

        Args:
            file_path: File path as a string or ``Path``.

        Returns:
            bool: True for a readable file that passes both checks; False
            otherwise, including for missing paths and directories.
        """
        # Cheap check first: the extension.
        if not str(file_path).lower().endswith('.pdf'):
            return False

        # Then the magic bytes. Opening directly (instead of testing for
        # existence first) also rejects directories and unreadable files.
        try:
            with open(file_path, 'rb') as f:
                return f.read(5) == b'%PDF-'
        except (OSError, TypeError, ValueError):
            return False

    @staticmethod
    def get_pdf_files_in_dir(dir_path, recursive=False):
        """List the PDF files found in a directory.

        Args:
            dir_path: Directory to search.
            recursive: When True, sub-directories are searched as well.

        Returns:
            list: Paths (as strings) of the files accepted by
            ``is_valid_pdf``, sorted so the order does not depend on the
            file system. Empty when ``dir_path`` is not a directory.
        """
        root = Path(dir_path)
        if not root.is_dir():
            return []

        if recursive:
            candidates = (
                Path(folder) / name
                for folder, _, names in os.walk(root)
                for name in names
            )
        else:
            candidates = (entry for entry in root.iterdir() if entry.is_file())

        return sorted(str(path) for path in candidates if FileUtils.is_valid_pdf(path))

    @staticmethod
    def get_file_size_str(file_path):
        """Format the size of a file using 1024-based units.

        Args:
            file_path: File path as a string or ``Path``.

        Returns:
            str: Size with one decimal place, such as ``"1.2 MB"``, or
            ``"未知大小"`` when the size cannot be read.
        """
        try:
            size = Path(file_path).stat().st_size
        except (OSError, TypeError, ValueError):
            return "未知大小"

        for unit in ('B', 'KB', 'MB', 'GB', 'TB'):
            if size < 1024.0:
                return f"{size:.1f} {unit}"
            size /= 1024.0

        return f"{size:.1f} PB"

    @staticmethod
    def copy_file(src, dst):
        """Copy ``src`` to ``dst`` with ``shutil.copy2``.

        Args:
            src: Source file path.
            dst: Destination file or directory path.

        Returns:
            bool: True on success, False when the copy failed (the error is
            logged).
        """
        try:
            shutil.copy2(src, dst)
        except (OSError, TypeError, ValueError) as e:
            logging.error("复制文件失败: %s", e)
            return False
        return True
class LogUtils:
    """Static helpers for configuring the application's logging."""

    @staticmethod
    def setup_logging(log_file=None, console=True, level=logging.INFO):
        """(Re)configure the root logger.

        Every handler currently attached to the root logger is removed and
        closed, then the requested handlers are attached. Both use the format
        ``%(asctime)s - %(name)s - %(levelname)s - %(message)s``.

        Args:
            log_file: Path of a log file, or None for no file output. Missing
                parent directories are created; the file is written as UTF-8.
            console: When True, also log to ``sys.stdout``.
            level: Level set on the root logger.
        """
        root_logger = logging.getLogger()
        root_logger.setLevel(level)

        # Drop the previous configuration. Closing each handler releases the
        # file held by any earlier FileHandler; detaching alone would leak it
        # every time logging is reconfigured.
        for handler in root_logger.handlers[:]:
            root_logger.removeHandler(handler)
            handler.close()

        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )

        # Console output
        if console:
            console_handler = logging.StreamHandler(sys.stdout)
            console_handler.setFormatter(formatter)
            root_logger.addHandler(console_handler)

        # File output
        if log_file:
            # Make sure the directory that will hold the log file exists.
            log_path = Path(log_file)
            log_path.parent.mkdir(parents=True, exist_ok=True)

            file_handler = logging.FileHandler(log_file, encoding='utf-8')
            file_handler.setFormatter(formatter)
            root_logger.addHandler(file_handler)

    @staticmethod
    def get_qt_handler():
        """Create a ``QtLogHandler`` that formats records as the bare message.

        Returns:
            QtLogHandler: A new handler instance, not yet attached to any
            logger.
        """
        handler = QtLogHandler()
        handler.setFormatter(logging.Formatter('%(message)s'))
        return handler

    @staticmethod
    def add_qt_handler(logger_name=None):
        """Attach a new Qt log handler to a logger.

        Each call creates and attaches another handler, so call it once per
        logger to avoid duplicated messages.

        Args:
            logger_name: Name of the target logger; None selects the root
                logger.

        Returns:
            QtLogHandler: The handler that was attached.
        """
        logger = logging.getLogger(logger_name)
        handler = LogUtils.get_qt_handler()
        logger.addHandler(handler)
        return handler