diff --git a/ocrmypdf-gui/.idea/.gitignore b/.idea/.gitignore similarity index 100% rename from ocrmypdf-gui/.idea/.gitignore rename to .idea/.gitignore diff --git a/ocrmypdf-gui/.idea/OCRmyPDF-GUI.iml b/.idea/OCRmyPDF-GUI.iml similarity index 100% rename from ocrmypdf-gui/.idea/OCRmyPDF-GUI.iml rename to .idea/OCRmyPDF-GUI.iml diff --git a/ocrmypdf-gui/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml similarity index 100% rename from ocrmypdf-gui/.idea/inspectionProfiles/profiles_settings.xml rename to .idea/inspectionProfiles/profiles_settings.xml diff --git a/ocrmypdf-gui/.idea/misc.xml b/.idea/misc.xml similarity index 62% rename from ocrmypdf-gui/.idea/misc.xml rename to .idea/misc.xml index 35d9e0e..7199a1e 100644 --- a/ocrmypdf-gui/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,7 @@ + + \ No newline at end of file diff --git a/ocrmypdf-gui/.idea/modules.xml b/.idea/modules.xml similarity index 100% rename from ocrmypdf-gui/.idea/modules.xml rename to .idea/modules.xml diff --git a/ocrmypdf-gui/.idea/vcs.xml b/.idea/vcs.xml similarity index 100% rename from ocrmypdf-gui/.idea/vcs.xml rename to .idea/vcs.xml diff --git a/PDF/扫描书籍_小王子(The Little Princ(1).pdf b/PDF/扫描书籍_小王子(The Little Princ(1).pdf new file mode 100644 index 0000000..817d767 Binary files /dev/null and b/PDF/扫描书籍_小王子(The Little Princ(1).pdf differ diff --git a/PDF/英文.pdf b/PDF/英文.pdf new file mode 100644 index 0000000..47d357d Binary files /dev/null and b/PDF/英文.pdf differ diff --git a/PDF/英文2.pdf b/PDF/英文2.pdf new file mode 100644 index 0000000..47d357d Binary files /dev/null and b/PDF/英文2.pdf differ diff --git a/PDF/英文3.pdf b/PDF/英文3.pdf new file mode 100644 index 0000000..8363db4 Binary files /dev/null and b/PDF/英文3.pdf differ diff --git a/README.md b/README.md new file mode 100644 index 0000000..a64adc3 --- /dev/null +++ b/README.md @@ -0,0 +1,141 @@ +# OCRmyPDF GUI + +OCRmyPDF的图形用户界面,让OCR处理PDF文件变得简单。 + +## 功能特点 + +- 简洁直观的图形界面 +- 批量处理PDF文件 +- 拖放支持 
+- 多语言OCR支持 +- 可自定义OCR选项 +- 保存处理配置 + +## 安装要求 + +- Python 3.7+ +- OCRmyPDF +- Tesseract OCR +- PySide6 (Qt for Python) + +## 安装步骤 + +1. 安装OCRmyPDF和其依赖: + +```bash +# macOS +brew install ocrmypdf + +# Ubuntu/Debian +apt install ocrmypdf + +# 或使用pip +pip install ocrmypdf +``` + +2. 安装GUI依赖: + +```bash +pip install PySide6 +``` + +3. 克隆本仓库: + +```bash +git clone https://github.com/yourusername/OCRmyPDF-GUI.git +cd OCRmyPDF-GUI +``` + +## 安装Tesseract语言包 + +默认情况下,OCRmyPDF只安装英语语言包。要使用其他语言进行OCR,需要安装额外的语言包: + +### macOS + +```bash +# 安装所有语言包 +brew install tesseract-lang + +# 或者手动安装特定语言包 +# 1. 下载语言包文件,例如简体中文: +# https://github.com/tesseract-ocr/tessdata/raw/main/chi_sim.traineddata +# 2. 复制到Tesseract的tessdata目录: +# sudo cp chi_sim.traineddata /opt/homebrew/share/tessdata/ +# 或 +# sudo cp chi_sim.traineddata /usr/local/share/tessdata/ +``` + +### Ubuntu/Debian + +```bash +# 安装特定语言包,例如简体中文: +sudo apt-get install tesseract-ocr-chi-sim + +# 查看所有可用语言包: +apt-cache search tesseract-ocr +``` + +### Fedora + +```bash +# 安装特定语言包,例如简体中文: +sudo dnf install tesseract-langpack-chi_sim + +# 查看所有可用语言包: +dnf search tesseract +``` + +### Windows + +1. 从以下网址下载所需语言包文件: + https://github.com/tesseract-ocr/tessdata/ + +2. 
将下载的`.traineddata`文件放置在Tesseract安装目录的tessdata文件夹中,通常位于: + `C:\Program Files\Tesseract-OCR\tessdata` + +### 常用语言代码 + +- `eng` - 英语 +- `chi_sim` - 简体中文 +- `chi_tra` - 繁体中文 +- `jpn` - 日语 +- `kor` - 韩语 +- `fra` - 法语 +- `deu` - 德语 +- `rus` - 俄语 +- `spa` - 西班牙语 +- `ita` - 意大利语 + +更多信息请参考:[OCRmyPDF语言包文档](https://ocrmypdf.readthedocs.io/en/latest/languages.html) + +## 使用方法 + +运行启动脚本: + +```bash +python run.py +``` + +或在Windows上双击`run.py`文件。 + +## 开发计划 + +- [ ] 高级OCR选项 +- [ ] 多语言界面 +- [ ] 暗黑模式 +- [ ] 自定义输出文件名模板 +- [ ] 处理历史记录 + +## 贡献 + +欢迎提交Pull Request或Issue。 + +## 许可证 + +本项目采用与OCRmyPDF相同的许可证。 + +## 致谢 + +- [OCRmyPDF](https://github.com/ocrmypdf/OCRmyPDF) - 强大的OCR工具 +- [Tesseract OCR](https://github.com/tesseract-ocr/tesseract) - OCR引擎 +- [Qt for Python (PySide6)](https://wiki.qt.io/Qt_for_Python) - GUI框架 \ No newline at end of file diff --git a/ocrmypdf-gui/docs/文档模板-开源软件维护报告文档.docx b/docs/文档模板-开源软件维护报告文档.docx similarity index 87% rename from ocrmypdf-gui/docs/文档模板-开源软件维护报告文档.docx rename to docs/文档模板-开源软件维护报告文档.docx index 1d921df..30df1d2 100644 Binary files a/ocrmypdf-gui/docs/文档模板-开源软件维护报告文档.docx and b/docs/文档模板-开源软件维护报告文档.docx differ diff --git a/ocrmypdf-gui/.DS_Store b/ocrmypdf-gui/.DS_Store deleted file mode 100644 index 3487410..0000000 Binary files a/ocrmypdf-gui/.DS_Store and /dev/null differ diff --git a/ocrmypdf-gui/README.md b/ocrmypdf-gui/README.md deleted file mode 100644 index 1041de9..0000000 --- a/ocrmypdf-gui/README.md +++ /dev/null @@ -1,79 +0,0 @@ -# OCRmyPDF GUI - -OCRmyPDF的图形用户界面,让OCR处理PDF文件变得简单。 - -## 功能特点 - -- 简洁直观的图形界面 -- 批量处理PDF文件 -- 拖放支持 -- 多语言OCR支持 -- 可自定义OCR选项 -- 保存处理配置 - -## 安装要求 - -- Python 3.7+ -- OCRmyPDF -- Tesseract OCR -- PySide6 (Qt for Python) - -## 安装步骤 - -1. 安装OCRmyPDF和其依赖: - -```bash -# macOS -brew install ocrmypdf - -# Ubuntu/Debian -apt install ocrmypdf - -# 或使用pip -pip install ocrmypdf -``` - -2. 安装GUI依赖: - -```bash -pip install PySide6 -``` - -3. 
克隆本仓库: - -```bash -git clone https://github.com/yourusername/OCRmyPDF-GUI.git -cd OCRmyPDF-GUI -``` - -## 使用方法 - -运行启动脚本: - -```bash -python run.py -``` - -或在Windows上双击`run.py`文件。 - -## 开发计划 - -- [ ] 高级OCR选项 -- [ ] 多语言界面 -- [ ] 暗黑模式 -- [ ] 自定义输出文件名模板 -- [ ] 处理历史记录 - -## 贡献 - -欢迎提交Pull Request或Issue。 - -## 许可证 - -本项目采用与OCRmyPDF相同的许可证。 - -## 致谢 - -- [OCRmyPDF](https://github.com/ocrmypdf/OCRmyPDF) - 强大的OCR工具 -- [Tesseract OCR](https://github.com/tesseract-ocr/tesseract) - OCR引擎 -- [Qt for Python (PySide6)](https://wiki.qt.io/Qt_for_Python) - GUI框架 \ No newline at end of file diff --git a/ocrmypdf-gui/requirements.txt b/requirements.txt similarity index 100% rename from ocrmypdf-gui/requirements.txt rename to requirements.txt diff --git a/ocrmypdf-gui/run.py b/run.py similarity index 100% rename from ocrmypdf-gui/run.py rename to run.py diff --git a/ocrmypdf-gui/src/__init__.py b/src/__init__.py similarity index 100% rename from ocrmypdf-gui/src/__init__.py rename to src/__init__.py diff --git a/ocrmypdf-gui/src/__pycache__/__init__.cpython-313.pyc b/src/__pycache__/__init__.cpython-313.pyc similarity index 100% rename from ocrmypdf-gui/src/__pycache__/__init__.cpython-313.pyc rename to src/__pycache__/__init__.cpython-313.pyc diff --git a/ocrmypdf-gui/src/__pycache__/__init__.cpython-39.pyc b/src/__pycache__/__init__.cpython-39.pyc similarity index 100% rename from ocrmypdf-gui/src/__pycache__/__init__.cpython-39.pyc rename to src/__pycache__/__init__.cpython-39.pyc diff --git a/ocrmypdf-gui/src/__pycache__/main.cpython-313.pyc b/src/__pycache__/main.cpython-313.pyc similarity index 100% rename from ocrmypdf-gui/src/__pycache__/main.cpython-313.pyc rename to src/__pycache__/main.cpython-313.pyc diff --git a/ocrmypdf-gui/src/__pycache__/main.cpython-39.pyc b/src/__pycache__/main.cpython-39.pyc similarity index 100% rename from ocrmypdf-gui/src/__pycache__/main.cpython-39.pyc rename to src/__pycache__/main.cpython-39.pyc diff --git a/ocrmypdf-gui/src/core/__init__.py 
def get_available_languages(self):
    """Return the Tesseract language codes installed on this system.

    Runs ``tesseract --list-langs`` and parses the listing it prints.
    The first line of the listing is treated as a header and dropped;
    every remaining non-blank line is returned, stripped, in the order
    reported.

    Returns:
        list[str]: Installed language codes. Empty when the command
        cannot be run, exits with a non-zero status, or lists nothing.
    """
    try:
        result = subprocess.run(
            ["tesseract", "--list-langs"],
            capture_output=True,
            text=True,
            check=False
        )
    except Exception as e:
        # Deliberately broad: this is a best-effort probe, and a missing
        # binary or undecodable output must not take the caller down.
        self.logger.error(f"获取语言列表失败: {e}")
        return []

    if result.returncode != 0:
        return []

    # NOTE(review): some Tesseract builds are reported to print the listing
    # on stderr instead of stdout; fall back to it when stdout is blank.
    listing = result.stdout if result.stdout.strip() else result.stderr

    # Drop the header line, then discard blank entries so callers never
    # receive an empty language code.
    entries = listing.strip().splitlines()[1:]
    return [entry.strip() for entry in entries if entry.strip()]


def get_language_name(self, lang_code):
    """Return the display name for a Tesseract language code.

    Args:
        lang_code: Language code such as ``'eng'`` or ``'chi_sim'``.

    Returns:
        The display name ("Chinese name (English name)") when the code is
        in the built-in table; otherwise ``lang_code`` itself, unchanged.
    """
    language_names = {
        'eng': '英语 (English)',
        'chi_sim': '简体中文 (Chinese Simplified)',
        'chi_tra': '繁体中文 (Chinese Traditional)',
        'jpn': '日语 (Japanese)',
        'kor': '韩语 (Korean)',
        'fra': '法语 (French)',
        'deu': '德语 (German)',
        'rus': '俄语 (Russian)',
        'spa': '西班牙语 (Spanish)',
        'ita': '意大利语 (Italian)',
        'por': '葡萄牙语 (Portuguese)',
        'nld': '荷兰语 (Dutch)',
        'ara': '阿拉伯语 (Arabic)',
        'hin': '印地语 (Hindi)',
        'vie': '越南语 (Vietnamese)',
        'tha': '泰语 (Thai)',
        'tur': '土耳其语 (Turkish)',
        'heb': '希伯来语 (Hebrew)',
        'swe': '瑞典语 (Swedish)',
        'fin': '芬兰语 (Finnish)',
        'dan': '丹麦语 (Danish)',
        'nor': '挪威语 (Norwegian)',
        'pol': '波兰语 (Polish)',
        'ukr': '乌克兰语 (Ukrainian)',
        'ces': '捷克语 (Czech)',
        'slk': '斯洛伐克语 (Slovak)',
        'hun': '匈牙利语 (Hungarian)',
        'ron': '罗马尼亚语 (Romanian)',
        'bul': '保加利亚语 (Bulgarian)',
        'ell': '希腊语 (Greek)',
        'ind': '印度尼西亚语 (Indonesian)',
        'msa': '马来语 (Malay)',
        'cat': '加泰罗尼亚语 (Catalan)',
        'lav': '拉脱维亚语 (Latvian)',
        'lit': '立陶宛语 (Lithuanian)',
        'est': '爱沙尼亚语 (Estonian)'
    }
    # Unknown codes fall through unchanged so they can still be shown.
    return language_names.get(lang_code, lang_code)
options: cmd.extend(["--jobs", str(options['jobs'])]) diff --git a/ocrmypdf-gui/src/gui/__init__.py b/src/gui/__init__.py similarity index 100% rename from ocrmypdf-gui/src/gui/__init__.py rename to src/gui/__init__.py diff --git a/ocrmypdf-gui/src/gui/__pycache__/__init__.cpython-39.pyc b/src/gui/__pycache__/__init__.cpython-39.pyc similarity index 100% rename from ocrmypdf-gui/src/gui/__pycache__/__init__.cpython-39.pyc rename to src/gui/__pycache__/__init__.cpython-39.pyc diff --git a/ocrmypdf-gui/src/gui/__pycache__/batch_dialog.cpython-39.pyc b/src/gui/__pycache__/batch_dialog.cpython-39.pyc similarity index 100% rename from ocrmypdf-gui/src/gui/__pycache__/batch_dialog.cpython-39.pyc rename to src/gui/__pycache__/batch_dialog.cpython-39.pyc diff --git a/ocrmypdf-gui/src/gui/__pycache__/main_window.cpython-39.pyc b/src/gui/__pycache__/main_window.cpython-39.pyc similarity index 100% rename from ocrmypdf-gui/src/gui/__pycache__/main_window.cpython-39.pyc rename to src/gui/__pycache__/main_window.cpython-39.pyc diff --git a/ocrmypdf-gui/src/gui/__pycache__/settings.cpython-39.pyc b/src/gui/__pycache__/settings.cpython-39.pyc similarity index 100% rename from ocrmypdf-gui/src/gui/__pycache__/settings.cpython-39.pyc rename to src/gui/__pycache__/settings.cpython-39.pyc diff --git a/ocrmypdf-gui/src/gui/batch_dialog.py b/src/gui/batch_dialog.py similarity index 80% rename from ocrmypdf-gui/src/gui/batch_dialog.py rename to src/gui/batch_dialog.py index 050abc6..72e42b9 100644 --- a/ocrmypdf-gui/src/gui/batch_dialog.py +++ b/src/gui/batch_dialog.py @@ -108,6 +108,54 @@ class BatchDialog(QDialog): ocr_group = QGroupBox("OCR选项") ocr_layout = QVBoxLayout(ocr_group) + # 语言选择 + language_layout = QHBoxLayout() + language_layout.addWidget(QLabel("OCR语言:")) + self.language_combo = QComboBox() + self.language_combo.setToolTip("选择OCR识别使用的语言") + + # 添加可用的语言 + # 常用语言列表 + common_langs = ['eng', 'chi_sim', 'chi_tra', 'jpn', 'kor'] + + # 首先添加常用语言 + if 
self.ocr_engine.available_languages: + # 添加常用语言组 + common_available = [lang for lang in common_langs if lang in self.ocr_engine.available_languages] + if common_available: + self.language_combo.addItem("--- 常用语言 ---", None) + for lang_code in common_available: + lang_name = self.ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + + # 添加其他语言组 + other_available = [lang for lang in self.ocr_engine.available_languages + if lang not in common_langs] + if other_available: + self.language_combo.addItem("--- 其他语言 ---", None) + # 按名称排序 + other_langs_sorted = sorted( + other_available, + key=lambda x: self.ocr_engine.get_language_name(x) + ) + for lang_code in other_langs_sorted: + lang_name = self.ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + else: + # 如果没有常用语言,直接添加所有语言 + for lang_code in self.ocr_engine.available_languages: + lang_name = self.ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + + # 设置默认语言 + default_lang = self.config.get('default_options.language', 'eng') + index = self.language_combo.findData(default_lang) + if index >= 0: + self.language_combo.setCurrentIndex(index) + + language_layout.addWidget(self.language_combo) + ocr_layout.addLayout(language_layout) + # 使用配置文件 config_layout = QHBoxLayout() config_layout.addWidget(QLabel("使用配置文件:")) @@ -273,12 +321,31 @@ class BatchDialog(QDialog): return # 收集OCR选项 - options = { + options = {} + + # 获取选中的语言代码 + lang_index = self.language_combo.currentIndex() + lang_data = self.language_combo.itemData(lang_index) + if lang_data: # 确保不是分隔符 + options["language"] = lang_data + else: + # 如果选中了分隔符,尝试找到下一个有效选项 + for i in range(lang_index + 1, self.language_combo.count()): + next_data = self.language_combo.itemData(i) + if next_data: + self.language_combo.setCurrentIndex(i) + options["language"] = next_data + break + # 如果没有找到,使用默认语言 + if "language" not in options: + options["language"] = "eng" + 
+ options.update({ "deskew": self.deskew_cb.isChecked(), "rotate_pages": self.rotate_cb.isChecked(), "clean": self.clean_cb.isChecked(), "optimize": self.optimize_cb.isChecked() - } + }) # 禁用UI元素 self.start_btn.setEnabled(False) diff --git a/ocrmypdf-gui/src/gui/main_window.py b/src/gui/main_window.py similarity index 82% rename from ocrmypdf-gui/src/gui/main_window.py rename to src/gui/main_window.py index b0a617b..bd2587a 100644 --- a/ocrmypdf-gui/src/gui/main_window.py +++ b/src/gui/main_window.py @@ -109,6 +109,54 @@ class MainWindow(QMainWindow): options_group = QGroupBox("OCR选项") options_layout = QVBoxLayout(options_group) + # 语言选择 + language_layout = QHBoxLayout() + language_layout.addWidget(QLabel("OCR语言:")) + self.language_combo = QComboBox() + self.language_combo.setToolTip("选择OCR识别使用的语言") + + # 添加可用的语言 + # 常用语言列表 + common_langs = ['eng', 'chi_sim', 'chi_tra', 'jpn', 'kor'] + + # 首先添加常用语言 + if self.ocr_engine.available_languages: + # 添加常用语言组 + common_available = [lang for lang in common_langs if lang in self.ocr_engine.available_languages] + if common_available: + self.language_combo.addItem("--- 常用语言 ---", None) + for lang_code in common_available: + lang_name = self.ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + + # 添加其他语言组 + other_available = [lang for lang in self.ocr_engine.available_languages + if lang not in common_langs] + if other_available: + self.language_combo.addItem("--- 其他语言 ---", None) + # 按名称排序 + other_langs_sorted = sorted( + other_available, + key=lambda x: self.ocr_engine.get_language_name(x) + ) + for lang_code in other_langs_sorted: + lang_name = self.ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + else: + # 如果没有常用语言,直接添加所有语言 + for lang_code in self.ocr_engine.available_languages: + lang_name = self.ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + + # 设置默认语言 + default_lang = 
self.config.get('default_options.language', 'eng') + index = self.language_combo.findData(default_lang) + if index >= 0: + self.language_combo.setCurrentIndex(index) + + language_layout.addWidget(self.language_combo) + options_layout.addLayout(language_layout) + # 处理选项 self.deskew_cb = QCheckBox("自动校正倾斜页面") self.deskew_cb.setChecked(self.config.get('default_options.deskew', True)) @@ -284,12 +332,31 @@ class MainWindow(QMainWindow): return # 收集OCR选项 - options = { + options = {} + + # 获取选中的语言代码 + lang_index = self.language_combo.currentIndex() + lang_data = self.language_combo.itemData(lang_index) + if lang_data: # 确保不是分隔符 + options["language"] = lang_data + else: + # 如果选中了分隔符,尝试找到下一个有效选项 + for i in range(lang_index + 1, self.language_combo.count()): + next_data = self.language_combo.itemData(i) + if next_data: + self.language_combo.setCurrentIndex(i) + options["language"] = next_data + break + # 如果没有找到,使用默认语言 + if "language" not in options: + options["language"] = "eng" + + options.update({ "deskew": self.deskew_cb.isChecked(), "rotate_pages": self.rotate_cb.isChecked(), "clean": self.clean_cb.isChecked(), "optimize": self.optimize_cb.isChecked() - } + }) # 禁用UI元素 self.start_btn.setEnabled(False) diff --git a/ocrmypdf-gui/src/gui/settings.py b/src/gui/settings.py similarity index 56% rename from ocrmypdf-gui/src/gui/settings.py rename to src/gui/settings.py index 5aeef56..c6237a3 100644 --- a/ocrmypdf-gui/src/gui/settings.py +++ b/src/gui/settings.py @@ -1,11 +1,12 @@ from PySide6.QtWidgets import ( QDialog, QVBoxLayout, QHBoxLayout, QTabWidget, QPushButton, QLabel, QComboBox, QCheckBox, - QGroupBox, QSpinBox, QRadioButton + QGroupBox, QSpinBox, QRadioButton, QMessageBox ) from PySide6.QtCore import Qt from src.core.config import Config +from src.core.ocr_engine import OCREngine class SettingsDialog(QDialog): """设置对话框""" @@ -103,6 +104,62 @@ class SettingsDialog(QDialog): """设置OCR选项卡""" layout = QVBoxLayout(tab) + # 默认语言 + language_group = QGroupBox("默认OCR语言") + 
language_layout = QVBoxLayout(language_group) + + self.language_combo = QComboBox() + self.language_combo.setToolTip("选择默认的OCR识别语言") + + # 添加可用的语言 + ocr_engine = OCREngine() + # 常用语言列表 + common_langs = ['eng', 'chi_sim', 'chi_tra', 'jpn', 'kor'] + + # 首先添加常用语言 + if ocr_engine.available_languages: + # 添加常用语言组 + common_available = [lang for lang in common_langs if lang in ocr_engine.available_languages] + if common_available: + self.language_combo.addItem("--- 常用语言 ---", None) + for lang_code in common_available: + lang_name = ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + + # 添加其他语言组 + other_available = [lang for lang in ocr_engine.available_languages + if lang not in common_langs] + if other_available: + self.language_combo.addItem("--- 其他语言 ---", None) + # 按名称排序 + other_langs_sorted = sorted( + other_available, + key=lambda x: ocr_engine.get_language_name(x) + ) + for lang_code in other_langs_sorted: + lang_name = ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + else: + # 如果没有常用语言,直接添加所有语言 + for lang_code in ocr_engine.available_languages: + lang_name = ocr_engine.get_language_name(lang_code) + self.language_combo.addItem(lang_name, lang_code) + + # 设置当前默认语言 + default_lang = self.config.get('default_options.language', 'eng') + index = self.language_combo.findData(default_lang) + if index >= 0: + self.language_combo.setCurrentIndex(index) + + # 添加刷新语言列表按钮 + lang_buttons_layout = QHBoxLayout() + self.refresh_langs_btn = QPushButton("刷新语言列表") + self.refresh_langs_btn.clicked.connect(self.refresh_languages) + lang_buttons_layout.addWidget(self.refresh_langs_btn) + + language_layout.addWidget(self.language_combo) + language_layout.addLayout(lang_buttons_layout) + # 默认选项 options_group = QGroupBox("默认处理选项") options_layout = QVBoxLayout(options_group) @@ -191,6 +248,23 @@ class SettingsDialog(QDialog): self.config.set('general.max_recent_files', self.recent_files_spin.value()) # 
def refresh_languages(self):
    """Re-scan the installed Tesseract languages and rebuild the combo box.

    The combo box is cleared and refilled in two groups, each preceded by
    a header row whose item data is ``None``: the common languages that
    are installed (in their fixed order), then every other installed
    language sorted by display name. The previously selected language is
    re-selected when it is still present; otherwise the first real
    language row is selected so the combo never rests on a header row.
    An information dialog reports how many languages were found.
    """
    ocr_engine = OCREngine()
    # Query explicitly rather than relying on whatever the constructor left
    # in available_languages (presumably empty when its own tool check
    # fails - confirm against OCREngine).
    ocr_engine.available_languages = ocr_engine.get_available_languages()
    available = ocr_engine.available_languages

    # Remember the selection before the list is wiped.
    current_lang = self.language_combo.currentData()

    self.language_combo.clear()

    common_langs = ['eng', 'chi_sim', 'chi_tra', 'jpn', 'kor']
    common_available = [lang for lang in common_langs if lang in available]
    other_available = sorted(
        (lang for lang in available if lang not in common_langs),
        key=ocr_engine.get_language_name
    )

    groups = (
        ("--- 常用语言 ---", common_available),
        ("--- 其他语言 ---", other_available),
    )
    for header, codes in groups:
        if not codes:
            continue
        # Header rows carry None as data so they can be told apart from
        # real languages.
        self.language_combo.addItem(header, None)
        for lang_code in codes:
            self.language_combo.addItem(
                ocr_engine.get_language_name(lang_code), lang_code
            )

    # Restore the previous selection; never look up None, which is the
    # data value of the header rows.
    index = self.language_combo.findData(current_lang) if current_lang else -1
    if index < 0:
        # Previous language is gone (or a header was selected): pick the
        # first row that holds a real language code.
        index = next(
            (i for i in range(self.language_combo.count())
             if self.language_combo.itemData(i)),
            -1
        )
    if index >= 0:
        self.language_combo.setCurrentIndex(index)

    QMessageBox.information(self, "刷新完成", f"已刷新语言列表,共找到 {len(ocr_engine.available_languages)} 种语言。")


def download_language_pack(self):
    """Do nothing; the language-pack download feature has been removed.

    NOTE(review): presumably kept so existing references to this method do
    not break - confirm there are none and delete it.
    """
    pass