# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
#
# 124 lines
# 3.3 KiB

import os
import shutil
from pathlib import Path
import logging
class FileUtils:
    """File helpers: directory creation, PDF detection, size formatting, copying.

    Every method is a ``@staticmethod`` and is best-effort: failures are
    reported through the return value (and logged where noted) instead of
    being raised to the caller.
    """

    @staticmethod
    def ensure_dir(dir_path) -> bool:
        """Ensure a directory exists, creating it and any missing parents.

        Args:
            dir_path: Directory path (string or path-like object).

        Returns:
            bool: True if the directory exists after the call, False if it
            could not be created (the error is logged).
        """
        try:
            Path(dir_path).mkdir(parents=True, exist_ok=True)
        except Exception as e:
            # Lazy %-style args: same message text, formatted by logging.
            logging.error("创建目录失败: %s", e)
            return False
        return True

    @staticmethod
    def is_valid_pdf(file_path) -> bool:
        """Check whether a path points to a file that looks like a PDF.

        A file qualifies when it is a regular file, its name ends with
        ``.pdf`` (case-insensitive) and its first five bytes are ``%PDF-``.

        Args:
            file_path: File path (string or path-like object).

        Returns:
            bool: True if all checks pass; False otherwise, including when
            the path is invalid or the file cannot be read.
        """
        try:
            # Path construction happens inside the try so that invalid input
            # (e.g. None) yields False instead of raising.
            candidate = Path(file_path)
            if not candidate.is_file():
                return False
            # Cheap extension check before touching the file contents.
            if not str(file_path).lower().endswith('.pdf'):
                return False
            with candidate.open('rb') as f:
                return f.read(5) == b'%PDF-'
        except Exception:
            # Best-effort probe: any failure means "not a valid PDF".
            return False

    @staticmethod
    def get_pdf_files_in_dir(dir_path, recursive: bool = False) -> list:
        """Collect the valid PDF files found in a directory.

        Args:
            dir_path: Directory path (string or path-like object).
            recursive: When True, subdirectories are searched as well.

        Returns:
            list: Paths (as strings) of files accepted by ``is_valid_pdf``,
            sorted so the result is deterministic. Empty if ``dir_path`` is
            not an existing directory.
        """
        base_dir = Path(dir_path)
        if not base_dir.is_dir():
            return []

        if recursive:
            candidates = (
                Path(root) / name
                for root, _, names in os.walk(base_dir)
                for name in names
            )
        else:
            candidates = base_dir.iterdir()

        # is_valid_pdf rejects anything that is not a regular file, so
        # directories yielded by iterdir() are filtered out here too.
        return sorted(
            str(path) for path in candidates if FileUtils.is_valid_pdf(path)
        )

    @staticmethod
    def get_file_size_str(file_path) -> str:
        """Return a human-readable size for a file, e.g. ``"1.2 MB"``.

        The size is divided by 1024 per step and shown with one decimal.

        Args:
            file_path: File path (string or path-like object).

        Returns:
            str: The formatted size, or ``"未知大小"`` when the size cannot
            be determined (for example, the file does not exist).
        """
        try:
            size = float(Path(file_path).stat().st_size)
        except Exception:
            return "未知大小"

        for unit in ('B', 'KB', 'MB', 'GB', 'TB'):
            if size < 1024.0:
                return f"{size:.1f} {unit}"
            size /= 1024.0
        # Anything at or beyond 1024 TB is reported in petabytes.
        return f"{size:.1f} PB"

    @staticmethod
    def copy_file(src, dst) -> bool:
        """Copy a file with ``shutil.copy2``.

        The destination's parent directory is not created by this method.

        Args:
            src: Source file path.
            dst: Destination file path, or an existing directory.

        Returns:
            bool: True on success, False if the copy failed (the error is
            logged).
        """
        try:
            shutil.copy2(src, dst)
        except Exception as e:
            logging.error("复制文件失败: %s", e)
            return False
        return True