import torch
from PIL import Image
from transformers import pipeline, BlipProcessor, BlipForConditionalGeneration
from paddleocr import PaddleOCR


class translate_models:
    @staticmethod
    def translate_models(file_path, flag, pairs):
        """Run OCR (flag=1) or image captioning plus translation (flag=2) on the image at file_path.

        pairs selects the translation direction (1 = English to Chinese by default).
        In practice, translating English into languages other than Chinese has proven harder.
        """
        # Example inputs used during development (kept for reference):
        # image_path_identify = 'D:\\py\\pythonProject\\image\\00056221.jpg'                      # image for OCR
        # image_path_understand = r"C:\Users\16173\Desktop\ccf_cv\blue-throat-7077261__340.jpg"   # image for captioning

        # Helsinki-NLP MarianMT translation models, in the order selected by `pairs`:
        language_pairs = {
            "en-zh": "Helsinki-NLP/opus-mt-en-zh",  # 1: English to Chinese
            "zh-en": "Helsinki-NLP/opus-mt-zh-en",  # 2: Chinese to English
            "en-fr": "Helsinki-NLP/opus-mt-en-fr",  # 3: English to French
            "fr-en": "Helsinki-NLP/opus-mt-fr-en",  # 4: French to English
            "en-es": "Helsinki-NLP/opus-mt-en-es",  # 5: English to Spanish
            "es-en": "Helsinki-NLP/opus-mt-es-en",  # 6: Spanish to English
            "en-de": "Helsinki-NLP/opus-mt-en-de",  # 7: English to German
            "de-en": "Helsinki-NLP/opus-mt-de-en",  # 8: German to English
            "en-ru": "Helsinki-NLP/opus-mt-en-ru",  # 9: English to Russian
            "ru-en": "Helsinki-NLP/opus-mt-ru-en",  # 10: Russian to English
            "zh-fr": "Helsinki-NLP/opus-mt-zh-fr",  # 11: Chinese to French
            "fr-zh": "Helsinki-NLP/opus-mt-fr-zh",  # 12: French to Chinese
        }
        # Map the 1-based numeric `pairs` argument onto the dictionary keys above.
        pair_keys = list(language_pairs)

        if flag == 1:
            # --- Text recognition (OCR) ---
            image_path_identify = file_path  # path to the image used for OCR
            if torch.cuda.is_available():
                ocr = PaddleOCR(use_angle_cls=True, lang='ch', use_gpu=True)
                print("Running OCR on GPU")
            else:
                ocr = PaddleOCR(use_angle_cls=True, lang='ch', use_gpu=False)
                print("Running OCR on CPU")
            result = ocr.ocr(image_path_identify, cls=True)
            # Keep only the recognized text, dropping box coordinates and confidences.
            extracted_text = [res[1][0] for res in result[0]]
            print("OCR results:")
            for text in extracted_text:
                print(text)
            # Return the recognized text so the caller gets a value on this path as well.
            return "\n".join(extracted_text)

        if flag == 2:
            # --- Image captioning ---
            image_path_understand = file_path  # path to the image used for captioning
            processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
            model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
            image = Image.open(image_path_understand)
            # Encode the image and generate an English caption.
            inputs = processor(images=image, return_tensors="pt")
            output = model.generate(**inputs)
            description = processor.decode(output[0], skip_special_tokens=True)
            print(description)

            # Translate the caption with the model selected by `pairs`.
            language_pair = pair_keys[pairs - 1]
            translator = pipeline("translation", model=language_pairs[language_pair])
            result = translator(description, max_length=40)
            translation_text = result[0]['translation_text']  # this value is passed to the frontend
            print(translation_text)
            return translation_text
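

# Minimal usage sketch (assumption: the image file names below are placeholders, not files
# shipped with this project). flag=2 captions the image with BLIP and translates the caption
# using the pair selected by `pairs` (1 = English to Chinese); flag=1 runs PaddleOCR only.
if __name__ == "__main__":
    # Caption an image and translate the English caption into Chinese.
    caption_zh = translate_models.translate_models("sample.jpg", flag=2, pairs=1)
    print(caption_zh)

    # Recognize text in a scanned image; the recognized lines are printed and returned.
    recognized = translate_models.translate_models("scanned_doc.jpg", flag=1, pairs=1)
    print(recognized)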