# RealTime_ScreenScanning_Tra.../src/screen-ocr-1.0.0/ocr_app/utils.py

import base64
import hashlib
import hmac
import io
import json
import os
import time

import mss
import requests
from paddleocr import PaddleOCR
from PIL import Image, ImageDraw, ImageFont


def take_screenshot():
    """Grab a screenshot with ``mss`` and write it to ``result.jpg``.

    The capture goes through ``mss``'s ``shot`` helper with only the output
    path specified, so monitor selection is whatever ``shot`` defaults to.
    The path is relative to the current working directory. Nothing is
    returned.
    """
    with mss.mss() as grabber:
        # NOTE(review): the original comment described this as capturing the
        # whole screen; confirm against the mss docs which monitor `shot`
        # grabs by default and which image encoding it writes.
        grabber.shot(output="result.jpg")

def sign_request_tencent(secret_id, secret_key, method, endpoint, uri, params,
                         service='tmt', timestamp=None):
    """Build a TC3-HMAC-SHA256 ``Authorization`` header value.

    Only the ``content-type`` (fixed to ``application/json``) and ``host``
    headers are signed, and the query string is treated as empty.

    Args:
        secret_id: Credential id placed in the ``Credential=`` field.
        secret_key: Secret used to derive the signing key.
        method: HTTP method, used verbatim in the canonical request.
        endpoint: Host name, used as the signed ``host`` header value.
        uri: Canonical URI of the request.
        params: JSON-serialisable request payload. The payload hash is taken
            over ``json.dumps(params)``, so the request body that is actually
            sent must be produced by the same serialisation.
        service: Service name used in the credential scope and in the key
            derivation. Defaults to ``'tmt'``, the previously hard-coded value.
        timestamp: Unix timestamp (seconds) to sign with. Defaults to the
            current time; passing it explicitly makes the result reproducible.

    Returns:
        A ``(authorization, timestamp)`` tuple: the header value and the
        integer timestamp that was signed.
    """
    timestamp = int(time.time()) if timestamp is None else int(timestamp)
    # The credential scope uses the UTC calendar date of the timestamp.
    date = time.strftime('%Y-%m-%d', time.gmtime(timestamp))

    # 1. Canonical request: method, URI, (empty) query string, headers,
    #    signed header names and the hex SHA-256 of the payload.
    canonical_headers = f'content-type:application/json\nhost:{endpoint}\n'
    signed_headers = 'content-type;host'
    payload_hash = hashlib.sha256(json.dumps(params).encode('utf-8')).hexdigest()
    canonical_request = '\n'.join([
        method,
        uri,
        '',  # canonical query string is always empty here
        canonical_headers,
        signed_headers,
        payload_hash,
    ])

    # 2. String to sign.
    algorithm = 'TC3-HMAC-SHA256'
    credential_scope = f"{date}/{service}/tc3_request"
    string_to_sign = '\n'.join([
        algorithm,
        str(timestamp),
        credential_scope,
        hashlib.sha256(canonical_request.encode('utf-8')).hexdigest(),
    ])

    # 3. Derive the signing key by chaining HMACs over date, service and the
    #    fixed terminator, then sign.
    def sign(key, msg):
        return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()

    secret_date = sign(('TC3' + secret_key).encode('utf-8'), date)
    secret_service = sign(secret_date, service)
    secret_signing = sign(secret_service, 'tc3_request')
    signature = hmac.new(secret_signing, string_to_sign.encode('utf-8'),
                         hashlib.sha256).hexdigest()

    # 4. Authorization header value.
    authorization = (f"{algorithm} "
                     f"Credential={secret_id}/{credential_scope}, "
                     f"SignedHeaders={signed_headers}, "
                     f"Signature={signature}")

    return authorization, timestamp

# 定义百度翻译函数
def translate_to_chinese_baidu(text):
    """Translate English ``text`` to Chinese with the Baidu translate API.

    This is a best-effort call: on any failure (network error, timeout,
    non-JSON reply, or an API reply without translations) the problem is
    printed and the original ``text`` is returned unchanged.

    Credentials are read from the ``BAIDU_TRANSLATE_APP_ID`` and
    ``BAIDU_TRANSLATE_SECRET_KEY`` environment variables when set, falling
    back to the values that were previously hard-coded here.

    Args:
        text: The English source text.

    Returns:
        The translated text, with multiple translated segments joined by
        newlines, or ``text`` itself when it is blank or translation fails.
    """
    # Nothing to translate: skip the network round-trip entirely.
    if not text or not text.strip():
        return text

    # NOTE(security): the fallback credentials are committed to source
    # control; they should be rotated and supplied only via the environment.
    app_id = os.environ.get('BAIDU_TRANSLATE_APP_ID', '20240909002145465')
    secret_key = os.environ.get('BAIDU_TRANSLATE_SECRET_KEY', 'JSYATnaiL1qi8NRfcpHj')
    # HTTPS endpoint so the query text and signature are not sent in clear.
    url = "https://fanyi-api.baidu.com/api/trans/vip/translate"
    timeout_seconds = 10

    salt = str(time.time())
    sign = hashlib.md5((app_id + text + salt + secret_key).encode('utf-8')).hexdigest()
    params = {
        'q': text,
        'from': 'en',
        'to': 'zh',
        'appid': app_id,
        'salt': salt,
        'sign': sign,
    }

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, params=params, timeout=timeout_seconds)
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"翻译API响应错误: {exc}")
        return text  # fall back to the source text

    translations = result.get('trans_result') if isinstance(result, dict) else None
    if translations:
        # Join every returned segment instead of silently dropping all but the first.
        return '\n'.join(item['dst'] for item in translations)

    # Print the full API reply to help diagnose the failure.
    print(f"翻译API响应错误: {result}")
    return text  # fall back to the source text


def translate_to_chinese_tencent(text):
    """Translate English ``text`` to Chinese with Tencent's ``TextTranslate`` action.

    This is a best-effort call: on any failure (network error, timeout,
    non-JSON reply, or a reply without ``Response.TargetText``) the problem
    is printed and the original ``text`` is returned unchanged.

    Credentials are read from the ``TENCENTCLOUD_SECRET_ID`` and
    ``TENCENTCLOUD_SECRET_KEY`` environment variables when set, falling back
    to the values that were previously hard-coded here.

    Args:
        text: The English source text.

    Returns:
        The translated text, or ``text`` itself when it is blank or
        translation fails.
    """
    # Nothing to translate: skip signing and the network round-trip.
    if not text or not text.strip():
        return text

    # NOTE(security): the fallback credentials are committed to source
    # control; they should be rotated and supplied only via the environment.
    secret_id = os.environ.get('TENCENTCLOUD_SECRET_ID', 'AKIDELOFsGROg9B0hieOuCH9nCQnwrZ5NRJy')
    secret_key = os.environ.get('TENCENTCLOUD_SECRET_KEY', 'Zx87sTs50iDoOpZ6RXP4UjqlV5TdbO0R')
    region = 'ap-beijing'
    endpoint = 'tmt.tencentcloudapi.com'
    version = '2018-03-21'
    action = 'TextTranslate'
    timeout_seconds = 10

    params = {
        "SourceText": text,
        "Source": "en",
        "Target": "zh",
        "ProjectId": 0,
    }

    method = 'POST'
    uri = '/'
    authorization, timestamp = sign_request_tencent(
        secret_id, secret_key, method, endpoint, uri, params)

    headers = {
        'Content-Type': 'application/json',
        'Host': endpoint,
        'X-TC-Action': action,
        'X-TC-Timestamp': str(timestamp),
        'X-TC-Version': version,
        'X-TC-Region': region,
        'Authorization': authorization,
    }

    try:
        # The body is serialised with json.dumps exactly as the signer hashes
        # it, so the signed payload hash matches the bytes sent. The timeout
        # keeps a stalled connection from blocking the caller forever.
        response = requests.post(f'https://{endpoint}{uri}', headers=headers,
                                 data=json.dumps(params), timeout=timeout_seconds)
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"翻译API响应错误: {exc}")
        return text  # fall back to the source text

    reply = result.get('Response') if isinstance(result, dict) else None
    if isinstance(reply, dict) and 'TargetText' in reply:
        return reply['TargetText']

    print(f"翻译API响应错误: {result}")
    return text  # fall back to the source text


# Lazily created PaddleOCR engine, shared by every translate_text call so the
# model is loaded only once per process.
_OCR_ENGINE = None


def _get_ocr_engine():
    """Return the shared PaddleOCR engine, constructing it on first use."""
    global _OCR_ENGINE
    if _OCR_ENGINE is None:
        _OCR_ENGINE = PaddleOCR(use_angle_cls=True, lang="en", ocr_version="PP-OCRv4")
    return _OCR_ENGINE


def translate_text(mode):
    """OCR ``./result.jpg``, translate each detected line and paint it over the original.

    For every text line found by OCR, the axis-aligned bounding box of the
    detected polygon is filled with white and the translated text is drawn
    in black at the box's top-left corner, using ``./simfang.ttf`` sized to
    the box height. The annotated image is written back to ``./result.jpg``
    as JPEG.

    Args:
        mode: Translation backend, ``"baidu"`` or ``"tencent"``. For any
            other value a message is printed and the image is re-saved
            without OCR or annotation.

    Returns:
        The (possibly annotated) RGB ``PIL.Image.Image``.
    """
    img_path = './result.jpg'
    translators = {
        "baidu": translate_to_chinese_baidu,
        "tencent": translate_to_chinese_tencent,
    }
    translator = translators.get(mode)

    # Load the image; the context manager releases the source file handle
    # before the same path is overwritten below.
    with Image.open(img_path) as source:
        image = source.convert('RGB')

    if translator is None:
        # Report once and skip the OCR work, instead of once per detected line.
        print("当前不支持该种翻译模式")
        image.save(img_path, 'JPEG')
        return image

    draw = ImageDraw.Draw(image)
    fonts = {}  # font objects keyed by pixel size, so each size is loaded once
    results = _get_ocr_engine().ocr(img_path, cls=True)

    # Process and draw the results.
    for page in results or []:
        if not page:
            # Guard against a None/empty entry (e.g. nothing detected).
            continue
        for line in page:
            # Reduce the detected polygon to its bounding box.
            xs = [point[0] for point in line[0]]
            ys = [point[1] for point in line[0]]
            left, top, right, bottom = min(xs), min(ys), max(xs), max(ys)
            txt = line[1][0]

            # Font size follows the box height; clamp so a very flat box
            # never asks for a zero-sized font.
            font_size = max(1, int(bottom - top))
            font = fonts.get(font_size)
            if font is None:
                font = ImageFont.truetype("./simfang.ttf", size=font_size)
                fonts[font_size] = font

            print(txt)
            translated_text = translator(txt)
            # The box is filled with the outline colour, so a 1-pixel outline
            # is sufficient; the box height is not guaranteed to be an
            # integer and is therefore not used as the outline width.
            draw.rectangle([(left, top), (right, bottom)],
                           outline="white", width=1, fill="white")
            draw.text((left, top), translated_text, fill="black", font=font)

    image.save(img_path, 'JPEG')
    return image

# 音频识别翻译

def get_access_token():
    """Request an OAuth access token from Baidu using client credentials.

    Credentials are read from the ``BAIDU_AIP_API_KEY`` and
    ``BAIDU_AIP_SECRET_KEY`` environment variables when set, falling back to
    the values that were previously hard-coded here.

    Returns:
        The ``access_token`` string from the JSON reply, or ``None`` when the
        reply has no such field.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    # NOTE(security): the fallback credentials are committed to source
    # control; they should be rotated and supplied only via the environment.
    api_key = os.environ.get("BAIDU_AIP_API_KEY", "DlBJVQvNy3pC0v04bGNoqK9r")
    secret_key = os.environ.get("BAIDU_AIP_SECRET_KEY", "ssHRyWUQ4bzK6Yj65D3ZYU0uU5w3X8RB")
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {
        "grant_type": "client_credentials",
        "client_id": api_key,
        "client_secret": secret_key,
    }
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(url, params=params, timeout=10)
    return response.json().get("access_token")

def translate_audio(audio_base64):
    """Send base64-encoded WAV audio to Baidu's speech-translation endpoint.

    A fresh access token is obtained via ``get_access_token`` for each call.
    The request asks for English-to-Chinese translation.

    Args:
        audio_base64: The audio content, base64-encoded, sent as the
            ``voice`` field with ``format`` set to ``"wav"``.

    Returns:
        The decoded JSON reply from the API.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    token = get_access_token()
    url = "https://aip.baidubce.com/rpc/2.0/mt/v2/speech-translation"
    payload = {
        "from": "en",
        "to": "zh",
        "format": "wav",
        "voice": audio_base64,
    }
    # `params=` URL-encodes the token rather than splicing it into the URL,
    # `json=` serialises the payload and sets the JSON content type, and the
    # timeout keeps a stalled upload from blocking the caller forever.
    response = requests.post(url, params={"access_token": token},
                             json=payload, timeout=30)
    return response.json()

def process_audio_for_translation(audio_base64):
    """Translate base64 audio and unwrap the translated text from the reply.

    Args:
        audio_base64: Base64-encoded audio, passed straight to
            ``translate_audio``.

    Returns:
        ``reply['result']['target']`` when the reply carries no
        ``error_code``. When it does, the ``error_msg`` is printed and the
        whole reply is returned unchanged, so callers receive either the
        unwrapped value or the raw error reply.
    """
    reply = translate_audio(audio_base64)
    if 'error_code' not in reply:
        return reply['result']['target']
    print("Error:", reply['error_msg'])
    return reply