You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
RealTime_ScreenScanning_Tra.../src/screen-ocr-1.0.0/ocr_app/utils.py

190 lines
6.8 KiB

import base64
import hashlib
import hmac
import io
import json
import os
import time

import mss
import requests
from paddleocr import PaddleOCR
from PIL import Image, ImageDraw, ImageFont
def take_screenshot(output="result.jpg"):
    """Capture a screenshot with mss and write it to *output*.

    Args:
        output: Destination path handed to ``sct.shot``. The default,
            ``"result.jpg"``, matches the original hard-coded file name.

    Returns:
        The value returned by ``sct.shot`` (previously it was bound to an
        unused local and discarded).
    """
    with mss.mss() as sct:
        # NOTE(review): shot() is called without a monitor argument, so it
        # grabs whatever monitor mss uses by default, and mss may write PNG
        # data regardless of the ".jpg" extension - confirm in the mss docs.
        return sct.shot(output=output)
def sign_request_tencent(secret_id, secret_key, method, endpoint, uri, params,
                         service='tmt', timestamp=None):
    """Build a Tencent Cloud ``TC3-HMAC-SHA256`` Authorization header value.

    The signature covers the ``content-type: application/json`` and ``host``
    headers and the SHA-256 of ``json.dumps(params)``, so the caller must
    send exactly that serialization as the request body together with
    matching ``Content-Type`` and ``Host`` headers.

    Args:
        secret_id: API key id placed in the ``Credential`` field.
        secret_key: API secret used to derive the signing key.
        method: HTTP method, e.g. ``'POST'``.
        endpoint: Host name used for the signed ``host`` header.
        uri: Canonical request path, e.g. ``'/'``.
        params: JSON-serializable request payload.
        service: Service name used in the credential scope and in the key
            derivation. Defaults to ``'tmt'``, the previously hard-coded value.
        timestamp: Unix timestamp in seconds to sign with. Defaults to the
            current time; passing it explicitly makes the result reproducible.

    Returns:
        A ``(authorization, timestamp)`` tuple. ``timestamp`` is the integer
        that was signed and must be sent as ``X-TC-Timestamp``.
    """
    timestamp = int(time.time()) if timestamp is None else int(timestamp)
    # The credential scope uses the UTC date of the signed timestamp.
    date = time.strftime('%Y-%m-%d', time.gmtime(timestamp))
    algorithm = 'TC3-HMAC-SHA256'
    signed_headers = 'content-type;host'

    # 1. Canonical request. The canonical header block itself ends with a
    #    newline, which yields the blank separator line once joined.
    canonical_headers = f'content-type:application/json\nhost:{endpoint}\n'
    payload_hash = hashlib.sha256(json.dumps(params).encode('utf-8')).hexdigest()
    canonical_request = '\n'.join([
        method,
        uri,
        '',  # canonical query string: always empty here
        canonical_headers,
        signed_headers,
        payload_hash,
    ])

    # 2. String to sign.
    credential_scope = f"{date}/{service}/tc3_request"
    string_to_sign = '\n'.join([
        algorithm,
        str(timestamp),
        credential_scope,
        hashlib.sha256(canonical_request.encode('utf-8')).hexdigest(),
    ])

    # 3. Derive the signing key by chained HMACs, then sign.
    def _hmac_sha256(key, msg):
        return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)

    secret_date = _hmac_sha256(('TC3' + secret_key).encode('utf-8'), date).digest()
    secret_service = _hmac_sha256(secret_date, service).digest()
    secret_signing = _hmac_sha256(secret_service, 'tc3_request').digest()
    signature = _hmac_sha256(secret_signing, string_to_sign).hexdigest()

    # 4. Authorization header value.
    authorization = (f"{algorithm} "
                     f"Credential={secret_id}/{credential_scope}, "
                     f"SignedHeaders={signed_headers}, "
                     f"Signature={signature}")
    return authorization, timestamp
# Baidu translation helper
def translate_to_chinese_baidu(text):
    """Translate English *text* to Chinese with the Baidu Fanyi HTTP API.

    This is a best-effort helper: on any failure (transport error, non-JSON
    body, API error payload) it prints a diagnostic and returns *text*
    unchanged.

    Args:
        text: English source text.

    Returns:
        The translated string, or *text* itself when it is empty/blank or
        the translation failed.
    """
    # Nothing to translate: skip the network round trip entirely.
    if not text or not text.strip():
        return text

    # SECURITY: credentials should not live in source control. They can be
    # supplied through the environment; the literals remain only as a
    # backward-compatible fallback and should be rotated and removed.
    app_id = os.environ.get('BAIDU_FANYI_APP_ID', '20240909002145465')
    secret_key = os.environ.get('BAIDU_FANYI_SECRET_KEY', 'JSYATnaiL1qi8NRfcpHj')
    # NOTE(review): this endpoint is plain HTTP; consider an HTTPS endpoint
    # if the provider offers one - confirm against Baidu's API docs.
    url = "http://api.fanyi.baidu.com/api/trans/vip/translate"

    salt = str(time.time())
    # The request signature is the MD5 of appid + query + salt + secret.
    sign = hashlib.md5((app_id + text + salt + secret_key).encode('utf-8')).hexdigest()
    params = {
        'q': text,
        'from': 'en',
        'to': 'zh',
        'appid': app_id,
        'salt': salt,
        'sign': sign,
    }

    try:
        # A timeout keeps a stalled connection from hanging the caller.
        response = requests.get(url, params=params, timeout=10)
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"翻译API响应错误: {exc}")
        return text  # fall back to the original text

    try:
        return result['trans_result'][0]['dst']
    except (KeyError, IndexError, TypeError):
        # Log the full API response to make the failure diagnosable.
        print(f"翻译API响应错误: {result}")
        return text  # fall back to the original text
def translate_to_chinese_tencent(text):
    """Translate English *text* to Chinese with Tencent Cloud ``TextTranslate``.

    This is a best-effort helper: on any failure (transport error, non-JSON
    body, API error payload) it prints a diagnostic and returns *text*
    unchanged.

    Args:
        text: English source text.

    Returns:
        The translated string, or *text* itself when it is empty/blank or
        the translation failed.
    """
    # Nothing to translate: skip signing and the network round trip.
    if not text or not text.strip():
        return text

    # SECURITY: credentials should not live in source control. They can be
    # supplied through the environment; the literals remain only as a
    # backward-compatible fallback and should be rotated and removed.
    secret_id = os.environ.get('TENCENTCLOUD_SECRET_ID',
                               'AKIDELOFsGROg9B0hieOuCH9nCQnwrZ5NRJy')
    secret_key = os.environ.get('TENCENTCLOUD_SECRET_KEY',
                                'Zx87sTs50iDoOpZ6RXP4UjqlV5TdbO0R')
    region = 'ap-beijing'
    endpoint = 'tmt.tencentcloudapi.com'
    version = '2018-03-21'
    action = 'TextTranslate'
    method = 'POST'
    uri = '/'

    params = {
        "SourceText": text,
        "Source": "en",
        "Target": "zh",
        "ProjectId": 0,
    }

    authorization, timestamp = sign_request_tencent(
        secret_id, secret_key, method, endpoint, uri, params)
    headers = {
        'Content-Type': 'application/json',
        'Host': endpoint,
        'X-TC-Action': action,
        'X-TC-Timestamp': str(timestamp),
        'X-TC-Version': version,
        'X-TC-Region': region,
        'Authorization': authorization,
    }

    try:
        # The body must be the same json.dumps(params) serialization that
        # sign_request_tencent hashed, otherwise the signature won't match.
        response = requests.post(f'https://{endpoint}{uri}', headers=headers,
                                 data=json.dumps(params), timeout=10)
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"翻译API响应错误: {exc}")
        return text  # fall back to the original text

    try:
        return result['Response']['TargetText']
    except (KeyError, TypeError):
        print(f"翻译API响应错误: {result}")
        return text  # fall back to the original text
# Shared OCR engine, created lazily so the model is loaded only once per
# process instead of on every translate_text() call.
_OCR_ENGINE = None


def _get_ocr_engine():
    """Return the shared PaddleOCR instance, creating it on first use."""
    global _OCR_ENGINE
    if _OCR_ENGINE is None:
        _OCR_ENGINE = PaddleOCR(use_angle_cls=True, lang="en", ocr_version="PP-OCRv4")
    return _OCR_ENGINE


def translate_text(mode, img_path='./result.jpg', font_path='./simfang.ttf'):
    """OCR an image, translate each detected line and paint it over the original.

    Every detected text line is covered with a white rectangle spanning its
    bounding box, and the translation is drawn in black at the box's
    top-left corner with a font sized to the box height. The annotated image
    is written back to *img_path* as JPEG.

    Args:
        mode: Translation backend, ``"baidu"`` or ``"tencent"``. For any
            other value a message is printed and the image is saved and
            returned without annotations.
        img_path: Image to read and overwrite. Defaults to the previously
            hard-coded ``'./result.jpg'``.
        font_path: TrueType font used for the overlay text. Defaults to the
            previously hard-coded ``'./simfang.ttf'``.

    Returns:
        The (possibly annotated) RGB ``PIL.Image.Image``.
    """
    translators = {
        "baidu": translate_to_chinese_baidu,
        "tencent": translate_to_chinese_tencent,
    }

    # Load the image.
    image = Image.open(img_path).convert('RGB')

    translate = translators.get(mode)
    if translate is None:
        # Report once and skip the OCR pass, whose output would go unused.
        print("当前不支持该种翻译模式")
    else:
        draw = ImageDraw.Draw(image)
        results = _get_ocr_engine().ocr(img_path, cls=True)
        # Process and draw the results.
        for res in results or []:
            # NOTE(review): PaddleOCR appears to yield None for a page with
            # no detected text; skip such entries instead of iterating None.
            if not res:
                continue
            for line in res:
                # Collapse the detected quadrilateral to its axis-aligned
                # bounding box.
                xs = [point[0] for point in line[0]]
                ys = [point[1] for point in line[0]]
                top_left = (min(xs), min(ys))
                bottom_right = (max(xs), max(ys))
                txt = line[1][0]
                height = bottom_right[1] - top_left[1]
                # Guard against a zero font size for degenerate boxes.
                font = ImageFont.truetype(font_path, size=max(1, int(height)))
                print(txt)
                translated_text = translate(txt)
                # Outline and fill are both white, so a 1px outline is enough;
                # the two original branches passed different widths here.
                draw.rectangle([top_left, bottom_right], outline="white",
                               width=1, fill="white")
                draw.text(top_left, translated_text, fill="black", font=font)

    image.save(img_path, 'JPEG')
    return image
# Audio recognition and translation
def get_access_token():
    """Request an OAuth access token from Baidu using client credentials.

    Returns:
        The ``access_token`` value from the JSON response, or ``None`` when
        the response carries no such key.

    Raises:
        requests.RequestException: On transport failure or timeout.
    """
    # SECURITY: credentials should not live in source control. They can be
    # supplied through the environment; the literals remain only as a
    # backward-compatible fallback and should be rotated and removed.
    api_key = os.environ.get("BAIDU_AIP_API_KEY", "DlBJVQvNy3pC0v04bGNoqK9r")
    secret_key = os.environ.get("BAIDU_AIP_SECRET_KEY", "ssHRyWUQ4bzK6Yj65D3ZYU0uU5w3X8RB")
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {
        "grant_type": "client_credentials",
        "client_id": api_key,
        "client_secret": secret_key,
    }
    # A timeout keeps a stalled connection from hanging the caller forever.
    response = requests.post(url, params=params, timeout=10)
    return response.json().get("access_token")
def translate_audio(audio_base64):
    """Send base64-encoded WAV audio to Baidu's speech-translation endpoint.

    A fresh access token is obtained via ``get_access_token`` on every call.

    Args:
        audio_base64: The audio content as a base64 string; sent as the
            ``voice`` field with ``format`` set to ``"wav"``.

    Returns:
        The decoded JSON response, returned as-is (error payloads included).

    Raises:
        requests.RequestException: On transport failure or timeout.
    """
    token = get_access_token()
    url = f"https://aip.baidubce.com/rpc/2.0/mt/v2/speech-translation?access_token={token}"
    payload = json.dumps({
        "from": "en",
        "to": "zh",
        "format": "wav",
        "voice": audio_base64,
    })
    headers = {'Content-Type': 'application/json'}
    # Audio uploads can be large, so allow more time than for text calls,
    # but never wait indefinitely.
    response = requests.post(url, headers=headers, data=payload, timeout=30)
    return response.json()
def process_audio_for_translation(audio_base64):
    """Translate base64 audio and unwrap the translated text.

    Args:
        audio_base64: Base64-encoded audio forwarded to ``translate_audio``.

    Returns:
        ``response['result']['target']`` on success. When the response
        contains ``error_code``, the error is printed and the whole response
        dict is returned instead (kept for backward compatibility, so
        callers must be prepared for either type).
    """
    response = translate_audio(audio_base64)
    if 'error_code' in response:
        # Fall back to printing the full payload when no message is present,
        # rather than raising KeyError while reporting an error.
        print("Error:", response.get('error_msg', response))
        return response
    return response['result']['target']