45 lines
1.5 KiB
Python
45 lines
1.5 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""英译中,入库前统一翻译"""
|
||
import os
|
||
import re
|
||
from typing import Optional
|
||
|
||
|
||
def _is_mostly_chinese(text: str) -> bool:
    """Report whether *text* consists mostly of Chinese characters.

    A character counts as Chinese when it lies in the range U+4E00..U+9FFF.
    The text qualifies when such characters make up more than 30% of its
    total length (whitespace included). Empty input, or input with fewer
    than two characters after stripping, never qualifies.
    """
    stripped_length = len(text.strip()) if text else 0
    if stripped_length < 2:
        return False

    han_chars = re.findall(r"[\u4e00-\u9fff]", text)
    total_length = max(len(text), 1)
    return len(han_chars) / total_length > 0.3
|
||
|
||
|
||
def translate_to_chinese(text: str) -> str:
    """Translate *text* to Simplified Chinese, falling back to the original.

    External translation (via ``deep_translator``) is disabled by default so
    that network or proxy problems cannot stall the whole pipeline. It is
    enabled only when the environment variable ``TRANSLATE_DISABLED`` is
    explicitly set to a false value: ``0``, ``false``, ``no`` or ``off``
    (case-insensitive, surrounding whitespace ignored). Any other value,
    and the variable being unset, keeps translation disabled.

    Args:
        text: The text to translate.

    Returns:
        The translated text when a backend succeeds. Otherwise *text* is
        returned unchanged: for empty/blank input, when translation is
        disabled, when the text is already mostly Chinese, or when every
        backend fails or returns nothing new. Only the first 2000
        characters of the stripped input are sent to a backend, so a
        successful translation of longer input covers just that prefix.
    """
    if not text or not text.strip():
        return text

    # Translation is opt-in: only an explicit "false" value switches the
    # kill-switch off. Values such as "true" or " 1 " must not silently
    # enable network calls.
    flag = os.environ.get("TRANSLATE_DISABLED", "1").strip().lower()
    if flag not in ("0", "false", "no", "off"):
        return text

    snippet = text.strip()[:2000]
    if _is_mostly_chinese(snippet):
        return text

    import logging

    logger = logging.getLogger(__name__)

    # Try each backend in order; the first usable result wins.
    for backend in ("google", "mymemory"):
        try:
            if backend == "google":
                from deep_translator import GoogleTranslator

                out = GoogleTranslator(source="auto", target="zh-CN").translate(snippet)
            else:
                from deep_translator import MyMemoryTranslator

                out = MyMemoryTranslator(source="auto", target="zh-CN").translate(snippet)
        except Exception:
            # Best effort: a missing package, network error or backend
            # rejection must not break the caller; fall through to the next
            # backend but leave a trace for debugging.
            logger.debug("translation backend %r failed", backend, exc_info=True)
            continue
        if out and out.strip() and out != snippet:
            return out

    return text
|