# -*- coding: utf-8 -*-
"""English-to-Chinese translation, applied uniformly before records are stored."""
import logging
import os
import re
from typing import Optional

_logger = logging.getLogger(__name__)

# Matches one character in the CJK Unified Ideographs block (U+4E00..U+9FFF).
_CJK_CHAR_RE = re.compile(r"[\u4e00-\u9fff]")

# Only this many leading characters of the stripped input are sent to a backend.
_MAX_TRANSLATE_CHARS = 2000


def _is_mostly_chinese(text: str) -> bool:
    """Return True when more than 30% of the characters in *text* are CJK ideographs.

    Empty input, or input whose stripped length is below 2, is never
    considered Chinese. The ratio is computed over the full length of
    *text* (whitespace included).
    """
    if not text or len(text.strip()) < 2:
        return False
    chinese = len(_CJK_CHAR_RE.findall(text))
    return chinese / max(len(text), 1) > 0.3


def _translate_with(backend: str, snippet: str):
    """Translate *snippet* to zh-CN using the named deep_translator backend.

    ``deep_translator`` is imported lazily so this module can be imported
    (and used with translation disabled) without the package installed.
    Any exception raised by the import or the backend propagates to the caller.
    """
    if backend == "google":
        from deep_translator import GoogleTranslator

        return GoogleTranslator(source="auto", target="zh-CN").translate(snippet)

    from deep_translator import MyMemoryTranslator

    return MyMemoryTranslator(source="auto", target="zh-CN").translate(snippet)


def translate_to_chinese(text: str) -> str:
    """Translate *text* into Chinese; return the original on failure or if already Chinese.

    Notes:
        - External translation (deep_translator) is disabled by default and
          the input is returned untouched, so network or proxy problems
          cannot stall the whole pipeline.
        - To enable translation, explicitly set the environment variable
          ``TRANSLATE_DISABLED=0`` (any value other than ``"1"`` enables it).
        - Only the first 2000 characters of the stripped input are sent to
          the backend, so a successful translation of a longer input covers
          only that prefix.

    Args:
        text: The text to translate. Falsy values and values that are blank
            after stripping are returned unchanged. Non-string values are
            coerced with ``str()`` for inspection instead of raising.

    Returns:
        The translated string from the first backend that yields a non-blank
        result different from the input; otherwise *text* itself, unchanged.
    """
    if not text:
        return text

    # Coerce once, up front: the original guard called text.strip() directly
    # and raised AttributeError for non-string input.
    stripped = str(text).strip()
    if not stripped:
        return text

    # Translation is off unless the variable is set to something other than
    # "1"; an unset variable defaults to "1" (disabled).
    if os.environ.get("TRANSLATE_DISABLED", "1") == "1":
        return text

    snippet = stripped[:_MAX_TRANSLATE_CHARS]
    if _is_mostly_chinese(snippet):
        return text

    for backend in ("google", "mymemory"):
        try:
            out = _translate_with(backend, snippet)
            if out and out.strip() and out != snippet:
                return out
        except Exception as exc:
            # Best-effort: a failing backend must not break the pipeline, but
            # the failure is reported instead of being silently discarded.
            _logger.warning("translation backend %r failed: %s", backend, exc)
            continue
    return text