# -*- coding: utf-8 -*-
"""英译中，入库前统一翻译"""
import logging
import os
import re
from typing import Optional


def _is_mostly_chinese(text: str) -> bool:
    """Return True when more than 30% of the characters in ``text`` are CJK ideographs.

    Only code points in the range U+4E00..U+9FFF are counted. The ratio is
    taken over the full length of ``text`` (whitespace and punctuation
    included). Empty input, or input whose stripped length is below two
    characters, is never considered Chinese.
    """
    if not text:
        return False
    if len(text.strip()) < 2:
        return False
    han_chars = re.findall(r"[\u4e00-\u9fff]", text)
    total_chars = max(len(text), 1)
    ratio = len(han_chars) / total_chars
    return ratio > 0.3


def translate_to_chinese(text: str) -> str:
    """Translate ``text`` into Simplified Chinese on a best-effort basis.

    The original ``text`` is returned unchanged when it is empty or
    whitespace-only, when translation is disabled, when the text already
    looks Chinese, or when every translation backend fails.

    Translation is disabled by default so that network or proxy problems
    cannot stall the surrounding pipeline. It is controlled by the
    ``TRANSLATE_DISABLED`` environment variable:

    - unset, ``"1"``, ``"true"``, ``"yes"``, ``"on"`` or any other
      unrecognised value: translation stays disabled;
    - ``"0"``, ``"false"``, ``"no"``, ``"off"`` or an empty value
      (case-insensitive, surrounding whitespace ignored): translation is
      enabled.

    When enabled, the stripped text is cut to its first 2000 characters and
    sent to the ``deep_translator`` backends in order (Google, then
    MyMemory). The first result that is non-blank and differs from the
    submitted text is returned; note that for longer inputs this result
    therefore covers only the first 2000 characters.

    Args:
        text: The text to translate.

    Returns:
        The translated text, or ``text`` itself when no translation was
        performed or produced.
    """
    if not text or not text.strip():
        return text

    # Fail closed: only an explicit opt-in value turns translation on, so a
    # value such as "true" or " 1" keeps the kill-switch engaged instead of
    # silently enabling network calls.
    flag = os.environ.get("TRANSLATE_DISABLED", "1").strip().lower()
    if flag not in ("", "0", "false", "no", "off"):
        return text

    # Only the first 2000 characters of the stripped text are submitted.
    snippet = text.strip()[:2000]
    if _is_mostly_chinese(snippet):
        return text

    logger = logging.getLogger(__name__)
    for backend in ("google", "mymemory"):
        try:
            # Imported lazily so the optional dependency is only required
            # when translation has actually been enabled.
            if backend == "google":
                from deep_translator import GoogleTranslator as translator_cls
            else:
                from deep_translator import MyMemoryTranslator as translator_cls
            out = translator_cls(source="auto", target="zh-CN").translate(snippet)
            if out and out.strip() and out != snippet:
                return out
        except Exception as exc:
            # Best effort: record why this backend failed, then fall through
            # to the next backend (or to returning the original text).
            logger.warning(
                "translate_to_chinese: %s backend failed: %s", backend, exc
            )
    return text