# usa/crawler/parser_ai.py

# -*- coding: utf-8 -*-
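"""
AI-based news classification and severity rating.

Backend selection: if DASHSCOPE_API_KEY is set, Alibaba Cloud DashScope (Tongyi)
is used and Ollama is not required; otherwise a local Ollama server is used.
If the selected AI backend yields no result, the rule-based parser is used.
Set PARSER_AI_DISABLED=1 to use the rules only (faster).
"""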
import os
from typing import Literal, Optional, Tuple

# Closed sets of labels that the classification functions in this module return.
Category = Literal["deployment", "alert", "intel", "diplomatic", "other"]
Severity = Literal["low", "medium", "high", "critical"]

# Configuration, read from the environment once at import time.
# PARSER_AI_DISABLED=1 makes both AI backends return None so only rules are used.
PARSER_AI_DISABLED = os.environ.get("PARSER_AI_DISABLED", "0") == "1"
# Model name sent to the local Ollama server.
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3.1")
# DashScope API key; an empty string means DashScope is not used.
DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY", "").strip()

# Runtime tuples with the same labels as the Literal types above (keep in sync);
# _parse_ai_response uses them to recognise labels in a model reply.
_CATEGORIES = ("deployment", "alert", "intel", "diplomatic", "other")
_SEVERITIES = ("low", "medium", "high", "critical")


def _parse_ai_response(text: str) -> Tuple[Category, Severity]:
    """Extract a ``(category, severity)`` pair from a model reply.

    Matching is case-insensitive and tried in three passes, most specific
    first:

    1. an explicit ``category:severity`` pair (the format the prompt asks for);
    2. per field, the first whole-word label occurring in the reply;
    3. per field, the first label in ``_CATEGORIES`` / ``_SEVERITIES`` order
       that occurs anywhere as a substring (the original lenient behaviour,
       e.g. "intelligence" -> "intel").

    Args:
        text: Raw reply text; ``None`` or empty is treated as an empty reply.

    Returns:
        The parsed pair. A field with no recognisable label falls back to
        ``"other"`` (category) or ``"low"`` (severity).
    """
    import re

    reply = (text or "").strip().lower()
    cat_alt = "|".join(map(re.escape, _CATEGORIES))
    sev_alt = "|".join(map(re.escape, _SEVERITIES))

    # Pass 1: a well-formed "category:severity" answer wins outright, so stray
    # labels elsewhere in a chatty reply cannot override it.
    pair = re.search(rf"\b({cat_alt})\s*[:：]\s*({sev_alt})\b", reply)
    if pair:
        return pair.group(1), pair.group(2)  # type: ignore

    def _pick(labels, alternation, default):
        # Pass 2: whole words only, so "low" is not found inside
        # "follow" / "slowly" / "below".
        word = re.search(rf"\b({alternation})\b", reply)
        if word:
            return word.group(1)
        # Pass 3: substring scan in declaration order, as a last resort.
        return next((label for label in labels if label in reply), default)

    return (  # type: ignore
        _pick(_CATEGORIES, cat_alt, "other"),
        _pick(_SEVERITIES, sev_alt, "low"),
    )


def _call_dashscope(text: str, timeout: int = 6) -> Optional[Tuple[Category, Severity]]:
    """Classify *text* with Alibaba Cloud DashScope (Tongyi ``qwen-turbo``).

    Requires ``DASHSCOPE_API_KEY``; no Ollama server is needed.

    Args:
        text: News text; only the first 300 characters are sent.
        timeout: Per-request timeout in seconds, forwarded to the SDK.

    Returns:
        ``(category, severity)`` parsed from the model reply, or ``None`` when
        the backend is disabled or unconfigured, *text* is blank, the request
        fails, or the reply is empty. ``None`` lets callers fall back to the
        rule-based parser.
    """
    import logging

    if not DASHSCOPE_API_KEY or PARSER_AI_DISABLED:
        return None
    try:
        # Nothing to classify: skip the network round trip.
        if not text or not text.strip():
            return None
        import dashscope
        from http import HTTPStatus
        dashscope.api_key = DASHSCOPE_API_KEY
        prompt = f"""Classify this news about US-Iran/middle east (one line only):
- category: deployment|alert|intel|diplomatic|other
- severity: low|medium|high|critical

News: {text[:300]}

Reply format: category:severity (e.g. alert:high)"""
        r = dashscope.Generation.call(
            model="qwen-turbo",
            messages=[{"role": "user", "content": prompt}],
            result_format="message",
            max_tokens=32,
            # NOTE(review): `request_timeout` is the DashScope SDK's per-request
            # timeout keyword - confirm against the installed SDK version.
            request_timeout=timeout,
        )
        if r.status_code != HTTPStatus.OK:
            return None
        # Tolerate a missing or empty "choices" list.
        choices = r.output.get("choices") or [{}]
        out = choices[0].get("message", {}).get("content", "")
        # A blank reply carries no classification; report failure instead of
        # letting the parser's ("other", "low") defaults pass as an AI answer.
        if not (out or "").strip():
            return None
        return _parse_ai_response(out)
    except Exception:
        # Best effort by design: any SDK, network, or shape error means "no AI
        # result". Leave a debug trace so failures are diagnosable.
        logging.getLogger(__name__).debug(
            "DashScope classification failed", exc_info=True
        )
        return None


def _call_ollama(text: str, timeout: int = 5) -> Optional[Tuple[Category, Severity]]:
    """Classify *text* with a local Ollama model (``OLLAMA_MODEL``).

    Expects an Ollama server on ``localhost:11434`` (e.g. started with
    ``ollama run llama3.1``).

    Args:
        text: News text; only the first 300 characters are sent.
        timeout: HTTP timeout in seconds for the chat request.

    Returns:
        ``(category, severity)`` parsed from the model reply, or ``None`` when
        AI is disabled, *text* is blank, the request fails, or the reply is
        empty. ``None`` lets callers fall back to the rule-based parser.
    """
    import logging

    if PARSER_AI_DISABLED:
        return None
    try:
        # Nothing to classify: skip the HTTP round trip.
        if not text or not text.strip():
            return None
        import requests
        prompt = f"""Classify this news about US-Iran/middle east (one line only):
- category: deployment|alert|intel|diplomatic|other
- severity: low|medium|high|critical

News: {text[:300]}

Reply format: category:severity (e.g. alert:high)"""
        r = requests.post(
            "http://localhost:11434/api/chat",
            json={
                "model": OLLAMA_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                "options": {"num_predict": 32},
            },
            timeout=timeout,
        )
        if r.status_code != 200:
            return None
        # Tolerate a missing or null "message" / "content".
        out = (r.json().get("message") or {}).get("content") or ""
        # A blank reply carries no classification; report failure instead of
        # letting the parser's ("other", "low") defaults pass as an AI answer.
        if not out.strip():
            return None
        return _parse_ai_response(out)
    except Exception:
        # Best effort by design: a missing server, timeout, or bad JSON means
        # "no AI result". Leave a debug trace so failures are diagnosable.
        logging.getLogger(__name__).debug(
            "Ollama classification failed", exc_info=True
        )
        return None


def _rule_classify(text: str) -> Category:
    """Return the category that the rule-based ``parser.classify`` assigns to *text*.

    ``parser`` is imported at call time rather than at module import.
    """
    from parser import classify as rule_based_classify

    return rule_based_classify(text)


def _rule_severity(text: str, category: Category) -> Severity:
    """Return the severity that the rule-based ``parser.severity`` assigns.

    Both *text* and the already-determined *category* are passed through
    unchanged. ``parser`` is imported at call time rather than at module import.
    """
    from parser import severity as rule_based_severity

    return rule_based_severity(text, category)


def _call_ai(text: str) -> Optional[Tuple[Category, Severity]]:
    """Run the configured AI backend on *text*.

    DashScope is used when ``DASHSCOPE_API_KEY`` is set; otherwise Ollama.
    Exactly one backend is tried per call: a DashScope failure does not fall
    through to Ollama. Returns whatever the chosen backend returns.
    """
    backend = _call_dashscope if DASHSCOPE_API_KEY else _call_ollama
    return backend(text)


def classify(text: str) -> Category:
    """Return the category for *text*.

    Uses the AI backend's answer when one is available and falls back to the
    rule-based classifier otherwise.
    """
    ai_result = _call_ai(text)
    if not ai_result:
        return _rule_classify(text)
    return ai_result[0]


def severity(text: str, category: Category) -> Severity:
    """Return the severity for *text*.

    Uses the AI backend's answer when one is available; *category* is only
    consulted by the rule-based fallback.
    """
    ai_result = _call_ai(text)
    if not ai_result:
        return _rule_severity(text, category)
    return ai_result[1]


def classify_and_severity(text: str) -> Tuple[Category, Severity]:
    """Return ``(category, severity)`` for *text* with at most one AI call.

    When ``PARSER_AI_DISABLED`` is set the AI backend is skipped entirely.
    Otherwise the AI answer is used if one is available. In every other case
    the rule-based parser decides both values.
    """
    if not PARSER_AI_DISABLED:
        res = _call_ai(text)
        if res:
            return res
    # Single rule-based path: classify once and reuse the category for the
    # severity lookup instead of classifying the same text twice.
    category = _rule_classify(text)
    return category, _rule_severity(text, category)