fix: 优化数据

2026-03-02 11:28:13 +08:00
parent 4a8fff5a00
commit 004d10b283
39 changed files with 1106 additions and 56 deletions
--- a/crawler/parser_ai.py
+++ b/crawler/parser_ai.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+"""
+AI 新闻分类与严重度判定
+优先使用 Ollama 本地模型（免费），失败则回退到规则
+设置 PARSER_AI_DISABLED=1 可只用规则（更快）
+"""
+import os
+from typing import Literal, Optional, Tuple
+
+# Closed label vocabularies. The Literal aliases are for type annotations;
+# the tuples hold the same labels for runtime scanning of model replies.
+# Keep each alias/tuple pair in sync.
+Category = Literal["deployment", "alert", "intel", "diplomatic", "other"]
+_CATEGORIES = ("deployment", "alert", "intel", "diplomatic", "other")
+
+Severity = Literal["low", "medium", "high", "critical"]
+_SEVERITIES = ("low", "medium", "high", "critical")
+
+# Environment-driven settings, read once at import time.
+# PARSER_AI_DISABLED=1 switches the AI path off (rules only).
+PARSER_AI_DISABLED = os.getenv("PARSER_AI_DISABLED", "0") == "1"
+# Ollama model name: llama3.1 by default, or e.g. qwen2.5:7b.
+OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1")
+
+
+def _parse_ai_response(text: str) -> Tuple[Category, Severity]:
+    """Extract a ``(category, severity)`` pair from the model's reply.
+
+    The prompt asks for ``category:severity`` (e.g. ``alert:high``), so an
+    explicit pair is preferred.  When no such pair is present, the first
+    known category word and the first known severity word found in the reply
+    are used independently.  Labels are matched as whole words only, so
+    "low" inside "following" or "other" inside "another" is not mistaken for
+    a label.
+
+    Args:
+        text: Raw reply text; ``None`` or empty is treated as carrying no
+            information.
+
+    Returns:
+        The parsed pair, with ``"other"`` / ``"low"`` standing in for any
+        part that could not be recognised.
+    """
+    import re  # local import: this module does not import re at top level
+
+    reply = (text or "").strip().lower()
+    cat_alt = "|".join(re.escape(label) for label in _CATEGORIES)
+    sev_alt = "|".join(re.escape(label) for label in _SEVERITIES)
+
+    # ASCII-letter lookarounds are used instead of \b because \b sees no
+    # boundary between a label and directly adjacent CJK text.
+    pair = re.search(
+        rf"(?<![a-z])({cat_alt})\s*[:：]\s*({sev_alt})(?![a-z])", reply
+    )
+    if pair:
+        return pair.group(1), pair.group(2)  # type: ignore
+
+    # No explicit pair: take the earliest whole-word label of each kind.
+    cat_hit = re.search(rf"(?<![a-z])({cat_alt})(?![a-z])", reply)
+    sev_hit = re.search(rf"(?<![a-z])({sev_alt})(?![a-z])", reply)
+    cat = cat_hit.group(1) if cat_hit else "other"
+    sev = sev_hit.group(1) if sev_hit else "low"
+    return cat, sev  # type: ignore
+
+
+def _call_ollama(text: str, timeout: int = 5) -> Optional[Tuple[Category, Severity]]:
+    """Ask the local Ollama chat endpoint to classify a news text.
+
+    Requires a local Ollama server with the configured model available
+    (e.g. ``ollama run llama3.1`` or ``qwen2.5:7b``).
+
+    Args:
+        text: News text; only the first 300 characters are sent.
+        timeout: Timeout in seconds passed to ``requests.post``.
+
+    Returns:
+        ``(category, severity)`` parsed from the reply, or ``None`` when AI
+        is disabled, the text is blank, the request fails, or the reply names
+        no known label.  Callers treat ``None`` as "fall back to the rules".
+    """
+    if PARSER_AI_DISABLED:
+        return None
+    if not isinstance(text, str) or not text.strip():
+        # Nothing to classify; skip the HTTP round trip.
+        return None
+    try:
+        import requests
+        prompt = f"""Classify this news about US-Iran/middle east (one line only):
+- category: deployment|alert|intel|diplomatic|other
+- severity: low|medium|high|critical
+
+News: {text[:300]}
+
+Reply format: category:severity (e.g. alert:high)"""
+        r = requests.post(
+            "http://localhost:11434/api/chat",
+            json={
+                "model": OLLAMA_MODEL,
+                "messages": [{"role": "user", "content": prompt}],
+                "stream": False,
+                "options": {"num_predict": 32},
+            },
+            timeout=timeout,
+        )
+        if r.status_code != 200:
+            return None
+        out = r.json().get("message", {}).get("content", "")
+        reply = (out or "").lower()
+        # A reply that names no known label is a failed classification, not
+        # an "other/low" verdict: report failure so the rules take over.
+        if not any(label in reply for label in _CATEGORIES + _SEVERITIES):
+            return None
+        return _parse_ai_response(out)
+    except Exception:
+        # Deliberate best effort: a missing ``requests`` package, connection
+        # errors, timeouts and malformed JSON all mean "AI unavailable".
+        return None
+
+
+def _rule_classify(text: str) -> Category:
+    """Classify *text* with the rule-based ``parser`` module.
+
+    The import happens at call time rather than at module load.
+    """
+    # NOTE(review): Python <= 3.9 also ships a stdlib module named
+    # ``parser``; confirm the project-local one is what gets imported.
+    from parser import classify as rule_classify
+    return rule_classify(text)
+
+
+def _rule_severity(text: str, category: Category) -> Severity:
+    """Rate *text* with the rule-based ``parser`` module.
+
+    Both *text* and *category* are forwarded unchanged to
+    ``parser.severity``; the import happens at call time.
+    """
+    from parser import severity as rule_severity
+    return rule_severity(text, category)
+
+
+def classify(text: str) -> Category:
+    """Return the category of *text*: AI first, rules as fallback.
+
+    Each call issues its own model request; when the severity is needed as
+    well, ``classify_and_severity`` gets both from a single request.
+    """
+    ai_result = _call_ollama(text)
+    return ai_result[0] if ai_result else _rule_classify(text)
+
+
+def severity(text: str, category: Category) -> Severity:
+    """Return the severity of *text*: AI first, rules as fallback.
+
+    *category* is only consulted on the rule-based path; when the model
+    answers, its own severity is returned as-is.
+    """
+    ai_result = _call_ollama(text)
+    return ai_result[1] if ai_result else _rule_severity(text, category)
+
+
+def classify_and_severity(text: str) -> Tuple[Category, Severity]:
+    """Return ``(category, severity)`` for *text* using at most one AI call.
+
+    The model is skipped entirely when ``PARSER_AI_DISABLED`` is set.  If it
+    is skipped or yields no result, the rule-based parser classifies the
+    text once and that category is reused for the rule-based severity.
+
+    Args:
+        text: News text to classify.
+
+    Returns:
+        The ``(category, severity)`` pair from the model, or from the rules
+        when the model is disabled or unavailable.
+    """
+    ai_result = None if PARSER_AI_DISABLED else _call_ollama(text)
+    if ai_result:
+        return ai_result
+    # Single rule path for both "disabled" and "AI failed": classify once
+    # and feed the result into the severity rules.
+    category = _rule_classify(text)
+    return category, _rule_severity(text, category)