Files
usa/crawler/parser_ai.py
2026-03-03 17:27:55 +08:00

139 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
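"""
AI-based news classification and severity rating.

Backend order: DashScope (Tongyi) when DASHSCOPE_API_KEY is set (no Ollama
needed), otherwise a local Ollama model; rule-based parsing is the last resort.
Set PARSER_AI_DISABLED=1 to use the rules only (faster).
"""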
import os
from typing import Literal, Optional, Tuple
# Closed label sets that every classifier in this module returns.
Category = Literal["deployment", "alert", "intel", "diplomatic", "other"]
Severity = Literal["low", "medium", "high", "critical"]

# Runtime configuration, read once from the environment at import time.
# Only the exact value "1" switches the AI backends off.
PARSER_AI_DISABLED = os.getenv("PARSER_AI_DISABLED", "0") == "1"
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1")
DASHSCOPE_API_KEY = os.getenv("DASHSCOPE_API_KEY", "").strip()

# Tuple mirrors of the Literal types above, used for runtime scans of replies.
_CATEGORIES = ("deployment", "alert", "intel", "diplomatic", "other")
_SEVERITIES = ("low", "medium", "high", "critical")
def _parse_ai_response(text: str) -> Tuple[Category, Severity]:
    """Extract a ``(category, severity)`` pair from a model reply.

    The reply is lower-cased and parsed in two stages:

    1. An explicit ``category:severity`` pair (ASCII or full-width colon,
       optional whitespace around it) wins if one is present anywhere.
    2. Otherwise each label is chosen independently as the known label that
       appears *earliest* in the reply (substring match, so variants such as
       "intelligence" still map to "intel").

    Args:
        text: Raw model output; ``None`` or empty is tolerated.

    Returns:
        The parsed pair. A label that cannot be found defaults to ``"other"``
        for the category and ``"low"`` for the severity.
    """
    import re

    reply = (text or "").strip().lower()

    # Stage 1: the exact format the prompt asks for, e.g. "alert:high".
    pair = re.search(
        r"({})\s*[:\uff1a]\s*({})".format("|".join(_CATEGORIES), "|".join(_SEVERITIES)),
        reply,
    )
    if pair:
        return pair.group(1), pair.group(2)  # type: ignore

    def first_mentioned(labels, default):
        # Rank by position in the reply, not by position in the label tuple,
        # so a trailing remark cannot override the model's leading answer.
        hits = [(reply.find(label), label) for label in labels if label in reply]
        return min(hits)[1] if hits else default

    # Stage 2: free-form reply; take whichever label is mentioned first.
    return first_mentioned(_CATEGORIES, "other"), first_mentioned(_SEVERITIES, "low")  # type: ignore
def _call_dashscope(text: str, timeout: int = 6) -> Optional[Tuple[Category, Severity]]:
    """Classify *text* with Alibaba Cloud DashScope (Tongyi); no Ollama needed.

    Requires ``DASHSCOPE_API_KEY`` to be set and ``PARSER_AI_DISABLED`` to be
    off; otherwise the call is skipped.

    Args:
        text: News text; only the first 300 characters are sent to the model.
        timeout: Request timeout in seconds, forwarded to the SDK.

    Returns:
        The parsed ``(category, severity)`` pair, or ``None`` when the backend
        is disabled, the SDK is not installed, the service does not answer
        with HTTP 200, or any other error occurs (best-effort by design so the
        caller can fall back to the rules).
    """
    if not DASHSCOPE_API_KEY or PARSER_AI_DISABLED:
        return None
    try:
        # Imported lazily so the module still loads without the optional SDK.
        import dashscope
        from http import HTTPStatus

        dashscope.api_key = DASHSCOPE_API_KEY
        prompt = f"""Classify this news about US-Iran/middle east (one line only):
- category: deployment|alert|intel|diplomatic|other
- severity: low|medium|high|critical
News: {text[:300]}
Reply format: category:severity (e.g. alert:high)"""
        r = dashscope.Generation.call(
            model="qwen-turbo",
            messages=[{"role": "user", "content": prompt}],
            result_format="message",
            max_tokens=32,
            # Previously `timeout` was accepted but never used, leaving the
            # request bounded only by the SDK's own default.
            request_timeout=timeout,
        )
        if r.status_code != HTTPStatus.OK:
            return None
        out = r.output.get("choices", [{}])[0].get("message", {}).get("content", "")
        return _parse_ai_response(out)
    except Exception:
        # Deliberate best-effort: any failure means "no AI answer".
        return None
def _call_ollama(text: str, timeout: int = 5) -> Optional[Tuple[Category, Severity]]:
    """Classify *text* with a local Ollama model.

    Expects an Ollama server on ``localhost:11434`` (e.g. started with
    ``ollama run llama3.1``). The model name comes from ``OLLAMA_MODEL``.

    Args:
        text: News text; only the first 300 characters are sent to the model.
        timeout: Timeout in seconds handed to ``requests.post``.

    Returns:
        The parsed ``(category, severity)`` pair, or ``None`` when AI is
        disabled, the server does not answer with HTTP 200, or anything raises.
    """
    if PARSER_AI_DISABLED:
        return None
    try:
        # Lazy import: a missing `requests` simply counts as "no AI answer".
        import requests

        prompt = f"""Classify this news about US-Iran/middle east (one line only):
- category: deployment|alert|intel|diplomatic|other
- severity: low|medium|high|critical
News: {text[:300]}
Reply format: category:severity (e.g. alert:high)"""
        payload = {
            "model": OLLAMA_MODEL,
            "messages": [{"role": "user", "content": prompt}],
            "stream": False,
            "options": {"num_predict": 32},
        }
        response = requests.post(
            "http://localhost:11434/api/chat",
            json=payload,
            timeout=timeout,
        )
        if response.status_code == 200:
            message = response.json().get("message", {})
            return _parse_ai_response(message.get("content", ""))
        return None
    except Exception:
        # Best-effort backend: every failure collapses to None.
        return None
def _rule_classify(text: str) -> Category:
    """Return the category computed by ``parser.classify`` for *text*.

    The import is done at call time rather than at module load.
    """
    # NOTE(review): `parser` is presumably the project's rule-based module;
    # it shares its name with a stdlib module removed in Python 3.10 — confirm
    # the intended one is resolved on the interpreter in use.
    from parser import classify as rule_based_classify

    category = rule_based_classify(text)
    return category
def _rule_severity(text: str, category: Category) -> Severity:
    """Return the severity computed by ``parser.severity`` for *text*.

    Args:
        text: News text to rate.
        category: Category already assigned to *text*; passed through as-is.
    """
    # Imported at call time rather than at module load.
    from parser import severity as rule_based_severity

    level = rule_based_severity(text, category)
    return level
def _call_ai(text: str) -> Optional[Tuple[Category, Severity]]:
    """Query the configured AI backend: DashScope if a key is set, else Ollama.

    The backend is chosen solely by whether ``DASHSCOPE_API_KEY`` is non-empty;
    a failed DashScope call is NOT retried against Ollama.

    Returns:
        The backend's ``(category, severity)`` pair, or ``None`` if it failed.
    """
    backend = _call_dashscope if DASHSCOPE_API_KEY else _call_ollama
    return backend(text)
def classify(text: str) -> Category:
    """Return the category for *text*, using the rules when the AI gives nothing."""
    ai_result = _call_ai(text)
    if not ai_result:
        return _rule_classify(text)
    category, _ = ai_result
    return category
def severity(text: str, category: Category) -> Severity:
    """Return the severity for *text*, using the rules when the AI gives nothing.

    Args:
        text: News text to rate.
        category: Only consulted by the rule-based fallback.
    """
    ai_result = _call_ai(text)
    if not ai_result:
        return _rule_severity(text, category)
    _, level = ai_result
    return level
def classify_and_severity(text: str) -> Tuple[Category, Severity]:
    """Return ``(category, severity)`` for *text* with at most one AI call.

    When ``PARSER_AI_DISABLED`` is set the AI backends are skipped entirely.
    Otherwise the AI answer is used if there is one. In every remaining case
    the rule-based classifier runs, and its category is computed once and
    reused for the severity lookup.
    """
    if not PARSER_AI_DISABLED:
        ai_result = _call_ai(text)
        if ai_result:
            return ai_result

    # Single rule-based path for both "AI disabled" and "AI failed".
    category = _rule_classify(text)
    return category, _rule_severity(text, category)