fix: 优化数据
This commit is contained in:
101
crawler/parser_ai.py
Normal file
101
crawler/parser_ai.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
AI 新闻分类与严重度判定
|
||||
优先使用 Ollama 本地模型(免费),失败则回退到规则
|
||||
设置 PARSER_AI_DISABLED=1 可只用规则(更快)
|
||||
"""
|
||||
import os
|
||||
from typing import Literal, Optional, Tuple
|
||||
|
||||
Category = Literal["deployment", "alert", "intel", "diplomatic", "other"]
|
||||
Severity = Literal["low", "medium", "high", "critical"]
|
||||
|
||||
PARSER_AI_DISABLED = os.environ.get("PARSER_AI_DISABLED", "0") == "1"
|
||||
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3.1") # 或 qwen2.5:7b
|
||||
|
||||
_CATEGORIES = ("deployment", "alert", "intel", "diplomatic", "other")
|
||||
_SEVERITIES = ("low", "medium", "high", "critical")
|
||||
|
||||
|
||||
def _parse_ai_response(text: str) -> Tuple[Category, Severity]:
|
||||
"""从 AI 回复解析 category:severity"""
|
||||
t = (text or "").strip().lower()
|
||||
cat, sev = "other", "low"
|
||||
for c in _CATEGORIES:
|
||||
if c in t:
|
||||
cat = c
|
||||
break
|
||||
for s in _SEVERITIES:
|
||||
if s in t:
|
||||
sev = s
|
||||
break
|
||||
return cat, sev # type: ignore
|
||||
|
||||
|
||||
def _call_ollama(text: str, timeout: int = 5) -> Optional[Tuple[Category, Severity]]:
|
||||
"""调用 Ollama 本地模型。需先运行 ollama run llama3.1 或 qwen2.5:7b"""
|
||||
if PARSER_AI_DISABLED:
|
||||
return None
|
||||
try:
|
||||
import requests
|
||||
prompt = f"""Classify this news about US-Iran/middle east (one line only):
|
||||
- category: deployment|alert|intel|diplomatic|other
|
||||
- severity: low|medium|high|critical
|
||||
|
||||
News: {text[:300]}
|
||||
|
||||
Reply format: category:severity (e.g. alert:high)"""
|
||||
r = requests.post(
|
||||
"http://localhost:11434/api/chat",
|
||||
json={
|
||||
"model": OLLAMA_MODEL,
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"stream": False,
|
||||
"options": {"num_predict": 32},
|
||||
},
|
||||
timeout=timeout,
|
||||
)
|
||||
if r.status_code != 200:
|
||||
return None
|
||||
out = r.json().get("message", {}).get("content", "")
|
||||
return _parse_ai_response(out)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _rule_classify(text: str) -> Category:
|
||||
from parser import classify
|
||||
return classify(text)
|
||||
|
||||
|
||||
def _rule_severity(text: str, category: Category) -> Severity:
|
||||
from parser import severity
|
||||
return severity(text, category)
|
||||
|
||||
|
||||
def classify(text: str) -> Category:
|
||||
"""分类。AI 失败时回退规则"""
|
||||
res = _call_ollama(text)
|
||||
if res:
|
||||
return res[0]
|
||||
return _rule_classify(text)
|
||||
|
||||
|
||||
def severity(text: str, category: Category) -> Severity:
|
||||
"""严重度。AI 失败时回退规则"""
|
||||
res = _call_ollama(text)
|
||||
if res:
|
||||
return res[1]
|
||||
return _rule_severity(text, category)
|
||||
|
||||
|
||||
def classify_and_severity(text: str) -> Tuple[Category, Severity]:
|
||||
"""一次调用返回分类和严重度(减少 AI 调用)"""
|
||||
if PARSER_AI_DISABLED:
|
||||
from parser import classify, severity
|
||||
c = classify(text)
|
||||
return c, severity(text, c)
|
||||
res = _call_ollama(text)
|
||||
if res:
|
||||
return res
|
||||
return _rule_classify(text), _rule_severity(text, _rule_classify(text))
|
||||
Reference in New Issue
Block a user