Files
usa/crawler/parser.py
2026-03-02 01:00:04 +08:00

53 lines
1.6 KiB
Python

# -*- coding: utf-8 -*-
"""新闻分类与严重度判定"""
import re
from typing import Literal
# Closed label sets used as the return types of classify() and severity().
Category = Literal["deployment", "alert", "intel", "diplomatic", "other"]
Severity = Literal["low", "medium", "high", "critical"]

# Classification keywords, one list per category (English and Chinese).
# They are matched as case-insensitive substrings of the news text.
CAT_DEPLOYMENT = [
    "deploy",
    "carrier",
    "航母",  # aircraft carrier
    "military build",
    "troop",
    "forces",
]
CAT_ALERT = [
    "strike",
    "attack",
    "fire",
    "blast",
    "hit",
    "爆炸",  # explosion
    "袭击",  # attack / raid
    "打击",  # strike
]
CAT_INTEL = [
    "satellite",
    "intel",
    "image",
    "surveillance",
    "卫星",  # satellite
    "情报",  # intelligence
]
CAT_DIPLOMATIC = [
    "talk",
    "negotiation",
    "diplomat",
    "sanction",
    "谈判",  # negotiation
    "制裁",  # sanctions
]
def _match(text: str, words: list[str]) -> bool:
    """Return True if any keyword in *words* occurs in *text*, ignoring case.

    Matching is a plain substring test on the lower-cased text, so a keyword
    also matches inside longer words (e.g. "deploy" matches "deployed").

    Args:
        text: The text to search. A falsy value (``None`` or ``""``) is
            treated as empty text and never matches.
        words: Candidate keywords. Empty keywords are ignored.

    Returns:
        True when at least one non-empty keyword is found, otherwise False.
    """
    haystack = (text or "").lower()
    # An empty string is a substring of every string, so a stray "" in a
    # keyword list would make this helper return True for any input.
    return any(word.lower() in haystack for word in words if word)
def classify(text: str) -> Category:
    """Assign a news text to the first category whose keywords match.

    Categories are tried in a fixed priority order: alert, deployment,
    intel, diplomatic. A text that matches none of them is "other".
    """
    # Order matters: an earlier rule wins when a text matches several lists.
    prioritized_rules = (
        ("alert", CAT_ALERT),
        ("deployment", CAT_DEPLOYMENT),
        ("intel", CAT_INTEL),
        ("diplomatic", CAT_DIPLOMATIC),
    )
    for label, keywords in prioritized_rules:
        if _match(text, keywords):
            return label
    return "other"
def severity(text: str, category: Category) -> Severity:
    """Rate the severity of a news item from its text and category.

    The rules are applied in order and the first one that holds wins:

    1. the text contains a critical keyword            -> "critical"
    2. the text contains a high keyword, or the
       category is "alert"                             -> "high"
    3. the category is "deployment"                    -> "medium"
    4. anything else                                   -> "low"

    Keyword lookup is delegated to ``_match``, which does a case-insensitive
    substring search and treats a falsy *text* as empty.

    Args:
        text: The news text to inspect.
        category: The category previously assigned to the item.

    Returns:
        One of "critical", "high", "medium" or "low".
    """
    # Every entry must be non-empty: an empty string is a substring of any
    # text and would rate every single item as "critical".
    critical = [
        "nuclear", "strike", "attack", "killed", "dead", "casualty",
        "war", "invasion", "袭击", "打击", "死亡",
    ]
    high = [
        "missile", "drone", "bomb", "explosion", "blasted", "fire",
        "导弹", "无人机", "爆炸", "轰炸",
    ]
    if _match(text, critical):
        return "critical"
    if _match(text, high) or category == "alert":
        return "high"
    if category == "deployment":
        return "medium"
    return "low"