fix:优化数据来源
This commit is contained in:
52
crawler/parser.py
Normal file
52
crawler/parser.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""新闻分类与严重度判定"""
|
||||
import re
|
||||
from typing import Literal
|
||||
|
||||
# Closed label sets used as the return types of classify() and severity().
Category = Literal["deployment", "alert", "intel", "diplomatic", "other"]
Severity = Literal["low", "medium", "high", "critical"]

# Classification keywords, one list per category. Entries mix English and
# Chinese terms and are matched as case-insensitive substrings by _match().
CAT_DEPLOYMENT = ["deploy", "carrier", "航母", "military build", "troop", "forces"]
CAT_ALERT = ["strike", "attack", "fire", "blast", "hit", "爆炸", "袭击", "打击"]
CAT_INTEL = ["satellite", "intel", "image", "surveillance", "卫星", "情报"]
CAT_DIPLOMATIC = ["talk", "negotiation", "diplomat", "sanction", "谈判", "制裁"]
|
||||
|
||||
|
||||
def _match(text: str, words: list[str]) -> bool:
|
||||
t = (text or "").lower()
|
||||
for w in words:
|
||||
if w.lower() in t:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def classify(text: str) -> Category:
    """Return the category of a news text based on keyword matches.

    Categories are checked in a fixed priority order (alert, deployment,
    intel, diplomatic) and the first one whose keyword list matches wins.

    Args:
        text: News text to classify.

    Returns:
        The first matching category, or "other" when no keyword matches.
    """
    # Order matters: a text that matches several lists gets the label of
    # the earliest entry in this table.
    priority: tuple[tuple[Category, list[str]], ...] = (
        ("alert", CAT_ALERT),
        ("deployment", CAT_DEPLOYMENT),
        ("intel", CAT_INTEL),
        ("diplomatic", CAT_DIPLOMATIC),
    )
    for label, keywords in priority:
        if _match(text, keywords):
            return label
    return "other"
|
||||
|
||||
|
||||
def severity(text: str, category: Category) -> Severity:
    """Return the severity level of a news text.

    Rules, evaluated in order:

    1. Any critical keyword in the text  -> "critical".
    2. Any high keyword in the text, or category "alert" -> "high".
    3. Category "deployment" -> "medium".
    4. Otherwise -> "low".

    Keywords are matched as case-insensitive substrings.

    Args:
        text: News text to inspect. ``None`` or empty is treated as "".
        category: Category previously assigned to the text.

    Returns:
        One of "low", "medium", "high" or "critical".
    """
    lowered = (text or "").lower()

    # All keywords below are lowercase, so they can be tested directly
    # against the lower-cased text.
    critical = (
        # "casualties" is listed explicitly: the plural does not contain
        # the substring "casualty".
        "nuclear", "核", "strike", "attack", "killed", "dead", "casualty",
        "casualties", "war", "invasion", "袭击", "打击", "死亡",
    )
    high = (
        # "blast" (rather than "blasted") also covers "blasts"/"blasted".
        "missile", "drone", "bomb", "explosion", "blast", "fire",
        "导弹", "无人机", "爆炸", "轰炸",
    )

    def mentions(keywords: tuple[str, ...]) -> bool:
        """Return True if any keyword is a substring of the text."""
        return any(keyword in lowered for keyword in keywords)

    if mentions(critical):
        return "critical"
    if mentions(high) or category == "alert":
        return "high"
    if category == "deployment":
        return "medium"
    return "low"
|
||||
Reference in New Issue
Block a user