Files
usa/crawler/panel_schema.py
2026-03-02 11:28:13 +08:00

43 lines
1.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
前端面板完整数据 schema与 DB / situationData / useReplaySituation 对齐
爬虫 + AI 清洗后的数据必须符合此 schema 才能正确更新前端
"""
from typing import Any, Dict, List, Literal, Optional, Tuple
# Situation updates (event timeline)
SITUATION_UPDATE_CATEGORIES = (
    "deployment",
    "alert",
    "intel",
    "diplomatic",
    "other",
)
SITUATION_UPDATE_SEVERITIES = (
    "low",
    "medium",
    "high",
    "critical",
)
SUMMARY_MAX_LEN = 120

# Combat losses row, keyed by field name:
# bases_destroyed, bases_damaged, personnel_killed, ...
CombatLossesRow = Dict[str, Any]

# Time-series point (used for replay): (ISO time, value)
TimeSeriesPoint = Tuple[str, int]

# Fields the AI may extract from news
EXTRACTABLE_FIELDS = {
    "situation_update": [
        "summary",
        "category",
        "severity",
        "timestamp",
    ],
    "combat_losses": [
        "personnel_killed",
        "personnel_wounded",
        "civilian_killed",
        "civilian_wounded",
        "bases_destroyed",
        "bases_damaged",
        "aircraft",
        "warships",
        "armor",
        "vehicles",
    ],
    "retaliation": ["value"],  # 0-100
    "wall_street_trend": ["time", "value"],  # 0-100
    "conflict_stats": [
        "estimated_casualties",
        "estimated_strike_count",
    ],
}
def validate_category(cat: str) -> str:
    """Return *cat* when it is a known situation-update category.

    Any value not listed in ``SITUATION_UPDATE_CATEGORIES`` is replaced by
    the fallback ``"other"``.
    """
    if cat not in SITUATION_UPDATE_CATEGORIES:
        return "other"
    return cat
def validate_severity(sev: str) -> str:
    """Return *sev* when it is a known situation-update severity.

    Any value not listed in ``SITUATION_UPDATE_SEVERITIES`` is replaced by
    the fallback ``"medium"``.
    """
    if sev not in SITUATION_UPDATE_SEVERITIES:
        return "medium"
    return sev
def validate_summary(s: str, max_len: int = SUMMARY_MAX_LEN) -> str:
    """Normalize a free-text summary to one clean line of bounded length.

    Non-whitespace control characters are removed, every run of whitespace
    becomes a single space, the text is stripped, and the result is cut to
    at most ``max_len`` characters (trailing whitespace left by the cut is
    removed as well).

    Args:
        s: Raw summary text. Anything that is not a non-empty ``str`` yields
            an empty string.
        max_len: Maximum length of the returned string.

    Returns:
        The cleaned summary, or ``""`` when there is nothing usable.
    """
    import re

    if not s or not isinstance(s, str):
        return ""
    # Drop control characters that are not whitespace *before* collapsing and
    # truncating. Removing them afterwards could leave doubled spaces
    # ("a \x00 b" -> "a  b") and spend part of the length budget on
    # characters that are then thrown away. Whitespace controls (\t, \n, ...)
    # are deliberately kept here so the next step turns them into a single
    # word-separating space instead of gluing words together.
    text = re.sub(r"(?!\s)[\x00-\x1f]", "", s)
    text = re.sub(r"\s+", " ", text).strip()
    # The cut may land right after a space; trim it so the result never ends
    # in whitespace.
    return text[:max_len].rstrip()