fix: 优化数据

This commit is contained in:
Daniel
2026-03-02 11:28:13 +08:00
parent 4a8fff5a00
commit 004d10b283
39 changed files with 1106 additions and 56 deletions

42
crawler/panel_schema.py Normal file
View File

@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
"""
前端面板完整数据 schema,与 DB / situationData / useReplaySituation 对齐
爬虫 + AI 清洗后的数据必须符合此 schema 才能正确更新前端
"""
from typing import Any, Dict, List, Literal, Optional, Tuple
# Situation-update timeline: the allowed category and severity values,
# plus the maximum length of a summary.
SITUATION_UPDATE_CATEGORIES = (
    "deployment",
    "alert",
    "intel",
    "diplomatic",
    "other",
)
SITUATION_UPDATE_SEVERITIES = (
    "low",
    "medium",
    "high",
    "critical",
)
SUMMARY_MAX_LEN = 120

# Combat losses: one row keyed by field name
# (bases_destroyed, bases_damaged, personnel_killed, ...).
CombatLossesRow = Dict[str, Any]

# Time series (used for replay): a (ISO time, value) pair.
TimeSeriesPoint = Tuple[str, int]

# Fields the AI may extract from news, grouped by target data set.
EXTRACTABLE_FIELDS = {
    "situation_update": [
        "summary",
        "category",
        "severity",
        "timestamp",
    ],
    "combat_losses": [
        "personnel_killed",
        "personnel_wounded",
        "civilian_killed",
        "civilian_wounded",
        "bases_destroyed",
        "bases_damaged",
        "aircraft",
        "warships",
        "armor",
        "vehicles",
    ],
    # value is in the range 0-100
    "retaliation": ["value"],
    # value is in the range 0-100
    "wall_street_trend": ["time", "value"],
    "conflict_stats": [
        "estimated_casualties",
        "estimated_strike_count",
    ],
}
def validate_category(cat: str) -> str:
    """Return *cat* if it is a known situation-update category, else ``"other"``.

    Membership is checked against ``SITUATION_UPDATE_CATEGORIES`` with an
    exact (case-sensitive) comparison.
    """
    if cat in SITUATION_UPDATE_CATEGORIES:
        return cat
    return "other"
def validate_severity(sev: str) -> str:
    """Return *sev* if it is a known severity level, else ``"medium"``.

    Membership is checked against ``SITUATION_UPDATE_SEVERITIES`` with an
    exact (case-sensitive) comparison.
    """
    if sev not in SITUATION_UPDATE_SEVERITIES:
        return "medium"
    return sev
def validate_summary(s: str, max_len: int = SUMMARY_MAX_LEN) -> str:
    """Normalize a summary to one clean line of at most *max_len* characters.

    Steps, in order:
      1. Every run of whitespace (including newlines and tabs) becomes a
         single space.
      2. Remaining ASCII control characters (``\\x00``-``\\x1f``) are removed.
      3. Spaces left adjacent by step 2 are collapsed and the ends stripped.
      4. The text is cut to *max_len* characters and trailing space removed.

    Control characters are removed *before* truncation so they neither use up
    the length budget nor leave doubled or leading spaces in the result.

    Args:
        s: Raw summary text. Empty or non-``str`` values yield ``""``.
        max_len: Maximum number of characters kept.

    Returns:
        The cleaned summary, or ``""`` when *s* is falsy or not a string.
    """
    import re  # kept function-local, as in the original block

    if not s or not isinstance(s, str):
        return ""

    # Whitespace first: newlines/tabs must turn into separators, not be
    # deleted by the control-character pass below.
    text = re.sub(r"\s+", " ", s)
    text = re.sub(r"[\x00-\x1f]", "", text)
    # Dropping a control character that sat between two spaces leaves a
    # double space; collapse again, then trim the ends.
    text = re.sub(r" {2,}", " ", text).strip()
    # A cut can land right after a space, hence the final rstrip.
    return text[:max_len].rstrip()