fix: 优化数据
This commit is contained in:
42
crawler/panel_schema.py
Normal file
42
crawler/panel_schema.py
Normal file
@@ -0,0 +1,42 @@
|
||||
# -*- coding: utf-8 -*-
"""
Complete data schema for the front-end panel, aligned with DB / situationData / useReplaySituation.
Data produced by the crawler + AI cleaning must conform to this schema for the front end to update correctly.
"""
|
||||
from typing import Any, Dict, List, Literal, Optional, Tuple
|
||||
|
||||
# Event timeline: the closed sets of values accepted for a situation update.
SITUATION_UPDATE_CATEGORIES = ("deployment", "alert", "intel", "diplomatic", "other")
SITUATION_UPDATE_SEVERITIES = ("low", "medium", "high", "critical")
# Default maximum number of characters kept by validate_summary().
SUMMARY_MAX_LEN = 120
|
||||
|
||||
# Combat losses
# One row keyed by field name: bases_destroyed, bases_damaged, personnel_killed, ...
CombatLossesRow = Dict[str, Any]
|
||||
|
||||
# Time series (used for replay)
TimeSeriesPoint = Tuple[str, int]  # (ISO time, value)
|
||||
|
||||
# Fields the AI can extract from news
EXTRACTABLE_FIELDS = {
    "situation_update": ["summary", "category", "severity", "timestamp"],
    "combat_losses": [
        "personnel_killed",
        "personnel_wounded",
        "civilian_killed",
        "civilian_wounded",
        "bases_destroyed",
        "bases_damaged",
        "aircraft",
        "warships",
        "armor",
        "vehicles",
    ],
    "retaliation": ["value"],  # 0-100
    "wall_street_trend": ["time", "value"],  # 0-100
    "conflict_stats": ["estimated_casualties", "estimated_strike_count"],
}
|
||||
|
||||
|
||||
def validate_category(cat: str) -> str:
    """Return a valid situation-update category, falling back to ``"other"``.

    Args:
        cat: Candidate category. Surrounding whitespace and letter case are
            ignored when matching, so ``" Alert "`` resolves to ``"alert"``.

    Returns:
        The matching entry of ``SITUATION_UPDATE_CATEGORIES``, or ``"other"``
        when ``cat`` is not a string or matches no entry.
    """
    if isinstance(cat, str):
        # Extracted values may differ from the canonical spelling only in
        # case or padding; accept those rather than discarding them.
        normalized = cat.strip().lower()
        if normalized in SITUATION_UPDATE_CATEGORIES:
            return normalized
    return "other"
|
||||
|
||||
|
||||
def validate_severity(sev: str) -> str:
    """Return a valid situation-update severity, falling back to ``"medium"``.

    Args:
        sev: Candidate severity. Surrounding whitespace and letter case are
            ignored when matching, so ``"HIGH "`` resolves to ``"high"``.

    Returns:
        The matching entry of ``SITUATION_UPDATE_SEVERITIES``, or ``"medium"``
        when ``sev`` is not a string or matches no entry.
    """
    if isinstance(sev, str):
        # Same tolerant matching as validate_category().
        normalized = sev.strip().lower()
        if normalized in SITUATION_UPDATE_SEVERITIES:
            return normalized
    return "medium"
|
||||
|
||||
|
||||
def validate_summary(s: str, max_len: int = SUMMARY_MAX_LEN) -> str:
    """Normalize a summary to a single clean line of at most ``max_len`` characters.

    Whitespace runs are collapsed to one space, remaining ASCII control
    characters (``\\x00``-``\\x1f``) are removed, and the result is trimmed
    and truncated.

    Args:
        s: Raw summary text.
        max_len: Maximum number of characters to keep. Values below zero are
            treated as zero.

    Returns:
        The cleaned summary, or ``""`` when ``s`` is empty or not a string.
    """
    import re

    if not isinstance(s, str) or not s:
        return ""

    # Turn every whitespace run (including tabs/newlines) into one space first,
    # so the control-character pass below only removes non-whitespace controls.
    text = re.sub(r"\s+", " ", s)
    text = re.sub(r"[\x00-\x1f]", "", text)
    # Dropping a control character that sat between two spaces leaves a double
    # space; collapse again before trimming.
    text = re.sub(r" {2,}", " ", text).strip()

    # Truncate only after cleaning so removed characters do not count against
    # the limit; a negative limit would otherwise slice from the end.
    limit = max(max_len, 0)
    return text[:limit].rstrip()
|
||||
Reference in New Issue
Block a user