fix: 优化爬虫机制,新增伊朗支援

This commit is contained in:
Daniel
2026-03-06 10:34:52 +08:00
parent 89145a6743
commit 9f2442f2e3
20 changed files with 411 additions and 62 deletions

View File

@@ -26,6 +26,12 @@ MAX_DELTA_PER_MERGE = {
"civilian_ships": 20, "airport_port": 10,
}
# Retaliation sentiment / Wall Street index: keep values inside a sane range so
# that a single crawler extraction of 0 or 100 cannot zero out or saturate the
# metric.

# Weight given to the currently stored value; (1 - this) is the weight given to
# the newly extracted value, which smooths each update.
RETALIATION_SMOOTH_WEIGHT = 0.6

# Upper bound on the number of retaliation history rows (the history feeds the
# front-end curve and replay).
RETALIATION_HISTORY_MAX_ROWS = 300

# Number of most recent rows kept in the trend table so it cannot grow forever.
WALL_STREET_TREND_MAX_ROWS = 200

# 0 and 100 are treated as anomalous readings; values are clamped into [1, 99]
# before being written.
VALUE_CLAMP_MIN = 1
VALUE_CLAMP_MAX = 99
def _clamp_delta(key: str, value: int) -> int:
"""单次增量上限,避免误提「累计」导致波动"""
@@ -200,38 +206,69 @@ def merge(extracted: Dict[str, Any], db_path: Optional[str] = None) -> bool:
updated = True
except Exception:
pass
# retaliation
# retaliation:平滑更新,避免单条新闻 0/100 导致指标归零或打满
if "retaliation" in extracted:
r = extracted["retaliation"]
conn.execute("INSERT OR REPLACE INTO retaliation_current (id, value) VALUES (1, ?)", (r["value"],))
conn.execute("INSERT INTO retaliation_history (time, value) VALUES (?, ?)", (r["time"], r["value"]))
raw = max(VALUE_CLAMP_MIN, min(VALUE_CLAMP_MAX, int(r.get("value", 50))))
row = conn.execute("SELECT value FROM retaliation_current WHERE id = 1").fetchone()
current = int(row[0]) if row else 50
current = max(VALUE_CLAMP_MIN, min(VALUE_CLAMP_MAX, current))
new_val = round(
RETALIATION_SMOOTH_WEIGHT * current + (1 - RETALIATION_SMOOTH_WEIGHT) * raw
)
new_val = max(VALUE_CLAMP_MIN, min(VALUE_CLAMP_MAX, new_val))
ts = (r.get("time") or datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.000Z"))[:25]
conn.execute("INSERT OR REPLACE INTO retaliation_current (id, value) VALUES (1, ?)", (new_val,))
conn.execute("INSERT INTO retaliation_history (time, value) VALUES (?, ?)", (ts, new_val))
n_ret = conn.execute("SELECT COUNT(*) FROM retaliation_history").fetchone()[0]
if n_ret > RETALIATION_HISTORY_MAX_ROWS:
conn.execute(
"DELETE FROM retaliation_history WHERE id IN (SELECT id FROM retaliation_history ORDER BY time ASC LIMIT ?)",
(n_ret - RETALIATION_HISTORY_MAX_ROWS,),
)
updated = True
# wall_street_trend
# wall_street_trend:限幅后写入,并保留最近 N 条避免表无限增长
if "wall_street" in extracted:
w = extracted["wall_street"]
conn.execute("INSERT INTO wall_street_trend (time, value) VALUES (?, ?)", (w["time"], w["value"]))
raw = int(w.get("value", 50))
val = max(VALUE_CLAMP_MIN, min(VALUE_CLAMP_MAX, raw))
ts = (w.get("time") or datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.000Z"))[:25]
conn.execute("INSERT INTO wall_street_trend (time, value) VALUES (?, ?)", (ts, val))
n = conn.execute("SELECT COUNT(*) FROM wall_street_trend").fetchone()[0]
if n > WALL_STREET_TREND_MAX_ROWS:
conn.execute(
"DELETE FROM wall_street_trend WHERE id IN (SELECT id FROM wall_street_trend ORDER BY time ASC LIMIT ?)",
(n - WALL_STREET_TREND_MAX_ROWS,),
)
updated = True
# key_location:更新双方攻击地点(美军基地被打击 side=us,伊朗设施被打击 side=iran)的 status/damage_level
# key_location:更新双方攻击地点(美军基地被打击 side=us,伊朗设施被打击 side=iran)的 status/damage_level/attacked_at
event_time = extracted.get("_event_time") or datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.000Z")
if "key_location_updates" in extracted:
try:
for u in extracted["key_location_updates"]:
kw_raw = (u.get("name_keywords") or "").strip()
if not kw_raw:
continue
# 支持 "a|b|c" 或 "a b c" 分隔
kw = [k.strip() for k in kw_raw.replace("|", " ").split() if k.strip()]
side = u.get("side")
status = (u.get("status") or "attacked")[:20]
dmg = u.get("damage_level", 2)
if not kw or side not in ("us", "iran"):
continue
# 简化:name LIKE '%kw%',对每个关键词用 OR 连接,支持中英文
attacked_at = (u.get("attacked_at") or event_time)[:25]
conditions = " OR ".join("name LIKE ?" for _ in kw)
params = [status, dmg, side] + [f"%{k}%" for k in kw]
cur = conn.execute(
f"UPDATE key_location SET status=?, damage_level=? WHERE side=? AND ({conditions})",
params,
)
params_with_at = [status, dmg, attacked_at, side] + [f"%{k}%" for k in kw]
try:
cur = conn.execute(
f"UPDATE key_location SET status=?, damage_level=?, attacked_at=? WHERE side=? AND ({conditions})",
params_with_at,
)
except sqlite3.OperationalError:
params_no_at = [status, dmg, side] + [f"%{k}%" for k in kw]
cur = conn.execute(
f"UPDATE key_location SET status=?, damage_level=? WHERE side=? AND ({conditions})",
params_no_at,
)
if cur.rowcount > 0:
updated = True
except Exception: