fix: 更新数据面板的驱动方式

2026-03-02 23:21:07 +08:00
parent ef60f18cb0
commit 92914e6522
22 changed files with 427 additions and 62 deletions
--- a/crawler/pycache/db_merge.cpython-39.pyc
+++ b/crawler/pycache/db_merge.cpython-39.pyc
--- a/crawler/pycache/extractor_rules.cpython-39.pyc
+++ b/crawler/pycache/extractor_rules.cpython-39.pyc
--- a/crawler/db_merge.py
+++ b/crawler/db_merge.py
@@ -140,18 +140,19 @@ def merge(extracted: Dict[str, Any], db_path: Optional[str] = None) -> bool:
        if "key_location_updates" in extracted:
            try:
                for u in extracted["key_location_updates"]:
-                    kw = (u.get("name_keywords") or "").replace("|", " ").split()
+                    kw_raw = (u.get("name_keywords") or "").strip()
+                    if not kw_raw:
+                        continue
+                    # 支持 "a|b|c" 或 "a b c" 分隔
+                    kw = [k.strip() for k in kw_raw.replace("|", " ").split() if k.strip()]
                    side = u.get("side")
-                    status = u.get("status", "attacked")[:20]
+                    status = (u.get("status") or "attacked")[:20]
                    dmg = u.get("damage_level", 2)
                    if not kw or side not in ("us", "iran"):
                        continue
-                    conditions = " OR ".join(
-                        "(LOWER(name) LIKE ? OR name LIKE ?)" for _ in kw
-                    )
-                    params = [status, dmg, side]
-                    for k in kw:
-                        params.extend([f"%{k}%", f"%{k}%"])
+                    # 简化：name LIKE '%kw%' 对每个关键词 OR 连接，支持中英文
+                    conditions = " OR ".join("name LIKE ?" for _ in kw)
+                    params = [status, dmg, side] + [f"%{k}%" for k in kw]
                    cur = conn.execute(
                        f"UPDATE key_location SET status=?, damage_level=? WHERE side=? AND ({conditions})",
                        params,
--- a/crawler/extractor_ai.py
+++ b/crawler/extractor_ai.py
@@ -30,8 +30,10 @@ def _call_ollama_extract(text: str, timeout: int = 10) -> Optional[Dict[str, Any
 - 战损（仅当新闻明确提及数字时填写，格式 us_XXX / iran_XXX）:
  us_personnel_killed, iran_personnel_killed, us_personnel_wounded, iran_personnel_wounded,
  us_civilian_killed, iran_civilian_killed, us_civilian_wounded, iran_civilian_wounded,
-  us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged,
-  us_aircraft, iran_aircraft, us_warships, iran_warships, us_armor, iran_armor, us_vehicles, iran_vehicles
+  us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged.
+  重要：bases_* 仅指已确认损毁/受损的基地数量；"军事目标"/targets 等泛指不是基地，若报道只说"X个军事目标遭袭"而无具体基地名，不填写 bases_*
+  us_aircraft, iran_aircraft, us_warships, iran_warships, us_armor, iran_armor, us_vehicles, iran_vehicles,
+  us_drones, iran_drones, us_missiles, iran_missiles, us_helicopters, iran_helicopters, us_submarines, iran_submarines
 - retaliation_sentiment: 0-100，仅当新闻涉及伊朗报复情绪时
 - wall_street_value: 0-100，仅当新闻涉及美股/市场反应时
 - key_location_updates: 当新闻提及具体基地/地点遭袭时，数组项 { "name_keywords": "asad|阿萨德|assad", "side": "us", "status": "attacked", "damage_level": 1-3 }
@@ -79,7 +81,7 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
    # combat_losses 增量（仅数字字段）
    loss_us = {}
    loss_ir = {}
-    for k in ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded", "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles"]:
+    for k in ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded", "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles", "drones", "missiles", "helicopters", "submarines"]:
        uk = f"us_{k}"
        ik = f"iran_{k}"
        if uk in parsed and isinstance(parsed[uk], (int, float)):
--- a/crawler/extractor_dashscope.py
+++ b/crawler/extractor_dashscope.py
@@ -33,11 +33,13 @@ def _call_dashscope_extract(text: str, timeout: int = 15) -> Optional[Dict[str,
 - 战损（仅当新闻明确提及数字时填写）:
  us_personnel_killed, iran_personnel_killed, us_personnel_wounded, iran_personnel_wounded,
  us_civilian_killed, iran_civilian_killed, us_civilian_wounded, iran_civilian_wounded,
-  us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged,
-  us_aircraft, iran_aircraft, us_warships, iran_warships, us_armor, iran_armor, us_vehicles, iran_vehicles
+  us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged.
+  重要：bases_* 仅指已确认损毁/受损的基地数量；"军事目标"/"targets"等泛指不是基地，若报道只说"X个军事目标遭袭"而无具体基地名，不填写 bases_*
+  us_aircraft, iran_aircraft, us_warships, iran_warships, us_armor, iran_armor, us_vehicles, iran_vehicles,
+  us_drones, iran_drones, us_missiles, iran_missiles, us_helicopters, iran_helicopters, us_submarines, iran_submarines
 - retaliation_sentiment: 0-100，仅当新闻涉及伊朗报复/反击情绪时
 - wall_street_value: 0-100，仅当新闻涉及美股/市场反应时
- key_location_updates: 当新闻提及具体基地遭袭时，数组 [{{"name_keywords":"阿萨德|asad|assad","side":"us","status":"attacked","damage_level":1-3}}]
+- key_location_updates: 当新闻提及具体基地/设施遭袭时必填，数组 [{{"name_keywords":"阿萨德|asad|assad|阿因","side":"us","status":"attacked","damage_level":1-3}}]。常用关键词：阿萨德|asad|巴格达|baghdad|乌代德|udeid|埃尔比勒|erbil|因吉尔利克|incirlik|德黑兰|tehran|阿巴斯|abbas|布什尔|bushehr|伊斯法罕|isfahan|纳坦兹|natanz

 原文：
 {str(text)[:800]}
@@ -82,7 +84,8 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
    loss_us = {}
    loss_ir = {}
    for k in ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded",
-              "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles"]:
+              "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles",
+              "drones", "missiles", "helicopters", "submarines"]:
        uk, ik = f"us_{k}", f"iran_{k}"
        if uk in parsed and isinstance(parsed[uk], (int, float)):
            loss_us[k] = max(0, int(parsed[uk]))
--- a/crawler/extractor_rules.py
+++ b/crawler/extractor_rules.py
@@ -36,6 +36,8 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
    if v is not None:
        loss_us["personnel_killed"] = v
    v = _first_int(t, r"(\d+)\s*名?\s*(?:美军|美国)\s*受伤")
+    if v is None and ("美军" in (text or "") or "美国" in (text or "")):
+        v = _first_int(text or t, r"另有\s*(\d+)\s*人\s*受伤")
    if v is not None:
        loss_us["personnel_wounded"] = v
    v = _first_int(t, r"美军\s*伤亡\s*(\d+)")
@@ -57,7 +59,7 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
    v = _first_int(t, r"(\d+)\s*名?\s*伊朗\s*伤亡")
    if v is not None:
        loss_ir["personnel_killed"] = v
-    v = _first_int(t, r"(\d+)\s*名?\s*(?:伊朗|伊朗军队)\s*(?:死亡|阵亡)")
+    v = _first_int(t, r"(\d+)\s*名?\s*(?:伊朗|伊朗军队)[\s\w]*(?:死亡|阵亡)")
    if v is not None:
        loss_ir["personnel_killed"] = v
    v = _first_int(t, r"(\d+)\s*名?\s*伊朗\s*受伤")
@@ -75,28 +77,42 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
    if v is not None:
        loss_ir["personnel_wounded"] = v

-    # 平民伤亡（中英文）
+    # 平民伤亡（中英文，按阵营归属）
    v = _first_int(t, r"(\d+)\s*名?\s*平民\s*(?:伤亡|死亡)")
    if v is not None:
-        loss_us["civilian_killed"] = v
-    v = _first_int(t, r"(\d+)[\s\w]*(?:civilian|civil)[\s\w]*(?:killed|dead)") if loss_us.get("civilian_killed") is None else None
+        if "伊朗" in text or "iran" in t:
+            loss_ir["civilian_killed"] = v
+        else:
+            loss_us["civilian_killed"] = v
+    v = _first_int(t, r"(\d+)[\s\w]*(?:civilian|civil)[\s\w]*(?:killed|dead)") if loss_us.get("civilian_killed") is None and loss_ir.get("civilian_killed") is None else None
    if v is not None:
-        loss_us["civilian_killed"] = v
+        if "iran" in t:
+            loss_ir["civilian_killed"] = v
+        else:
+            loss_us["civilian_killed"] = v
    v = _first_int(t, r"(\d+)[\s\w]*(?:civilian|civil)[\s\w]*(?:wounded|injured)")
    if v is not None:
-        loss_us["civilian_wounded"] = v
+        if "iran" in t:
+            loss_ir["civilian_wounded"] = v
+        else:
+            loss_us["civilian_wounded"] = v
+    v = _first_int(text or t, r"伊朗[\s\w]*(?:空袭|打击)[\s\w]*造成[^\d]*(\d+)[\s\w]*(?:平民|人|伤亡)")
+    if v is not None:
+        loss_ir["civilian_killed"] = v

-    # 基地损毁（美方基地居多）+ 中文
-    v = _first_int(t, r"(\d+)[\s\w]*(?:base|基地)[\s\w]*(?:destroyed|leveled|摧毁|夷平)")
-    if v is not None:
-        loss_us["bases_destroyed"] = v
-    v = _first_int(t, r"(\d+)[\s\w]*(?:base|基地)[\s\w]*(?:damaged|hit|struck|受损|袭击)")
-    if v is not None:
-        loss_us["bases_damaged"] = v
-    if ("base" in t or "基地" in t) and ("destroy" in t or "level" in t or "摧毁" in t or "夷平" in t) and not loss_us.get("bases_destroyed"):
-        loss_us["bases_destroyed"] = 1
-    if ("base" in t or "基地" in t) and ("damage" in t or "hit" in t or "struck" in t or "strike" in t or "袭击" in t or "受损" in t) and not loss_us.get("bases_damaged"):
-        loss_us["bases_damaged"] = 1
+    # 基地损毁（仅匹配 base/基地，排除"军事目标"等泛指）
+    skip_bases = "军事目标" in (text or "") and "基地" not in (text or "") and "base" not in t
+    if not skip_bases:
+        v = _first_int(t, r"(\d+)[\s\w]*(?:base|基地)[\s\w]*(?:destroyed|leveled|摧毁|夷平)")
+        if v is not None:
+            loss_us["bases_destroyed"] = v
+        v = _first_int(t, r"(\d+)[\s\w]*(?:base|基地)[\s\w]*(?:damaged|hit|struck|受损|袭击)")
+        if v is not None:
+            loss_us["bases_damaged"] = v
+        if ("base" in t or "基地" in t) and ("destroy" in t or "level" in t or "摧毁" in t or "夷平" in t) and not loss_us.get("bases_destroyed"):
+            loss_us["bases_destroyed"] = 1
+        if ("base" in t or "基地" in t) and ("damage" in t or "hit" in t or "struck" in t or "strike" in t or "袭击" in t or "受损" in t) and not loss_us.get("bases_damaged"):
+            loss_us["bases_damaged"] = 1

    # 战机 / 舰船（根据上下文判断阵营）
    v = _first_int(t, r"(\d+)[\s\w]*(?:aircraft|plane|jet|fighter|f-?16|f-?35|f-?18)[\s\w]*(?:down|destroyed|lost|shot)")
@@ -114,6 +130,48 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
        else:
            loss_us["warships"] = v

+    # 无人机 drone / uav / 无人机
+    v = _first_int(t, r"(\d+)[\s\w]*(?:drone|uav|无人机)[\s\w]*(?:down|destroyed|shot|击落|摧毁)")
+    if v is None:
+        v = _first_int(text or t, r"(?:击落|摧毁)[^\d]*(\d+)[\s\w]*(?:drone|uav|无人机|架)")
+    if v is None:
+        v = _first_int(t, r"(?:drone|uav|无人机)[\s\w]*(\d+)[\s\w]*(?:down|destroyed|shot|击落|摧毁)")
+    if v is not None:
+        if "iran" in t or "iranian" in t or "shahed" in t or "沙希德" in t or "伊朗" in (text or ""):
+            loss_ir["drones"] = v
+        else:
+            loss_us["drones"] = v
+
+    # 导弹 missile / 导弹
+    v = _first_int(t, r"(\d+)[\s\w]*(?:missile|导弹)[\s\w]*(?:fired|launched|intercepted|destroyed|发射|拦截|击落)")
+    if v is not None:
+        if "iran" in t or "iranian" in t:
+            loss_ir["missiles"] = v
+        else:
+            loss_us["missiles"] = v
+    v = _first_int(t, r"(?:missile|导弹)[\s\w]*(\d+)[\s\w]*(?:fired|launched|intercepted|destroyed|发射|拦截)") if not loss_us.get("missiles") and not loss_ir.get("missiles") else None
+    if v is not None:
+        if "iran" in t:
+            loss_ir["missiles"] = v
+        else:
+            loss_us["missiles"] = v
+
+    # 直升机 helicopter / 直升机
+    v = _first_int(t, r"(\d+)[\s\w]*(?:helicopter|直升机)[\s\w]*(?:down|destroyed|crashed|crashes|击落|坠毁)")
+    if v is not None:
+        if "iran" in t or "iranian" in t:
+            loss_ir["helicopters"] = v
+        else:
+            loss_us["helicopters"] = v
+
+    # 潜艇 submarine / 潜艇
+    v = _first_int(t, r"(\d+)[\s\w]*(?:submarine|潜艇)[\s\w]*(?:sunk|damaged|hit|destroyed|击沉|受损)")
+    if v is not None:
+        if "iran" in t or "iranian" in t:
+            loss_ir["submarines"] = v
+        else:
+            loss_us["submarines"] = v
+
    if loss_us:
        out.setdefault("combat_losses_delta", {})["us"] = loss_us
    if loss_ir:
@@ -124,11 +182,14 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
        out["wall_street"] = {"time": ts, "value": 55}

    # key_location_updates：受袭基地（与 key_location.name 匹配）
-    # 新闻提及基地遭袭时，更新对应基地 status
-    base_attacked = ("base" in t or "基地" in t) and ("attack" in t or "hit" in t or "strike" in t or "damage" in t or "袭击" in t or "打击" in t)
+    # 新闻提及基地遭袭时，更新对应基地 status；放宽触发词以匹配更多英文报道
+    attack_words = ("attack" in t or "attacked" in t or "hit" in t or "strike" in t or "struck" in t or "strikes" in t
+                    or "damage" in t or "damaged" in t or "target" in t or "targeted" in t or "bomb" in t or "bombed" in t
+                    or "袭击" in (text or "") or "遭袭" in (text or "") or "打击" in (text or "") or "受损" in (text or "") or "摧毁" in (text or ""))
+    base_attacked = ("base" in t or "基地" in t or "outpost" in t or "facility" in t) and attack_words
    if base_attacked:
        updates: list = []
-        # 常见美军基地关键词 -> name_keywords（用于 db_merge 的 LIKE 匹配）
+        # 常见美军基地关键词 -> name_keywords（用于 db_merge 的 LIKE 匹配，需与 key_location.name 能匹配）
        bases_all = [
            ("阿萨德|阿因|asad|assad|ain", "us"),
            ("巴格达|baghdad", "us"),
--- a/crawler/requirements.txt
+++ b/crawler/requirements.txt
@@ -1,5 +1,6 @@
 requests>=2.31.0
 feedparser>=6.0.0
+pytest>=7.0.0
 fastapi>=0.109.0
 uvicorn>=0.27.0
 deep-translator>=1.11.0
--- a/crawler/tests/init.py
+++ b/crawler/tests/init.py
@@ -0,0 +1 @@
+# crawler tests
--- a/crawler/tests/pycache/init.cpython-39.pyc
+++ b/crawler/tests/pycache/init.cpython-39.pyc
--- a/crawler/tests/pycache/test_extraction.cpython-39-pytest-8.4.2.pyc
+++ b/crawler/tests/pycache/test_extraction.cpython-39-pytest-8.4.2.pyc
--- a/crawler/tests/test_extraction.py
+++ b/crawler/tests/test_extraction.py
@@ -0,0 +1,198 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for the crawler's data-cleaning and field-mapping logic.
+Covers extractor_rules and db_merge (extractor_dashscope is not exercised in this file).
+"""
+import os
+import sqlite3
+import tempfile
+from pathlib import Path
+
+import pytest
+
+# Put the directory two levels above this file on sys.path so the project imports below resolve
+ROOT = Path(__file__).resolve().parent.parent
+if str(ROOT) not in __import__("sys").path:
+    __import__("sys").path.insert(0, str(ROOT))
+
+from extractor_rules import extract_from_news as extract_rules
+
+
+class TestExtractorRules:
+    """Unit tests for the rule-based extractor (extractor_rules.extract_from_news)."""
+
+    def test_trump_1000_targets_no_bases(self):
+        """Generic "1000 military targets struck" wording must not yield bases_destroyed/bases_damaged for either side."""
+        text = "特朗普说伊朗有1000个军事目标遭到袭击，美国已做好进一步打击准备"
+        out = extract_rules(text)
+        delta = out.get("combat_losses_delta", {})
+        for side in ("us", "iran"):
+            if side in delta:
+                assert delta[side].get("bases_destroyed") is None, f"{side} bases_destroyed 不应被提取"
+                assert delta[side].get("bases_damaged") is None, f"{side} bases_damaged 不应被提取"
+
+    def test_base_damaged_when_explicit(self):
+        """A named base reported as attacked must yield a key_location_updates entry with side "us" and the 阿萨德 keyword."""
+        text = "阿萨德空军基地遭袭，损失严重"
+        out = extract_rules(text)
+        # The rules are expected to emit key_location_updates here: the text mentions a base under attack and matches the 阿萨德 keyword
+        assert "key_location_updates" in out
+        kl = out["key_location_updates"]
+        assert len(kl) >= 1
+        assert any(u.get("side") == "us" and "阿萨德" in (u.get("name_keywords") or "") for u in kl)
+
+    def test_us_personnel_killed(self):
+        """US casualties: "3 US troops killed, another 5 wounded" -> personnel_killed=3 and personnel_wounded=5."""
+        text = "据报道，3名美军阵亡，另有5人受伤"
+        out = extract_rules(text)
+        assert "combat_losses_delta" in out
+        us = out["combat_losses_delta"].get("us", {})
+        assert us.get("personnel_killed") == 3
+        assert us.get("personnel_wounded") == 5
+
+    def test_iran_personnel_killed(self):
+        """Iranian casualties: "10 Iranian soldiers dead" -> iran personnel_killed=10."""
+        text = "伊朗方面称10名伊朗士兵死亡"
+        out = extract_rules(text)
+        iran = out.get("combat_losses_delta", {}).get("iran", {})
+        assert iran.get("personnel_killed") == 10
+
+    def test_civilian_us_context(self):
+        """Civilian casualties in a text that never mentions Iran are recorded under "us" (civilian_killed=50)."""
+        text = "美军空袭造成50名平民伤亡"
+        out = extract_rules(text)
+        us = out.get("combat_losses_delta", {}).get("us", {})
+        assert us.get("civilian_killed") == 50
+
+    def test_civilian_iran_context(self):
+        """Civilian casualties in a text about an Iranian air strike are recorded under "iran" (civilian_killed=50)."""
+        text = "伊朗空袭造成伊拉克平民50人伤亡"
+        out = extract_rules(text)
+        iran = out.get("combat_losses_delta", {}).get("iran", {})
+        assert iran.get("civilian_killed") == 50
+
+    def test_drone_attribution_iran(self):
+        """Ten drones shot down in a text that mentions Iran are recorded as iran drones=10."""
+        text = "美军击落伊朗10架无人机"
+        out = extract_rules(text)
+        iran = out.get("combat_losses_delta", {}).get("iran", {})
+        assert iran.get("drones") == 10
+
+    def test_empty_or_short_text(self):
+        """Empty or very short text must not produce a non-empty combat_losses_delta."""
+        assert extract_rules("") == {} or "combat_losses_delta" not in extract_rules("")
+        assert "combat_losses_delta" not in extract_rules("abc") or not extract_rules("abc").get("combat_losses_delta")
+
+
+class TestDbMerge:
+    """Tests for db_merge.merge: field mapping and incremental accumulation."""
+
+    @pytest.fixture
+    def temp_db(self):  # yields the path of an empty temporary .db file and removes it afterwards
+        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
+            path = f.name
+        yield path
+        try:
+            os.unlink(path)
+        except OSError:
+            pass
+
+    def test_merge_combat_losses_delta(self, temp_db):
+        """Two successive merges accumulate: personnel_killed 3 + 2 = 5 while personnel_wounded stays 2."""
+        from db_merge import merge
+
+        merge({"combat_losses_delta": {"us": {"personnel_killed": 3, "personnel_wounded": 2}}}, db_path=temp_db)
+        merge({"combat_losses_delta": {"us": {"personnel_killed": 2}}}, db_path=temp_db)
+
+        conn = sqlite3.connect(temp_db)
+        row = conn.execute("SELECT personnel_killed, personnel_wounded FROM combat_losses WHERE side='us'").fetchone()
+        conn.close()
+        assert row[0] == 5
+        assert row[1] == 2
+
+    def test_merge_all_combat_fields(self, temp_db):
+        """Every combat_losses field in the delta must land in the same-named column of the side='iran' row."""
+        from db_merge import merge
+
+        delta = {
+            "personnel_killed": 1,
+            "personnel_wounded": 2,
+            "civilian_killed": 3,
+            "civilian_wounded": 4,
+            "bases_destroyed": 1,
+            "bases_damaged": 2,
+            "aircraft": 3,
+            "warships": 4,
+            "armor": 5,
+            "vehicles": 6,
+            "drones": 7,
+            "missiles": 8,
+            "helicopters": 9,
+            "submarines": 10,
+        }
+        merge({"combat_losses_delta": {"iran": delta}}, db_path=temp_db)
+
+        conn = sqlite3.connect(temp_db)
+        row = conn.execute(
+            """SELECT personnel_killed, personnel_wounded, civilian_killed, civilian_wounded,
+               bases_destroyed, bases_damaged, aircraft, warships, armor, vehicles,
+               drones, missiles, helicopters, submarines FROM combat_losses WHERE side='iran'"""
+        ).fetchone()
+        conn.close()
+        assert row == (1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
+
+    def test_merge_key_location_requires_table(self, temp_db):
+        """key_location_updates must change status/damage_level of a pre-existing key_location row matched by keyword."""
+        from db_merge import merge
+
+        conn = sqlite3.connect(temp_db)
+        conn.execute(
+            """CREATE TABLE IF NOT EXISTS key_location (id INTEGER PRIMARY KEY, side TEXT, name TEXT, lat REAL, lng REAL, type TEXT, region TEXT, status TEXT, damage_level INTEGER)"""
+        )
+        conn.execute(
+            "INSERT INTO key_location (side, name, lat, lng, type, region, status, damage_level) VALUES ('us', '阿萨德空军基地', 33.0, 43.0, 'Base', 'IRQ', 'operational', 0)"
+        )
+        conn.commit()
+        conn.close()
+
+        merge(
+            {"key_location_updates": [{"name_keywords": "阿萨德|asad", "side": "us", "status": "attacked", "damage_level": 2}]},
+            db_path=temp_db,
+        )
+
+        conn = sqlite3.connect(temp_db)
+        row = conn.execute("SELECT status, damage_level FROM key_location WHERE name LIKE '%阿萨德%'").fetchone()
+        conn.close()
+        assert row[0] == "attacked"
+        assert row[1] == 2
+
+
+class TestEndToEndTrumpExample:
+    """End-to-end check for the "1000 military targets" example."""
+
+    def test_full_pipeline_trump_no_bases(self, tmp_path):
+        """Full pipeline (rule extraction + merge): the example must not increase the iran bases_* counters."""
+        from db_merge import merge
+
+        db_path = str(tmp_path / "test.db")
+        (tmp_path / "test.db").touch()  # create the file up front; NOTE(review): original note says merge only runs when the file exists - confirm in db_merge
+        merge({"combat_losses_delta": {"us": {"bases_destroyed": 0, "bases_damaged": 0}, "iran": {"bases_destroyed": 0, "bases_damaged": 0}}}, db_path=db_path)
+
+        text = "特朗普说伊朗有1000个军事目标遭到袭击"
+        out = extract_rules(text)
+        # The rule extractor must not report bases_destroyed / bases_damaged for iran
+        assert "combat_losses_delta" not in out or (
+            "iran" not in out.get("combat_losses_delta", {})
+            or out["combat_losses_delta"].get("iran", {}).get("bases_destroyed") is None
+            and out["combat_losses_delta"].get("iran", {}).get("bases_damaged") is None
+        )
+        if "combat_losses_delta" in out:
+            merge(out, db_path=db_path)
+
+        conn = sqlite3.connect(db_path)
+        iran = conn.execute("SELECT bases_destroyed, bases_damaged FROM combat_losses WHERE side='iran'").fetchone()
+        conn.close()
+        # Only checked when an iran row exists: both counters must still be 0 (the test passes vacuously if there is no row)
+        if iran:
+            assert iran[0] == 0
+            assert iran[1] == 0