fix: 优化docker 镜像

This commit is contained in:
Daniel
2026-03-02 14:10:43 +08:00
parent 783a69dad1
commit 36576592a2
25 changed files with 491 additions and 58 deletions

Binary file not shown.

View File

@@ -39,11 +39,37 @@ RSS_FEEDS = [
"https://www.aljazeera.com/xml/rss/middleeast.xml",
]
# 关键词过滤:至少匹配一个才会入库
# 关键词过滤:至少匹配一个才会入库(与地图区域对应:伊拉克/叙利亚/海湾/红海/地中海等)
# Keyword filter: an item is stored only when at least one keyword matches
# (the groups below mirror the regions covered by the map).
# NOTE(review): the matching code is not visible here; this presumably runs a
# case-insensitive substring test. Under that assumption an empty-string entry
# would match every text, so the list must never contain "". Entries are unique.
KEYWORDS = [
    # Iran
    "iran", "iranian", "tehran", "德黑兰", "bushehr", "布什尔", "abbas", "阿巴斯",
    # Israel / Palestine
    "israel", "以色列", "hamas", "gaza", "加沙", "hezbollah", "真主党",
    # United States ("us " keeps its trailing space on purpose)
    "usa", "us ", "american", "美军", "美国", "pentagon",
    # Regions covered by the map
    "middle east", "中东", "persian gulf", "波斯湾", "gulf of oman", "阿曼湾",
    "arabian sea", "阿拉伯海", "red sea", "红海", "mediterranean", "地中海",
    "strait of hormuz", "霍尔木兹",
    # Iraq / Syria
    "iraq", "伊拉克", "baghdad", "巴格达", "erbil", "埃尔比勒", "basra", "巴士拉",
    "syria", "叙利亚", "damascus", "大马士革", "deir", "代尔祖尔",
    # Gulf states
    "saudi", "沙特", "riyadh", "利雅得", "qatar", "卡塔尔", "doha", "多哈",
    "uae", "emirates", "阿联酋", "dubai", "迪拜", "abu dhabi",
    "bahrain", "巴林", "kuwait", "科威特", "oman", "阿曼", "yemen", "也门",
    # Jordan / Lebanon / Turkey / Egypt / Djibouti
    "jordan", "约旦", "amman", "安曼",
    "lebanon", "黎巴嫩",
    "turkey", "土耳其", "incirlik", "因吉尔利克",
    "egypt", "埃及", "cairo", "开罗", "sinai", "西奈",
    "djibouti", "吉布提",
    # Military / bases
    "al-asad", "al asad", "阿萨德", "al udeid", "乌代德",
    "strike", "attack", "military", "missile", "nuclear",
    "carrier", "航母", "drone", "uav", "无人机", "retaliation", "报复",
    "base", "基地", "troops", "troop", "soldier", "personnel",
    # Houthi / armed groups / forces
    "houthi", "胡塞", "houthis",
    "idf", "irgc", "革命卫队", "qassem soleimani", "苏莱曼尼",
]

View File

@@ -67,7 +67,7 @@ def merge(extracted: Dict[str, Any], db_path: Optional[str] = None) -> bool:
)
if conn.total_changes > 0:
updated = True
# combat_losses增量叠加到当前值
# combat_losses增量叠加到当前值,无行则先插入初始行
if "combat_losses_delta" in extracted:
for side, delta in extracted["combat_losses_delta"].items():
if side not in ("us", "iran"):
@@ -77,13 +77,14 @@ def merge(extracted: Dict[str, Any], db_path: Optional[str] = None) -> bool:
"SELECT personnel_killed,personnel_wounded,civilian_killed,civilian_wounded,bases_destroyed,bases_damaged,aircraft,warships,armor,vehicles FROM combat_losses WHERE side = ?",
(side,),
).fetchone()
if not row:
continue
cur = {
"personnel_killed": row[0], "personnel_wounded": row[1], "civilian_killed": row[2] or 0,
"civilian_wounded": row[3] or 0, "bases_destroyed": row[4], "bases_damaged": row[5],
"aircraft": row[6], "warships": row[7], "armor": row[8], "vehicles": row[9],
}
cur = {"personnel_killed": 0, "personnel_wounded": 0, "civilian_killed": 0, "civilian_wounded": 0,
"bases_destroyed": 0, "bases_damaged": 0, "aircraft": 0, "warships": 0, "armor": 0, "vehicles": 0}
if row:
cur = {
"personnel_killed": row[0], "personnel_wounded": row[1], "civilian_killed": row[2] or 0,
"civilian_wounded": row[3] or 0, "bases_destroyed": row[4], "bases_damaged": row[5],
"aircraft": row[6], "warships": row[7], "armor": row[8], "vehicles": row[9],
}
pk = max(0, (cur["personnel_killed"] or 0) + delta.get("personnel_killed", 0))
pw = max(0, (cur["personnel_wounded"] or 0) + delta.get("personnel_wounded", 0))
ck = max(0, (cur["civilian_killed"] or 0) + delta.get("civilian_killed", 0))
@@ -95,11 +96,18 @@ def merge(extracted: Dict[str, Any], db_path: Optional[str] = None) -> bool:
ar = max(0, (cur["armor"] or 0) + delta.get("armor", 0))
vh = max(0, (cur["vehicles"] or 0) + delta.get("vehicles", 0))
ts = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.000Z")
conn.execute(
"""UPDATE combat_losses SET personnel_killed=?, personnel_wounded=?, civilian_killed=?, civilian_wounded=?,
bases_destroyed=?, bases_damaged=?, aircraft=?, warships=?, armor=?, vehicles=?, updated_at=? WHERE side=?""",
(pk, pw, ck, cw, bd, bm, ac, ws, ar, vh, ts, side),
)
if row:
conn.execute(
"""UPDATE combat_losses SET personnel_killed=?, personnel_wounded=?, civilian_killed=?, civilian_wounded=?,
bases_destroyed=?, bases_damaged=?, aircraft=?, warships=?, armor=?, vehicles=?, updated_at=? WHERE side=?""",
(pk, pw, ck, cw, bd, bm, ac, ws, ar, vh, ts, side),
)
else:
conn.execute(
"""INSERT OR REPLACE INTO combat_losses (side, personnel_killed, personnel_wounded, civilian_killed, civilian_wounded,
bases_destroyed, bases_damaged, aircraft, warships, armor, vehicles, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
(side, pk, pw, ck, cw, bd, bm, ac, ws, ar, vh, ts),
)
if conn.total_changes > 0:
updated = True
except Exception:
@@ -115,6 +123,30 @@ def merge(extracted: Dict[str, Any], db_path: Optional[str] = None) -> bool:
w = extracted["wall_street"]
conn.execute("INSERT INTO wall_street_trend (time, value) VALUES (?, ?)", (w["time"], w["value"]))
updated = True
# key_location更新受袭基地 status/damage_level
if "key_location_updates" in extracted:
try:
for u in extracted["key_location_updates"]:
kw = (u.get("name_keywords") or "").replace("|", " ").split()
side = u.get("side")
status = u.get("status", "attacked")[:20]
dmg = u.get("damage_level", 2)
if not kw or side not in ("us", "iran"):
continue
conditions = " OR ".join(
"(LOWER(name) LIKE ? OR name LIKE ?)" for _ in kw
)
params = [status, dmg, side]
for k in kw:
params.extend([f"%{k}%", f"%{k}%"])
cur = conn.execute(
f"UPDATE key_location SET status=?, damage_level=? WHERE side=? AND ({conditions})",
params,
)
if cur.rowcount > 0:
updated = True
except Exception:
pass
if updated:
conn.execute("INSERT OR REPLACE INTO situation (id, data, updated_at) VALUES (1, '{}', ?)", (datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.000Z"),))
conn.commit()

View File

@@ -27,11 +27,16 @@ def _call_ollama_extract(text: str, timeout: int = 10) -> Optional[Dict[str, Any
- summary: 1-2句中文事实≤80字
- category: deployment|alert|intel|diplomatic|other
- severity: low|medium|high|critical
- us_personnel_killed, iran_personnel_killed 等:仅当新闻明确提及具体数字时填写
- 战损(仅当新闻明确提及数字时填写,格式 us_XXX / iran_XXX:
us_personnel_killed, iran_personnel_killed, us_personnel_wounded, iran_personnel_wounded,
us_civilian_killed, iran_civilian_killed, us_civilian_wounded, iran_civilian_wounded,
us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged,
us_aircraft, iran_aircraft, us_warships, iran_warships, us_armor, iran_armor, us_vehicles, iran_vehicles
- retaliation_sentiment: 0-100仅当新闻涉及伊朗报复情绪时
- wall_street_value: 0-100仅当新闻涉及美股/市场反应时
- key_location_updates: 当新闻提及具体基地/地点遭袭时,数组项 { "name_keywords": "asad|阿萨德|assad", "side": "us", "status": "attacked", "damage_level": 1-3 }
原文:{str(text)[:500]}
原文:{str(text)[:800]}
直接输出 JSON不要解释"""
r = requests.post(
@@ -97,4 +102,17 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
v = parsed["wall_street_value"]
if isinstance(v, (int, float)) and 0 <= v <= 100:
out["wall_street"] = {"time": ts, "value": int(v)}
# key_location_updates受袭基地
if "key_location_updates" in parsed and isinstance(parsed["key_location_updates"], list):
valid = []
for u in parsed["key_location_updates"]:
if isinstance(u, dict) and u.get("name_keywords") and u.get("side") in ("us", "iran"):
valid.append({
"name_keywords": str(u["name_keywords"]),
"side": u["side"],
"status": str(u.get("status", "attacked"))[:20],
"damage_level": min(3, max(1, int(u["damage_level"]))) if isinstance(u.get("damage_level"), (int, float)) else 2,
})
if valid:
out["key_location_updates"] = valid
return out

View File

@@ -24,18 +24,64 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
t = (text or "").lower()
loss_us, loss_ir = {}, {}
v = _first_int(t, r"(?:us|american|u\.?s\.?)[\s\w]*(?:say|report)[\s\w]*(\d+)[\s\w]*(?:troop|soldier|killed|dead)")
# 美军人员伤亡
v = _first_int(t, r"(?:us|american|u\.?s\.?)[\s\w]*(?:say|report)[\s\w]*(\d+)[\s\w]*(?:troop|soldier|military)[\s\w]*(?:killed|dead)")
if v is not None:
loss_us["personnel_killed"] = v
v = _first_int(t, r"(\d+)[\s\w]*(?:us|american)[\s\w]*(?:troop|soldier|killed|dead)")
v = _first_int(t, r"(\d+)[\s\w]*(?:us|american)[\s\w]*(?:troop|soldier|military)[\s\w]*(?:killed|dead)")
if v is not None:
loss_us["personnel_killed"] = v
v = _first_int(t, r"(?:iran|iranian)[\s\w]*(?:say|report)[\s\w]*(\d+)[\s\w]*(?:troop|soldier|killed|dead)")
v = _first_int(t, r"(?:us|american)[\s\w]*(\d+)[\s\w]*(?:wounded|injured)")
if v is not None:
loss_us["personnel_wounded"] = v
# 伊朗人员伤亡
v = _first_int(t, r"(?:iran|iranian)[\s\w]*(?:say|report)[\s\w]*(\d+)[\s\w]*(?:troop|soldier|guard|killed|dead)")
if v is not None:
loss_ir["personnel_killed"] = v
v = _first_int(t, r"(\d+)[\s\w]*(?:iranian|iran)[\s\w]*(?:troop|soldier|killed|dead)")
v = _first_int(t, r"(\d+)[\s\w]*(?:iranian|iran)[\s\w]*(?:troop|soldier|guard|killed|dead)")
if v is not None:
loss_ir["personnel_killed"] = v
v = _first_int(t, r"(?:iran|iranian)[\s\w]*(\d+)[\s\w]*(?:wounded|injured)")
if v is not None:
loss_ir["personnel_wounded"] = v
# 平民伤亡(多不区分阵营,计入双方或仅 us 因多为美国基地周边)
v = _first_int(t, r"(\d+)[\s\w]*(?:civilian|civil)[\s\w]*(?:killed|dead)")
if v is not None:
loss_us["civilian_killed"] = v
v = _first_int(t, r"(\d+)[\s\w]*(?:civilian|civil)[\s\w]*(?:wounded|injured)")
if v is not None:
loss_us["civilian_wounded"] = v
# 基地损毁(美方基地居多)
v = _first_int(t, r"(\d+)[\s\w]*(?:base)[\s\w]*(?:destroyed|leveled)")
if v is not None:
loss_us["bases_destroyed"] = v
v = _first_int(t, r"(\d+)[\s\w]*(?:base)[\s\w]*(?:damaged|hit|struck)")
if v is not None:
loss_us["bases_damaged"] = v
if "base" in t and ("destroy" in t or "level" in t) and not loss_us.get("bases_destroyed"):
loss_us["bases_destroyed"] = 1
if "base" in t and ("damage" in t or "hit" in t or "struck" in t or "strike" in t) and not loss_us.get("bases_damaged"):
loss_us["bases_damaged"] = 1
# 战机 / 舰船(根据上下文判断阵营)
v = _first_int(t, r"(\d+)[\s\w]*(?:aircraft|plane|jet|fighter|f-?16|f-?35|f-?18)[\s\w]*(?:down|destroyed|lost|shot)")
if v is not None:
if "us" in t or "american" in t or "u.s" in t:
loss_us["aircraft"] = v
elif "iran" in t:
loss_ir["aircraft"] = v
else:
loss_us["aircraft"] = v
v = _first_int(t, r"(\d+)[\s\w]*(?:ship|destroyer|warship|vessel)[\s\w]*(?:hit|damaged|sunk)")
if v is not None:
if "iran" in t:
loss_ir["warships"] = v
else:
loss_us["warships"] = v
if loss_us:
out.setdefault("combat_losses_delta", {})["us"] = loss_us

View File

@@ -14,13 +14,13 @@ from datetime import datetime
from pathlib import Path
from typing import List, Optional
import asyncio
import logging
import requests
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from apscheduler.schedulers.background import BackgroundScheduler
logging.getLogger("apscheduler.scheduler").setLevel(logging.ERROR)
logging.getLogger("uvicorn").setLevel(logging.INFO)
app = FastAPI(title="GDELT Conflict Service")
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"])
@@ -263,8 +263,9 @@ def _extract_and_merge_panel_data(items: list) -> None:
from extractor_ai import extract_from_news
from datetime import timezone
merged_any = False
# 只对前几条有足够文本的新闻做提取,避免 Ollama 调用过多
for it in items[:5]:
# 规则模式可多处理几条(无 Ollama);AI 模式限制 5 条,避免调用过多
limit = 10 if os.environ.get("CLEANER_AI_DISABLED", "0") == "1" else 5
for it in items[:limit]:
text = (it.get("title", "") or "") + " " + (it.get("summary", "") or "")
if len(text.strip()) < 20:
continue
@@ -292,12 +293,22 @@ def _extract_and_merge_panel_data(items: list) -> None:
# ==========================
# 定时任务(RSS 更频繁,优先保证事件脉络实时
# 定时任务(asyncio 后台任务,避免 APScheduler executor 关闭竞态)
# ==========================
scheduler = BackgroundScheduler()
scheduler.add_job(fetch_news, "interval", seconds=RSS_INTERVAL_SEC, max_instances=2, coalesce=True)
scheduler.add_job(fetch_gdelt_events, "interval", seconds=FETCH_INTERVAL_SEC, max_instances=2, coalesce=True)
scheduler.start()
_bg_task: Optional[asyncio.Task] = None
async def _periodic_fetch() -> None:
    """Run the news and GDELT fetchers forever as a background task.

    Each fetcher is a blocking callable, so it is handed to the running
    loop's default executor via ``run_in_executor(None, ...)``. The two jobs
    keep their own cadence (``RSS_INTERVAL_SEC`` for ``fetch_news``,
    ``FETCH_INTERVAL_SEC`` for ``fetch_gdelt_events``) instead of both being
    forced onto the shorter of the two intervals. A failure in one job is
    reported and does not prevent the other job from running.

    Cancellation is not swallowed: ``asyncio.CancelledError`` propagates so
    the task ends up in the cancelled state and the awaiting shutdown hook
    can observe it.
    """
    loop = asyncio.get_running_loop()
    jobs = (
        (fetch_news, RSS_INTERVAL_SEC),
        (fetch_gdelt_events, FETCH_INTERVAL_SEC),
    )
    # Both jobs are due on the very first pass, matching the previous
    # behaviour of fetching immediately when the task starts.
    due = [loop.time()] * len(jobs)
    while True:
        for idx, (job, interval) in enumerate(jobs):
            if loop.time() < due[idx]:
                continue
            try:
                await loop.run_in_executor(None, job)
            except Exception as e:
                # Best effort: report and retry on the next scheduled run.
                print(f" [warn] 定时抓取: {e}")
            due[idx] = loop.time() + interval
        # Sleep until the earliest job is due again (never a negative delay).
        await asyncio.sleep(max(0.0, min(due) - loop.time()))
# ==========================
@@ -356,18 +367,23 @@ def _get_conflict_stats() -> dict:
@app.on_event("startup")
def startup():
# 新闻优先启动,确保事件脉络有数据
fetch_news()
fetch_gdelt_events()
async def startup():
    """Prime the data once, then start the periodic background fetch task.

    The two blocking fetchers run in the running loop's default executor.
    News is fetched first so the event timeline has data before the GDELT
    fetch runs. The created task is stored in the module-level ``_bg_task``.
    """
    global _bg_task
    # get_running_loop() is the documented way to obtain the loop from
    # inside a coroutine.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, fetch_news)
    await loop.run_in_executor(None, fetch_gdelt_events)
    _bg_task = asyncio.create_task(_periodic_fetch())
@app.on_event("shutdown")
def shutdown():
try:
scheduler.shutdown(wait=False)
except Exception:
pass
async def shutdown():
    """Cancel the background fetch task, if one is still running, and wait for it."""
    global _bg_task
    task = _bg_task
    # Nothing to stop when the task was never created or has already finished.
    if not task or task.done():
        return
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        # Expected result of the cancel() request above.
        pass
if __name__ == "__main__":

View File

@@ -2,5 +2,4 @@ requests>=2.31.0
feedparser>=6.0.0
fastapi>=0.109.0
uvicorn>=0.27.0
apscheduler>=3.10.0
deep-translator>=1.11.0