This commit is contained in:
Daniel
2026-03-04 16:48:17 +08:00
parent 64f4c438c3
commit 26938449f0
34 changed files with 956 additions and 500 deletions

View File

@@ -297,8 +297,43 @@ def _rss_to_gdelt_fallback() -> None:
# Module-level fetch status record; fetch_news() resets "error" to None at the
# start of each run.
# NOTE(review): "items"/"inserted" are presumably counters written elsewhere
# in this file -- confirm.
LAST_FETCH = {
    "items": 0,
    "inserted": 0,
    "error": None,
}
def _refresh_panel_data() -> int:
    """Re-extract live panel data from recent events and merge it into the DB.

    Reads the 50 most recent ``situation_update`` rows, runs the
    ``category + " " + summary`` text of each row through the configured
    extractor and passes every non-empty result to ``db_merge.merge``.
    Because it works from stored rows, it does not depend on whether the
    current cycle fetched any new RSS items.

    Extractor selection:
        * ``DASHSCOPE_API_KEY`` set to a non-blank value -> ``extractor_dashscope``
        * ``CLEANER_AI_DISABLED == "1"`` -> ``extractor_rules``
        * otherwise -> ``extractor_ai``

    Returns:
        The number of rows for which ``merge`` returned a truthy value.
        Returns 0 when the database file does not exist or when setup
        (imports, connect, query) fails; this function never raises.
    """
    if not os.path.exists(DB_PATH):
        return 0
    try:
        # Imported lazily so a missing optional extractor only disables the
        # backfill instead of breaking module import.
        from db_merge import merge

        if os.environ.get("DASHSCOPE_API_KEY", "").strip():
            from extractor_dashscope import extract_from_news
        elif os.environ.get("CLEANER_AI_DISABLED", "0") == "1":
            from extractor_rules import extract_from_news
        else:
            from extractor_ai import extract_from_news

        conn = sqlite3.connect(DB_PATH, timeout=10)
        try:
            rows = conn.execute(
                "SELECT id, timestamp, category, summary FROM situation_update ORDER BY timestamp DESC LIMIT 50"
            ).fetchall()
        finally:
            # Close even when the query fails (e.g. missing table, locked DB)
            # so the connection is not leaked on every backfill cycle.
            conn.close()

        merged = 0
        failed = 0
        last_error = None
        for _uid, ts, cat, summary in rows:
            text = ((cat or "") + " " + (summary or "")).strip()
            # Too little text to extract anything from.
            if len(text) < 20:
                continue
            try:
                extracted = extract_from_news(text, timestamp=ts)
                if extracted and merge(extracted, db_path=DB_PATH):
                    merged += 1
            except Exception as e:
                # Best effort per row: one bad row must not abort the batch,
                # but remember the failure so it can be reported once below.
                failed += 1
                last_error = e
        if failed:
            ts_now = datetime.now().strftime("%H:%M:%S")
            print(f"[{ts_now}] 面板实时数据回填:{failed} 条提取/合并失败(最近错误: {last_error})")
        return merged
    except Exception as e:
        print(f"[{datetime.now().strftime('%H:%M:%S')}] 面板实时数据回填失败: {e}")
        return 0
def fetch_news() -> None:
"""执行完整写库流水线GDELT 禁用时用 RSS 回填 gdelt_events,再通知 Node"""
"""执行完整写库流水线;产出看板实时数据(战损、据点、冲突事件)+ 事件脉络。GDELT 禁用时用 RSS 回填 gdelt_events。"""
try:
from pipeline import run_full_pipeline
LAST_FETCH["error"] = None
@@ -314,7 +349,7 @@ def fetch_news() -> None:
_rss_to_gdelt_fallback()
_notify_node()
ts = datetime.now().strftime("%H:%M:%S")
print(f"[{ts}] RSS 抓取 {n_fetched} 条,去重新增 {n_news}资讯,写入事件脉络 {n_panel}")
print(f"[{ts}] 抓取 {n_fetched} 条,去重新增 {n_news},写脉络 {n_panel} → 面板实时数据(战损/据点)已由本批提取更新")
if n_fetched == 0:
print(f"[{ts}] 0 条检查网络、RSS 源或 KEYWORDS 过滤)")
except Exception as e:
@@ -322,6 +357,10 @@ def fetch_news() -> None:
print(f"[{datetime.now().strftime('%H:%M:%S')}] 新闻抓取失败: {e}")
# Every BACKFILL_CYCLES fetch cycles, re-derive the panel's live data (combat
# losses / strongholds, etc.) from recent events so it stays consistent with
# the latest content. The periodic loop compares its counter against this with
# ">=", so a value of 1 or less triggers a backfill on every cycle.
# A malformed environment value falls back to the default of 2 instead of
# crashing the module at import time.
try:
    BACKFILL_CYCLES = int(os.environ.get("BACKFILL_CYCLES", "2"))
except ValueError:
    BACKFILL_CYCLES = 2
# Number of fetch cycles completed since the last backfill.
_cycle_count = 0
# ==========================
# Scheduled job (asyncio background task, avoiding the APScheduler executor shutdown race)
# ==========================
@@ -329,11 +368,20 @@ _bg_task: Optional[asyncio.Task] = None
async def _periodic_fetch() -> None:
global _cycle_count
loop = asyncio.get_event_loop()
while True:
try:
await loop.run_in_executor(None, fetch_news)
await loop.run_in_executor(None, fetch_gdelt_events)
_cycle_count += 1
if _cycle_count >= BACKFILL_CYCLES:
_cycle_count = 0
merged = _refresh_panel_data()
if merged > 0:
_notify_node()
ts = datetime.now().strftime("%H:%M:%S")
print(f"[{ts}] 面板实时数据回填:从近期事件合并 {merged} 条(战损/据点)")
except asyncio.CancelledError:
break
except Exception as e: