fix: 优化docker 镜像

This commit is contained in:
Daniel
2026-03-02 14:10:43 +08:00
parent 783a69dad1
commit 36576592a2
25 changed files with 491 additions and 58 deletions

View File

@@ -14,13 +14,13 @@ from datetime import datetime
from pathlib import Path
from typing import List, Optional
import asyncio
import logging
import requests
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from apscheduler.schedulers.background import BackgroundScheduler
# Only surface ERROR and above from APScheduler's scheduler logger; keep
# uvicorn's logger at INFO.
logging.getLogger("apscheduler.scheduler").setLevel(logging.ERROR)
logging.getLogger("uvicorn").setLevel(logging.INFO)
# FastAPI application object for this service.
app = FastAPI(title="GDELT Conflict Service")
# CORS: accept requests from any origin and with any HTTP method.
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"])
@@ -263,8 +263,9 @@ def _extract_and_merge_panel_data(items: list) -> None:
from extractor_ai import extract_from_news
from datetime import timezone
merged_any = False
# 只对前几条有足够文本的新闻做提取,避免 Ollama 调用过多
for it in items[:5]:
# 规则模式可多处理几条(无 Ollama);AI 模式限制 5 条避免调用过多
limit = 10 if os.environ.get("CLEANER_AI_DISABLED", "0") == "1" else 5
for it in items[:limit]:
text = (it.get("title", "") or "") + " " + (it.get("summary", "") or "")
if len(text.strip()) < 20:
continue
@@ -292,12 +293,22 @@ def _extract_and_merge_panel_data(items: list) -> None:
# ==========================
# 定时任务(RSS 更频繁,优先保证事件脉络实时)
# 定时任务(asyncio 后台任务,避免 APScheduler executor 关闭竞态)
# ==========================
# Background scheduler that polls each source on its own interval and is
# started as soon as this module is imported. max_instances=2 permits at most
# two overlapping runs of a job; coalesce=True collapses runs that were missed
# into a single run.
# NOTE(review): the section banner above describes an asyncio background task
# replacing APScheduler — confirm whether this scheduler is still meant to be
# started alongside _periodic_fetch.
scheduler = BackgroundScheduler()
scheduler.add_job(fetch_news, "interval", seconds=RSS_INTERVAL_SEC, max_instances=2, coalesce=True)
scheduler.add_job(fetch_gdelt_events, "interval", seconds=FETCH_INTERVAL_SEC, max_instances=2, coalesce=True)
scheduler.start()
# Handle to the periodic-fetch asyncio task: assigned by the async startup()
# and cancelled by the async shutdown().
_bg_task: Optional[asyncio.Task] = None
async def _periodic_fetch() -> None:
    """Run both fetchers forever, pausing between rounds, until cancelled.

    Each round runs ``fetch_news`` and then ``fetch_gdelt_events`` in the
    event loop's default executor so the blocking calls do not stall the
    loop. The pause between rounds is the shorter of ``RSS_INTERVAL_SEC``
    and ``FETCH_INTERVAL_SEC``.

    Raises:
        asyncio.CancelledError: propagated when the task is cancelled, so
            the task ends in the "cancelled" state no matter whether the
            cancellation arrives during a fetch or during the sleep.
    """
    # Inside a coroutine the running loop is the one we want;
    # get_event_loop() is the legacy spelling.
    loop = asyncio.get_running_loop()
    # NOTE(review): the async startup() already runs both fetchers once before
    # creating this task, so the first round here repeats them immediately —
    # confirm that the double fetch at boot is intended.
    while True:
        try:
            await loop.run_in_executor(None, fetch_news)
            await loop.run_in_executor(None, fetch_gdelt_events)
        except asyncio.CancelledError:
            # Never swallow cancellation: re-raise so awaiting callers see it.
            raise
        except Exception as e:
            # Best-effort polling: report the failure and try again next round.
            print(f" [warn] 定时抓取: {e}")
        await asyncio.sleep(min(RSS_INTERVAL_SEC, FETCH_INTERVAL_SEC))
# ==========================
@@ -356,18 +367,23 @@ def _get_conflict_stats() -> dict:
@app.on_event("startup")
def startup():
    """Run both fetchers once, synchronously, when the application starts.

    News goes first so that the event timeline has data before the GDELT
    events are fetched.
    """
    for fetcher in (fetch_news, fetch_gdelt_events):
        fetcher()
async def startup():
    """Run both fetchers once, then launch the periodic background task.

    The initial ``fetch_news`` and ``fetch_gdelt_events`` calls run in the
    event loop's default executor so the blocking work does not stall the
    loop. The created task is stored in the module-level ``_bg_task``.
    """
    global _bg_task
    # Inside a coroutine the running loop is the one we want;
    # get_event_loop() is the legacy spelling.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, fetch_news)
    await loop.run_in_executor(None, fetch_gdelt_events)
    _bg_task = asyncio.create_task(_periodic_fetch())
@app.on_event("shutdown")
def shutdown():
    """Stop the background scheduler when the application shuts down.

    Best-effort: a failure to stop the scheduler must not block application
    shutdown, so the error is logged instead of being raised.
    """
    try:
        # wait=False: do not block shutdown on jobs that are still running.
        scheduler.shutdown(wait=False)
    except Exception:
        # Keep shutdown quiet but leave a trace for debugging instead of
        # discarding the error entirely.
        logging.getLogger(__name__).debug("scheduler shutdown failed", exc_info=True)
async def shutdown():
    """Cancel the periodic background task, if any, and wait for it to end.

    Does nothing when no task was created or when the task has already
    finished. The ``CancelledError`` raised by awaiting the cancelled task
    is expected and suppressed.
    """
    task = _bg_task
    if not task or task.done():
        return
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        pass
if __name__ == "__main__":