fix: 优化docker 镜像
This commit is contained in:
@@ -14,13 +14,13 @@ from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import requests
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from apscheduler.schedulers.background import BackgroundScheduler
|
||||
|
||||
logging.getLogger("apscheduler.scheduler").setLevel(logging.ERROR)
|
||||
logging.getLogger("uvicorn").setLevel(logging.INFO)
|
||||
app = FastAPI(title="GDELT Conflict Service")
|
||||
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"])
|
||||
|
||||
@@ -263,8 +263,9 @@ def _extract_and_merge_panel_data(items: list) -> None:
|
||||
from extractor_ai import extract_from_news
|
||||
from datetime import timezone
|
||||
merged_any = False
|
||||
# 只对前几条有足够文本的新闻做提取,避免 Ollama 调用过多
|
||||
for it in items[:5]:
|
||||
# 规则模式可多处理几条(无 Ollama);AI 模式限制 5 条避免调用过多
|
||||
limit = 10 if os.environ.get("CLEANER_AI_DISABLED", "0") == "1" else 5
|
||||
for it in items[:limit]:
|
||||
text = (it.get("title", "") or "") + " " + (it.get("summary", "") or "")
|
||||
if len(text.strip()) < 20:
|
||||
continue
|
||||
@@ -292,12 +293,22 @@ def _extract_and_merge_panel_data(items: list) -> None:
|
||||
|
||||
|
||||
# ==========================
|
||||
# 定时任务(RSS 更频繁,优先保证事件脉络实时)
|
||||
# 定时任务(asyncio 后台任务,避免 APScheduler executor 关闭竞态)
|
||||
# ==========================
|
||||
scheduler = BackgroundScheduler()
|
||||
scheduler.add_job(fetch_news, "interval", seconds=RSS_INTERVAL_SEC, max_instances=2, coalesce=True)
|
||||
scheduler.add_job(fetch_gdelt_events, "interval", seconds=FETCH_INTERVAL_SEC, max_instances=2, coalesce=True)
|
||||
scheduler.start()
|
||||
_bg_task: Optional[asyncio.Task] = None
|
||||
|
||||
|
||||
async def _periodic_fetch() -> None:
    """Run the news and GDELT fetchers repeatedly until the task is cancelled.

    Each cycle calls ``fetch_news`` and then ``fetch_gdelt_events`` in the
    event loop's default executor, so the blocking fetchers do not stall the
    loop. The coroutine then sleeps for the shorter of ``RSS_INTERVAL_SEC``
    and ``FETCH_INTERVAL_SEC``.

    Each job is guarded on its own: a failure in one fetcher is reported and
    does not prevent the other from running in the same cycle.

    Cancellation that arrives while a fetcher is being awaited ends the loop
    quietly; cancellation that arrives during the sleep propagates to the
    awaiter as ``asyncio.CancelledError``.
    """
    # Inside a coroutine the running loop is always available;
    # get_running_loop() is the documented way to obtain it.
    loop = asyncio.get_running_loop()
    while True:
        for job in (fetch_news, fetch_gdelt_events):
            try:
                await loop.run_in_executor(None, job)
            except asyncio.CancelledError:
                # Stop the periodic loop without re-raising.
                return
            except Exception as e:
                # Best-effort: report the failure and carry on with the next job.
                print(f" [warn] 定时抓取: {e}")
        await asyncio.sleep(min(RSS_INTERVAL_SEC, FETCH_INTERVAL_SEC))
|
||||
|
||||
|
||||
# ==========================
|
||||
@@ -356,18 +367,23 @@ def _get_conflict_stats() -> dict:
|
||||
|
||||
|
||||
@app.on_event("startup")
|
||||
def startup():
|
||||
# 新闻优先启动,确保事件脉络有数据
|
||||
fetch_news()
|
||||
fetch_gdelt_events()
|
||||
async def startup():
    """Prime the data once, then start the periodic background fetch task.

    ``fetch_news`` runs first, followed by ``fetch_gdelt_events``; both are
    executed in the event loop's default executor so the blocking calls do
    not stall the loop. The periodic task is stored in the module-level
    ``_bg_task`` so it can be reached again later.
    """
    global _bg_task
    # Inside a coroutine the running loop is always available;
    # get_running_loop() is the documented way to obtain it.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, fetch_news)
    await loop.run_in_executor(None, fetch_gdelt_events)
    _bg_task = asyncio.create_task(_periodic_fetch())
|
||||
|
||||
|
||||
@app.on_event("shutdown")
|
||||
def shutdown():
|
||||
try:
|
||||
scheduler.shutdown(wait=False)
|
||||
except Exception:
|
||||
pass
|
||||
async def shutdown():
    """Cancel the background fetch task, if one is still running, and wait for it.

    Does nothing when no task is stored in ``_bg_task`` or the task has
    already finished. The ``asyncio.CancelledError`` raised by awaiting the
    cancelled task is absorbed here.
    """
    global _bg_task
    task = _bg_task
    # Guard clause: nothing to stop.
    if not task or task.done():
        return
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user