fix: 优化后端数据更新机制
This commit is contained in:
@@ -283,93 +283,34 @@ def _rss_to_gdelt_fallback() -> None:
|
||||
|
||||
|
||||
# ==========================
|
||||
# RSS 新闻抓取:资讯落库(去重) → AI 提取 → 面板数据落库 → 通知前端
|
||||
# RSS 新闻抓取:使用统一流水线(抓取 → 清洗 → 去重 → 映射 → 写表 → 通知)
|
||||
# ==========================
|
||||
LAST_FETCH = {"items": 0, "inserted": 0, "error": None}
|
||||
|
||||
|
||||
def fetch_news() -> None:
|
||||
"""执行完整写库流水线;GDELT 禁用时用 RSS 回填 gdelt_events,再通知 Node。"""
|
||||
try:
|
||||
from scrapers.rss_scraper import fetch_all
|
||||
from db_writer import write_updates
|
||||
from news_storage import save_and_dedup
|
||||
from translate_utils import translate_to_chinese
|
||||
from cleaner_ai import clean_news_for_panel
|
||||
from cleaner_ai import ensure_category, ensure_severity
|
||||
from pipeline import run_full_pipeline
|
||||
LAST_FETCH["error"] = None
|
||||
items = fetch_all()
|
||||
for it in items:
|
||||
raw_title = translate_to_chinese(it.get("title", "") or "")
|
||||
raw_summary = translate_to_chinese(it.get("summary", "") or it.get("title", ""))
|
||||
it["title"] = clean_news_for_panel(raw_title, max_len=80)
|
||||
it["summary"] = clean_news_for_panel(raw_summary or raw_title, max_len=120)
|
||||
it["category"] = ensure_category(it.get("category", "other"))
|
||||
it["severity"] = ensure_severity(it.get("severity", "medium"))
|
||||
it["source"] = it.get("source") or "rss"
|
||||
# 1. 历史去重:资讯内容落库 news_content(独立表,便于后续消费)
|
||||
new_items, n_news = save_and_dedup(items, db_path=DB_PATH)
|
||||
# 2. 面板展示:新增资讯写入 situation_update(供前端 recentUpdates)
|
||||
n_panel = write_updates(new_items) if new_items else 0
|
||||
LAST_FETCH["items"] = len(items)
|
||||
n_fetched, n_news, n_panel = run_full_pipeline(
|
||||
db_path=DB_PATH,
|
||||
api_base=API_BASE,
|
||||
translate=True,
|
||||
notify=False,
|
||||
)
|
||||
LAST_FETCH["items"] = n_fetched
|
||||
LAST_FETCH["inserted"] = n_news
|
||||
# 3. AI 提取 + 合并到 combat_losses / key_location 等
|
||||
if new_items:
|
||||
_extract_and_merge_panel_data(new_items)
|
||||
# GDELT 禁用时用 RSS 填充 gdelt_events,使地图有冲突点
|
||||
if GDELT_DISABLED:
|
||||
_rss_to_gdelt_fallback()
|
||||
_notify_node()
|
||||
print(f"[{datetime.now().strftime('%H:%M:%S')}] RSS 抓取 {len(items)} 条,去重后新增 {n_news} 条资讯,面板 {n_panel} 条")
|
||||
if n_fetched > 0:
|
||||
print(f"[{datetime.now().strftime('%H:%M:%S')}] RSS 抓取 {n_fetched} 条,去重后新增 {n_news} 条资讯,面板 {n_panel} 条")
|
||||
except Exception as e:
|
||||
LAST_FETCH["error"] = str(e)
|
||||
print(f"[{datetime.now().strftime('%H:%M:%S')}] 新闻抓取失败: {e}")
|
||||
|
||||
|
||||
def _extract_and_merge_panel_data(items: list) -> None:
|
||||
"""AI 分析提取面板相关数据,清洗后落库"""
|
||||
if not items or not os.path.exists(DB_PATH):
|
||||
return
|
||||
try:
|
||||
from db_merge import merge
|
||||
use_dashscope = bool(os.environ.get("DASHSCOPE_API_KEY", "").strip())
|
||||
if use_dashscope:
|
||||
from extractor_dashscope import extract_from_news
|
||||
limit = 10
|
||||
elif os.environ.get("CLEANER_AI_DISABLED", "0") == "1":
|
||||
from extractor_rules import extract_from_news
|
||||
limit = 25
|
||||
else:
|
||||
from extractor_ai import extract_from_news
|
||||
limit = 10
|
||||
from datetime import timezone
|
||||
merged_any = False
|
||||
for it in items[:limit]:
|
||||
text = (it.get("title", "") or "") + " " + (it.get("summary", "") or "")
|
||||
if len(text.strip()) < 20:
|
||||
continue
|
||||
pub = it.get("published")
|
||||
ts = None
|
||||
if pub:
|
||||
try:
|
||||
if isinstance(pub, str):
|
||||
pub_dt = datetime.fromisoformat(pub.replace("Z", "+00:00"))
|
||||
else:
|
||||
pub_dt = pub
|
||||
if pub_dt.tzinfo:
|
||||
pub_dt = pub_dt.astimezone(timezone.utc)
|
||||
ts = pub_dt.strftime("%Y-%m-%dT%H:%M:%S.000Z")
|
||||
except Exception:
|
||||
pass
|
||||
extracted = extract_from_news(text, timestamp=ts)
|
||||
if extracted:
|
||||
if merge(extracted, db_path=DB_PATH):
|
||||
merged_any = True
|
||||
if merged_any:
|
||||
_notify_node()
|
||||
except Exception as e:
|
||||
print(f" [warn] AI 面板数据提取/合并: {e}")
|
||||
|
||||
|
||||
# ==========================
|
||||
# 定时任务(asyncio 后台任务,避免 APScheduler executor 关闭竞态)
|
||||
# ==========================
|
||||
|
||||
Reference in New Issue
Block a user