fix: 优化后端数据更新机制
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""爬虫入口:定时抓取 → 解析 → 入库 → 通知 API"""
|
||||
"""爬虫入口:定时执行完整写库流水线(抓取 → 清洗 → 去重 → 映射 → 更新表 → 通知 API)"""
|
||||
import time
|
||||
import sys
|
||||
from pathlib import Path
|
||||
@@ -8,34 +8,18 @@ from pathlib import Path
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||
|
||||
from config import DB_PATH, API_BASE, CRAWL_INTERVAL
|
||||
from scrapers.rss_scraper import fetch_all
|
||||
from db_writer import write_updates
|
||||
|
||||
|
||||
def notify_api() -> bool:
    """Ask the Node API to trigger an immediate broadcast.

    Sends a POST request to ``{API_BASE}/api/crawler/notify`` with a
    5-second timeout. This is best-effort: any exception raised while
    building or sending the request is caught and reported with a printed
    warning instead of propagating to the caller.

    Returns:
        True when the response status is exactly 200; False otherwise,
        including when an exception was caught.
    """
    try:
        import urllib.request

        endpoint = f"{API_BASE}/api/crawler/notify"
        request = urllib.request.Request(
            endpoint,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        response = urllib.request.urlopen(request, timeout=5)
        # The context manager closes the connection once the status is read.
        with response:
            succeeded = response.status == 200
        return succeeded
    except Exception as exc:
        print(f" [warn] notify API failed: {exc}")
        return False
|
||||
from pipeline import run_full_pipeline
|
||||
|
||||
|
||||
def run_once() -> int:
    """Run one round of the crawler through the full write pipeline.

    Delegates the whole round to ``run_full_pipeline`` with translation and
    API notification switched on, using the configured ``DB_PATH`` and
    ``API_BASE``. The original docstring describes the round as: fetch,
    clean, de-duplicate, map, write tables, notify.

    Returns:
        The number of rows added this round: the panel count when it is
        truthy, otherwise the news count (``n_panel or n_news``).
    """
    # NOTE(review): the tuple is presumably (fetched, new news rows, new panel
    # rows), judging by the names — confirm against pipeline.run_full_pipeline.
    # The fetched count is not needed here, hence the underscore prefix.
    _n_fetched, n_news, n_panel = run_full_pipeline(
        db_path=DB_PATH,
        api_base=API_BASE,
        translate=True,
        notify=True,
    )
    return n_panel or n_news
|
||||
|
||||
|
||||
def main() -> None:
|
||||
@@ -45,7 +29,7 @@ def main() -> None:
|
||||
try:
|
||||
n = run_once()
|
||||
if n > 0:
|
||||
print(f"[{time.strftime('%H:%M:%S')}] Inserted {n} new update(s)")
|
||||
print(f"[{time.strftime('%H:%M:%S')}] 抓取完成,去重后新增 {n} 条,已写库并通知 API")
|
||||
except KeyboardInterrupt:
|
||||
break
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user