# usa/crawler/main.py

# -*- coding: utf-8 -*-
"""爬虫入口：定时抓取 → 解析 → 入库 → 通知 API"""
import time
import sys
from pathlib import Path

# 确保能导入 config
sys.path.insert(0, str(Path(__file__).resolve().parent))

from config import DB_PATH, API_BASE, CRAWL_INTERVAL
from scrapers.rss_scraper import fetch_all
from db_writer import write_updates


def notify_api() -> bool:
    """Ask the Node API to trigger an immediate broadcast.

    Sends a body-less POST to ``{API_BASE}/api/crawler/notify`` with a
    5-second timeout. The call is best-effort: any failure is printed as a
    warning and reported through the return value instead of being raised.

    Returns:
        True if the API answered with a 2xx status, False otherwise
        (including connection errors, timeouts and HTTP error responses).
    """
    import urllib.request

    try:
        # Built inside the try so a malformed API_BASE (ValueError from
        # Request) is handled like any other notification failure.
        req = urllib.request.Request(
            f"{API_BASE}/api/crawler/notify",
            method="POST",
            headers={"Content-Type": "application/json"},
        )
        with urllib.request.urlopen(req, timeout=5) as resp:
            # Any 2xx counts as success (e.g. 202 Accepted, 204 No Content),
            # not just 200; urlopen raises HTTPError for non-2xx replies.
            return 200 <= resp.status < 300
    except Exception as e:
        print(f"  [warn] notify API failed: {e}")
        return False


def run_once() -> int:
    """Run a single fetch -> write -> notify cycle.

    Returns:
        The count returned by ``write_updates`` for the fetched items, or 0
        when ``fetch_all`` produced nothing. ``notify_api`` is called only
        when that count is positive.
    """
    fetched = fetch_all()
    if fetched:
        inserted = write_updates(fetched)
        if inserted > 0:
            notify_api()
        return inserted
    return 0


def main() -> None:
    """Run the crawl loop until interrupted with Ctrl-C.

    Prints the configured DB path, API base and interval once, then
    repeatedly calls ``run_once`` and sleeps ``CRAWL_INTERVAL`` seconds
    between cycles. An exception raised by a cycle is printed with a
    timestamp and the loop continues with the next cycle.
    """
    print("Crawler started. DB:", DB_PATH)
    print("API:", API_BASE, "| Interval:", CRAWL_INTERVAL, "s")
    try:
        while True:
            try:
                n = run_once()
                if n > 0:
                    print(f"[{time.strftime('%H:%M:%S')}] Inserted {n} new update(s)")
            except Exception as e:
                # One failed cycle must not stop the crawler.
                print(f"[{time.strftime('%H:%M:%S')}] Error: {e}")
            time.sleep(CRAWL_INTERVAL)
    except KeyboardInterrupt:
        # The handler wraps the whole loop because the process spends most
        # of its time in time.sleep(); Ctrl-C there must also exit cleanly
        # rather than surface as a traceback.
        pass


# Start the crawl loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()