# -*- coding: utf-8 -*-
"""Crawler entry point: scheduled fetch → parse → store → notify API."""
import time
import sys
from pathlib import Path

# Ensure this script's directory is on sys.path so `config` is importable
# even when the script is launched from elsewhere.
sys.path.insert(0, str(Path(__file__).resolve().parent))

from config import DB_PATH, API_BASE, CRAWL_INTERVAL
from scrapers.rss_scraper import fetch_all
from db_writer import write_updates


def notify_api() -> bool:
    """Call the Node API to trigger an immediate broadcast.

    Returns True on an HTTP 200 response, False on any failure
    (best-effort: errors are logged and swallowed, never raised).
    """
    try:
        # Imported lazily so the crawler still starts if urllib is somehow
        # unavailable; only this optional notification path needs it.
        import urllib.request
        req = urllib.request.Request(
            f"{API_BASE}/api/crawler/notify",
            method="POST",
            headers={"Content-Type": "application/json"},
        )
        with urllib.request.urlopen(req, timeout=5) as resp:
            return resp.status == 200
    except Exception as e:
        print(f" [warn] notify API failed: {e}")
        return False


def run_once() -> int:
    """Run one crawl pass: fetch, write to DB, notify on new rows.

    Returns the number of newly inserted updates (0 if nothing was fetched).
    """
    items = fetch_all()
    if not items:
        return 0
    n = write_updates(items)
    if n > 0:
        # Only ping the API when something actually changed.
        notify_api()
    return n


def main() -> None:
    """Loop forever: crawl, report, sleep CRAWL_INTERVAL seconds; Ctrl-C exits."""
    print("Crawler started. DB:", DB_PATH)
    print("API:", API_BASE, "| Interval:", CRAWL_INTERVAL, "s")
    while True:
        try:
            n = run_once()
            if n > 0:
                print(f"[{time.strftime('%H:%M:%S')}] Inserted {n} new update(s)")
        except KeyboardInterrupt:
            break
        except Exception as e:
            # Keep the loop alive across transient failures (network, DB lock).
            print(f"[{time.strftime('%H:%M:%S')}] Error: {e}")
        try:
            # Guard the sleep too: most wall-clock time is spent here, and an
            # unguarded Ctrl-C during sleep would dump a traceback instead of
            # exiting cleanly.
            time.sleep(CRAWL_INTERVAL)
        except KeyboardInterrupt:
            break


if __name__ == "__main__":
    main()