Files
usa/crawler/main.py
2026-03-02 01:00:04 +08:00

58 lines
1.5 KiB
Python

# -*- coding: utf-8 -*-
"""爬虫入口:定时抓取 → 解析 → 入库 → 通知 API"""
import time
import sys
from pathlib import Path
# Put this script's directory first on sys.path so that config can be imported
sys.path.insert(0, str(Path(__file__).resolve().parent))
from config import DB_PATH, API_BASE, CRAWL_INTERVAL
from scrapers.rss_scraper import fetch_all
from db_writer import write_updates
def notify_api() -> bool:
    """Ask the Node API to trigger an immediate broadcast.

    Sends a body-less POST to ``{API_BASE}/api/crawler/notify`` with a
    5-second timeout.

    Returns:
        True only when the response status is exactly 200. Any exception
        raised along the way is printed as a warning and reported as False,
        so a failed notification never propagates to the caller.
    """
    try:
        from urllib import request as http

        endpoint = f"{API_BASE}/api/crawler/notify"
        post = http.Request(
            endpoint,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with http.urlopen(post, timeout=5) as response:
            delivered = response.status == 200
    except Exception as exc:
        # Best-effort call: report the failure and carry on.
        print(f" [warn] notify API failed: {exc}")
        return False
    else:
        return delivered
def run_once() -> int:
    """Run one fetch → write → notify cycle.

    Calls ``fetch_all()``; when it yields nothing, no write is attempted.
    Otherwise the items are handed to ``write_updates()`` and, if that
    reports a positive number, ``notify_api()`` is called (its result is
    ignored).

    Returns:
        0 when nothing was fetched, otherwise whatever ``write_updates()``
        returned (presumably the count of newly inserted rows; confirm
        against db_writer).
    """
    fetched = fetch_all()
    if fetched:
        inserted = write_updates(fetched)
        if inserted > 0:
            # Only bother the API when something new was actually stored.
            notify_api()
        return inserted
    return 0
def main() -> None:
    """Run the crawl loop until interrupted.

    Prints the configured DB path, API base and interval, then repeatedly
    calls ``run_once()`` and sleeps ``CRAWL_INTERVAL`` seconds between
    rounds. An exception raised by a round is printed and the loop
    continues with the next round.

    Ctrl+C (``KeyboardInterrupt``) stops the loop and returns normally,
    whether it arrives during a crawl round or during the sleep between
    rounds.
    """
    print("Crawler started. DB:", DB_PATH)
    print("API:", API_BASE, "| Interval:", CRAWL_INTERVAL, "s")
    try:
        while True:
            try:
                n = run_once()
                if n > 0:
                    print(f"[{time.strftime('%H:%M:%S')}] Inserted {n} new update(s)")
            except Exception as e:
                # A failed round must not kill the long-running crawler.
                # KeyboardInterrupt is not an Exception subclass, so it
                # still reaches the outer handler.
                print(f"[{time.strftime('%H:%M:%S')}] Error: {e}")
            time.sleep(CRAWL_INTERVAL)
    except KeyboardInterrupt:
        # The process spends most of its time in sleep(), so the interrupt
        # is caught around the whole loop rather than only around
        # run_once(); otherwise Ctrl+C during the sleep ends in a traceback.
        pass
# Start the crawl loop only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()