58 lines
1.5 KiB
Python
58 lines
1.5 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""爬虫入口:定时抓取 → 解析 → 入库 → 通知 API"""
|
|
import time
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# 确保能导入 config
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
|
|
|
from config import DB_PATH, API_BASE, CRAWL_INTERVAL
|
|
from scrapers.rss_scraper import fetch_all
|
|
from db_writer import write_updates
|
|
|
|
|
|
def notify_api() -> bool:
    """Ask the Node API to broadcast immediately (best effort).

    Sends an empty POST to ``{API_BASE}/api/crawler/notify``. Any failure
    (network error, non-200, even the import itself) is reported on stdout
    and turned into a ``False`` return — the crawler never crashes here.

    Returns:
        True when the API answered with HTTP 200, False otherwise.
    """
    try:
        import urllib.request

        endpoint = f"{API_BASE}/api/crawler/notify"
        post_headers = {"Content-Type": "application/json"}
        request = urllib.request.Request(
            endpoint,
            method="POST",
            headers=post_headers,
        )
        # Short timeout: this is a nudge, not a critical call.
        with urllib.request.urlopen(request, timeout=5) as response:
            return response.status == 200
    except Exception as e:
        print(f" [warn] notify API failed: {e}")
        return False
|
|
|
|
|
|
def run_once() -> int:
    """Execute a single crawl cycle: fetch → persist → notify.

    Returns:
        The number of newly inserted updates; 0 when nothing was fetched.
        The API is only pinged when at least one row was actually written.
    """
    fetched = fetch_all()
    if not fetched:
        return 0

    inserted = write_updates(fetched)
    if inserted > 0:
        # New rows landed — wake the API so clients see them right away.
        notify_api()
    return inserted
|
|
|
|
|
|
def main() -> None:
    """Run the crawl loop forever, one cycle per CRAWL_INTERVAL seconds.

    Ctrl-C exits the loop cleanly; any other exception is logged and the
    loop keeps going after the usual interval.
    """
    print("Crawler started. DB:", DB_PATH)
    print("API:", API_BASE, "| Interval:", CRAWL_INTERVAL, "s")
    while True:
        try:
            n = run_once()
            if n > 0:
                print(f"[{time.strftime('%H:%M:%S')}] Inserted {n} new update(s)")
            # BUGFIX: the sleep must live inside the try — the process spends
            # almost all its time here, and a Ctrl-C during sleep used to
            # escape the KeyboardInterrupt handler and dump a traceback.
            time.sleep(CRAWL_INTERVAL)
        except KeyboardInterrupt:
            break
        except Exception as e:
            print(f"[{time.strftime('%H:%M:%S')}] Error: {e}")
            time.sleep(CRAWL_INTERVAL)
|
|
|
|
|
# Script entry point: only start the crawl loop when executed directly,
# not when this module is imported for its helpers.
if __name__ == "__main__":
    main()