fix:优化数据来源
This commit is contained in:
57
crawler/main.py
Normal file
57
crawler/main.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""爬虫入口:定时抓取 → 解析 → 入库 → 通知 API"""
|
||||
import time
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# 确保能导入 config
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||
|
||||
from config import DB_PATH, API_BASE, CRAWL_INTERVAL
|
||||
from scrapers.rss_scraper import fetch_all
|
||||
from db_writer import write_updates
|
||||
|
||||
|
||||
def notify_api() -> bool:
|
||||
"""调用 Node API 触发立即广播"""
|
||||
try:
|
||||
import urllib.request
|
||||
req = urllib.request.Request(
|
||||
f"{API_BASE}/api/crawler/notify",
|
||||
method="POST",
|
||||
headers={"Content-Type": "application/json"},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=5) as resp:
|
||||
return resp.status == 200
|
||||
except Exception as e:
|
||||
print(f" [warn] notify API failed: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def run_once() -> int:
|
||||
items = fetch_all()
|
||||
if not items:
|
||||
return 0
|
||||
n = write_updates(items)
|
||||
if n > 0:
|
||||
notify_api()
|
||||
return n
|
||||
|
||||
|
||||
def main() -> None:
|
||||
print("Crawler started. DB:", DB_PATH)
|
||||
print("API:", API_BASE, "| Interval:", CRAWL_INTERVAL, "s")
|
||||
while True:
|
||||
try:
|
||||
n = run_once()
|
||||
if n > 0:
|
||||
print(f"[{time.strftime('%H:%M:%S')}] Inserted {n} new update(s)")
|
||||
except KeyboardInterrupt:
|
||||
break
|
||||
except Exception as e:
|
||||
print(f"[{time.strftime('%H:%M:%S')}] Error: {e}")
|
||||
time.sleep(CRAWL_INTERVAL)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user