Files
Airtep/gig-poc/apps/api/app/services/traffic_guard.py
2026-04-01 14:19:25 +08:00

109 lines
4.1 KiB
Python

from __future__ import annotations
import time
from collections import deque
from threading import Lock
import httpx
from app.core.config import Settings
from app.core.logging import logger
class TrafficGuard:
    """In-process request guard: per-minute rate limit plus error-rate circuit breaker.

    All counters live in memory on this instance and every read or write of
    them is serialized through one internal lock.

    The following attributes are read from ``settings``:

    * ``app_rate_limit_per_minute`` - requests admitted per wall-clock minute.
    * ``app_circuit_breaker_window_seconds`` - length of the sliding window of
      recorded responses.
    * ``app_circuit_breaker_min_requests`` - minimum number of responses in the
      window before the error rate is evaluated.
    * ``app_circuit_breaker_error_rate`` - fraction of 5xx responses in the
      window at or above which the circuit opens.
    * ``app_circuit_breaker_cooldown_seconds`` - how long the circuit stays
      open after it trips.
    * ``alert_webhook_url`` - optional URL that receives a JSON alert when the
      circuit opens; a falsy value disables the webhook.
    """

    # Paths that are still admitted while the circuit is open. They remain
    # subject to the per-minute rate limit.
    _EXEMPT_PATHS = frozenset(
        {
            "/health",
            "/docs",
            "/openapi.json",
            "/poc/ops/system/metrics",
            "/poc/ops/ai/metrics",
        }
    )

    # Minimum spacing between two alerts, in seconds.
    _ALERT_MIN_INTERVAL_SECONDS = 30

    def __init__(self, settings: Settings):
        """Create a guard with empty counters and a closed circuit."""
        self.settings = settings
        self._lock = Lock()
        # Fixed-window rate limiter: current minute index and its hit count.
        self._minute = 0
        self._minute_count = 0
        # Epoch timestamp until which the circuit is open (0.0 means closed).
        self._open_until = 0.0
        # Sliding window of (timestamp, status_code) pairs, oldest first.
        self._events: deque[tuple[float, int]] = deque()
        # Lifetime counters exposed by snapshot().
        self._requests = 0
        self._rate_limited = 0
        self._circuit_blocked = 0
        self._avg_latency_ms = 0.0
        # Epoch timestamp of the last alert that passed the throttle.
        self._alert_last_sent = 0.0

    def allow(self, path: str) -> tuple[bool, str]:
        """Decide whether a request for ``path`` may proceed.

        Returns ``(True, "ok")`` when admitted, ``(False, "rate_limited")``
        when the per-minute budget is exhausted, or ``(False, "circuit_open")``
        when the circuit is open and ``path`` is not exempt. The rate limit is
        checked first and applies to exempt paths as well.
        """
        now = time.time()
        with self._lock:
            current_minute = int(now // 60)
            if current_minute != self._minute:
                # A new wall-clock minute started: reset the fixed window.
                self._minute = current_minute
                self._minute_count = 0
            if self._minute_count >= self.settings.app_rate_limit_per_minute:
                self._rate_limited += 1
                return False, "rate_limited"
            if self._open_until > now and not self._is_exempt(path):
                self._circuit_blocked += 1
                return False, "circuit_open"
            self._minute_count += 1
            self._requests += 1
            return True, "ok"

    def record(self, status_code: int, latency_ms: float) -> None:
        """Record a finished response and open the circuit if errors are too frequent.

        The response is appended to the sliding window and folded into the
        latency average. Once the window holds at least
        ``app_circuit_breaker_min_requests`` responses and the share of 5xx
        codes reaches ``app_circuit_breaker_error_rate``, the circuit is
        (re)opened for ``app_circuit_breaker_cooldown_seconds`` from now and an
        alert is sent.
        """
        now = time.time()
        with self._lock:
            self._events.append((now, status_code))
            self._avg_latency_ms = self._ema(self._avg_latency_ms, latency_ms)
            self._trim(now)
            total = len(self._events)
            # total can only be 0 with a non-positive window; guard the division.
            if total == 0 or total < self.settings.app_circuit_breaker_min_requests:
                return
            errors = sum(1 for _, code in self._events if code >= 500)
            error_rate = errors / total
            if error_rate < self.settings.app_circuit_breaker_error_rate:
                return
            cooldown = self.settings.app_circuit_breaker_cooldown_seconds
            self._open_until = now + cooldown
            alert_extra = {
                "error_rate": round(error_rate, 4),
                "window_requests": total,
                "cooldown_seconds": cooldown,
            }
        # Alert only after the lock is released: the webhook call can block
        # for up to its timeout and must not stall allow()/record()/snapshot().
        self._send_alert("app circuit opened", alert_extra)

    def snapshot(self) -> dict[str, float | int]:
        """Return the current counters and sliding-window statistics.

        Expired events are trimmed first, so the ``window_*`` values describe
        only the last ``app_circuit_breaker_window_seconds`` seconds.
        """
        now = time.time()
        with self._lock:
            self._trim(now)
            total = len(self._events)
            errors = sum(1 for _, code in self._events if code >= 500)
            return {
                "requests_total": self._requests,
                "rate_limited_total": self._rate_limited,
                "circuit_blocked_total": self._circuit_blocked,
                "window_requests": total,
                "window_errors": errors,
                "window_error_rate": round(errors / total, 4) if total else 0.0,
                "avg_latency_ms": round(self._avg_latency_ms, 2),
                "circuit_open": 1 if self._open_until > now else 0,
            }

    def _trim(self, now: float) -> None:
        """Drop events older than the sliding window. Caller must hold the lock."""
        cutoff = now - self.settings.app_circuit_breaker_window_seconds
        events = self._events
        while events and events[0][0] < cutoff:
            events.popleft()

    def _ema(self, prev: float, value: float, alpha: float = 0.2) -> float:
        """Return the exponential moving average of ``prev`` updated with ``value``.

        A non-positive ``prev`` is treated as "no sample yet", so the first
        value seeds the average directly.
        """
        if prev <= 0:
            return value
        return alpha * value + (1 - alpha) * prev

    def _is_exempt(self, path: str) -> bool:
        """Return True if ``path`` is still admitted while the circuit is open."""
        return path in self._EXEMPT_PATHS

    def _send_alert(self, message: str, extra: dict) -> None:
        """Log an alert and, if a webhook URL is configured, POST it as JSON.

        Alerts are throttled to one per ``_ALERT_MIN_INTERVAL_SECONDS``. This
        method acquires the internal lock briefly for the throttle check, so
        it must be called WITHOUT the lock held. Webhook failures are logged
        and swallowed; alerting is best-effort.
        """
        now = time.time()
        with self._lock:
            if now - self._alert_last_sent < self._ALERT_MIN_INTERVAL_SECONDS:
                return
            self._alert_last_sent = now
        logger.warning("%s extra=%s", message, extra)
        webhook_url = self.settings.alert_webhook_url
        if not webhook_url:
            return
        try:
            with httpx.Client(timeout=2.0) as client:
                client.post(webhook_url, json={"message": message, "extra": extra})
        except Exception:
            # Best-effort boundary: a failing webhook must never break request handling.
            logger.exception("alert webhook send failed")