from __future__ import annotations import time from collections import deque from threading import Lock import httpx from app.core.config import Settings from app.core.logging import logger class TrafficGuard: def __init__(self, settings: Settings): self.settings = settings self._lock = Lock() self._minute = 0 self._minute_count = 0 self._open_until = 0.0 self._events: deque[tuple[float, int]] = deque() self._requests = 0 self._rate_limited = 0 self._circuit_blocked = 0 self._avg_latency_ms = 0.0 self._alert_last_sent = 0.0 def allow(self, path: str) -> tuple[bool, str]: now = time.time() with self._lock: minute = int(now // 60) if self._minute != minute: self._minute = minute self._minute_count = 0 if self._minute_count >= self.settings.app_rate_limit_per_minute: self._rate_limited += 1 return False, "rate_limited" if self._open_until > now and not self._is_exempt(path): self._circuit_blocked += 1 return False, "circuit_open" self._minute_count += 1 self._requests += 1 return True, "ok" def record(self, status_code: int, latency_ms: float) -> None: now = time.time() with self._lock: self._events.append((now, status_code)) self._avg_latency_ms = self._ema(self._avg_latency_ms, latency_ms) self._trim(now) total = len(self._events) if total < self.settings.app_circuit_breaker_min_requests: return errors = sum(1 for _, code in self._events if code >= 500) error_rate = errors / total if error_rate >= self.settings.app_circuit_breaker_error_rate: self._open_until = now + self.settings.app_circuit_breaker_cooldown_seconds self._send_alert( "app circuit opened", { "error_rate": round(error_rate, 4), "window_requests": total, "cooldown_seconds": self.settings.app_circuit_breaker_cooldown_seconds, }, ) def snapshot(self) -> dict[str, float | int]: now = time.time() with self._lock: self._trim(now) total = len(self._events) errors = sum(1 for _, code in self._events if code >= 500) return { "requests_total": self._requests, "rate_limited_total": self._rate_limited, "circuit_blocked_total": self._circuit_blocked, "window_requests": total, "window_errors": errors, "window_error_rate": round((errors / total), 4) if total else 0.0, "avg_latency_ms": round(self._avg_latency_ms, 2), "circuit_open": 1 if self._open_until > now else 0, } def _trim(self, now: float) -> None: lower = now - self.settings.app_circuit_breaker_window_seconds while self._events and self._events[0][0] < lower: self._events.popleft() def _ema(self, prev: float, value: float, alpha: float = 0.2) -> float: if prev <= 0: return value return alpha * value + (1 - alpha) * prev def _is_exempt(self, path: str) -> bool: return path in {"/health", "/docs", "/openapi.json", "/poc/ops/system/metrics", "/poc/ops/ai/metrics"} def _send_alert(self, message: str, extra: dict) -> None: now = time.time() if now - self._alert_last_sent < 30: return self._alert_last_sent = now logger.warning("%s extra=%s", message, extra) if not self.settings.alert_webhook_url: return try: with httpx.Client(timeout=2.0) as client: client.post(self.settings.alert_webhook_url, json={"message": message, "extra": extra}) except Exception: logger.exception("alert webhook send failed")