from __future__ import annotations import json import httpx from app.core.config import Settings from app.domain.schemas import PromptOutput from app.services.ai_guard import AIGuard class LLMClient: def __init__(self, settings: Settings): self.settings = settings self.guard = AIGuard(settings) def extract_json(self, system_prompt: str, user_text: str) -> PromptOutput | None: if not self.settings.llm_enabled or not self.settings.llm_base_url or not self.settings.llm_api_key: self.guard.record_fallback() return None payload = { "model": self.settings.llm_model, "messages": [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_text}, ], "temperature": 0.1, "response_format": {"type": "json_object"}, } endpoints = [self.settings.llm_base_url, *self.settings.llm_fallback_base_urls] raw_text = self._request_with_failover( endpoints=endpoints, path="/chat/completions", payload=payload, api_key=self.settings.llm_api_key, ) if raw_text is None: self.guard.record_fallback() return None return PromptOutput(content=json.loads(raw_text), raw_text=raw_text) def embedding(self, text: str) -> list[float] | None: if not self.settings.embedding_enabled: return None base_url = self.settings.embedding_base_url or self.settings.llm_base_url api_key = self.settings.embedding_api_key or self.settings.llm_api_key if not base_url or not api_key: self.guard.record_fallback() return None payload = { "model": self.settings.embedding_model, "input": text, } endpoints = [base_url, *self.settings.embedding_fallback_base_urls] data = self._request_with_failover( endpoints=endpoints, path="/embeddings", payload=payload, api_key=api_key, return_full_response=True, ) if data is None: self.guard.record_fallback() return None embedding = data["data"][0]["embedding"] if not isinstance(embedding, list): return None return [float(item) for item in embedding] def metrics(self) -> dict: return self.guard.snapshot() def _request_with_failover( self, endpoints: list[str], path: str, payload: dict, api_key: str, return_full_response: bool = False, ): if not endpoints: return None for index, endpoint in enumerate([item for item in endpoints if item]): allowed, _ = self.guard.allow_request(endpoint) if not allowed: continue if index > 0: self.guard.record_failover() try: headers = {"Authorization": f"Bearer {api_key}"} with httpx.Client(timeout=self.settings.ai_request_timeout_seconds) as client: response = client.post(f"{endpoint.rstrip('/')}{path}", json=payload, headers=headers) response.raise_for_status() data = response.json() self.guard.record_success(endpoint) if return_full_response: return data return data["choices"][0]["message"]["content"] except Exception: self.guard.record_failure(endpoint) continue return None