feat: 初始化零工后端代码
This commit is contained in:
@@ -6,14 +6,17 @@ import httpx
|
||||
|
||||
from app.core.config import Settings
|
||||
from app.domain.schemas import PromptOutput
|
||||
from app.services.ai_guard import AIGuard
|
||||
|
||||
|
||||
class LLMClient:
    """Client for an OpenAI-compatible chat/embedding HTTP API.

    All outbound requests are mediated by an AIGuard instance, which tracks
    per-endpoint health and fallback/failover counters (see `metrics`).
    """

    def __init__(self, settings: Settings):
        """Keep a reference to the app settings and build the request guard."""
        self.guard = AIGuard(settings)
        self.settings = settings
||||
# NOTE(review): everything from here to the PromptOutput return is rendered
# git-diff residue, not runnable Python. The "|" / "||||" lines and the
# "@@ -25,10 +28,77 @@" hunk header are diff-viewer markup, and the span mixes
# what appears to be the pre-change body (a direct httpx POST to
# <llm_base_url>/chat/completions) with the post-change body (delegation to
# self._request_with_failover). The head of the `payload` dict falls inside
# the elided hunk, so the full method cannot be reconstructed from this view
# alone — recover it from the repository before editing.
def extract_json(self, system_prompt: str, user_text: str) -> PromptOutput | None:
|
||||
# Guard clause: LLM disabled or incompletely configured -> record a
# fallback on the guard and signal "no structured output" to the caller.
if not self.settings.llm_enabled or not self.settings.llm_base_url or not self.settings.llm_api_key:
|
||||
self.guard.record_fallback()
|
||||
return None
|
||||
|
||||
payload = {
|
||||
# Diff hunk header: the lines building the head of `payload` (presumably
# model + messages built from system_prompt/user_text — TODO confirm
# against the repository) are elided here.
@@ -25,10 +28,77 @@ class LLMClient:
|
||||
"temperature": 0.1,
|
||||
"response_format": {"type": "json_object"},
|
||||
}
|
||||
# Apparently the OLD body (removed by this commit): single-endpoint
# request with a hard-coded 30s timeout and no failover.
headers = {"Authorization": f"Bearer {self.settings.llm_api_key}"}
|
||||
with httpx.Client(timeout=30.0) as client:
|
||||
response = client.post(f"{self.settings.llm_base_url.rstrip('/')}/chat/completions", json=payload, headers=headers)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
raw_text = data["choices"][0]["message"]["content"]
|
||||
# Apparently the NEW body (added by this commit): route through
# _request_with_failover so the primary URL plus llm_fallback_base_urls
# are tried in order.
endpoints = [self.settings.llm_base_url, *self.settings.llm_fallback_base_urls]
|
||||
raw_text = self._request_with_failover(
|
||||
endpoints=endpoints,
|
||||
path="/chat/completions",
|
||||
payload=payload,
|
||||
api_key=self.settings.llm_api_key,
|
||||
)
|
||||
if raw_text is None:
|
||||
self.guard.record_fallback()
|
||||
return None
|
||||
# json.loads raises ValueError (JSONDecodeError) if the model returned
# text that is not valid JSON; that propagates to the caller.
return PromptOutput(content=json.loads(raw_text), raw_text=raw_text)
|
||||
|
||||
def embedding(self, text: str) -> list[float] | None:
    """Return the embedding vector for *text*, or None when unavailable.

    Returns None when embeddings are disabled, when no usable base URL /
    API key can be resolved, or when every endpoint fails. All unavailable
    paths (except a non-list vector) record a fallback on the guard so
    `metrics()` reflects degraded operation.
    """
    if not self.settings.embedding_enabled:
        return None
    # Dedicated embedding credentials win; otherwise reuse the chat-LLM ones.
    base_url = self.settings.embedding_base_url or self.settings.llm_base_url
    api_key = self.settings.embedding_api_key or self.settings.llm_api_key
    if not base_url or not api_key:
        self.guard.record_fallback()
        return None

    payload = {
        "model": self.settings.embedding_model,
        "input": text,
    }
    endpoints = [base_url, *self.settings.embedding_fallback_base_urls]
    data = self._request_with_failover(
        endpoints=endpoints,
        path="/embeddings",
        payload=payload,
        api_key=api_key,
        return_full_response=True,
    )
    if data is None:
        self.guard.record_fallback()
        return None
    # FIX: the previous code indexed data["data"][0]["embedding"] unguarded,
    # so a 2xx response with an unexpected body shape raised KeyError/
    # IndexError/TypeError out of a method whose contract is "None on
    # failure". Treat a malformed body like any other unavailable path.
    try:
        vector = data["data"][0]["embedding"]
    except (KeyError, IndexError, TypeError):
        self.guard.record_fallback()
        return None
    if not isinstance(vector, list):
        return None
    return [float(item) for item in vector]
|
||||
|
||||
def metrics(self) -> dict:
    """Return the guard's current metrics snapshot (see AIGuard.snapshot)."""
    snapshot = self.guard.snapshot()
    return snapshot
|
||||
|
||||
def _request_with_failover(
|
||||
self,
|
||||
endpoints: list[str],
|
||||
path: str,
|
||||
payload: dict,
|
||||
api_key: str,
|
||||
return_full_response: bool = False,
|
||||
):
|
||||
if not endpoints:
|
||||
return None
|
||||
for index, endpoint in enumerate([item for item in endpoints if item]):
|
||||
allowed, _ = self.guard.allow_request(endpoint)
|
||||
if not allowed:
|
||||
continue
|
||||
if index > 0:
|
||||
self.guard.record_failover()
|
||||
try:
|
||||
headers = {"Authorization": f"Bearer {api_key}"}
|
||||
with httpx.Client(timeout=self.settings.ai_request_timeout_seconds) as client:
|
||||
response = client.post(f"{endpoint.rstrip('/')}{path}", json=payload, headers=headers)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
self.guard.record_success(endpoint)
|
||||
if return_full_response:
|
||||
return data
|
||||
return data["choices"][0]["message"]["content"]
|
||||
except Exception:
|
||||
self.guard.record_failure(endpoint)
|
||||
continue
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user