feat: initialize gig backend code
gig-poc/apps/api/app/services/ai_guard.py (new file, 87 lines)
@@ -0,0 +1,87 @@
from __future__ import annotations

import time
from dataclasses import dataclass
from threading import Lock

from app.core.config import Settings


@dataclass
class EndpointState:
    current_minute: int = 0
    minute_count: int = 0
    consecutive_failures: int = 0
    circuit_open_until: float = 0.0


class AIGuard:
    _lock = Lock()
    _endpoint_states: dict[str, EndpointState] = {}
    _metrics = {
        "requests_total": 0,
        "success_total": 0,
        "fail_total": 0,
        "fallback_total": 0,
        "rate_limited_total": 0,
        "circuit_open_total": 0,
        "endpoint_failover_total": 0,
    }

    def __init__(self, settings: Settings):
        self.settings = settings

    def allow_request(self, endpoint: str) -> tuple[bool, str]:
        now = time.time()
        now_minute = int(now // 60)
        with self._lock:
            state = self._endpoint_states.setdefault(endpoint, EndpointState())
            if state.circuit_open_until > now:
                self._metrics["circuit_open_total"] += 1
                return False, "circuit_open"
            if state.current_minute != now_minute:
                state.current_minute = now_minute
                state.minute_count = 0
            if state.minute_count >= self.settings.ai_rate_limit_per_minute:
                self._metrics["rate_limited_total"] += 1
                return False, "rate_limited"
            state.minute_count += 1
            self._metrics["requests_total"] += 1
            return True, "ok"

    def record_success(self, endpoint: str) -> None:
        with self._lock:
            state = self._endpoint_states.setdefault(endpoint, EndpointState())
            state.consecutive_failures = 0
            state.circuit_open_until = 0.0
            self._metrics["success_total"] += 1

    def record_failure(self, endpoint: str) -> None:
        with self._lock:
            state = self._endpoint_states.setdefault(endpoint, EndpointState())
            state.consecutive_failures += 1
            self._metrics["fail_total"] += 1
            if state.consecutive_failures >= self.settings.ai_circuit_breaker_fail_threshold:
                state.circuit_open_until = time.time() + self.settings.ai_circuit_breaker_cooldown_seconds
                state.consecutive_failures = 0

    def record_fallback(self) -> None:
        with self._lock:
            self._metrics["fallback_total"] += 1

    def record_failover(self) -> None:
        with self._lock:
            self._metrics["endpoint_failover_total"] += 1

    def snapshot(self) -> dict:
        with self._lock:
            requests_total = self._metrics["requests_total"]
            fallback_total = self._metrics["fallback_total"]
            success_total = self._metrics["success_total"]
            fail_total = self._metrics["fail_total"]
            return {
                **self._metrics,
                "fallback_hit_rate": round(fallback_total / requests_total, 4) if requests_total else 0.0,
                "success_rate": round(success_total / requests_total, 4) if requests_total else 0.0,
                "failure_rate": round(fail_total / requests_total, 4) if requests_total else 0.0,
            }
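A minimal caller sketch for the guard flow above (hypothetical endpoint URL; everything else as referenced in ai_guard.py): check allow_request before each upstream call, then report the outcome so the per-endpoint circuit breaker stays accurate.

# Hypothetical illustration, not part of the commit.
from app.core.config import get_settings
from app.services.ai_guard import AIGuard

guard = AIGuard(get_settings())
endpoint = "https://llm.example.com/v1"  # placeholder endpoint

allowed, reason = guard.allow_request(endpoint)
if not allowed:
    guard.record_fallback()  # reason is "rate_limited" or "circuit_open"
else:
    try:
        ...  # perform the upstream call here
        guard.record_success(endpoint)   # resets consecutive_failures, closes the circuit
    except Exception:
        guard.record_failure(endpoint)   # opens the circuit after enough consecutive failures
print(guard.snapshot())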
gig-poc/apps/api/app/services/cache_service.py (new file, 146 lines)
@@ -0,0 +1,146 @@
from __future__ import annotations

import json
import time
from functools import lru_cache
from threading import Lock
from typing import Any, Protocol

from app.core.config import get_settings
from app.core.logging import logger

try:
    from redis import Redis
except Exception:  # pragma: no cover
    Redis = None  # type: ignore[assignment]


class Cache(Protocol):
    def get(self, key: str): ...
    def set(self, key: str, value: Any) -> None: ...
    def delete(self, key: str) -> None: ...
    def clear(self) -> None: ...
    def stats(self) -> dict[str, int | float | str]: ...


class TTLCache:
    def __init__(self, ttl_seconds: int):
        self.ttl_seconds = ttl_seconds
        self._store: dict[str, tuple[float, Any]] = {}
        self._lock = Lock()
        self._hits = 0
        self._misses = 0

    def get(self, key: str):
        now = time.time()
        with self._lock:
            item = self._store.get(key)
            if item is None:
                self._misses += 1
                return None
            expires_at, value = item
            if expires_at < now:
                self._store.pop(key, None)
                self._misses += 1
                return None
            self._hits += 1
            return value

    def set(self, key: str, value: Any) -> None:
        expires_at = time.time() + self.ttl_seconds
        with self._lock:
            self._store[key] = (expires_at, value)

    def delete(self, key: str) -> None:
        with self._lock:
            self._store.pop(key, None)

    def clear(self) -> None:
        with self._lock:
            self._store.clear()

    def stats(self) -> dict[str, int | float | str]:
        with self._lock:
            requests = self._hits + self._misses
            hit_rate = (self._hits / requests) if requests else 0.0
            return {
                "backend": "memory",
                "size": len(self._store),
                "hits": self._hits,
                "misses": self._misses,
                "hit_rate": round(hit_rate, 4),
            }


class RedisCache:
    def __init__(self, url: str, prefix: str, ttl_seconds: int):
        if Redis is None:
            raise RuntimeError("redis package is not installed")
        self.client = Redis.from_url(url, decode_responses=True)
        self.prefix = prefix
        self.ttl_seconds = ttl_seconds
        self._hits = 0
        self._misses = 0
        self._lock = Lock()

    def get(self, key: str):
        raw = self.client.get(self._key(key))
        with self._lock:
            if raw is None:
                self._misses += 1
                return None
            self._hits += 1
        return json.loads(raw)

    def set(self, key: str, value: Any) -> None:
        self.client.set(self._key(key), json.dumps(value, ensure_ascii=False), ex=self.ttl_seconds)

    def delete(self, key: str) -> None:
        self.client.delete(self._key(key))

    def clear(self) -> None:
        pattern = f"{self.prefix}:*"
        cursor = 0
        while True:
            cursor, keys = self.client.scan(cursor=cursor, match=pattern, count=200)
            if keys:
                self.client.delete(*keys)
            if cursor == 0:
                break

    def stats(self) -> dict[str, int | float | str]:
        with self._lock:
            requests = self._hits + self._misses
            hit_rate = (self._hits / requests) if requests else 0.0
            return {
                "backend": "redis",
                "size": int(self.client.dbsize()),
                "hits": self._hits,
                "misses": self._misses,
                "hit_rate": round(hit_rate, 4),
            }

    def _key(self, key: str) -> str:
        return f"{self.prefix}:{key}"


def _build_cache(namespace: str, ttl_seconds: int) -> Cache:
    settings = get_settings()
    if settings.cache_backend == "redis":
        try:
            return RedisCache(settings.redis_url, f"{settings.redis_prefix}:{namespace}", ttl_seconds=ttl_seconds)
        except Exception:
            logger.exception("failed to init redis cache namespace=%s fallback to memory cache", namespace)
    return TTLCache(ttl_seconds=ttl_seconds)


@lru_cache
def get_match_cache() -> Cache:
    settings = get_settings()
    return _build_cache("match", settings.match_cache_ttl_seconds)


@lru_cache
def get_query_cache() -> Cache:
    settings = get_settings()
    return _build_cache("query", settings.query_cache_ttl_seconds)
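A quick illustration of the TTL semantics shared by both backends (hypothetical one-second TTL; real TTLs come from Settings):

# Hypothetical illustration, not part of the commit.
import time

from app.services.cache_service import TTLCache

cache = TTLCache(ttl_seconds=1)
cache.set("k", {"v": 1})
assert cache.get("k") == {"v": 1}  # hit
time.sleep(1.1)
assert cache.get("k") is None      # expired entries count as misses and are evicted
print(cache.stats())               # {'backend': 'memory', 'size': 0, 'hits': 1, 'misses': 1, 'hit_rate': 0.5}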
@@ -26,13 +26,9 @@ class ExtractionService:
 
     def extract_job(self, text: str) -> ExtractResponse:
         logger.info("extract_job request text=%s", text)
-        llm_result = self._llm_extract(text, self.settings.prompt_dir / "job_extract.md")
-        if llm_result:
-            try:
-                return ExtractResponse(success=True, data=JobCard(**llm_result.content))
-            except ValidationError as exc:
-                logger.exception("LLM job extraction validation failed")
-                return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))
+        llm_card = self._llm_extract_with_retry(text, self.settings.prompt_dir / "job_extract.md", JobCard)
+        if llm_card:
+            return ExtractResponse(success=True, data=llm_card)
 
         try:
             card = self._extract_job_rule(text)
@@ -43,13 +39,9 @@ class ExtractionService:
 
     def extract_worker(self, text: str) -> ExtractResponse:
         logger.info("extract_worker request text=%s", text)
-        llm_result = self._llm_extract(text, self.settings.prompt_dir / "worker_extract.md")
-        if llm_result:
-            try:
-                return ExtractResponse(success=True, data=WorkerCard(**llm_result.content))
-            except ValidationError as exc:
-                logger.exception("LLM worker extraction validation failed")
-                return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))
+        llm_card = self._llm_extract_with_retry(text, self.settings.prompt_dir / "worker_extract.md", WorkerCard)
+        if llm_card:
+            return ExtractResponse(success=True, data=llm_card)
 
         try:
             card = self._extract_worker_rule(text)
@@ -65,6 +57,57 @@ class ExtractionService:
         logger.exception("LLM extraction failed, fallback to rule-based extraction")
         return None
 
+    def _llm_extract_with_retry(self, text: str, prompt_path: Path, schema_cls):
+        base_prompt = load_prompt(prompt_path)
+        llm_result = self._llm_extract(text, prompt_path)
+        if not llm_result:
+            return None
+
+        try:
+            return schema_cls(**llm_result.content)
+        except ValidationError as exc:
+            logger.warning("LLM extraction validation failed, trying schema-aware retry")
+            last_error = exc
+            last_output = llm_result.content
+
+        for _ in range(self.settings.extraction_llm_max_retries):
+            missing_fields = self._missing_fields(last_error)
+            repair_prompt = self._build_repair_prompt(base_prompt, schema_cls, missing_fields)
+            try:
+                repair_result = self.llm_client.extract_json(
+                    repair_prompt,
+                    self._build_repair_input(text, last_output, missing_fields),
+                )
+            except Exception:
+                logger.exception("LLM schema-aware retry failed")
+                return None
+            if not repair_result:
+                return None
+            last_output = repair_result.content
+            try:
+                return schema_cls(**repair_result.content)
+            except ValidationError as exc:
+                last_error = exc
+                logger.warning("LLM schema-aware retry still invalid missing_fields=%s", self._missing_fields(exc))
+        return None
+
+    def _build_repair_prompt(self, base_prompt: str, schema_cls, missing_fields: list[str]) -> str:
+        schema_json = json.dumps(schema_cls.model_json_schema(), ensure_ascii=False)
+        return (
+            f"{base_prompt}\n\n"
+            "你是结构化修复助手。请严格输出可被 JSON 解析的对象,不要输出解释文字。\n"
+            "目标是根据给定 schema 修复字段缺失和类型错误,优先保证必填字段完整。\n"
+            f"缺失或错误字段: {', '.join(missing_fields) if missing_fields else 'unknown'}\n"
+            f"JSON Schema: {schema_json}\n"
+        )
+
+    def _build_repair_input(self, original_text: str, last_output: dict, missing_fields: list[str]) -> str:
+        return (
+            f"原始文本:\n{original_text}\n\n"
+            f"上一次抽取结果:\n{json.dumps(last_output, ensure_ascii=False)}\n\n"
+            f"请重点修复字段:\n{json.dumps(missing_fields, ensure_ascii=False)}"
+        )
 
     def _extract_job_rule(self, text: str) -> JobCard:
         skill_hits = [item for item in self.skills if item in text]
         category = next((item for item in self.categories if item in text), "活动执行")
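For context, _missing_fields is not part of this diff; the retry loop above only relies on it turning a pydantic ValidationError into a list of field names, roughly like this (a sketch under that assumption, with a stand-in model):

# Hypothetical sketch; the real _missing_fields and JobCard live outside this diff.
from pydantic import BaseModel, ValidationError


class DemoCard(BaseModel):
    title: str
    city: str


try:
    DemoCard(**{"title": "促销员"})
except ValidationError as exc:
    missing = [".".join(str(part) for part in err["loc"]) for err in exc.errors()]
    print(missing)  # ['city']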
gig-poc/apps/api/app/services/ingest_queue.py (new file, 105 lines)
@@ -0,0 +1,105 @@
from __future__ import annotations

from dataclasses import dataclass
from queue import Empty, Full, Queue
from threading import Event, Lock, Thread
from typing import Any

from app.core.config import Settings
from app.core.logging import logger
from app.db.session import SessionLocal
from app.domain.schemas import JobCard, WorkerCard
from app.services.ingest_service import IngestService
from app.utils.ids import generate_id


@dataclass
class QueueTask:
    task_id: str
    kind: str
    payload: dict[str, Any]


class IngestQueue:
    def __init__(self, settings: Settings):
        self.settings = settings
        self.queue: Queue[QueueTask] = Queue(maxsize=settings.ingest_queue_max_size)
        self._stop_event = Event()
        self._thread: Thread | None = None
        self._lock = Lock()
        self._status: dict[str, str] = {}
        self._processed = 0
        self._failed = 0

    def start(self) -> None:
        if not self.settings.ingest_async_enabled:
            return
        if self._thread and self._thread.is_alive():
            return
        self._thread = Thread(target=self._run, daemon=True, name="ingest-queue-worker")
        self._thread.start()
        logger.info("ingest queue worker started")

    def stop(self) -> None:
        self._stop_event.set()
        if self._thread and self._thread.is_alive():
            self._thread.join(timeout=3)

    def enqueue_job(self, card: JobCard) -> str:
        return self._enqueue("job", card.model_dump(mode="json"))

    def enqueue_worker(self, card: WorkerCard) -> str:
        return self._enqueue("worker", card.model_dump(mode="json"))

    def task_status(self, task_id: str) -> str:
        with self._lock:
            return self._status.get(task_id, "not_found")

    def stats(self) -> dict[str, int]:
        with self._lock:
            return {
                "queued": self.queue.qsize(),
                "processed": self._processed,
                "failed": self._failed,
            }

    def _enqueue(self, kind: str, payload: dict[str, Any]) -> str:
        task_id = generate_id("queue")
        task = QueueTask(task_id=task_id, kind=kind, payload=payload)
        with self._lock:
            self._status[task_id] = "queued"
        try:
            self.queue.put_nowait(task)
        except Full as exc:
            with self._lock:
                self._status[task_id] = "rejected"
            raise RuntimeError("ingest queue is full") from exc
        return task_id

    def _run(self) -> None:
        while not self._stop_event.is_set():
            try:
                task = self.queue.get(timeout=0.5)
            except Empty:
                continue
            try:
                with self._lock:
                    self._status[task.task_id] = "processing"
                with SessionLocal() as db:
                    service = IngestService(db)
                    if task.kind == "job":
                        service.ingest_job(JobCard(**task.payload))
                    elif task.kind == "worker":
                        service.ingest_worker(WorkerCard(**task.payload))
                    else:
                        raise ValueError(f"unknown task kind {task.kind}")
                with self._lock:
                    self._status[task.task_id] = "done"
                    self._processed += 1
            except Exception:
                logger.exception("ingest queue task failed task_id=%s kind=%s", task.task_id, task.kind)
                with self._lock:
                    self._status[task.task_id] = "failed"
                    self._failed += 1
            finally:
                self.queue.task_done()
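A minimal driver sketch for the queue (the JobCard construction is elided because its schema is outside this diff):

# Hypothetical illustration, not part of the commit.
import time

from app.core.config import get_settings
from app.domain.schemas import JobCard
from app.services.ingest_queue import IngestQueue

queue = IngestQueue(get_settings())
queue.start()                # no-op unless ingest_async_enabled is set
card = JobCard(...)          # placeholder: fill in the required JobCard fields
task_id = queue.enqueue_job(card)
while queue.task_status(task_id) in {"queued", "processing"}:
    time.sleep(0.1)
print(queue.task_status(task_id), queue.stats())  # "done" or "failed", plus counters
queue.stop()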
@@ -9,6 +9,7 @@ from app.core.logging import logger
 from app.domain.schemas import BootstrapResponse, JobCard, WorkerCard
 from app.repositories.job_repository import JobRepository
 from app.repositories.worker_repository import WorkerRepository
+from app.services.cache_service import get_match_cache, get_query_cache
 from app.services.rag.lightrag_adapter import LightRAGAdapter
 
 
@@ -19,17 +20,27 @@ class IngestService:
         self.job_repository = JobRepository(db)
         self.worker_repository = WorkerRepository(db)
         self.rag = LightRAGAdapter(self.settings)
+        self.match_cache = get_match_cache()
+        self.query_cache = get_query_cache()
 
     def ingest_job(self, card: JobCard) -> JobCard:
         logger.info("ingest_job job_id=%s", card.job_id)
         self.job_repository.upsert(card)
         self.rag.upsert_job(card)
+        if self.settings.match_cache_enabled:
+            self.match_cache.clear()
+        if self.settings.query_cache_enabled:
+            self.query_cache.clear()
         return card
 
     def ingest_worker(self, card: WorkerCard) -> WorkerCard:
         logger.info("ingest_worker worker_id=%s", card.worker_id)
         self.worker_repository.upsert(card)
         self.rag.upsert_worker(card)
+        if self.settings.match_cache_enabled:
+            self.match_cache.clear()
+        if self.settings.query_cache_enabled:
+            self.query_cache.clear()
         return card
 
     def bootstrap(self) -> BootstrapResponse:
@@ -43,6 +54,10 @@ class IngestService:
             self.ingest_job(JobCard(**item))
         for item in workers:
             self.ingest_worker(WorkerCard(**item))
+        if self.settings.match_cache_enabled:
+            self.match_cache.clear()
+        if self.settings.query_cache_enabled:
+            self.query_cache.clear()
         return BootstrapResponse(
             jobs=len(jobs),
             workers=len(workers),
@@ -6,14 +6,17 @@ import httpx
 
 from app.core.config import Settings
 from app.domain.schemas import PromptOutput
+from app.services.ai_guard import AIGuard
 
 
 class LLMClient:
     def __init__(self, settings: Settings):
         self.settings = settings
+        self.guard = AIGuard(settings)
 
     def extract_json(self, system_prompt: str, user_text: str) -> PromptOutput | None:
         if not self.settings.llm_enabled or not self.settings.llm_base_url or not self.settings.llm_api_key:
+            self.guard.record_fallback()
             return None
 
         payload = {
@@ -25,10 +28,77 @@ class LLMClient:
             "temperature": 0.1,
             "response_format": {"type": "json_object"},
         }
-        headers = {"Authorization": f"Bearer {self.settings.llm_api_key}"}
-        with httpx.Client(timeout=30.0) as client:
-            response = client.post(f"{self.settings.llm_base_url.rstrip('/')}/chat/completions", json=payload, headers=headers)
-            response.raise_for_status()
-            data = response.json()
-        raw_text = data["choices"][0]["message"]["content"]
+        endpoints = [self.settings.llm_base_url, *self.settings.llm_fallback_base_urls]
+        raw_text = self._request_with_failover(
+            endpoints=endpoints,
+            path="/chat/completions",
+            payload=payload,
+            api_key=self.settings.llm_api_key,
+        )
+        if raw_text is None:
+            self.guard.record_fallback()
+            return None
         return PromptOutput(content=json.loads(raw_text), raw_text=raw_text)
 
+    def embedding(self, text: str) -> list[float] | None:
+        if not self.settings.embedding_enabled:
+            return None
+        base_url = self.settings.embedding_base_url or self.settings.llm_base_url
+        api_key = self.settings.embedding_api_key or self.settings.llm_api_key
+        if not base_url or not api_key:
+            self.guard.record_fallback()
+            return None
+
+        payload = {
+            "model": self.settings.embedding_model,
+            "input": text,
+        }
+        endpoints = [base_url, *self.settings.embedding_fallback_base_urls]
+        data = self._request_with_failover(
+            endpoints=endpoints,
+            path="/embeddings",
+            payload=payload,
+            api_key=api_key,
+            return_full_response=True,
+        )
+        if data is None:
+            self.guard.record_fallback()
+            return None
+        embedding = data["data"][0]["embedding"]
+        if not isinstance(embedding, list):
+            return None
+        return [float(item) for item in embedding]
+
+    def metrics(self) -> dict:
+        return self.guard.snapshot()
+
+    def _request_with_failover(
+        self,
+        endpoints: list[str],
+        path: str,
+        payload: dict,
+        api_key: str,
+        return_full_response: bool = False,
+    ):
+        if not endpoints:
+            return None
+        for index, endpoint in enumerate([item for item in endpoints if item]):
+            allowed, _ = self.guard.allow_request(endpoint)
+            if not allowed:
+                continue
+            if index > 0:
+                self.guard.record_failover()
+            try:
+                headers = {"Authorization": f"Bearer {api_key}"}
+                with httpx.Client(timeout=self.settings.ai_request_timeout_seconds) as client:
+                    response = client.post(f"{endpoint.rstrip('/')}{path}", json=payload, headers=headers)
+                    response.raise_for_status()
+                    data = response.json()
+                self.guard.record_success(endpoint)
+                if return_full_response:
+                    return data
+                return data["choices"][0]["message"]["content"]
+            except Exception:
+                self.guard.record_failure(endpoint)
+                continue
+        return None
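Note that record_failover keys off list position: any request served by a non-primary endpoint counts as a failover, even when the primary was skipped by the guard rather than having just failed. Callers treat None as "use the non-LLM path", as in ExtractionService above. A minimal caller sketch (hypothetical prompt text):

# Hypothetical illustration, not part of the commit.
from app.core.config import get_settings
from app.services.llm_client import LLMClient

client = LLMClient(get_settings())
result = client.extract_json("Return a JSON object with a greeting field.", "hi")
if result is None:
    ...  # LLM disabled, rate limited, circuit open, or all endpoints failed
else:
    print(result.content)  # parsed JSON object
print(client.metrics())    # AIGuard snapshot, including fallback_hit_rate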
gig-poc/apps/api/app/services/match_queue.py (new file, 121 lines)
@@ -0,0 +1,121 @@
from __future__ import annotations

from dataclasses import dataclass
from queue import Empty, Full, Queue
from threading import Event, Lock, Thread
from typing import Any

from app.core.config import Settings
from app.core.logging import logger
from app.db.session import SessionLocal
from app.domain.schemas import MatchResult
from app.repositories.job_repository import JobRepository
from app.repositories.worker_repository import WorkerRepository
from app.services.card_mapper import job_to_card, worker_to_card
from app.services.matching_service import MatchingService
from app.utils.ids import generate_id


@dataclass
class MatchTask:
    task_id: str
    kind: str
    source_id: str
    top_n: int


class MatchQueue:
    def __init__(self, settings: Settings):
        self.settings = settings
        self.queue: Queue[MatchTask] = Queue(maxsize=settings.match_queue_max_size)
        self._stop_event = Event()
        self._thread: Thread | None = None
        self._lock = Lock()
        self._status: dict[str, str] = {}
        self._results: dict[str, list[dict[str, Any]]] = {}
        self._processed = 0
        self._failed = 0

    def start(self) -> None:
        if not self.settings.match_async_enabled:
            return
        if self._thread and self._thread.is_alive():
            return
        self._thread = Thread(target=self._run, daemon=True, name="match-queue-worker")
        self._thread.start()
        logger.info("match queue worker started")

    def stop(self) -> None:
        self._stop_event.set()
        if self._thread and self._thread.is_alive():
            self._thread.join(timeout=3)

    def enqueue_workers(self, job_id: str, top_n: int) -> str:
        return self._enqueue("workers", job_id, top_n)

    def enqueue_jobs(self, worker_id: str, top_n: int) -> str:
        return self._enqueue("jobs", worker_id, top_n)

    def task_status(self, task_id: str) -> str:
        with self._lock:
            return self._status.get(task_id, "not_found")

    def task_result(self, task_id: str) -> list[dict[str, Any]] | None:
        with self._lock:
            return self._results.get(task_id)

    def stats(self) -> dict[str, int]:
        with self._lock:
            return {
                "queued": self.queue.qsize(),
                "processed": self._processed,
                "failed": self._failed,
            }

    def _enqueue(self, kind: str, source_id: str, top_n: int) -> str:
        task_id = generate_id("mq")
        task = MatchTask(task_id=task_id, kind=kind, source_id=source_id, top_n=top_n)
        with self._lock:
            self._status[task_id] = "queued"
        try:
            self.queue.put_nowait(task)
        except Full as exc:
            with self._lock:
                self._status[task_id] = "rejected"
            raise RuntimeError("match queue is full") from exc
        return task_id

    def _run(self) -> None:
        while not self._stop_event.is_set():
            try:
                task = self.queue.get(timeout=0.5)
            except Empty:
                continue
            try:
                with self._lock:
                    self._status[task.task_id] = "processing"
                with SessionLocal() as db:
                    service = MatchingService(db)
                    if task.kind == "workers":
                        job = JobRepository(db).get(task.source_id)
                        if job is None:
                            raise ValueError("job not found")
                        items = service.match_workers(job_to_card(job), task.top_n)
                    elif task.kind == "jobs":
                        worker = WorkerRepository(db).get(task.source_id)
                        if worker is None:
                            raise ValueError("worker not found")
                        items = service.match_jobs(worker_to_card(worker), task.top_n)
                    else:
                        raise ValueError(f"unknown task kind {task.kind}")
                with self._lock:
                    self._status[task.task_id] = "done"
                    self._results[task.task_id] = [item.model_dump(mode="json") for item in items]
                    self._processed += 1
            except Exception:
                logger.exception("match queue task failed task_id=%s kind=%s", task.task_id, task.kind)
                with self._lock:
                    self._status[task.task_id] = "failed"
                    self._failed += 1
            finally:
                self.queue.task_done()
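A polling sketch for async matching (hypothetical job id; requires match_async_enabled; an unknown id makes the task fail with "job not found"):

# Hypothetical illustration, not part of the commit.
import time

from app.core.config import get_settings
from app.services.match_queue import MatchQueue

mq = MatchQueue(get_settings())
mq.start()
task_id = mq.enqueue_workers("job_123", top_n=5)  # "job_123" is a placeholder id
while mq.task_status(task_id) in {"queued", "processing"}:
    time.sleep(0.1)
if mq.task_status(task_id) == "done":
    print(mq.task_result(task_id))  # list of MatchResult dicts (model_dump(mode="json"))
mq.stop()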
@@ -10,8 +10,10 @@ from app.domain.schemas import JobCard, MatchBreakdown, MatchResult, QueryFilter
 from app.repositories.job_repository import JobRepository
 from app.repositories.match_repository import MatchRepository
 from app.repositories.worker_repository import WorkerRepository
+from app.services.cache_service import get_match_cache
 from app.services.card_mapper import job_to_card, worker_to_card
 from app.services.rag.lightrag_adapter import LightRAGAdapter
+from app.services.weight_service import MatchWeightService
 from app.utils.ids import generate_id
 
 
@@ -23,9 +25,16 @@ class MatchingService:
         self.workers = WorkerRepository(db)
         self.matches = MatchRepository(db)
         self.rag = LightRAGAdapter(self.settings)
+        self.weight_service = MatchWeightService(self.settings)
+        self.cache = get_match_cache()
 
     def match_workers(self, source: JobCard, top_n: int) -> list[MatchResult]:
         logger.info("match_workers source_id=%s top_n=%s", source.job_id, top_n)
+        cache_key = f"match_workers:{source.job_id}:{top_n}"
+        if self.settings.match_cache_enabled:
+            cached = self.cache.get(cache_key)
+            if cached is not None:
+                return self._parse_cached_matches(cached)
         query_text = " ".join([source.title, source.category, source.city, source.region, *source.skills, *source.tags])
         candidate_ids = self.rag.search(
             query_text=query_text,
@@ -36,10 +45,17 @@ class MatchingService:
         results = [self._build_job_to_worker_match(source, worker_to_card(worker)) for worker in candidates]
         results = sorted(results, key=lambda item: item.match_score, reverse=True)[:top_n]
         self.matches.bulk_replace(results, SourceType.job_to_worker.value, source.job_id)
+        if self.settings.match_cache_enabled:
+            self.cache.set(cache_key, [item.model_dump(mode="json") for item in results])
         return results
 
     def match_jobs(self, source: WorkerCard, top_n: int) -> list[MatchResult]:
         logger.info("match_jobs source_id=%s top_n=%s", source.worker_id, top_n)
+        cache_key = f"match_jobs:{source.worker_id}:{top_n}"
+        if self.settings.match_cache_enabled:
+            cached = self.cache.get(cache_key)
+            if cached is not None:
+                return self._parse_cached_matches(cached)
         query_text = " ".join([source.name, *source.cities, *source.regions, *[item.name for item in source.skills], *source.experience_tags])
         city = source.cities[0] if source.cities else None
         candidate_ids = self.rag.search(
@@ -51,6 +67,8 @@ class MatchingService:
         results = [self._build_worker_to_job_match(source, job_to_card(job)) for job in candidates]
         results = sorted(results, key=lambda item: item.match_score, reverse=True)[:top_n]
         self.matches.bulk_replace(results, SourceType.worker_to_job.value, source.worker_id)
+        if self.settings.match_cache_enabled:
+            self.cache.set(cache_key, [item.model_dump(mode="json") for item in results])
         return results
 
     def explain(self, match_id: str) -> MatchResult | None:
@@ -61,6 +79,20 @@ class MatchingService:
 
         return match_record_to_schema(record)
 
+    def feedback(self, match_id: str, accepted: bool) -> dict[str, float] | None:
+        record = self.matches.get(match_id)
+        if record is None:
+            return None
+        from app.services.card_mapper import match_record_to_schema
+
+        match = match_record_to_schema(record)
+        if self.settings.ranking_learning_enabled:
+            return self.weight_service.update_from_feedback(match.breakdown, accepted)
+        return self.weight_service.get_weights()
+
+    def current_weights(self) -> dict[str, float]:
+        return self.weight_service.get_weights()
+
     def _build_job_to_worker_match(self, job: JobCard, worker: WorkerCard) -> MatchResult:
         job_skills = set(job.skills)
         expanded_skills = self.rag.expand_skills(job.skills)
@@ -143,13 +175,14 @@ class MatchingService:
         experience_score: float,
         reliability_score: float,
     ) -> float:
-        return (
-            self.settings.score_skill_weight * skill_score
-            + self.settings.score_region_weight * region_score
-            + self.settings.score_time_weight * time_score
-            + self.settings.score_experience_weight * experience_score
-            + self.settings.score_reliability_weight * reliability_score
-        )
+        breakdown = MatchBreakdown(
+            skill_score=skill_score,
+            region_score=region_score,
+            time_score=time_score,
+            experience_score=experience_score,
+            reliability_score=reliability_score,
+        )
+        return self.weight_service.score(breakdown)
 
     def _build_reasons(
         self,
@@ -176,3 +209,10 @@ class MatchingService:
         while len(reasons) < 3:
             reasons.append("岗位需求与候选画像存在基础匹配")
         return reasons[:5]
+
+    def _parse_cached_matches(self, cached) -> list[MatchResult]:
+        if isinstance(cached, list) and cached and isinstance(cached[0], MatchResult):
+            return cached
+        if isinstance(cached, list):
+            return [MatchResult(**item) for item in cached]
+        return []
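Both cache backends are fed plain dicts via model_dump(mode="json"), so _parse_cached_matches normally revives them with MatchResult(**item); the isinstance(cached[0], MatchResult) branch is a defensive path for a cache that happens to hold live model objects. A round-trip sketch with a stand-in model (MatchResult's real fields are defined outside this diff):

# Hypothetical illustration, not part of the commit.
from pydantic import BaseModel


class DemoResult(BaseModel):
    match_id: str
    match_score: float


results = [DemoResult(match_id="m1", match_score=0.91)]
payload = [item.model_dump(mode="json") for item in results]  # what the cache stores
revived = [DemoResult(**item) for item in payload]            # what _parse_cached_matches does
assert revived == results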
@@ -10,6 +10,7 @@ from qdrant_client import QdrantClient, models
 from app.core.config import Settings
 from app.core.logging import logger
 from app.domain.schemas import JobCard, QueryFilters, WorkerCard
+from app.services.llm_client import LLMClient
 
 
 class LightRAGAdapter:
@@ -17,13 +18,28 @@ class LightRAGAdapter:
         self.settings = settings
         self.client = QdrantClient(url=settings.qdrant_url)
         self.skill_graph = self._load_skill_graph()
+        self.llm_client = LLMClient(settings)
+        self.collection_vector_size: int | None = None
 
     def ensure_ready(self) -> None:
         collections = {item.name for item in self.client.get_collections().collections}
+        expected_size = self._configured_vector_size()
         if self.settings.qdrant_collection not in collections:
             self.client.create_collection(
                 collection_name=self.settings.qdrant_collection,
-                vectors_config=models.VectorParams(size=self.settings.vector_size, distance=models.Distance.COSINE),
+                vectors_config=models.VectorParams(size=expected_size, distance=models.Distance.COSINE),
             )
+            self.collection_vector_size = expected_size
+            return
+        info = self.client.get_collection(self.settings.qdrant_collection)
+        configured_size = info.config.params.vectors.size
+        self.collection_vector_size = int(configured_size)
+        if self.collection_vector_size != expected_size:
+            logger.warning(
+                "qdrant vector size mismatch, collection=%s expected=%s actual=%s; using actual size",
+                self.settings.qdrant_collection,
+                expected_size,
+                self.collection_vector_size,
+            )
 
     def health(self) -> str:
@@ -125,14 +141,40 @@ class LightRAGAdapter:
         )
 
     def _vectorize(self, text: str) -> list[float]:
-        vector = [0.0 for _ in range(self.settings.vector_size)]
+        if self.settings.embedding_enabled and self.settings.embedding_backend == "openai_compatible":
+            try:
+                embedding = self.llm_client.embedding(text)
+                if embedding:
+                    return self._normalize_embedding(embedding)
+            except Exception:
+                logger.exception("embedding request failed, fallback to hash vector")
+        target_size = self._active_vector_size()
+        vector = [0.0 for _ in range(target_size)]
         tokens = self._tokenize(text)
         for token in tokens:
-            index = hash(token) % self.settings.vector_size
+            index = hash(token) % target_size
             vector[index] += 1.0
         norm = math.sqrt(sum(item * item for item in vector)) or 1.0
         return [item / norm for item in vector]
 
+    def _normalize_embedding(self, embedding: list[float]) -> list[float]:
+        target_size = self._active_vector_size()
+        vector = embedding[:target_size]
+        if len(vector) < target_size:
+            vector.extend([0.0] * (target_size - len(vector)))
+        norm = math.sqrt(sum(item * item for item in vector)) or 1.0
+        return [item / norm for item in vector]
+
+    def _active_vector_size(self) -> int:
+        if self.collection_vector_size:
+            return self.collection_vector_size
+        return self._configured_vector_size()
+
+    def _configured_vector_size(self) -> int:
+        if self.settings.embedding_enabled and self.settings.embedding_backend == "openai_compatible":
+            return self.settings.embedding_vector_size
+        return self.settings.vector_size
+
     def _tokenize(self, text: str) -> list[str]:
         cleaned = [part.strip().lower() for part in text.replace(",", " ").replace("、", " ").replace("。", " ").split()]
         tokens = [part for part in cleaned if part]
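The fallback vectorizer is a hashed bag-of-words: each token increments one bucket chosen by hash(token) modulo the vector size, then the vector is L2-normalized. (Python's str hash is salted per process unless PYTHONHASHSEED is pinned, so buckets are only stable within one process.) In miniature:

# Hash-vector fallback in miniature (size 8 chosen for readability).
import math


def hash_vector(text: str, size: int = 8) -> list[float]:
    vector = [0.0] * size
    for token in text.lower().split():
        vector[hash(token) % size] += 1.0
    norm = math.sqrt(sum(v * v for v in vector)) or 1.0
    return [v / norm for v in vector]


print(hash_vector("promoter beijing weekend promoter"))  # repeated tokens weight their bucket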
gig-poc/apps/api/app/services/runtime_state.py (new file, 23 lines)
@@ -0,0 +1,23 @@
from __future__ import annotations

from functools import lru_cache

from app.core.config import get_settings
from app.services.ingest_queue import IngestQueue
from app.services.match_queue import MatchQueue
from app.services.traffic_guard import TrafficGuard


@lru_cache
def get_ingest_queue() -> IngestQueue:
    return IngestQueue(get_settings())


@lru_cache
def get_match_queue() -> MatchQueue:
    return MatchQueue(get_settings())


@lru_cache
def get_traffic_guard() -> TrafficGuard:
    return TrafficGuard(get_settings())
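Because each factory takes no arguments, @lru_cache memoizes a single instance per process, so these behave as module-level singletons; cache_clear() resets them, which is handy in tests:

# lru_cache on a zero-argument factory acts as a process-wide singleton.
from app.services.runtime_state import get_ingest_queue

assert get_ingest_queue() is get_ingest_queue()
get_ingest_queue.cache_clear()  # e.g. between tests, to force a fresh queue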
gig-poc/apps/api/app/services/traffic_guard.py (new file, 108 lines)
@@ -0,0 +1,108 @@
from __future__ import annotations

import time
from collections import deque
from threading import Lock

import httpx

from app.core.config import Settings
from app.core.logging import logger


class TrafficGuard:
    def __init__(self, settings: Settings):
        self.settings = settings
        self._lock = Lock()
        self._minute = 0
        self._minute_count = 0
        self._open_until = 0.0
        self._events: deque[tuple[float, int]] = deque()
        self._requests = 0
        self._rate_limited = 0
        self._circuit_blocked = 0
        self._avg_latency_ms = 0.0
        self._alert_last_sent = 0.0

    def allow(self, path: str) -> tuple[bool, str]:
        now = time.time()
        with self._lock:
            minute = int(now // 60)
            if self._minute != minute:
                self._minute = minute
                self._minute_count = 0
            if self._minute_count >= self.settings.app_rate_limit_per_minute:
                self._rate_limited += 1
                return False, "rate_limited"
            if self._open_until > now and not self._is_exempt(path):
                self._circuit_blocked += 1
                return False, "circuit_open"
            self._minute_count += 1
            self._requests += 1
            return True, "ok"

    def record(self, status_code: int, latency_ms: float) -> None:
        now = time.time()
        with self._lock:
            self._events.append((now, status_code))
            self._avg_latency_ms = self._ema(self._avg_latency_ms, latency_ms)
            self._trim(now)
            total = len(self._events)
            if total < self.settings.app_circuit_breaker_min_requests:
                return
            errors = sum(1 for _, code in self._events if code >= 500)
            error_rate = errors / total
            if error_rate >= self.settings.app_circuit_breaker_error_rate:
                self._open_until = now + self.settings.app_circuit_breaker_cooldown_seconds
                self._send_alert(
                    "app circuit opened",
                    {
                        "error_rate": round(error_rate, 4),
                        "window_requests": total,
                        "cooldown_seconds": self.settings.app_circuit_breaker_cooldown_seconds,
                    },
                )

    def snapshot(self) -> dict[str, float | int]:
        now = time.time()
        with self._lock:
            self._trim(now)
            total = len(self._events)
            errors = sum(1 for _, code in self._events if code >= 500)
            return {
                "requests_total": self._requests,
                "rate_limited_total": self._rate_limited,
                "circuit_blocked_total": self._circuit_blocked,
                "window_requests": total,
                "window_errors": errors,
                "window_error_rate": round((errors / total), 4) if total else 0.0,
                "avg_latency_ms": round(self._avg_latency_ms, 2),
                "circuit_open": 1 if self._open_until > now else 0,
            }

    def _trim(self, now: float) -> None:
        lower = now - self.settings.app_circuit_breaker_window_seconds
        while self._events and self._events[0][0] < lower:
            self._events.popleft()

    def _ema(self, prev: float, value: float, alpha: float = 0.2) -> float:
        if prev <= 0:
            return value
        return alpha * value + (1 - alpha) * prev

    def _is_exempt(self, path: str) -> bool:
        return path in {"/health", "/docs", "/openapi.json", "/poc/ops/system/metrics", "/poc/ops/ai/metrics"}

    def _send_alert(self, message: str, extra: dict) -> None:
        now = time.time()
        if now - self._alert_last_sent < 30:
            return
        self._alert_last_sent = now
        logger.warning("%s extra=%s", message, extra)
        if not self.settings.alert_webhook_url:
            return
        try:
            with httpx.Client(timeout=2.0) as client:
                client.post(self.settings.alert_webhook_url, json={"message": message, "extra": extra})
        except Exception:
            logger.exception("alert webhook send failed")
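The latency gauge is an exponential moving average: with alpha = 0.2, each sample moves the gauge 20% of the way toward itself. Worked arithmetic:

# EMA arithmetic as implemented by _ema (alpha = 0.2).
prev = 100.0                     # current average latency in ms
sample = 200.0                   # new request latency
ema = 0.2 * sample + 0.8 * prev  # = 120.0, i.e. 20% of the way from 100 to 200
assert ema == 120.0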
gig-poc/apps/api/app/services/weight_service.py (new file, 77 lines)
@@ -0,0 +1,77 @@
from __future__ import annotations

import json
from pathlib import Path

from app.core.config import Settings
from app.core.logging import logger
from app.domain.schemas import MatchBreakdown


class MatchWeightService:
    def __init__(self, settings: Settings):
        self.settings = settings
        self.path: Path = settings.match_weights_path

    def default_weights(self) -> dict[str, float]:
        return {
            "skill": self.settings.score_skill_weight,
            "region": self.settings.score_region_weight,
            "time": self.settings.score_time_weight,
            "experience": self.settings.score_experience_weight,
            "reliability": self.settings.score_reliability_weight,
        }

    def get_weights(self) -> dict[str, float]:
        weights = self.default_weights()
        if not self.path.exists():
            return self._normalize(weights)
        try:
            data = json.loads(self.path.read_text(encoding="utf-8"))
            for key in weights:
                value = data.get(key)
                if isinstance(value, (int, float)):
                    weights[key] = float(value)
        except Exception:
            logger.exception("failed to read learned ranking weights, fallback to defaults")
        return self._normalize(weights)

    def score(self, breakdown: MatchBreakdown) -> float:
        weights = self.get_weights()
        return (
            weights["skill"] * breakdown.skill_score
            + weights["region"] * breakdown.region_score
            + weights["time"] * breakdown.time_score
            + weights["experience"] * breakdown.experience_score
            + weights["reliability"] * breakdown.reliability_score
        )

    def update_from_feedback(self, breakdown: MatchBreakdown, accepted: bool) -> dict[str, float]:
        weights = self.get_weights()
        features = {
            "skill": breakdown.skill_score,
            "region": breakdown.region_score,
            "time": breakdown.time_score,
            "experience": breakdown.experience_score,
            "reliability": breakdown.reliability_score,
        }
        target = 1.0 if accepted else 0.0
        prediction = sum(weights[name] * value for name, value in features.items())
        error = target - prediction
        lr = self.settings.ranking_learning_rate
        updated = {name: max(0.0, weights[name] + lr * error * value) for name, value in features.items()}
        normalized = self._normalize(updated)
        self._save_weights(normalized)
        return normalized

    def _save_weights(self, weights: dict[str, float]) -> None:
        self.settings.data_dir.mkdir(parents=True, exist_ok=True)
        self.path.write_text(json.dumps(weights, ensure_ascii=False, indent=2), encoding="utf-8")

    def _normalize(self, weights: dict[str, float]) -> dict[str, float]:
        total = sum(max(value, 0.0) for value in weights.values())
        if total <= 0:
            fallback = self.default_weights()
            total = sum(fallback.values())
            return {key: value / total for key, value in fallback.items()}
        return {key: max(value, 0.0) / total for key, value in weights.items()}
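update_from_feedback is a perceptron-style step followed by renormalization: nudge each weight by lr * error * feature, clamp at zero, then rescale to sum to 1. Worked arithmetic with hypothetical numbers (the real learning rate comes from Settings.ranking_learning_rate):

# Hypothetical arithmetic, not part of the commit (lr = 0.1, accepted match).
weights = {"skill": 0.4, "region": 0.2, "time": 0.2, "experience": 0.1, "reliability": 0.1}
features = {"skill": 1.0, "region": 1.0, "time": 0.5, "experience": 0.0, "reliability": 0.5}

prediction = sum(weights[k] * features[k] for k in weights)  # 0.4 + 0.2 + 0.1 + 0.0 + 0.05 = 0.75
error = 1.0 - prediction                                     # target 1.0 for an accepted match
updated = {k: max(0.0, weights[k] + 0.1 * error * features[k]) for k in weights}
total = sum(updated.values())
normalized = {k: v / total for k, v in updated.items()}      # non-negative, sums to 1
print(normalized)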