feat: initialize gig-work backend code

Daniel
2026-04-01 14:19:25 +08:00
parent c6fabe262c
commit 84f8be7c0e
41 changed files with 2813 additions and 147 deletions


@@ -10,6 +10,7 @@ from qdrant_client import QdrantClient, models
 from app.core.config import Settings
 from app.core.logging import logger
 from app.domain.schemas import JobCard, QueryFilters, WorkerCard
+from app.services.llm_client import LLMClient
 
 
 class LightRAGAdapter:
@@ -17,13 +18,28 @@ class LightRAGAdapter:
         self.settings = settings
         self.client = QdrantClient(url=settings.qdrant_url)
         self.skill_graph = self._load_skill_graph()
+        self.llm_client = LLMClient(settings)
+        self.collection_vector_size: int | None = None
 
     def ensure_ready(self) -> None:
         collections = {item.name for item in self.client.get_collections().collections}
+        expected_size = self._configured_vector_size()
         if self.settings.qdrant_collection not in collections:
             self.client.create_collection(
                 collection_name=self.settings.qdrant_collection,
-                vectors_config=models.VectorParams(size=self.settings.vector_size, distance=models.Distance.COSINE),
+                vectors_config=models.VectorParams(size=expected_size, distance=models.Distance.COSINE),
             )
+            self.collection_vector_size = expected_size
+            return
+        info = self.client.get_collection(self.settings.qdrant_collection)
+        configured_size = info.config.params.vectors.size
+        self.collection_vector_size = int(configured_size)
+        if self.collection_vector_size != expected_size:
+            logger.warning(
+                "qdrant vector size mismatch, collection=%s expected=%s actual=%s; using actual size",
+                self.settings.qdrant_collection,
+                expected_size,
+                self.collection_vector_size,
+            )
 
     def health(self) -> str:
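
The `ensure_ready` change above reconciles two sources of truth for the vector size: the configured size and the size the existing Qdrant collection was actually created with. Because a collection's vector size is fixed at creation time, the adapter records the actual size and prefers it from then on. A minimal standalone sketch of that rule (the function name `resolve_vector_size` is illustrative, not part of this commit):

def resolve_vector_size(expected: int, actual: int | None) -> int:
    # Prefer the size an existing collection was created with. If the
    # configured size drifts (e.g. the embedding model changed), points of
    # the configured size would be rejected on upsert, so using the actual
    # size keeps writes working and the mismatch is only logged.
    if actual is not None and actual != expected:
        return actual  # mirrors the diff's "using actual size" warning path
    return expected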
@@ -125,14 +141,40 @@ class LightRAGAdapter:
         )
 
     def _vectorize(self, text: str) -> list[float]:
-        vector = [0.0 for _ in range(self.settings.vector_size)]
+        if self.settings.embedding_enabled and self.settings.embedding_backend == "openai_compatible":
+            try:
+                embedding = self.llm_client.embedding(text)
+                if embedding:
+                    return self._normalize_embedding(embedding)
+            except Exception:
+                logger.exception("embedding request failed, fallback to hash vector")
+        target_size = self._active_vector_size()
+        vector = [0.0 for _ in range(target_size)]
         tokens = self._tokenize(text)
         for token in tokens:
-            index = hash(token) % self.settings.vector_size
+            index = hash(token) % target_size
             vector[index] += 1.0
         norm = math.sqrt(sum(item * item for item in vector)) or 1.0
         return [item / norm for item in vector]
 
+    def _normalize_embedding(self, embedding: list[float]) -> list[float]:
+        target_size = self._active_vector_size()
+        vector = embedding[:target_size]
+        if len(vector) < target_size:
+            vector.extend([0.0] * (target_size - len(vector)))
+        norm = math.sqrt(sum(item * item for item in vector)) or 1.0
+        return [item / norm for item in vector]
+
+    def _active_vector_size(self) -> int:
+        if self.collection_vector_size:
+            return self.collection_vector_size
+        return self._configured_vector_size()
+
+    def _configured_vector_size(self) -> int:
+        if self.settings.embedding_enabled and self.settings.embedding_backend == "openai_compatible":
+            return self.settings.embedding_vector_size
+        return self.settings.vector_size
+
     def _tokenize(self, text: str) -> list[str]:
         cleaned = [part.strip().lower() for part in text.replace("，", " ").replace("、", " ").replace("。", " ").split()]
         tokens = [part for part in cleaned if part]
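
(The `replace` targets in `_tokenize` were lost in extraction; the CJK punctuation above is a best-guess reconstruction, not verified against the repository.)

For context, `_vectorize` now tries the OpenAI-compatible embedding endpoint first and only falls back to the hash-bag vector when embeddings are disabled or the request fails; `_normalize_embedding` then truncates or zero-pads the embedding to the active collection size, which is lossy whenever the model's dimension differs from the collection's. One caveat with the fallback as written: Python's built-in `hash()` is randomized per process for strings (PYTHONHASHSEED), so fallback vectors indexed by one process will not match queries hashed by another. A self-contained sketch of the same feature-hashing idea using a stable digest instead (an assumption, not what this commit ships):

import hashlib
import math

def hash_bag_vector(tokens: list[str], size: int) -> list[float]:
    # L2-normalized bag-of-tokens vector via the feature-hashing trick.
    # blake2b is deterministic across processes, unlike built-in hash(),
    # so token-to-index assignments stay reproducible between writer and reader.
    vector = [0.0] * size
    for token in tokens:
        digest = hashlib.blake2b(token.encode("utf-8"), digest_size=8).digest()
        vector[int.from_bytes(digest, "big") % size] += 1.0
    norm = math.sqrt(sum(v * v for v in vector)) or 1.0
    return [v / norm for v in vector]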