feat: 初始化零工后端代码
This commit is contained in:
@@ -10,6 +10,7 @@ from qdrant_client import QdrantClient, models
|
||||
from app.core.config import Settings
|
||||
from app.core.logging import logger
|
||||
from app.domain.schemas import JobCard, QueryFilters, WorkerCard
|
||||
from app.services.llm_client import LLMClient
|
||||
|
||||
|
||||
class LightRAGAdapter:
|
||||
@@ -17,13 +18,28 @@ class LightRAGAdapter:
|
||||
self.settings = settings
|
||||
self.client = QdrantClient(url=settings.qdrant_url)
|
||||
self.skill_graph = self._load_skill_graph()
|
||||
self.llm_client = LLMClient(settings)
|
||||
self.collection_vector_size: int | None = None
|
||||
|
||||
def ensure_ready(self) -> None:
    """Ensure the Qdrant collection exists and record its vector size.

    Creates the collection with cosine distance when it is missing;
    otherwise reads the existing collection's configured vector size into
    ``self.collection_vector_size`` so later vectorization can match the
    live collection even when local settings disagree.
    """
    collections = {item.name for item in self.client.get_collections().collections}
    expected_size = self._configured_vector_size()
    if self.settings.qdrant_collection not in collections:
        # Fresh collection: its size comes from our own configuration.
        # (Dropped the stale duplicate vectors_config line left over from a
        # diff; keeping both would be a duplicate keyword argument.)
        self.client.create_collection(
            collection_name=self.settings.qdrant_collection,
            vectors_config=models.VectorParams(size=expected_size, distance=models.Distance.COSINE),
        )
        self.collection_vector_size = expected_size
        return
    # Existing collection: trust its actual size over our configuration.
    info = self.client.get_collection(self.settings.qdrant_collection)
    configured_size = info.config.params.vectors.size
    self.collection_vector_size = int(configured_size)
    if self.collection_vector_size != expected_size:
        logger.warning(
            "qdrant vector size mismatch, collection=%s expected=%s actual=%s; using actual size",
            self.settings.qdrant_collection,
            expected_size,
            self.collection_vector_size,
        )
||||
def health(self) -> str:
|
||||
@@ -125,14 +141,40 @@ class LightRAGAdapter:
|
||||
)
|
||||
|
||||
def _vectorize(self, text: str) -> list[float]:
|
||||
vector = [0.0 for _ in range(self.settings.vector_size)]
|
||||
if self.settings.embedding_enabled and self.settings.embedding_backend == "openai_compatible":
|
||||
try:
|
||||
embedding = self.llm_client.embedding(text)
|
||||
if embedding:
|
||||
return self._normalize_embedding(embedding)
|
||||
except Exception:
|
||||
logger.exception("embedding request failed, fallback to hash vector")
|
||||
target_size = self._active_vector_size()
|
||||
vector = [0.0 for _ in range(target_size)]
|
||||
tokens = self._tokenize(text)
|
||||
for token in tokens:
|
||||
index = hash(token) % self.settings.vector_size
|
||||
index = hash(token) % target_size
|
||||
vector[index] += 1.0
|
||||
norm = math.sqrt(sum(item * item for item in vector)) or 1.0
|
||||
return [item / norm for item in vector]
|
||||
|
||||
def _normalize_embedding(self, embedding: list[float]) -> list[float]:
|
||||
target_size = self._active_vector_size()
|
||||
vector = embedding[:target_size]
|
||||
if len(vector) < target_size:
|
||||
vector.extend([0.0] * (target_size - len(vector)))
|
||||
norm = math.sqrt(sum(item * item for item in vector)) or 1.0
|
||||
return [item / norm for item in vector]
|
||||
|
||||
def _active_vector_size(self) -> int:
|
||||
if self.collection_vector_size:
|
||||
return self.collection_vector_size
|
||||
return self._configured_vector_size()
|
||||
|
||||
def _configured_vector_size(self) -> int:
|
||||
if self.settings.embedding_enabled and self.settings.embedding_backend == "openai_compatible":
|
||||
return self.settings.embedding_vector_size
|
||||
return self.settings.vector_size
|
||||
|
||||
def _tokenize(self, text: str) -> list[str]:
|
||||
cleaned = [part.strip().lower() for part in text.replace(",", " ").replace("、", " ").replace("。", " ").split()]
|
||||
tokens = [part for part in cleaned if part]
|
||||
|
||||
Reference in New Issue
Block a user