from __future__ import annotations

import json

from sqlalchemy.orm import Session

from app.core.config import get_settings
from app.core.logging import logger
from app.domain.schemas import BootstrapResponse, JobCard, WorkerCard
from app.repositories.job_repository import JobRepository
from app.repositories.worker_repository import WorkerRepository
from app.services.cache_service import get_match_cache, get_query_cache
from app.services.rag.lightrag_adapter import LightRAGAdapter


class IngestService:
    """Write job and worker cards to the repositories and the RAG adapter.

    Each ingest upserts the card through its repository, upserts it through
    the ``LightRAGAdapter``, and then clears the match and query caches
    (each only when its ``*_cache_enabled`` setting is truthy).
    """

    def __init__(self, db: Session):
        """Wire up repositories, the RAG adapter and the caches.

        Args:
            db: Session handed to both the job and the worker repository.
        """
        self.db = db
        self.settings = get_settings()
        self.job_repository = JobRepository(db)
        self.worker_repository = WorkerRepository(db)
        self.rag = LightRAGAdapter(self.settings)
        self.match_cache = get_match_cache()
        self.query_cache = get_query_cache()

    def _invalidate_caches(self) -> None:
        """Clear the match and query caches that are enabled in settings."""
        if self.settings.match_cache_enabled:
            self.match_cache.clear()
        if self.settings.query_cache_enabled:
            self.query_cache.clear()

    def _load_sample(self, filename: str):
        """Return the parsed JSON content of ``sample_data_dir / filename``.

        The file is read as UTF-8. Errors from reading or parsing propagate
        to the caller unchanged.
        """
        path = self.settings.sample_data_dir / filename
        return json.loads(path.read_text(encoding="utf-8"))

    def ingest_job(self, card: JobCard) -> JobCard:
        """Upsert a job card, then invalidate the enabled caches.

        Args:
            card: The job card to store.

        Returns:
            The same ``card`` object that was passed in.
        """
        logger.info("ingest_job job_id=%s", card.job_id)
        self.job_repository.upsert(card)
        self.rag.upsert_job(card)
        self._invalidate_caches()
        return card

    def ingest_worker(self, card: WorkerCard) -> WorkerCard:
        """Upsert a worker card, then invalidate the enabled caches.

        Args:
            card: The worker card to store.

        Returns:
            The same ``card`` object that was passed in.
        """
        logger.info("ingest_worker worker_id=%s", card.worker_id)
        self.worker_repository.upsert(card)
        self.rag.upsert_worker(card)
        self._invalidate_caches()
        return card

    def bootstrap(self) -> BootstrapResponse:
        """Ingest the bundled sample jobs and workers.

        Reads ``skills.json``, ``categories.json``, ``regions.json``,
        ``jobs.json`` and ``workers.json`` from ``settings.sample_data_dir``.
        Jobs and workers are ingested one by one; skills, categories and
        regions are only counted here, not written anywhere by this method.

        Returns:
            A ``BootstrapResponse`` holding the number of entries found in
            each of the five sample files.
        """
        skills = self._load_sample("skills.json")
        categories = self._load_sample("categories.json")
        regions = self._load_sample("regions.json")
        jobs = self._load_sample("jobs.json")
        workers = self._load_sample("workers.json")

        # Build every card before the first write so that a record the card
        # constructors reject aborts the bootstrap before anything is stored,
        # instead of leaving the sample data half-ingested.
        job_cards = [JobCard(**item) for item in jobs]
        worker_cards = [WorkerCard(**item) for item in workers]

        self.rag.ensure_ready()
        for job_card in job_cards:
            self.ingest_job(job_card)
        for worker_card in worker_cards:
            self.ingest_worker(worker_card)

        # Each ingest call already invalidates the caches; this final pass
        # also covers the case where both sample lists are empty.
        self._invalidate_caches()

        return BootstrapResponse(
            jobs=len(jobs),
            workers=len(workers),
            skills=len(skills),
            categories=len(categories),
            regions=len(regions),
        )