feat: add new folder
This commit is contained in:
50
gig-poc/apps/api/app/services/card_mapper.py
Normal file
50
gig-poc/apps/api/app/services/card_mapper.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from app.domain.models import Job, MatchRecord, Worker
|
||||
from app.domain.schemas import JobCard, MatchBreakdown, MatchResult, Salary, SkillScore, SourceType, WorkerCard
|
||||
|
||||
|
||||
def job_to_card(job: Job) -> JobCard:
    """Convert a persisted Job ORM row into its JobCard API schema."""
    pay = Salary(
        type=job.salary_type,
        amount=job.salary_amount,
        currency=job.salary_currency,
    )
    fields = {
        "job_id": job.id,
        "title": job.title,
        "category": job.category,
        "description": job.description,
        "skills": [entry.skill_name for entry in job.skills],
        "city": job.city,
        "region": job.region,
        "location_detail": job.location_detail,
        "start_time": job.start_time,
        "duration_hours": job.duration_hours,
        "headcount": job.headcount,
        "salary": pay,
        "work_mode": job.work_mode,
        "tags": job.tags_json,
        "confidence": job.confidence,
    }
    return JobCard(**fields)
|
||||
|
||||
|
||||
def worker_to_card(worker: Worker) -> WorkerCard:
    """Convert a persisted Worker ORM row into its WorkerCard API schema."""
    scored_skills = [
        SkillScore(name=entry.skill_name, score=entry.score)
        for entry in worker.skills
    ]
    fields = {
        "worker_id": worker.id,
        "name": worker.name,
        "description": worker.description,
        "skills": scored_skills,
        "cities": worker.cities_json,
        "regions": worker.regions_json,
        "availability": worker.availability_json,
        "experience_tags": worker.experience_tags_json,
        "reliability_score": worker.reliability_score,
        "profile_completion": worker.profile_completion,
        "confidence": worker.confidence,
    }
    return WorkerCard(**fields)
|
||||
|
||||
|
||||
def match_record_to_schema(match: MatchRecord) -> MatchResult:
    """Convert a persisted MatchRecord row into its MatchResult API schema."""
    breakdown = MatchBreakdown(**match.breakdown_json)
    return MatchResult(
        match_id=match.id,
        source_type=SourceType(match.source_type),
        source_id=match.source_id,
        target_id=match.target_id,
        match_score=match.match_score,
        breakdown=breakdown,
        reasons=match.reasons_json,
    )
|
||||
209
gig-poc/apps/api/app/services/extraction_service.py
Normal file
209
gig-poc/apps/api/app/services/extraction_service.py
Normal file
@@ -0,0 +1,209 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from dateutil import parser as date_parser
|
||||
from pydantic import ValidationError
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.core.logging import logger
|
||||
from app.domain.schemas import ExtractResponse, JobCard, Salary, SkillScore, WorkerCard
|
||||
from app.services.llm_client import LLMClient
|
||||
from app.utils.ids import generate_id
|
||||
from app.utils.prompts import load_prompt
|
||||
|
||||
|
||||
class ExtractionService:
    """Turn free-form Chinese job / worker descriptions into structured cards.

    Tries the LLM extractor first; on any LLM failure or when the LLM is not
    configured, falls back to a rule-based parser that matches against the
    bundled sample vocabularies (skills, categories, regions).
    """

    def __init__(self) -> None:
        self.settings = get_settings()
        # Vocabulary files shipped with the sample data, used by the
        # rule-based fallback extractors.
        self.skills = json.loads((self.settings.sample_data_dir / "skills.json").read_text(encoding="utf-8"))
        self.categories = json.loads((self.settings.sample_data_dir / "categories.json").read_text(encoding="utf-8"))
        self.regions = json.loads((self.settings.sample_data_dir / "regions.json").read_text(encoding="utf-8"))
        self.llm_client = LLMClient(self.settings)

    def extract_job(self, text: str) -> ExtractResponse:
        """Extract a JobCard from raw text: LLM first, rule-based fallback.

        Returns a failed ExtractResponse (with error strings and the missing
        field paths) when the extracted payload fails schema validation.
        """
        logger.info("extract_job request text=%s", text)
        llm_result = self._llm_extract(text, self.settings.prompt_dir / "job_extract.md")
        if llm_result:
            try:
                return ExtractResponse(success=True, data=JobCard(**llm_result.content))
            except ValidationError as exc:
                logger.exception("LLM job extraction validation failed")
                return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))

        try:
            card = self._extract_job_rule(text)
            return ExtractResponse(success=True, data=card)
        except ValidationError as exc:
            logger.exception("Rule job extraction validation failed")
            return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))

    def extract_worker(self, text: str) -> ExtractResponse:
        """Extract a WorkerCard from raw text: LLM first, rule-based fallback."""
        logger.info("extract_worker request text=%s", text)
        llm_result = self._llm_extract(text, self.settings.prompt_dir / "worker_extract.md")
        if llm_result:
            try:
                return ExtractResponse(success=True, data=WorkerCard(**llm_result.content))
            except ValidationError as exc:
                logger.exception("LLM worker extraction validation failed")
                return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))

        try:
            card = self._extract_worker_rule(text)
            return ExtractResponse(success=True, data=card)
        except ValidationError as exc:
            logger.exception("Rule worker extraction validation failed")
            return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))

    def _llm_extract(self, text: str, prompt_path: Path):
        """Run the LLM extractor; any failure returns None so callers fall back to rules."""
        try:
            return self.llm_client.extract_json(load_prompt(prompt_path), text)
        except Exception:
            logger.exception("LLM extraction failed, fallback to rule-based extraction")
            return None

    def _extract_job_rule(self, text: str) -> JobCard:
        """Build a JobCard from the text using vocabulary matching and regexes."""
        skill_hits = [item for item in self.skills if item in text]
        category = next((item for item in self.categories if item in text), "活动执行")
        region = self._extract_region(text)
        salary = self._extract_salary(text)
        headcount = self._extract_number(text, [r"(\d+)\s*[个名人位]"], default=1)
        duration = self._extract_number(text, [r"(\d+(?:\.\d+)?)\s*小时"], default=4.0, cast=float)
        tags = [tag for tag in ["女生优先", "男生优先", "有经验优先", "沟通好", "可连做优先"] if tag in text]
        # Simple conditional instead of the original single-element generator trick.
        title = f"{category}{skill_hits[0]}兼职" if skill_hits else f"{category}兼职"
        card = JobCard(
            job_id=generate_id("job"),
            title=title,
            category=category,
            description=text,
            skills=skill_hits[:5] or self._guess_category_skills(category),
            city=region["city"],
            region=region["region"],
            location_detail=self._extract_location(text, region),
            start_time=self._extract_job_time(text),
            duration_hours=duration,
            headcount=int(headcount),
            salary=salary,
            work_mode="排班制" if "排班" in text else "兼职",
            tags=tags or ["有经验优先"],
            confidence=self._compute_confidence(skill_hits, region, salary.amount > 0),
        )
        return card

    def _extract_worker_rule(self, text: str) -> WorkerCard:
        """Build a WorkerCard from the text using vocabulary matching and regexes."""
        skill_hits = [item for item in self.skills if item in text][:6]
        region_hits = [item for item in self.regions if item["region"] in text or item["city"] in text]
        # dict.fromkeys deduplicates while preserving first-seen order.
        city_names = list(dict.fromkeys([item["city"] for item in region_hits])) or ["深圳"]
        region_names = list(dict.fromkeys([item["region"] for item in region_hits])) or ["南山"]
        availability = self._extract_availability(text)
        experience = [item for item in ["商场", "会展", "活动执行", "物流", "零售", "客服中心", "快消", "校园推广"] if item in text]
        card = WorkerCard(
            worker_id=generate_id("worker"),
            name=self._extract_name(text),
            description=text,
            # Synthetic per-skill scores: monotonically increasing from 0.72.
            skills=[SkillScore(name=item, score=round(0.72 + index * 0.04, 2)) for index, item in enumerate(skill_hits or ["活动执行", "引导", "登记"])],
            cities=city_names,
            regions=region_names,
            availability=availability,
            experience_tags=experience or ["活动执行"],
            reliability_score=0.76,
            profile_completion=0.68,
            confidence=self._compute_confidence(skill_hits, {"city": city_names[0], "region": region_names[0]}, True),
        )
        return card

    def _extract_region(self, text: str) -> dict:
        """Pick the best region entry mentioned in the text; defaults to 深圳/南山."""
        # Prefer an entry where both city and region appear, then region alone.
        for item in self.regions:
            if item["city"] in text and item["region"] in text:
                return item
        for item in self.regions:
            if item["region"] in text:
                return item
        return {"city": "深圳", "region": "南山"}

    def _extract_location(self, text: str, region: dict) -> str:
        """Compose a location string from the region plus the first venue marker found."""
        markers = ["会展中心", "商场", "地铁站", "园区", "写字楼", "仓库", "门店"]
        for marker in markers:
            if marker in text:
                return f"{region['city']}{region['region']}{marker}"
        return f"{region['city']}{region['region']}待定点位"

    def _extract_salary(self, text: str) -> Salary:
        """Parse the pay amount and rate type; defaults to 150 CNY daily."""
        amount = self._extract_number(text, [r"(\d+(?:\.\d+)?)\s*(?:元|块)"], default=150.0, cast=float)
        # "/小时" implies hourly pay; the original extra '"小时" in text' check
        # was redundant ("/小时" already contains "小时").
        salary_type = "hourly" if "/小时" in text else "daily"
        return Salary(type=salary_type, amount=amount, currency="CNY")

    def _extract_number(self, text: str, patterns: list[str], default, cast=int):
        """Return the first regex group-1 match converted via *cast*, else *default*."""
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                return cast(match.group(1))
        return default

    def _extract_job_time(self, text: str) -> datetime:
        """Guess the job start datetime in Asia/Shanghai (UTC+8) from the text.

        Recognises "明天"/"后天" and explicit "M月D日" dates (defaulting to
        tomorrow), plus a daypart default hour (上午 9 / 下午 13 / 晚上 19)
        overridable by an explicit "H点"/"H:"/"H时" hour.
        """
        shanghai_tz = timezone(timedelta(hours=8))
        now = datetime.now(shanghai_tz)
        if "明天" in text:
            base = now + timedelta(days=1)
        elif "后天" in text:
            base = now + timedelta(days=2)
        else:
            month_day = re.search(r"(\d{1,2})月(\d{1,2})日", text)
            if month_day:
                month, day = int(month_day.group(1)), int(month_day.group(2))
                try:
                    base = now.replace(month=month, day=day)
                except ValueError:
                    # Invalid calendar date in the text (e.g. "2月30日"):
                    # fall back to tomorrow instead of crashing.
                    base = now + timedelta(days=1)
            else:
                base = now + timedelta(days=1)
        is_pm = "下午" in text
        is_evening = "晚上" in text
        hour = 13 if is_pm else (19 if is_evening else 9)
        explicit_hour = re.search(r"(\d{1,2})[:点时](\d{0,2})?", text)
        if explicit_hour:
            candidate = int(explicit_hour.group(1))
            # Bug fix: "下午3点" means 15:00, not 03:00 — shift sub-noon
            # hours into the stated afternoon/evening daypart.
            if (is_pm or is_evening) and 1 <= candidate <= 11:
                candidate += 12
            # Ignore impossible hours (>=24) instead of letting .replace() raise.
            if 0 <= candidate <= 23:
                hour = candidate
        return base.replace(hour=hour, minute=0, second=0, microsecond=0)

    def _extract_availability(self, text: str) -> list[str]:
        """Map availability keywords in the text to slot tags; defaults to anytime."""
        tags = []
        if "周末" in text:
            tags.append("weekend")
        if "上午" in text:
            tags.append("weekday_am")
        if "下午" in text:
            tags.append("weekday_pm")
        if "随时" in text or "都能" in text or "全天" in text:
            tags.append("anytime")
        return tags or ["anytime"]

    def _extract_name(self, text: str) -> str:
        """Pull a 2-4 character CJK name after 我叫/我是; anonymous fallback."""
        if match := re.search(r"我叫([\u4e00-\u9fa5]{2,4})", text):
            return match.group(1)
        if match := re.search(r"我是([\u4e00-\u9fa5]{2,4})", text):
            return match.group(1)
        return "匿名候选人"

    def _guess_category_skills(self, category: str) -> list[str]:
        """Default skills for a category when none were found in the text."""
        mapping = {
            "活动执行": ["签到", "引导", "登记"],
            "促销": ["促销", "导购", "陈列"],
            "配送": ["配送", "装卸", "司机协助"],
            "客服": ["客服", "电话邀约", "线上客服"],
        }
        return mapping.get(category, ["活动执行", "沟通"])

    def _compute_confidence(self, skill_hits: list[str], region: dict, has_salary: bool) -> float:
        """Heuristic extraction confidence in [0.55, 0.95]."""
        score = 0.55
        if skill_hits:
            score += 0.15
        if region.get("city"):
            score += 0.15
        if has_salary:
            score += 0.1
        return min(round(score, 2), 0.95)

    def _missing_fields(self, exc: ValidationError) -> list[str]:
        """Dotted field paths for every error in a pydantic ValidationError."""
        return [".".join(str(part) for part in item["loc"]) for item in exc.errors()]
|
||||
52
gig-poc/apps/api/app/services/ingest_service.py
Normal file
52
gig-poc/apps/api/app/services/ingest_service.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.core.logging import logger
|
||||
from app.domain.schemas import BootstrapResponse, JobCard, WorkerCard
|
||||
from app.repositories.job_repository import JobRepository
|
||||
from app.repositories.worker_repository import WorkerRepository
|
||||
from app.services.rag.lightrag_adapter import LightRAGAdapter
|
||||
|
||||
|
||||
class IngestService:
    """Persist job/worker cards into the relational store and the RAG index."""

    def __init__(self, db: Session):
        self.db = db
        self.settings = get_settings()
        self.job_repository = JobRepository(db)
        self.worker_repository = WorkerRepository(db)
        self.rag = LightRAGAdapter(self.settings)

    def _read_sample(self, filename: str):
        """Load one JSON document from the bundled sample-data directory."""
        raw = (self.settings.sample_data_dir / filename).read_text(encoding="utf-8")
        return json.loads(raw)

    def ingest_job(self, card: JobCard) -> JobCard:
        """Upsert one job card into SQL and the vector index; returns it unchanged."""
        logger.info("ingest_job job_id=%s", card.job_id)
        self.job_repository.upsert(card)
        self.rag.upsert_job(card)
        return card

    def ingest_worker(self, card: WorkerCard) -> WorkerCard:
        """Upsert one worker card into SQL and the vector index; returns it unchanged."""
        logger.info("ingest_worker worker_id=%s", card.worker_id)
        self.worker_repository.upsert(card)
        self.rag.upsert_worker(card)
        return card

    def bootstrap(self) -> BootstrapResponse:
        """Seed the system from the sample-data files and report ingest counts."""
        skills = self._read_sample("skills.json")
        categories = self._read_sample("categories.json")
        regions = self._read_sample("regions.json")
        jobs = self._read_sample("jobs.json")
        workers = self._read_sample("workers.json")
        self.rag.ensure_ready()
        for raw_job in jobs:
            self.ingest_job(JobCard(**raw_job))
        for raw_worker in workers:
            self.ingest_worker(WorkerCard(**raw_worker))
        return BootstrapResponse(
            jobs=len(jobs),
            workers=len(workers),
            skills=len(skills),
            categories=len(categories),
            regions=len(regions),
        )
|
||||
34
gig-poc/apps/api/app/services/llm_client.py
Normal file
34
gig-poc/apps/api/app/services/llm_client.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import httpx
|
||||
|
||||
from app.core.config import Settings
|
||||
from app.domain.schemas import PromptOutput
|
||||
|
||||
|
||||
class LLMClient:
    """Minimal client for an OpenAI-compatible chat-completions endpoint."""

    def __init__(self, settings: Settings):
        self.settings = settings

    def extract_json(self, system_prompt: str, user_text: str) -> PromptOutput | None:
        """Ask the configured LLM for a JSON object; None when LLM is not configured.

        Returns a PromptOutput holding both the parsed JSON and the raw
        completion text. HTTP errors and malformed JSON propagate to the caller.
        """
        cfg = self.settings
        if not (cfg.llm_enabled and cfg.llm_base_url and cfg.llm_api_key):
            # LLM disabled or incompletely configured: caller falls back.
            return None

        endpoint = f"{cfg.llm_base_url.rstrip('/')}/chat/completions"
        request_body = {
            "model": cfg.llm_model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_text},
            ],
            "temperature": 0.1,
            "response_format": {"type": "json_object"},
        }
        auth_headers = {"Authorization": f"Bearer {cfg.llm_api_key}"}
        with httpx.Client(timeout=30.0) as http:
            reply = http.post(endpoint, json=request_body, headers=auth_headers)
            reply.raise_for_status()
            body = reply.json()
        raw_text = body["choices"][0]["message"]["content"]
        return PromptOutput(content=json.loads(raw_text), raw_text=raw_text)
|
||||
178
gig-poc/apps/api/app/services/matching_service.py
Normal file
178
gig-poc/apps/api/app/services/matching_service.py
Normal file
@@ -0,0 +1,178 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.core.logging import logger
|
||||
from app.domain.schemas import JobCard, MatchBreakdown, MatchResult, QueryFilters, SourceType, WorkerCard
|
||||
from app.repositories.job_repository import JobRepository
|
||||
from app.repositories.match_repository import MatchRepository
|
||||
from app.repositories.worker_repository import WorkerRepository
|
||||
from app.services.card_mapper import job_to_card, worker_to_card
|
||||
from app.services.rag.lightrag_adapter import LightRAGAdapter
|
||||
from app.utils.ids import generate_id
|
||||
|
||||
|
||||
class MatchingService:
    """Recall candidates via the RAG index and rank them with weighted rule scores."""

    def __init__(self, db: Session):
        self.db = db
        self.settings = get_settings()
        self.jobs = JobRepository(db)
        self.workers = WorkerRepository(db)
        self.matches = MatchRepository(db)
        self.rag = LightRAGAdapter(self.settings)

    def match_workers(self, source: JobCard, top_n: int) -> list[MatchResult]:
        """Rank workers for a job card, persist the top-N matches, return them."""
        logger.info("match_workers source_id=%s top_n=%s", source.job_id, top_n)
        query_text = " ".join([source.title, source.category, source.city, source.region, *source.skills, *source.tags])
        recall_limit = max(top_n * 3, self.settings.default_recall_top_k)
        candidate_ids = self.rag.search(
            query_text=query_text,
            filters=QueryFilters(entity_type="worker", city=source.city),
            limit=recall_limit,
        )
        # Empty recall falls back to a plain listing so there is always an answer.
        candidates = self.workers.get_many(candidate_ids) or self.workers.list(limit=max(top_n * 3, 50))
        scored = [self._build_job_to_worker_match(source, worker_to_card(row)) for row in candidates]
        scored.sort(key=lambda item: item.match_score, reverse=True)
        top = scored[:top_n]
        self.matches.bulk_replace(top, SourceType.job_to_worker.value, source.job_id)
        return top

    def match_jobs(self, source: WorkerCard, top_n: int) -> list[MatchResult]:
        """Rank jobs for a worker card, persist the top-N matches, return them."""
        logger.info("match_jobs source_id=%s top_n=%s", source.worker_id, top_n)
        query_text = " ".join([source.name, *source.cities, *source.regions, *[item.name for item in source.skills], *source.experience_tags])
        city = source.cities[0] if source.cities else None
        recall_limit = max(top_n * 3, self.settings.default_recall_top_k)
        candidate_ids = self.rag.search(
            query_text=query_text,
            filters=QueryFilters(entity_type="job", city=city),
            limit=recall_limit,
        )
        # Empty recall falls back to a plain listing so there is always an answer.
        candidates = self.jobs.get_many(candidate_ids) or self.jobs.list(limit=max(top_n * 3, 50))
        scored = [self._build_worker_to_job_match(source, job_to_card(row)) for row in candidates]
        scored.sort(key=lambda item: item.match_score, reverse=True)
        top = scored[:top_n]
        self.matches.bulk_replace(top, SourceType.worker_to_job.value, source.worker_id)
        return top

    def explain(self, match_id: str) -> MatchResult | None:
        """Fetch a stored match by id and convert it back to the API schema."""
        record = self.matches.get(match_id)
        if record is None:
            return None
        # Local import mirrors the original module layout.
        from app.services.card_mapper import match_record_to_schema

        return match_record_to_schema(record)

    def _build_job_to_worker_match(self, job: JobCard, worker: WorkerCard) -> MatchResult:
        """Score one (job, worker) pair and package the explainable result."""
        required = set(job.skills)
        related = self.rag.expand_skills(job.skills)
        offered = {entry.name: entry.score for entry in worker.skills}
        direct_hits = required & offered.keys()
        related_hits = related & offered.keys()
        raw_skill = sum(offered[name] for name in related_hits) / max(len(required), 1)
        if not direct_hits:
            # Only graph-expanded skills matched: damp the score heavily.
            raw_skill *= 0.4
        skill_score = min(raw_skill, 1.0)
        region_score = self._region_score(job.city, job.region, worker.cities, worker.regions)
        time_score = self._time_score(job.start_time, worker.availability)
        experience_score = self._experience_score([job.category, *job.tags], worker.experience_tags)
        reliability_score = worker.reliability_score
        total = self._weighted_score(skill_score, region_score, time_score, experience_score, reliability_score)
        breakdown = MatchBreakdown(
            skill_score=round(skill_score, 2),
            region_score=round(region_score, 2),
            time_score=round(time_score, 2),
            experience_score=round(experience_score, 2),
            reliability_score=round(reliability_score, 2),
        )
        reasons = self._build_reasons(
            matched_skills=list(related_hits)[:3],
            region_hit=region_score,
            time_score=time_score,
            experience_hits=list(set(job.tags).intersection(worker.experience_tags))[:2] or [job.category],
            reliability_score=reliability_score,
            target_region=job.region,
        )
        return MatchResult(
            match_id=generate_id("match"),
            source_type=SourceType.job_to_worker,
            source_id=job.job_id,
            target_id=worker.worker_id,
            match_score=round(total, 2),
            breakdown=breakdown,
            reasons=reasons,
        )

    def _build_worker_to_job_match(self, worker: WorkerCard, job: JobCard) -> MatchResult:
        """Score a (worker, job) pair by reusing the job→worker scorer, flipped."""
        mirrored = self._build_job_to_worker_match(job, worker)
        return MatchResult(
            match_id=generate_id("match"),
            source_type=SourceType.worker_to_job,
            source_id=worker.worker_id,
            target_id=job.job_id,
            match_score=mirrored.match_score,
            breakdown=mirrored.breakdown,
            reasons=mirrored.reasons,
        )

    def _region_score(self, job_city: str, job_region: str, worker_cities: list[str], worker_regions: list[str]) -> float:
        """1.0 for an exact region hit, 0.7 for same city, 0.2 otherwise."""
        if job_region in worker_regions:
            return 1.0
        if job_city in worker_cities:
            return 0.7
        return 0.2

    def _time_score(self, start_time: datetime, availability: list[str]) -> float:
        """1.0 when availability covers the slot, 0.4 otherwise; 'anytime' always fits."""
        if "anytime" in availability:
            return 1.0
        if start_time.weekday() >= 5:
            wanted = "weekend"
        elif start_time.hour >= 12:
            wanted = "weekday_pm"
        else:
            wanted = "weekday_am"
        return 1.0 if wanted in availability else 0.4

    def _experience_score(self, left: list[str], right: list[str]) -> float:
        """Overlap ratio of experience tags, floored at 0.4 and capped at 1.0."""
        left_tags = set(left)
        right_tags = set(right)
        if not (left_tags and right_tags):
            return 0.4
        overlap_ratio = len(left_tags & right_tags) / max(len(left_tags), 1)
        return min(overlap_ratio + 0.4, 1.0)

    def _weighted_score(
        self,
        skill_score: float,
        region_score: float,
        time_score: float,
        experience_score: float,
        reliability_score: float,
    ) -> float:
        """Linear combination of the component scores with configured weights."""
        weighted_pairs = (
            (self.settings.score_skill_weight, skill_score),
            (self.settings.score_region_weight, region_score),
            (self.settings.score_time_weight, time_score),
            (self.settings.score_experience_weight, experience_score),
            (self.settings.score_reliability_weight, reliability_score),
        )
        return sum(weight * value for weight, value in weighted_pairs)

    def _build_reasons(
        self,
        matched_skills: list[str],
        region_hit: float,
        time_score: float,
        experience_hits: list[str],
        reliability_score: float,
        target_region: str,
    ) -> list[str]:
        """Assemble 3-5 human-readable reasons explaining the match."""
        reasons: list[str] = []
        if matched_skills:
            reasons.append(f"具备{'、'.join(matched_skills[:3])}相关技能")
        if region_hit >= 1.0:
            reasons.append(f"服务区域覆盖{target_region},与岗位地点一致")
        elif region_hit >= 0.7:
            reasons.append("同城可到岗,区域匹配度较高")
        if time_score >= 1.0:
            reasons.append("可接单时间与岗位时间要求匹配")
        if experience_hits:
            reasons.append(f"具备{'、'.join(experience_hits[:2])}相关经验")
        if reliability_score >= 0.75:
            reasons.append("履约可信度较好,适合优先推荐")
        # Pad with a generic line up to the minimum of three reasons.
        padding_needed = 3 - len(reasons)
        if padding_needed > 0:
            reasons.extend(["岗位需求与候选画像存在基础匹配"] * padding_needed)
        return reasons[:5]
|
||||
143
gig-poc/apps/api/app/services/rag/lightrag_adapter.py
Normal file
143
gig-poc/apps/api/app/services/rag/lightrag_adapter.py
Normal file
@@ -0,0 +1,143 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
import math
import zlib
from collections import defaultdict
|
||||
|
||||
from qdrant_client import QdrantClient, models
|
||||
|
||||
from app.core.config import Settings
|
||||
from app.core.logging import logger
|
||||
from app.domain.schemas import JobCard, QueryFilters, WorkerCard
|
||||
|
||||
|
||||
class LightRAGAdapter:
    """Lightweight RAG layer over Qdrant.

    Documents are embedded with a hashed bag-of-tokens vector (see
    _vectorize) and skills are expanded through a bidirectional relation
    graph loaded from the sample data.
    """

    def __init__(self, settings: Settings):
        self.settings = settings
        self.client = QdrantClient(url=settings.qdrant_url)
        # Bidirectional skill-relation graph used for recall-time expansion.
        self.skill_graph = self._load_skill_graph()

    def ensure_ready(self) -> None:
        """Create the vector collection if it does not exist yet (idempotent)."""
        collections = {item.name for item in self.client.get_collections().collections}
        if self.settings.qdrant_collection not in collections:
            self.client.create_collection(
                collection_name=self.settings.qdrant_collection,
                vectors_config=models.VectorParams(size=self.settings.vector_size, distance=models.Distance.COSINE),
            )

    def health(self) -> str:
        """Probe the collection; raises if Qdrant is unreachable."""
        self.ensure_ready()
        self.client.get_collection(self.settings.qdrant_collection)
        return "ok"

    def upsert_job(self, job: JobCard) -> None:
        """Index one job card: payload for filtering plus its document vector."""
        self.ensure_ready()
        payload = {
            "entity_type": "job",
            "entity_id": job.job_id,
            "city": job.city,
            "region": job.region,
            "category": job.category,
            "skills": job.skills,
            "tags": job.tags,
            "document": self._serialize_job(job),
        }
        self.client.upsert(
            collection_name=self.settings.qdrant_collection,
            points=[
                models.PointStruct(
                    id=job.job_id,
                    vector=self._vectorize(payload["document"]),
                    payload=payload,
                )
            ],
        )

    def upsert_worker(self, worker: WorkerCard) -> None:
        """Index one worker card: payload for filtering plus its document vector."""
        self.ensure_ready()
        payload = {
            "entity_type": "worker",
            "entity_id": worker.worker_id,
            # Workers can serve several cities/regions; the payload keeps only
            # the first of each for coarse filtering.
            "city": worker.cities[0] if worker.cities else "",
            "region": worker.regions[0] if worker.regions else "",
            "category": worker.experience_tags[0] if worker.experience_tags else "",
            "skills": [item.name for item in worker.skills],
            "tags": worker.experience_tags,
            "document": self._serialize_worker(worker),
        }
        self.client.upsert(
            collection_name=self.settings.qdrant_collection,
            points=[
                models.PointStruct(
                    id=worker.worker_id,
                    vector=self._vectorize(payload["document"]),
                    payload=payload,
                )
            ],
        )

    def search(self, query_text: str, filters: QueryFilters, limit: int) -> list[str]:
        """Vector search with entity-type/city filters; returns entity ids.

        The region filter is applied client-side after the search, so fewer
        than *limit* ids may be returned.
        """
        self.ensure_ready()
        must = [models.FieldCondition(key="entity_type", match=models.MatchValue(value=filters.entity_type))]
        if filters.city:
            must.append(models.FieldCondition(key="city", match=models.MatchValue(value=filters.city)))
        query_filter = models.Filter(must=must)
        results = self.client.search(
            collection_name=self.settings.qdrant_collection,
            query_vector=self._vectorize(query_text),
            query_filter=query_filter,
            limit=limit,
            with_payload=True,
        )
        ids = []
        for point in results:
            payload = point.payload or {}
            if filters.region and payload.get("region") != filters.region:
                continue
            ids.append(str(payload.get("entity_id", point.id)))
        return ids

    def expand_skills(self, skills: list[str]) -> set[str]:
        """Return the input skills plus all directly related skills from the graph."""
        expanded = set(skills)
        for skill in skills:
            expanded.update(self.skill_graph.get(skill, []))
        return expanded

    def _load_skill_graph(self) -> dict[str, set[str]]:
        """Load skill relations as an undirected adjacency map; empty when absent."""
        relations_path = self.settings.sample_data_dir / "skill_relations.json"
        if not relations_path.exists():
            return defaultdict(set)
        data = json.loads(relations_path.read_text(encoding="utf-8"))
        graph: dict[str, set[str]] = defaultdict(set)
        for source, targets in data.items():
            graph[source].update(targets)
            for target in targets:
                # Make the relation symmetric.
                graph[target].add(source)
        return graph

    def _serialize_job(self, job: JobCard) -> str:
        """Flatten a job card into one whitespace-joined document string."""
        return " ".join([job.title, job.category, job.city, job.region, *job.skills, *job.tags, job.description])

    def _serialize_worker(self, worker: WorkerCard) -> str:
        """Flatten a worker card into one whitespace-joined document string."""
        return " ".join(
            [worker.name, *worker.cities, *worker.regions, *[item.name for item in worker.skills], *worker.experience_tags, worker.description]
        )

    def _vectorize(self, text: str) -> list[float]:
        """L2-normalised hashed bag-of-tokens embedding of fixed size.

        Bug fix: tokens are bucketed with zlib.crc32 instead of the builtin
        hash(). Python randomises str hashing per process (PYTHONHASHSEED),
        so hash()-based buckets made stored vectors incompatible with query
        vectors computed by any later interpreter run.
        """
        vector = [0.0 for _ in range(self.settings.vector_size)]
        for token in self._tokenize(text):
            index = zlib.crc32(token.encode("utf-8")) % self.settings.vector_size
            vector[index] += 1.0
        norm = math.sqrt(sum(item * item for item in vector)) or 1.0
        return [item / norm for item in vector]

    def _tokenize(self, text: str) -> list[str]:
        """Whitespace tokens plus character 2- and 3-grams over the raw text."""
        cleaned = [part.strip().lower() for part in text.replace(",", " ").replace("、", " ").replace("。", " ").split()]
        tokens = [part for part in cleaned if part]
        for size in (2, 3):
            for index in range(max(len(text) - size + 1, 0)):
                chunk = text[index : index + size].strip()
                if chunk:
                    tokens.append(chunk)
        return tokens
|
||||
Reference in New Issue
Block a user