# Files
# Airtep/gig-poc/apps/api/app/services/matching_service.py
# 2026-03-30 20:49:40 +08:00
#
# 179 lines
# 8.0 KiB
# Python

from __future__ import annotations
from datetime import datetime
from sqlalchemy.orm import Session
from app.core.config import get_settings
from app.core.logging import logger
from app.domain.schemas import JobCard, MatchBreakdown, MatchResult, QueryFilters, SourceType, WorkerCard
from app.repositories.job_repository import JobRepository
from app.repositories.match_repository import MatchRepository
from app.repositories.worker_repository import WorkerRepository
from app.services.card_mapper import job_to_card, worker_to_card
from app.services.rag.lightrag_adapter import LightRAGAdapter
from app.utils.ids import generate_id
class MatchingService:
    """Recall, score and persist matches between job cards and worker cards.

    Both directions (job -> workers, worker -> jobs) follow the same pipeline:

    1. build a free-text query from the source card,
    2. ask the RAG adapter for candidate ids (filtered by entity type and city),
    3. load the candidates, falling back to a plain repository listing when
       the recall step yields nothing,
    4. score every candidate with a weighted sum of five sub-scores,
    5. keep the ``top_n`` best results and store them via the match repository.
    """

    # Recall over-fetches relative to ``top_n`` so that re-ranking has room to work.
    _RECALL_MULTIPLIER = 3
    # Minimum number of rows requested from the repository listing fallback.
    _FALLBACK_LIST_LIMIT = 50
    # Score assigned when a sub-score finds no evidence (partial / no hit).
    _WEAK_SCORE = 0.4
    # Multiplier applied to the skill score when only expanded (not directly
    # requested) skills matched.
    _INDIRECT_SKILL_PENALTY = 0.4
    # Every match carries at least this many reasons, and at most ``_MAX_REASONS``.
    _MIN_REASONS = 3
    _MAX_REASONS = 5
    # Enumeration comma used to separate several names inside one reason sentence.
    _NAME_SEPARATOR = "、"
    _GENERIC_REASON = "岗位需求与候选画像存在基础匹配"

    def __init__(self, db: Session):
        """Wire up repositories and the RAG adapter on top of one DB session.

        Args:
            db: SQLAlchemy session shared by all repositories of this service.
        """
        self.db = db
        self.settings = get_settings()
        self.jobs = JobRepository(db)
        self.workers = WorkerRepository(db)
        self.matches = MatchRepository(db)
        self.rag = LightRAGAdapter(self.settings)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def match_workers(self, source: JobCard, top_n: int) -> list[MatchResult]:
        """Find the best workers for a job and persist the ranking.

        Args:
            source: The job card to find workers for.
            top_n: Maximum number of matches to return; values below zero are
                treated as zero.

        Returns:
            Up to ``top_n`` matches ordered by descending ``match_score``.
        """
        logger.info("match_workers source_id=%s top_n=%s", source.job_id, top_n)
        query_text = self._query_text(
            [source.title, source.category, source.city, source.region, *source.skills, *source.tags]
        )
        candidate_ids = self.rag.search(
            query_text=query_text,
            filters=QueryFilters(entity_type="worker", city=source.city),
            limit=self._recall_limit(top_n),
        )
        # An empty recall result falls back to a plain listing so that the
        # caller still gets ranked candidates.
        candidates = self.workers.get_many(candidate_ids) or self.workers.list(
            limit=self._fallback_limit(top_n)
        )
        scored = [self._build_job_to_worker_match(source, worker_to_card(worker)) for worker in candidates]
        results = self._top_results(scored, top_n)
        self.matches.bulk_replace(results, SourceType.job_to_worker.value, source.job_id)
        return results

    def match_jobs(self, source: WorkerCard, top_n: int) -> list[MatchResult]:
        """Find the best jobs for a worker and persist the ranking.

        Args:
            source: The worker card to find jobs for.
            top_n: Maximum number of matches to return; values below zero are
                treated as zero.

        Returns:
            Up to ``top_n`` matches ordered by descending ``match_score``.
        """
        logger.info("match_jobs source_id=%s top_n=%s", source.worker_id, top_n)
        query_text = self._query_text(
            [
                source.name,
                *source.cities,
                *source.regions,
                *[item.name for item in source.skills],
                *source.experience_tags,
            ]
        )
        # Only the first listed city is used as the recall filter.
        city = source.cities[0] if source.cities else None
        candidate_ids = self.rag.search(
            query_text=query_text,
            filters=QueryFilters(entity_type="job", city=city),
            limit=self._recall_limit(top_n),
        )
        candidates = self.jobs.get_many(candidate_ids) or self.jobs.list(
            limit=self._fallback_limit(top_n)
        )
        scored = [self._build_worker_to_job_match(source, job_to_card(job)) for job in candidates]
        results = self._top_results(scored, top_n)
        self.matches.bulk_replace(results, SourceType.worker_to_job.value, source.worker_id)
        return results

    def explain(self, match_id: str) -> MatchResult | None:
        """Return the stored match with the given id, or ``None`` if unknown."""
        record = self.matches.get(match_id)
        if record is None:
            return None
        # Kept as a function-local import, exactly as in the original code.
        from app.services.card_mapper import match_record_to_schema

        return match_record_to_schema(record)

    # ------------------------------------------------------------------
    # Recall helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _query_text(parts: list[str]) -> str:
        """Join the non-empty query fragments with single spaces.

        Falsy fragments (``None`` or ``""``) are skipped so that a card with an
        unset optional field cannot break ``str.join``.
        """
        return " ".join(part for part in parts if part)

    def _recall_limit(self, top_n: int) -> int:
        """Number of candidate ids to request from the RAG adapter."""
        return max(top_n * self._RECALL_MULTIPLIER, self.settings.default_recall_top_k)

    def _fallback_limit(self, top_n: int) -> int:
        """Number of rows to list when the recall step returned no candidates."""
        return max(top_n * self._RECALL_MULTIPLIER, self._FALLBACK_LIST_LIMIT)

    @staticmethod
    def _top_results(results: list[MatchResult], top_n: int) -> list[MatchResult]:
        """Sort by descending ``match_score`` and keep at most ``top_n`` items.

        ``top_n`` is clamped at zero: a negative slice bound would otherwise
        mean "drop the last |top_n| items" rather than "return nothing".
        """
        ranked = sorted(results, key=lambda item: item.match_score, reverse=True)
        return ranked[: max(top_n, 0)]

    # ------------------------------------------------------------------
    # Match construction
    # ------------------------------------------------------------------
    def _build_job_to_worker_match(self, job: JobCard, worker: WorkerCard) -> MatchResult:
        """Score ``worker`` against ``job`` and wrap it as a job->worker match."""
        score, breakdown, reasons = self._score_pair(job, worker)
        return MatchResult(
            match_id=generate_id("match"),
            source_type=SourceType.job_to_worker,
            source_id=job.job_id,
            target_id=worker.worker_id,
            match_score=score,
            breakdown=breakdown,
            reasons=reasons,
        )

    def _build_worker_to_job_match(self, worker: WorkerCard, job: JobCard) -> MatchResult:
        """Score ``job`` for ``worker`` and wrap it as a worker->job match.

        The score, breakdown and reasons are identical to the job->worker
        direction; only source/target and the source type are swapped.
        """
        score, breakdown, reasons = self._score_pair(job, worker)
        return MatchResult(
            match_id=generate_id("match"),
            source_type=SourceType.worker_to_job,
            source_id=worker.worker_id,
            target_id=job.job_id,
            match_score=score,
            breakdown=breakdown,
            reasons=reasons,
        )

    def _score_pair(self, job: JobCard, worker: WorkerCard) -> tuple[float, MatchBreakdown, list[str]]:
        """Compute the direction-independent part of a job/worker match.

        Returns:
            ``(score, breakdown, reasons)`` where ``score`` and every breakdown
            field are rounded to two decimals.
        """
        job_skills = set(job.skills)
        # Coerce to a set and union with the requested skills: a no-op when the
        # adapter already returns a superset, and a safeguard when it returns a
        # list or only the related skills.
        expanded_skills = set(self.rag.expand_skills(job.skills)) | job_skills
        worker_skills = {item.name: item.score for item in worker.skills}

        direct_hits = job_skills.intersection(worker_skills)
        expanded_hits = expanded_skills.intersection(worker_skills)

        # Sum of the worker's own scores for every hit, normalised by the
        # number of requested skills (guarding against an empty request).
        base_skill_score = sum(worker_skills[name] for name in expanded_hits) / max(len(job_skills), 1)
        if not direct_hits:
            base_skill_score *= self._INDIRECT_SKILL_PENALTY
        skill_score = min(base_skill_score, 1.0)

        region_score = self._region_score(job.city, job.region, worker.cities, worker.regions)
        time_score = self._time_score(job.start_time, worker.availability)
        experience_score = self._experience_score([job.category, *job.tags], worker.experience_tags)
        reliability_score = worker.reliability_score
        score = self._weighted_score(skill_score, region_score, time_score, experience_score, reliability_score)

        breakdown = MatchBreakdown(
            skill_score=round(skill_score, 2),
            region_score=round(region_score, 2),
            time_score=round(time_score, 2),
            experience_score=round(experience_score, 2),
            reliability_score=round(reliability_score, 2),
        )
        # Sets have no stable order, so rank/sort before truncating to keep the
        # reasons reproducible between runs.
        experience_hits = sorted(set(job.tags).intersection(worker.experience_tags))[:2] or [job.category]
        reasons = self._build_reasons(
            matched_skills=self._rank_skill_hits(expanded_hits, worker_skills, 3),
            region_hit=region_score,
            time_score=time_score,
            experience_hits=experience_hits,
            reliability_score=reliability_score,
            target_region=job.region,
        )
        return round(score, 2), breakdown, reasons

    @staticmethod
    def _rank_skill_hits(hits: set[str], worker_skills: dict[str, float], limit: int) -> list[str]:
        """Return up to ``limit`` hit names, strongest worker score first.

        Ties are broken alphabetically so the result is fully deterministic.
        Every name in ``hits`` must be a key of ``worker_skills``.
        """
        return sorted(hits, key=lambda name: (-worker_skills[name], name))[:limit]

    # ------------------------------------------------------------------
    # Sub-scores
    # ------------------------------------------------------------------
    def _region_score(
        self,
        job_city: str,
        job_region: str,
        worker_cities: list[str],
        worker_regions: list[str],
    ) -> float:
        """Score location fit: 1.0 same region, 0.7 same city, 0.2 otherwise."""
        if job_region in worker_regions:
            return 1.0
        if job_city in worker_cities:
            return 0.7
        return 0.2

    def _time_score(self, start_time: datetime, availability: list[str]) -> float:
        """Score schedule fit between the job start and the worker's slots.

        The job start is mapped to one of ``"weekend"`` (Saturday/Sunday),
        ``"weekday_pm"`` (hour >= 12) or ``"weekday_am"``. A worker listing
        ``"anytime"`` or the mapped slot scores 1.0; anything else scores 0.4.
        """
        if "anytime" in availability:
            return 1.0
        if start_time.weekday() >= 5:
            desired = "weekend"
        elif start_time.hour >= 12:
            desired = "weekday_pm"
        else:
            desired = "weekday_am"
        return 1.0 if desired in availability else self._WEAK_SCORE

    def _experience_score(self, left: list[str], right: list[str]) -> float:
        """Score tag overlap as ``|left & right| / |left| + 0.4``, capped at 1.0.

        Returns the 0.4 baseline when either side has no tags at all.
        """
        left_set = set(left)
        right_set = set(right)
        if not left_set or not right_set:
            return self._WEAK_SCORE
        overlap = len(left_set.intersection(right_set))
        return min(overlap / len(left_set) + self._WEAK_SCORE, 1.0)

    def _weighted_score(
        self,
        skill_score: float,
        region_score: float,
        time_score: float,
        experience_score: float,
        reliability_score: float,
    ) -> float:
        """Combine the five sub-scores using the weights from settings.

        The weights are applied as-is; nothing here normalises them.
        """
        settings = self.settings
        return (
            settings.score_skill_weight * skill_score
            + settings.score_region_weight * region_score
            + settings.score_time_weight * time_score
            + settings.score_experience_weight * experience_score
            + settings.score_reliability_weight * reliability_score
        )

    # ------------------------------------------------------------------
    # Explanations
    # ------------------------------------------------------------------
    def _build_reasons(
        self,
        matched_skills: list[str],
        region_hit: float,
        time_score: float,
        experience_hits: list[str],
        reliability_score: float,
        target_region: str,
    ) -> list[str]:
        """Turn the sub-scores into human-readable reasons.

        Args:
            matched_skills: Skill names to mention (at most three are used).
            region_hit: Region sub-score; >= 1.0 means same region, >= 0.7 same city.
            time_score: Time sub-score; only a full 1.0 yields a reason.
            experience_hits: Experience tags to mention (at most two are used).
            reliability_score: Worker reliability; >= 0.75 yields a reason.
            target_region: Region name interpolated into the same-region reason.

        Returns:
            Between three and five reasons; a generic reason is repeated as
            padding when fewer than three specific ones apply.
        """
        reasons: list[str] = []
        if matched_skills:
            skill_names = self._NAME_SEPARATOR.join(matched_skills[:3])
            reasons.append(f"具备{skill_names}相关技能")
        if region_hit >= 1.0:
            reasons.append(f"服务区域覆盖{target_region},与岗位地点一致")
        elif region_hit >= 0.7:
            reasons.append("同城可到岗,区域匹配度较高")
        if time_score >= 1.0:
            reasons.append("可接单时间与岗位时间要求匹配")
        if experience_hits:
            experience_names = self._NAME_SEPARATOR.join(experience_hits[:2])
            reasons.append(f"具备{experience_names}相关经验")
        if reliability_score >= 0.75:
            reasons.append("履约可信度较好,适合优先推荐")
        # Pad up to the minimum; a non-positive multiplier adds nothing.
        reasons.extend([self._GENERIC_REASON] * (self._MIN_REASONS - len(reasons)))
        return reasons[: self._MAX_REASONS]