feat: add new folder
This commit is contained in:
8
gig-poc/.env.example
Normal file
8
gig-poc/.env.example
Normal file
@@ -0,0 +1,8 @@
|
||||
APP_ENV=development
|
||||
LOG_LEVEL=INFO
|
||||
DATABASE_URL=postgresql+psycopg://gig:gig@postgres:5432/gig_poc
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
LLM_ENABLED=false
|
||||
LLM_BASE_URL=
|
||||
LLM_API_KEY=
|
||||
LLM_MODEL=gpt-5.4
|
||||
6
gig-poc/.gitignore
vendored
Normal file
6
gig-poc/.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
__pycache__/
|
||||
.DS_Store
|
||||
.env
|
||||
node_modules/
|
||||
dist/
|
||||
*.pyc
|
||||
3
gig-poc/README.md
Normal file
3
gig-poc/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Gig POC
|
||||
|
||||
项目说明见 [docs/README.md](./docs/README.md)。
|
||||
18
gig-poc/apps/api/Dockerfile
Normal file
18
gig-poc/apps/api/Dockerfile
Normal file
@@ -0,0 +1,18 @@
|
||||
# API image for the Gig POC. Uses a DaoCloud mirror of the official
# python:3.11-slim image and the TUNA PyPI mirror (both China-friendly).
FROM docker.m.daocloud.io/library/python:3.11-slim

# No .pyc files, unbuffered stdout/stderr (container-friendly logs),
# and the pip index pointed at the TUNA mirror.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple

WORKDIR /workspace

# Copy the API project plus the shared packages directory
# (prompts / sample-data / shared-types are resolved relative to it).
COPY apps/api /workspace/apps/api
COPY packages /workspace/packages

# Install the API project and its declared dependencies into the image.
RUN pip install --no-cache-dir /workspace/apps/api

WORKDIR /workspace/apps/api

# Ensure the `app` package is importable when uvicorn starts from this dir.
ENV PYTHONPATH=/workspace/apps/api

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
1
gig-poc/apps/api/app/__init__.py
Normal file
1
gig-poc/apps/api/app/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
133
gig-poc/apps/api/app/api/routes.py
Normal file
133
gig-poc/apps/api/app/api/routes.py
Normal file
@@ -0,0 +1,133 @@
|
||||
from datetime import datetime

from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import text
from sqlalchemy.orm import Session

from app.core.config import get_settings
from app.db.session import get_db
from app.domain.schemas import (
    ExplainResponse,
    ExtractResponse,
    ExtractTextRequest,
    HealthStatus,
    IngestJobRequest,
    IngestWorkerRequest,
    JobCard,
    ListResponse,
    MatchJobsRequest,
    MatchResponse,
    MatchWorkersRequest,
    WorkerCard,
)
from app.repositories.job_repository import JobRepository
from app.repositories.worker_repository import WorkerRepository
from app.services.card_mapper import job_to_card, worker_to_card
from app.services.extraction_service import ExtractionService
from app.services.ingest_service import IngestService
from app.services.matching_service import MatchingService
from app.services.rag.lightrag_adapter import LightRAGAdapter


router = APIRouter()


@router.get("/health", response_model=HealthStatus)
def health(db: Session = Depends(get_db)) -> HealthStatus:
    """Report liveness of the service, the database and the RAG backend.

    Each dependency is probed independently; a failing probe is reported as
    "error" in the response rather than failing the whole endpoint.
    """
    settings = get_settings()
    db_status = "ok"
    rag_status = "ok"
    try:
        # Cheapest possible round-trip to confirm DB connectivity.
        db.execute(text("SELECT 1"))
    except Exception:
        db_status = "error"
    try:
        rag_status = LightRAGAdapter(settings).health()
    except Exception:
        rag_status = "error"
    return HealthStatus(service="ok", database=db_status, rag=rag_status, timestamp=datetime.now().astimezone())


@router.post("/poc/extract/job", response_model=ExtractResponse)
def extract_job(payload: ExtractTextRequest) -> ExtractResponse:
    """Extract a structured JobCard from free-form job text."""
    return ExtractionService().extract_job(payload.text)


@router.post("/poc/extract/worker", response_model=ExtractResponse)
def extract_worker(payload: ExtractTextRequest) -> ExtractResponse:
    """Extract a structured WorkerCard from free-form worker text."""
    return ExtractionService().extract_worker(payload.text)


@router.post("/poc/ingest/job", response_model=JobCard)
def ingest_job(payload: IngestJobRequest, db: Session = Depends(get_db)) -> JobCard:
    """Persist (upsert) a job card and return the stored representation."""
    return IngestService(db).ingest_job(payload.job)


@router.post("/poc/ingest/worker", response_model=WorkerCard)
def ingest_worker(payload: IngestWorkerRequest, db: Session = Depends(get_db)) -> WorkerCard:
    """Persist (upsert) a worker card and return the stored representation."""
    return IngestService(db).ingest_worker(payload.worker)


@router.post("/poc/ingest/bootstrap")
def bootstrap(db: Session = Depends(get_db)):
    """Seed the database with sample data via the ingest service."""
    return IngestService(db).bootstrap()


@router.post("/poc/match/workers", response_model=MatchResponse)
def match_workers(payload: MatchWorkersRequest, db: Session = Depends(get_db)) -> MatchResponse:
    """Rank workers for a job given inline (`job`) or by id (`job_id`).

    The inline card takes precedence; the id is only resolved when no card
    was supplied. 404 when neither yields a job.
    """
    service = MatchingService(db)
    source = payload.job
    if source is None and payload.job_id:
        job = JobRepository(db).get(payload.job_id)
        source = job_to_card(job) if job else None
    if source is None:
        raise HTTPException(status_code=404, detail="岗位不存在")
    return MatchResponse(items=service.match_workers(source, payload.top_n))


@router.post("/poc/match/jobs", response_model=MatchResponse)
def match_jobs(payload: MatchJobsRequest, db: Session = Depends(get_db)) -> MatchResponse:
    """Rank jobs for a worker given inline (`worker`) or by id (`worker_id`).

    Mirrors `match_workers`: inline card wins, id is the fallback, 404 when
    neither resolves.
    """
    service = MatchingService(db)
    source = payload.worker
    if source is None and payload.worker_id:
        worker = WorkerRepository(db).get(payload.worker_id)
        source = worker_to_card(worker) if worker else None
    if source is None:
        raise HTTPException(status_code=404, detail="工人不存在")
    return MatchResponse(items=service.match_jobs(source, payload.top_n))


@router.get("/poc/match/explain/{match_id}", response_model=ExplainResponse)
def explain_match(match_id: str, db: Session = Depends(get_db)) -> ExplainResponse:
    """Return the stored breakdown/reasons for a previously computed match."""
    match = MatchingService(db).explain(match_id)
    if match is None:
        raise HTTPException(status_code=404, detail="匹配记录不存在")
    return ExplainResponse(match=match)


@router.get("/poc/jobs", response_model=ListResponse)
def list_jobs(db: Session = Depends(get_db)) -> ListResponse:
    """List stored jobs as JSON-serializable card dicts."""
    items = [job_to_card(item).model_dump(mode="json") for item in JobRepository(db).list()]
    return ListResponse(items=items, total=len(items))


@router.get("/poc/workers", response_model=ListResponse)
def list_workers(db: Session = Depends(get_db)) -> ListResponse:
    """List stored workers as JSON-serializable card dicts."""
    items = [worker_to_card(item).model_dump(mode="json") for item in WorkerRepository(db).list()]
    return ListResponse(items=items, total=len(items))


@router.get("/poc/jobs/{job_id}", response_model=JobCard)
def get_job(job_id: str, db: Session = Depends(get_db)) -> JobCard:
    """Fetch one job card by id; 404 if absent."""
    item = JobRepository(db).get(job_id)
    if item is None:
        raise HTTPException(status_code=404, detail="岗位不存在")
    return job_to_card(item)


@router.get("/poc/workers/{worker_id}", response_model=WorkerCard)
def get_worker(worker_id: str, db: Session = Depends(get_db)) -> WorkerCard:
    """Fetch one worker card by id; 404 if absent."""
    item = WorkerRepository(db).get(worker_id)
    if item is None:
        raise HTTPException(status_code=404, detail="工人不存在")
    return worker_to_card(item)
|
||||
51
gig-poc/apps/api/app/core/config.py
Normal file
51
gig-poc/apps/api/app/core/config.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from functools import lru_cache
from pathlib import Path

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


# apps/api (two levels above this file); ROOT_DIR is the repo-level gig-poc dir
# that contains the shared `packages` directory.
BASE_DIR = Path(__file__).resolve().parents[2]
ROOT_DIR = BASE_DIR.parents[1]


class Settings(BaseSettings):
    """Application configuration, overridable via environment / .env file.

    Unknown environment variables are ignored (extra="ignore") so the same
    .env can carry settings for other services.
    """

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")

    # Service identity / runtime.
    app_name: str = "Gig POC API"
    app_env: str = "development"
    app_host: str = "0.0.0.0"
    app_port: int = 8000
    log_level: str = "INFO"

    # Storage backends (Postgres + Qdrant vector store).
    database_url: str = "postgresql+psycopg://gig:gig@postgres:5432/gig_poc"
    qdrant_url: str = "http://qdrant:6333"
    qdrant_collection: str = "gig_poc_entities"
    vector_size: int = 64

    # LLM / embedding integration; disabled by default so the POC runs offline.
    llm_enabled: bool = False
    llm_base_url: str | None = None
    llm_api_key: str | None = None
    llm_model: str = "gpt-5.4"
    embedding_enabled: bool = False
    embedding_model: str = "text-embedding-3-small"

    # Bootstrap sizes and matching defaults.
    bootstrap_jobs: int = 100
    bootstrap_workers: int = 300
    default_recall_top_k: int = 30
    default_match_top_n: int = 10

    # Shared asset directories resolved relative to the repo root.
    prompt_dir: Path = Field(default=ROOT_DIR / "packages" / "prompts")
    sample_data_dir: Path = Field(default=ROOT_DIR / "packages" / "sample-data")
    shared_types_dir: Path = Field(default=ROOT_DIR / "packages" / "shared-types")

    # Match-score component weights (sum to 1.0).
    score_skill_weight: float = 0.35
    score_region_weight: float = 0.20
    score_time_weight: float = 0.15
    score_experience_weight: float = 0.15
    score_reliability_weight: float = 0.15


@lru_cache
def get_settings() -> Settings:
    """Return the process-wide Settings singleton (cached after first call)."""
    return Settings()
|
||||
26
gig-poc/apps/api/app/core/logging.py
Normal file
26
gig-poc/apps/api/app/core/logging.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import logging
from logging.config import dictConfig


def configure_logging(level: str = "INFO") -> None:
    """Install a single console handler on the root logger at *level*.

    Existing loggers are kept enabled so library loggers keep working.
    """
    formatter_spec = {
        "format": "%(asctime)s %(levelname)s [%(name)s] %(message)s",
    }
    handler_spec = {
        "class": "logging.StreamHandler",
        "formatter": "default",
    }
    config = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {"default": formatter_spec},
        "handlers": {"console": handler_spec},
        "root": {"handlers": ["console"], "level": level},
    }
    dictConfig(config)


# Shared application logger used across the service.
logger = logging.getLogger("gig-poc")
|
||||
5
gig-poc/apps/api/app/db/base.py
Normal file
5
gig-poc/apps/api/app/db/base.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from sqlalchemy.orm import DeclarativeBase


class Base(DeclarativeBase):
    """Declarative base shared by all ORM models in this service."""

    pass
|
||||
19
gig-poc/apps/api/app/db/session.py
Normal file
19
gig-poc/apps/api/app/db/session.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from collections.abc import Generator

from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker

from app.core.config import get_settings


# Engine and session factory are created once at import time.
# pool_pre_ping guards against stale connections from the pool.
settings = get_settings()
engine = create_engine(settings.database_url, future=True, pool_pre_ping=True)
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False, future=True)


def get_db() -> Generator[Session, None, None]:
    """FastAPI dependency: yield a request-scoped session, always close it."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
|
||||
86
gig-poc/apps/api/app/domain/models.py
Normal file
86
gig-poc/apps/api/app/domain/models.py
Normal file
@@ -0,0 +1,86 @@
|
||||
from datetime import datetime
from uuid import uuid4

from sqlalchemy import DateTime, Float, ForeignKey, Integer, String, Text, func
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.db.base import Base


class Job(Base):
    """A gig job posting: category, location, schedule, pay and tags."""

    __tablename__ = "jobs"

    # Human-readable primary key, e.g. "job_<12 hex chars>".
    id: Mapped[str] = mapped_column(String(64), primary_key=True, default=lambda: f"job_{uuid4().hex[:12]}")
    title: Mapped[str] = mapped_column(String(255))
    category: Mapped[str] = mapped_column(String(128))
    description: Mapped[str] = mapped_column(Text)
    city: Mapped[str] = mapped_column(String(64))
    region: Mapped[str] = mapped_column(String(64))
    location_detail: Mapped[str] = mapped_column(String(255))
    start_time: Mapped[datetime] = mapped_column(DateTime(timezone=True))
    duration_hours: Mapped[float] = mapped_column(Float)
    headcount: Mapped[int] = mapped_column(Integer)
    salary_type: Mapped[str] = mapped_column(String(32))
    salary_amount: Mapped[float] = mapped_column(Float)
    salary_currency: Mapped[str] = mapped_column(String(16), default="CNY")
    work_mode: Mapped[str] = mapped_column(String(64))
    tags_json: Mapped[list[str]] = mapped_column(JSONB, default=list)
    confidence: Mapped[float] = mapped_column(Float)
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())

    # Skill rows are owned by the job and deleted with it.
    skills: Mapped[list["JobSkill"]] = relationship(back_populates="job", cascade="all, delete-orphan")


class JobSkill(Base):
    """A single skill required (or preferred) by a job."""

    __tablename__ = "job_skills"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    job_id: Mapped[str] = mapped_column(ForeignKey("jobs.id", ondelete="CASCADE"), index=True)
    skill_name: Mapped[str] = mapped_column(String(128), index=True)
    weight: Mapped[float] = mapped_column(Float, default=1.0)
    is_required: Mapped[bool] = mapped_column(default=True)

    job: Mapped[Job] = relationship(back_populates="skills")


class Worker(Base):
    """A worker profile: skills, availability, locations and quality scores."""

    __tablename__ = "workers"

    # Human-readable primary key, e.g. "worker_<12 hex chars>".
    id: Mapped[str] = mapped_column(String(64), primary_key=True, default=lambda: f"worker_{uuid4().hex[:12]}")
    name: Mapped[str] = mapped_column(String(128))
    description: Mapped[str] = mapped_column(Text)
    # *_json columns hold plain string lists serialized as JSONB.
    cities_json: Mapped[list[str]] = mapped_column(JSONB, default=list)
    regions_json: Mapped[list[str]] = mapped_column(JSONB, default=list)
    availability_json: Mapped[list[str]] = mapped_column(JSONB, default=list)
    experience_tags_json: Mapped[list[str]] = mapped_column(JSONB, default=list)
    reliability_score: Mapped[float] = mapped_column(Float, default=0.7)
    profile_completion: Mapped[float] = mapped_column(Float, default=0.6)
    confidence: Mapped[float] = mapped_column(Float, default=0.8)
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())

    # Skill rows are owned by the worker and deleted with it.
    skills: Mapped[list["WorkerSkill"]] = relationship(back_populates="worker", cascade="all, delete-orphan")


class WorkerSkill(Base):
    """A single skill a worker claims, with a proficiency score."""

    __tablename__ = "worker_skills"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    worker_id: Mapped[str] = mapped_column(ForeignKey("workers.id", ondelete="CASCADE"), index=True)
    skill_name: Mapped[str] = mapped_column(String(128), index=True)
    score: Mapped[float] = mapped_column(Float, default=0.7)

    worker: Mapped[Worker] = relationship(back_populates="skills")


class MatchRecord(Base):
    """A persisted match between a source and a target entity with its score."""

    __tablename__ = "matches"

    id: Mapped[str] = mapped_column(String(64), primary_key=True, default=lambda: f"match_{uuid4().hex[:12]}")
    # "job_to_worker" or "worker_to_job" (see schemas.SourceType).
    source_type: Mapped[str] = mapped_column(String(32), index=True)
    source_id: Mapped[str] = mapped_column(String(64), index=True)
    target_id: Mapped[str] = mapped_column(String(64), index=True)
    match_score: Mapped[float] = mapped_column(Float)
    # Per-dimension score components (see schemas.MatchBreakdown).
    breakdown_json: Mapped[dict] = mapped_column(JSONB)
    reasons_json: Mapped[list[str]] = mapped_column(JSONB)
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
|
||||
171
gig-poc/apps/api/app/domain/schemas.py
Normal file
171
gig-poc/apps/api/app/domain/schemas.py
Normal file
@@ -0,0 +1,171 @@
|
||||
from __future__ import annotations

from datetime import datetime
from enum import Enum

from pydantic import BaseModel, Field, field_validator, model_validator


class SalaryType(str, Enum):
    """How a job's pay is denominated."""

    daily = "daily"
    hourly = "hourly"
    monthly = "monthly"
    task = "task"


class SourceType(str, Enum):
    """Direction of a match: which side initiated the search."""

    job_to_worker = "job_to_worker"
    worker_to_job = "worker_to_job"


class Salary(BaseModel):
    """Pay amount with its type and currency."""

    type: SalaryType = SalaryType.daily
    amount: float = 0
    currency: str = "CNY"


class SkillScore(BaseModel):
    """A named skill with a 0..1 proficiency score."""

    name: str
    score: float = Field(ge=0, le=1)


class JobCard(BaseModel):
    """Normalized representation of a job posting."""

    job_id: str
    title: str
    category: str
    description: str
    skills: list[str] = Field(default_factory=list)
    city: str
    region: str
    location_detail: str
    start_time: datetime
    duration_hours: float = Field(gt=0)
    headcount: int = Field(gt=0)
    salary: Salary
    work_mode: str
    tags: list[str] = Field(default_factory=list)
    # Extraction confidence in 0..1.
    confidence: float = Field(ge=0, le=1)


class WorkerCard(BaseModel):
    """Normalized representation of a worker profile."""

    worker_id: str
    name: str
    description: str
    skills: list[SkillScore] = Field(default_factory=list)
    cities: list[str] = Field(default_factory=list)
    regions: list[str] = Field(default_factory=list)
    availability: list[str] = Field(default_factory=list)
    experience_tags: list[str] = Field(default_factory=list)
    reliability_score: float = Field(ge=0, le=1)
    profile_completion: float = Field(ge=0, le=1)
    confidence: float = Field(ge=0, le=1)


class MatchBreakdown(BaseModel):
    """Per-dimension components of a match score, each in 0..1."""

    skill_score: float = Field(ge=0, le=1)
    region_score: float = Field(ge=0, le=1)
    time_score: float = Field(ge=0, le=1)
    experience_score: float = Field(ge=0, le=1)
    reliability_score: float = Field(ge=0, le=1)


class MatchResult(BaseModel):
    """A single ranked match with its score breakdown and explanations."""

    match_id: str
    source_type: SourceType
    source_id: str
    target_id: str
    match_score: float = Field(ge=0, le=1)
    breakdown: MatchBreakdown
    # At least three human-readable reasons are required per match.
    reasons: list[str] = Field(default_factory=list, min_length=3)


class ExtractTextRequest(BaseModel):
    """Free-form text to run extraction on (minimum 5 characters)."""

    text: str = Field(min_length=5)


class IngestJobRequest(BaseModel):
    """Wrapper carrying one JobCard to persist."""

    job: JobCard


class IngestWorkerRequest(BaseModel):
    """Wrapper carrying one WorkerCard to persist."""

    worker: WorkerCard


class MatchWorkersRequest(BaseModel):
    """Find workers for a job given either an id or an inline card."""

    job_id: str | None = None
    job: JobCard | None = None
    top_n: int = Field(default=10, ge=1, le=50)

    @model_validator(mode="after")
    def validate_source(self) -> "MatchWorkersRequest":
        # Reject requests that provide neither a job_id nor an inline card.
        if not self.job_id and not self.job:
            raise ValueError("job_id 或 job 至少需要提供一个")
        return self


class MatchJobsRequest(BaseModel):
    """Find jobs for a worker given either an id or an inline card."""

    worker_id: str | None = None
    worker: WorkerCard | None = None
    top_n: int = Field(default=10, ge=1, le=50)

    @model_validator(mode="after")
    def validate_source(self) -> "MatchJobsRequest":
        # Reject requests that provide neither a worker_id nor an inline card.
        if not self.worker_id and not self.worker:
            raise ValueError("worker_id 或 worker 至少需要提供一个")
        return self


class ExtractResponse(BaseModel):
    """Outcome of an extraction: the card on success, errors otherwise."""

    success: bool
    data: JobCard | WorkerCard | None = None
    errors: list[str] = Field(default_factory=list)
    missing_fields: list[str] = Field(default_factory=list)


class BootstrapResponse(BaseModel):
    """Counts of entities created by the bootstrap seeding."""

    jobs: int
    workers: int
    skills: int
    categories: int
    regions: int


class HealthStatus(BaseModel):
    """Per-dependency health ("ok"/"error") plus a timestamp."""

    service: str
    database: str
    rag: str
    timestamp: datetime


class ListResponse(BaseModel):
    """Generic paginated-style list of serialized cards."""

    items: list[dict]
    total: int


class MatchResponse(BaseModel):
    """Ranked match results."""

    items: list[MatchResult]


class ExplainResponse(BaseModel):
    """A single stored match returned for explanation."""

    match: MatchResult


class PromptOutput(BaseModel):
    """Parsed JSON content of an LLM response plus the raw text."""

    content: dict
    raw_text: str


class QueryFilters(BaseModel):
    """Structured filters for entity recall queries."""

    entity_type: str
    city: str | None = None
    region: str | None = None
    categories: list[str] = Field(default_factory=list)
    tags: list[str] = Field(default_factory=list)
    skills: list[str] = Field(default_factory=list)

    @field_validator("entity_type")
    @classmethod
    def validate_entity_type(cls, value: str) -> str:
        # Only the two entity kinds the POC knows about are accepted.
        if value not in {"job", "worker"}:
            raise ValueError("entity_type must be job or worker")
        return value
||||
36
gig-poc/apps/api/app/main.py
Normal file
36
gig-poc/apps/api/app/main.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from app.api.routes import router
from app.core.config import get_settings
from app.core.logging import configure_logging, logger
from app.db.base import Base
from app.db.session import engine
from app.services.rag.lightrag_adapter import LightRAGAdapter


# Configure settings and logging once at import time.
settings = get_settings()
configure_logging(settings.log_level)


@asynccontextmanager
async def lifespan(_: FastAPI):
    """Startup/shutdown hook: create tables and warm up the vector store.

    Qdrant readiness is best-effort — a failure is logged but does not
    prevent the API from starting.
    """
    # POC-style schema management: create tables directly, no migrations.
    Base.metadata.create_all(bind=engine)
    try:
        LightRAGAdapter(settings).ensure_ready()
    except Exception:
        logger.exception("Qdrant initialization skipped during startup")
    yield


app = FastAPI(title=settings.app_name, lifespan=lifespan)
# Fully open CORS — acceptable for a local POC, not for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
app.include_router(router)
|
||||
58
gig-poc/apps/api/app/repositories/job_repository.py
Normal file
58
gig-poc/apps/api/app/repositories/job_repository.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from sqlalchemy import select
from sqlalchemy.orm import Session, selectinload

from app.domain.models import Job, JobSkill
from app.domain.schemas import JobCard


class JobRepository:
    """Data access for Job rows, keyed by the card's job_id."""

    def __init__(self, db: Session):
        self.db = db

    def upsert(self, job_card: JobCard) -> Job:
        """Insert or update a Job from a JobCard, replacing its skills; commits."""
        instance = self.db.get(Job, job_card.job_id)
        if instance is None:
            instance = Job(id=job_card.job_id)
            self.db.add(instance)

        instance.title = job_card.title
        instance.category = job_card.category
        instance.description = job_card.description
        instance.city = job_card.city
        instance.region = job_card.region
        instance.location_detail = job_card.location_detail
        instance.start_time = job_card.start_time
        instance.duration_hours = job_card.duration_hours
        instance.headcount = job_card.headcount
        instance.salary_type = job_card.salary.type.value
        instance.salary_amount = job_card.salary.amount
        instance.salary_currency = job_card.salary.currency
        instance.work_mode = job_card.work_mode
        instance.tags_json = job_card.tags
        instance.confidence = job_card.confidence
        # Full replacement of the skill list; delete-orphan cascade removes old rows.
        instance.skills.clear()
        instance.skills.extend(
            [
                # The first two extracted skills are treated as hard requirements.
                JobSkill(skill_name=skill_name, weight=1.0, is_required=index < 2)
                for index, skill_name in enumerate(job_card.skills)
            ]
        )
        self.db.commit()
        self.db.refresh(instance)
        return instance

    def list(self, limit: int = 100) -> list[Job]:
        """Return the newest jobs (skills eagerly loaded), up to `limit`."""
        stmt = select(Job).options(selectinload(Job.skills)).order_by(Job.created_at.desc()).limit(limit)
        return list(self.db.scalars(stmt))

    def get(self, job_id: str) -> Job | None:
        """Return one job with skills eagerly loaded, or None."""
        stmt = select(Job).options(selectinload(Job.skills)).where(Job.id == job_id)
        return self.db.scalar(stmt)

    def get_many(self, ids: list[str]) -> list[Job]:
        """Return jobs for `ids`, preserving the input order; missing ids are skipped."""
        if not ids:
            return []
        stmt = select(Job).options(selectinload(Job.skills)).where(Job.id.in_(ids))
        result = list(self.db.scalars(stmt))
        # Re-sort to match the caller's ranking order, since IN() is unordered.
        order = {item_id: index for index, item_id in enumerate(ids)}
        return sorted(result, key=lambda item: order[item.id])
|
||||
49
gig-poc/apps/api/app/repositories/match_repository.py
Normal file
49
gig-poc/apps/api/app/repositories/match_repository.py
Normal file
@@ -0,0 +1,49 @@
|
||||
from sqlalchemy import select
from sqlalchemy.orm import Session

from app.domain.models import MatchRecord
from app.domain.schemas import MatchResult


class MatchRepository:
    """Persistence for computed match results."""

    def __init__(self, db: Session):
        self.db = db

    @staticmethod
    def _to_record(match: MatchResult) -> MatchRecord:
        """Map a MatchResult schema onto a new (unpersisted) MatchRecord row."""
        return MatchRecord(
            id=match.match_id,
            source_type=match.source_type.value,
            source_id=match.source_id,
            target_id=match.target_id,
            match_score=match.match_score,
            breakdown_json=match.breakdown.model_dump(),
            reasons_json=match.reasons,
        )

    def create(self, match: MatchResult) -> MatchRecord:
        """Insert one match record and return the refreshed instance; commits."""
        instance = self._to_record(match)
        self.db.add(instance)
        self.db.commit()
        self.db.refresh(instance)
        return instance

    def bulk_replace(self, matches: list[MatchResult], source_type: str, source_id: str) -> None:
        """Replace all stored matches for (source_type, source_id) with `matches`.

        Old rows are deleted and the new batch inserted in a single commit.
        """
        stmt = select(MatchRecord).where(
            MatchRecord.source_type == source_type,
            MatchRecord.source_id == source_id,
        )
        for item in self.db.scalars(stmt):
            self.db.delete(item)
        for match in matches:
            self.db.add(self._to_record(match))
        self.db.commit()

    def get(self, match_id: str) -> MatchRecord | None:
        """Return the match row by primary key, or None."""
        return self.db.get(MatchRecord, match_id)
|
||||
49
gig-poc/apps/api/app/repositories/worker_repository.py
Normal file
49
gig-poc/apps/api/app/repositories/worker_repository.py
Normal file
@@ -0,0 +1,49 @@
|
||||
from sqlalchemy import select
from sqlalchemy.orm import Session, selectinload

from app.domain.models import Worker, WorkerSkill
from app.domain.schemas import WorkerCard


class WorkerRepository:
    """Data access for Worker rows, keyed by the card's worker_id."""

    def __init__(self, db: Session):
        self.db = db

    def upsert(self, worker_card: WorkerCard) -> Worker:
        """Insert or update a Worker from a WorkerCard, replacing its skills; commits."""
        instance = self.db.get(Worker, worker_card.worker_id)
        if instance is None:
            instance = Worker(id=worker_card.worker_id)
            self.db.add(instance)

        instance.name = worker_card.name
        instance.description = worker_card.description
        instance.cities_json = worker_card.cities
        instance.regions_json = worker_card.regions
        instance.availability_json = worker_card.availability
        instance.experience_tags_json = worker_card.experience_tags
        instance.reliability_score = worker_card.reliability_score
        instance.profile_completion = worker_card.profile_completion
        instance.confidence = worker_card.confidence
        # Full replacement of the skill list; delete-orphan cascade removes old rows.
        instance.skills.clear()
        instance.skills.extend(
            [WorkerSkill(skill_name=skill.name, score=skill.score) for skill in worker_card.skills]
        )
        self.db.commit()
        self.db.refresh(instance)
        return instance

    def list(self, limit: int = 200) -> list[Worker]:
        """Return the newest workers (skills eagerly loaded), up to `limit`."""
        stmt = select(Worker).options(selectinload(Worker.skills)).order_by(Worker.created_at.desc()).limit(limit)
        return list(self.db.scalars(stmt))

    def get(self, worker_id: str) -> Worker | None:
        """Return one worker with skills eagerly loaded, or None."""
        stmt = select(Worker).options(selectinload(Worker.skills)).where(Worker.id == worker_id)
        return self.db.scalar(stmt)

    def get_many(self, ids: list[str]) -> list[Worker]:
        """Return workers for `ids`, preserving the input order; missing ids are skipped."""
        if not ids:
            return []
        stmt = select(Worker).options(selectinload(Worker.skills)).where(Worker.id.in_(ids))
        result = list(self.db.scalars(stmt))
        # Re-sort to match the caller's ranking order, since IN() is unordered.
        order = {item_id: index for index, item_id in enumerate(ids)}
        return sorted(result, key=lambda item: order[item.id])
|
||||
50
gig-poc/apps/api/app/services/card_mapper.py
Normal file
50
gig-poc/apps/api/app/services/card_mapper.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from app.domain.models import Job, MatchRecord, Worker
from app.domain.schemas import JobCard, MatchBreakdown, MatchResult, Salary, SkillScore, SourceType, WorkerCard


def job_to_card(job: Job) -> JobCard:
    """Convert a Job ORM row into its API-facing JobCard schema."""
    return JobCard(
        job_id=job.id,
        title=job.title,
        category=job.category,
        description=job.description,
        skills=[item.skill_name for item in job.skills],
        city=job.city,
        region=job.region,
        location_detail=job.location_detail,
        start_time=job.start_time,
        duration_hours=job.duration_hours,
        headcount=job.headcount,
        salary=Salary(type=job.salary_type, amount=job.salary_amount, currency=job.salary_currency),
        work_mode=job.work_mode,
        tags=job.tags_json,
        confidence=job.confidence,
    )


def worker_to_card(worker: Worker) -> WorkerCard:
    """Convert a Worker ORM row into its API-facing WorkerCard schema."""
    return WorkerCard(
        worker_id=worker.id,
        name=worker.name,
        description=worker.description,
        skills=[SkillScore(name=item.skill_name, score=item.score) for item in worker.skills],
        cities=worker.cities_json,
        regions=worker.regions_json,
        availability=worker.availability_json,
        experience_tags=worker.experience_tags_json,
        reliability_score=worker.reliability_score,
        profile_completion=worker.profile_completion,
        confidence=worker.confidence,
    )


def match_record_to_schema(match: MatchRecord) -> MatchResult:
    """Convert a persisted MatchRecord row back into a MatchResult schema."""
    return MatchResult(
        match_id=match.id,
        source_type=SourceType(match.source_type),
        source_id=match.source_id,
        target_id=match.target_id,
        match_score=match.match_score,
        breakdown=MatchBreakdown(**match.breakdown_json),
        reasons=match.reasons_json,
    )
|
||||
209
gig-poc/apps/api/app/services/extraction_service.py
Normal file
209
gig-poc/apps/api/app/services/extraction_service.py
Normal file
@@ -0,0 +1,209 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from dateutil import parser as date_parser
|
||||
from pydantic import ValidationError
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.core.logging import logger
|
||||
from app.domain.schemas import ExtractResponse, JobCard, Salary, SkillScore, WorkerCard
|
||||
from app.services.llm_client import LLMClient
|
||||
from app.utils.ids import generate_id
|
||||
from app.utils.prompts import load_prompt
|
||||
|
||||
|
||||
class ExtractionService:
    """Turns free-form Chinese job/worker text into structured cards.

    An LLM extraction is attempted first; when the LLM is disabled or fails,
    a keyword/regex rule pipeline driven by the bundled vocabulary files
    (skills / categories / regions) takes over.
    """

    def __init__(self) -> None:
        self.settings = get_settings()
        # Vocabulary files that power the rule-based fallback extraction.
        self.skills = json.loads((self.settings.sample_data_dir / "skills.json").read_text(encoding="utf-8"))
        self.categories = json.loads((self.settings.sample_data_dir / "categories.json").read_text(encoding="utf-8"))
        self.regions = json.loads((self.settings.sample_data_dir / "regions.json").read_text(encoding="utf-8"))
        self.llm_client = LLMClient(self.settings)

    def extract_job(self, text: str) -> ExtractResponse:
        """Extract a JobCard from a raw job post (LLM first, rule fallback)."""
        logger.info("extract_job request text=%s", text)
        llm_result = self._llm_extract(text, self.settings.prompt_dir / "job_extract.md")
        if llm_result:
            try:
                return ExtractResponse(success=True, data=JobCard(**llm_result.content))
            except ValidationError as exc:
                logger.exception("LLM job extraction validation failed")
                return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))

        try:
            card = self._extract_job_rule(text)
            return ExtractResponse(success=True, data=card)
        except ValidationError as exc:
            logger.exception("Rule job extraction validation failed")
            return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))

    def extract_worker(self, text: str) -> ExtractResponse:
        """Extract a WorkerCard from a self-introduction (LLM first, rule fallback)."""
        logger.info("extract_worker request text=%s", text)
        llm_result = self._llm_extract(text, self.settings.prompt_dir / "worker_extract.md")
        if llm_result:
            try:
                return ExtractResponse(success=True, data=WorkerCard(**llm_result.content))
            except ValidationError as exc:
                logger.exception("LLM worker extraction validation failed")
                return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))

        try:
            card = self._extract_worker_rule(text)
            return ExtractResponse(success=True, data=card)
        except ValidationError as exc:
            logger.exception("Rule worker extraction validation failed")
            return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))

    def _llm_extract(self, text: str, prompt_path: Path):
        """Run the LLM extraction; return None on any failure so rules take over."""
        try:
            return self.llm_client.extract_json(load_prompt(prompt_path), text)
        except Exception:
            logger.exception("LLM extraction failed, fallback to rule-based extraction")
            return None

    def _extract_job_rule(self, text: str) -> JobCard:
        """Rule-based job extraction: substring vocabulary hits plus regexes."""
        skill_hits = [item for item in self.skills if item in text]
        category = next((item for item in self.categories if item in text), "活动执行")
        region = self._extract_region(text)
        salary = self._extract_salary(text)
        headcount = self._extract_number(text, [r"(\d+)\s*[个名人位]"], default=1)
        duration = self._extract_number(text, [r"(\d+(?:\.\d+)?)\s*小时"], default=4.0, cast=float)
        tags = [tag for tag in ["女生优先", "男生优先", "有经验优先", "沟通好", "可连做优先"] if tag in text]
        # Plain conditional instead of the original next((... for _ in [0]...)) trick.
        title = f"{category}{skill_hits[0]}兼职" if skill_hits else f"{category}兼职"
        card = JobCard(
            job_id=generate_id("job"),
            title=title,
            category=category,
            description=text,
            skills=skill_hits[:5] or self._guess_category_skills(category),
            city=region["city"],
            region=region["region"],
            location_detail=self._extract_location(text, region),
            start_time=self._extract_job_time(text),
            duration_hours=duration,
            headcount=int(headcount),
            salary=salary,
            work_mode="排班制" if "排班" in text else "兼职",
            tags=tags or ["有经验优先"],
            confidence=self._compute_confidence(skill_hits, region, salary.amount > 0),
        )
        return card

    def _extract_worker_rule(self, text: str) -> WorkerCard:
        """Rule-based worker extraction from a free-form self-introduction."""
        skill_hits = [item for item in self.skills if item in text][:6]
        region_hits = [item for item in self.regions if item["region"] in text or item["city"] in text]
        # dict.fromkeys() keeps first-mention order while de-duplicating.
        city_names = list(dict.fromkeys([item["city"] for item in region_hits])) or ["深圳"]
        region_names = list(dict.fromkeys([item["region"] for item in region_hits])) or ["南山"]
        availability = self._extract_availability(text)
        experience = [item for item in ["商场", "会展", "活动执行", "物流", "零售", "客服中心", "快消", "校园推广"] if item in text]
        card = WorkerCard(
            worker_id=generate_id("worker"),
            name=self._extract_name(text),
            description=text,
            # Synthetic, slowly increasing per-skill scores for the demo data.
            skills=[SkillScore(name=item, score=round(0.72 + index * 0.04, 2)) for index, item in enumerate(skill_hits or ["活动执行", "引导", "登记"])],
            cities=city_names,
            regions=region_names,
            availability=availability,
            experience_tags=experience or ["活动执行"],
            reliability_score=0.76,
            profile_completion=0.68,
            confidence=self._compute_confidence(skill_hits, {"city": city_names[0], "region": region_names[0]}, True),
        )
        return card

    def _extract_region(self, text: str) -> dict:
        """Best region match: city+district hit beats a district-only hit; default 深圳/南山."""
        for item in self.regions:
            if item["city"] in text and item["region"] in text:
                return item
        for item in self.regions:
            if item["region"] in text:
                return item
        return {"city": "深圳", "region": "南山"}

    def _extract_location(self, text: str, region: dict) -> str:
        """Compose a display location from the first venue keyword found."""
        markers = ["会展中心", "商场", "地铁站", "园区", "写字楼", "仓库", "门店"]
        for marker in markers:
            if marker in text:
                return f"{region['city']}{region['region']}{marker}"
        return f"{region['city']}{region['region']}待定点位"

    def _extract_salary(self, text: str) -> Salary:
        """Pull the pay amount; "/小时" marks hourly pay, everything else is daily."""
        amount = self._extract_number(text, [r"(\d+(?:\.\d+)?)\s*(?:元|块)"], default=150.0, cast=float)
        # "/小时" already implies "小时"; the original double check was redundant.
        salary_type = "hourly" if "/小时" in text else "daily"
        return Salary(type=salary_type, amount=amount, currency="CNY")

    def _extract_number(self, text: str, patterns: list[str], default, cast=int):
        """First regex capture converted via `cast`; `default` when nothing matches."""
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                return cast(match.group(1))
        return default

    def _extract_job_time(self, text: str) -> datetime:
        """Resolve a start datetime in Asia/Shanghai (UTC+8) from relative words
        ("明天"/"后天"), an explicit "M月D日", and an hour marker; defaults to
        tomorrow 09:00."""
        shanghai_tz = timezone(timedelta(hours=8))
        now = datetime.now(shanghai_tz)
        if "明天" in text:
            base = now + timedelta(days=1)
        elif "后天" in text:
            base = now + timedelta(days=2)
        else:
            month_day = re.search(r"(\d{1,2})月(\d{1,2})日", text)
            if month_day:
                month, day = int(month_day.group(1)), int(month_day.group(2))
                try:
                    base = now.replace(month=month, day=day)
                except ValueError:
                    # Invalid calendar date (e.g. 2月30日): fall back to tomorrow.
                    base = now + timedelta(days=1)
            else:
                base = now + timedelta(days=1)
        hour = 9
        if "下午" in text:
            hour = 13
        elif "晚上" in text:
            hour = 19
        explicit_hour = re.search(r"(\d{1,2})[:点时](\d{0,2})?", text)
        if explicit_hour:
            hour = int(explicit_hour.group(1))
            # "下午3点" means 15:00, not 03:00 — shift explicit AM-range hours into PM.
            if hour < 12 and ("下午" in text or "晚上" in text):
                hour += 12
        # Clamp so datetime.replace() cannot raise on malformed input like "25时".
        hour = min(max(hour, 0), 23)
        return base.replace(hour=hour, minute=0, second=0, microsecond=0)

    def _extract_availability(self, text: str) -> list[str]:
        """Map availability keywords to slot tags; default to 'anytime'."""
        tags = []
        if "周末" in text:
            tags.append("weekend")
        if "上午" in text:
            tags.append("weekday_am")
        if "下午" in text:
            tags.append("weekday_pm")
        if "随时" in text or "都能" in text or "全天" in text:
            tags.append("anytime")
        return tags or ["anytime"]

    def _extract_name(self, text: str) -> str:
        """Capture a 2-4 character Chinese name after 我叫/我是; anonymous otherwise."""
        if match := re.search(r"我叫([\u4e00-\u9fa5]{2,4})", text):
            return match.group(1)
        if match := re.search(r"我是([\u4e00-\u9fa5]{2,4})", text):
            return match.group(1)
        return "匿名候选人"

    def _guess_category_skills(self, category: str) -> list[str]:
        """Default skill set per category when no skill keyword matched."""
        mapping = {
            "活动执行": ["签到", "引导", "登记"],
            "促销": ["促销", "导购", "陈列"],
            "配送": ["配送", "装卸", "司机协助"],
            "客服": ["客服", "电话邀约", "线上客服"],
        }
        return mapping.get(category, ["活动执行", "沟通"])

    def _compute_confidence(self, skill_hits: list[str], region: dict, has_salary: bool) -> float:
        """Heuristic confidence: 0.55 base, bumps for skills/region/salary, capped at 0.95."""
        score = 0.55
        if skill_hits:
            score += 0.15
        if region.get("city"):
            score += 0.15
        if has_salary:
            score += 0.1
        return min(round(score, 2), 0.95)

    def _missing_fields(self, exc: ValidationError) -> list[str]:
        """Flatten pydantic error locations into dotted field paths."""
        return [".".join(str(part) for part in item["loc"]) for item in exc.errors()]
|
||||
52
gig-poc/apps/api/app/services/ingest_service.py
Normal file
52
gig-poc/apps/api/app/services/ingest_service.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.core.logging import logger
|
||||
from app.domain.schemas import BootstrapResponse, JobCard, WorkerCard
|
||||
from app.repositories.job_repository import JobRepository
|
||||
from app.repositories.worker_repository import WorkerRepository
|
||||
from app.services.rag.lightrag_adapter import LightRAGAdapter
|
||||
|
||||
|
||||
class IngestService:
    """Persists job / worker cards to the relational store and mirrors them
    into the RAG vector index."""

    def __init__(self, db: Session):
        self.db = db
        self.settings = get_settings()
        self.job_repository = JobRepository(db)
        self.worker_repository = WorkerRepository(db)
        self.rag = LightRAGAdapter(self.settings)

    def _load_json(self, name: str):
        """Read one sample-data JSON file from the configured directory."""
        return json.loads((self.settings.sample_data_dir / name).read_text(encoding="utf-8"))

    def ingest_job(self, card: JobCard) -> JobCard:
        """Upsert a job into the database and the vector index; returns the card."""
        logger.info("ingest_job job_id=%s", card.job_id)
        self.job_repository.upsert(card)
        self.rag.upsert_job(card)
        return card

    def ingest_worker(self, card: WorkerCard) -> WorkerCard:
        """Upsert a worker into the database and the vector index; returns the card."""
        logger.info("ingest_worker worker_id=%s", card.worker_id)
        self.worker_repository.upsert(card)
        self.rag.upsert_worker(card)
        return card

    def bootstrap(self) -> BootstrapResponse:
        """Seed the system from the bundled sample data and report corpus sizes."""
        skills = self._load_json("skills.json")
        categories = self._load_json("categories.json")
        regions = self._load_json("regions.json")
        jobs = self._load_json("jobs.json")
        workers = self._load_json("workers.json")
        self.rag.ensure_ready()
        for item in jobs:
            self.ingest_job(JobCard(**item))
        for item in workers:
            self.ingest_worker(WorkerCard(**item))
        return BootstrapResponse(
            jobs=len(jobs),
            workers=len(workers),
            skills=len(skills),
            categories=len(categories),
            regions=len(regions),
        )
|
||||
34
gig-poc/apps/api/app/services/llm_client.py
Normal file
34
gig-poc/apps/api/app/services/llm_client.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import httpx
|
||||
|
||||
from app.core.config import Settings
|
||||
from app.domain.schemas import PromptOutput
|
||||
|
||||
|
||||
class LLMClient:
    """Thin client for an OpenAI-compatible chat-completions endpoint, used to
    pull a single JSON object out of the model."""

    def __init__(self, settings: Settings):
        self.settings = settings

    def extract_json(self, system_prompt: str, user_text: str) -> PromptOutput | None:
        """POST a two-message chat request and parse the reply as JSON.

        Returns None when the LLM is disabled or not fully configured, so
        callers can fall back to rule-based extraction.
        """
        cfg = self.settings
        if not (cfg.llm_enabled and cfg.llm_base_url and cfg.llm_api_key):
            return None

        endpoint = f"{cfg.llm_base_url.rstrip('/')}/chat/completions"
        body = {
            "model": cfg.llm_model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_text},
            ],
            "temperature": 0.1,
            "response_format": {"type": "json_object"},
        }
        auth = {"Authorization": f"Bearer {cfg.llm_api_key}"}
        with httpx.Client(timeout=30.0) as client:
            response = client.post(endpoint, json=body, headers=auth)
            response.raise_for_status()
            data = response.json()
        raw_text = data["choices"][0]["message"]["content"]
        return PromptOutput(content=json.loads(raw_text), raw_text=raw_text)
|
||||
178
gig-poc/apps/api/app/services/matching_service.py
Normal file
178
gig-poc/apps/api/app/services/matching_service.py
Normal file
@@ -0,0 +1,178 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.core.logging import logger
|
||||
from app.domain.schemas import JobCard, MatchBreakdown, MatchResult, QueryFilters, SourceType, WorkerCard
|
||||
from app.repositories.job_repository import JobRepository
|
||||
from app.repositories.match_repository import MatchRepository
|
||||
from app.repositories.worker_repository import WorkerRepository
|
||||
from app.services.card_mapper import job_to_card, worker_to_card
|
||||
from app.services.rag.lightrag_adapter import LightRAGAdapter
|
||||
from app.utils.ids import generate_id
|
||||
|
||||
|
||||
class MatchingService:
    """Recalls candidates through the RAG index, scores each pair with a
    weighted rubric (skill / region / time / experience / reliability) and
    persists the ranked matches."""

    def __init__(self, db: Session):
        self.db = db
        self.settings = get_settings()
        self.jobs = JobRepository(db)
        self.workers = WorkerRepository(db)
        self.matches = MatchRepository(db)
        self.rag = LightRAGAdapter(self.settings)

    def match_workers(self, source: JobCard, top_n: int) -> list[MatchResult]:
        """Return the top-N workers for a job and replace the stored matches."""
        logger.info("match_workers source_id=%s top_n=%s", source.job_id, top_n)
        query_text = " ".join([source.title, source.category, source.city, source.region, *source.skills, *source.tags])
        candidate_ids = self.rag.search(
            query_text=query_text,
            filters=QueryFilters(entity_type="worker", city=source.city),
            limit=max(top_n * 3, self.settings.default_recall_top_k),
        )
        # Vector recall can come back empty (cold index); fall back to a plain list.
        candidates = self.workers.get_many(candidate_ids) or self.workers.list(limit=max(top_n * 3, 50))
        results = [self._build_job_to_worker_match(source, worker_to_card(worker)) for worker in candidates]
        results = sorted(results, key=lambda item: item.match_score, reverse=True)[:top_n]
        self.matches.bulk_replace(results, SourceType.job_to_worker.value, source.job_id)
        return results

    def match_jobs(self, source: WorkerCard, top_n: int) -> list[MatchResult]:
        """Return the top-N jobs for a worker and replace the stored matches."""
        logger.info("match_jobs source_id=%s top_n=%s", source.worker_id, top_n)
        query_text = " ".join([source.name, *source.cities, *source.regions, *[item.name for item in source.skills], *source.experience_tags])
        city = source.cities[0] if source.cities else None
        candidate_ids = self.rag.search(
            query_text=query_text,
            filters=QueryFilters(entity_type="job", city=city),
            limit=max(top_n * 3, self.settings.default_recall_top_k),
        )
        candidates = self.jobs.get_many(candidate_ids) or self.jobs.list(limit=max(top_n * 3, 50))
        results = [self._build_worker_to_job_match(source, job_to_card(job)) for job in candidates]
        results = sorted(results, key=lambda item: item.match_score, reverse=True)[:top_n]
        self.matches.bulk_replace(results, SourceType.worker_to_job.value, source.worker_id)
        return results

    def explain(self, match_id: str) -> MatchResult | None:
        """Load one persisted match by id; None when it does not exist."""
        record = self.matches.get(match_id)
        if record is None:
            return None
        # Imported locally — presumably to avoid a circular import with
        # card_mapper; confirm before hoisting to module level.
        from app.services.card_mapper import match_record_to_schema

        return match_record_to_schema(record)

    def _build_job_to_worker_match(self, job: JobCard, worker: WorkerCard) -> MatchResult:
        """Score one (job, worker) pair and assemble the MatchResult."""
        job_skills = set(job.skills)
        expanded_skills = self.rag.expand_skills(job.skills)
        worker_skills = {item.name: item.score for item in worker.skills}
        direct_hits = job_skills.intersection(worker_skills.keys())
        expanded_hits = expanded_skills.intersection(worker_skills.keys())
        base_skill_score = sum(worker_skills[name] for name in expanded_hits) / max(len(job_skills), 1)
        if not direct_hits:
            # Only graph-expanded (related) skills matched: damp the score.
            base_skill_score *= 0.4
        skill_score = min(base_skill_score, 1.0)
        region_score = self._region_score(job.city, job.region, worker.cities, worker.regions)
        time_score = self._time_score(job.start_time, worker.availability)
        experience_score = self._experience_score([job.category, *job.tags], worker.experience_tags)
        reliability_score = worker.reliability_score
        score = self._weighted_score(skill_score, region_score, time_score, experience_score, reliability_score)
        breakdown = MatchBreakdown(
            skill_score=round(skill_score, 2),
            region_score=round(region_score, 2),
            time_score=round(time_score, 2),
            experience_score=round(experience_score, 2),
            reliability_score=round(reliability_score, 2),
        )
        # sorted(): iterating a set directly made the reason text
        # nondeterministic across processes (str hash randomisation).
        reasons = self._build_reasons(
            matched_skills=sorted(expanded_hits)[:3],
            region_hit=region_score,
            time_score=time_score,
            experience_hits=sorted(set(job.tags).intersection(worker.experience_tags))[:2] or [job.category],
            reliability_score=reliability_score,
            target_region=job.region,
        )
        return MatchResult(
            match_id=generate_id("match"),
            source_type=SourceType.job_to_worker,
            source_id=job.job_id,
            target_id=worker.worker_id,
            match_score=round(score, 2),
            breakdown=breakdown,
            reasons=reasons,
        )

    def _build_worker_to_job_match(self, worker: WorkerCard, job: JobCard) -> MatchResult:
        """Worker→job direction: the pair score is symmetric, so reuse job→worker scoring."""
        reverse = self._build_job_to_worker_match(job, worker)
        return MatchResult(
            match_id=generate_id("match"),
            source_type=SourceType.worker_to_job,
            source_id=worker.worker_id,
            target_id=job.job_id,
            match_score=reverse.match_score,
            breakdown=reverse.breakdown,
            reasons=reverse.reasons,
        )

    def _region_score(self, job_city: str, job_region: str, worker_cities: list[str], worker_regions: list[str]) -> float:
        """1.0 same district, 0.7 same city, 0.2 otherwise."""
        if job_region in worker_regions:
            return 1.0
        if job_city in worker_cities:
            return 0.7
        return 0.2

    def _time_score(self, start_time: datetime, availability: list[str]) -> float:
        """Map the job start to a slot tag and check the worker's availability."""
        if "anytime" in availability:
            return 1.0
        is_weekend = start_time.weekday() >= 5
        desired = "weekend" if is_weekend else ("weekday_pm" if start_time.hour >= 12 else "weekday_am")
        return 1.0 if desired in availability else 0.4

    def _experience_score(self, left: list[str], right: list[str]) -> float:
        """Overlap ratio of the two tag sets with a 0.4 floor, capped at 1.0."""
        left_set = set(left)
        right_set = set(right)
        if not left_set or not right_set:
            return 0.4
        overlap = len(left_set.intersection(right_set))
        return min(overlap / max(len(left_set), 1) + 0.4, 1.0)

    def _weighted_score(
        self,
        skill_score: float,
        region_score: float,
        time_score: float,
        experience_score: float,
        reliability_score: float,
    ) -> float:
        """Blend the component scores with the configured weights."""
        return (
            self.settings.score_skill_weight * skill_score
            + self.settings.score_region_weight * region_score
            + self.settings.score_time_weight * time_score
            + self.settings.score_experience_weight * experience_score
            + self.settings.score_reliability_weight * reliability_score
        )

    def _build_reasons(
        self,
        matched_skills: list[str],
        region_hit: float,
        time_score: float,
        experience_hits: list[str],
        reliability_score: float,
        target_region: str,
    ) -> list[str]:
        """Turn component scores into 3-5 human-readable recommendation reasons."""
        reasons = []
        if matched_skills:
            reasons.append(f"具备{'、'.join(matched_skills[:3])}相关技能")
        if region_hit >= 1.0:
            reasons.append(f"服务区域覆盖{target_region},与岗位地点一致")
        elif region_hit >= 0.7:
            reasons.append("同城可到岗,区域匹配度较高")
        if time_score >= 1.0:
            reasons.append("可接单时间与岗位时间要求匹配")
        if experience_hits:
            reasons.append(f"具备{'、'.join(experience_hits[:2])}相关经验")
        if reliability_score >= 0.75:
            reasons.append("履约可信度较好,适合优先推荐")
        # Pad so the UI always has at least three reason lines.
        while len(reasons) < 3:
            reasons.append("岗位需求与候选画像存在基础匹配")
        return reasons[:5]
|
||||
143
gig-poc/apps/api/app/services/rag/lightrag_adapter.py
Normal file
143
gig-poc/apps/api/app/services/rag/lightrag_adapter.py
Normal file
@@ -0,0 +1,143 @@
|
||||
from __future__ import annotations

import json
import math
import zlib
from collections import defaultdict

from qdrant_client import QdrantClient, models

from app.core.config import Settings
from app.core.logging import logger
from app.domain.schemas import JobCard, QueryFilters, WorkerCard
|
||||
|
||||
|
||||
class LightRAGAdapter:
    """Qdrant-backed retrieval adapter: hashed bag-of-ngrams vectors plus a
    simple bidirectional skill-relation graph for query expansion."""

    def __init__(self, settings: Settings):
        self.settings = settings
        self.client = QdrantClient(url=settings.qdrant_url)
        self.skill_graph = self._load_skill_graph()

    def ensure_ready(self) -> None:
        """Create the collection on first use (idempotent)."""
        collections = {item.name for item in self.client.get_collections().collections}
        if self.settings.qdrant_collection not in collections:
            self.client.create_collection(
                collection_name=self.settings.qdrant_collection,
                vectors_config=models.VectorParams(size=self.settings.vector_size, distance=models.Distance.COSINE),
            )

    def health(self) -> str:
        """Round-trip to Qdrant; raises when the collection is unreachable."""
        self.ensure_ready()
        self.client.get_collection(self.settings.qdrant_collection)
        return "ok"

    def upsert_job(self, job: JobCard) -> None:
        """Index (or re-index) one job as a single point keyed by job_id."""
        self.ensure_ready()
        payload = {
            "entity_type": "job",
            "entity_id": job.job_id,
            "city": job.city,
            "region": job.region,
            "category": job.category,
            "skills": job.skills,
            "tags": job.tags,
            "document": self._serialize_job(job),
        }
        self.client.upsert(
            collection_name=self.settings.qdrant_collection,
            points=[
                models.PointStruct(
                    id=job.job_id,
                    vector=self._vectorize(payload["document"]),
                    payload=payload,
                )
            ],
        )

    def upsert_worker(self, worker: WorkerCard) -> None:
        """Index (or re-index) one worker as a single point keyed by worker_id."""
        self.ensure_ready()
        payload = {
            "entity_type": "worker",
            "entity_id": worker.worker_id,
            # Only the first city/region is stored as the filterable payload field.
            "city": worker.cities[0] if worker.cities else "",
            "region": worker.regions[0] if worker.regions else "",
            "category": worker.experience_tags[0] if worker.experience_tags else "",
            "skills": [item.name for item in worker.skills],
            "tags": worker.experience_tags,
            "document": self._serialize_worker(worker),
        }
        self.client.upsert(
            collection_name=self.settings.qdrant_collection,
            points=[
                models.PointStruct(
                    id=worker.worker_id,
                    vector=self._vectorize(payload["document"]),
                    payload=payload,
                )
            ],
        )

    def search(self, query_text: str, filters: QueryFilters, limit: int) -> list[str]:
        """Vector search constrained by entity type (and city when given).

        Region filtering happens client-side after the search, so the result
        may contain fewer than `limit` ids when a region filter is set.
        """
        self.ensure_ready()
        must = [models.FieldCondition(key="entity_type", match=models.MatchValue(value=filters.entity_type))]
        if filters.city:
            must.append(models.FieldCondition(key="city", match=models.MatchValue(value=filters.city)))
        query_filter = models.Filter(must=must)
        results = self.client.search(
            collection_name=self.settings.qdrant_collection,
            query_vector=self._vectorize(query_text),
            query_filter=query_filter,
            limit=limit,
            with_payload=True,
        )
        ids = []
        for point in results:
            payload = point.payload or {}
            if filters.region and payload.get("region") != filters.region:
                continue
            ids.append(str(payload.get("entity_id", point.id)))
        return ids

    def expand_skills(self, skills: list[str]) -> set[str]:
        """Return the input skills plus their direct graph neighbours."""
        expanded = set(skills)
        for skill in skills:
            expanded.update(self.skill_graph.get(skill, []))
        return expanded

    def _load_skill_graph(self) -> dict[str, set[str]]:
        """Load skill relations as an undirected adjacency map; empty when absent."""
        relations_path = self.settings.sample_data_dir / "skill_relations.json"
        if not relations_path.exists():
            return defaultdict(set)
        data = json.loads(relations_path.read_text(encoding="utf-8"))
        graph: dict[str, set[str]] = defaultdict(set)
        for source, targets in data.items():
            graph[source].update(targets)
            for target in targets:
                graph[target].add(source)
        return graph

    def _serialize_job(self, job: JobCard) -> str:
        """Concatenate the job's searchable fields into one document string."""
        return " ".join([job.title, job.category, job.city, job.region, *job.skills, *job.tags, job.description])

    def _serialize_worker(self, worker: WorkerCard) -> str:
        """Concatenate the worker's searchable fields into one document string."""
        return " ".join(
            [worker.name, *worker.cities, *worker.regions, *[item.name for item in worker.skills], *worker.experience_tags, worker.description]
        )

    def _vectorize(self, text: str) -> list[float]:
        """Hash tokens into a fixed-size, L2-normalised bag-of-ngrams vector.

        Uses zlib.crc32 instead of the builtin hash(): string hashing is
        randomised per process (PYTHONHASHSEED), which made stored vectors
        and query vectors incompatible across restarts.
        """
        vector = [0.0] * self.settings.vector_size
        for token in self._tokenize(text):
            index = zlib.crc32(token.encode("utf-8")) % self.settings.vector_size
            vector[index] += 1.0
        # Guard against the zero vector (empty text) when normalising.
        norm = math.sqrt(sum(item * item for item in vector)) or 1.0
        return [item / norm for item in vector]

    def _tokenize(self, text: str) -> list[str]:
        """Whitespace tokens plus character 2/3-grams (CJK has no word spaces)."""
        cleaned = [part.strip().lower() for part in text.replace(",", " ").replace("、", " ").replace("。", " ").split()]
        tokens = [part for part in cleaned if part]
        for size in (2, 3):
            for index in range(max(len(text) - size + 1, 0)):
                chunk = text[index : index + size].strip()
                if chunk:
                    tokens.append(chunk)
        return tokens
|
||||
6
gig-poc/apps/api/app/utils/ids.py
Normal file
6
gig-poc/apps/api/app/utils/ids.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from datetime import datetime
|
||||
from uuid import uuid4
|
||||
|
||||
|
||||
def generate_id(prefix: str) -> str:
    """Return a readable unique id of the form ``<prefix>_<YYYYmmddHHMMSS>_<6-hex>``."""
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    suffix = uuid4().hex[:6]
    return f"{prefix}_{stamp}_{suffix}"
|
||||
5
gig-poc/apps/api/app/utils/prompts.py
Normal file
5
gig-poc/apps/api/app/utils/prompts.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def load_prompt(path: Path) -> str:
    """Read a prompt template from disk as UTF-8 text."""
    with path.open(encoding="utf-8") as handle:
        return handle.read()
|
||||
21
gig-poc/apps/api/pyproject.toml
Normal file
21
gig-poc/apps/api/pyproject.toml
Normal file
@@ -0,0 +1,21 @@
|
||||
[project]
|
||||
name = "gig-poc-api"
|
||||
version = "0.1.0"
|
||||
description = "Flexible gig work POC API"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
"fastapi==0.115.12",
|
||||
"uvicorn[standard]==0.34.0",
|
||||
"sqlalchemy==2.0.40",
|
||||
"psycopg[binary]==3.2.6",
|
||||
"pydantic==2.11.3",
|
||||
"pydantic-settings==2.8.1",
|
||||
"httpx==0.28.1",
|
||||
"qdrant-client==1.14.2",
|
||||
"python-dateutil==2.9.0.post0",
|
||||
"orjson==3.10.16",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=68", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
20
gig-poc/apps/web/Dockerfile
Normal file
20
gig-poc/apps/web/Dockerfile
Normal file
@@ -0,0 +1,20 @@
|
||||
# Stage 1: build the Vite/React bundle with Node.
# NOTE(review): base images are pulled via the daocloud mirror — presumably for
# mainland-China network access; confirm before reusing in other environments.
FROM docker.m.daocloud.io/library/node:22-alpine AS builder

# Point apk and npm at domestic mirrors to speed up package installs.
RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories
RUN npm config set registry https://registry.npmmirror.com

WORKDIR /app

# Copy only what the build needs; paths are relative to the repo-root build context.
COPY apps/web/package.json /app/package.json
COPY apps/web/tsconfig.json /app/tsconfig.json
COPY apps/web/tsconfig.app.json /app/tsconfig.app.json
COPY apps/web/vite.config.ts /app/vite.config.ts
COPY apps/web/index.html /app/index.html
COPY apps/web/src /app/src

RUN npm install && npm run build

# Stage 2: serve the static bundle with nginx (keeps the final image small).
FROM docker.m.daocloud.io/library/nginx:1.27-alpine

COPY infrastructure/nginx/default.conf /etc/nginx/conf.d/default.conf
COPY --from=builder /app/dist /usr/share/nginx/html
|
||||
12
gig-poc/apps/web/index.html
Normal file
12
gig-poc/apps/web/index.html
Normal file
@@ -0,0 +1,12 @@
|
||||
<!doctype html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Gig POC Console</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
<script type="module" src="/src/main.tsx"></script>
|
||||
</body>
|
||||
</html>
|
||||
23
gig-poc/apps/web/package.json
Normal file
23
gig-poc/apps/web/package.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"name": "gig-poc-web",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite --host 0.0.0.0 --port 5173",
|
||||
"build": "tsc -b && vite build",
|
||||
"preview": "vite preview --host 0.0.0.0 --port 4173"
|
||||
},
|
||||
"dependencies": {
|
||||
"react": "18.3.1",
|
||||
"react-dom": "18.3.1",
|
||||
"react-router-dom": "6.30.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/react": "18.3.20",
|
||||
"@types/react-dom": "18.3.6",
|
||||
"@vitejs/plugin-react": "4.3.4",
|
||||
"typescript": "5.8.3",
|
||||
"vite": "5.4.18"
|
||||
}
|
||||
}
|
||||
41
gig-poc/apps/web/src/App.tsx
Normal file
41
gig-poc/apps/web/src/App.tsx
Normal file
@@ -0,0 +1,41 @@
|
||||
import { NavLink, Route, Routes } from "react-router-dom";
|
||||
import { JobPage } from "./pages/JobPage";
|
||||
import { WorkerPage } from "./pages/WorkerPage";
|
||||
import { DataBrowserPage } from "./pages/DataBrowserPage";
|
||||
import { StatusPage } from "./pages/StatusPage";
|
||||
|
||||
const navItems = [
|
||||
{ to: "/", label: "岗位测试" },
|
||||
{ to: "/workers", label: "工人测试" },
|
||||
{ to: "/browse", label: "数据浏览" },
|
||||
{ to: "/status", label: "系统状态" }
|
||||
];
|
||||
|
||||
export default function App() {
|
||||
return (
|
||||
<div className="layout-shell">
|
||||
<aside className="side-nav">
|
||||
<div>
|
||||
<p className="eyebrow">Gig POC</p>
|
||||
<h1>灵活用工匹配控制台</h1>
|
||||
<p className="side-copy">围绕“岗位理解、工人理解、LightRAG 检索、匹配排序、推荐解释”构建的最小演示台。</p>
|
||||
</div>
|
||||
<nav className="nav-list">
|
||||
{navItems.map((item) => (
|
||||
<NavLink key={item.to} to={item.to} end={item.to === "/"} className="nav-link">
|
||||
{item.label}
|
||||
</NavLink>
|
||||
))}
|
||||
</nav>
|
||||
</aside>
|
||||
<main className="content-area">
|
||||
<Routes>
|
||||
<Route path="/" element={<JobPage />} />
|
||||
<Route path="/workers" element={<WorkerPage />} />
|
||||
<Route path="/browse" element={<DataBrowserPage />} />
|
||||
<Route path="/status" element={<StatusPage />} />
|
||||
</Routes>
|
||||
</main>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
33
gig-poc/apps/web/src/api/client.ts
Normal file
33
gig-poc/apps/web/src/api/client.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
const API_BASE = import.meta.env.VITE_API_BASE ?? "/api";
|
||||
|
||||
async function request<T>(path: string, init?: RequestInit): Promise<T> {
|
||||
const response = await fetch(`${API_BASE}${path}`, {
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
...(init?.headers ?? {})
|
||||
},
|
||||
...init
|
||||
});
|
||||
if (!response.ok) {
|
||||
const text = await response.text();
|
||||
throw new Error(text || `Request failed: ${response.status}`);
|
||||
}
|
||||
return response.json() as Promise<T>;
|
||||
}
|
||||
|
||||
export const api = {
|
||||
health: () => request("/health"),
|
||||
extractJob: (text: string) => request("/poc/extract/job", { method: "POST", body: JSON.stringify({ text }) }),
|
||||
extractWorker: (text: string) => request("/poc/extract/worker", { method: "POST", body: JSON.stringify({ text }) }),
|
||||
ingestJob: (job: unknown) => request("/poc/ingest/job", { method: "POST", body: JSON.stringify({ job }) }),
|
||||
ingestWorker: (worker: unknown) => request("/poc/ingest/worker", { method: "POST", body: JSON.stringify({ worker }) }),
|
||||
bootstrap: () => request("/poc/ingest/bootstrap", { method: "POST" }),
|
||||
matchWorkers: (job: unknown, top_n = 10) =>
|
||||
request("/poc/match/workers", { method: "POST", body: JSON.stringify({ job, top_n }) }),
|
||||
matchJobs: (worker: unknown, top_n = 10) =>
|
||||
request("/poc/match/jobs", { method: "POST", body: JSON.stringify({ worker, top_n }) }),
|
||||
jobs: () => request("/poc/jobs"),
|
||||
workers: () => request("/poc/workers"),
|
||||
job: (jobId: string) => request(`/poc/jobs/${jobId}`),
|
||||
worker: (workerId: string) => request(`/poc/workers/${workerId}`)
|
||||
};
|
||||
15
gig-poc/apps/web/src/components/JsonPanel.tsx
Normal file
15
gig-poc/apps/web/src/components/JsonPanel.tsx
Normal file
@@ -0,0 +1,15 @@
|
||||
type JsonPanelProps = {
|
||||
title: string;
|
||||
data: unknown;
|
||||
};
|
||||
|
||||
export function JsonPanel({ title, data }: JsonPanelProps) {
|
||||
return (
|
||||
<section className="panel">
|
||||
<div className="panel-head">
|
||||
<h3>{title}</h3>
|
||||
</div>
|
||||
<pre className="code-block">{JSON.stringify(data, null, 2)}</pre>
|
||||
</section>
|
||||
);
|
||||
}
|
||||
34
gig-poc/apps/web/src/components/MatchList.tsx
Normal file
34
gig-poc/apps/web/src/components/MatchList.tsx
Normal file
@@ -0,0 +1,34 @@
|
||||
// One ranked row as returned by the /poc/match/* endpoints.
type MatchItem = {
  match_id: string;
  target_id: string;
  match_score: number;
  // Human-readable recommendation reasons generated by the backend.
  reasons: string[];
  // Per-dimension score components (skill/region/time/experience/reliability).
  breakdown: Record<string, number>;
};

// Renders a titled grid of match cards: target id + overall score, the reason
// list, and the raw score breakdown as JSON for debugging/tuning.
export function MatchList({ title, items }: { title: string; items: MatchItem[] }) {
  return (
    <section className="panel">
      <div className="panel-head">
        <h3>{title}</h3>
        <span className="badge">{items.length} 条</span>
      </div>
      <div className="match-grid">
        {items.map((item) => (
          <article key={item.match_id} className="match-card">
            <div className="match-topline">
              <strong>{item.target_id}</strong>
              <span>{item.match_score}</span>
            </div>
            <div className="reason-list">
              {/* NOTE(review): reason text is used as the React key — assumes
                  reasons are unique within one item; confirm with backend. */}
              {item.reasons.map((reason) => (
                <p key={reason}>{reason}</p>
              ))}
            </div>
            <pre className="mini-code">{JSON.stringify(item.breakdown, null, 2)}</pre>
          </article>
        ))}
      </div>
    </section>
  );
}
|
||||
13
gig-poc/apps/web/src/main.tsx
Normal file
13
gig-poc/apps/web/src/main.tsx
Normal file
@@ -0,0 +1,13 @@
|
||||
// Application entry point: mounts the SPA into #root with client-side routing.
import React from "react";
import ReactDOM from "react-dom/client";
import { BrowserRouter } from "react-router-dom";
import App from "./App";
import "./styles/global.css";

// StrictMode double-invokes renders/effects in development to surface unsafe
// patterns; the non-null assertion assumes index.html always contains #root.
ReactDOM.createRoot(document.getElementById("root")!).render(
  <React.StrictMode>
    <BrowserRouter>
      <App />
    </BrowserRouter>
  </React.StrictMode>
);
|
||||
28
gig-poc/apps/web/src/pages/DataBrowserPage.tsx
Normal file
28
gig-poc/apps/web/src/pages/DataBrowserPage.tsx
Normal file
@@ -0,0 +1,28 @@
|
||||
import { useEffect, useState } from "react";
|
||||
import { api } from "../api/client";
|
||||
import { JsonPanel } from "../components/JsonPanel";
|
||||
|
||||
export function DataBrowserPage() {
|
||||
const [jobs, setJobs] = useState<any[]>([]);
|
||||
const [workers, setWorkers] = useState<any[]>([]);
|
||||
|
||||
useEffect(() => {
|
||||
void (async () => {
|
||||
const [jobResult, workerResult] = await Promise.all([api.jobs(), api.workers()]);
|
||||
setJobs(jobResult.items.slice(0, 12));
|
||||
setWorkers(workerResult.items.slice(0, 12));
|
||||
})();
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<div className="page-grid">
|
||||
<section className="hero-card">
|
||||
<p className="eyebrow">Page C</p>
|
||||
<h2>数据浏览页</h2>
|
||||
<p>浏览当前已入库的岗位与工人样本,方便验证 bootstrap 与前后端查询链路。</p>
|
||||
</section>
|
||||
<JsonPanel title="Jobs" data={jobs} />
|
||||
<JsonPanel title="Workers" data={workers} />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
53
gig-poc/apps/web/src/pages/JobPage.tsx
Normal file
53
gig-poc/apps/web/src/pages/JobPage.tsx
Normal file
@@ -0,0 +1,53 @@
|
||||
import { useState } from "react";
import { api } from "../api/client";
import { JsonPanel } from "../components/JsonPanel";
import { MatchList } from "../components/MatchList";

// Seed text so the demo can be exercised with a single click.
const DEFAULT_TEXT = "明天下午南山会展中心需要2个签到协助,5小时,150/人,女生优先,需要会签到、引导和登记。";

// Page A: paste a free-text job description, extract a structured JobCard,
// then ingest it and fetch the top-10 matching workers.
export function JobPage() {
  const [text, setText] = useState(DEFAULT_TEXT);
  const [jobCard, setJobCard] = useState<unknown>(null);
  const [matches, setMatches] = useState<any[]>([]);
  // One flag disables both buttons while either request is in flight.
  const [loading, setLoading] = useState(false);

  // Calls POST /poc/extract/job and stores the returned card.
  const handleExtract = async () => {
    setLoading(true);
    try {
      const result = await api.extractJob(text);
      setJobCard(result.data);
    } finally {
      // NOTE(review): request errors are not caught here and surface as
      // unhandled rejections; only the loading flag is guaranteed to reset.
      setLoading(false);
    }
  };

  // Persists the extracted card first, then ranks workers against it.
  const handleIngestAndMatch = async () => {
    if (!jobCard) {
      return;
    }
    setLoading(true);
    try {
      await api.ingestJob(jobCard);
      const result = await api.matchWorkers(jobCard, 10);
      setMatches(result.items);
    } finally {
      setLoading(false);
    }
  };

  return (
    <div className="page-grid">
      <section className="hero-card">
        <p className="eyebrow">Page A</p>
        <h2>岗位测试页</h2>
        <textarea value={text} onChange={(event) => setText(event.target.value)} rows={8} />
        <div className="button-row">
          <button onClick={handleExtract} disabled={loading}>抽取岗位</button>
          {/* Second step is gated on a successful extraction. */}
          <button className="ghost" onClick={handleIngestAndMatch} disabled={loading || !jobCard}>入库并匹配工人</button>
        </div>
      </section>
      {jobCard ? <JsonPanel title="JobCard" data={jobCard} /> : null}
      {matches.length ? <MatchList title="匹配工人结果" items={matches} /> : null}
    </div>
  );
}
|
||||
43
gig-poc/apps/web/src/pages/StatusPage.tsx
Normal file
43
gig-poc/apps/web/src/pages/StatusPage.tsx
Normal file
@@ -0,0 +1,43 @@
|
||||
import { useState } from "react";
import { api } from "../api/client";
import { JsonPanel } from "../components/JsonPanel";

// Page D: operational page — probe /health and trigger the sample-data
// bootstrap, showing each raw response in a JSON panel.
export function StatusPage() {
  const [health, setHealth] = useState<unknown>(null);
  const [bootstrapResult, setBootstrapResult] = useState<unknown>(null);
  // Shared flag disables both buttons while either request is running.
  const [loading, setLoading] = useState(false);

  // GET /health — service/database/rag probe.
  const handleHealth = async () => {
    setLoading(true);
    try {
      setHealth(await api.health());
    } finally {
      // NOTE(review): request errors are not caught and surface as
      // unhandled rejections; only the loading flag is reset.
      setLoading(false);
    }
  };

  // POST /poc/ingest/bootstrap — import sample jobs/workers/vocabularies.
  const handleBootstrap = async () => {
    setLoading(true);
    try {
      setBootstrapResult(await api.bootstrap());
    } finally {
      setLoading(false);
    }
  };

  return (
    <div className="page-grid">
      <section className="hero-card">
        <p className="eyebrow">Page D</p>
        <h2>系统状态页</h2>
        <p>检查 API、PostgreSQL、Qdrant 健康状态,并执行样本数据 bootstrap。</p>
        <div className="button-row">
          <button onClick={handleHealth} disabled={loading}>检查健康状态</button>
          <button className="ghost" onClick={handleBootstrap} disabled={loading}>导入样本数据</button>
        </div>
      </section>
      {health ? <JsonPanel title="Health" data={health} /> : null}
      {bootstrapResult ? <JsonPanel title="Bootstrap" data={bootstrapResult} /> : null}
    </div>
  );
}
|
||||
53
gig-poc/apps/web/src/pages/WorkerPage.tsx
Normal file
53
gig-poc/apps/web/src/pages/WorkerPage.tsx
Normal file
@@ -0,0 +1,53 @@
|
||||
import { useState } from "react";
import { api } from "../api/client";
import { JsonPanel } from "../components/JsonPanel";
import { MatchList } from "../components/MatchList";

// Seed text so the demo can be exercised with a single click.
const DEFAULT_TEXT = "我做过商场促销和活动签到,也能做登记和引导,周末都能接,福田南山都方便。";

// Page B: mirror of JobPage for the worker direction — extract a WorkerCard
// from free text, ingest it, then fetch the top-10 matching jobs.
export function WorkerPage() {
  const [text, setText] = useState(DEFAULT_TEXT);
  const [workerCard, setWorkerCard] = useState<unknown>(null);
  const [matches, setMatches] = useState<any[]>([]);
  // One flag disables both buttons while either request is in flight.
  const [loading, setLoading] = useState(false);

  // Calls POST /poc/extract/worker and stores the returned card.
  const handleExtract = async () => {
    setLoading(true);
    try {
      const result = await api.extractWorker(text);
      setWorkerCard(result.data);
    } finally {
      // NOTE(review): request errors are not caught here and surface as
      // unhandled rejections; only the loading flag is guaranteed to reset.
      setLoading(false);
    }
  };

  // Persists the extracted card first, then ranks jobs against it.
  const handleIngestAndMatch = async () => {
    if (!workerCard) {
      return;
    }
    setLoading(true);
    try {
      await api.ingestWorker(workerCard);
      const result = await api.matchJobs(workerCard, 10);
      setMatches(result.items);
    } finally {
      setLoading(false);
    }
  };

  return (
    <div className="page-grid">
      <section className="hero-card">
        <p className="eyebrow">Page B</p>
        <h2>工人测试页</h2>
        <textarea value={text} onChange={(event) => setText(event.target.value)} rows={8} />
        <div className="button-row">
          <button onClick={handleExtract} disabled={loading}>抽取工人画像</button>
          {/* Second step is gated on a successful extraction. */}
          <button className="ghost" onClick={handleIngestAndMatch} disabled={loading || !workerCard}>入库并匹配岗位</button>
        </div>
      </section>
      {workerCard ? <JsonPanel title="WorkerCard" data={workerCard} /> : null}
      {matches.length ? <MatchList title="匹配岗位结果" items={matches} /> : null}
    </div>
  );
}
|
||||
176
gig-poc/apps/web/src/styles/global.css
Normal file
176
gig-poc/apps/web/src/styles/global.css
Normal file
@@ -0,0 +1,176 @@
|
||||
:root {
|
||||
font-family: "PingFang SC", "Noto Sans SC", sans-serif;
|
||||
color: #0f172a;
|
||||
background:
|
||||
radial-gradient(circle at top left, rgba(34, 197, 94, 0.18), transparent 25%),
|
||||
radial-gradient(circle at 80% 20%, rgba(14, 165, 233, 0.16), transparent 22%),
|
||||
linear-gradient(135deg, #f8fafc 0%, #eefbf3 45%, #f3f7fb 100%);
|
||||
line-height: 1.5;
|
||||
font-weight: 400;
|
||||
}
|
||||
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 0;
|
||||
min-height: 100vh;
|
||||
}
|
||||
|
||||
button,
|
||||
textarea {
|
||||
font: inherit;
|
||||
}
|
||||
|
||||
.layout-shell {
|
||||
display: grid;
|
||||
grid-template-columns: 320px 1fr;
|
||||
min-height: 100vh;
|
||||
}
|
||||
|
||||
.side-nav {
|
||||
padding: 32px 24px;
|
||||
background: linear-gradient(180deg, rgba(15, 23, 42, 0.96) 0%, rgba(21, 44, 79, 0.92) 100%);
|
||||
color: #e2e8f0;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: space-between;
|
||||
}
|
||||
|
||||
.eyebrow {
|
||||
margin: 0 0 10px;
|
||||
letter-spacing: 0.14em;
|
||||
text-transform: uppercase;
|
||||
font-size: 12px;
|
||||
color: #38bdf8;
|
||||
}
|
||||
|
||||
.side-copy {
|
||||
color: #bfd6ea;
|
||||
max-width: 220px;
|
||||
}
|
||||
|
||||
.nav-list {
|
||||
display: grid;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.nav-link {
|
||||
color: #dbeafe;
|
||||
text-decoration: none;
|
||||
padding: 12px 14px;
|
||||
border-radius: 16px;
|
||||
background: rgba(255, 255, 255, 0.06);
|
||||
transition: transform 160ms ease, background 160ms ease;
|
||||
}
|
||||
|
||||
.nav-link.active,
|
||||
.nav-link:hover {
|
||||
background: rgba(56, 189, 248, 0.18);
|
||||
transform: translateX(3px);
|
||||
}
|
||||
|
||||
.content-area {
|
||||
padding: 28px;
|
||||
}
|
||||
|
||||
.page-grid {
|
||||
display: grid;
|
||||
gap: 20px;
|
||||
}
|
||||
|
||||
.hero-card,
|
||||
.panel,
|
||||
.match-card {
|
||||
border: 1px solid rgba(148, 163, 184, 0.25);
|
||||
background: rgba(255, 255, 255, 0.82);
|
||||
backdrop-filter: blur(10px);
|
||||
border-radius: 24px;
|
||||
box-shadow: 0 18px 50px rgba(15, 23, 42, 0.08);
|
||||
}
|
||||
|
||||
.hero-card,
|
||||
.panel {
|
||||
padding: 22px;
|
||||
}
|
||||
|
||||
.panel-head,
|
||||
.match-topline,
|
||||
.button-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
textarea {
|
||||
width: 100%;
|
||||
margin: 12px 0 0;
|
||||
border: 1px solid #cbd5e1;
|
||||
border-radius: 18px;
|
||||
padding: 16px;
|
||||
background: #f8fafc;
|
||||
resize: vertical;
|
||||
}
|
||||
|
||||
button {
|
||||
border: 0;
|
||||
border-radius: 999px;
|
||||
padding: 12px 18px;
|
||||
background: linear-gradient(135deg, #0f766e 0%, #0284c7 100%);
|
||||
color: white;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
button.ghost {
|
||||
background: #e2e8f0;
|
||||
color: #0f172a;
|
||||
}
|
||||
|
||||
button:disabled {
|
||||
opacity: 0.6;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.badge {
|
||||
background: #dcfce7;
|
||||
color: #166534;
|
||||
border-radius: 999px;
|
||||
padding: 4px 12px;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.code-block,
|
||||
.mini-code {
|
||||
margin: 0;
|
||||
overflow: auto;
|
||||
background: #0f172a;
|
||||
color: #dbeafe;
|
||||
border-radius: 18px;
|
||||
padding: 16px;
|
||||
}
|
||||
|
||||
.match-grid {
|
||||
display: grid;
|
||||
gap: 14px;
|
||||
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
|
||||
}
|
||||
|
||||
.match-card {
|
||||
padding: 18px;
|
||||
}
|
||||
|
||||
.reason-list p {
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
@media (max-width: 920px) {
|
||||
.layout-shell {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.side-copy {
|
||||
max-width: none;
|
||||
}
|
||||
}
|
||||
20
gig-poc/apps/web/tsconfig.app.json
Normal file
20
gig-poc/apps/web/tsconfig.app.json
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2020",
|
||||
"useDefineForClassFields": true,
|
||||
"lib": ["ES2020", "DOM", "DOM.Iterable"],
|
||||
"allowJs": false,
|
||||
"skipLibCheck": true,
|
||||
"esModuleInterop": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"strict": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "Node",
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"noEmit": true,
|
||||
"jsx": "react-jsx"
|
||||
},
|
||||
"include": ["src"]
|
||||
}
|
||||
6
gig-poc/apps/web/tsconfig.json
Normal file
6
gig-poc/apps/web/tsconfig.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"files": [],
|
||||
"references": [
|
||||
{ "path": "./tsconfig.app.json" }
|
||||
]
|
||||
}
|
||||
10
gig-poc/apps/web/vite.config.ts
Normal file
10
gig-poc/apps/web/vite.config.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
// Vite configuration for the web console (dev server + build).
import { defineConfig } from "vite";
import react from "@vitejs/plugin-react";

export default defineConfig({
  plugins: [react()],
  server: {
    port: 5173,
    // Bind all interfaces so the dev server is reachable from outside
    // its Docker container.
    host: "0.0.0.0"
  }
});
|
||||
75
gig-poc/docs/API.md
Normal file
75
gig-poc/docs/API.md
Normal file
@@ -0,0 +1,75 @@
|
||||
# API 文档
|
||||
|
||||
## 系统接口
|
||||
### `GET /health`
|
||||
返回:
|
||||
```json
|
||||
{
|
||||
"service": "ok",
|
||||
"database": "ok",
|
||||
"rag": "ok",
|
||||
"timestamp": "2026-03-30T12:00:00+08:00"
|
||||
}
|
||||
```
|
||||
|
||||
## 抽取接口
|
||||
### `POST /poc/extract/job`
|
||||
请求:
|
||||
```json
|
||||
{ "text": "明天下午南山会展中心需要2个签到协助,5小时,150/人,女生优先" }
|
||||
```
|
||||
|
||||
### `POST /poc/extract/worker`
|
||||
请求:
|
||||
```json
|
||||
{ "text": "我做过商场促销和活动签到,也能做登记和引导,周末都能接,福田南山都方便。" }
|
||||
```
|
||||
|
||||
## 入库接口
|
||||
### `POST /poc/ingest/job`
|
||||
请求体:
|
||||
```json
|
||||
{ "job": { "...": "JobCard" } }
|
||||
```
|
||||
|
||||
### `POST /poc/ingest/worker`
|
||||
请求体:
|
||||
```json
|
||||
{ "worker": { "...": "WorkerCard" } }
|
||||
```
|
||||
|
||||
### `POST /poc/ingest/bootstrap`
|
||||
说明:导入样本数据、词表、Qdrant 检索索引数据。
|
||||
|
||||
## 匹配接口
|
||||
### `POST /poc/match/workers`
|
||||
支持 `job_id` 或内联 `job`:
|
||||
```json
|
||||
{
|
||||
"job_id": "job_001",
|
||||
"top_n": 10
|
||||
}
|
||||
```
|
||||
|
||||
### `POST /poc/match/jobs`
|
||||
支持 `worker_id` 或内联 `worker`:
|
||||
```json
|
||||
{
|
||||
"worker_id": "worker_001",
|
||||
"top_n": 10
|
||||
}
|
||||
```
|
||||
|
||||
### `GET /poc/match/explain/{match_id}`
|
||||
返回具体匹配明细与理由。
|
||||
|
||||
## 查询接口
|
||||
### `GET /poc/jobs`
|
||||
### `GET /poc/workers`
|
||||
### `GET /poc/jobs/{job_id}`
|
||||
### `GET /poc/workers/{worker_id}`
|
||||
|
||||
## 交接说明
|
||||
- 抽取接口返回 `success/data/errors/missing_fields`,方便后续切换更强 LLM 时做错误回退。
|
||||
- 匹配接口输出 `breakdown` 五维打分,可直接给后续运营、策略或模型团队继续调权。
|
||||
- `packages/shared-types/src/index.ts` 保留了前端可复用类型定义。
|
||||
42
gig-poc/docs/ARCHITECTURE.md
Normal file
42
gig-poc/docs/ARCHITECTURE.md
Normal file
@@ -0,0 +1,42 @@
|
||||
# 架构说明
|
||||
|
||||
## 总体架构
|
||||
项目采用前后端分离部署:
|
||||
- `apps/api`:FastAPI 服务,负责抽取、入库、匹配、解释、查询
|
||||
- `apps/web`:React/Vite 控制台,负责 POC 演示
|
||||
- `postgres`:结构化数据持久化
|
||||
- `qdrant`:LightRAG 风格向量召回层
|
||||
|
||||
## 后端分层
|
||||
- `api`:HTTP 路由
|
||||
- `services`:抽取、入库、匹配、RAG 适配
|
||||
- `repositories`:数据库读写
|
||||
- `domain`:Pydantic schema 与 SQLAlchemy model
|
||||
|
||||
## 抽取链路
|
||||
1. 读取 `packages/prompts`
|
||||
2. 若配置了外部 LLM,则走 OpenAI 兼容结构化抽取
|
||||
3. 如果外部 LLM 不可用,则回退到本地规则抽取
|
||||
4. 所有输出统一走 Pydantic 校验
|
||||
|
||||
## RAG 方案
|
||||
当前 POC 使用 `LightRAGAdapter` 作为轻量适配层:
|
||||
- 结构化卡片先落 PostgreSQL
|
||||
- 关键信息序列化后写入 Qdrant
|
||||
- 使用技能近义词图做扩展召回
|
||||
- 对召回结果进入规则打分排序
|
||||
|
||||
## 匹配公式
|
||||
```text
|
||||
MatchScore =
|
||||
0.35 * SkillScore
|
||||
+ 0.20 * RegionScore
|
||||
+ 0.15 * TimeScore
|
||||
+ 0.15 * ExperienceScore
|
||||
+ 0.15 * ReliabilityScore
|
||||
```
|
||||
|
||||
## 部署方式
|
||||
- 本地:`infrastructure/docker-compose.yml`
|
||||
- 生产:`infrastructure/docker-compose.prod.yml`
|
||||
- 前端通过 Nginx 独立容器部署,并反向代理 API,保持前后端分离
|
||||
27
gig-poc/docs/DEMO.md
Normal file
27
gig-poc/docs/DEMO.md
Normal file
@@ -0,0 +1,27 @@
|
||||
# Demo 说明
|
||||
|
||||
## 一键启动
|
||||
```bash
|
||||
cd gig-poc
|
||||
sh infrastructure/scripts/dev-up.sh
|
||||
```
|
||||
|
||||
## 演示步骤
|
||||
1. 打开 `http://127.0.0.1:5173`
|
||||
2. 在“岗位测试页”输入岗位文本并点击“抽取岗位”
|
||||
3. 点击“入库并匹配工人”,观察 TopN 匹配与推荐原因
|
||||
4. 在“工人测试页”输入工人文本并点击“抽取工人画像”
|
||||
5. 点击“入库并匹配岗位”,观察 TopN 匹配与推荐原因
|
||||
6. 打开“数据浏览页”查看已入库样本
|
||||
7. 打开“系统状态页”查看健康状态,必要时重新执行 bootstrap
|
||||
|
||||
## 生产环境启动
|
||||
```bash
|
||||
cd gig-poc
|
||||
sh infrastructure/scripts/prod-up.sh
|
||||
```
|
||||
|
||||
## 演示建议
|
||||
- 先演示系统状态页,确认健康与 bootstrap 正常
|
||||
- 再演示岗位找人、人找岗位两个闭环
|
||||
- 最后展开 OpenAPI 文档讲接口交接方式
|
||||
78
gig-poc/docs/README.md
Normal file
78
gig-poc/docs/README.md
Normal file
@@ -0,0 +1,78 @@
|
||||
# Gig POC README
|
||||
|
||||
## 项目目标
|
||||
本项目实现一个“灵活用工最小 POC 内核系统”,围绕岗位理解、工人理解、LightRAG 检索、匹配排序和推荐解释构建完整闭环。
|
||||
|
||||
## 技术栈
|
||||
- 后端:Python 3.11、FastAPI、Pydantic、SQLAlchemy、Uvicorn
|
||||
- 数据层:PostgreSQL 16、Qdrant
|
||||
- 前端:React 19、Vite 6
|
||||
- 部署:Docker Compose
|
||||
- 模型接入:OpenAI 兼容 LLM 接口,可选开启;默认启用本地规则兜底,推荐模型默认值为 `gpt-5.4`
|
||||
|
||||
## 目录结构
|
||||
```text
|
||||
/gig-poc
|
||||
/apps
|
||||
/web
|
||||
/api
|
||||
/packages
|
||||
/shared-types
|
||||
/prompts
|
||||
/sample-data
|
||||
/infrastructure
|
||||
docker-compose.yml
|
||||
docker-compose.prod.yml
|
||||
/sql
|
||||
/scripts
|
||||
/docs
|
||||
README.md
|
||||
API.md
|
||||
ARCHITECTURE.md
|
||||
DEMO.md
|
||||
```
|
||||
|
||||
## 环境变量说明
|
||||
- `DATABASE_URL`:PostgreSQL 连接串
|
||||
- `QDRANT_URL`:Qdrant 服务地址
|
||||
- `LLM_ENABLED`:是否启用外部 LLM 抽取
|
||||
- `LLM_BASE_URL`:OpenAI 兼容接口地址
|
||||
- `LLM_API_KEY`:模型服务密钥
|
||||
- `LLM_MODEL`:模型名称
|
||||
|
||||
## 启动方式
|
||||
1. `cd gig-poc`
|
||||
2. `sh infrastructure/scripts/dev-up.sh`
|
||||
|
||||
## 样本导入方式
|
||||
`dev-up.sh` 会在健康检查通过后自动触发 `/poc/ingest/bootstrap`,导入 100 岗位、300 工人和词表。
|
||||
|
||||
## API 地址
|
||||
- `http://127.0.0.1:8000`
|
||||
- OpenAPI:`http://127.0.0.1:8000/docs`
|
||||
|
||||
## 前端访问地址
|
||||
- `http://127.0.0.1:5173`
|
||||
|
||||
## 演示路径
|
||||
1. 打开岗位测试页,输入岗位描述并抽取
|
||||
2. 点击入库并匹配工人
|
||||
3. 打开工人测试页,输入工人描述并抽取
|
||||
4. 点击入库并匹配岗位
|
||||
5. 在系统状态页执行健康检查和样本导入
|
||||
|
||||
## 已实现范围
|
||||
- 岗位抽取
|
||||
- 工人抽取
|
||||
- JobCard / WorkerCard / MatchResult 统一 schema
|
||||
- 样本数据 bootstrap
|
||||
- LightRAG 风格向量检索适配层
|
||||
- 两阶段召回与排序
|
||||
- 推荐理由生成
|
||||
- Web 控制台
|
||||
- Docker Compose 本地与生产部署
|
||||
|
||||
## 未实现范围
|
||||
- 裂变、支付、合同、工时、薪资结算
|
||||
- 权限系统与复杂后台
|
||||
- 大规模并发优化
|
||||
52
gig-poc/infrastructure/docker-compose.prod.yml
Normal file
52
gig-poc/infrastructure/docker-compose.prod.yml
Normal file
@@ -0,0 +1,52 @@
|
||||
name: gig-poc-prod
|
||||
|
||||
services:
|
||||
postgres:
|
||||
image: docker.m.daocloud.io/library/postgres:16-alpine
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
POSTGRES_DB: ${POSTGRES_DB:-gig_poc}
|
||||
POSTGRES_USER: ${POSTGRES_USER:-gig}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-gig}
|
||||
TZ: Asia/Shanghai
|
||||
volumes:
|
||||
- postgres_prod_data:/var/lib/postgresql/data
|
||||
- ./sql:/docker-entrypoint-initdb.d
|
||||
|
||||
qdrant:
|
||||
image: docker.m.daocloud.io/qdrant/qdrant:v1.14.1
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- qdrant_prod_data:/qdrant/storage
|
||||
|
||||
api:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: apps/api/Dockerfile
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
APP_ENV: production
|
||||
DATABASE_URL: postgresql+psycopg://${POSTGRES_USER:-gig}:${POSTGRES_PASSWORD:-gig}@postgres:5432/${POSTGRES_DB:-gig_poc}
|
||||
QDRANT_URL: http://qdrant:6333
|
||||
LOG_LEVEL: INFO
|
||||
LLM_ENABLED: ${LLM_ENABLED:-false}
|
||||
LLM_BASE_URL: ${LLM_BASE_URL:-}
|
||||
LLM_API_KEY: ${LLM_API_KEY:-}
|
||||
LLM_MODEL: ${LLM_MODEL:-gpt-5.4}
|
||||
depends_on:
|
||||
- postgres
|
||||
- qdrant
|
||||
|
||||
web:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: apps/web/Dockerfile
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- api
|
||||
ports:
|
||||
- "${WEB_PORT:-80}:80"
|
||||
|
||||
volumes:
|
||||
postgres_prod_data:
|
||||
qdrant_prod_data:
|
||||
59
gig-poc/infrastructure/docker-compose.yml
Normal file
59
gig-poc/infrastructure/docker-compose.yml
Normal file
@@ -0,0 +1,59 @@
|
||||
name: gig-poc
|
||||
|
||||
services:
|
||||
postgres:
|
||||
image: docker.m.daocloud.io/library/postgres:16-alpine
|
||||
environment:
|
||||
POSTGRES_DB: gig_poc
|
||||
POSTGRES_USER: gig
|
||||
POSTGRES_PASSWORD: gig
|
||||
TZ: Asia/Shanghai
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
- ./sql:/docker-entrypoint-initdb.d
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U gig -d gig_poc"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
ports:
|
||||
- "5432:5432"
|
||||
|
||||
qdrant:
|
||||
image: docker.m.daocloud.io/qdrant/qdrant:v1.14.1
|
||||
volumes:
|
||||
- qdrant_data:/qdrant/storage
|
||||
ports:
|
||||
- "6333:6333"
|
||||
|
||||
api:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: apps/api/Dockerfile
|
||||
environment:
|
||||
APP_ENV: docker
|
||||
DATABASE_URL: postgresql+psycopg://gig:gig@postgres:5432/gig_poc
|
||||
QDRANT_URL: http://qdrant:6333
|
||||
LOG_LEVEL: INFO
|
||||
LLM_ENABLED: "false"
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
qdrant:
|
||||
condition: service_started
|
||||
ports:
|
||||
- "8000:8000"
|
||||
|
||||
web:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: apps/web/Dockerfile
|
||||
depends_on:
|
||||
api:
|
||||
condition: service_started
|
||||
ports:
|
||||
- "5173:80"
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
qdrant_data:
|
||||
25
gig-poc/infrastructure/nginx/default.conf
Normal file
25
gig-poc/infrastructure/nginx/default.conf
Normal file
@@ -0,0 +1,25 @@
|
||||
# Serves the built SPA and reverse-proxies API traffic to the `api` container,
# keeping the frontend and backend on a single origin (no CORS needed).
server {
    listen 80;
    server_name _;
    root /usr/share/nginx/html;
    index index.html;

    # SPA fallback: unknown paths resolve to index.html so client-side
    # routing (react-router) handles them.
    location / {
        try_files $uri $uri/ /index.html;
    }

    # Trailing slash on proxy_pass strips the /api/ prefix before forwarding,
    # so /api/health reaches the backend as /health.
    location /api/ {
        proxy_pass http://api:8000/;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    }

    # Expose FastAPI's interactive docs through the same origin.
    location /docs {
        proxy_pass http://api:8000/docs;
    }

    # The docs page fetches the schema from this absolute path.
    location /openapi.json {
        proxy_pass http://api:8000/openapi.json;
    }
}
|
||||
16
gig-poc/infrastructure/scripts/dev-up.sh
Executable file
16
gig-poc/infrastructure/scripts/dev-up.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env sh
# Bring up the local dev stack, wait for the API to become healthy, then
# trigger the sample-data bootstrap.
#
# Fix over the original: both wait loops previously retried forever, so a
# broken container hung the script silently. They now give up after a
# bounded number of attempts and exit non-zero with a message.
set -eu

# Resolve the infrastructure directory relative to this script, independent
# of the caller's working directory.
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname "$0")" && pwd)
INFRA_DIR=$(CDPATH= cd -- "$SCRIPT_DIR/.." && pwd)

# Run the given command every 3s until it succeeds, at most 60 times (~3 min).
wait_until() {
  _attempts=0
  until "$@" >/dev/null 2>&1; do
    _attempts=$((_attempts + 1))
    if [ "$_attempts" -ge 60 ]; then
      echo "timeout waiting for: $*" >&2
      return 1
    fi
    sleep 3
  done
}

cd "$INFRA_DIR"
docker compose -f docker-compose.yml up --build -d
echo "等待 API 健康检查..."
wait_until curl -fsS http://127.0.0.1:8000/health
wait_until curl -fsS -X POST http://127.0.0.1:8000/poc/ingest/bootstrap
echo "本地环境已启动。Web: http://127.0.0.1:5173 API: http://127.0.0.1:8000/docs"
|
||||
8
gig-poc/infrastructure/scripts/down.sh
Executable file
8
gig-poc/infrastructure/scripts/down.sh
Executable file
@@ -0,0 +1,8 @@
|
||||
#!/usr/bin/env sh
# Tear down the local dev stack started by dev-up.sh.
set -eu

# Locate this script's directory so the compose file is found regardless of
# the caller's working directory.
script_dir=$(CDPATH= cd -- "$(dirname "$0")" && pwd)
infra_dir=$(CDPATH= cd -- "$script_dir/.." && pwd)

cd "$infra_dir"
docker compose -f docker-compose.yml down
|
||||
9
gig-poc/infrastructure/scripts/prod-up.sh
Executable file
9
gig-poc/infrastructure/scripts/prod-up.sh
Executable file
@@ -0,0 +1,9 @@
|
||||
#!/usr/bin/env sh
# Build and start the production compose stack in detached mode.
set -eu

# Resolve the infrastructure directory relative to this script, independent
# of the caller's working directory.
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname "$0")" && pwd)
INFRA_DIR=$(CDPATH= cd -- "$SCRIPT_DIR/.." && pwd)

cd "$INFRA_DIR"
docker compose -f docker-compose.prod.yml up --build -d
echo "生产部署容器已启动。请按实际域名或端口访问 Web。"
|
||||
1
gig-poc/infrastructure/sql/01-init.sql
Normal file
1
gig-poc/infrastructure/sql/01-init.sql
Normal file
@@ -0,0 +1 @@
|
||||
SET TIME ZONE 'Asia/Shanghai';
|
||||
28
gig-poc/packages/prompts/job_extract.md
Normal file
28
gig-poc/packages/prompts/job_extract.md
Normal file
@@ -0,0 +1,28 @@
|
||||
# 岗位抽取 Prompt
|
||||
|
||||
你是一个灵活用工岗位结构化抽取助手。
|
||||
请将输入的自然语言岗位描述提取为严格 JSON,不要输出 JSON 之外的任何文字。
|
||||
|
||||
必须输出字段:
|
||||
- job_id
|
||||
- title
|
||||
- category
|
||||
- description
|
||||
- skills
|
||||
- city
|
||||
- region
|
||||
- location_detail
|
||||
- start_time
|
||||
- duration_hours
|
||||
- headcount
|
||||
- salary
|
||||
- work_mode
|
||||
- tags
|
||||
- confidence
|
||||
|
||||
要求:
|
||||
- 保留原始 description
|
||||
- 时间使用 ISO 8601
|
||||
- skills 输出字符串数组
|
||||
- salary 输出 {type, amount, currency}
|
||||
- 如果缺失字段,尽量结合上下文推断,并降低 confidence
|
||||
14
gig-poc/packages/prompts/match_explain.md
Normal file
14
gig-poc/packages/prompts/match_explain.md
Normal file
@@ -0,0 +1,14 @@
|
||||
# 匹配解释 Prompt
|
||||
|
||||
你是一个推荐解释生成助手。
|
||||
请基于输入的岗位卡片、工人卡片和评分明细,生成 3-5 条真实、简洁、可验证的推荐理由。
|
||||
|
||||
优先覆盖:
|
||||
- 技能原因
|
||||
- 区域原因
|
||||
- 时间原因
|
||||
- 经验原因
|
||||
|
||||
输出要求:
|
||||
- 返回 JSON 数组
|
||||
- 每条理由不超过 30 字
|
||||
23
gig-poc/packages/prompts/worker_extract.md
Normal file
23
gig-poc/packages/prompts/worker_extract.md
Normal file
@@ -0,0 +1,23 @@
|
||||
# 工人抽取 Prompt
|
||||
|
||||
你是一个灵活用工工人画像结构化抽取助手。
|
||||
请将输入的自然语言工人描述提取为严格 JSON,不要输出 JSON 之外的任何文字。
|
||||
|
||||
必须输出字段:
|
||||
- worker_id
|
||||
- name
|
||||
- description
|
||||
- skills
|
||||
- cities
|
||||
- regions
|
||||
- availability
|
||||
- experience_tags
|
||||
- reliability_score
|
||||
- profile_completion
|
||||
- confidence
|
||||
|
||||
要求:
|
||||
- 保留原始 description
|
||||
- skills 输出 [{name, score}]
|
||||
- availability 使用标签:weekend / weekday_am / weekday_pm / anytime
|
||||
- 根据描述推断 experience_tags、区域和技能分值
|
||||
32
gig-poc/packages/sample-data/categories.json
Normal file
32
gig-poc/packages/sample-data/categories.json
Normal file
@@ -0,0 +1,32 @@
|
||||
[
|
||||
"促销",
|
||||
"地推",
|
||||
"导购",
|
||||
"会展",
|
||||
"分拣",
|
||||
"客服",
|
||||
"安装",
|
||||
"配送",
|
||||
"仓储",
|
||||
"活动执行",
|
||||
"礼仪",
|
||||
"数据录入",
|
||||
"盘点",
|
||||
"装卸",
|
||||
"直播协助",
|
||||
"社群运营",
|
||||
"收银",
|
||||
"拣货",
|
||||
"线下推广",
|
||||
"电话销售",
|
||||
"问卷调研",
|
||||
"样品派发",
|
||||
"售后服务",
|
||||
"展台搭建",
|
||||
"酒店服务",
|
||||
"商超导购",
|
||||
"会场服务",
|
||||
"物流协助",
|
||||
"活动控场",
|
||||
"物料执行"
|
||||
]
|
||||
183
gig-poc/packages/sample-data/generate_sample_data.py
Normal file
183
gig-poc/packages/sample-data/generate_sample_data.py
Normal file
@@ -0,0 +1,183 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import random
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parent
|
||||
random.seed(42)
|
||||
|
||||
skills = [
|
||||
"签到",
|
||||
"引导",
|
||||
"登记",
|
||||
"促销",
|
||||
"地推",
|
||||
"导购",
|
||||
"会展接待",
|
||||
"分拣",
|
||||
"打包",
|
||||
"客服",
|
||||
"电话邀约",
|
||||
"安装",
|
||||
"配送",
|
||||
"仓储",
|
||||
"陈列",
|
||||
"数据录入",
|
||||
"物料搬运",
|
||||
"收银",
|
||||
"盘点",
|
||||
"直播协助",
|
||||
"短视频拍摄",
|
||||
"面销",
|
||||
"海报派发",
|
||||
"问卷访问",
|
||||
"现场执行",
|
||||
"活动控场",
|
||||
"礼仪接待",
|
||||
"样品派发",
|
||||
"售后支持",
|
||||
"装卸",
|
||||
"拣货",
|
||||
"骑手配送",
|
||||
"司机协助",
|
||||
"设备调试",
|
||||
"展台搭建",
|
||||
"线上客服",
|
||||
"社群运营",
|
||||
"线索收集",
|
||||
"POS操作",
|
||||
"报表整理",
|
||||
]
|
||||
|
||||
# Pad the curated skill list with numbered placeholders and cap at exactly
# 100 entries; generation below only samples from the first 40 "real" skills.
extra_skills = [f"扩展技能{i:02d}" for i in range(1, 61)]
skills = (skills + extra_skills)[:100]
|
||||
|
||||
categories = [
|
||||
"促销",
|
||||
"地推",
|
||||
"导购",
|
||||
"会展",
|
||||
"分拣",
|
||||
"客服",
|
||||
"安装",
|
||||
"配送",
|
||||
"仓储",
|
||||
"活动执行",
|
||||
"礼仪",
|
||||
"数据录入",
|
||||
"盘点",
|
||||
"装卸",
|
||||
"直播协助",
|
||||
"社群运营",
|
||||
"收银",
|
||||
"拣货",
|
||||
"线下推广",
|
||||
"电话销售",
|
||||
"问卷调研",
|
||||
"样品派发",
|
||||
"售后服务",
|
||||
"展台搭建",
|
||||
"酒店服务",
|
||||
"商超导购",
|
||||
"会场服务",
|
||||
"物流协助",
|
||||
"活动控场",
|
||||
"物料执行",
|
||||
]
|
||||
|
||||
regions = [
|
||||
{"city": "深圳", "region": "南山"},
|
||||
{"city": "深圳", "region": "福田"},
|
||||
{"city": "深圳", "region": "宝安"},
|
||||
{"city": "深圳", "region": "龙岗"},
|
||||
{"city": "深圳", "region": "罗湖"},
|
||||
{"city": "广州", "region": "天河"},
|
||||
{"city": "广州", "region": "海珠"},
|
||||
{"city": "广州", "region": "番禺"},
|
||||
{"city": "广州", "region": "白云"},
|
||||
{"city": "广州", "region": "越秀"},
|
||||
{"city": "上海", "region": "浦东"},
|
||||
{"city": "上海", "region": "徐汇"},
|
||||
{"city": "上海", "region": "静安"},
|
||||
{"city": "上海", "region": "闵行"},
|
||||
{"city": "上海", "region": "杨浦"},
|
||||
{"city": "杭州", "region": "西湖"},
|
||||
{"city": "杭州", "region": "滨江"},
|
||||
{"city": "杭州", "region": "余杭"},
|
||||
{"city": "成都", "region": "高新"},
|
||||
{"city": "成都", "region": "武侯"},
|
||||
]
|
||||
|
||||
# Shared vocabulary pools used by both job and worker generation below.
# Availability tags match the worker-extraction prompt's label set.
time_tags = ["weekend", "weekday_am", "weekday_pm", "anytime"]
experience_tags = ["商场", "会展", "活动执行", "物流", "零售", "校园推广", "客服中心", "展台", "仓库", "快消"]
work_modes = ["排班制", "兼职", "临时工", "项目制"]
|
||||
|
||||
|
||||
def region_label(item: dict) -> str:
    """Join a region record's city and district into one display label."""
    return "{city}{region}".format(**item)
|
||||
|
||||
|
||||
# Synthesize 100 JobCard records (randomness is seeded above, so output is
# reproducible across runs).
jobs = []
for index in range(1, 101):
    region = random.choice(regions)
    category = random.choice(categories)
    # Sample 3-5 skills, only from the first 40 (the curated, non-placeholder ones).
    job_skills = random.sample(skills[:40], k=random.randint(3, 5))
    start_at = datetime(2026, 4, 1, 9, 0, 0) + timedelta(days=random.randint(0, 20), hours=random.choice([0, 4, 8]))
    # Two experience tags plus one preference tag; set() dedupes but leaves order arbitrary.
    tags = list(set(random.sample(experience_tags, k=2) + [random.choice(["女生优先", "有经验优先", "可连做优先", "沟通好"]) ]))
    salary_amount = random.choice([120, 150, 180, 200, 220, 260, 300])
    jobs.append(
        {
            "job_id": f"job_{index:03d}",
            "title": f"{category}兼职{index:03d}",
            "category": category,
            # Natural-language mirror of the structured fields, used as extraction input.
            # NOTE(review): the headcount and hours embedded here are drawn independently
            # of the "headcount"/"duration_hours" fields below, so they may disagree —
            # confirm whether that noise is intentional.
            "description": f"{start_at.month}月{start_at.day}日{region['city']}{region['region']}需要{random.randint(1,4)}名{category}兼职,负责{'、'.join(job_skills[:3])},{random.randint(4,8)}小时,{salary_amount}元/天。",
            "skills": job_skills,
            "city": region["city"],
            "region": region["region"],
            "location_detail": f"{region_label(region)}核心商圈点位{random.randint(1, 20)}号",
            # ISO 8601 with an explicit +08:00 (Asia/Shanghai) offset.
            "start_time": start_at.isoformat() + "+08:00",
            "duration_hours": random.randint(4, 8),
            "headcount": random.randint(1, 4),
            "salary": {"type": "daily", "amount": salary_amount, "currency": "CNY"},
            "work_mode": random.choice(work_modes),
            "tags": tags,
            "confidence": round(random.uniform(0.82, 0.96), 2),
        }
    )
|
||||
|
||||
|
||||
# Synthesize 300 WorkerCard records with randomly composed Chinese names.
workers = []
family_names = "赵钱孙李周吴郑王冯陈褚卫蒋沈韩杨朱秦尤许何吕施张孔曹严华金魏陶姜"
given_names = ["伟", "芳", "娜", "敏", "静", "磊", "洋", "勇", "艳", "杰", "娟", "涛", "明", "超", "秀英", "霞"]
for index in range(1, 301):
    primary_region = random.choice(regions)
    # Pick a second, different district in the same city. Every city in
    # `regions` has at least two districts, so the candidate list is never empty.
    extra_region = random.choice([r for r in regions if r["city"] == primary_region["city"] and r["region"] != primary_region["region"]])
    # 3-6 skills from the curated first 40 entries only.
    worker_skills = random.sample(skills[:40], k=random.randint(3, 6))
    name = random.choice(list(family_names)) + random.choice(given_names)
    availability = random.sample(time_tags, k=random.randint(1, 2))
    # Two experience tags plus one of the first 10 categories; set() dedupes.
    exp_tags = list(set(random.sample(experience_tags, k=2) + [random.choice(categories[:10])]))
    workers.append(
        {
            "worker_id": f"worker_{index:03d}",
            "name": name,
            # Natural-language mirror of the structured fields, used as extraction input.
            "description": f"我做过{'、'.join(exp_tags[:2])}相关兼职,擅长{'、'.join(worker_skills[:3])},平时{ '和'.join(availability) }都能接单,{primary_region['region']}和{extra_region['region']}都方便。",
            # Skills carry per-skill proficiency scores, matching the worker prompt schema.
            "skills": [{"name": item, "score": round(random.uniform(0.62, 0.94), 2)} for item in worker_skills],
            "cities": [primary_region["city"]],
            "regions": [primary_region["region"], extra_region["region"]],
            "availability": availability,
            "experience_tags": exp_tags,
            "reliability_score": round(random.uniform(0.65, 0.95), 2),
            "profile_completion": round(random.uniform(0.55, 0.98), 2),
            "confidence": round(random.uniform(0.8, 0.96), 2),
        }
    )
|
||||
|
||||
|
||||
# Persist every generated dataset next to this script as pretty-printed,
# UTF-8 JSON (ensure_ascii=False keeps the Chinese text readable).
(ROOT / "skills.json").write_text(json.dumps(skills, ensure_ascii=False, indent=2), encoding="utf-8")
(ROOT / "categories.json").write_text(json.dumps(categories, ensure_ascii=False, indent=2), encoding="utf-8")
(ROOT / "regions.json").write_text(json.dumps(regions, ensure_ascii=False, indent=2), encoding="utf-8")
(ROOT / "jobs.json").write_text(json.dumps(jobs, ensure_ascii=False, indent=2), encoding="utf-8")
(ROOT / "workers.json").write_text(json.dumps(workers, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
2999
gig-poc/packages/sample-data/jobs.json
Normal file
2999
gig-poc/packages/sample-data/jobs.json
Normal file
File diff suppressed because it is too large
Load Diff
82
gig-poc/packages/sample-data/regions.json
Normal file
82
gig-poc/packages/sample-data/regions.json
Normal file
@@ -0,0 +1,82 @@
|
||||
[
|
||||
{
|
||||
"city": "深圳",
|
||||
"region": "南山"
|
||||
},
|
||||
{
|
||||
"city": "深圳",
|
||||
"region": "福田"
|
||||
},
|
||||
{
|
||||
"city": "深圳",
|
||||
"region": "宝安"
|
||||
},
|
||||
{
|
||||
"city": "深圳",
|
||||
"region": "龙岗"
|
||||
},
|
||||
{
|
||||
"city": "深圳",
|
||||
"region": "罗湖"
|
||||
},
|
||||
{
|
||||
"city": "广州",
|
||||
"region": "天河"
|
||||
},
|
||||
{
|
||||
"city": "广州",
|
||||
"region": "海珠"
|
||||
},
|
||||
{
|
||||
"city": "广州",
|
||||
"region": "番禺"
|
||||
},
|
||||
{
|
||||
"city": "广州",
|
||||
"region": "白云"
|
||||
},
|
||||
{
|
||||
"city": "广州",
|
||||
"region": "越秀"
|
||||
},
|
||||
{
|
||||
"city": "上海",
|
||||
"region": "浦东"
|
||||
},
|
||||
{
|
||||
"city": "上海",
|
||||
"region": "徐汇"
|
||||
},
|
||||
{
|
||||
"city": "上海",
|
||||
"region": "静安"
|
||||
},
|
||||
{
|
||||
"city": "上海",
|
||||
"region": "闵行"
|
||||
},
|
||||
{
|
||||
"city": "上海",
|
||||
"region": "杨浦"
|
||||
},
|
||||
{
|
||||
"city": "杭州",
|
||||
"region": "西湖"
|
||||
},
|
||||
{
|
||||
"city": "杭州",
|
||||
"region": "滨江"
|
||||
},
|
||||
{
|
||||
"city": "杭州",
|
||||
"region": "余杭"
|
||||
},
|
||||
{
|
||||
"city": "成都",
|
||||
"region": "高新"
|
||||
},
|
||||
{
|
||||
"city": "成都",
|
||||
"region": "武侯"
|
||||
}
|
||||
]
|
||||
20
gig-poc/packages/sample-data/skill_relations.json
Normal file
20
gig-poc/packages/sample-data/skill_relations.json
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"签到": ["登记", "引导", "会展接待"],
|
||||
"登记": ["签到", "数据录入"],
|
||||
"引导": ["签到", "礼仪接待", "现场执行"],
|
||||
"促销": ["导购", "面销", "样品派发"],
|
||||
"导购": ["促销", "陈列", "收银"],
|
||||
"会展接待": ["礼仪接待", "签到", "现场执行"],
|
||||
"分拣": ["拣货", "打包", "仓储"],
|
||||
"打包": ["分拣", "装卸"],
|
||||
"客服": ["线上客服", "电话邀约", "售后支持"],
|
||||
"安装": ["设备调试", "展台搭建"],
|
||||
"配送": ["骑手配送", "司机协助", "装卸"],
|
||||
"仓储": ["分拣", "拣货", "盘点"],
|
||||
"数据录入": ["报表整理", "登记"],
|
||||
"装卸": ["物料搬运", "打包", "配送"],
|
||||
"展台搭建": ["设备调试", "活动控场"],
|
||||
"电话邀约": ["客服", "线索收集"],
|
||||
"社群运营": ["线上客服", "线索收集"],
|
||||
"盘点": ["仓储", "收银", "POS操作"]
|
||||
}
|
||||
102
gig-poc/packages/sample-data/skills.json
Normal file
102
gig-poc/packages/sample-data/skills.json
Normal file
@@ -0,0 +1,102 @@
|
||||
[
|
||||
"签到",
|
||||
"引导",
|
||||
"登记",
|
||||
"促销",
|
||||
"地推",
|
||||
"导购",
|
||||
"会展接待",
|
||||
"分拣",
|
||||
"打包",
|
||||
"客服",
|
||||
"电话邀约",
|
||||
"安装",
|
||||
"配送",
|
||||
"仓储",
|
||||
"陈列",
|
||||
"数据录入",
|
||||
"物料搬运",
|
||||
"收银",
|
||||
"盘点",
|
||||
"直播协助",
|
||||
"短视频拍摄",
|
||||
"面销",
|
||||
"海报派发",
|
||||
"问卷访问",
|
||||
"现场执行",
|
||||
"活动控场",
|
||||
"礼仪接待",
|
||||
"样品派发",
|
||||
"售后支持",
|
||||
"装卸",
|
||||
"拣货",
|
||||
"骑手配送",
|
||||
"司机协助",
|
||||
"设备调试",
|
||||
"展台搭建",
|
||||
"线上客服",
|
||||
"社群运营",
|
||||
"线索收集",
|
||||
"POS操作",
|
||||
"报表整理",
|
||||
"扩展技能01",
|
||||
"扩展技能02",
|
||||
"扩展技能03",
|
||||
"扩展技能04",
|
||||
"扩展技能05",
|
||||
"扩展技能06",
|
||||
"扩展技能07",
|
||||
"扩展技能08",
|
||||
"扩展技能09",
|
||||
"扩展技能10",
|
||||
"扩展技能11",
|
||||
"扩展技能12",
|
||||
"扩展技能13",
|
||||
"扩展技能14",
|
||||
"扩展技能15",
|
||||
"扩展技能16",
|
||||
"扩展技能17",
|
||||
"扩展技能18",
|
||||
"扩展技能19",
|
||||
"扩展技能20",
|
||||
"扩展技能21",
|
||||
"扩展技能22",
|
||||
"扩展技能23",
|
||||
"扩展技能24",
|
||||
"扩展技能25",
|
||||
"扩展技能26",
|
||||
"扩展技能27",
|
||||
"扩展技能28",
|
||||
"扩展技能29",
|
||||
"扩展技能30",
|
||||
"扩展技能31",
|
||||
"扩展技能32",
|
||||
"扩展技能33",
|
||||
"扩展技能34",
|
||||
"扩展技能35",
|
||||
"扩展技能36",
|
||||
"扩展技能37",
|
||||
"扩展技能38",
|
||||
"扩展技能39",
|
||||
"扩展技能40",
|
||||
"扩展技能41",
|
||||
"扩展技能42",
|
||||
"扩展技能43",
|
||||
"扩展技能44",
|
||||
"扩展技能45",
|
||||
"扩展技能46",
|
||||
"扩展技能47",
|
||||
"扩展技能48",
|
||||
"扩展技能49",
|
||||
"扩展技能50",
|
||||
"扩展技能51",
|
||||
"扩展技能52",
|
||||
"扩展技能53",
|
||||
"扩展技能54",
|
||||
"扩展技能55",
|
||||
"扩展技能56",
|
||||
"扩展技能57",
|
||||
"扩展技能58",
|
||||
"扩展技能59",
|
||||
"扩展技能60"
|
||||
]
|
||||
13194
gig-poc/packages/sample-data/workers.json
Normal file
13194
gig-poc/packages/sample-data/workers.json
Normal file
File diff suppressed because it is too large
Load Diff
60
gig-poc/packages/shared-types/src/index.ts
Normal file
60
gig-poc/packages/shared-types/src/index.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
export type Salary = {
|
||||
type: "daily" | "hourly" | "monthly" | "task";
|
||||
amount: number;
|
||||
currency: string;
|
||||
};
|
||||
|
||||
export type JobCard = {
|
||||
job_id: string;
|
||||
title: string;
|
||||
category: string;
|
||||
description: string;
|
||||
skills: string[];
|
||||
city: string;
|
||||
region: string;
|
||||
location_detail: string;
|
||||
start_time: string;
|
||||
duration_hours: number;
|
||||
headcount: number;
|
||||
salary: Salary;
|
||||
work_mode: string;
|
||||
tags: string[];
|
||||
confidence: number;
|
||||
};
|
||||
|
||||
export type SkillScore = {
|
||||
name: string;
|
||||
score: number;
|
||||
};
|
||||
|
||||
export type WorkerCard = {
|
||||
worker_id: string;
|
||||
name: string;
|
||||
description: string;
|
||||
skills: SkillScore[];
|
||||
cities: string[];
|
||||
regions: string[];
|
||||
availability: string[];
|
||||
experience_tags: string[];
|
||||
reliability_score: number;
|
||||
profile_completion: number;
|
||||
confidence: number;
|
||||
};
|
||||
|
||||
export type MatchBreakdown = {
|
||||
skill_score: number;
|
||||
region_score: number;
|
||||
time_score: number;
|
||||
experience_score: number;
|
||||
reliability_score: number;
|
||||
};
|
||||
|
||||
export type MatchResult = {
|
||||
match_id: string;
|
||||
source_type: "job_to_worker" | "worker_to_job";
|
||||
source_id: string;
|
||||
target_id: string;
|
||||
match_score: number;
|
||||
breakdown: MatchBreakdown;
|
||||
reasons: string[];
|
||||
};
|
||||
784
gig_poc_coding_agent_guide.md
Normal file
784
gig_poc_coding_agent_guide.md
Normal file
@@ -0,0 +1,784 @@
|
||||
# 灵活用工最小 POC:Coding Agent 指导文档
|
||||
|
||||
## 1. 文档目的
|
||||
|
||||
本指导文档用于直接驱动 coding agent 落地一个“灵活用工最小 POC 内核系统”。
|
||||
|
||||
本次实现目标不是完整平台,不是完整招聘系统,也不是完整裂变系统,而是优先打通最核心的底层能力闭环:
|
||||
|
||||
> **岗位理解 → 工人理解 → LightRAG 检索 → 匹配排序 → 推荐解释**
|
||||
|
||||
coding agent 必须严格围绕这个主线执行,避免在低优先级模块上消耗时间。
|
||||
|
||||
---
|
||||
|
||||
## 2. 项目目标定义
|
||||
|
||||
## 2.1 业务目标
|
||||
|
||||
构建一个最小可运行的 POC,用于验证:
|
||||
|
||||
1. 平台能否从自然语言中提取岗位需求。
|
||||
2. 平台能否从自然语言中提取工人技能画像。
|
||||
3. 平台能否利用 LightRAG 构建“岗位—技能—工人”知识关联。
|
||||
4. 平台能否完成岗位找人、人找岗位两类匹配。
|
||||
5. 平台能否为匹配结果给出可解释的推荐原因。
|
||||
|
||||
## 2.2 POC 成功标准
|
||||
|
||||
满足以下条件即可判定 POC 成功:
|
||||
|
||||
- 输入一段岗位描述,系统返回结构化 JobCard。
|
||||
- 输入一段工人描述,系统返回结构化 WorkerCard。
|
||||
- 岗位可召回 TopN 候选工人。
|
||||
- 工人可召回 TopN 候选岗位。
|
||||
- 每条推荐结果都附带推荐原因。
|
||||
- 支持一个简单的 Web 测试台进行演示。
|
||||
|
||||
---
|
||||
|
||||
## 3. 本次实现边界
|
||||
|
||||
## 3.1 本次必须实现
|
||||
|
||||
### 核心输入能力
|
||||
- 岗位自然语言输入
|
||||
- 工人自然语言输入
|
||||
- 样本数据导入
|
||||
|
||||
### 核心 AI 能力
|
||||
- 岗位结构化抽取
|
||||
- 工人技能画像抽取
|
||||
- LightRAG 数据入库
|
||||
- LightRAG 查询与召回
|
||||
- 匹配排序
|
||||
- 匹配解释
|
||||
|
||||
### 核心输出能力
|
||||
- JobCard 输出
|
||||
- WorkerCard 输出
|
||||
- 匹配结果输出
|
||||
- 推荐原因输出
|
||||
|
||||
### 基础交互能力
|
||||
- 简易 Web 控制台
|
||||
- API 文档
|
||||
- 健康检查
|
||||
- 日志输出
|
||||
|
||||
## 3.2 本次明确不做
|
||||
|
||||
以下功能全部延后,不进入当前迭代:
|
||||
|
||||
- 裂变传播引擎
|
||||
- 邀请奖励
|
||||
- 社交分享接入
|
||||
- 电子合同
|
||||
- 工时记录
|
||||
- 薪资结算
|
||||
- 支付对接
|
||||
- 短视频岗位展示
|
||||
- 多角色复杂权限
|
||||
- 百万并发优化
|
||||
- 金融级安全全套建设
|
||||
- 完整后台管理系统
|
||||
- 真实线上地图派单
|
||||
- 复杂调度系统
|
||||
|
||||
coding agent 必须避免擅自扩展到上述模块。
|
||||
|
||||
---
|
||||
|
||||
## 4. 一句话架构原则
|
||||
|
||||
> **先做一个“可解释的人岗匹配内核”,再考虑把它包装成完整平台。**
|
||||
|
||||
---
|
||||
|
||||
## 5. 技术栈要求
|
||||
|
||||
## 5.1 后端
|
||||
优先采用:
|
||||
- Python 3.11+
|
||||
- FastAPI
|
||||
- Pydantic
|
||||
- SQLAlchemy
|
||||
- Uvicorn
|
||||
|
||||
## 5.2 数据层
|
||||
- PostgreSQL
|
||||
- Redis(可选,POC 阶段用于缓存或不启用)
|
||||
- 本地文件或 MinIO(样本附件可选)
|
||||
- pgvector / Qdrant(二选一,优先 Qdrant 或 pgvector)
|
||||
|
||||
## 5.3 AI / RAG
|
||||
- LightRAG
|
||||
- 一个可调用的 LLM 接口
|
||||
- 一个 Embedding 接口
|
||||
|
||||
## 5.4 前端
|
||||
优先采用:
|
||||
- Next.js 或 React Vite
|
||||
|
||||
如果 coding agent 需要进一步压缩复杂度,可以直接:
|
||||
- 使用 React + Vite + 简单组件
|
||||
|
||||
## 5.5 部署方式
|
||||
POC 阶段统一使用:
|
||||
- Docker Compose
|
||||
|
||||
---
|
||||
|
||||
## 6. 项目目录结构要求
|
||||
|
||||
coding agent 必须按以下结构组织项目:
|
||||
|
||||
```text
|
||||
/gig-poc
|
||||
/apps
|
||||
/web
|
||||
/api
|
||||
/packages
|
||||
/shared-types
|
||||
/prompts
|
||||
/sample-data
|
||||
/infrastructure
|
||||
docker-compose.yml
|
||||
/sql
|
||||
/scripts
|
||||
/docs
|
||||
README.md
|
||||
API.md
|
||||
ARCHITECTURE.md
|
||||
```
|
||||
|
||||
### 6.1 apps/web
|
||||
包含:
|
||||
- 岗位输入测试页
|
||||
- 工人输入测试页
|
||||
- 匹配结果页
|
||||
- 系统状态页
|
||||
|
||||
### 6.2 apps/api
|
||||
包含:
|
||||
- FastAPI 主服务
|
||||
- 路由定义
|
||||
- service 层
|
||||
- repository 层
|
||||
- rag 适配层
|
||||
- 匹配引擎
|
||||
|
||||
### 6.3 packages/shared-types
|
||||
包含:
|
||||
- JobCard schema
|
||||
- WorkerCard schema
|
||||
- MatchResult schema
|
||||
- API request/response schema
|
||||
|
||||
### 6.4 packages/prompts
|
||||
包含:
|
||||
- job_extract prompt
|
||||
- worker_extract prompt
|
||||
- explain_match prompt
|
||||
|
||||
### 6.5 packages/sample-data
|
||||
包含:
|
||||
- jobs.json
|
||||
- workers.json
|
||||
- skills.json
|
||||
- regions.json
|
||||
- categories.json
|
||||
|
||||
---
|
||||
|
||||
## 7. 统一数据结构定义
|
||||
|
||||
## 7.1 JobCard
|
||||
|
||||
coding agent 必须实现以下结构:
|
||||
|
||||
```json
|
||||
{
|
||||
"job_id": "job_001",
|
||||
"title": "会展签到协助",
|
||||
"category": "活动执行",
|
||||
"description": "明天下午南山会展中心需要2个签到协助,5小时,150/人,女生优先",
|
||||
"skills": ["签到", "引导", "登记"],
|
||||
"city": "深圳",
|
||||
"region": "南山",
|
||||
"location_detail": "南山会展中心",
|
||||
"start_time": "2026-03-31T13:00:00+08:00",
|
||||
"duration_hours": 5,
|
||||
"headcount": 2,
|
||||
"salary": {
|
||||
"type": "daily",
|
||||
"amount": 150,
|
||||
"currency": "CNY"
|
||||
},
|
||||
"work_mode": "排班制",
|
||||
"tags": ["女生优先", "有会展经验优先"],
|
||||
"confidence": 0.89
|
||||
}
|
||||
```
|
||||
|
||||
## 7.2 WorkerCard
|
||||
|
||||
```json
|
||||
{
|
||||
"worker_id": "worker_001",
|
||||
"name": "张三",
|
||||
"description": "我做过商场促销和活动签到,也能做登记和引导,周末都能接,福田南山都方便。",
|
||||
"skills": [
|
||||
{"name": "促销", "score": 0.82},
|
||||
{"name": "活动签到", "score": 0.76},
|
||||
{"name": "登记", "score": 0.68},
|
||||
{"name": "引导", "score": 0.70}
|
||||
],
|
||||
"cities": ["深圳"],
|
||||
"regions": ["福田", "南山"],
|
||||
"availability": ["weekend"],
|
||||
"experience_tags": ["商场", "会展", "活动执行"],
|
||||
"reliability_score": 0.76,
|
||||
"profile_completion": 0.64,
|
||||
"confidence": 0.86
|
||||
}
|
||||
```
|
||||
|
||||
## 7.3 MatchResult
|
||||
|
||||
```json
|
||||
{
|
||||
"match_id": "match_001",
|
||||
"source_type": "job_to_worker",
|
||||
"source_id": "job_001",
|
||||
"target_id": "worker_003",
|
||||
"match_score": 0.84,
|
||||
"breakdown": {
|
||||
"skill_score": 0.91,
|
||||
"region_score": 1.0,
|
||||
"time_score": 0.8,
|
||||
"experience_score": 0.75,
|
||||
"reliability_score": 0.72
|
||||
},
|
||||
"reasons": [
|
||||
"具备签到、引导、登记相关技能",
|
||||
"服务区域包含南山,与岗位地点一致",
|
||||
"周末可接单,与岗位时间匹配",
|
||||
"有活动执行经验,与岗位类别接近"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. LightRAG 数据建模要求
|
||||
|
||||
## 8.1 最小实体
|
||||
coding agent 至少需要支持以下实体:
|
||||
|
||||
- Job
|
||||
- Worker
|
||||
- Skill
|
||||
- Region
|
||||
- Category
|
||||
- ExperienceTag
|
||||
|
||||
## 8.2 最小关系
|
||||
至少支持以下关系:
|
||||
|
||||
- Job REQUIRES_SKILL Skill
|
||||
- Job LOCATED_IN Region
|
||||
- Job BELONGS_TO Category
|
||||
- Job HAS_TAG ExperienceTag
|
||||
- Worker HAS_SKILL Skill
|
||||
- Worker AVAILABLE_IN Region
|
||||
- Worker HAS_EXPERIENCE ExperienceTag
|
||||
- Skill SIMILAR_TO Skill
|
||||
|
||||
## 8.3 知识入库原则
|
||||
- 所有岗位文本入库前必须先转成结构化 JobCard
|
||||
- 所有工人文本入库前必须先转成结构化 WorkerCard
|
||||
- 技能表、区域表、岗位类别表应先作为基础词表导入
|
||||
|
||||
---
|
||||
|
||||
## 9. 抽取服务要求
|
||||
|
||||
## 9.1 岗位抽取服务
|
||||
### 输入
|
||||
自然语言岗位描述文本
|
||||
|
||||
### 输出
|
||||
符合 JobCard schema 的 JSON
|
||||
|
||||
### 实现要求
|
||||
- 优先使用 LLM 进行结构化抽取
|
||||
- 使用固定 JSON Schema
|
||||
- 必须包含字段校验
|
||||
- 抽取失败时返回缺失字段和错误信息
|
||||
- 必须保留原始 description
|
||||
|
||||
### 字段抽取重点
|
||||
- 岗位标题
|
||||
- 技能要求
|
||||
- 城市 / 区域 / 地点
|
||||
- 时间
|
||||
- 时长
|
||||
- 人数
|
||||
- 薪资
|
||||
- 标签偏好
|
||||
|
||||
## 9.2 工人抽取服务
|
||||
### 输入
|
||||
自然语言工人描述文本
|
||||
|
||||
### 输出
|
||||
符合 WorkerCard schema 的 JSON
|
||||
|
||||
### 实现要求
|
||||
- 优先使用 LLM 进行结构化抽取
|
||||
- 使用固定 JSON Schema
|
||||
- 必须输出技能项及分值
|
||||
- 必须输出区域和可服务时间
|
||||
- 必须保留原始 description
|
||||
|
||||
### 字段抽取重点
|
||||
- 技能
|
||||
- 经验标签
|
||||
- 服务区域
|
||||
- 可服务时间
|
||||
- 基础可信度字段
|
||||
|
||||
## 9.3 Prompt 设计要求
|
||||
coding agent 必须将 prompt 单独文件化,禁止把 prompt 硬编码到业务逻辑中。
|
||||
|
||||
要求至少拆成:
|
||||
- prompts/job_extract.md
|
||||
- prompts/worker_extract.md
|
||||
- prompts/match_explain.md
|
||||
|
||||
---
|
||||
|
||||
## 10. 匹配引擎要求
|
||||
|
||||
## 10.1 匹配模式
|
||||
必须支持两种:
|
||||
- 岗位找工人
|
||||
- 工人找岗位
|
||||
|
||||
## 10.2 匹配流程
|
||||
coding agent 必须按两阶段实现:
|
||||
|
||||
### 第一阶段:召回 Recall
|
||||
召回候选集合时,综合以下条件:
|
||||
- 技能关键词命中
|
||||
- 技能近义词/邻接技能命中
|
||||
- 地理区域命中
|
||||
- 类别/经验标签命中
|
||||
- 时间标签粗匹配
|
||||
|
||||
### 第二阶段:排序 Rank
|
||||
按以下公式实现第一版排序:
|
||||
|
||||
```text
|
||||
MatchScore =
|
||||
0.35 * SkillScore
|
||||
+ 0.20 * RegionScore
|
||||
+ 0.15 * TimeScore
|
||||
+ 0.15 * ExperienceScore
|
||||
+ 0.15 * ReliabilityScore
|
||||
```
|
||||
|
||||
## 10.3 评分实现约束
|
||||
### SkillScore
|
||||
- 先算技能命中率
|
||||
- 必备技能完全缺失时强制降权
|
||||
- 近义技能通过图谱扩展加分
|
||||
|
||||
### RegionScore
|
||||
- 同区 = 1.0
|
||||
- 同城不同区 = 0.7
|
||||
- 跨城 = 0.2
|
||||
|
||||
### TimeScore
|
||||
POC 阶段允许简化为标签匹配:
|
||||
- weekend
|
||||
- weekday_am
|
||||
- weekday_pm
|
||||
- anytime
|
||||
|
||||
### ExperienceScore
|
||||
按 experience_tags 与岗位 category/tags 的相似度计算
|
||||
|
||||
### ReliabilityScore
|
||||
POC 可直接读取静态字段或默认值
|
||||
|
||||
---
|
||||
|
||||
## 11. 推荐解释要求
|
||||
|
||||
coding agent 必须为每条匹配结果生成 reasons 数组。
|
||||
|
||||
## 11.1 reasons 生成原则
|
||||
每条推荐至少包含 3 条原因,优先覆盖:
|
||||
- 技能原因
|
||||
- 区域原因
|
||||
- 时间原因
|
||||
- 经验原因
|
||||
|
||||
## 11.2 示例
|
||||
```json
|
||||
[
|
||||
"具备签到、引导相关技能",
|
||||
"服务区域覆盖南山,与岗位地点一致",
|
||||
"周末可接单,与岗位时间要求匹配"
|
||||
]
|
||||
```
|
||||
|
||||
## 11.3 解释生成策略
|
||||
优先使用规则模板生成,必要时可用 LLM 美化文案。
|
||||
|
||||
POC 阶段原则:
|
||||
> 先保证解释真实可靠,再追求语言自然。
|
||||
|
||||
---
|
||||
|
||||
## 12. API 设计要求
|
||||
|
||||
coding agent 必须至少实现以下 API:
|
||||
|
||||
## 12.1 系统接口
|
||||
### `GET /health`
|
||||
返回服务状态、数据库状态、RAG 状态
|
||||
|
||||
## 12.2 抽取接口
|
||||
### `POST /poc/extract/job`
|
||||
输入岗位自然语言,返回 JobCard
|
||||
|
||||
### `POST /poc/extract/worker`
|
||||
输入工人自然语言,返回 WorkerCard
|
||||
|
||||
## 12.3 入库接口
|
||||
### `POST /poc/ingest/job`
|
||||
传入 JobCard,写入数据库 + LightRAG
|
||||
|
||||
### `POST /poc/ingest/worker`
|
||||
传入 WorkerCard,写入数据库 + LightRAG
|
||||
|
||||
### `POST /poc/ingest/bootstrap`
|
||||
导入样本数据与词表
|
||||
|
||||
## 12.4 匹配接口
|
||||
### `POST /poc/match/workers`
|
||||
输入 JobCard 或 job_id,返回 TopN Worker MatchResult
|
||||
|
||||
### `POST /poc/match/jobs`
|
||||
输入 WorkerCard 或 worker_id,返回 TopN Job MatchResult
|
||||
|
||||
### `GET /poc/match/explain/{match_id}`
|
||||
返回指定 match 的详细解释
|
||||
|
||||
## 12.5 查询接口
|
||||
### `GET /poc/jobs`
|
||||
### `GET /poc/workers`
|
||||
### `GET /poc/jobs/{job_id}`
|
||||
### `GET /poc/workers/{worker_id}`
|
||||
|
||||
---
|
||||
|
||||
## 13. 数据库表要求
|
||||
|
||||
coding agent 至少需要实现以下表:
|
||||
|
||||
## 13.1 jobs
|
||||
- id
|
||||
- title
|
||||
- category
|
||||
- description
|
||||
- city
|
||||
- region
|
||||
- location_detail
|
||||
- start_time
|
||||
- duration_hours
|
||||
- headcount
|
||||
- salary_type
|
||||
- salary_amount
|
||||
- tags_json
|
||||
- confidence
|
||||
- created_at
|
||||
|
||||
## 13.2 job_skills
|
||||
- job_id
|
||||
- skill_name
|
||||
- weight
|
||||
- is_required
|
||||
|
||||
## 13.3 workers
|
||||
- id
|
||||
- name
|
||||
- description
|
||||
- cities_json
|
||||
- regions_json
|
||||
- availability_json
|
||||
- experience_tags_json
|
||||
- reliability_score
|
||||
- profile_completion
|
||||
- confidence
|
||||
- created_at
|
||||
|
||||
## 13.4 worker_skills
|
||||
- worker_id
|
||||
- skill_name
|
||||
- score
|
||||
|
||||
## 13.5 matches
|
||||
- id
|
||||
- source_type
|
||||
- source_id
|
||||
- target_id
|
||||
- match_score
|
||||
- breakdown_json
|
||||
- reasons_json
|
||||
- created_at
|
||||
|
||||
---
|
||||
|
||||
## 14. Web 控制台要求
|
||||
|
||||
coding agent 必须实现一个最小可演示前端。
|
||||
|
||||
## 14.1 页面 A:岗位测试页
|
||||
### 功能
|
||||
- 输入岗位描述
|
||||
- 点击“抽取岗位”
|
||||
- 显示 JobCard
|
||||
- 点击“入库并匹配工人”
|
||||
- 显示匹配结果列表
|
||||
|
||||
## 14.2 页面 B:工人测试页
|
||||
### 功能
|
||||
- 输入工人描述
|
||||
- 点击“抽取工人画像”
|
||||
- 显示 WorkerCard
|
||||
- 点击“入库并匹配岗位”
|
||||
- 显示匹配结果列表
|
||||
|
||||
## 14.3 页面 C:数据浏览页
|
||||
### 功能
|
||||
- 浏览已导入岗位
|
||||
- 浏览已导入工人
|
||||
- 查看详情
|
||||
|
||||
## 14.4 页面 D:系统状态页
|
||||
### 功能
|
||||
- 显示服务健康状态
|
||||
- 显示数据库连接状态
|
||||
- 显示 RAG 初始化状态
|
||||
|
||||
---
|
||||
|
||||
## 15. 样本数据要求
|
||||
|
||||
coding agent 必须先提供一批演示数据,不能等待真实数据。
|
||||
|
||||
## 15.1 样本规模
|
||||
- jobs: 100 条
|
||||
- workers: 300 条
|
||||
- skills: 100 条
|
||||
- categories: 30 条
|
||||
- regions: 20 条
|
||||
|
||||
## 15.2 样本质量要求
|
||||
- 覆盖多个岗位类别:促销、地推、导购、会展、分拣、客服、安装、配送等
|
||||
- 覆盖多种技能组合
|
||||
- 覆盖多个时间标签
|
||||
- 覆盖多个区域标签
|
||||
|
||||
## 15.3 样本格式要求
|
||||
统一存放到:
|
||||
- packages/sample-data/jobs.json
|
||||
- packages/sample-data/workers.json
|
||||
- packages/sample-data/skills.json
|
||||
- packages/sample-data/categories.json
|
||||
- packages/sample-data/regions.json
|
||||
|
||||
---
|
||||
|
||||
## 16. 工程规范要求
|
||||
|
||||
## 16.1 代码规范
|
||||
- 后端采用分层结构:router / service / repository / domain
|
||||
- 所有 schema 使用 Pydantic
|
||||
- 所有配置通过环境变量管理
|
||||
- 不允许把密钥写死在代码中
|
||||
|
||||
## 16.2 日志规范
|
||||
必须记录:
|
||||
- 抽取请求
|
||||
- 入库请求
|
||||
- 匹配请求
|
||||
- RAG 查询请求
|
||||
- 错误堆栈
|
||||
|
||||
## 16.3 错误处理
|
||||
必须对以下情况返回明确错误:
|
||||
- LLM 抽取失败
|
||||
- 结构化字段校验失败
|
||||
- LightRAG 不可用
|
||||
- 数据库连接失败
|
||||
- 样本导入失败
|
||||
|
||||
## 16.4 可维护性要求
|
||||
- Prompt 单独文件化
|
||||
- 打分权重配置化
|
||||
- 词表配置化
|
||||
- 召回 TopN 配置化
|
||||
|
||||
---
|
||||
|
||||
## 17. 推荐实现顺序
|
||||
|
||||
coding agent 必须按以下顺序推进,避免无序开发:
|
||||
|
||||
## Step 1:初始化工程骨架
|
||||
- 创建目录结构
|
||||
- 创建 Docker Compose
|
||||
- 创建 FastAPI 项目
|
||||
- 创建前端项目
|
||||
|
||||
## Step 2:定义 schema 与 shared types
|
||||
- JobCard
|
||||
- WorkerCard
|
||||
- MatchResult
|
||||
- API Request/Response
|
||||
|
||||
## Step 3:建立数据库与样本导入
|
||||
- 建表
|
||||
- 建初始化脚本
|
||||
- 导入 sample data
|
||||
|
||||
## Step 4:实现抽取服务
|
||||
- job_extract prompt
|
||||
- worker_extract prompt
|
||||
- schema 校验
|
||||
- 抽取接口
|
||||
|
||||
## Step 5:实现 LightRAG 适配层
|
||||
- ingest job
|
||||
- ingest worker
|
||||
- query candidates
|
||||
- bootstrap glossary
|
||||
|
||||
## Step 6:实现匹配引擎
|
||||
- recall
|
||||
- rank
|
||||
- explain
|
||||
- match 接口
|
||||
|
||||
## Step 7:实现 Web 控制台
|
||||
- 岗位测试页
|
||||
- 工人测试页
|
||||
- 结果展示页
|
||||
- 系统状态页
|
||||
|
||||
## Step 8:补充文档与脚本
|
||||
- README
|
||||
- API 文档
|
||||
- 启动脚本
|
||||
- 演示说明
|
||||
|
||||
---
|
||||
|
||||
## 18. 交付物要求
|
||||
|
||||
coding agent 最终必须产出以下内容:
|
||||
|
||||
### 代码交付
|
||||
- 完整可运行源码
|
||||
- Docker Compose 启动方案
|
||||
- 样本数据
|
||||
- 初始化脚本
|
||||
|
||||
### 文档交付
|
||||
- README.md
|
||||
- API.md
|
||||
- ARCHITECTURE.md
|
||||
- DEMO.md
|
||||
|
||||
### 演示能力
|
||||
- 一键启动
|
||||
- 导入样本数据
|
||||
- 前端输入岗位文本
|
||||
- 前端输入工人文本
|
||||
- 查看匹配结果与推荐原因
|
||||
|
||||
---
|
||||
|
||||
## 19. README 必须包含的内容
|
||||
|
||||
README 至少要写清楚:
|
||||
|
||||
1. 项目目标
|
||||
2. 技术栈
|
||||
3. 目录结构
|
||||
4. 环境变量说明
|
||||
5. 启动方式
|
||||
6. 样本导入方式
|
||||
7. API 地址
|
||||
8. 前端访问地址
|
||||
9. 演示路径
|
||||
10. 已实现范围与未实现范围
|
||||
|
||||
---
|
||||
|
||||
## 20. coding agent 禁止事项
|
||||
|
||||
coding agent 禁止做以下事情:
|
||||
|
||||
1. 禁止擅自扩展到裂变、支付、合同等低优先级模块。
|
||||
2. 禁止为了“完整性”过度设计微服务。
|
||||
3. 禁止引入过多基础设施,导致 POC 难以启动。
|
||||
4. 禁止跳过样本数据准备。
|
||||
5. 禁止跳过推荐解释能力。
|
||||
6. 禁止把 prompt、权重、技能词表硬编码到业务逻辑中。
|
||||
7. 禁止先做 UI 花活而忽视底层检索和匹配内核。
|
||||
|
||||
---
|
||||
|
||||
## 21. coding agent 任务说明(可直接复制执行)
|
||||
|
||||
你现在需要实现一个“灵活用工最小 POC 内核系统”。
|
||||
|
||||
目标不是做完整平台,而是打通以下闭环:
|
||||
- 从自然语言中抽取岗位需求
|
||||
- 从自然语言中抽取工人技能画像
|
||||
- 将岗位、工人、技能词表写入 LightRAG
|
||||
- 支持岗位找人、人找岗位两类检索
|
||||
- 对召回结果进行排序打分
|
||||
- 对每个推荐结果生成推荐原因
|
||||
- 提供一个简单 Web 控制台完成演示
|
||||
|
||||
请严格遵守以下原则:
|
||||
- 优先完成底层能力,不做裂变、支付、合同等外围模块
|
||||
- 使用 FastAPI + PostgreSQL + LightRAG + React/Vite
|
||||
- 使用 Docker Compose 保证一键启动
|
||||
- 使用统一的 JobCard / WorkerCard / MatchResult schema
|
||||
- 所有 prompt、权重、词表必须配置化
|
||||
- 必须提供样本数据和 bootstrap 导入脚本
|
||||
- 必须输出 README、API 文档、架构说明
|
||||
|
||||
交付完成后,系统应支持以下演示:
|
||||
1. 输入一段岗位描述,抽取出结构化岗位。
|
||||
2. 输入一段工人描述,抽取出结构化画像。
|
||||
3. 岗位匹配出 TopN 工人,并显示推荐原因。
|
||||
4. 工人匹配出 TopN 岗位,并显示推荐原因。
|
||||
5. 前端页面可完整演示这条流程。
|
||||
|
||||
---
|
||||
|
||||
## 22. 最终执行结论
|
||||
|
||||
> 当前阶段唯一正确的开发顺序,是先把“岗位—工人—技能—检索—匹配—解释”这条链打透。
|
||||
|
||||
只要这条底层链路成立,后续再叠加裂变传播、电子合同、支付结算、运营后台,才有意义。
|
||||
|
||||
Reference in New Issue
Block a user