From c7788fdd92424630904d1116d07ec5ea02e4502d Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 30 Mar 2026 20:49:40 +0800 Subject: [PATCH] feat: add new folder --- gig-poc/.env.example | 8 + gig-poc/.gitignore | 6 + gig-poc/README.md | 3 + gig-poc/apps/api/Dockerfile | 18 + gig-poc/apps/api/app/__init__.py | 1 + gig-poc/apps/api/app/api/routes.py | 133 + gig-poc/apps/api/app/core/config.py | 51 + gig-poc/apps/api/app/core/logging.py | 26 + gig-poc/apps/api/app/db/base.py | 5 + gig-poc/apps/api/app/db/session.py | 19 + gig-poc/apps/api/app/domain/models.py | 86 + gig-poc/apps/api/app/domain/schemas.py | 171 + gig-poc/apps/api/app/main.py | 36 + .../api/app/repositories/job_repository.py | 58 + .../api/app/repositories/match_repository.py | 49 + .../api/app/repositories/worker_repository.py | 49 + gig-poc/apps/api/app/services/card_mapper.py | 50 + .../api/app/services/extraction_service.py | 209 + .../apps/api/app/services/ingest_service.py | 52 + gig-poc/apps/api/app/services/llm_client.py | 34 + .../apps/api/app/services/matching_service.py | 178 + .../api/app/services/rag/lightrag_adapter.py | 143 + gig-poc/apps/api/app/utils/ids.py | 6 + gig-poc/apps/api/app/utils/prompts.py | 5 + gig-poc/apps/api/pyproject.toml | 21 + gig-poc/apps/web/Dockerfile | 20 + gig-poc/apps/web/index.html | 12 + gig-poc/apps/web/package.json | 23 + gig-poc/apps/web/src/App.tsx | 41 + gig-poc/apps/web/src/api/client.ts | 33 + gig-poc/apps/web/src/components/JsonPanel.tsx | 15 + gig-poc/apps/web/src/components/MatchList.tsx | 34 + gig-poc/apps/web/src/main.tsx | 13 + .../apps/web/src/pages/DataBrowserPage.tsx | 28 + gig-poc/apps/web/src/pages/JobPage.tsx | 53 + gig-poc/apps/web/src/pages/StatusPage.tsx | 43 + gig-poc/apps/web/src/pages/WorkerPage.tsx | 53 + gig-poc/apps/web/src/styles/global.css | 176 + gig-poc/apps/web/tsconfig.app.json | 20 + gig-poc/apps/web/tsconfig.json | 6 + gig-poc/apps/web/vite.config.ts | 10 + gig-poc/docs/API.md | 75 + gig-poc/docs/ARCHITECTURE.md | 42 + 
gig-poc/docs/DEMO.md | 27 + gig-poc/docs/README.md | 78 + .../infrastructure/docker-compose.prod.yml | 52 + gig-poc/infrastructure/docker-compose.yml | 59 + gig-poc/infrastructure/nginx/default.conf | 25 + gig-poc/infrastructure/scripts/dev-up.sh | 16 + gig-poc/infrastructure/scripts/down.sh | 8 + gig-poc/infrastructure/scripts/prod-up.sh | 9 + gig-poc/infrastructure/sql/01-init.sql | 1 + gig-poc/packages/prompts/job_extract.md | 28 + gig-poc/packages/prompts/match_explain.md | 14 + gig-poc/packages/prompts/worker_extract.md | 23 + gig-poc/packages/sample-data/categories.json | 32 + .../sample-data/generate_sample_data.py | 183 + gig-poc/packages/sample-data/jobs.json | 2999 ++++ gig-poc/packages/sample-data/regions.json | 82 + .../packages/sample-data/skill_relations.json | 20 + gig-poc/packages/sample-data/skills.json | 102 + gig-poc/packages/sample-data/workers.json | 13194 ++++++++++++++++ gig-poc/packages/shared-types/src/index.ts | 60 + gig_poc_coding_agent_guide.md | 784 + 64 files changed, 19910 insertions(+) create mode 100644 gig-poc/.env.example create mode 100644 gig-poc/.gitignore create mode 100644 gig-poc/README.md create mode 100644 gig-poc/apps/api/Dockerfile create mode 100644 gig-poc/apps/api/app/__init__.py create mode 100644 gig-poc/apps/api/app/api/routes.py create mode 100644 gig-poc/apps/api/app/core/config.py create mode 100644 gig-poc/apps/api/app/core/logging.py create mode 100644 gig-poc/apps/api/app/db/base.py create mode 100644 gig-poc/apps/api/app/db/session.py create mode 100644 gig-poc/apps/api/app/domain/models.py create mode 100644 gig-poc/apps/api/app/domain/schemas.py create mode 100644 gig-poc/apps/api/app/main.py create mode 100644 gig-poc/apps/api/app/repositories/job_repository.py create mode 100644 gig-poc/apps/api/app/repositories/match_repository.py create mode 100644 gig-poc/apps/api/app/repositories/worker_repository.py create mode 100644 gig-poc/apps/api/app/services/card_mapper.py create mode 100644 
gig-poc/apps/api/app/services/extraction_service.py create mode 100644 gig-poc/apps/api/app/services/ingest_service.py create mode 100644 gig-poc/apps/api/app/services/llm_client.py create mode 100644 gig-poc/apps/api/app/services/matching_service.py create mode 100644 gig-poc/apps/api/app/services/rag/lightrag_adapter.py create mode 100644 gig-poc/apps/api/app/utils/ids.py create mode 100644 gig-poc/apps/api/app/utils/prompts.py create mode 100644 gig-poc/apps/api/pyproject.toml create mode 100644 gig-poc/apps/web/Dockerfile create mode 100644 gig-poc/apps/web/index.html create mode 100644 gig-poc/apps/web/package.json create mode 100644 gig-poc/apps/web/src/App.tsx create mode 100644 gig-poc/apps/web/src/api/client.ts create mode 100644 gig-poc/apps/web/src/components/JsonPanel.tsx create mode 100644 gig-poc/apps/web/src/components/MatchList.tsx create mode 100644 gig-poc/apps/web/src/main.tsx create mode 100644 gig-poc/apps/web/src/pages/DataBrowserPage.tsx create mode 100644 gig-poc/apps/web/src/pages/JobPage.tsx create mode 100644 gig-poc/apps/web/src/pages/StatusPage.tsx create mode 100644 gig-poc/apps/web/src/pages/WorkerPage.tsx create mode 100644 gig-poc/apps/web/src/styles/global.css create mode 100644 gig-poc/apps/web/tsconfig.app.json create mode 100644 gig-poc/apps/web/tsconfig.json create mode 100644 gig-poc/apps/web/vite.config.ts create mode 100644 gig-poc/docs/API.md create mode 100644 gig-poc/docs/ARCHITECTURE.md create mode 100644 gig-poc/docs/DEMO.md create mode 100644 gig-poc/docs/README.md create mode 100644 gig-poc/infrastructure/docker-compose.prod.yml create mode 100644 gig-poc/infrastructure/docker-compose.yml create mode 100644 gig-poc/infrastructure/nginx/default.conf create mode 100755 gig-poc/infrastructure/scripts/dev-up.sh create mode 100755 gig-poc/infrastructure/scripts/down.sh create mode 100755 gig-poc/infrastructure/scripts/prod-up.sh create mode 100644 gig-poc/infrastructure/sql/01-init.sql create mode 100644 
gig-poc/packages/prompts/job_extract.md create mode 100644 gig-poc/packages/prompts/match_explain.md create mode 100644 gig-poc/packages/prompts/worker_extract.md create mode 100644 gig-poc/packages/sample-data/categories.json create mode 100644 gig-poc/packages/sample-data/generate_sample_data.py create mode 100644 gig-poc/packages/sample-data/jobs.json create mode 100644 gig-poc/packages/sample-data/regions.json create mode 100644 gig-poc/packages/sample-data/skill_relations.json create mode 100644 gig-poc/packages/sample-data/skills.json create mode 100644 gig-poc/packages/sample-data/workers.json create mode 100644 gig-poc/packages/shared-types/src/index.ts create mode 100644 gig_poc_coding_agent_guide.md diff --git a/gig-poc/.env.example b/gig-poc/.env.example new file mode 100644 index 0000000..4fa3b12 --- /dev/null +++ b/gig-poc/.env.example @@ -0,0 +1,8 @@ +APP_ENV=development +LOG_LEVEL=INFO +DATABASE_URL=postgresql+psycopg://gig:gig@postgres:5432/gig_poc +QDRANT_URL=http://qdrant:6333 +LLM_ENABLED=false +LLM_BASE_URL= +LLM_API_KEY= +LLM_MODEL=gpt-5.4 diff --git a/gig-poc/.gitignore b/gig-poc/.gitignore new file mode 100644 index 0000000..2d4288d --- /dev/null +++ b/gig-poc/.gitignore @@ -0,0 +1,6 @@ +__pycache__/ +.DS_Store +.env +node_modules/ +dist/ +*.pyc diff --git a/gig-poc/README.md b/gig-poc/README.md new file mode 100644 index 0000000..4046b64 --- /dev/null +++ b/gig-poc/README.md @@ -0,0 +1,3 @@ +# Gig POC + +项目说明见 [docs/README.md](./docs/README.md)。 diff --git a/gig-poc/apps/api/Dockerfile b/gig-poc/apps/api/Dockerfile new file mode 100644 index 0000000..1fe346e --- /dev/null +++ b/gig-poc/apps/api/Dockerfile @@ -0,0 +1,18 @@ +FROM docker.m.daocloud.io/library/python:3.11-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple + +WORKDIR /workspace + +COPY apps/api /workspace/apps/api +COPY packages /workspace/packages + +RUN pip install --no-cache-dir /workspace/apps/api + +WORKDIR 
/workspace/apps/api + +ENV PYTHONPATH=/workspace/apps/api + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/gig-poc/apps/api/app/__init__.py b/gig-poc/apps/api/app/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/gig-poc/apps/api/app/__init__.py @@ -0,0 +1 @@ + diff --git a/gig-poc/apps/api/app/api/routes.py b/gig-poc/apps/api/app/api/routes.py new file mode 100644 index 0000000..ac801ce --- /dev/null +++ b/gig-poc/apps/api/app/api/routes.py @@ -0,0 +1,133 @@ +from datetime import datetime + +from fastapi import APIRouter, Depends, HTTPException +from sqlalchemy import text +from sqlalchemy.orm import Session + +from app.core.config import get_settings +from app.db.session import get_db +from app.domain.schemas import ( + ExplainResponse, + ExtractResponse, + ExtractTextRequest, + HealthStatus, + IngestJobRequest, + IngestWorkerRequest, + JobCard, + ListResponse, + MatchJobsRequest, + MatchResponse, + MatchWorkersRequest, + WorkerCard, +) +from app.repositories.job_repository import JobRepository +from app.repositories.worker_repository import WorkerRepository +from app.services.card_mapper import job_to_card, worker_to_card +from app.services.extraction_service import ExtractionService +from app.services.ingest_service import IngestService +from app.services.matching_service import MatchingService +from app.services.rag.lightrag_adapter import LightRAGAdapter + + +router = APIRouter() + + +@router.get("/health", response_model=HealthStatus) +def health(db: Session = Depends(get_db)) -> HealthStatus: + settings = get_settings() + db_status = "ok" + rag_status = "ok" + try: + db.execute(text("SELECT 1")) + except Exception: + db_status = "error" + try: + rag_status = LightRAGAdapter(settings).health() + except Exception: + rag_status = "error" + return HealthStatus(service="ok", database=db_status, rag=rag_status, timestamp=datetime.now().astimezone()) + + +@router.post("/poc/extract/job", 
response_model=ExtractResponse) +def extract_job(payload: ExtractTextRequest) -> ExtractResponse: + return ExtractionService().extract_job(payload.text) + + +@router.post("/poc/extract/worker", response_model=ExtractResponse) +def extract_worker(payload: ExtractTextRequest) -> ExtractResponse: + return ExtractionService().extract_worker(payload.text) + + +@router.post("/poc/ingest/job", response_model=JobCard) +def ingest_job(payload: IngestJobRequest, db: Session = Depends(get_db)) -> JobCard: + return IngestService(db).ingest_job(payload.job) + + +@router.post("/poc/ingest/worker", response_model=WorkerCard) +def ingest_worker(payload: IngestWorkerRequest, db: Session = Depends(get_db)) -> WorkerCard: + return IngestService(db).ingest_worker(payload.worker) + + +@router.post("/poc/ingest/bootstrap") +def bootstrap(db: Session = Depends(get_db)): + return IngestService(db).bootstrap() + + +@router.post("/poc/match/workers", response_model=MatchResponse) +def match_workers(payload: MatchWorkersRequest, db: Session = Depends(get_db)) -> MatchResponse: + service = MatchingService(db) + source = payload.job + if source is None and payload.job_id: + job = JobRepository(db).get(payload.job_id) + source = job_to_card(job) if job else None + if source is None: + raise HTTPException(status_code=404, detail="岗位不存在") + return MatchResponse(items=service.match_workers(source, payload.top_n)) + + +@router.post("/poc/match/jobs", response_model=MatchResponse) +def match_jobs(payload: MatchJobsRequest, db: Session = Depends(get_db)) -> MatchResponse: + service = MatchingService(db) + source = payload.worker + if source is None and payload.worker_id: + worker = WorkerRepository(db).get(payload.worker_id) + source = worker_to_card(worker) if worker else None + if source is None: + raise HTTPException(status_code=404, detail="工人不存在") + return MatchResponse(items=service.match_jobs(source, payload.top_n)) + + +@router.get("/poc/match/explain/{match_id}", 
response_model=ExplainResponse) +def explain_match(match_id: str, db: Session = Depends(get_db)) -> ExplainResponse: + match = MatchingService(db).explain(match_id) + if match is None: + raise HTTPException(status_code=404, detail="匹配记录不存在") + return ExplainResponse(match=match) + + +@router.get("/poc/jobs", response_model=ListResponse) +def list_jobs(db: Session = Depends(get_db)) -> ListResponse: + items = [job_to_card(item).model_dump(mode="json") for item in JobRepository(db).list()] + return ListResponse(items=items, total=len(items)) + + +@router.get("/poc/workers", response_model=ListResponse) +def list_workers(db: Session = Depends(get_db)) -> ListResponse: + items = [worker_to_card(item).model_dump(mode="json") for item in WorkerRepository(db).list()] + return ListResponse(items=items, total=len(items)) + + +@router.get("/poc/jobs/{job_id}", response_model=JobCard) +def get_job(job_id: str, db: Session = Depends(get_db)) -> JobCard: + item = JobRepository(db).get(job_id) + if item is None: + raise HTTPException(status_code=404, detail="岗位不存在") + return job_to_card(item) + + +@router.get("/poc/workers/{worker_id}", response_model=WorkerCard) +def get_worker(worker_id: str, db: Session = Depends(get_db)) -> WorkerCard: + item = WorkerRepository(db).get(worker_id) + if item is None: + raise HTTPException(status_code=404, detail="工人不存在") + return worker_to_card(item) diff --git a/gig-poc/apps/api/app/core/config.py b/gig-poc/apps/api/app/core/config.py new file mode 100644 index 0000000..13b0b48 --- /dev/null +++ b/gig-poc/apps/api/app/core/config.py @@ -0,0 +1,51 @@ +from functools import lru_cache +from pathlib import Path + +from pydantic import Field +from pydantic_settings import BaseSettings, SettingsConfigDict + + +BASE_DIR = Path(__file__).resolve().parents[2] +ROOT_DIR = BASE_DIR.parents[1] + + +class Settings(BaseSettings): + model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore") + + app_name: str = "Gig POC 
API" + app_env: str = "development" + app_host: str = "0.0.0.0" + app_port: int = 8000 + log_level: str = "INFO" + + database_url: str = "postgresql+psycopg://gig:gig@postgres:5432/gig_poc" + qdrant_url: str = "http://qdrant:6333" + qdrant_collection: str = "gig_poc_entities" + vector_size: int = 64 + + llm_enabled: bool = False + llm_base_url: str | None = None + llm_api_key: str | None = None + llm_model: str = "gpt-5.4" + embedding_enabled: bool = False + embedding_model: str = "text-embedding-3-small" + + bootstrap_jobs: int = 100 + bootstrap_workers: int = 300 + default_recall_top_k: int = 30 + default_match_top_n: int = 10 + + prompt_dir: Path = Field(default=ROOT_DIR / "packages" / "prompts") + sample_data_dir: Path = Field(default=ROOT_DIR / "packages" / "sample-data") + shared_types_dir: Path = Field(default=ROOT_DIR / "packages" / "shared-types") + + score_skill_weight: float = 0.35 + score_region_weight: float = 0.20 + score_time_weight: float = 0.15 + score_experience_weight: float = 0.15 + score_reliability_weight: float = 0.15 + + +@lru_cache +def get_settings() -> Settings: + return Settings() diff --git a/gig-poc/apps/api/app/core/logging.py b/gig-poc/apps/api/app/core/logging.py new file mode 100644 index 0000000..2c5d80f --- /dev/null +++ b/gig-poc/apps/api/app/core/logging.py @@ -0,0 +1,26 @@ +import logging +from logging.config import dictConfig + + +def configure_logging(level: str = "INFO") -> None: + dictConfig( + { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "default": { + "format": "%(asctime)s %(levelname)s [%(name)s] %(message)s", + } + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "default", + } + }, + "root": {"handlers": ["console"], "level": level}, + } + ) + + +logger = logging.getLogger("gig-poc") diff --git a/gig-poc/apps/api/app/db/base.py b/gig-poc/apps/api/app/db/base.py new file mode 100644 index 0000000..fa2b68a --- /dev/null +++ 
b/gig-poc/apps/api/app/db/base.py @@ -0,0 +1,5 @@ +from sqlalchemy.orm import DeclarativeBase + + +class Base(DeclarativeBase): + pass diff --git a/gig-poc/apps/api/app/db/session.py b/gig-poc/apps/api/app/db/session.py new file mode 100644 index 0000000..dba3715 --- /dev/null +++ b/gig-poc/apps/api/app/db/session.py @@ -0,0 +1,19 @@ +from collections.abc import Generator + +from sqlalchemy import create_engine +from sqlalchemy.orm import Session, sessionmaker + +from app.core.config import get_settings + + +settings = get_settings() +engine = create_engine(settings.database_url, future=True, pool_pre_ping=True) +SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False, future=True) + + +def get_db() -> Generator[Session, None, None]: + db = SessionLocal() + try: + yield db + finally: + db.close() diff --git a/gig-poc/apps/api/app/domain/models.py b/gig-poc/apps/api/app/domain/models.py new file mode 100644 index 0000000..a3ec6ab --- /dev/null +++ b/gig-poc/apps/api/app/domain/models.py @@ -0,0 +1,86 @@ +from datetime import datetime +from uuid import uuid4 + +from sqlalchemy import DateTime, Float, ForeignKey, Integer, String, Text, func +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from app.db.base import Base + + +class Job(Base): + __tablename__ = "jobs" + + id: Mapped[str] = mapped_column(String(64), primary_key=True, default=lambda: f"job_{uuid4().hex[:12]}") + title: Mapped[str] = mapped_column(String(255)) + category: Mapped[str] = mapped_column(String(128)) + description: Mapped[str] = mapped_column(Text) + city: Mapped[str] = mapped_column(String(64)) + region: Mapped[str] = mapped_column(String(64)) + location_detail: Mapped[str] = mapped_column(String(255)) + start_time: Mapped[datetime] = mapped_column(DateTime(timezone=True)) + duration_hours: Mapped[float] = mapped_column(Float) + headcount: Mapped[int] = mapped_column(Integer) + salary_type: Mapped[str] = 
mapped_column(String(32)) + salary_amount: Mapped[float] = mapped_column(Float) + salary_currency: Mapped[str] = mapped_column(String(16), default="CNY") + work_mode: Mapped[str] = mapped_column(String(64)) + tags_json: Mapped[list[str]] = mapped_column(JSONB, default=list) + confidence: Mapped[float] = mapped_column(Float) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + skills: Mapped[list["JobSkill"]] = relationship(back_populates="job", cascade="all, delete-orphan") + + +class JobSkill(Base): + __tablename__ = "job_skills" + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + job_id: Mapped[str] = mapped_column(ForeignKey("jobs.id", ondelete="CASCADE"), index=True) + skill_name: Mapped[str] = mapped_column(String(128), index=True) + weight: Mapped[float] = mapped_column(Float, default=1.0) + is_required: Mapped[bool] = mapped_column(default=True) + + job: Mapped[Job] = relationship(back_populates="skills") + + +class Worker(Base): + __tablename__ = "workers" + + id: Mapped[str] = mapped_column(String(64), primary_key=True, default=lambda: f"worker_{uuid4().hex[:12]}") + name: Mapped[str] = mapped_column(String(128)) + description: Mapped[str] = mapped_column(Text) + cities_json: Mapped[list[str]] = mapped_column(JSONB, default=list) + regions_json: Mapped[list[str]] = mapped_column(JSONB, default=list) + availability_json: Mapped[list[str]] = mapped_column(JSONB, default=list) + experience_tags_json: Mapped[list[str]] = mapped_column(JSONB, default=list) + reliability_score: Mapped[float] = mapped_column(Float, default=0.7) + profile_completion: Mapped[float] = mapped_column(Float, default=0.6) + confidence: Mapped[float] = mapped_column(Float, default=0.8) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + skills: Mapped[list["WorkerSkill"]] = relationship(back_populates="worker", cascade="all, delete-orphan") + + +class 
WorkerSkill(Base): + __tablename__ = "worker_skills" + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + worker_id: Mapped[str] = mapped_column(ForeignKey("workers.id", ondelete="CASCADE"), index=True) + skill_name: Mapped[str] = mapped_column(String(128), index=True) + score: Mapped[float] = mapped_column(Float, default=0.7) + + worker: Mapped[Worker] = relationship(back_populates="skills") + + +class MatchRecord(Base): + __tablename__ = "matches" + + id: Mapped[str] = mapped_column(String(64), primary_key=True, default=lambda: f"match_{uuid4().hex[:12]}") + source_type: Mapped[str] = mapped_column(String(32), index=True) + source_id: Mapped[str] = mapped_column(String(64), index=True) + target_id: Mapped[str] = mapped_column(String(64), index=True) + match_score: Mapped[float] = mapped_column(Float) + breakdown_json: Mapped[dict] = mapped_column(JSONB) + reasons_json: Mapped[list[str]] = mapped_column(JSONB) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) diff --git a/gig-poc/apps/api/app/domain/schemas.py b/gig-poc/apps/api/app/domain/schemas.py new file mode 100644 index 0000000..8f9f830 --- /dev/null +++ b/gig-poc/apps/api/app/domain/schemas.py @@ -0,0 +1,171 @@ +from __future__ import annotations + +from datetime import datetime +from enum import Enum + +from pydantic import BaseModel, Field, field_validator, model_validator + + +class SalaryType(str, Enum): + daily = "daily" + hourly = "hourly" + monthly = "monthly" + task = "task" + + +class SourceType(str, Enum): + job_to_worker = "job_to_worker" + worker_to_job = "worker_to_job" + + +class Salary(BaseModel): + type: SalaryType = SalaryType.daily + amount: float = 0 + currency: str = "CNY" + + +class SkillScore(BaseModel): + name: str + score: float = Field(ge=0, le=1) + + +class JobCard(BaseModel): + job_id: str + title: str + category: str + description: str + skills: list[str] = Field(default_factory=list) + city: str + 
region: str + location_detail: str + start_time: datetime + duration_hours: float = Field(gt=0) + headcount: int = Field(gt=0) + salary: Salary + work_mode: str + tags: list[str] = Field(default_factory=list) + confidence: float = Field(ge=0, le=1) + + +class WorkerCard(BaseModel): + worker_id: str + name: str + description: str + skills: list[SkillScore] = Field(default_factory=list) + cities: list[str] = Field(default_factory=list) + regions: list[str] = Field(default_factory=list) + availability: list[str] = Field(default_factory=list) + experience_tags: list[str] = Field(default_factory=list) + reliability_score: float = Field(ge=0, le=1) + profile_completion: float = Field(ge=0, le=1) + confidence: float = Field(ge=0, le=1) + + +class MatchBreakdown(BaseModel): + skill_score: float = Field(ge=0, le=1) + region_score: float = Field(ge=0, le=1) + time_score: float = Field(ge=0, le=1) + experience_score: float = Field(ge=0, le=1) + reliability_score: float = Field(ge=0, le=1) + + +class MatchResult(BaseModel): + match_id: str + source_type: SourceType + source_id: str + target_id: str + match_score: float = Field(ge=0, le=1) + breakdown: MatchBreakdown + reasons: list[str] = Field(default_factory=list, min_length=3) + + +class ExtractTextRequest(BaseModel): + text: str = Field(min_length=5) + + +class IngestJobRequest(BaseModel): + job: JobCard + + +class IngestWorkerRequest(BaseModel): + worker: WorkerCard + + +class MatchWorkersRequest(BaseModel): + job_id: str | None = None + job: JobCard | None = None + top_n: int = Field(default=10, ge=1, le=50) + + @model_validator(mode="after") + def validate_source(self) -> "MatchWorkersRequest": + if not self.job_id and not self.job: + raise ValueError("job_id 或 job 至少需要提供一个") + return self + + +class MatchJobsRequest(BaseModel): + worker_id: str | None = None + worker: WorkerCard | None = None + top_n: int = Field(default=10, ge=1, le=50) + + @model_validator(mode="after") + def validate_source(self) -> 
"MatchJobsRequest": + if not self.worker_id and not self.worker: + raise ValueError("worker_id 或 worker 至少需要提供一个") + return self + + +class ExtractResponse(BaseModel): + success: bool + data: JobCard | WorkerCard | None = None + errors: list[str] = Field(default_factory=list) + missing_fields: list[str] = Field(default_factory=list) + + +class BootstrapResponse(BaseModel): + jobs: int + workers: int + skills: int + categories: int + regions: int + + +class HealthStatus(BaseModel): + service: str + database: str + rag: str + timestamp: datetime + + +class ListResponse(BaseModel): + items: list[dict] + total: int + + +class MatchResponse(BaseModel): + items: list[MatchResult] + + +class ExplainResponse(BaseModel): + match: MatchResult + + +class PromptOutput(BaseModel): + content: dict + raw_text: str + + +class QueryFilters(BaseModel): + entity_type: str + city: str | None = None + region: str | None = None + categories: list[str] = Field(default_factory=list) + tags: list[str] = Field(default_factory=list) + skills: list[str] = Field(default_factory=list) + + @field_validator("entity_type") + @classmethod + def validate_entity_type(cls, value: str) -> str: + if value not in {"job", "worker"}: + raise ValueError("entity_type must be job or worker") + return value diff --git a/gig-poc/apps/api/app/main.py b/gig-poc/apps/api/app/main.py new file mode 100644 index 0000000..f75b4fd --- /dev/null +++ b/gig-poc/apps/api/app/main.py @@ -0,0 +1,36 @@ +from contextlib import asynccontextmanager + +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware + +from app.api.routes import router +from app.core.config import get_settings +from app.core.logging import configure_logging, logger +from app.db.base import Base +from app.db.session import engine +from app.services.rag.lightrag_adapter import LightRAGAdapter + + +settings = get_settings() +configure_logging(settings.log_level) + + +@asynccontextmanager +async def lifespan(_: FastAPI): + 
Base.metadata.create_all(bind=engine) + try: + LightRAGAdapter(settings).ensure_ready() + except Exception: + logger.exception("Qdrant initialization skipped during startup") + yield + + +app = FastAPI(title=settings.app_name, lifespan=lifespan) +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) +app.include_router(router) diff --git a/gig-poc/apps/api/app/repositories/job_repository.py b/gig-poc/apps/api/app/repositories/job_repository.py new file mode 100644 index 0000000..fcaa64f --- /dev/null +++ b/gig-poc/apps/api/app/repositories/job_repository.py @@ -0,0 +1,58 @@ +from sqlalchemy import select +from sqlalchemy.orm import Session, selectinload + +from app.domain.models import Job, JobSkill +from app.domain.schemas import JobCard + + +class JobRepository: + def __init__(self, db: Session): + self.db = db + + def upsert(self, job_card: JobCard) -> Job: + instance = self.db.get(Job, job_card.job_id) + if instance is None: + instance = Job(id=job_card.job_id) + self.db.add(instance) + + instance.title = job_card.title + instance.category = job_card.category + instance.description = job_card.description + instance.city = job_card.city + instance.region = job_card.region + instance.location_detail = job_card.location_detail + instance.start_time = job_card.start_time + instance.duration_hours = job_card.duration_hours + instance.headcount = job_card.headcount + instance.salary_type = job_card.salary.type.value + instance.salary_amount = job_card.salary.amount + instance.salary_currency = job_card.salary.currency + instance.work_mode = job_card.work_mode + instance.tags_json = job_card.tags + instance.confidence = job_card.confidence + instance.skills.clear() + instance.skills.extend( + [ + JobSkill(skill_name=skill_name, weight=1.0, is_required=index < 2) + for index, skill_name in enumerate(job_card.skills) + ] + ) + self.db.commit() + self.db.refresh(instance) + return instance 
+ + def list(self, limit: int = 100) -> list[Job]: + stmt = select(Job).options(selectinload(Job.skills)).order_by(Job.created_at.desc()).limit(limit) + return list(self.db.scalars(stmt)) + + def get(self, job_id: str) -> Job | None: + stmt = select(Job).options(selectinload(Job.skills)).where(Job.id == job_id) + return self.db.scalar(stmt) + + def get_many(self, ids: list[str]) -> list[Job]: + if not ids: + return [] + stmt = select(Job).options(selectinload(Job.skills)).where(Job.id.in_(ids)) + result = list(self.db.scalars(stmt)) + order = {item_id: index for index, item_id in enumerate(ids)} + return sorted(result, key=lambda item: order[item.id]) diff --git a/gig-poc/apps/api/app/repositories/match_repository.py b/gig-poc/apps/api/app/repositories/match_repository.py new file mode 100644 index 0000000..977770e --- /dev/null +++ b/gig-poc/apps/api/app/repositories/match_repository.py @@ -0,0 +1,49 @@ +from sqlalchemy import select +from sqlalchemy.orm import Session + +from app.domain.models import MatchRecord +from app.domain.schemas import MatchResult + + +class MatchRepository: + def __init__(self, db: Session): + self.db = db + + def create(self, match: MatchResult) -> MatchRecord: + instance = MatchRecord( + id=match.match_id, + source_type=match.source_type.value, + source_id=match.source_id, + target_id=match.target_id, + match_score=match.match_score, + breakdown_json=match.breakdown.model_dump(), + reasons_json=match.reasons, + ) + self.db.add(instance) + self.db.commit() + self.db.refresh(instance) + return instance + + def bulk_replace(self, matches: list[MatchResult], source_type: str, source_id: str) -> None: + stmt = select(MatchRecord).where( + MatchRecord.source_type == source_type, + MatchRecord.source_id == source_id, + ) + for item in self.db.scalars(stmt): + self.db.delete(item) + for match in matches: + self.db.add( + MatchRecord( + id=match.match_id, + source_type=match.source_type.value, + source_id=match.source_id, + 
target_id=match.target_id, + match_score=match.match_score, + breakdown_json=match.breakdown.model_dump(), + reasons_json=match.reasons, + ) + ) + self.db.commit() + + def get(self, match_id: str) -> MatchRecord | None: + return self.db.get(MatchRecord, match_id) diff --git a/gig-poc/apps/api/app/repositories/worker_repository.py b/gig-poc/apps/api/app/repositories/worker_repository.py new file mode 100644 index 0000000..1eb51f3 --- /dev/null +++ b/gig-poc/apps/api/app/repositories/worker_repository.py @@ -0,0 +1,49 @@ +from sqlalchemy import select +from sqlalchemy.orm import Session, selectinload + +from app.domain.models import Worker, WorkerSkill +from app.domain.schemas import WorkerCard + + +class WorkerRepository: + def __init__(self, db: Session): + self.db = db + + def upsert(self, worker_card: WorkerCard) -> Worker: + instance = self.db.get(Worker, worker_card.worker_id) + if instance is None: + instance = Worker(id=worker_card.worker_id) + self.db.add(instance) + + instance.name = worker_card.name + instance.description = worker_card.description + instance.cities_json = worker_card.cities + instance.regions_json = worker_card.regions + instance.availability_json = worker_card.availability + instance.experience_tags_json = worker_card.experience_tags + instance.reliability_score = worker_card.reliability_score + instance.profile_completion = worker_card.profile_completion + instance.confidence = worker_card.confidence + instance.skills.clear() + instance.skills.extend( + [WorkerSkill(skill_name=skill.name, score=skill.score) for skill in worker_card.skills] + ) + self.db.commit() + self.db.refresh(instance) + return instance + + def list(self, limit: int = 200) -> list[Worker]: + stmt = select(Worker).options(selectinload(Worker.skills)).order_by(Worker.created_at.desc()).limit(limit) + return list(self.db.scalars(stmt)) + + def get(self, worker_id: str) -> Worker | None: + stmt = select(Worker).options(selectinload(Worker.skills)).where(Worker.id == 
worker_id) + return self.db.scalar(stmt) + + def get_many(self, ids: list[str]) -> list[Worker]: + if not ids: + return [] + stmt = select(Worker).options(selectinload(Worker.skills)).where(Worker.id.in_(ids)) + result = list(self.db.scalars(stmt)) + order = {item_id: index for index, item_id in enumerate(ids)} + return sorted(result, key=lambda item: order[item.id]) diff --git a/gig-poc/apps/api/app/services/card_mapper.py b/gig-poc/apps/api/app/services/card_mapper.py new file mode 100644 index 0000000..1a59091 --- /dev/null +++ b/gig-poc/apps/api/app/services/card_mapper.py @@ -0,0 +1,50 @@ +from app.domain.models import Job, MatchRecord, Worker +from app.domain.schemas import JobCard, MatchBreakdown, MatchResult, Salary, SkillScore, SourceType, WorkerCard + + +def job_to_card(job: Job) -> JobCard: + return JobCard( + job_id=job.id, + title=job.title, + category=job.category, + description=job.description, + skills=[item.skill_name for item in job.skills], + city=job.city, + region=job.region, + location_detail=job.location_detail, + start_time=job.start_time, + duration_hours=job.duration_hours, + headcount=job.headcount, + salary=Salary(type=job.salary_type, amount=job.salary_amount, currency=job.salary_currency), + work_mode=job.work_mode, + tags=job.tags_json, + confidence=job.confidence, + ) + + +def worker_to_card(worker: Worker) -> WorkerCard: + return WorkerCard( + worker_id=worker.id, + name=worker.name, + description=worker.description, + skills=[SkillScore(name=item.skill_name, score=item.score) for item in worker.skills], + cities=worker.cities_json, + regions=worker.regions_json, + availability=worker.availability_json, + experience_tags=worker.experience_tags_json, + reliability_score=worker.reliability_score, + profile_completion=worker.profile_completion, + confidence=worker.confidence, + ) + + +def match_record_to_schema(match: MatchRecord) -> MatchResult: + return MatchResult( + match_id=match.id, + source_type=SourceType(match.source_type), + 
# NOTE: module-level imports (app.*, pydantic, dateutil, httpx deps) are unchanged at file top.
class ExtractionService:
    """Turns free-form Chinese job/worker descriptions into structured cards.

    Tries the configured LLM first; falls back to rule-based keyword
    extraction driven by the sample-data vocabularies when the LLM is
    disabled or fails.
    """

    def __init__(self) -> None:
        self.settings = get_settings()
        # Vocabulary files drive the rule-based fallback extraction.
        self.skills = json.loads((self.settings.sample_data_dir / "skills.json").read_text(encoding="utf-8"))
        self.categories = json.loads((self.settings.sample_data_dir / "categories.json").read_text(encoding="utf-8"))
        self.regions = json.loads((self.settings.sample_data_dir / "regions.json").read_text(encoding="utf-8"))
        self.llm_client = LLMClient(self.settings)

    def extract_job(self, text: str) -> ExtractResponse:
        """Extract a JobCard from free text; LLM first, rules as fallback."""
        logger.info("extract_job request text=%s", text)
        llm_result = self._llm_extract(text, self.settings.prompt_dir / "job_extract.md")
        if llm_result:
            try:
                return ExtractResponse(success=True, data=JobCard(**llm_result.content))
            except ValidationError as exc:
                logger.exception("LLM job extraction validation failed")
                return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))

        try:
            card = self._extract_job_rule(text)
            return ExtractResponse(success=True, data=card)
        except ValidationError as exc:
            logger.exception("Rule job extraction validation failed")
            return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))

    def extract_worker(self, text: str) -> ExtractResponse:
        """Extract a WorkerCard from free text; LLM first, rules as fallback."""
        logger.info("extract_worker request text=%s", text)
        llm_result = self._llm_extract(text, self.settings.prompt_dir / "worker_extract.md")
        if llm_result:
            try:
                return ExtractResponse(success=True, data=WorkerCard(**llm_result.content))
            except ValidationError as exc:
                logger.exception("LLM worker extraction validation failed")
                return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))

        try:
            card = self._extract_worker_rule(text)
            return ExtractResponse(success=True, data=card)
        except ValidationError as exc:
            logger.exception("Rule worker extraction validation failed")
            return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))

    def _llm_extract(self, text: str, prompt_path: Path):
        """Run the LLM extraction; any failure degrades to the rule path."""
        try:
            return self.llm_client.extract_json(load_prompt(prompt_path), text)
        except Exception:
            logger.exception("LLM extraction failed, fallback to rule-based extraction")
            return None

    def _extract_job_rule(self, text: str) -> JobCard:
        """Keyword/regex-based JobCard extraction over the loaded vocabularies."""
        skill_hits = [item for item in self.skills if item in text]
        category = next((item for item in self.categories if item in text), "活动执行")
        region = self._extract_region(text)
        salary = self._extract_salary(text)
        headcount = self._extract_number(text, [r"(\d+)\s*[个名人位]"], default=1)
        duration = self._extract_number(text, [r"(\d+(?:\.\d+)?)\s*小时"], default=4.0, cast=float)
        tags = [tag for tag in ["女生优先", "男生优先", "有经验优先", "沟通好", "可连做优先"] if tag in text]
        # Straightforward conditional instead of the original single-item generator trick.
        title = f"{category}{skill_hits[0]}兼职" if skill_hits else f"{category}兼职"
        card = JobCard(
            job_id=generate_id("job"),
            title=title,
            category=category,
            description=text,
            skills=skill_hits[:5] or self._guess_category_skills(category),
            city=region["city"],
            region=region["region"],
            location_detail=self._extract_location(text, region),
            start_time=self._extract_job_time(text),
            duration_hours=duration,
            headcount=int(headcount),
            salary=salary,
            work_mode="排班制" if "排班" in text else "兼职",
            tags=tags or ["有经验优先"],
            confidence=self._compute_confidence(skill_hits, region, salary.amount > 0),
        )
        return card

    def _extract_worker_rule(self, text: str) -> WorkerCard:
        """Keyword-based WorkerCard extraction with fixed fallback defaults."""
        skill_hits = [item for item in self.skills if item in text][:6]
        region_hits = [item for item in self.regions if item["region"] in text or item["city"] in text]
        city_names = list(dict.fromkeys([item["city"] for item in region_hits])) or ["深圳"]
        region_names = list(dict.fromkeys([item["region"] for item in region_hits])) or ["南山"]
        availability = self._extract_availability(text)
        experience = [item for item in ["商场", "会展", "活动执行", "物流", "零售", "客服中心", "快消", "校园推广"] if item in text]
        card = WorkerCard(
            worker_id=generate_id("worker"),
            name=self._extract_name(text),
            description=text,
            skills=[SkillScore(name=item, score=round(0.72 + index * 0.04, 2)) for index, item in enumerate(skill_hits or ["活动执行", "引导", "登记"])],
            cities=city_names,
            regions=region_names,
            availability=availability,
            experience_tags=experience or ["活动执行"],
            reliability_score=0.76,
            profile_completion=0.68,
            confidence=self._compute_confidence(skill_hits, {"city": city_names[0], "region": region_names[0]}, True),
        )
        return card

    def _extract_region(self, text: str) -> dict:
        """Prefer a city+region hit; fall back to region-only, then 深圳/南山."""
        for item in self.regions:
            if item["city"] in text and item["region"] in text:
                return item
        for item in self.regions:
            if item["region"] in text:
                return item
        return {"city": "深圳", "region": "南山"}

    def _extract_location(self, text: str, region: dict) -> str:
        """Compose city+region+venue-marker, or a TBD placeholder."""
        markers = ["会展中心", "商场", "地铁站", "园区", "写字楼", "仓库", "门店"]
        for marker in markers:
            if marker in text:
                return f"{region['city']}{region['region']}{marker}"
        return f"{region['city']}{region['region']}待定点位"

    def _extract_salary(self, text: str) -> Salary:
        """Detect amount and pay cadence; hourly only on an explicit /小时 rate."""
        amount = self._extract_number(text, [r"(\d+(?:\.\d+)?)\s*(?:元|块)"], default=150.0, cast=float)
        # "/小时" already implies "小时"; the explicit rate marker alone decides.
        salary_type = "hourly" if "/小时" in text else "daily"
        return Salary(type=salary_type, amount=amount, currency="CNY")

    def _extract_number(self, text: str, patterns: list[str], default, cast=int):
        """Return the first regex capture cast to `cast`, else `default`."""
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                return cast(match.group(1))
        return default

    def _extract_job_time(self, text: str) -> datetime:
        """Resolve relative (明天/后天) or explicit (M月D日, N点) start times in UTC+8.

        Invalid calendar dates (e.g. 2月30日) and computed hours outside 0-23
        fall back to defaults instead of raising; past month/day dates roll to
        next year; 下午/晚上 promotes a small explicit hour to the PM value.
        """
        shanghai_tz = timezone(timedelta(hours=8))
        now = datetime.now(shanghai_tz)
        if "明天" in text:
            base = now + timedelta(days=1)
        elif "后天" in text:
            base = now + timedelta(days=2)
        else:
            month_day = re.search(r"(\d{1,2})月(\d{1,2})日", text)
            if month_day:
                month, day = int(month_day.group(1)), int(month_day.group(2))
                try:
                    base = now.replace(month=month, day=day)
                except ValueError:
                    # Impossible dates like 2月30日: fall back to tomorrow.
                    base = now + timedelta(days=1)
                else:
                    if base.date() < now.date():
                        try:
                            # A month/day already past this year means next year.
                            base = base.replace(year=base.year + 1)
                        except ValueError:  # Feb 29 in a non-leap year
                            base = now + timedelta(days=1)
            else:
                base = now + timedelta(days=1)
        hour = 9
        if "下午" in text:
            hour = 13
        elif "晚上" in text:
            hour = 19
        explicit_hour = re.search(r"(\d{1,2})[:点时](\d{0,2})?", text)
        if explicit_hour:
            hour = int(explicit_hour.group(1))
            if hour < 12 and ("下午" in text or "晚上" in text):
                # "下午3点" means 15:00, not 03:00.
                hour += 12
        if not 0 <= hour <= 23:
            hour = 9  # guard datetime.replace against out-of-range hours
        return base.replace(hour=hour, minute=0, second=0, microsecond=0)

    def _extract_availability(self, text: str) -> list[str]:
        """Map Chinese time phrases to availability tags; default to anytime."""
        tags = []
        if "周末" in text:
            tags.append("weekend")
        if "上午" in text:
            tags.append("weekday_am")
        if "下午" in text:
            tags.append("weekday_pm")
        if "随时" in text or "都能" in text or "全天" in text:
            tags.append("anytime")
        return tags or ["anytime"]

    def _extract_name(self, text: str) -> str:
        """Pull a 2-4 character Chinese name after 我叫/我是, else an anonymous label."""
        if match := re.search(r"我叫([\u4e00-\u9fa5]{2,4})", text):
            return match.group(1)
        if match := re.search(r"我是([\u4e00-\u9fa5]{2,4})", text):
            return match.group(1)
        return "匿名候选人"

    def _guess_category_skills(self, category: str) -> list[str]:
        """Default skills per category when no skill keyword was found."""
        mapping = {
            "活动执行": ["签到", "引导", "登记"],
            "促销": ["促销", "导购", "陈列"],
            "配送": ["配送", "装卸", "司机协助"],
            "客服": ["客服", "电话邀约", "线上客服"],
        }
        return mapping.get(category, ["活动执行", "沟通"])

    def _compute_confidence(self, skill_hits: list[str], region: dict, has_salary: bool) -> float:
        """0.55 base plus bonuses for skills/region/salary, capped at 0.95."""
        score = 0.55
        if skill_hits:
            score += 0.15
        if region.get("city"):
            score += 0.15
        if has_salary:
            score += 0.1
        return min(round(score, 2), 0.95)

    def _missing_fields(self, exc: ValidationError) -> list[str]:
        """Flatten pydantic error locations into dotted field paths."""
        return [".".join(str(part) for part in item["loc"]) for item in exc.errors()]
class IngestService:
    """Writes cards to the relational store and mirrors them into the RAG index."""

    def __init__(self, db: Session):
        self.db = db
        self.settings = get_settings()
        self.job_repository = JobRepository(db)
        self.worker_repository = WorkerRepository(db)
        self.rag = LightRAGAdapter(self.settings)

    def ingest_job(self, card: JobCard) -> JobCard:
        """Upsert one job into the DB and the vector index; returns the card."""
        logger.info("ingest_job job_id=%s", card.job_id)
        self.job_repository.upsert(card)
        self.rag.upsert_job(card)
        return card

    def ingest_worker(self, card: WorkerCard) -> WorkerCard:
        """Upsert one worker into the DB and the vector index; returns the card."""
        logger.info("ingest_worker worker_id=%s", card.worker_id)
        self.worker_repository.upsert(card)
        self.rag.upsert_worker(card)
        return card

    def bootstrap(self) -> BootstrapResponse:
        """Load every sample-data file and ingest all jobs and workers."""
        skills = self._read_sample("skills.json")
        categories = self._read_sample("categories.json")
        regions = self._read_sample("regions.json")
        jobs = self._read_sample("jobs.json")
        workers = self._read_sample("workers.json")
        self.rag.ensure_ready()
        for raw_job in jobs:
            self.ingest_job(JobCard(**raw_job))
        for raw_worker in workers:
            self.ingest_worker(WorkerCard(**raw_worker))
        return BootstrapResponse(
            jobs=len(jobs),
            workers=len(workers),
            skills=len(skills),
            categories=len(categories),
            regions=len(regions),
        )

    def _read_sample(self, filename: str):
        """Parse one JSON document from the configured sample-data directory."""
        return json.loads((self.settings.sample_data_dir / filename).read_text(encoding="utf-8"))
class LLMClient:
    """Thin client for an OpenAI-compatible chat-completions endpoint."""

    def __init__(self, settings: Settings):
        self.settings = settings

    def extract_json(self, system_prompt: str, user_text: str) -> PromptOutput | None:
        """Ask the model for a JSON object; None when the LLM is not configured."""
        cfg = self.settings
        if not (cfg.llm_enabled and cfg.llm_base_url and cfg.llm_api_key):
            return None

        endpoint = f"{cfg.llm_base_url.rstrip('/')}/chat/completions"
        payload = {
            "model": cfg.llm_model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_text},
            ],
            "temperature": 0.1,
            "response_format": {"type": "json_object"},
        }
        auth_headers = {"Authorization": f"Bearer {cfg.llm_api_key}"}
        with httpx.Client(timeout=30.0) as client:
            response = client.post(endpoint, json=payload, headers=auth_headers)
        response.raise_for_status()
        raw_text = response.json()["choices"][0]["message"]["content"]
        return PromptOutput(content=json.loads(raw_text), raw_text=raw_text)
class MatchingService:
    """Bidirectional job<->worker matching: RAG recall then weighted rerank."""

    def __init__(self, db: Session):
        self.db = db
        self.settings = get_settings()
        self.jobs = JobRepository(db)
        self.workers = WorkerRepository(db)
        self.matches = MatchRepository(db)
        self.rag = LightRAGAdapter(self.settings)

    def match_workers(self, source: JobCard, top_n: int) -> list[MatchResult]:
        """Recall worker candidates for a job, score them, persist the top-N."""
        logger.info("match_workers source_id=%s top_n=%s", source.job_id, top_n)
        recall_limit = max(top_n * 3, self.settings.default_recall_top_k)
        query_text = " ".join([source.title, source.category, source.city, source.region, *source.skills, *source.tags])
        candidate_ids = self.rag.search(
            query_text=query_text,
            filters=QueryFilters(entity_type="worker", city=source.city),
            limit=recall_limit,
        )
        candidates = self.workers.get_many(candidate_ids) or self.workers.list(limit=max(top_n * 3, 50))
        scored = [self._build_job_to_worker_match(source, worker_to_card(candidate)) for candidate in candidates]
        scored.sort(key=lambda item: item.match_score, reverse=True)
        top = scored[:top_n]
        self.matches.bulk_replace(top, SourceType.job_to_worker.value, source.job_id)
        return top

    def match_jobs(self, source: WorkerCard, top_n: int) -> list[MatchResult]:
        """Recall job candidates for a worker, score them, persist the top-N."""
        logger.info("match_jobs source_id=%s top_n=%s", source.worker_id, top_n)
        recall_limit = max(top_n * 3, self.settings.default_recall_top_k)
        query_text = " ".join([source.name, *source.cities, *source.regions, *[item.name for item in source.skills], *source.experience_tags])
        city = source.cities[0] if source.cities else None
        candidate_ids = self.rag.search(
            query_text=query_text,
            filters=QueryFilters(entity_type="job", city=city),
            limit=recall_limit,
        )
        candidates = self.jobs.get_many(candidate_ids) or self.jobs.list(limit=max(top_n * 3, 50))
        scored = [self._build_worker_to_job_match(source, job_to_card(candidate)) for candidate in candidates]
        scored.sort(key=lambda item: item.match_score, reverse=True)
        top = scored[:top_n]
        self.matches.bulk_replace(top, SourceType.worker_to_job.value, source.worker_id)
        return top

    def explain(self, match_id: str) -> MatchResult | None:
        """Re-hydrate one stored match, or None when unknown."""
        record = self.matches.get(match_id)
        if record is None:
            return None
        from app.services.card_mapper import match_record_to_schema

        return match_record_to_schema(record)

    def _build_job_to_worker_match(self, job: JobCard, worker: WorkerCard) -> MatchResult:
        """Score one job/worker pair and package the breakdown plus reasons."""
        required = set(job.skills)
        related = self.rag.expand_skills(job.skills)
        score_by_skill = {entry.name: entry.score for entry in worker.skills}
        direct_hits = required & score_by_skill.keys()
        related_hits = related & score_by_skill.keys()
        raw_skill = sum(score_by_skill[name] for name in related_hits) / max(len(required), 1)
        if not direct_hits:
            # Only graph-expanded matches hit: discount heavily.
            raw_skill *= 0.4
        skill_score = min(raw_skill, 1.0)
        region_score = self._region_score(job.city, job.region, worker.cities, worker.regions)
        time_score = self._time_score(job.start_time, worker.availability)
        experience_score = self._experience_score([job.category, *job.tags], worker.experience_tags)
        reliability_score = worker.reliability_score
        total = self._weighted_score(skill_score, region_score, time_score, experience_score, reliability_score)
        breakdown = MatchBreakdown(
            skill_score=round(skill_score, 2),
            region_score=round(region_score, 2),
            time_score=round(time_score, 2),
            experience_score=round(experience_score, 2),
            reliability_score=round(reliability_score, 2),
        )
        reasons = self._build_reasons(
            matched_skills=list(related_hits)[:3],
            region_hit=region_score,
            time_score=time_score,
            experience_hits=list(set(job.tags).intersection(worker.experience_tags))[:2] or [job.category],
            reliability_score=reliability_score,
            target_region=job.region,
        )
        return MatchResult(
            match_id=generate_id("match"),
            source_type=SourceType.job_to_worker,
            source_id=job.job_id,
            target_id=worker.worker_id,
            match_score=round(total, 2),
            breakdown=breakdown,
            reasons=reasons,
        )

    def _build_worker_to_job_match(self, worker: WorkerCard, job: JobCard) -> MatchResult:
        """Reuse the job->worker scoring with source/target roles flipped."""
        reverse = self._build_job_to_worker_match(job, worker)
        return MatchResult(
            match_id=generate_id("match"),
            source_type=SourceType.worker_to_job,
            source_id=worker.worker_id,
            target_id=job.job_id,
            match_score=reverse.match_score,
            breakdown=reverse.breakdown,
            reasons=reverse.reasons,
        )

    def _region_score(self, job_city: str, job_region: str, worker_cities: list[str], worker_regions: list[str]) -> float:
        """1.0 exact region, 0.7 same city, 0.2 otherwise."""
        if job_region in worker_regions:
            return 1.0
        return 0.7 if job_city in worker_cities else 0.2

    def _time_score(self, start_time: datetime, availability: list[str]) -> float:
        """Match the job's start slot against the worker's availability tags."""
        if "anytime" in availability:
            return 1.0
        if start_time.weekday() >= 5:
            wanted = "weekend"
        elif start_time.hour >= 12:
            wanted = "weekday_pm"
        else:
            wanted = "weekday_am"
        return 1.0 if wanted in availability else 0.4

    def _experience_score(self, left: list[str], right: list[str]) -> float:
        """Overlap ratio plus a 0.4 floor; 0.4 when either side is empty."""
        left_set, right_set = set(left), set(right)
        if not left_set or not right_set:
            return 0.4
        overlap = len(left_set & right_set)
        return min(overlap / max(len(left_set), 1) + 0.4, 1.0)

    def _weighted_score(self, skill_score: float, region_score: float, time_score: float, experience_score: float, reliability_score: float) -> float:
        """Linear combination using the configured weights."""
        cfg = self.settings
        return (
            cfg.score_skill_weight * skill_score
            + cfg.score_region_weight * region_score
            + cfg.score_time_weight * time_score
            + cfg.score_experience_weight * experience_score
            + cfg.score_reliability_weight * reliability_score
        )

    def _build_reasons(
        self,
        matched_skills: list[str],
        region_hit: float,
        time_score: float,
        experience_hits: list[str],
        reliability_score: float,
        target_region: str,
    ) -> list[str]:
        """Produce 3-5 human-readable Chinese explanations for a match."""
        reasons: list[str] = []
        if matched_skills:
            reasons.append(f"具备{'、'.join(matched_skills[:3])}相关技能")
        if region_hit >= 1.0:
            reasons.append(f"服务区域覆盖{target_region},与岗位地点一致")
        elif region_hit >= 0.7:
            reasons.append("同城可到岗,区域匹配度较高")
        if time_score >= 1.0:
            reasons.append("可接单时间与岗位时间要求匹配")
        if experience_hits:
            reasons.append(f"具备{'、'.join(experience_hits[:2])}相关经验")
        if reliability_score >= 0.75:
            reasons.append("履约可信度较好,适合优先推荐")
        while len(reasons) < 3:
            reasons.append("岗位需求与候选画像存在基础匹配")
        return reasons[:5]
class LightRAGAdapter:
    """Minimal RAG layer over Qdrant with a hand-rolled hashing vectorizer.

    Also carries a small bidirectional skill graph (loaded from
    skill_relations.json) used for query-time skill expansion.
    """

    def __init__(self, settings: Settings):
        self.settings = settings
        self.client = QdrantClient(url=settings.qdrant_url)
        self.skill_graph = self._load_skill_graph()

    def ensure_ready(self) -> None:
        """Create the collection on first use; idempotent."""
        collections = {item.name for item in self.client.get_collections().collections}
        if self.settings.qdrant_collection not in collections:
            self.client.create_collection(
                collection_name=self.settings.qdrant_collection,
                vectors_config=models.VectorParams(size=self.settings.vector_size, distance=models.Distance.COSINE),
            )

    def health(self) -> str:
        """Probe the collection; raises if Qdrant is unreachable."""
        self.ensure_ready()
        self.client.get_collection(self.settings.qdrant_collection)
        return "ok"

    def upsert_job(self, job: JobCard) -> None:
        """Index one job document keyed by its job_id."""
        self.ensure_ready()
        payload = {
            "entity_type": "job",
            "entity_id": job.job_id,
            "city": job.city,
            "region": job.region,
            "category": job.category,
            "skills": job.skills,
            "tags": job.tags,
            "document": self._serialize_job(job),
        }
        self.client.upsert(
            collection_name=self.settings.qdrant_collection,
            points=[
                models.PointStruct(
                    id=job.job_id,
                    vector=self._vectorize(payload["document"]),
                    payload=payload,
                )
            ],
        )

    def upsert_worker(self, worker: WorkerCard) -> None:
        """Index one worker document keyed by its worker_id."""
        self.ensure_ready()
        payload = {
            "entity_type": "worker",
            "entity_id": worker.worker_id,
            "city": worker.cities[0] if worker.cities else "",
            "region": worker.regions[0] if worker.regions else "",
            "category": worker.experience_tags[0] if worker.experience_tags else "",
            "skills": [item.name for item in worker.skills],
            "tags": worker.experience_tags,
            "document": self._serialize_worker(worker),
        }
        self.client.upsert(
            collection_name=self.settings.qdrant_collection,
            points=[
                models.PointStruct(
                    id=worker.worker_id,
                    vector=self._vectorize(payload["document"]),
                    payload=payload,
                )
            ],
        )

    def search(self, query_text: str, filters: QueryFilters, limit: int) -> list[str]:
        """Vector search with hard entity_type/city filters and a soft region post-filter."""
        self.ensure_ready()
        must = [models.FieldCondition(key="entity_type", match=models.MatchValue(value=filters.entity_type))]
        if filters.city:
            must.append(models.FieldCondition(key="city", match=models.MatchValue(value=filters.city)))
        query_filter = models.Filter(must=must)
        results = self.client.search(
            collection_name=self.settings.qdrant_collection,
            query_vector=self._vectorize(query_text),
            query_filter=query_filter,
            limit=limit,
            with_payload=True,
        )
        ids = []
        for point in results:
            payload = point.payload or {}
            # Region is filtered after retrieval, so fewer than `limit` ids may return.
            if filters.region and payload.get("region") != filters.region:
                continue
            ids.append(str(payload.get("entity_id", point.id)))
        return ids

    def expand_skills(self, skills: list[str]) -> set[str]:
        """Return the input skills plus their directly related graph neighbours."""
        expanded = set(skills)
        for skill in skills:
            expanded.update(self.skill_graph.get(skill, []))
        return expanded

    def _load_skill_graph(self) -> dict[str, set[str]]:
        """Build a bidirectional adjacency map from skill_relations.json (empty if absent)."""
        relations_path = self.settings.sample_data_dir / "skill_relations.json"
        if not relations_path.exists():
            return defaultdict(set)
        data = json.loads(relations_path.read_text(encoding="utf-8"))
        graph: dict[str, set[str]] = defaultdict(set)
        for source, targets in data.items():
            graph[source].update(targets)
            for target in targets:
                graph[target].add(source)
        return graph

    def _serialize_job(self, job: JobCard) -> str:
        """Flatten a job card into one searchable text document."""
        return " ".join([job.title, job.category, job.city, job.region, *job.skills, *job.tags, job.description])

    def _serialize_worker(self, worker: WorkerCard) -> str:
        """Flatten a worker card into one searchable text document."""
        return " ".join(
            [worker.name, *worker.cities, *worker.regions, *[item.name for item in worker.skills], *worker.experience_tags, worker.description]
        )

    def _vectorize(self, text: str) -> list[float]:
        """Hash token counts into a fixed-size L2-normalised vector.

        Uses CRC32 instead of the builtin hash(): str hashing is randomised
        per process (PYTHONHASHSEED), so builtin-hash vectors persisted in
        Qdrant would never match queries issued after a restart.
        """
        import zlib  # local import: only needed here, file-top imports untouched

        vector = [0.0 for _ in range(self.settings.vector_size)]
        for token in self._tokenize(text):
            index = zlib.crc32(token.encode("utf-8")) % self.settings.vector_size
            vector[index] += 1.0
        norm = math.sqrt(sum(item * item for item in vector)) or 1.0
        return [item / norm for item in vector]

    def _tokenize(self, text: str) -> list[str]:
        """Whitespace/punctuation split plus all 2- and 3-char n-grams (CJK has no spaces)."""
        cleaned = [part.strip().lower() for part in text.replace(",", " ").replace("、", " ").replace("。", " ").split()]
        tokens = [part for part in cleaned if part]
        for size in (2, 3):
            for index in range(max(len(text) - size + 1, 0)):
                chunk = text[index : index + size].strip()
                if chunk:
                    tokens.append(chunk)
        return tokens
from datetime import datetime
from pathlib import Path
from uuid import uuid4


def generate_id(prefix: str) -> str:
    """Build a readable unique id: <prefix>_<local timestamp>_<6 hex chars>."""
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    suffix = uuid4().hex[:6]
    return f"{prefix}_{stamp}_{suffix}"


def load_prompt(path: Path) -> str:
    """Read a prompt template file as UTF-8 text."""
    return path.read_text(encoding="utf-8")
registry https://registry.npmmirror.com + +WORKDIR /app + +COPY apps/web/package.json /app/package.json +COPY apps/web/tsconfig.json /app/tsconfig.json +COPY apps/web/tsconfig.app.json /app/tsconfig.app.json +COPY apps/web/vite.config.ts /app/vite.config.ts +COPY apps/web/index.html /app/index.html +COPY apps/web/src /app/src + +RUN npm install && npm run build + +FROM docker.m.daocloud.io/library/nginx:1.27-alpine + +COPY infrastructure/nginx/default.conf /etc/nginx/conf.d/default.conf +COPY --from=builder /app/dist /usr/share/nginx/html diff --git a/gig-poc/apps/web/index.html b/gig-poc/apps/web/index.html new file mode 100644 index 0000000..16603fb --- /dev/null +++ b/gig-poc/apps/web/index.html @@ -0,0 +1,12 @@ + + + + + + Gig POC Console + + +
+ + + diff --git a/gig-poc/apps/web/package.json b/gig-poc/apps/web/package.json new file mode 100644 index 0000000..cc11a70 --- /dev/null +++ b/gig-poc/apps/web/package.json @@ -0,0 +1,23 @@ +{ + "name": "gig-poc-web", + "version": "0.1.0", + "private": true, + "type": "module", + "scripts": { + "dev": "vite --host 0.0.0.0 --port 5173", + "build": "tsc -b && vite build", + "preview": "vite preview --host 0.0.0.0 --port 4173" + }, + "dependencies": { + "react": "18.3.1", + "react-dom": "18.3.1", + "react-router-dom": "6.30.1" + }, + "devDependencies": { + "@types/react": "18.3.20", + "@types/react-dom": "18.3.6", + "@vitejs/plugin-react": "4.3.4", + "typescript": "5.8.3", + "vite": "5.4.18" + } +} diff --git a/gig-poc/apps/web/src/App.tsx b/gig-poc/apps/web/src/App.tsx new file mode 100644 index 0000000..7e02c0c --- /dev/null +++ b/gig-poc/apps/web/src/App.tsx @@ -0,0 +1,41 @@ +import { NavLink, Route, Routes } from "react-router-dom"; +import { JobPage } from "./pages/JobPage"; +import { WorkerPage } from "./pages/WorkerPage"; +import { DataBrowserPage } from "./pages/DataBrowserPage"; +import { StatusPage } from "./pages/StatusPage"; + +const navItems = [ + { to: "/", label: "岗位测试" }, + { to: "/workers", label: "工人测试" }, + { to: "/browse", label: "数据浏览" }, + { to: "/status", label: "系统状态" } +]; + +export default function App() { + return ( +
+ +
+ + } /> + } /> + } /> + } /> + +
+
+ ); +} diff --git a/gig-poc/apps/web/src/api/client.ts b/gig-poc/apps/web/src/api/client.ts new file mode 100644 index 0000000..7d538e6 --- /dev/null +++ b/gig-poc/apps/web/src/api/client.ts @@ -0,0 +1,33 @@ +const API_BASE = import.meta.env.VITE_API_BASE ?? "/api"; + +async function request(path: string, init?: RequestInit): Promise { + const response = await fetch(`${API_BASE}${path}`, { + headers: { + "Content-Type": "application/json", + ...(init?.headers ?? {}) + }, + ...init + }); + if (!response.ok) { + const text = await response.text(); + throw new Error(text || `Request failed: ${response.status}`); + } + return response.json() as Promise; +} + +export const api = { + health: () => request("/health"), + extractJob: (text: string) => request("/poc/extract/job", { method: "POST", body: JSON.stringify({ text }) }), + extractWorker: (text: string) => request("/poc/extract/worker", { method: "POST", body: JSON.stringify({ text }) }), + ingestJob: (job: unknown) => request("/poc/ingest/job", { method: "POST", body: JSON.stringify({ job }) }), + ingestWorker: (worker: unknown) => request("/poc/ingest/worker", { method: "POST", body: JSON.stringify({ worker }) }), + bootstrap: () => request("/poc/ingest/bootstrap", { method: "POST" }), + matchWorkers: (job: unknown, top_n = 10) => + request("/poc/match/workers", { method: "POST", body: JSON.stringify({ job, top_n }) }), + matchJobs: (worker: unknown, top_n = 10) => + request("/poc/match/jobs", { method: "POST", body: JSON.stringify({ worker, top_n }) }), + jobs: () => request("/poc/jobs"), + workers: () => request("/poc/workers"), + job: (jobId: string) => request(`/poc/jobs/${jobId}`), + worker: (workerId: string) => request(`/poc/workers/${workerId}`) +}; diff --git a/gig-poc/apps/web/src/components/JsonPanel.tsx b/gig-poc/apps/web/src/components/JsonPanel.tsx new file mode 100644 index 0000000..c27998b --- /dev/null +++ b/gig-poc/apps/web/src/components/JsonPanel.tsx @@ -0,0 +1,15 @@ +type JsonPanelProps = { 
+ title: string; + data: unknown; +}; + +export function JsonPanel({ title, data }: JsonPanelProps) { + return ( +
+
+

{title}

+
+
{JSON.stringify(data, null, 2)}
+
+ ); +} diff --git a/gig-poc/apps/web/src/components/MatchList.tsx b/gig-poc/apps/web/src/components/MatchList.tsx new file mode 100644 index 0000000..cbbff3b --- /dev/null +++ b/gig-poc/apps/web/src/components/MatchList.tsx @@ -0,0 +1,34 @@ +type MatchItem = { + match_id: string; + target_id: string; + match_score: number; + reasons: string[]; + breakdown: Record; +}; + +export function MatchList({ title, items }: { title: string; items: MatchItem[] }) { + return ( +
+
+

{title}

+ {items.length} 条 +
+
+ {items.map((item) => ( +
+
+ {item.target_id} + {item.match_score} +
+
+ {item.reasons.map((reason) => ( +

{reason}

+ ))} +
+
{JSON.stringify(item.breakdown, null, 2)}
+
+ ))} +
+
+ ); +} diff --git a/gig-poc/apps/web/src/main.tsx b/gig-poc/apps/web/src/main.tsx new file mode 100644 index 0000000..2fb152c --- /dev/null +++ b/gig-poc/apps/web/src/main.tsx @@ -0,0 +1,13 @@ +import React from "react"; +import ReactDOM from "react-dom/client"; +import { BrowserRouter } from "react-router-dom"; +import App from "./App"; +import "./styles/global.css"; + +ReactDOM.createRoot(document.getElementById("root")!).render( + + + + + +); diff --git a/gig-poc/apps/web/src/pages/DataBrowserPage.tsx b/gig-poc/apps/web/src/pages/DataBrowserPage.tsx new file mode 100644 index 0000000..2c9165e --- /dev/null +++ b/gig-poc/apps/web/src/pages/DataBrowserPage.tsx @@ -0,0 +1,28 @@ +import { useEffect, useState } from "react"; +import { api } from "../api/client"; +import { JsonPanel } from "../components/JsonPanel"; + +export function DataBrowserPage() { + const [jobs, setJobs] = useState([]); + const [workers, setWorkers] = useState([]); + + useEffect(() => { + void (async () => { + const [jobResult, workerResult] = await Promise.all([api.jobs(), api.workers()]); + setJobs(jobResult.items.slice(0, 12)); + setWorkers(workerResult.items.slice(0, 12)); + })(); + }, []); + + return ( +
+
+

Page C

+

数据浏览页

+

浏览当前已入库的岗位与工人样本,方便验证 bootstrap 与前后端查询链路。

+
+ + +
+ ); +} diff --git a/gig-poc/apps/web/src/pages/JobPage.tsx b/gig-poc/apps/web/src/pages/JobPage.tsx new file mode 100644 index 0000000..c623110 --- /dev/null +++ b/gig-poc/apps/web/src/pages/JobPage.tsx @@ -0,0 +1,53 @@ +import { useState } from "react"; +import { api } from "../api/client"; +import { JsonPanel } from "../components/JsonPanel"; +import { MatchList } from "../components/MatchList"; + +const DEFAULT_TEXT = "明天下午南山会展中心需要2个签到协助,5小时,150/人,女生优先,需要会签到、引导和登记。"; + +export function JobPage() { + const [text, setText] = useState(DEFAULT_TEXT); + const [jobCard, setJobCard] = useState(null); + const [matches, setMatches] = useState([]); + const [loading, setLoading] = useState(false); + + const handleExtract = async () => { + setLoading(true); + try { + const result = await api.extractJob(text); + setJobCard(result.data); + } finally { + setLoading(false); + } + }; + + const handleIngestAndMatch = async () => { + if (!jobCard) { + return; + } + setLoading(true); + try { + await api.ingestJob(jobCard); + const result = await api.matchWorkers(jobCard, 10); + setMatches(result.items); + } finally { + setLoading(false); + } + }; + + return ( +
+
+

Page A

+

岗位测试页

+