From 84f8be7c0e3c9694de00a4ca964563dddc69f631 Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 1 Apr 2026 14:19:25 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=88=9D=E5=A7=8B=E5=8C=96=E9=9B=B6?= =?UTF-8?q?=E5=B7=A5=E5=90=8E=E7=AB=AF=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- gig-poc/apps/api/app/api/routes.py | 365 ++++++++++- gig-poc/apps/api/app/core/config.py | 36 ++ gig-poc/apps/api/app/db/session.py | 9 +- gig-poc/apps/api/app/domain/schemas.py | 195 +++--- gig-poc/apps/api/app/main.py | 45 +- gig-poc/apps/api/app/services/ai_guard.py | 87 +++ .../apps/api/app/services/cache_service.py | 146 +++++ .../api/app/services/extraction_service.py | 71 ++- gig-poc/apps/api/app/services/ingest_queue.py | 105 ++++ .../apps/api/app/services/ingest_service.py | 15 + gig-poc/apps/api/app/services/llm_client.py | 82 ++- gig-poc/apps/api/app/services/match_queue.py | 121 ++++ .../apps/api/app/services/matching_service.py | 52 +- .../api/app/services/rag/lightrag_adapter.py | 48 +- .../apps/api/app/services/runtime_state.py | 23 + .../apps/api/app/services/traffic_guard.py | 108 ++++ .../apps/api/app/services/weight_service.py | 77 +++ gig-poc/apps/api/pyproject.toml | 1 + gig-poc/apps/web/src/vite-env.d.ts | 1 + gig-poc/docs/API.md | 577 +++++++++++++++++- gig-poc/docs/CAPACITY_BASELINE.md | 19 + gig-poc/docs/DEMO.md | 42 ++ gig-poc/docs/README.md | 87 +++ gig-poc/docs/SCALING.md | 19 + gig-poc/docs/openapi.json | 1 + .../infrastructure/docker-compose.prod.yml | 27 + gig-poc/infrastructure/docker-compose.yml | 27 + gig-poc/infrastructure/k8s/api.yaml | 90 +++ gig-poc/infrastructure/k8s/ingress.yaml | 24 + gig-poc/infrastructure/k8s/kustomization.yaml | 9 + gig-poc/infrastructure/k8s/namespace.yaml | 4 + gig-poc/infrastructure/k8s/redis.yaml | 41 ++ gig-poc/infrastructure/k8s/web.yaml | 61 ++ .../infrastructure/scripts/acceptance-e2e.sh | 114 ++++ gig-poc/infrastructure/scripts/dev-up.sh | 20 + 
.../infrastructure/scripts/export-openapi.sh | 11 + .../infrastructure/scripts/freeze-openapi.sh | 37 ++ .../infrastructure/scripts/load-baseline.sh | 137 +++++ gig-poc/infrastructure/scripts/prod-down.sh | 8 + gig-poc/infrastructure/scripts/prod-up.sh | 16 +- start.sh | 2 + 41 files changed, 2813 insertions(+), 147 deletions(-) create mode 100644 gig-poc/apps/api/app/services/ai_guard.py create mode 100644 gig-poc/apps/api/app/services/cache_service.py create mode 100644 gig-poc/apps/api/app/services/ingest_queue.py create mode 100644 gig-poc/apps/api/app/services/match_queue.py create mode 100644 gig-poc/apps/api/app/services/runtime_state.py create mode 100644 gig-poc/apps/api/app/services/traffic_guard.py create mode 100644 gig-poc/apps/api/app/services/weight_service.py create mode 100644 gig-poc/apps/web/src/vite-env.d.ts create mode 100644 gig-poc/docs/CAPACITY_BASELINE.md create mode 100644 gig-poc/docs/SCALING.md create mode 100644 gig-poc/docs/openapi.json create mode 100644 gig-poc/infrastructure/k8s/api.yaml create mode 100644 gig-poc/infrastructure/k8s/ingress.yaml create mode 100644 gig-poc/infrastructure/k8s/kustomization.yaml create mode 100644 gig-poc/infrastructure/k8s/namespace.yaml create mode 100644 gig-poc/infrastructure/k8s/redis.yaml create mode 100644 gig-poc/infrastructure/k8s/web.yaml create mode 100755 gig-poc/infrastructure/scripts/acceptance-e2e.sh create mode 100755 gig-poc/infrastructure/scripts/export-openapi.sh create mode 100755 gig-poc/infrastructure/scripts/freeze-openapi.sh create mode 100755 gig-poc/infrastructure/scripts/load-baseline.sh create mode 100755 gig-poc/infrastructure/scripts/prod-down.sh create mode 100644 start.sh diff --git a/gig-poc/apps/api/app/api/routes.py b/gig-poc/apps/api/app/api/routes.py index ac801ce..9004969 100644 --- a/gig-poc/apps/api/app/api/routes.py +++ b/gig-poc/apps/api/app/api/routes.py @@ -1,23 +1,33 @@ from datetime import datetime -from fastapi import APIRouter, Depends, HTTPException 
+from fastapi import APIRouter, Depends, HTTPException, Path from sqlalchemy import text from sqlalchemy.orm import Session from app.core.config import get_settings from app.db.session import get_db from app.domain.schemas import ( + AIObservabilityResponse, + BootstrapResponse, ExplainResponse, ExtractResponse, ExtractTextRequest, HealthStatus, IngestJobRequest, + IngestAsyncResponse, IngestWorkerRequest, JobCard, ListResponse, + MatchFeedbackRequest, + MatchAsyncJobsRequest, + MatchAsyncResponse, + MatchAsyncWorkersRequest, MatchJobsRequest, MatchResponse, + MatchWeightResponse, MatchWorkersRequest, + QueueStatusResponse, + SystemOpsResponse, WorkerCard, ) from app.repositories.job_repository import JobRepository @@ -25,14 +35,23 @@ from app.repositories.worker_repository import WorkerRepository from app.services.card_mapper import job_to_card, worker_to_card from app.services.extraction_service import ExtractionService from app.services.ingest_service import IngestService +from app.services.llm_client import LLMClient from app.services.matching_service import MatchingService +from app.services.cache_service import get_match_cache, get_query_cache +from app.services.runtime_state import get_ingest_queue, get_match_queue, get_traffic_guard from app.services.rag.lightrag_adapter import LightRAGAdapter router = APIRouter() -@router.get("/health", response_model=HealthStatus) +@router.get( + "/health", + response_model=HealthStatus, + tags=["系统"], + summary="服务健康检查", + description="检查 API 服务、数据库与 RAG 检索组件状态。", +) def health(db: Session = Depends(get_db)) -> HealthStatus: settings = get_settings() db_status = "ok" @@ -48,32 +67,119 @@ def health(db: Session = Depends(get_db)) -> HealthStatus: return HealthStatus(service="ok", database=db_status, rag=rag_status, timestamp=datetime.now().astimezone()) -@router.post("/poc/extract/job", response_model=ExtractResponse) +@router.post( + "/poc/extract/job", + response_model=ExtractResponse, + tags=["抽取"], + summary="岗位文本抽取", 
+ description="将岗位自然语言文本抽取为结构化 JobCard。", +) def extract_job(payload: ExtractTextRequest) -> ExtractResponse: return ExtractionService().extract_job(payload.text) -@router.post("/poc/extract/worker", response_model=ExtractResponse) +@router.post( + "/poc/extract/worker", + response_model=ExtractResponse, + tags=["抽取"], + summary="工人文本抽取", + description="将工人自然语言文本抽取为结构化 WorkerCard。", +) def extract_worker(payload: ExtractTextRequest) -> ExtractResponse: return ExtractionService().extract_worker(payload.text) -@router.post("/poc/ingest/job", response_model=JobCard) +@router.post( + "/poc/ingest/job", + response_model=JobCard, + tags=["入库"], + summary="岗位入库", + description="写入或更新岗位卡片,并同步更新检索索引。", +) def ingest_job(payload: IngestJobRequest, db: Session = Depends(get_db)) -> JobCard: return IngestService(db).ingest_job(payload.job) -@router.post("/poc/ingest/worker", response_model=WorkerCard) +@router.post( + "/poc/ingest/worker", + response_model=WorkerCard, + tags=["入库"], + summary="工人入库", + description="写入或更新工人卡片,并同步更新检索索引。", +) def ingest_worker(payload: IngestWorkerRequest, db: Session = Depends(get_db)) -> WorkerCard: return IngestService(db).ingest_worker(payload.worker) -@router.post("/poc/ingest/bootstrap") -def bootstrap(db: Session = Depends(get_db)): +@router.post( + "/poc/ingest/job/async", + response_model=IngestAsyncResponse, + tags=["入库"], + summary="岗位异步入库", + description="将岗位入库请求写入异步队列,快速返回任务 ID。", +) +def ingest_job_async(payload: IngestJobRequest) -> IngestAsyncResponse: + settings = get_settings() + if not settings.ingest_async_enabled: + raise HTTPException(status_code=400, detail="异步入库未开启") + queue = get_ingest_queue() + try: + task_id = queue.enqueue_job(payload.job) + except RuntimeError as exc: + raise HTTPException(status_code=503, detail=str(exc)) from exc + return IngestAsyncResponse(task_id=task_id, status=queue.task_status(task_id)) + + +@router.post( + "/poc/ingest/worker/async", + response_model=IngestAsyncResponse, + tags=["入库"], + 
summary="工人异步入库", + description="将工人入库请求写入异步队列,快速返回任务 ID。", +) +def ingest_worker_async(payload: IngestWorkerRequest) -> IngestAsyncResponse: + settings = get_settings() + if not settings.ingest_async_enabled: + raise HTTPException(status_code=400, detail="异步入库未开启") + queue = get_ingest_queue() + try: + task_id = queue.enqueue_worker(payload.worker) + except RuntimeError as exc: + raise HTTPException(status_code=503, detail=str(exc)) from exc + return IngestAsyncResponse(task_id=task_id, status=queue.task_status(task_id)) + + +@router.get( + "/poc/ingest/queue/{task_id}", + response_model=IngestAsyncResponse, + tags=["入库"], + summary="异步入库任务状态", + description="根据 task_id 查询异步入库任务状态。", +) +def ingest_task_status(task_id: str) -> IngestAsyncResponse: + queue = get_ingest_queue() + return IngestAsyncResponse(task_id=task_id, status=queue.task_status(task_id)) + + +@router.post( + "/poc/ingest/bootstrap", + response_model=BootstrapResponse, + tags=["入库"], + summary="样本数据初始化", + description="导入内置样本数据(岗位、工人、技能、类目、区域)并构建检索数据。", +) +def bootstrap(db: Session = Depends(get_db)) -> BootstrapResponse: return IngestService(db).bootstrap() -@router.post("/poc/match/workers", response_model=MatchResponse) +@router.post( + "/poc/match/workers", + response_model=MatchResponse, + tags=["匹配"], + summary="岗位匹配工人", + description="支持通过 job_id 或内联 job 进行匹配,返回 top_n 条结果。", + responses={404: {"description": "岗位不存在"}}, +) def match_workers(payload: MatchWorkersRequest, db: Session = Depends(get_db)) -> MatchResponse: service = MatchingService(db) source = payload.job @@ -85,7 +191,14 @@ def match_workers(payload: MatchWorkersRequest, db: Session = Depends(get_db)) - return MatchResponse(items=service.match_workers(source, payload.top_n)) -@router.post("/poc/match/jobs", response_model=MatchResponse) +@router.post( + "/poc/match/jobs", + response_model=MatchResponse, + tags=["匹配"], + summary="工人匹配岗位", + description="支持通过 worker_id 或内联 worker 进行匹配,返回 top_n 条结果。", + responses={404: 
{"description": "工人不存在"}}, +) def match_jobs(payload: MatchJobsRequest, db: Session = Depends(get_db)) -> MatchResponse: service = MatchingService(db) source = payload.worker @@ -97,37 +210,245 @@ def match_jobs(payload: MatchJobsRequest, db: Session = Depends(get_db)) -> Matc return MatchResponse(items=service.match_jobs(source, payload.top_n)) -@router.get("/poc/match/explain/{match_id}", response_model=ExplainResponse) -def explain_match(match_id: str, db: Session = Depends(get_db)) -> ExplainResponse: +@router.post( + "/poc/match/workers/async", + response_model=MatchAsyncResponse, + tags=["匹配"], + summary="岗位异步匹配工人", + description="将匹配任务放入队列异步计算,适合高并发削峰。", +) +def match_workers_async(payload: MatchAsyncWorkersRequest) -> MatchAsyncResponse: + settings = get_settings() + if not settings.match_async_enabled: + raise HTTPException(status_code=400, detail="异步匹配未开启") + queue = get_match_queue() + try: + task_id = queue.enqueue_workers(payload.job_id, payload.top_n) + except RuntimeError as exc: + raise HTTPException(status_code=503, detail=str(exc)) from exc + return MatchAsyncResponse(task_id=task_id, status=queue.task_status(task_id)) + + +@router.post( + "/poc/match/jobs/async", + response_model=MatchAsyncResponse, + tags=["匹配"], + summary="工人异步匹配岗位", + description="将匹配任务放入队列异步计算,适合高并发削峰。", +) +def match_jobs_async(payload: MatchAsyncJobsRequest) -> MatchAsyncResponse: + settings = get_settings() + if not settings.match_async_enabled: + raise HTTPException(status_code=400, detail="异步匹配未开启") + queue = get_match_queue() + try: + task_id = queue.enqueue_jobs(payload.worker_id, payload.top_n) + except RuntimeError as exc: + raise HTTPException(status_code=503, detail=str(exc)) from exc + return MatchAsyncResponse(task_id=task_id, status=queue.task_status(task_id)) + + +@router.get( + "/poc/match/queue/{task_id}", + response_model=MatchAsyncResponse, + tags=["匹配"], + summary="异步匹配任务状态", + description="根据 task_id 查询异步匹配任务状态,完成后返回匹配结果。", +) +def 
match_task_status(task_id: str) -> MatchAsyncResponse: + queue = get_match_queue() + status = queue.task_status(task_id) + items = queue.task_result(task_id) + return MatchAsyncResponse(task_id=task_id, status=status, items=items) + + +@router.get( + "/poc/match/explain/{match_id}", + response_model=ExplainResponse, + tags=["匹配"], + summary="匹配结果解释", + description="根据 match_id 获取匹配明细和解释理由。", + responses={404: {"description": "匹配记录不存在"}}, +) +def explain_match( + match_id: str = Path(..., description="匹配记录 ID"), + db: Session = Depends(get_db), +) -> ExplainResponse: match = MatchingService(db).explain(match_id) if match is None: raise HTTPException(status_code=404, detail="匹配记录不存在") return ExplainResponse(match=match) -@router.get("/poc/jobs", response_model=ListResponse) +@router.post( + "/poc/match/feedback", + response_model=MatchWeightResponse, + tags=["匹配"], + summary="匹配反馈学习", + description="提交单条匹配的接受/拒绝反馈,用于在线更新排序权重。", + responses={404: {"description": "匹配记录不存在"}}, +) +def feedback_match(payload: MatchFeedbackRequest, db: Session = Depends(get_db)) -> MatchWeightResponse: + service = MatchingService(db) + weights = service.feedback(payload.match_id, payload.accepted) + if weights is None: + raise HTTPException(status_code=404, detail="匹配记录不存在") + return MatchWeightResponse(weights=weights, learning_enabled=get_settings().ranking_learning_enabled) + + +@router.get( + "/poc/match/weights", + response_model=MatchWeightResponse, + tags=["匹配"], + summary="当前排序权重", + description="查看当前生效的排序权重(默认权重或学习后的权重)。", +) +def get_match_weights(db: Session = Depends(get_db)) -> MatchWeightResponse: + service = MatchingService(db) + return MatchWeightResponse(weights=service.current_weights(), learning_enabled=get_settings().ranking_learning_enabled) + + +@router.get( + "/poc/ops/ai/metrics", + response_model=AIObservabilityResponse, + tags=["系统"], + summary="AI 观测指标", + description="返回 AI 调用的限流、熔断、降级与 fallback 命中率指标。", +) +def ai_metrics() -> AIObservabilityResponse: + 
metrics = LLMClient(get_settings()).metrics() + return AIObservabilityResponse(metrics=metrics) + + +@router.get( + "/poc/ops/system/metrics", + response_model=SystemOpsResponse, + tags=["系统"], + summary="系统运行指标", + description="返回全局流量护栏、缓存与异步队列指标。", +) +def system_metrics() -> SystemOpsResponse: + queue_stats = get_ingest_queue().stats() + match_queue_stats = get_match_queue().stats() + match_cache_stats = get_match_cache().stats() + query_cache_stats = get_query_cache().stats() + return SystemOpsResponse( + traffic=get_traffic_guard().snapshot(), + cache={ + "backend": match_cache_stats.get("backend", "memory"), + "match_hit_rate": match_cache_stats.get("hit_rate", 0.0), + "query_hit_rate": query_cache_stats.get("hit_rate", 0.0), + "match_size": int(match_cache_stats.get("size", 0)), + "query_size": int(query_cache_stats.get("size", 0)), + }, + ingest_queue=QueueStatusResponse( + queued=queue_stats["queued"], + processed=queue_stats["processed"], + failed=queue_stats["failed"], + ), + match_queue=QueueStatusResponse( + queued=match_queue_stats["queued"], + processed=match_queue_stats["processed"], + failed=match_queue_stats["failed"], + ), + ) + + +@router.get( + "/poc/jobs", + response_model=ListResponse, + tags=["查询"], + summary="岗位列表查询", + description="查询岗位列表,当前返回全量数据。", +) def list_jobs(db: Session = Depends(get_db)) -> ListResponse: + settings = get_settings() + cache = get_query_cache() + cache_key = "jobs:list" + if settings.query_cache_enabled: + cached = cache.get(cache_key) + if cached is not None: + return ListResponse(items=cached["items"], total=cached["total"]) items = [job_to_card(item).model_dump(mode="json") for item in JobRepository(db).list()] - return ListResponse(items=items, total=len(items)) + result = ListResponse(items=items, total=len(items)) + if settings.query_cache_enabled: + cache.set(cache_key, result.model_dump(mode="json")) + return result -@router.get("/poc/workers", response_model=ListResponse) +@router.get( + "/poc/workers", + 
response_model=ListResponse, + tags=["查询"], + summary="工人列表查询", + description="查询工人列表,当前返回全量数据。", +) def list_workers(db: Session = Depends(get_db)) -> ListResponse: + settings = get_settings() + cache = get_query_cache() + cache_key = "workers:list" + if settings.query_cache_enabled: + cached = cache.get(cache_key) + if cached is not None: + return ListResponse(items=cached["items"], total=cached["total"]) items = [worker_to_card(item).model_dump(mode="json") for item in WorkerRepository(db).list()] - return ListResponse(items=items, total=len(items)) + result = ListResponse(items=items, total=len(items)) + if settings.query_cache_enabled: + cache.set(cache_key, result.model_dump(mode="json")) + return result -@router.get("/poc/jobs/{job_id}", response_model=JobCard) -def get_job(job_id: str, db: Session = Depends(get_db)) -> JobCard: +@router.get( + "/poc/jobs/{job_id}", + response_model=JobCard, + tags=["查询"], + summary="岗位详情查询", + description="根据岗位 ID 查询单个岗位详情。", + responses={404: {"description": "岗位不存在"}}, +) +def get_job( + job_id: str = Path(..., description="岗位 ID"), + db: Session = Depends(get_db), +) -> JobCard: + settings = get_settings() + cache = get_query_cache() + cache_key = f"jobs:detail:{job_id}" + if settings.query_cache_enabled: + cached = cache.get(cache_key) + if cached is not None: + return JobCard(**cached) item = JobRepository(db).get(job_id) if item is None: raise HTTPException(status_code=404, detail="岗位不存在") - return job_to_card(item) + result = job_to_card(item) + if settings.query_cache_enabled: + cache.set(cache_key, result.model_dump(mode="json")) + return result -@router.get("/poc/workers/{worker_id}", response_model=WorkerCard) -def get_worker(worker_id: str, db: Session = Depends(get_db)) -> WorkerCard: +@router.get( + "/poc/workers/{worker_id}", + response_model=WorkerCard, + tags=["查询"], + summary="工人详情查询", + description="根据工人 ID 查询单个工人详情。", + responses={404: {"description": "工人不存在"}}, +) +def get_worker( + worker_id: str = 
Path(..., description="工人 ID"), + db: Session = Depends(get_db), +) -> WorkerCard: + settings = get_settings() + cache = get_query_cache() + cache_key = f"workers:detail:{worker_id}" + if settings.query_cache_enabled: + cached = cache.get(cache_key) + if cached is not None: + return WorkerCard(**cached) item = WorkerRepository(db).get(worker_id) if item is None: raise HTTPException(status_code=404, detail="工人不存在") - return worker_to_card(item) + result = worker_to_card(item) + if settings.query_cache_enabled: + cache.set(cache_key, result.model_dump(mode="json")) + return result diff --git a/gig-poc/apps/api/app/core/config.py b/gig-poc/apps/api/app/core/config.py index 13b0b48..93b22c7 100644 --- a/gig-poc/apps/api/app/core/config.py +++ b/gig-poc/apps/api/app/core/config.py @@ -17,18 +17,39 @@ class Settings(BaseSettings): app_host: str = "0.0.0.0" app_port: int = 8000 log_level: str = "INFO" + app_rate_limit_per_minute: int = 1200 + app_circuit_breaker_error_rate: float = 0.5 + app_circuit_breaker_min_requests: int = 50 + app_circuit_breaker_window_seconds: int = 60 + app_circuit_breaker_cooldown_seconds: int = 30 + alert_webhook_url: str | None = None database_url: str = "postgresql+psycopg://gig:gig@postgres:5432/gig_poc" + database_pool_size: int = 20 + database_max_overflow: int = 30 + database_pool_timeout: int = 30 qdrant_url: str = "http://qdrant:6333" qdrant_collection: str = "gig_poc_entities" vector_size: int = 64 llm_enabled: bool = False llm_base_url: str | None = None + llm_fallback_base_urls: list[str] = Field(default_factory=list) llm_api_key: str | None = None llm_model: str = "gpt-5.4" + extraction_llm_max_retries: int = 2 + + embedding_backend: str = "hash" # hash | openai_compatible embedding_enabled: bool = False + embedding_base_url: str | None = None + embedding_fallback_base_urls: list[str] = Field(default_factory=list) + embedding_api_key: str | None = None embedding_model: str = "text-embedding-3-small" + embedding_vector_size: int = 
1536 + ai_request_timeout_seconds: float = 30.0 + ai_rate_limit_per_minute: int = 120 + ai_circuit_breaker_fail_threshold: int = 5 + ai_circuit_breaker_cooldown_seconds: int = 30 bootstrap_jobs: int = 100 bootstrap_workers: int = 300 @@ -38,12 +59,27 @@ class Settings(BaseSettings): prompt_dir: Path = Field(default=ROOT_DIR / "packages" / "prompts") sample_data_dir: Path = Field(default=ROOT_DIR / "packages" / "sample-data") shared_types_dir: Path = Field(default=ROOT_DIR / "packages" / "shared-types") + data_dir: Path = Field(default=ROOT_DIR / "data") + match_weights_path: Path = Field(default=ROOT_DIR / "data" / "match_weights.json") score_skill_weight: float = 0.35 score_region_weight: float = 0.20 score_time_weight: float = 0.15 score_experience_weight: float = 0.15 score_reliability_weight: float = 0.15 + ranking_learning_enabled: bool = True + ranking_learning_rate: float = 0.08 + cache_backend: str = "memory" # memory | redis + redis_url: str = "redis://redis:6379/0" + redis_prefix: str = "gig_poc" + match_cache_enabled: bool = True + match_cache_ttl_seconds: int = 30 + query_cache_enabled: bool = True + query_cache_ttl_seconds: int = 20 + ingest_async_enabled: bool = True + ingest_queue_max_size: int = 10000 + match_async_enabled: bool = True + match_queue_max_size: int = 10000 @lru_cache diff --git a/gig-poc/apps/api/app/db/session.py b/gig-poc/apps/api/app/db/session.py index dba3715..1d49f8b 100644 --- a/gig-poc/apps/api/app/db/session.py +++ b/gig-poc/apps/api/app/db/session.py @@ -7,7 +7,14 @@ from app.core.config import get_settings settings = get_settings() -engine = create_engine(settings.database_url, future=True, pool_pre_ping=True) +engine = create_engine( + settings.database_url, + future=True, + pool_pre_ping=True, + pool_size=settings.database_pool_size, + max_overflow=settings.database_max_overflow, + pool_timeout=settings.database_pool_timeout, +) SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False, future=True) diff 
--git a/gig-poc/apps/api/app/domain/schemas.py b/gig-poc/apps/api/app/domain/schemas.py index 8f9f830..9193c0e 100644 --- a/gig-poc/apps/api/app/domain/schemas.py +++ b/gig-poc/apps/api/app/domain/schemas.py @@ -3,7 +3,7 @@ from __future__ import annotations from datetime import datetime from enum import Enum -from pydantic import BaseModel, Field, field_validator, model_validator +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator class SalaryType(str, Enum): @@ -19,82 +19,89 @@ class SourceType(str, Enum): class Salary(BaseModel): - type: SalaryType = SalaryType.daily - amount: float = 0 - currency: str = "CNY" + type: SalaryType = Field(default=SalaryType.daily, description="薪资类型:daily/hourly/monthly/task") + amount: float = Field(default=0, description="薪资金额") + currency: str = Field(default="CNY", description="货币类型,默认 CNY") class SkillScore(BaseModel): - name: str - score: float = Field(ge=0, le=1) + name: str = Field(description="技能名称") + score: float = Field(ge=0, le=1, description="技能熟练度,范围 0~1") class JobCard(BaseModel): - job_id: str - title: str - category: str - description: str - skills: list[str] = Field(default_factory=list) - city: str - region: str - location_detail: str - start_time: datetime - duration_hours: float = Field(gt=0) - headcount: int = Field(gt=0) - salary: Salary - work_mode: str - tags: list[str] = Field(default_factory=list) - confidence: float = Field(ge=0, le=1) + job_id: str = Field(description="岗位唯一 ID") + title: str = Field(description="岗位标题") + category: str = Field(description="岗位类别") + description: str = Field(description="岗位描述") + skills: list[str] = Field(default_factory=list, description="岗位技能要求列表") + city: str = Field(description="城市") + region: str = Field(description="区域") + location_detail: str = Field(description="详细地点描述") + start_time: datetime = Field(description="岗位开始时间,ISO-8601") + duration_hours: float = Field(gt=0, description="工时(小时),必须大于 0") + headcount: int = Field(gt=0, 
description="招聘人数,必须大于 0") + salary: Salary = Field(description="薪资信息") + work_mode: str = Field(description="工作模式,如兼职、全职、活动") + tags: list[str] = Field(default_factory=list, description="业务标签列表") + confidence: float = Field(ge=0, le=1, description="数据置信度,范围 0~1") class WorkerCard(BaseModel): - worker_id: str - name: str - description: str - skills: list[SkillScore] = Field(default_factory=list) - cities: list[str] = Field(default_factory=list) - regions: list[str] = Field(default_factory=list) - availability: list[str] = Field(default_factory=list) - experience_tags: list[str] = Field(default_factory=list) - reliability_score: float = Field(ge=0, le=1) - profile_completion: float = Field(ge=0, le=1) - confidence: float = Field(ge=0, le=1) + worker_id: str = Field(description="工人唯一 ID") + name: str = Field(description="工人姓名或昵称") + description: str = Field(description="工人自我描述") + skills: list[SkillScore] = Field(default_factory=list, description="技能及熟练度列表") + cities: list[str] = Field(default_factory=list, description="可接单城市列表") + regions: list[str] = Field(default_factory=list, description="可接单区域列表") + availability: list[str] = Field(default_factory=list, description="可上岗时间描述") + experience_tags: list[str] = Field(default_factory=list, description="经验标签列表") + reliability_score: float = Field(ge=0, le=1, description="履约可靠性分,范围 0~1") + profile_completion: float = Field(ge=0, le=1, description="档案完善度,范围 0~1") + confidence: float = Field(ge=0, le=1, description="数据置信度,范围 0~1") class MatchBreakdown(BaseModel): - skill_score: float = Field(ge=0, le=1) - region_score: float = Field(ge=0, le=1) - time_score: float = Field(ge=0, le=1) - experience_score: float = Field(ge=0, le=1) - reliability_score: float = Field(ge=0, le=1) + skill_score: float = Field(ge=0, le=1, description="技能匹配分,范围 0~1") + region_score: float = Field(ge=0, le=1, description="地域匹配分,范围 0~1") + time_score: float = Field(ge=0, le=1, description="时间匹配分,范围 0~1") + experience_score: float = Field(ge=0, le=1, 
description="经验匹配分,范围 0~1") + reliability_score: float = Field(ge=0, le=1, description="可靠性匹配分,范围 0~1") class MatchResult(BaseModel): - match_id: str - source_type: SourceType - source_id: str - target_id: str - match_score: float = Field(ge=0, le=1) - breakdown: MatchBreakdown - reasons: list[str] = Field(default_factory=list, min_length=3) + match_id: str = Field(description="匹配记录 ID") + source_type: SourceType = Field(description="匹配方向:job_to_worker 或 worker_to_job") + source_id: str = Field(description="源实体 ID") + target_id: str = Field(description="目标实体 ID") + match_score: float = Field(ge=0, le=1, description="综合匹配分,范围 0~1") + breakdown: MatchBreakdown = Field(description="多维打分拆解") + reasons: list[str] = Field(default_factory=list, min_length=3, description="匹配理由,至少 3 条") class ExtractTextRequest(BaseModel): - text: str = Field(min_length=5) + text: str = Field(min_length=5, description="待抽取的自然语言文本,最少 5 个字符") + model_config = ConfigDict( + json_schema_extra={ + "example": { + "text": "明天下午南山会展中心需要2个签到协助,5小时,150/人,女生优先", + } + } + ) class IngestJobRequest(BaseModel): - job: JobCard + job: JobCard = Field(description="岗位卡片对象") class IngestWorkerRequest(BaseModel): - worker: WorkerCard + worker: WorkerCard = Field(description="工人卡片对象") class MatchWorkersRequest(BaseModel): - job_id: str | None = None - job: JobCard | None = None - top_n: int = Field(default=10, ge=1, le=50) + job_id: str | None = Field(default=None, description="岗位 ID(与 job 二选一)") + job: JobCard | None = Field(default=None, description="内联岗位对象(与 job_id 二选一)") + top_n: int = Field(default=10, ge=1, le=50, description="返回条数,范围 1~50") @model_validator(mode="after") def validate_source(self) -> "MatchWorkersRequest": @@ -104,9 +111,9 @@ class MatchWorkersRequest(BaseModel): class MatchJobsRequest(BaseModel): - worker_id: str | None = None - worker: WorkerCard | None = None - top_n: int = Field(default=10, ge=1, le=50) + worker_id: str | None = Field(default=None, description="工人 ID(与 worker 二选一)") + 
worker: WorkerCard | None = Field(default=None, description="内联工人对象(与 worker_id 二选一)") + top_n: int = Field(default=10, ge=1, le=50, description="返回条数,范围 1~50") @model_validator(mode="after") def validate_source(self) -> "MatchJobsRequest": @@ -116,38 +123,86 @@ class MatchJobsRequest(BaseModel): class ExtractResponse(BaseModel): - success: bool - data: JobCard | WorkerCard | None = None - errors: list[str] = Field(default_factory=list) - missing_fields: list[str] = Field(default_factory=list) + success: bool = Field(description="抽取是否成功") + data: JobCard | WorkerCard | None = Field(default=None, description="抽取结果对象,可能为空") + errors: list[str] = Field(default_factory=list, description="错误信息列表") + missing_fields: list[str] = Field(default_factory=list, description="缺失字段列表") class BootstrapResponse(BaseModel): - jobs: int - workers: int - skills: int - categories: int - regions: int + jobs: int = Field(description="导入岗位数量") + workers: int = Field(description="导入工人数量") + skills: int = Field(description="技能词条数量") + categories: int = Field(description="类目数量") + regions: int = Field(description="区域数量") class HealthStatus(BaseModel): - service: str - database: str - rag: str - timestamp: datetime + service: str = Field(description="服务状态,通常为 ok") + database: str = Field(description="数据库状态:ok 或 error") + rag: str = Field(description="RAG 组件状态:ok 或 error") + timestamp: datetime = Field(description="服务端当前时间") class ListResponse(BaseModel): - items: list[dict] - total: int + items: list[dict] = Field(description="列表项") + total: int = Field(description="总数") class MatchResponse(BaseModel): - items: list[MatchResult] + items: list[MatchResult] = Field(description="匹配结果列表") class ExplainResponse(BaseModel): - match: MatchResult + match: MatchResult = Field(description="单条匹配结果详情") + + +class MatchFeedbackRequest(BaseModel): + match_id: str = Field(description="匹配记录 ID") + accepted: bool = Field(description="反馈是否接受该推荐") + + +class MatchWeightResponse(BaseModel): + weights: dict[str, 
float] = Field(description="当前生效的排序权重") + learning_enabled: bool = Field(description="是否开启在线学习") + + +class AIObservabilityResponse(BaseModel): + metrics: dict[str, float | int] = Field(description="AI 调用观测指标") + + +class IngestAsyncResponse(BaseModel): + task_id: str = Field(description="异步任务 ID") + status: str = Field(description="任务状态") + + +class QueueStatusResponse(BaseModel): + queued: int = Field(description="当前队列中任务数量") + processed: int = Field(description="历史处理成功数量") + failed: int = Field(description="历史处理失败数量") + + +class MatchAsyncWorkersRequest(BaseModel): + job_id: str = Field(description="岗位 ID") + top_n: int = Field(default=10, ge=1, le=50, description="返回条数,范围 1~50") + + +class MatchAsyncJobsRequest(BaseModel): + worker_id: str = Field(description="工人 ID") + top_n: int = Field(default=10, ge=1, le=50, description="返回条数,范围 1~50") + + +class MatchAsyncResponse(BaseModel): + task_id: str = Field(description="异步任务 ID") + status: str = Field(description="任务状态") + items: list[MatchResult] | None = Field(default=None, description="任务完成后返回的匹配结果") + + +class SystemOpsResponse(BaseModel): + traffic: dict[str, float | int] = Field(description="全局流量护栏与错误窗口指标") + cache: dict[str, float | int | str] = Field(description="缓存命中与大小") + ingest_queue: QueueStatusResponse = Field(description="异步入库队列状态") + match_queue: QueueStatusResponse = Field(description="异步匹配队列状态") class PromptOutput(BaseModel): diff --git a/gig-poc/apps/api/app/main.py b/gig-poc/apps/api/app/main.py index f75b4fd..25a2f4c 100644 --- a/gig-poc/apps/api/app/main.py +++ b/gig-poc/apps/api/app/main.py @@ -1,6 +1,8 @@ from contextlib import asynccontextmanager +from time import perf_counter -from fastapi import FastAPI +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse from fastapi.middleware.cors import CORSMiddleware from app.api.routes import router @@ -9,6 +11,7 @@ from app.core.logging import configure_logging, logger from app.db.base import Base from app.db.session 
import engine from app.services.rag.lightrag_adapter import LightRAGAdapter +from app.services.runtime_state import get_ingest_queue, get_match_queue, get_traffic_guard settings = get_settings() @@ -18,14 +21,33 @@ configure_logging(settings.log_level) @asynccontextmanager async def lifespan(_: FastAPI): Base.metadata.create_all(bind=engine) + get_ingest_queue().start() + get_match_queue().start() try: LightRAGAdapter(settings).ensure_ready() except Exception: logger.exception("Qdrant initialization skipped during startup") yield + get_ingest_queue().stop() + get_match_queue().stop() -app = FastAPI(title=settings.app_name, lifespan=lifespan) +app = FastAPI( + title=settings.app_name, + description=( + "Gig POC 接口文档。\n\n" + "接口分组:系统、抽取、入库、匹配、查询。\n" + "完整业务说明请参考项目文档 `docs/API.md`。" + ), + openapi_tags=[ + {"name": "系统", "description": "服务与依赖组件状态检查接口"}, + {"name": "抽取", "description": "自然语言文本抽取为结构化卡片"}, + {"name": "入库", "description": "结构化岗位/工人数据写入与初始化"}, + {"name": "匹配", "description": "岗位与工人双向匹配及结果解释"}, + {"name": "查询", "description": "岗位/工人列表与详情查询"}, + ], + lifespan=lifespan, +) app.add_middleware( CORSMiddleware, allow_origins=["*"], @@ -33,4 +55,23 @@ app.add_middleware( allow_methods=["*"], allow_headers=["*"], ) + + +@app.middleware("http") +async def traffic_guard_middleware(request: Request, call_next): + guard = get_traffic_guard() + allowed, reason = guard.allow(request.url.path) + if not allowed: + status_code = 429 if reason == "rate_limited" else 503 + return JSONResponse(status_code=status_code, content={"detail": reason}) + start = perf_counter() + try: + response = await call_next(request) + except Exception: + guard.record(500, (perf_counter() - start) * 1000) + raise + guard.record(response.status_code, (perf_counter() - start) * 1000) + return response + + app.include_router(router) diff --git a/gig-poc/apps/api/app/services/ai_guard.py b/gig-poc/apps/api/app/services/ai_guard.py new file mode 100644 index 0000000..774ef22 --- /dev/null +++ 
b/gig-poc/apps/api/app/services/ai_guard.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +import time +from dataclasses import dataclass +from threading import Lock + +from app.core.config import Settings + + +@dataclass +class EndpointState: + current_minute: int = 0 + minute_count: int = 0 + consecutive_failures: int = 0 + circuit_open_until: float = 0.0 + + +class AIGuard: + _lock = Lock() + _endpoint_states: dict[str, EndpointState] = {} + _metrics = { + "requests_total": 0, + "success_total": 0, + "fail_total": 0, + "fallback_total": 0, + "rate_limited_total": 0, + "circuit_open_total": 0, + "endpoint_failover_total": 0, + } + + def __init__(self, settings: Settings): + self.settings = settings + + def allow_request(self, endpoint: str) -> tuple[bool, str]: + now = time.time() + now_minute = int(now // 60) + with self._lock: + state = self._endpoint_states.setdefault(endpoint, EndpointState()) + if state.circuit_open_until > now: + self._metrics["circuit_open_total"] += 1 + return False, "circuit_open" + if state.current_minute != now_minute: + state.current_minute = now_minute + state.minute_count = 0 + if state.minute_count >= self.settings.ai_rate_limit_per_minute: + self._metrics["rate_limited_total"] += 1 + return False, "rate_limited" + state.minute_count += 1 + self._metrics["requests_total"] += 1 + return True, "ok" + + def record_success(self, endpoint: str) -> None: + with self._lock: + state = self._endpoint_states.setdefault(endpoint, EndpointState()) + state.consecutive_failures = 0 + state.circuit_open_until = 0.0 + self._metrics["success_total"] += 1 + + def record_failure(self, endpoint: str) -> None: + with self._lock: + state = self._endpoint_states.setdefault(endpoint, EndpointState()) + state.consecutive_failures += 1 + self._metrics["fail_total"] += 1 + if state.consecutive_failures >= self.settings.ai_circuit_breaker_fail_threshold: + state.circuit_open_until = time.time() + self.settings.ai_circuit_breaker_cooldown_seconds + 
state.consecutive_failures = 0 + + def record_fallback(self) -> None: + with self._lock: + self._metrics["fallback_total"] += 1 + + def record_failover(self) -> None: + with self._lock: + self._metrics["endpoint_failover_total"] += 1 + + def snapshot(self) -> dict: + with self._lock: + requests_total = self._metrics["requests_total"] + fallback_total = self._metrics["fallback_total"] + success_total = self._metrics["success_total"] + fail_total = self._metrics["fail_total"] + return { + **self._metrics, + "fallback_hit_rate": round(fallback_total / requests_total, 4) if requests_total else 0.0, + "success_rate": round(success_total / requests_total, 4) if requests_total else 0.0, + "failure_rate": round(fail_total / requests_total, 4) if requests_total else 0.0, + } diff --git a/gig-poc/apps/api/app/services/cache_service.py b/gig-poc/apps/api/app/services/cache_service.py new file mode 100644 index 0000000..9c1d594 --- /dev/null +++ b/gig-poc/apps/api/app/services/cache_service.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +import json +import time +from functools import lru_cache +from threading import Lock +from typing import Any, Protocol + +from app.core.config import get_settings +from app.core.logging import logger + +try: + from redis import Redis +except Exception: # pragma: no cover + Redis = None # type: ignore[assignment] + + +class Cache(Protocol): + def get(self, key: str): ... + def set(self, key: str, value: Any) -> None: ... + def delete(self, key: str) -> None: ... + def clear(self) -> None: ... + def stats(self) -> dict[str, int | float | str]: ... 
+ + +class TTLCache: + def __init__(self, ttl_seconds: int): + self.ttl_seconds = ttl_seconds + self._store: dict[str, tuple[float, Any]] = {} + self._lock = Lock() + self._hits = 0 + self._misses = 0 + + def get(self, key: str): + now = time.time() + with self._lock: + item = self._store.get(key) + if item is None: + self._misses += 1 + return None + expires_at, value = item + if expires_at < now: + self._store.pop(key, None) + self._misses += 1 + return None + self._hits += 1 + return value + + def set(self, key: str, value: Any) -> None: + expires_at = time.time() + self.ttl_seconds + with self._lock: + self._store[key] = (expires_at, value) + + def delete(self, key: str) -> None: + with self._lock: + self._store.pop(key, None) + + def clear(self) -> None: + with self._lock: + self._store.clear() + + def stats(self) -> dict[str, int | float | str]: + with self._lock: + requests = self._hits + self._misses + hit_rate = (self._hits / requests) if requests else 0.0 + return { + "backend": "memory", + "size": len(self._store), + "hits": self._hits, + "misses": self._misses, + "hit_rate": round(hit_rate, 4), + } + + +class RedisCache: + def __init__(self, url: str, prefix: str, ttl_seconds: int): + if Redis is None: + raise RuntimeError("redis package is not installed") + self.client = Redis.from_url(url, decode_responses=True) + self.prefix = prefix + self.ttl_seconds = ttl_seconds + self._hits = 0 + self._misses = 0 + self._lock = Lock() + + def get(self, key: str): + raw = self.client.get(self._key(key)) + with self._lock: + if raw is None: + self._misses += 1 + return None + self._hits += 1 + return json.loads(raw) + + def set(self, key: str, value: Any) -> None: + self.client.set(self._key(key), json.dumps(value, ensure_ascii=False), ex=self.ttl_seconds) + + def delete(self, key: str) -> None: + self.client.delete(self._key(key)) + + def clear(self) -> None: + pattern = f"{self.prefix}:*" + cursor = 0 + while True: + cursor, keys = 
self.client.scan(cursor=cursor, match=pattern, count=200) + if keys: + self.client.delete(*keys) + if cursor == 0: + break + + def stats(self) -> dict[str, int | float | str]: + with self._lock: + requests = self._hits + self._misses + hit_rate = (self._hits / requests) if requests else 0.0 + return { + "backend": "redis", + "size": int(self.client.dbsize()), + "hits": self._hits, + "misses": self._misses, + "hit_rate": round(hit_rate, 4), + } + + def _key(self, key: str) -> str: + return f"{self.prefix}:{key}" + + +def _build_cache(namespace: str, ttl_seconds: int) -> Cache: + settings = get_settings() + if settings.cache_backend == "redis": + try: + return RedisCache(settings.redis_url, f"{settings.redis_prefix}:{namespace}", ttl_seconds=ttl_seconds) + except Exception: + logger.exception("failed to init redis cache namespace=%s fallback to memory cache", namespace) + return TTLCache(ttl_seconds=ttl_seconds) + + +@lru_cache +def get_match_cache() -> Cache: + settings = get_settings() + return _build_cache("match", settings.match_cache_ttl_seconds) + + +@lru_cache +def get_query_cache() -> Cache: + settings = get_settings() + return _build_cache("query", settings.query_cache_ttl_seconds) diff --git a/gig-poc/apps/api/app/services/extraction_service.py b/gig-poc/apps/api/app/services/extraction_service.py index bf5c666..70f55c4 100644 --- a/gig-poc/apps/api/app/services/extraction_service.py +++ b/gig-poc/apps/api/app/services/extraction_service.py @@ -26,13 +26,9 @@ class ExtractionService: def extract_job(self, text: str) -> ExtractResponse: logger.info("extract_job request text=%s", text) - llm_result = self._llm_extract(text, self.settings.prompt_dir / "job_extract.md") - if llm_result: - try: - return ExtractResponse(success=True, data=JobCard(**llm_result.content)) - except ValidationError as exc: - logger.exception("LLM job extraction validation failed") - return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc)) + 
llm_card = self._llm_extract_with_retry(text, self.settings.prompt_dir / "job_extract.md", JobCard) + if llm_card: + return ExtractResponse(success=True, data=llm_card) try: card = self._extract_job_rule(text) @@ -43,13 +39,9 @@ class ExtractionService: def extract_worker(self, text: str) -> ExtractResponse: logger.info("extract_worker request text=%s", text) - llm_result = self._llm_extract(text, self.settings.prompt_dir / "worker_extract.md") - if llm_result: - try: - return ExtractResponse(success=True, data=WorkerCard(**llm_result.content)) - except ValidationError as exc: - logger.exception("LLM worker extraction validation failed") - return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc)) + llm_card = self._llm_extract_with_retry(text, self.settings.prompt_dir / "worker_extract.md", WorkerCard) + if llm_card: + return ExtractResponse(success=True, data=llm_card) try: card = self._extract_worker_rule(text) @@ -65,6 +57,57 @@ class ExtractionService: logger.exception("LLM extraction failed, fallback to rule-based extraction") return None + def _llm_extract_with_retry(self, text: str, prompt_path: Path, schema_cls): + base_prompt = load_prompt(prompt_path) + llm_result = self._llm_extract(text, prompt_path) + if not llm_result: + return None + + try: + return schema_cls(**llm_result.content) + except ValidationError as exc: + logger.warning("LLM extraction validation failed, trying schema-aware retry") + last_error = exc + last_output = llm_result.content + + for _ in range(self.settings.extraction_llm_max_retries): + missing_fields = self._missing_fields(last_error) + repair_prompt = self._build_repair_prompt(base_prompt, schema_cls, missing_fields) + try: + repair_result = self.llm_client.extract_json( + repair_prompt, + self._build_repair_input(text, last_output, missing_fields), + ) + except Exception: + logger.exception("LLM schema-aware retry failed") + return None + if not repair_result: + return None + 
last_output = repair_result.content + try: + return schema_cls(**repair_result.content) + except ValidationError as exc: + last_error = exc + logger.warning("LLM schema-aware retry still invalid missing_fields=%s", self._missing_fields(exc)) + return None + + def _build_repair_prompt(self, base_prompt: str, schema_cls, missing_fields: list[str]) -> str: + schema_json = json.dumps(schema_cls.model_json_schema(), ensure_ascii=False) + return ( + f"{base_prompt}\n\n" + "你是结构化修复助手。请严格输出可被 JSON 解析的对象,不要输出解释文字。\n" + "目标是根据给定 schema 修复字段缺失和类型错误,优先保证必填字段完整。\n" + f"缺失或错误字段: {', '.join(missing_fields) if missing_fields else 'unknown'}\n" + f"JSON Schema: {schema_json}\n" + ) + + def _build_repair_input(self, original_text: str, last_output: dict, missing_fields: list[str]) -> str: + return ( + f"原始文本:\n{original_text}\n\n" + f"上一次抽取结果:\n{json.dumps(last_output, ensure_ascii=False)}\n\n" + f"请重点修复字段:\n{json.dumps(missing_fields, ensure_ascii=False)}" + ) + def _extract_job_rule(self, text: str) -> JobCard: skill_hits = [item for item in self.skills if item in text] category = next((item for item in self.categories if item in text), "活动执行") diff --git a/gig-poc/apps/api/app/services/ingest_queue.py b/gig-poc/apps/api/app/services/ingest_queue.py new file mode 100644 index 0000000..3dfeb59 --- /dev/null +++ b/gig-poc/apps/api/app/services/ingest_queue.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +from dataclasses import dataclass +from queue import Empty, Full, Queue +from threading import Event, Lock, Thread +from typing import Any + +from app.core.config import Settings +from app.core.logging import logger +from app.db.session import SessionLocal +from app.domain.schemas import JobCard, WorkerCard +from app.services.ingest_service import IngestService +from app.utils.ids import generate_id + + +@dataclass +class QueueTask: + task_id: str + kind: str + payload: dict[str, Any] + + +class IngestQueue: + def __init__(self, settings: Settings): + self.settings = 
settings + self.queue: Queue[QueueTask] = Queue(maxsize=settings.ingest_queue_max_size) + self._stop_event = Event() + self._thread: Thread | None = None + self._lock = Lock() + self._status: dict[str, str] = {} + self._processed = 0 + self._failed = 0 + + def start(self) -> None: + if not self.settings.ingest_async_enabled: + return + if self._thread and self._thread.is_alive(): + return + self._thread = Thread(target=self._run, daemon=True, name="ingest-queue-worker") + self._thread.start() + logger.info("ingest queue worker started") + + def stop(self) -> None: + self._stop_event.set() + if self._thread and self._thread.is_alive(): + self._thread.join(timeout=3) + + def enqueue_job(self, card: JobCard) -> str: + return self._enqueue("job", card.model_dump(mode="json")) + + def enqueue_worker(self, card: WorkerCard) -> str: + return self._enqueue("worker", card.model_dump(mode="json")) + + def task_status(self, task_id: str) -> str: + with self._lock: + return self._status.get(task_id, "not_found") + + def stats(self) -> dict[str, int]: + with self._lock: + return { + "queued": self.queue.qsize(), + "processed": self._processed, + "failed": self._failed, + } + + def _enqueue(self, kind: str, payload: dict[str, Any]) -> str: + task_id = generate_id("queue") + task = QueueTask(task_id=task_id, kind=kind, payload=payload) + with self._lock: + self._status[task_id] = "queued" + try: + self.queue.put_nowait(task) + except Full as exc: + with self._lock: + self._status[task_id] = "rejected" + raise RuntimeError("ingest queue is full") from exc + return task_id + + def _run(self) -> None: + while not self._stop_event.is_set(): + try: + task = self.queue.get(timeout=0.5) + except Empty: + continue + try: + with self._lock: + self._status[task.task_id] = "processing" + with SessionLocal() as db: + service = IngestService(db) + if task.kind == "job": + service.ingest_job(JobCard(**task.payload)) + elif task.kind == "worker": + 
service.ingest_worker(WorkerCard(**task.payload)) + else: + raise ValueError(f"unknown task kind {task.kind}") + with self._lock: + self._status[task.task_id] = "done" + self._processed += 1 + except Exception: + logger.exception("ingest queue task failed task_id=%s kind=%s", task.task_id, task.kind) + with self._lock: + self._status[task.task_id] = "failed" + self._failed += 1 + finally: + self.queue.task_done() diff --git a/gig-poc/apps/api/app/services/ingest_service.py b/gig-poc/apps/api/app/services/ingest_service.py index d14c828..62707c6 100644 --- a/gig-poc/apps/api/app/services/ingest_service.py +++ b/gig-poc/apps/api/app/services/ingest_service.py @@ -9,6 +9,7 @@ from app.core.logging import logger from app.domain.schemas import BootstrapResponse, JobCard, WorkerCard from app.repositories.job_repository import JobRepository from app.repositories.worker_repository import WorkerRepository +from app.services.cache_service import get_match_cache, get_query_cache from app.services.rag.lightrag_adapter import LightRAGAdapter @@ -19,17 +20,27 @@ class IngestService: self.job_repository = JobRepository(db) self.worker_repository = WorkerRepository(db) self.rag = LightRAGAdapter(self.settings) + self.match_cache = get_match_cache() + self.query_cache = get_query_cache() def ingest_job(self, card: JobCard) -> JobCard: logger.info("ingest_job job_id=%s", card.job_id) self.job_repository.upsert(card) self.rag.upsert_job(card) + if self.settings.match_cache_enabled: + self.match_cache.clear() + if self.settings.query_cache_enabled: + self.query_cache.clear() return card def ingest_worker(self, card: WorkerCard) -> WorkerCard: logger.info("ingest_worker worker_id=%s", card.worker_id) self.worker_repository.upsert(card) self.rag.upsert_worker(card) + if self.settings.match_cache_enabled: + self.match_cache.clear() + if self.settings.query_cache_enabled: + self.query_cache.clear() return card def bootstrap(self) -> BootstrapResponse: @@ -43,6 +54,10 @@ class 
IngestService: self.ingest_job(JobCard(**item)) for item in workers: self.ingest_worker(WorkerCard(**item)) + if self.settings.match_cache_enabled: + self.match_cache.clear() + if self.settings.query_cache_enabled: + self.query_cache.clear() return BootstrapResponse( jobs=len(jobs), workers=len(workers), diff --git a/gig-poc/apps/api/app/services/llm_client.py b/gig-poc/apps/api/app/services/llm_client.py index bebd3aa..6caf5fa 100644 --- a/gig-poc/apps/api/app/services/llm_client.py +++ b/gig-poc/apps/api/app/services/llm_client.py @@ -6,14 +6,17 @@ import httpx from app.core.config import Settings from app.domain.schemas import PromptOutput +from app.services.ai_guard import AIGuard class LLMClient: def __init__(self, settings: Settings): self.settings = settings + self.guard = AIGuard(settings) def extract_json(self, system_prompt: str, user_text: str) -> PromptOutput | None: if not self.settings.llm_enabled or not self.settings.llm_base_url or not self.settings.llm_api_key: + self.guard.record_fallback() return None payload = { @@ -25,10 +28,77 @@ class LLMClient: "temperature": 0.1, "response_format": {"type": "json_object"}, } - headers = {"Authorization": f"Bearer {self.settings.llm_api_key}"} - with httpx.Client(timeout=30.0) as client: - response = client.post(f"{self.settings.llm_base_url.rstrip('/')}/chat/completions", json=payload, headers=headers) - response.raise_for_status() - data = response.json() - raw_text = data["choices"][0]["message"]["content"] + endpoints = [self.settings.llm_base_url, *self.settings.llm_fallback_base_urls] + raw_text = self._request_with_failover( + endpoints=endpoints, + path="/chat/completions", + payload=payload, + api_key=self.settings.llm_api_key, + ) + if raw_text is None: + self.guard.record_fallback() + return None return PromptOutput(content=json.loads(raw_text), raw_text=raw_text) + + def embedding(self, text: str) -> list[float] | None: + if not self.settings.embedding_enabled: + return None + base_url = 
self.settings.embedding_base_url or self.settings.llm_base_url + api_key = self.settings.embedding_api_key or self.settings.llm_api_key + if not base_url or not api_key: + self.guard.record_fallback() + return None + + payload = { + "model": self.settings.embedding_model, + "input": text, + } + endpoints = [base_url, *self.settings.embedding_fallback_base_urls] + data = self._request_with_failover( + endpoints=endpoints, + path="/embeddings", + payload=payload, + api_key=api_key, + return_full_response=True, + ) + if data is None: + self.guard.record_fallback() + return None + embedding = data["data"][0]["embedding"] + if not isinstance(embedding, list): + return None + return [float(item) for item in embedding] + + def metrics(self) -> dict: + return self.guard.snapshot() + + def _request_with_failover( + self, + endpoints: list[str], + path: str, + payload: dict, + api_key: str, + return_full_response: bool = False, + ): + if not endpoints: + return None + for index, endpoint in enumerate([item for item in endpoints if item]): + allowed, _ = self.guard.allow_request(endpoint) + if not allowed: + continue + if index > 0: + self.guard.record_failover() + try: + headers = {"Authorization": f"Bearer {api_key}"} + with httpx.Client(timeout=self.settings.ai_request_timeout_seconds) as client: + response = client.post(f"{endpoint.rstrip('/')}{path}", json=payload, headers=headers) + response.raise_for_status() + data = response.json() + self.guard.record_success(endpoint) + if return_full_response: + return data + return data["choices"][0]["message"]["content"] + except Exception: + self.guard.record_failure(endpoint) + continue + return None diff --git a/gig-poc/apps/api/app/services/match_queue.py b/gig-poc/apps/api/app/services/match_queue.py new file mode 100644 index 0000000..293780d --- /dev/null +++ b/gig-poc/apps/api/app/services/match_queue.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +from dataclasses import dataclass +from queue import Empty, 
Full, Queue +from threading import Event, Lock, Thread +from typing import Any + +from app.core.config import Settings +from app.core.logging import logger +from app.db.session import SessionLocal +from app.domain.schemas import MatchResult +from app.repositories.job_repository import JobRepository +from app.repositories.worker_repository import WorkerRepository +from app.services.card_mapper import job_to_card, worker_to_card +from app.services.matching_service import MatchingService +from app.utils.ids import generate_id + + +@dataclass +class MatchTask: + task_id: str + kind: str + source_id: str + top_n: int + + +class MatchQueue: + def __init__(self, settings: Settings): + self.settings = settings + self.queue: Queue[MatchTask] = Queue(maxsize=settings.match_queue_max_size) + self._stop_event = Event() + self._thread: Thread | None = None + self._lock = Lock() + self._status: dict[str, str] = {} + self._results: dict[str, list[dict[str, Any]]] = {} + self._processed = 0 + self._failed = 0 + + def start(self) -> None: + if not self.settings.match_async_enabled: + return + if self._thread and self._thread.is_alive(): + return + self._thread = Thread(target=self._run, daemon=True, name="match-queue-worker") + self._thread.start() + logger.info("match queue worker started") + + def stop(self) -> None: + self._stop_event.set() + if self._thread and self._thread.is_alive(): + self._thread.join(timeout=3) + + def enqueue_workers(self, job_id: str, top_n: int) -> str: + return self._enqueue("workers", job_id, top_n) + + def enqueue_jobs(self, worker_id: str, top_n: int) -> str: + return self._enqueue("jobs", worker_id, top_n) + + def task_status(self, task_id: str) -> str: + with self._lock: + return self._status.get(task_id, "not_found") + + def task_result(self, task_id: str) -> list[dict[str, Any]] | None: + with self._lock: + return self._results.get(task_id) + + def stats(self) -> dict[str, int]: + with self._lock: + return { + "queued": self.queue.qsize(), + 
"processed": self._processed, + "failed": self._failed, + } + + def _enqueue(self, kind: str, source_id: str, top_n: int) -> str: + task_id = generate_id("mq") + task = MatchTask(task_id=task_id, kind=kind, source_id=source_id, top_n=top_n) + with self._lock: + self._status[task_id] = "queued" + try: + self.queue.put_nowait(task) + except Full as exc: + with self._lock: + self._status[task_id] = "rejected" + raise RuntimeError("match queue is full") from exc + return task_id + + def _run(self) -> None: + while not self._stop_event.is_set(): + try: + task = self.queue.get(timeout=0.5) + except Empty: + continue + try: + with self._lock: + self._status[task.task_id] = "processing" + with SessionLocal() as db: + service = MatchingService(db) + if task.kind == "workers": + job = JobRepository(db).get(task.source_id) + if job is None: + raise ValueError("job not found") + items = service.match_workers(job_to_card(job), task.top_n) + elif task.kind == "jobs": + worker = WorkerRepository(db).get(task.source_id) + if worker is None: + raise ValueError("worker not found") + items = service.match_jobs(worker_to_card(worker), task.top_n) + else: + raise ValueError(f"unknown task kind {task.kind}") + with self._lock: + self._status[task.task_id] = "done" + self._results[task.task_id] = [item.model_dump(mode="json") for item in items] + self._processed += 1 + except Exception: + logger.exception("match queue task failed task_id=%s kind=%s", task.task_id, task.kind) + with self._lock: + self._status[task.task_id] = "failed" + self._failed += 1 + finally: + self.queue.task_done() diff --git a/gig-poc/apps/api/app/services/matching_service.py b/gig-poc/apps/api/app/services/matching_service.py index 00a1dc3..910c94a 100644 --- a/gig-poc/apps/api/app/services/matching_service.py +++ b/gig-poc/apps/api/app/services/matching_service.py @@ -10,8 +10,10 @@ from app.domain.schemas import JobCard, MatchBreakdown, MatchResult, QueryFilter from app.repositories.job_repository import 
JobRepository from app.repositories.match_repository import MatchRepository from app.repositories.worker_repository import WorkerRepository +from app.services.cache_service import get_match_cache from app.services.card_mapper import job_to_card, worker_to_card from app.services.rag.lightrag_adapter import LightRAGAdapter +from app.services.weight_service import MatchWeightService from app.utils.ids import generate_id @@ -23,9 +25,16 @@ class MatchingService: self.workers = WorkerRepository(db) self.matches = MatchRepository(db) self.rag = LightRAGAdapter(self.settings) + self.weight_service = MatchWeightService(self.settings) + self.cache = get_match_cache() def match_workers(self, source: JobCard, top_n: int) -> list[MatchResult]: logger.info("match_workers source_id=%s top_n=%s", source.job_id, top_n) + cache_key = f"match_workers:{source.job_id}:{top_n}" + if self.settings.match_cache_enabled: + cached = self.cache.get(cache_key) + if cached is not None: + return self._parse_cached_matches(cached) query_text = " ".join([source.title, source.category, source.city, source.region, *source.skills, *source.tags]) candidate_ids = self.rag.search( query_text=query_text, @@ -36,10 +45,17 @@ class MatchingService: results = [self._build_job_to_worker_match(source, worker_to_card(worker)) for worker in candidates] results = sorted(results, key=lambda item: item.match_score, reverse=True)[:top_n] self.matches.bulk_replace(results, SourceType.job_to_worker.value, source.job_id) + if self.settings.match_cache_enabled: + self.cache.set(cache_key, [item.model_dump(mode="json") for item in results]) return results def match_jobs(self, source: WorkerCard, top_n: int) -> list[MatchResult]: logger.info("match_jobs source_id=%s top_n=%s", source.worker_id, top_n) + cache_key = f"match_jobs:{source.worker_id}:{top_n}" + if self.settings.match_cache_enabled: + cached = self.cache.get(cache_key) + if cached is not None: + return self._parse_cached_matches(cached) query_text = " 
".join([source.name, *source.cities, *source.regions, *[item.name for item in source.skills], *source.experience_tags]) city = source.cities[0] if source.cities else None candidate_ids = self.rag.search( @@ -51,6 +67,8 @@ class MatchingService: results = [self._build_worker_to_job_match(source, job_to_card(job)) for job in candidates] results = sorted(results, key=lambda item: item.match_score, reverse=True)[:top_n] self.matches.bulk_replace(results, SourceType.worker_to_job.value, source.worker_id) + if self.settings.match_cache_enabled: + self.cache.set(cache_key, [item.model_dump(mode="json") for item in results]) return results def explain(self, match_id: str) -> MatchResult | None: @@ -61,6 +79,20 @@ class MatchingService: return match_record_to_schema(record) + def feedback(self, match_id: str, accepted: bool) -> dict[str, float] | None: + record = self.matches.get(match_id) + if record is None: + return None + from app.services.card_mapper import match_record_to_schema + + match = match_record_to_schema(record) + if self.settings.ranking_learning_enabled: + return self.weight_service.update_from_feedback(match.breakdown, accepted) + return self.weight_service.get_weights() + + def current_weights(self) -> dict[str, float]: + return self.weight_service.get_weights() + def _build_job_to_worker_match(self, job: JobCard, worker: WorkerCard) -> MatchResult: job_skills = set(job.skills) expanded_skills = self.rag.expand_skills(job.skills) @@ -143,13 +175,14 @@ class MatchingService: experience_score: float, reliability_score: float, ) -> float: - return ( - self.settings.score_skill_weight * skill_score - + self.settings.score_region_weight * region_score - + self.settings.score_time_weight * time_score - + self.settings.score_experience_weight * experience_score - + self.settings.score_reliability_weight * reliability_score + breakdown = MatchBreakdown( + skill_score=skill_score, + region_score=region_score, + time_score=time_score, + 
experience_score=experience_score, + reliability_score=reliability_score, ) + return self.weight_service.score(breakdown) def _build_reasons( self, @@ -176,3 +209,10 @@ class MatchingService: while len(reasons) < 3: reasons.append("岗位需求与候选画像存在基础匹配") return reasons[:5] + + def _parse_cached_matches(self, cached) -> list[MatchResult]: + if isinstance(cached, list) and cached and isinstance(cached[0], MatchResult): + return cached + if isinstance(cached, list): + return [MatchResult(**item) for item in cached] + return [] diff --git a/gig-poc/apps/api/app/services/rag/lightrag_adapter.py b/gig-poc/apps/api/app/services/rag/lightrag_adapter.py index 0c115a2..de9ba11 100644 --- a/gig-poc/apps/api/app/services/rag/lightrag_adapter.py +++ b/gig-poc/apps/api/app/services/rag/lightrag_adapter.py @@ -10,6 +10,7 @@ from qdrant_client import QdrantClient, models from app.core.config import Settings from app.core.logging import logger from app.domain.schemas import JobCard, QueryFilters, WorkerCard +from app.services.llm_client import LLMClient class LightRAGAdapter: @@ -17,13 +18,28 @@ class LightRAGAdapter: self.settings = settings self.client = QdrantClient(url=settings.qdrant_url) self.skill_graph = self._load_skill_graph() + self.llm_client = LLMClient(settings) + self.collection_vector_size: int | None = None def ensure_ready(self) -> None: collections = {item.name for item in self.client.get_collections().collections} + expected_size = self._configured_vector_size() if self.settings.qdrant_collection not in collections: self.client.create_collection( collection_name=self.settings.qdrant_collection, - vectors_config=models.VectorParams(size=self.settings.vector_size, distance=models.Distance.COSINE), + vectors_config=models.VectorParams(size=expected_size, distance=models.Distance.COSINE), + ) + self.collection_vector_size = expected_size + return + info = self.client.get_collection(self.settings.qdrant_collection) + configured_size = info.config.params.vectors.size + 
self.collection_vector_size = int(configured_size) + if self.collection_vector_size != expected_size: + logger.warning( + "qdrant vector size mismatch, collection=%s expected=%s actual=%s; using actual size", + self.settings.qdrant_collection, + expected_size, + self.collection_vector_size, ) def health(self) -> str: @@ -125,14 +141,40 @@ class LightRAGAdapter: ) def _vectorize(self, text: str) -> list[float]: - vector = [0.0 for _ in range(self.settings.vector_size)] + if self.settings.embedding_enabled and self.settings.embedding_backend == "openai_compatible": + try: + embedding = self.llm_client.embedding(text) + if embedding: + return self._normalize_embedding(embedding) + except Exception: + logger.exception("embedding request failed, fallback to hash vector") + target_size = self._active_vector_size() + vector = [0.0 for _ in range(target_size)] tokens = self._tokenize(text) for token in tokens: - index = hash(token) % self.settings.vector_size + index = hash(token) % target_size vector[index] += 1.0 norm = math.sqrt(sum(item * item for item in vector)) or 1.0 return [item / norm for item in vector] + def _normalize_embedding(self, embedding: list[float]) -> list[float]: + target_size = self._active_vector_size() + vector = embedding[:target_size] + if len(vector) < target_size: + vector.extend([0.0] * (target_size - len(vector))) + norm = math.sqrt(sum(item * item for item in vector)) or 1.0 + return [item / norm for item in vector] + + def _active_vector_size(self) -> int: + if self.collection_vector_size: + return self.collection_vector_size + return self._configured_vector_size() + + def _configured_vector_size(self) -> int: + if self.settings.embedding_enabled and self.settings.embedding_backend == "openai_compatible": + return self.settings.embedding_vector_size + return self.settings.vector_size + def _tokenize(self, text: str) -> list[str]: cleaned = [part.strip().lower() for part in text.replace(",", " ").replace("、", " ").replace("。", " ").split()] 
tokens = [part for part in cleaned if part] diff --git a/gig-poc/apps/api/app/services/runtime_state.py b/gig-poc/apps/api/app/services/runtime_state.py new file mode 100644 index 0000000..d0c7b0c --- /dev/null +++ b/gig-poc/apps/api/app/services/runtime_state.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from functools import lru_cache + +from app.core.config import get_settings +from app.services.ingest_queue import IngestQueue +from app.services.match_queue import MatchQueue +from app.services.traffic_guard import TrafficGuard + + +@lru_cache +def get_ingest_queue() -> IngestQueue: + return IngestQueue(get_settings()) + + +@lru_cache +def get_match_queue() -> MatchQueue: + return MatchQueue(get_settings()) + + +@lru_cache +def get_traffic_guard() -> TrafficGuard: + return TrafficGuard(get_settings()) diff --git a/gig-poc/apps/api/app/services/traffic_guard.py b/gig-poc/apps/api/app/services/traffic_guard.py new file mode 100644 index 0000000..b8be714 --- /dev/null +++ b/gig-poc/apps/api/app/services/traffic_guard.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import time +from collections import deque +from threading import Lock + +import httpx + +from app.core.config import Settings +from app.core.logging import logger + + +class TrafficGuard: + def __init__(self, settings: Settings): + self.settings = settings + self._lock = Lock() + self._minute = 0 + self._minute_count = 0 + self._open_until = 0.0 + self._events: deque[tuple[float, int]] = deque() + self._requests = 0 + self._rate_limited = 0 + self._circuit_blocked = 0 + self._avg_latency_ms = 0.0 + self._alert_last_sent = 0.0 + + def allow(self, path: str) -> tuple[bool, str]: + now = time.time() + with self._lock: + minute = int(now // 60) + if self._minute != minute: + self._minute = minute + self._minute_count = 0 + if self._minute_count >= self.settings.app_rate_limit_per_minute: + self._rate_limited += 1 + return False, "rate_limited" + if self._open_until > now and not 
self._is_exempt(path): + self._circuit_blocked += 1 + return False, "circuit_open" + self._minute_count += 1 + self._requests += 1 + return True, "ok" + + def record(self, status_code: int, latency_ms: float) -> None: + now = time.time() + with self._lock: + self._events.append((now, status_code)) + self._avg_latency_ms = self._ema(self._avg_latency_ms, latency_ms) + self._trim(now) + total = len(self._events) + if total < self.settings.app_circuit_breaker_min_requests: + return + errors = sum(1 for _, code in self._events if code >= 500) + error_rate = errors / total + if error_rate >= self.settings.app_circuit_breaker_error_rate: + self._open_until = now + self.settings.app_circuit_breaker_cooldown_seconds + self._send_alert( + "app circuit opened", + { + "error_rate": round(error_rate, 4), + "window_requests": total, + "cooldown_seconds": self.settings.app_circuit_breaker_cooldown_seconds, + }, + ) + + def snapshot(self) -> dict[str, float | int]: + now = time.time() + with self._lock: + self._trim(now) + total = len(self._events) + errors = sum(1 for _, code in self._events if code >= 500) + return { + "requests_total": self._requests, + "rate_limited_total": self._rate_limited, + "circuit_blocked_total": self._circuit_blocked, + "window_requests": total, + "window_errors": errors, + "window_error_rate": round((errors / total), 4) if total else 0.0, + "avg_latency_ms": round(self._avg_latency_ms, 2), + "circuit_open": 1 if self._open_until > now else 0, + } + + def _trim(self, now: float) -> None: + lower = now - self.settings.app_circuit_breaker_window_seconds + while self._events and self._events[0][0] < lower: + self._events.popleft() + + def _ema(self, prev: float, value: float, alpha: float = 0.2) -> float: + if prev <= 0: + return value + return alpha * value + (1 - alpha) * prev + + def _is_exempt(self, path: str) -> bool: + return path in {"/health", "/docs", "/openapi.json", "/poc/ops/system/metrics", "/poc/ops/ai/metrics"} + + def _send_alert(self, 
message: str, extra: dict) -> None: + now = time.time() + if now - self._alert_last_sent < 30: + return + self._alert_last_sent = now + logger.warning("%s extra=%s", message, extra) + if not self.settings.alert_webhook_url: + return + try: + with httpx.Client(timeout=2.0) as client: + client.post(self.settings.alert_webhook_url, json={"message": message, "extra": extra}) + except Exception: + logger.exception("alert webhook send failed") diff --git a/gig-poc/apps/api/app/services/weight_service.py b/gig-poc/apps/api/app/services/weight_service.py new file mode 100644 index 0000000..e70db79 --- /dev/null +++ b/gig-poc/apps/api/app/services/weight_service.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from app.core.config import Settings +from app.core.logging import logger +from app.domain.schemas import MatchBreakdown + + +class MatchWeightService: + def __init__(self, settings: Settings): + self.settings = settings + self.path: Path = settings.match_weights_path + + def default_weights(self) -> dict[str, float]: + return { + "skill": self.settings.score_skill_weight, + "region": self.settings.score_region_weight, + "time": self.settings.score_time_weight, + "experience": self.settings.score_experience_weight, + "reliability": self.settings.score_reliability_weight, + } + + def get_weights(self) -> dict[str, float]: + weights = self.default_weights() + if not self.path.exists(): + return self._normalize(weights) + try: + data = json.loads(self.path.read_text(encoding="utf-8")) + for key in weights: + value = data.get(key) + if isinstance(value, (int, float)): + weights[key] = float(value) + except Exception: + logger.exception("failed to read learned ranking weights, fallback to defaults") + return self._normalize(weights) + + def score(self, breakdown: MatchBreakdown) -> float: + weights = self.get_weights() + return ( + weights["skill"] * breakdown.skill_score + + weights["region"] * breakdown.region_score + + 
weights["time"] * breakdown.time_score + + weights["experience"] * breakdown.experience_score + + weights["reliability"] * breakdown.reliability_score + ) + + def update_from_feedback(self, breakdown: MatchBreakdown, accepted: bool) -> dict[str, float]: + weights = self.get_weights() + features = { + "skill": breakdown.skill_score, + "region": breakdown.region_score, + "time": breakdown.time_score, + "experience": breakdown.experience_score, + "reliability": breakdown.reliability_score, + } + target = 1.0 if accepted else 0.0 + prediction = sum(weights[name] * value for name, value in features.items()) + error = target - prediction + lr = self.settings.ranking_learning_rate + updated = {name: max(0.0, weights[name] + lr * error * value) for name, value in features.items()} + normalized = self._normalize(updated) + self._save_weights(normalized) + return normalized + + def _save_weights(self, weights: dict[str, float]) -> None: + self.settings.data_dir.mkdir(parents=True, exist_ok=True) + self.path.write_text(json.dumps(weights, ensure_ascii=False, indent=2), encoding="utf-8") + + def _normalize(self, weights: dict[str, float]) -> dict[str, float]: + total = sum(max(value, 0.0) for value in weights.values()) + if total <= 0: + fallback = self.default_weights() + total = sum(fallback.values()) + return {key: value / total for key, value in fallback.items()} + return {key: max(value, 0.0) / total for key, value in weights.items()} diff --git a/gig-poc/apps/api/pyproject.toml b/gig-poc/apps/api/pyproject.toml index 64dc4be..b37d1cb 100644 --- a/gig-poc/apps/api/pyproject.toml +++ b/gig-poc/apps/api/pyproject.toml @@ -11,6 +11,7 @@ dependencies = [ "pydantic==2.11.3", "pydantic-settings==2.8.1", "httpx==0.28.1", + "redis==5.2.1", "qdrant-client==1.14.2", "python-dateutil==2.9.0.post0", "orjson==3.10.16", diff --git a/gig-poc/apps/web/src/vite-env.d.ts b/gig-poc/apps/web/src/vite-env.d.ts new file mode 100644 index 0000000..11f02fe --- /dev/null +++ 
b/gig-poc/apps/web/src/vite-env.d.ts @@ -0,0 +1 @@ +/// diff --git a/gig-poc/docs/API.md b/gig-poc/docs/API.md index 226f845..d335dac 100644 --- a/gig-poc/docs/API.md +++ b/gig-poc/docs/API.md @@ -1,8 +1,98 @@ -# API 文档 +# API 文档(中文完整版) + +## 机器可读文档(推荐作为联调基准) +- Swagger UI:`http://127.0.0.1:8000/docs` +- OpenAPI JSON:`http://127.0.0.1:8000/openapi.json` +- 一键导出: +```bash +cd gig-poc +sh infrastructure/scripts/export-openapi.sh +``` +- 导出到自定义路径: +```bash +sh infrastructure/scripts/export-openapi.sh ./docs/openapi.prod.json +``` + +## 通用约定 +- 基础路径:默认通过网关暴露为 `/api`,下文写的是服务内部路径(如 `/poc/...`)。 +- 数据格式:`Content-Type: application/json`。 +- 时间字段:ISO-8601 格式(示例:`2026-03-30T12:00:00+08:00`)。 +- 分数字段:大部分评分为 `0~1` 浮点数。 +- 常见错误码: + - `400/422`:请求参数不合法(字段缺失、类型不匹配、取值超范围)。 + - `404`:查询对象不存在(如岗位 ID、工人 ID、匹配记录 ID 不存在)。 + - `500`:服务内部异常(数据库、向量检索、模型调用失败等)。 + +## 数据结构说明 + +### Salary(薪资结构) +| 字段 | 类型 | 说明 | +| --- | --- | --- | +| `type` | `daily/hourly/monthly/task` | 薪资类型:日薪/时薪/月薪/按单 | +| `amount` | `number` | 薪资金额 | +| `currency` | `string` | 货币,默认 `CNY` | + +### SkillScore(技能分) +| 字段 | 类型 | 说明 | +| --- | --- | --- | +| `name` | `string` | 技能名称 | +| `score` | `number` | 技能熟练度,范围 `0~1` | + +### JobCard(岗位卡片) +| 字段 | 类型 | 说明 | +| --- | --- | --- | +| `job_id` | `string` | 岗位唯一 ID | +| `title` | `string` | 岗位标题 | +| `category` | `string` | 岗位类别 | +| `description` | `string` | 岗位描述 | +| `skills` | `string[]` | 需要的技能列表 | +| `city` | `string` | 城市 | +| `region` | `string` | 区域 | +| `location_detail` | `string` | 详细地址或地标 | +| `start_time` | `datetime` | 开始时间 | +| `duration_hours` | `number` | 工时(小时,>0) | +| `headcount` | `number` | 需求人数(>0) | +| `salary` | `Salary` | 薪资信息 | +| `work_mode` | `string` | 工作模式(如兼职/全职/活动) | +| `tags` | `string[]` | 业务标签 | +| `confidence` | `number` | 抽取或录入置信度,范围 `0~1` | + +### WorkerCard(工人卡片) +| 字段 | 类型 | 说明 | +| --- | --- | --- | +| `worker_id` | `string` | 工人唯一 ID | +| `name` | `string` | 姓名/昵称 | +| `description` | `string` | 自我描述 | +| `skills` | 
`SkillScore[]` | 技能及熟练度 | +| `cities` | `string[]` | 可接单城市 | +| `regions` | `string[]` | 可接单区域 | +| `availability` | `string[]` | 可上岗时段(自由文本) | +| `experience_tags` | `string[]` | 经验标签 | +| `reliability_score` | `number` | 履约可靠性分,范围 `0~1` | +| `profile_completion` | `number` | 档案完善度,范围 `0~1` | +| `confidence` | `number` | 抽取或录入置信度,范围 `0~1` | + +### MatchResult(匹配结果) +| 字段 | 类型 | 说明 | +| --- | --- | --- | +| `match_id` | `string` | 匹配记录 ID | +| `source_type` | `job_to_worker/worker_to_job` | 匹配方向 | +| `source_id` | `string` | 发起匹配的实体 ID | +| `target_id` | `string` | 被匹配到的实体 ID | +| `match_score` | `number` | 综合匹配分(`0~1`) | +| `breakdown.skill_score` | `number` | 技能匹配分 | +| `breakdown.region_score` | `number` | 地域匹配分 | +| `breakdown.time_score` | `number` | 时间匹配分 | +| `breakdown.experience_score` | `number` | 经验匹配分 | +| `breakdown.reliability_score` | `number` | 可靠性匹配分 | +| `reasons` | `string[]` | 至少 3 条可解释理由 | ## 系统接口 + ### `GET /health` -返回: +用途:检查服务、数据库、RAG 组件是否可用。 + +返回示例: ```json { "service": "ok", @@ -12,38 +102,242 @@ } ``` -## 抽取接口 -### `POST /poc/extract/job` -请求: +字段说明: +- `service`:API 进程状态。 +- `database`:数据库连通状态(`ok/error`)。 +- `rag`:检索增强组件状态(`ok/error`)。 +- `timestamp`:服务端当前时间。 + +### `GET /poc/ops/ai/metrics` +用途:查看 AI 调用观测指标(限流、熔断、降级、fallback 命中率)。 + +返回示例: ```json -{ "text": "明天下午南山会展中心需要2个签到协助,5小时,150/人,女生优先" } +{ + "metrics": { + "requests_total": 12, + "success_total": 10, + "fail_total": 2, + "fallback_total": 1, + "rate_limited_total": 0, + "circuit_open_total": 0, + "endpoint_failover_total": 1, + "fallback_hit_rate": 0.0833, + "success_rate": 0.8333, + "failure_rate": 0.1667 + } +} ``` +### `GET /poc/ops/system/metrics` +用途:查看全局系统护栏指标(流量限流、熔断状态、缓存命中率、异步队列状态)。 + +返回示例: +```json +{ + "traffic": { + "requests_total": 1000, + "rate_limited_total": 0, + "circuit_blocked_total": 0, + "window_requests": 120, + "window_errors": 3, + "window_error_rate": 0.025, + "avg_latency_ms": 35.4, + "circuit_open": 0 + }, + "cache": { + "backend": "redis", + 
"match_hit_rate": 0.62, + "query_hit_rate": 0.73, + "match_size": 320, + "query_size": 800 + }, + "ingest_queue": { + "queued": 0, + "processed": 1200, + "failed": 2 + }, + "match_queue": { + "queued": 2, + "processed": 3400, + "failed": 7 + } +} +``` + +## 抽取接口 + +### `POST /poc/extract/job` +用途:将岗位自然语言文本抽取为结构化 `JobCard`。 + +请求体: +```json +{ + "text": "明天下午南山会展中心需要2个签到协助,5小时,150/人,女生优先" +} +``` + +请求字段说明: +- `text`:待抽取文本,最小长度 5。 + +返回结构:`ExtractResponse` +```json +{ + "success": true, + "data": { + "job_id": "job_demo_001", + "title": "活动签到协助", + "category": "会展活动", + "description": "南山会展中心活动签到协助", + "skills": ["签到", "引导"], + "city": "深圳", + "region": "南山", + "location_detail": "南山会展中心", + "start_time": "2026-04-01T14:00:00+08:00", + "duration_hours": 5, + "headcount": 2, + "salary": { "type": "daily", "amount": 150, "currency": "CNY" }, + "work_mode": "兼职", + "tags": ["女生优先"], + "confidence": 0.88 + }, + "errors": [], + "missing_fields": [] +} +``` + +字段说明: +- `success`:是否抽取成功。 +- `data`:抽取出的结构化岗位对象;失败时可能为 `null`。 +- `errors`:错误信息列表。 +- `missing_fields`:缺失字段列表,便于前端二次补录。 + ### `POST /poc/extract/worker` -请求: +用途:将工人自然语言文本抽取为结构化 `WorkerCard`。 + +请求体: ```json -{ "text": "我做过商场促销和活动签到,也能做登记和引导,周末都能接,福田南山都方便。" } +{ + "text": "我做过商场促销和活动签到,也能做登记和引导,周末都能接,福田南山都方便。" +} ``` +返回结构:同 `ExtractResponse`,其中 `data` 为 `WorkerCard`。 + +说明: +- 适合把聊天文本/简历摘要快速转成可入库结构。 +- 若模型无法识别关键字段,会在 `missing_fields` 中给出提示。 + ## 入库接口 + ### `POST /poc/ingest/job` +用途:写入或更新岗位卡片(同时更新检索索引)。 + 请求体: ```json -{ "job": { "...": "JobCard" } } +{ + "job": { + "job_id": "job_001", + "title": "活动签到", + "category": "会展活动", + "description": "负责活动签到与引导", + "skills": ["签到", "沟通"], + "city": "深圳", + "region": "福田", + "location_detail": "会展中心", + "start_time": "2026-04-02T09:00:00+08:00", + "duration_hours": 8, + "headcount": 3, + "salary": { "type": "daily", "amount": 180, "currency": "CNY" }, + "work_mode": "兼职", + "tags": ["展会"], + "confidence": 0.95 + } +} ``` +返回:返回入库后的 `JobCard`(通常与请求体一致)。 + ### `POST 
/poc/ingest/worker` +用途:写入或更新工人卡片(同时更新检索索引)。 + 请求体: ```json -{ "worker": { "...": "WorkerCard" } } +{ + "worker": { + "worker_id": "worker_001", + "name": "张三", + "description": "有活动执行经验", + "skills": [ + { "name": "签到", "score": 0.9 }, + { "name": "引导", "score": 0.8 } + ], + "cities": ["深圳"], + "regions": ["福田", "南山"], + "availability": ["周末全天"], + "experience_tags": ["会展", "地推"], + "reliability_score": 0.92, + "profile_completion": 0.86, + "confidence": 0.93 + } +} ``` +返回:返回入库后的 `WorkerCard`(通常与请求体一致)。 + +### `POST /poc/ingest/job/async` +用途:异步岗位入库,快速返回任务 ID,不阻塞主请求。 + +返回示例: +```json +{ + "task_id": "queue_xxx", + "status": "queued" +} +``` + +### `POST /poc/ingest/worker/async` +用途:异步工人入库,快速返回任务 ID,不阻塞主请求。 + +返回结构同 `POST /poc/ingest/job/async`。 + +### `GET /poc/ingest/queue/{task_id}` +用途:查询异步入库任务状态。 + +可能状态: +- `queued` +- `processing` +- `done` +- `failed` +- `not_found` + ### `POST /poc/ingest/bootstrap` -说明:导入样本数据、词表、Qdrant 检索索引数据。 +用途:导入内置样本数据(岗位、工人、技能、类目、区域)并构建检索数据。 + +请求体:无。 + +返回示例: +```json +{ + "jobs": 100, + "workers": 300, + "skills": 120, + "categories": 20, + "regions": 50 +} +``` + +说明: +- 适合开发环境初始化。 +- 重复执行会触发 upsert 逻辑(覆盖同 ID 数据)。 ## 匹配接口 + ### `POST /poc/match/workers` -支持 `job_id` 或内联 `job`: +用途:以岗位为源,匹配合适工人。 + +请求体(二选一): +1. 传 `job_id`(按已入库岗位匹配): ```json { "job_id": "job_001", @@ -51,8 +345,51 @@ } ``` +2. 
传内联 `job`(不依赖入库): +```json +{ + "job": { "...": "JobCard" }, + "top_n": 10 +} +``` + +字段说明: +- `job_id`:岗位 ID。 +- `job`:完整岗位对象。 +- `top_n`:返回条数,范围 `1~50`,默认 `10`。 +- `job_id` 与 `job` 至少提供一个。 + +返回: +```json +{ + "items": [ + { + "match_id": "match_001", + "source_type": "job_to_worker", + "source_id": "job_001", + "target_id": "worker_007", + "match_score": 0.87, + "breakdown": { + "skill_score": 0.9, + "region_score": 1.0, + "time_score": 0.8, + "experience_score": 0.85, + "reliability_score": 0.8 + }, + "reasons": ["技能高度匹配", "同区域可到岗", "有同类活动经验"] + } + ] +} +``` + +错误说明: +- 当 `job_id` 不存在且未传 `job` 时,返回 `404`,提示“岗位不存在”。 + ### `POST /poc/match/jobs` -支持 `worker_id` 或内联 `worker`: +用途:以工人为源,匹配合适岗位。 + +请求体(二选一): +1. 传 `worker_id`: ```json { "worker_id": "worker_001", @@ -60,16 +397,212 @@ } ``` +2. 传内联 `worker`: +```json +{ + "worker": { "...": "WorkerCard" }, + "top_n": 10 +} +``` + +字段约束: +- `worker_id` 与 `worker` 至少提供一个。 +- `top_n` 范围 `1~50`,默认 `10`。 + +返回:`MatchResponse`,结构同上,`source_type` 为 `worker_to_job`。 + +错误说明: +- 当 `worker_id` 不存在且未传 `worker` 时,返回 `404`,提示“工人不存在”。 + +### `POST /poc/match/workers/async` +用途:岗位异步匹配工人(削峰入口),快速返回任务 ID。 + +请求体: +```json +{ + "job_id": "job_001", + "top_n": 10 +} +``` + +返回示例: +```json +{ + "task_id": "mq_xxx", + "status": "queued" +} +``` + +### `POST /poc/match/jobs/async` +用途:工人异步匹配岗位(削峰入口),快速返回任务 ID。 + +请求体: +```json +{ + "worker_id": "worker_001", + "top_n": 10 +} +``` + +返回结构同 `POST /poc/match/workers/async`。 + +### `GET /poc/match/queue/{task_id}` +用途:查询异步匹配任务状态,完成后返回 `items` 结果集。 + +可能状态: +- `queued` +- `processing` +- `done` +- `failed` +- `not_found` + ### `GET /poc/match/explain/{match_id}` -返回具体匹配明细与理由。 +用途:查询单条匹配记录详情与解释理由。 + +路径参数: +- `match_id`:匹配记录 ID。 + +返回: +```json +{ + "match": { + "match_id": "match_001", + "source_type": "job_to_worker", + "source_id": "job_001", + "target_id": "worker_007", + "match_score": 0.87, + "breakdown": { + "skill_score": 0.9, + "region_score": 1.0, + "time_score": 0.8, + 
"experience_score": 0.85, + "reliability_score": 0.8 + }, + "reasons": ["技能高度匹配", "同区域可到岗", "有同类活动经验"] + } +} +``` + +错误说明: +- 找不到匹配记录时返回 `404`,提示“匹配记录不存在”。 + +### `POST /poc/match/feedback` +用途:提交匹配反馈(接受/拒绝),用于在线更新排序权重。 + +请求体: +```json +{ + "match_id": "match_001", + "accepted": true +} +``` + +返回: +```json +{ + "weights": { + "skill": 0.36, + "region": 0.21, + "time": 0.14, + "experience": 0.14, + "reliability": 0.15 + }, + "learning_enabled": true +} +``` + +错误说明: +- `match_id` 不存在时返回 `404`,提示“匹配记录不存在”。 + +### `GET /poc/match/weights` +用途:查看当前生效排序权重(默认权重或学习后的权重)。 + +返回结构同 `POST /poc/match/feedback`。 ## 查询接口 -### `GET /poc/jobs` -### `GET /poc/workers` -### `GET /poc/jobs/{job_id}` -### `GET /poc/workers/{worker_id}` -## 交接说明 -- 抽取接口返回 `success/data/errors/missing_fields`,方便后续切换更强 LLM 时做错误回退。 -- 匹配接口输出 `breakdown` 五维打分,可直接给后续运营、策略或模型团队继续调权。 -- `packages/shared-types/src/index.ts` 保留了前端可复用类型定义。 +### `GET /poc/jobs` +用途:分页前的基础列表查询(当前返回全量)。 + +返回: +```json +{ + "items": [{ "...": "JobCard(JSON)" }], + "total": 100 +} +``` + +### `GET /poc/workers` +用途:查询工人列表(当前返回全量)。 + +返回: +```json +{ + "items": [{ "...": "WorkerCard(JSON)" }], + "total": 300 +} +``` + +### `GET /poc/jobs/{job_id}` +用途:根据 ID 查询单个岗位。 + +路径参数: +- `job_id`:岗位 ID。 + +返回:`JobCard`。 + +错误说明: +- ID 不存在返回 `404`,提示“岗位不存在”。 + +### `GET /poc/workers/{worker_id}` +用途:根据 ID 查询单个工人。 + +路径参数: +- `worker_id`:工人 ID。 + +返回:`WorkerCard`。 + +错误说明: +- ID 不存在返回 `404`,提示“工人不存在”。 + +## 交接建议 +- 以 `docs/openapi.json` 作为机器契约,`docs/API.md` 作为业务语义解释。 +- 前端与测试联调时,优先校验: + - 抽取失败时 `errors/missing_fields` 是否按预期返回。 + - 匹配结果 `breakdown` 五维分是否完整。 + - `top_n` 边界值(`1`、`50`、`>50`)的校验行为。 + +## 升级配置说明 +- 抽取增强(schema-aware 重试): + - `EXTRACTION_LLM_MAX_RETRIES`:LLM 校验失败后的修复重试次数,默认 `2`。 +- 检索 embedding 可配置: + - `EMBEDDING_ENABLED`:是否启用正式 embedding,默认 `false`。 + - `EMBEDDING_BACKEND`:`hash` 或 `openai_compatible`。 + - `EMBEDDING_BASE_URL` / `EMBEDDING_API_KEY` / `EMBEDDING_MODEL`:embedding 服务配置。 + - `EMBEDDING_VECTOR_SIZE`:embedding 维度(需与 
Qdrant 集合维度一致)。 +- 排序在线学习: + - `RANKING_LEARNING_ENABLED`:是否启用反馈学习,默认 `true`。 + - `RANKING_LEARNING_RATE`:在线更新学习率,默认 `0.08`。 + - 权重持久化文件:`data/match_weights.json`。 +- 全局稳定性护栏: + - `APP_RATE_LIMIT_PER_MINUTE`:全局每分钟请求上限。 + - `APP_CIRCUIT_BREAKER_ERROR_RATE`:窗口 5xx 错误率触发阈值。 + - `APP_CIRCUIT_BREAKER_MIN_REQUESTS`:熔断判定最小请求数。 + - `APP_CIRCUIT_BREAKER_WINDOW_SECONDS`:错误率统计窗口。 + - `APP_CIRCUIT_BREAKER_COOLDOWN_SECONDS`:熔断冷却时长。 + - `ALERT_WEBHOOK_URL`:告警 webhook 地址(可选)。 +- 异步队列与缓存: + - `INGEST_ASYNC_ENABLED`:是否启用异步入库队列。 + - `INGEST_QUEUE_MAX_SIZE`:队列最大长度。 + - `MATCH_ASYNC_ENABLED`:是否启用异步匹配队列。 + - `MATCH_QUEUE_MAX_SIZE`:异步匹配队列最大长度。 + - `MATCH_CACHE_ENABLED`:是否启用匹配缓存。 + - `MATCH_CACHE_TTL_SECONDS`:匹配缓存有效期。 + - `QUERY_CACHE_ENABLED`:是否启用查询缓存。 + - `QUERY_CACHE_TTL_SECONDS`:查询缓存有效期。 + - `CACHE_BACKEND`:缓存后端 `memory/redis`。 + - `REDIS_URL`:Redis 连接地址。 +- 数据库连接池: + - `DATABASE_POOL_SIZE`:连接池大小。 + - `DATABASE_MAX_OVERFLOW`:溢出连接数。 + - `DATABASE_POOL_TIMEOUT`:获取连接超时秒数。 diff --git a/gig-poc/docs/CAPACITY_BASELINE.md b/gig-poc/docs/CAPACITY_BASELINE.md new file mode 100644 index 0000000..21b3a99 --- /dev/null +++ b/gig-poc/docs/CAPACITY_BASELINE.md @@ -0,0 +1,19 @@ +# 容量基线(自动生成) + +- 生成时间: 2026-03-31 14:36:58 +0800 +- API_BASE: http://127.0.0.1:8000 +- TOTAL_REQUESTS: 80 +- CONCURRENCY: 20 + +| 场景 | 成功率 | RPS | 平均延迟(ms) | P95(ms) | P99(ms) | +| --- | --- | --- | --- | --- | --- | +| health | 1.0 | 19.34 | 978.98 | 1434.66 | 1544.06 | +| jobs | 1.0 | 95.39 | 197.95 | 409.12 | 424.99 | +| match_workers | 1.0 | 20.81 | 913.73 | 1975.6 | 2118.65 | +| match_jobs | 1.0 | 19.88 | 975.29 | 2001.08 | 2147.74 | +| match_workers_cached | 1.0 | 23.52 | 819.62 | 1220.26 | 1331.26 | +| match_jobs_cached | 1.0 | 25.21 | 759.14 | 1077.45 | 1200.4 | +| match_workers_async | 1.0 | 211.09 | 89.04 | 151.04 | 158.89 | +| match_jobs_async | 1.0 | 221.04 | 83.96 | 143.35 | 162.95 | + +> 建议:该基线仅代表当前单机/当前数据量下表现,发布前请在目标环境按 2x/5x 峰值复测。 diff --git a/gig-poc/docs/DEMO.md b/gig-poc/docs/DEMO.md index 
2882ad6..ebd560a 100644 --- a/gig-poc/docs/DEMO.md +++ b/gig-poc/docs/DEMO.md @@ -5,6 +5,12 @@ cd gig-poc sh infrastructure/scripts/dev-up.sh ``` +默认会自动完成: +- 启动容器并健康检查 +- bootstrap 样本数据 +- 闭环验收(抽取 -> 入库 -> 匹配 -> 解释) +- 导出 `docs/openapi.json` +- 可选压测并生成 `docs/CAPACITY_BASELINE.md`(`RUN_BASELINE_ON_UP=true`) ## 演示步骤 1. 打开 `http://127.0.0.1:5173` @@ -21,6 +27,42 @@ cd gig-poc sh infrastructure/scripts/prod-up.sh ``` +## 生产环境停止 +```bash +cd gig-poc +sh infrastructure/scripts/prod-down.sh +``` + +## OpenAPI 交接文件导出 +```bash +cd gig-poc +sh infrastructure/scripts/export-openapi.sh +``` + +## OpenAPI 固化入库(离线) +```bash +cd gig-poc +sh infrastructure/scripts/freeze-openapi.sh +``` + +## 一键闭环验收(可单独执行) +```bash +cd gig-poc +sh infrastructure/scripts/acceptance-e2e.sh +``` + +## 容量基线压测(可单独执行) +```bash +cd gig-poc +sh infrastructure/scripts/load-baseline.sh +``` + +## 高并发演示建议 +1. 同步匹配:调用 `POST /poc/match/workers` 观察实时结果。 +2. 异步匹配削峰:调用 `POST /poc/match/workers/async` 获取 `task_id`。 +3. 轮询结果:调用 `GET /poc/match/queue/{task_id}` 直到 `status=done`。 +4. 
打开 `GET /poc/ops/system/metrics` 观察缓存命中率、队列积压和限流熔断状态。 + ## 演示建议 - 先演示系统状态页,确认健康与 bootstrap 正常 - 再演示岗位找人、人找岗位两个闭环 diff --git a/gig-poc/docs/README.md b/gig-poc/docs/README.md index 96254ad..96c5d18 100644 --- a/gig-poc/docs/README.md +++ b/gig-poc/docs/README.md @@ -39,10 +39,45 @@ - `LLM_BASE_URL`:OpenAI 兼容接口地址 - `LLM_API_KEY`:模型服务密钥 - `LLM_MODEL`:模型名称 +- `LLM_FALLBACK_BASE_URLS`:LLM 备用端点列表(JSON 数组) +- `AI_RATE_LIMIT_PER_MINUTE`:AI 请求每分钟限流阈值 +- `AI_CIRCUIT_BREAKER_FAIL_THRESHOLD`:熔断触发失败次数 +- `AI_CIRCUIT_BREAKER_COOLDOWN_SECONDS`:熔断冷却秒数 +- `EMBEDDING_ENABLED`:是否启用正式 embedding +- `EMBEDDING_BACKEND`:`hash` 或 `openai_compatible` +- `EMBEDDING_BASE_URL` / `EMBEDDING_API_KEY` / `EMBEDDING_MODEL`:embedding 配置 +- `INGEST_ASYNC_ENABLED`:是否启用异步入库队列 +- `INGEST_QUEUE_MAX_SIZE`:异步队列最大长度 +- `MATCH_CACHE_ENABLED`:是否启用匹配缓存 +- `MATCH_CACHE_TTL_SECONDS`:匹配缓存 TTL(秒) +- `QUERY_CACHE_ENABLED`:是否启用查询缓存(列表与详情) +- `QUERY_CACHE_TTL_SECONDS`:查询缓存 TTL(秒) +- `CACHE_BACKEND`:缓存后端,`memory` 或 `redis` +- `REDIS_URL`:Redis 连接串 +- `APP_RATE_LIMIT_PER_MINUTE`:全局请求限流阈值 +- `APP_CIRCUIT_BREAKER_*`:全局熔断参数(错误率、窗口、冷却) +- `ALERT_WEBHOOK_URL`:告警 webhook(可选) +- `DATABASE_POOL_SIZE` / `DATABASE_MAX_OVERFLOW` / `DATABASE_POOL_TIMEOUT`:数据库连接池参数 +- `MATCH_ASYNC_ENABLED`:是否启用异步匹配队列 +- `MATCH_QUEUE_MAX_SIZE`:异步匹配队列最大长度 ## 启动方式 1. `cd gig-poc` 2. `sh infrastructure/scripts/dev-up.sh` +3. 默认会自动执行: + - 健康检查 + bootstrap + - 一键闭环验收脚本(抽取 -> 入库 -> 匹配 -> 解释) + - 导出 `docs/openapi.json` +4. 
可选开启容量基线压测: + - `RUN_BASELINE_ON_UP=true sh infrastructure/scripts/dev-up.sh` + +## 生产环境启动/停止 +- 启动:`sh infrastructure/scripts/prod-up.sh` +- 停止:`sh infrastructure/scripts/prod-down.sh` +- 可选环境变量: + - `WEB_PORT`(默认 `80`) + - `API_PORT`(默认 `8000`) + - `BOOTSTRAP_ON_UP`(默认 `true`,可设置为 `false` 跳过样本初始化) ## 样本导入方式 `dev-up.sh` 会在健康检查通过后自动触发 `/poc/ingest/bootstrap`,导入 100 岗位、300 工人和词表。 @@ -50,6 +85,11 @@ ## API 地址 - `http://127.0.0.1:8000` - OpenAPI:`http://127.0.0.1:8000/docs` +- OpenAPI JSON 导出:`sh infrastructure/scripts/export-openapi.sh` +- OpenAPI 固化(离线生成并入库):`sh infrastructure/scripts/freeze-openapi.sh` +- AI 观测接口:`GET /poc/ops/ai/metrics` +- 系统观测接口:`GET /poc/ops/system/metrics` +- 异步匹配接口:`POST /poc/match/workers/async`、`POST /poc/match/jobs/async`、`GET /poc/match/queue/{task_id}` ## 前端访问地址 - `http://127.0.0.1:5173` @@ -61,6 +101,53 @@ 4. 点击入库并匹配岗位 5. 在系统状态页执行健康检查和样本导入 +## 一键闭环验收 +```bash +cd gig-poc +sh infrastructure/scripts/acceptance-e2e.sh +``` + +该脚本会自动验证两条链路: +- 岗位文本抽取 -> 岗位入库 -> 岗位匹配工人 -> 匹配解释 +- 工人文本抽取 -> 工人入库 -> 工人匹配岗位 -> 匹配解释 + +## 容量基线压测 +```bash +cd gig-poc +sh infrastructure/scripts/load-baseline.sh +``` + +输出文件: +- `docs/CAPACITY_BASELINE.md` + +可选参数: +- `TOTAL_REQUESTS`(默认 `400`) +- `CONCURRENCY`(默认 `40`) + +## 规模化建议(上线前) +- 应用层:开启多实例部署(建议至少 2 个 API 实例)并接入负载均衡。 +- 数据层:PostgreSQL、Qdrant 使用托管或主从/集群形态,避免单点。 +- 链路层:优先走异步入库接口(`/poc/ingest/*/async`)吸收突发写流量。 +- 匹配层:高峰请求优先走异步匹配接口(`/poc/match/*/async`)做削峰。 +- 观测层:接入 `/poc/ops/system/metrics` 与 `/poc/ops/ai/metrics` 到监控告警系统。 +- 发布层:每次发布前更新 `docs/openapi.json` 与 `docs/CAPACITY_BASELINE.md`。 + +## K8s 扩容部署(基础模板) +目录:`infrastructure/k8s` + +```bash +cd gig-poc +kubectl apply -k infrastructure/k8s +``` + +包含资源: +- API Deployment + Service + HPA(默认 3~20 副本) +- Web Deployment + Service + HPA(默认 2~10 副本) +- Redis Deployment + Service +- Ingress 示例路由 + +详细策略说明见:`docs/SCALING.md` + ## 已实现范围 - 岗位抽取 - 工人抽取 diff --git a/gig-poc/docs/SCALING.md b/gig-poc/docs/SCALING.md new file mode 100644 index 0000000..d72dc1f --- 
/dev/null +++ b/gig-poc/docs/SCALING.md @@ -0,0 +1,19 @@ +# 扩容与高 DAU 策略 + +## 当前能力 +- API/Web 提供 K8s 多副本与 HPA 模板:`infrastructure/k8s` +- Redis 缓存后端支持:热点匹配与查询链路缓存 +- 异步队列:入库与匹配都支持异步削峰 +- 观测接口:`/poc/ops/system/metrics`、`/poc/ops/ai/metrics` + +## 推荐上线形态 +1. API 多副本(>=3)+ HPA(3~20) +2. Web 多副本(>=2)+ HPA(2~10) +3. Redis 独立高可用(哨兵或托管) +4. PostgreSQL、Qdrant 使用托管或主从/集群 +5. 异步接口承接高峰写流量与匹配重算 + +## 发布前门槛 +- 运行 `sh infrastructure/scripts/load-baseline.sh` +- 更新并提交 `docs/CAPACITY_BASELINE.md` +- 验证 P95/P99、成功率和队列积压指标 diff --git a/gig-poc/docs/openapi.json b/gig-poc/docs/openapi.json new file mode 100644 index 0000000..fdd0fd1 --- /dev/null +++ b/gig-poc/docs/openapi.json @@ -0,0 +1 @@ +{"openapi":"3.1.0","info":{"title":"Gig POC API","description":"Gig POC 接口文档。\n\n接口分组:系统、抽取、入库、匹配、查询。\n完整业务说明请参考项目文档 `docs/API.md`。","version":"0.1.0"},"paths":{"/health":{"get":{"tags":["系统"],"summary":"服务健康检查","description":"检查 API 服务、数据库与 RAG 检索组件状态。","operationId":"health_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HealthStatus"}}}}}}},"/poc/extract/job":{"post":{"tags":["抽取"],"summary":"岗位文本抽取","description":"将岗位自然语言文本抽取为结构化 JobCard。","operationId":"extract_job_poc_extract_job_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExtractTextRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExtractResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/poc/extract/worker":{"post":{"tags":["抽取"],"summary":"工人文本抽取","description":"将工人自然语言文本抽取为结构化 
WorkerCard。","operationId":"extract_worker_poc_extract_worker_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExtractTextRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExtractResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/poc/ingest/job":{"post":{"tags":["入库"],"summary":"岗位入库","description":"写入或更新岗位卡片,并同步更新检索索引。","operationId":"ingest_job_poc_ingest_job_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/IngestJobRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JobCard-Output"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/poc/ingest/worker":{"post":{"tags":["入库"],"summary":"工人入库","description":"写入或更新工人卡片,并同步更新检索索引。","operationId":"ingest_worker_poc_ingest_worker_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/IngestWorkerRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/WorkerCard"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/poc/ingest/job/async":{"post":{"tags":["入库"],"summary":"岗位异步入库","description":"将岗位入库请求写入异步队列,快速返回任务 ID。","operationId":"ingest_job_async_poc_ingest_job_async_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/IngestJobRequest"}}},"required":true},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/IngestAsyncResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/poc/ingest/worker/async":{"post":{"tags":["入库"],"summary":"工人异步入库","description":"将工人入库请求写入异步队列,快速返回任务 ID。","operationId":"ingest_worker_async_poc_ingest_worker_async_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/IngestWorkerRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/IngestAsyncResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/poc/ingest/queue/{task_id}":{"get":{"tags":["入库"],"summary":"异步入库任务状态","description":"根据 task_id 查询异步入库任务状态。","operationId":"ingest_task_status_poc_ingest_queue__task_id__get","parameters":[{"name":"task_id","in":"path","required":true,"schema":{"type":"string","title":"Task Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/IngestAsyncResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/poc/ingest/bootstrap":{"post":{"tags":["入库"],"summary":"样本数据初始化","description":"导入内置样本数据(岗位、工人、技能、类目、区域)并构建检索数据。","operationId":"bootstrap_poc_ingest_bootstrap_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/BootstrapResponse"}}}}}}},"/poc/match/workers":{"post":{"tags":["匹配"],"summary":"岗位匹配工人","description":"支持通过 job_id 或内联 job 进行匹配,返回 top_n 
条结果。","operationId":"match_workers_poc_match_workers_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MatchWorkersRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MatchResponse"}}}},"404":{"description":"岗位不存在"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/poc/match/jobs":{"post":{"tags":["匹配"],"summary":"工人匹配岗位","description":"支持通过 worker_id 或内联 worker 进行匹配,返回 top_n 条结果。","operationId":"match_jobs_poc_match_jobs_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MatchJobsRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MatchResponse"}}}},"404":{"description":"工人不存在"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/poc/match/workers/async":{"post":{"tags":["匹配"],"summary":"岗位异步匹配工人","description":"将匹配任务放入队列异步计算,适合高并发削峰。","operationId":"match_workers_async_poc_match_workers_async_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MatchAsyncWorkersRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MatchAsyncResponse"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/poc/match/jobs/async":{"post":{"tags":["匹配"],"summary":"工人异步匹配岗位","description":"将匹配任务放入队列异步计算,适合高并发削峰。","operationId":"match_jobs_async_poc_match_jobs_async_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MatchAsyncJobsRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MatchAsyncResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/poc/match/queue/{task_id}":{"get":{"tags":["匹配"],"summary":"异步匹配任务状态","description":"根据 task_id 查询异步匹配任务状态,完成后返回匹配结果。","operationId":"match_task_status_poc_match_queue__task_id__get","parameters":[{"name":"task_id","in":"path","required":true,"schema":{"type":"string","title":"Task Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MatchAsyncResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/poc/match/explain/{match_id}":{"get":{"tags":["匹配"],"summary":"匹配结果解释","description":"根据 match_id 获取匹配明细和解释理由。","operationId":"explain_match_poc_match_explain__match_id__get","parameters":[{"name":"match_id","in":"path","required":true,"schema":{"type":"string","description":"匹配记录 ID","title":"Match Id"},"description":"匹配记录 ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExplainResponse"}}}},"404":{"description":"匹配记录不存在"},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/poc/match/feedback":{"post":{"tags":["匹配"],"summary":"匹配反馈学习","description":"提交单条匹配的接受/拒绝反馈,用于在线更新排序权重。","operationId":"feedback_match_poc_match_feedback_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MatchFeedbackRequest"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MatchWeightResponse"}}}},"404":{"description":"匹配记录不存在"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/poc/match/weights":{"get":{"tags":["匹配"],"summary":"当前排序权重","description":"查看当前生效的排序权重(默认权重或学习后的权重)。","operationId":"get_match_weights_poc_match_weights_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MatchWeightResponse"}}}}}}},"/poc/ops/ai/metrics":{"get":{"tags":["系统"],"summary":"AI 观测指标","description":"返回 AI 调用的限流、熔断、降级与 fallback 命中率指标。","operationId":"ai_metrics_poc_ops_ai_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/AIObservabilityResponse"}}}}}}},"/poc/ops/system/metrics":{"get":{"tags":["系统"],"summary":"系统运行指标","description":"返回全局流量护栏、缓存与异步队列指标。","operationId":"system_metrics_poc_ops_system_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SystemOpsResponse"}}}}}}},"/poc/jobs":{"get":{"tags":["查询"],"summary":"岗位列表查询","description":"查询岗位列表,当前返回全量数据。","operationId":"list_jobs_poc_jobs_get","responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListResponse"}}}}}}},"/poc/workers":{"get":{"tags":["查询"],"summary":"工人列表查询","description":"查询工人列表,当前返回全量数据。","operationId":"list_workers_poc_workers_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListResponse"}}}}}}},"/poc/jobs/{job_id}":{"get":{"tags":["查询"],"summary":"岗位详情查询","description":"根据岗位 ID 查询单个岗位详情。","operationId":"get_job_poc_jobs__job_id__get","parameters":[{"name":"job_id","in":"path","required":true,"schema":{"type":"string","description":"岗位 ID","title":"Job Id"},"description":"岗位 ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JobCard-Output"}}}},"404":{"description":"岗位不存在"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/poc/workers/{worker_id}":{"get":{"tags":["查询"],"summary":"工人详情查询","description":"根据工人 ID 查询单个工人详情。","operationId":"get_worker_poc_workers__worker_id__get","parameters":[{"name":"worker_id","in":"path","required":true,"schema":{"type":"string","description":"工人 ID","title":"Worker Id"},"description":"工人 ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/WorkerCard"}}}},"404":{"description":"工人不存在"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"AIObservabilityResponse":{"properties":{"metrics":{"additionalProperties":{"anyOf":[{"type":"number"},{"type":"integer"}]},"type":"object","title":"Metrics","description":"AI 
调用观测指标"}},"type":"object","required":["metrics"],"title":"AIObservabilityResponse"},"BootstrapResponse":{"properties":{"jobs":{"type":"integer","title":"Jobs","description":"导入岗位数量"},"workers":{"type":"integer","title":"Workers","description":"导入工人数量"},"skills":{"type":"integer","title":"Skills","description":"技能词条数量"},"categories":{"type":"integer","title":"Categories","description":"类目数量"},"regions":{"type":"integer","title":"Regions","description":"区域数量"}},"type":"object","required":["jobs","workers","skills","categories","regions"],"title":"BootstrapResponse"},"ExplainResponse":{"properties":{"match":{"$ref":"#/components/schemas/MatchResult","description":"单条匹配结果详情"}},"type":"object","required":["match"],"title":"ExplainResponse"},"ExtractResponse":{"properties":{"success":{"type":"boolean","title":"Success","description":"抽取是否成功"},"data":{"anyOf":[{"$ref":"#/components/schemas/JobCard-Output"},{"$ref":"#/components/schemas/WorkerCard"},{"type":"null"}],"title":"Data","description":"抽取结果对象,可能为空"},"errors":{"items":{"type":"string"},"type":"array","title":"Errors","description":"错误信息列表"},"missing_fields":{"items":{"type":"string"},"type":"array","title":"Missing Fields","description":"缺失字段列表"}},"type":"object","required":["success"],"title":"ExtractResponse"},"ExtractTextRequest":{"properties":{"text":{"type":"string","minLength":5,"title":"Text","description":"待抽取的自然语言文本,最少 5 个字符"}},"type":"object","required":["text"],"title":"ExtractTextRequest","example":{"text":"明天下午南山会展中心需要2个签到协助,5小时,150/人,女生优先"}},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"HealthStatus":{"properties":{"service":{"type":"string","title":"Service","description":"服务状态,通常为 ok"},"database":{"type":"string","title":"Database","description":"数据库状态:ok 或 error"},"rag":{"type":"string","title":"Rag","description":"RAG 组件状态:ok 或 
error"},"timestamp":{"type":"string","format":"date-time","title":"Timestamp","description":"服务端当前时间"}},"type":"object","required":["service","database","rag","timestamp"],"title":"HealthStatus"},"IngestAsyncResponse":{"properties":{"task_id":{"type":"string","title":"Task Id","description":"异步任务 ID"},"status":{"type":"string","title":"Status","description":"任务状态"}},"type":"object","required":["task_id","status"],"title":"IngestAsyncResponse"},"IngestJobRequest":{"properties":{"job":{"$ref":"#/components/schemas/JobCard-Input","description":"岗位卡片对象"}},"type":"object","required":["job"],"title":"IngestJobRequest"},"IngestWorkerRequest":{"properties":{"worker":{"$ref":"#/components/schemas/WorkerCard","description":"工人卡片对象"}},"type":"object","required":["worker"],"title":"IngestWorkerRequest"},"JobCard-Input":{"properties":{"job_id":{"type":"string","title":"Job Id","description":"岗位唯一 ID"},"title":{"type":"string","title":"Title","description":"岗位标题"},"category":{"type":"string","title":"Category","description":"岗位类别"},"description":{"type":"string","title":"Description","description":"岗位描述"},"skills":{"items":{"type":"string"},"type":"array","title":"Skills","description":"岗位技能要求列表"},"city":{"type":"string","title":"City","description":"城市"},"region":{"type":"string","title":"Region","description":"区域"},"location_detail":{"type":"string","title":"Location Detail","description":"详细地点描述"},"start_time":{"type":"string","format":"date-time","title":"Start Time","description":"岗位开始时间,ISO-8601"},"duration_hours":{"type":"number","exclusiveMinimum":0.0,"title":"Duration Hours","description":"工时(小时),必须大于 0"},"headcount":{"type":"integer","exclusiveMinimum":0.0,"title":"Headcount","description":"招聘人数,必须大于 0"},"salary":{"$ref":"#/components/schemas/Salary","description":"薪资信息"},"work_mode":{"type":"string","title":"Work 
Mode","description":"工作模式,如兼职、全职、活动"},"tags":{"items":{"type":"string"},"type":"array","title":"Tags","description":"业务标签列表"},"confidence":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Confidence","description":"数据置信度,范围 0~1"}},"type":"object","required":["job_id","title","category","description","city","region","location_detail","start_time","duration_hours","headcount","salary","work_mode","confidence"],"title":"JobCard"},"JobCard-Output":{"properties":{"job_id":{"type":"string","title":"Job Id","description":"岗位唯一 ID"},"title":{"type":"string","title":"Title","description":"岗位标题"},"category":{"type":"string","title":"Category","description":"岗位类别"},"description":{"type":"string","title":"Description","description":"岗位描述"},"skills":{"items":{"type":"string"},"type":"array","title":"Skills","description":"岗位技能要求列表"},"city":{"type":"string","title":"City","description":"城市"},"region":{"type":"string","title":"Region","description":"区域"},"location_detail":{"type":"string","title":"Location Detail","description":"详细地点描述"},"start_time":{"type":"string","format":"date-time","title":"Start Time","description":"岗位开始时间,ISO-8601"},"duration_hours":{"type":"number","exclusiveMinimum":0.0,"title":"Duration Hours","description":"工时(小时),必须大于 0"},"headcount":{"type":"integer","exclusiveMinimum":0.0,"title":"Headcount","description":"招聘人数,必须大于 0"},"salary":{"$ref":"#/components/schemas/Salary","description":"薪资信息"},"work_mode":{"type":"string","title":"Work Mode","description":"工作模式,如兼职、全职、活动"},"tags":{"items":{"type":"string"},"type":"array","title":"Tags","description":"业务标签列表"},"confidence":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Confidence","description":"数据置信度,范围 
0~1"}},"type":"object","required":["job_id","title","category","description","city","region","location_detail","start_time","duration_hours","headcount","salary","work_mode","confidence"],"title":"JobCard"},"ListResponse":{"properties":{"items":{"items":{"additionalProperties":true,"type":"object"},"type":"array","title":"Items","description":"列表项"},"total":{"type":"integer","title":"Total","description":"总数"}},"type":"object","required":["items","total"],"title":"ListResponse"},"MatchAsyncJobsRequest":{"properties":{"worker_id":{"type":"string","title":"Worker Id","description":"工人 ID"},"top_n":{"type":"integer","maximum":50.0,"minimum":1.0,"title":"Top N","description":"返回条数,范围 1~50","default":10}},"type":"object","required":["worker_id"],"title":"MatchAsyncJobsRequest"},"MatchAsyncResponse":{"properties":{"task_id":{"type":"string","title":"Task Id","description":"异步任务 ID"},"status":{"type":"string","title":"Status","description":"任务状态"},"items":{"anyOf":[{"items":{"$ref":"#/components/schemas/MatchResult"},"type":"array"},{"type":"null"}],"title":"Items","description":"任务完成后返回的匹配结果"}},"type":"object","required":["task_id","status"],"title":"MatchAsyncResponse"},"MatchAsyncWorkersRequest":{"properties":{"job_id":{"type":"string","title":"Job Id","description":"岗位 ID"},"top_n":{"type":"integer","maximum":50.0,"minimum":1.0,"title":"Top N","description":"返回条数,范围 1~50","default":10}},"type":"object","required":["job_id"],"title":"MatchAsyncWorkersRequest"},"MatchBreakdown":{"properties":{"skill_score":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Skill Score","description":"技能匹配分,范围 0~1"},"region_score":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Region Score","description":"地域匹配分,范围 0~1"},"time_score":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Time Score","description":"时间匹配分,范围 0~1"},"experience_score":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Experience Score","description":"经验匹配分,范围 
0~1"},"reliability_score":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Reliability Score","description":"可靠性匹配分,范围 0~1"}},"type":"object","required":["skill_score","region_score","time_score","experience_score","reliability_score"],"title":"MatchBreakdown"},"MatchFeedbackRequest":{"properties":{"match_id":{"type":"string","title":"Match Id","description":"匹配记录 ID"},"accepted":{"type":"boolean","title":"Accepted","description":"反馈是否接受该推荐"}},"type":"object","required":["match_id","accepted"],"title":"MatchFeedbackRequest"},"MatchJobsRequest":{"properties":{"worker_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Worker Id","description":"工人 ID(与 worker 二选一)"},"worker":{"anyOf":[{"$ref":"#/components/schemas/WorkerCard"},{"type":"null"}],"description":"内联工人对象(与 worker_id 二选一)"},"top_n":{"type":"integer","maximum":50.0,"minimum":1.0,"title":"Top N","description":"返回条数,范围 1~50","default":10}},"type":"object","title":"MatchJobsRequest"},"MatchResponse":{"properties":{"items":{"items":{"$ref":"#/components/schemas/MatchResult"},"type":"array","title":"Items","description":"匹配结果列表"}},"type":"object","required":["items"],"title":"MatchResponse"},"MatchResult":{"properties":{"match_id":{"type":"string","title":"Match Id","description":"匹配记录 ID"},"source_type":{"$ref":"#/components/schemas/SourceType","description":"匹配方向:job_to_worker 或 worker_to_job"},"source_id":{"type":"string","title":"Source Id","description":"源实体 ID"},"target_id":{"type":"string","title":"Target Id","description":"目标实体 ID"},"match_score":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Match Score","description":"综合匹配分,范围 0~1"},"breakdown":{"$ref":"#/components/schemas/MatchBreakdown","description":"多维打分拆解"},"reasons":{"items":{"type":"string"},"type":"array","minItems":3,"title":"Reasons","description":"匹配理由,至少 3 
条"}},"type":"object","required":["match_id","source_type","source_id","target_id","match_score","breakdown"],"title":"MatchResult"},"MatchWeightResponse":{"properties":{"weights":{"additionalProperties":{"type":"number"},"type":"object","title":"Weights","description":"当前生效的排序权重"},"learning_enabled":{"type":"boolean","title":"Learning Enabled","description":"是否开启在线学习"}},"type":"object","required":["weights","learning_enabled"],"title":"MatchWeightResponse"},"MatchWorkersRequest":{"properties":{"job_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Job Id","description":"岗位 ID(与 job 二选一)"},"job":{"anyOf":[{"$ref":"#/components/schemas/JobCard-Input"},{"type":"null"}],"description":"内联岗位对象(与 job_id 二选一)"},"top_n":{"type":"integer","maximum":50.0,"minimum":1.0,"title":"Top N","description":"返回条数,范围 1~50","default":10}},"type":"object","title":"MatchWorkersRequest"},"QueueStatusResponse":{"properties":{"queued":{"type":"integer","title":"Queued","description":"当前队列中任务数量"},"processed":{"type":"integer","title":"Processed","description":"历史处理成功数量"},"failed":{"type":"integer","title":"Failed","description":"历史处理失败数量"}},"type":"object","required":["queued","processed","failed"],"title":"QueueStatusResponse"},"Salary":{"properties":{"type":{"$ref":"#/components/schemas/SalaryType","description":"薪资类型:daily/hourly/monthly/task","default":"daily"},"amount":{"type":"number","title":"Amount","description":"薪资金额","default":0},"currency":{"type":"string","title":"Currency","description":"货币类型,默认 CNY","default":"CNY"}},"type":"object","title":"Salary"},"SalaryType":{"type":"string","enum":["daily","hourly","monthly","task"],"title":"SalaryType"},"SkillScore":{"properties":{"name":{"type":"string","title":"Name","description":"技能名称"},"score":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Score","description":"技能熟练度,范围 
0~1"}},"type":"object","required":["name","score"],"title":"SkillScore"},"SourceType":{"type":"string","enum":["job_to_worker","worker_to_job"],"title":"SourceType"},"SystemOpsResponse":{"properties":{"traffic":{"additionalProperties":{"anyOf":[{"type":"number"},{"type":"integer"}]},"type":"object","title":"Traffic","description":"全局流量护栏与错误窗口指标"},"cache":{"additionalProperties":{"anyOf":[{"type":"number"},{"type":"integer"},{"type":"string"}]},"type":"object","title":"Cache","description":"缓存命中与大小"},"ingest_queue":{"$ref":"#/components/schemas/QueueStatusResponse","description":"异步入库队列状态"},"match_queue":{"$ref":"#/components/schemas/QueueStatusResponse","description":"异步匹配队列状态"}},"type":"object","required":["traffic","cache","ingest_queue","match_queue"],"title":"SystemOpsResponse"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"WorkerCard":{"properties":{"worker_id":{"type":"string","title":"Worker Id","description":"工人唯一 ID"},"name":{"type":"string","title":"Name","description":"工人姓名或昵称"},"description":{"type":"string","title":"Description","description":"工人自我描述"},"skills":{"items":{"$ref":"#/components/schemas/SkillScore"},"type":"array","title":"Skills","description":"技能及熟练度列表"},"cities":{"items":{"type":"string"},"type":"array","title":"Cities","description":"可接单城市列表"},"regions":{"items":{"type":"string"},"type":"array","title":"Regions","description":"可接单区域列表"},"availability":{"items":{"type":"string"},"type":"array","title":"Availability","description":"可上岗时间描述"},"experience_tags":{"items":{"type":"string"},"type":"array","title":"Experience Tags","description":"经验标签列表"},"reliability_score":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Reliability Score","description":"履约可靠性分,范围 
0~1"},"profile_completion":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Profile Completion","description":"档案完善度,范围 0~1"},"confidence":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Confidence","description":"数据置信度,范围 0~1"}},"type":"object","required":["worker_id","name","description","reliability_score","profile_completion","confidence"],"title":"WorkerCard"}}},"tags":[{"name":"系统","description":"服务与依赖组件状态检查接口"},{"name":"抽取","description":"自然语言文本抽取为结构化卡片"},{"name":"入库","description":"结构化岗位/工人数据写入与初始化"},{"name":"匹配","description":"岗位与工人双向匹配及结果解释"},{"name":"查询","description":"岗位/工人列表与详情查询"}]} \ No newline at end of file diff --git a/gig-poc/infrastructure/docker-compose.prod.yml b/gig-poc/infrastructure/docker-compose.prod.yml index 28ffde3..448ea1b 100644 --- a/gig-poc/infrastructure/docker-compose.prod.yml +++ b/gig-poc/infrastructure/docker-compose.prod.yml @@ -19,6 +19,13 @@ services: volumes: - qdrant_prod_data:/qdrant/storage + redis: + image: docker.m.daocloud.io/library/redis:7-alpine + restart: unless-stopped + command: ["redis-server", "--appendonly", "yes"] + volumes: + - redis_prod_data:/data + api: build: context: .. 
@@ -33,9 +40,28 @@ services: LLM_BASE_URL: ${LLM_BASE_URL:-} LLM_API_KEY: ${LLM_API_KEY:-} LLM_MODEL: ${LLM_MODEL:-gpt-5.4} + CACHE_BACKEND: ${CACHE_BACKEND:-redis} + REDIS_URL: ${REDIS_URL:-redis://redis:6379/0} + INGEST_ASYNC_ENABLED: ${INGEST_ASYNC_ENABLED:-true} + MATCH_ASYNC_ENABLED: ${MATCH_ASYNC_ENABLED:-true} + MATCH_CACHE_ENABLED: ${MATCH_CACHE_ENABLED:-true} + MATCH_CACHE_TTL_SECONDS: ${MATCH_CACHE_TTL_SECONDS:-30} + QUERY_CACHE_ENABLED: ${QUERY_CACHE_ENABLED:-true} + QUERY_CACHE_TTL_SECONDS: ${QUERY_CACHE_TTL_SECONDS:-20} + APP_RATE_LIMIT_PER_MINUTE: ${APP_RATE_LIMIT_PER_MINUTE:-1200} + APP_CIRCUIT_BREAKER_ERROR_RATE: ${APP_CIRCUIT_BREAKER_ERROR_RATE:-0.5} + APP_CIRCUIT_BREAKER_MIN_REQUESTS: ${APP_CIRCUIT_BREAKER_MIN_REQUESTS:-50} + APP_CIRCUIT_BREAKER_WINDOW_SECONDS: ${APP_CIRCUIT_BREAKER_WINDOW_SECONDS:-60} + APP_CIRCUIT_BREAKER_COOLDOWN_SECONDS: ${APP_CIRCUIT_BREAKER_COOLDOWN_SECONDS:-30} + DATABASE_POOL_SIZE: ${DATABASE_POOL_SIZE:-20} + DATABASE_MAX_OVERFLOW: ${DATABASE_MAX_OVERFLOW:-30} + DATABASE_POOL_TIMEOUT: ${DATABASE_POOL_TIMEOUT:-30} depends_on: - postgres - qdrant + - redis + ports: + - "${API_PORT:-8000}:8000" web: build: @@ -50,3 +76,4 @@ services: volumes: postgres_prod_data: qdrant_prod_data: + redis_prod_data: diff --git a/gig-poc/infrastructure/docker-compose.yml b/gig-poc/infrastructure/docker-compose.yml index b7dadaa..4063cf9 100644 --- a/gig-poc/infrastructure/docker-compose.yml +++ b/gig-poc/infrastructure/docker-compose.yml @@ -26,6 +26,14 @@ services: ports: - "6333:6333" + redis: + image: docker.m.daocloud.io/library/redis:7-alpine + command: ["redis-server", "--appendonly", "yes"] + volumes: + - redis_data:/data + ports: + - "6379:6379" + api: build: context: .. 
@@ -36,11 +44,29 @@ services: QDRANT_URL: http://qdrant:6333 LOG_LEVEL: INFO LLM_ENABLED: "false" + CACHE_BACKEND: "redis" + REDIS_URL: redis://redis:6379/0 + INGEST_ASYNC_ENABLED: "true" + MATCH_ASYNC_ENABLED: "true" + MATCH_CACHE_ENABLED: "true" + MATCH_CACHE_TTL_SECONDS: "30" + QUERY_CACHE_ENABLED: "true" + QUERY_CACHE_TTL_SECONDS: "20" + APP_RATE_LIMIT_PER_MINUTE: "1200" + APP_CIRCUIT_BREAKER_ERROR_RATE: "0.5" + APP_CIRCUIT_BREAKER_MIN_REQUESTS: "50" + APP_CIRCUIT_BREAKER_WINDOW_SECONDS: "60" + APP_CIRCUIT_BREAKER_COOLDOWN_SECONDS: "30" + DATABASE_POOL_SIZE: "20" + DATABASE_MAX_OVERFLOW: "30" + DATABASE_POOL_TIMEOUT: "30" depends_on: postgres: condition: service_healthy qdrant: condition: service_started + redis: + condition: service_started ports: - "8000:8000" @@ -57,3 +83,4 @@ services: volumes: postgres_data: qdrant_data: + redis_data: diff --git a/gig-poc/infrastructure/k8s/api.yaml b/gig-poc/infrastructure/k8s/api.yaml new file mode 100644 index 0000000..36da93d --- /dev/null +++ b/gig-poc/infrastructure/k8s/api.yaml @@ -0,0 +1,90 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gig-poc-api + namespace: gig-poc +spec: + replicas: 3 + selector: + matchLabels: + app: gig-poc-api + template: + metadata: + labels: + app: gig-poc-api + spec: + containers: + - name: api + image: gig-poc-api:latest + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8000 + env: + - name: APP_ENV + value: production + - name: CACHE_BACKEND + value: redis + - name: REDIS_URL + value: redis://gig-poc-redis:6379/0 + - name: INGEST_ASYNC_ENABLED + value: "true" + - name: MATCH_ASYNC_ENABLED + value: "true" + - name: MATCH_CACHE_ENABLED + value: "true" + - name: QUERY_CACHE_ENABLED + value: "true" + - name: APP_RATE_LIMIT_PER_MINUTE + value: "3000" + resources: + requests: + cpu: "500m" + memory: "512Mi" + limits: + cpu: "2" + memory: "2Gi" + readinessProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 10 + periodSeconds: 10 + livenessProbe: + 
httpGet: + path: /health + port: 8000 + initialDelaySeconds: 30 + periodSeconds: 15 +--- +apiVersion: v1 +kind: Service +metadata: + name: gig-poc-api + namespace: gig-poc +spec: + selector: + app: gig-poc-api + ports: + - name: http + port: 8000 + targetPort: 8000 +--- +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: gig-poc-api-hpa + namespace: gig-poc +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: gig-poc-api + minReplicas: 3 + maxReplicas: 20 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 diff --git a/gig-poc/infrastructure/k8s/ingress.yaml b/gig-poc/infrastructure/k8s/ingress.yaml new file mode 100644 index 0000000..2a243bc --- /dev/null +++ b/gig-poc/infrastructure/k8s/ingress.yaml @@ -0,0 +1,24 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: gig-poc-ingress + namespace: gig-poc +spec: + rules: + - host: gig-poc.local + http: + paths: + - path: /api + pathType: Prefix + backend: + service: + name: gig-poc-api + port: + number: 8000 + - path: / + pathType: Prefix + backend: + service: + name: gig-poc-web + port: + number: 80 diff --git a/gig-poc/infrastructure/k8s/kustomization.yaml b/gig-poc/infrastructure/k8s/kustomization.yaml new file mode 100644 index 0000000..1cd19da --- /dev/null +++ b/gig-poc/infrastructure/k8s/kustomization.yaml @@ -0,0 +1,9 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: gig-poc +resources: + - namespace.yaml + - redis.yaml + - api.yaml + - web.yaml + - ingress.yaml diff --git a/gig-poc/infrastructure/k8s/namespace.yaml b/gig-poc/infrastructure/k8s/namespace.yaml new file mode 100644 index 0000000..52d5981 --- /dev/null +++ b/gig-poc/infrastructure/k8s/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: gig-poc diff --git a/gig-poc/infrastructure/k8s/redis.yaml b/gig-poc/infrastructure/k8s/redis.yaml new file mode 100644 index 
0000000..e451b6a --- /dev/null +++ b/gig-poc/infrastructure/k8s/redis.yaml @@ -0,0 +1,41 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gig-poc-redis + namespace: gig-poc +spec: + replicas: 1 + selector: + matchLabels: + app: gig-poc-redis + template: + metadata: + labels: + app: gig-poc-redis + spec: + containers: + - name: redis + image: redis:7-alpine + args: ["redis-server", "--appendonly", "yes"] + ports: + - containerPort: 6379 + resources: + requests: + cpu: "100m" + memory: "128Mi" + limits: + cpu: "500m" + memory: "512Mi" +--- +apiVersion: v1 +kind: Service +metadata: + name: gig-poc-redis + namespace: gig-poc +spec: + selector: + app: gig-poc-redis + ports: + - name: redis + port: 6379 + targetPort: 6379 diff --git a/gig-poc/infrastructure/k8s/web.yaml b/gig-poc/infrastructure/k8s/web.yaml new file mode 100644 index 0000000..6d05bbd --- /dev/null +++ b/gig-poc/infrastructure/k8s/web.yaml @@ -0,0 +1,61 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gig-poc-web + namespace: gig-poc +spec: + replicas: 2 + selector: + matchLabels: + app: gig-poc-web + template: + metadata: + labels: + app: gig-poc-web + spec: + containers: + - name: web + image: gig-poc-web:latest + imagePullPolicy: IfNotPresent + ports: + - containerPort: 80 + resources: + requests: + cpu: "200m" + memory: "256Mi" + limits: + cpu: "1" + memory: "1Gi" +--- +apiVersion: v1 +kind: Service +metadata: + name: gig-poc-web + namespace: gig-poc +spec: + selector: + app: gig-poc-web + ports: + - name: http + port: 80 + targetPort: 80 +--- +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: gig-poc-web-hpa + namespace: gig-poc +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: gig-poc-web + minReplicas: 2 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 diff --git a/gig-poc/infrastructure/scripts/acceptance-e2e.sh 
b/gig-poc/infrastructure/scripts/acceptance-e2e.sh new file mode 100755 index 0000000..08ef375 --- /dev/null +++ b/gig-poc/infrastructure/scripts/acceptance-e2e.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env sh +set -eu + +API_BASE="${API_BASE:-http://127.0.0.1:8000}" +BOOTSTRAP_ON_RUN="${BOOTSTRAP_ON_RUN:-true}" + +TMP_DIR="$(mktemp -d)" +trap 'rm -rf "$TMP_DIR"' EXIT + +step() { + echo "[ACCEPTANCE] $1" +} + +fail() { + echo "[ACCEPTANCE][FAIL] $1" >&2 + exit 1 +} + +parse_json() { + PY_EXPR="$1" + INPUT_FILE="$2" + python3 - "$PY_EXPR" "$INPUT_FILE" <<'PY' +import json +import sys + +expr = sys.argv[1] +path = sys.argv[2] +data = json.loads(open(path, "r", encoding="utf-8").read()) +safe_builtins = {"bool": bool, "len": len, "str": str} +value = eval(expr, {"__builtins__": safe_builtins}, {"data": data}) +if isinstance(value, (dict, list)): + print(json.dumps(value, ensure_ascii=False)) +elif value is None: + print("") +else: + print(str(value)) +PY +} + +step "健康检查" +curl -fsS "${API_BASE}/health" >"$TMP_DIR/health.json" || fail "health 接口不可用" + +if [ "$BOOTSTRAP_ON_RUN" = "true" ]; then + step "执行 bootstrap" + curl -fsS -X POST "${API_BASE}/poc/ingest/bootstrap" >"$TMP_DIR/bootstrap.json" || fail "bootstrap 失败" +fi + +step "抽取岗位" +curl -fsS -X POST "${API_BASE}/poc/extract/job" \ + -H "Content-Type: application/json" \ + -d '{"text":"明天下午南山会展中心需要2个签到协助,5小时,150/人,女生优先"}' \ + >"$TMP_DIR/extract_job.json" || fail "岗位抽取调用失败" +[ "$(parse_json "bool(data.get('success'))" "$TMP_DIR/extract_job.json")" = "True" ] || fail "岗位抽取失败" +parse_json "data.get('data')" "$TMP_DIR/extract_job.json" >"$TMP_DIR/job.json" +JOB_ID="$(parse_json "data.get('data', {}).get('job_id')" "$TMP_DIR/extract_job.json")" +[ -n "$JOB_ID" ] || fail "岗位抽取缺少 job_id" + +step "岗位入库" +python3 - "$TMP_DIR/job.json" >"$TMP_DIR/ingest_job_payload.json" <<'PY' +import json +import sys +job = json.loads(open(sys.argv[1], "r", encoding="utf-8").read()) +print(json.dumps({"job": job}, ensure_ascii=False)) +PY +curl 
-fsS -X POST "${API_BASE}/poc/ingest/job" \ + -H "Content-Type: application/json" \ + --data @"$TMP_DIR/ingest_job_payload.json" \ + >"$TMP_DIR/ingest_job.json" || fail "岗位入库失败" + +step "岗位匹配工人" +curl -fsS -X POST "${API_BASE}/poc/match/workers" \ + -H "Content-Type: application/json" \ + -d "{\"job_id\":\"${JOB_ID}\",\"top_n\":3}" \ + >"$TMP_DIR/match_workers.json" || fail "岗位匹配工人失败" +MATCH_ID_1="$(parse_json "((data.get('items') or [{}])[0]).get('match_id')" "$TMP_DIR/match_workers.json")" +[ -n "$MATCH_ID_1" ] || fail "岗位匹配工人未返回 match_id" + +step "解释匹配(岗位->工人)" +curl -fsS "${API_BASE}/poc/match/explain/${MATCH_ID_1}" >"$TMP_DIR/explain_1.json" || fail "匹配解释失败(岗位->工人)" + +step "抽取工人" +curl -fsS -X POST "${API_BASE}/poc/extract/worker" \ + -H "Content-Type: application/json" \ + -d '{"text":"我做过商场促销和活动签到,也能做登记和引导,周末都能接,福田南山都方便。"}' \ + >"$TMP_DIR/extract_worker.json" || fail "工人抽取调用失败" +[ "$(parse_json "bool(data.get('success'))" "$TMP_DIR/extract_worker.json")" = "True" ] || fail "工人抽取失败" +parse_json "data.get('data')" "$TMP_DIR/extract_worker.json" >"$TMP_DIR/worker.json" +WORKER_ID="$(parse_json "data.get('data', {}).get('worker_id')" "$TMP_DIR/extract_worker.json")" +[ -n "$WORKER_ID" ] || fail "工人抽取缺少 worker_id" + +step "工人入库" +python3 - "$TMP_DIR/worker.json" >"$TMP_DIR/ingest_worker_payload.json" <<'PY' +import json +import sys +worker = json.loads(open(sys.argv[1], "r", encoding="utf-8").read()) +print(json.dumps({"worker": worker}, ensure_ascii=False)) +PY +curl -fsS -X POST "${API_BASE}/poc/ingest/worker" \ + -H "Content-Type: application/json" \ + --data @"$TMP_DIR/ingest_worker_payload.json" \ + >"$TMP_DIR/ingest_worker.json" || fail "工人入库失败" + +step "工人匹配岗位" +curl -fsS -X POST "${API_BASE}/poc/match/jobs" \ + -H "Content-Type: application/json" \ + -d "{\"worker_id\":\"${WORKER_ID}\",\"top_n\":3}" \ + >"$TMP_DIR/match_jobs.json" || fail "工人匹配岗位失败" +MATCH_ID_2="$(parse_json "((data.get('items') or [{}])[0]).get('match_id')" "$TMP_DIR/match_jobs.json")" 
+[ -n "$MATCH_ID_2" ] || fail "工人匹配岗位未返回 match_id" + +step "解释匹配(工人->岗位)" +curl -fsS "${API_BASE}/poc/match/explain/${MATCH_ID_2}" >"$TMP_DIR/explain_2.json" || fail "匹配解释失败(工人->岗位)" + +step "链路验收通过:抽取 -> 入库 -> 匹配 -> 解释" diff --git a/gig-poc/infrastructure/scripts/dev-up.sh b/gig-poc/infrastructure/scripts/dev-up.sh index cc12a9b..ec7052b 100755 --- a/gig-poc/infrastructure/scripts/dev-up.sh +++ b/gig-poc/infrastructure/scripts/dev-up.sh @@ -3,6 +3,10 @@ set -eu SCRIPT_DIR=$(CDPATH= cd -- "$(dirname "$0")" && pwd) INFRA_DIR=$(CDPATH= cd -- "$SCRIPT_DIR/.." && pwd) +PROJECT_DIR=$(CDPATH= cd -- "$INFRA_DIR/.." && pwd) +RUN_ACCEPTANCE_ON_UP="${RUN_ACCEPTANCE_ON_UP:-true}" +EXPORT_OPENAPI_ON_UP="${EXPORT_OPENAPI_ON_UP:-true}" +RUN_BASELINE_ON_UP="${RUN_BASELINE_ON_UP:-false}" cd "$INFRA_DIR" docker compose -f docker-compose.yml up --build -d @@ -13,4 +17,20 @@ done until curl -fsS -X POST http://127.0.0.1:8000/poc/ingest/bootstrap >/dev/null 2>&1; do sleep 3 done + +if [ "$RUN_ACCEPTANCE_ON_UP" = "true" ]; then + echo "执行一键闭环验收脚本..." + sh "$SCRIPT_DIR/acceptance-e2e.sh" +fi + +if [ "$EXPORT_OPENAPI_ON_UP" = "true" ]; then + echo "导出 OpenAPI 固化产物到 docs/openapi.json ..." + sh "$SCRIPT_DIR/export-openapi.sh" "$PROJECT_DIR/docs/openapi.json" +fi + +if [ "$RUN_BASELINE_ON_UP" = "true" ]; then + echo "执行容量基线压测..." + sh "$SCRIPT_DIR/load-baseline.sh" "$PROJECT_DIR/docs/CAPACITY_BASELINE.md" +fi + echo "本地环境已启动。Web: http://127.0.0.1:5173 API: http://127.0.0.1:8000/docs" diff --git a/gig-poc/infrastructure/scripts/export-openapi.sh b/gig-poc/infrastructure/scripts/export-openapi.sh new file mode 100755 index 0000000..8d77a20 --- /dev/null +++ b/gig-poc/infrastructure/scripts/export-openapi.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env sh +set -eu + +SCRIPT_DIR=$(CDPATH= cd -- "$(dirname "$0")" && pwd) +PROJECT_DIR=$(CDPATH= cd -- "$SCRIPT_DIR/../.." 
&& pwd) +API_PORT="${API_PORT:-8000}" +OUTPUT_PATH="${1:-$PROJECT_DIR/docs/openapi.json}" + +mkdir -p "$(dirname "$OUTPUT_PATH")" +curl -fsS "http://127.0.0.1:${API_PORT}/openapi.json" -o "$OUTPUT_PATH" +echo "OpenAPI 已导出到: $OUTPUT_PATH" diff --git a/gig-poc/infrastructure/scripts/freeze-openapi.sh b/gig-poc/infrastructure/scripts/freeze-openapi.sh new file mode 100755 index 0000000..0436d7e --- /dev/null +++ b/gig-poc/infrastructure/scripts/freeze-openapi.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env sh +set -eu + +SCRIPT_DIR=$(CDPATH= cd -- "$(dirname "$0")" && pwd) +PROJECT_DIR=$(CDPATH= cd -- "$SCRIPT_DIR/../.." && pwd) +OUTPUT_PATH="${1:-$PROJECT_DIR/docs/openapi.json}" +API_PORT="${API_PORT:-8000}" + +mkdir -p "$(dirname "$OUTPUT_PATH")" + +if PYTHONPATH="$PROJECT_DIR/apps/api" python3 - "$OUTPUT_PATH" <<'PY' +import json +import sys +from app.main import app + +output = sys.argv[1] +spec = app.openapi() +with open(output, "w", encoding="utf-8") as f: + json.dump(spec, f, ensure_ascii=False, indent=2) + f.write("\n") +print(f"OpenAPI 已固化到: {output}") +PY +then + exit 0 +fi + +echo "本机缺少 API 依赖,尝试从已运行 API 导出..." +if curl -fsS "http://127.0.0.1:${API_PORT}/openapi.json" -o "$OUTPUT_PATH"; then + echo "OpenAPI 已固化到: $OUTPUT_PATH" + exit 0 +fi + +echo "本机 API 端口不可用,尝试通过 Docker 运行 API 镜像离线导出..." 
+docker compose -f "$PROJECT_DIR/infrastructure/docker-compose.yml" run --rm -T api \ + python -c "import json; from app.main import app; print(json.dumps(app.openapi(), ensure_ascii=False, indent=2))" \ + > "$OUTPUT_PATH" +echo "OpenAPI 已固化到: $OUTPUT_PATH" diff --git a/gig-poc/infrastructure/scripts/load-baseline.sh b/gig-poc/infrastructure/scripts/load-baseline.sh new file mode 100755 index 0000000..39c4b1b --- /dev/null +++ b/gig-poc/infrastructure/scripts/load-baseline.sh @@ -0,0 +1,137 @@ +#!/usr/bin/env sh +set -eu + +API_BASE="${API_BASE:-http://127.0.0.1:8000}" +TOTAL_REQUESTS="${TOTAL_REQUESTS:-400}" +CONCURRENCY="${CONCURRENCY:-40}" +OUTPUT_PATH="${1:-$(CDPATH= cd -- "$(dirname "$0")/../.." && pwd)/docs/CAPACITY_BASELINE.md}" + +TMP_DIR="$(mktemp -d)" +trap 'rm -rf "$TMP_DIR"' EXIT + +echo "[BASELINE] health check" +curl -fsS "$API_BASE/health" >/dev/null + +echo "[BASELINE] ensure bootstrap data" +curl -fsS -X POST "$API_BASE/poc/ingest/bootstrap" >/dev/null + +JOB_ID="$(curl -fsS "$API_BASE/poc/jobs" | python3 -c 'import json,sys; data=json.load(sys.stdin); print((data.get("items") or [{}])[0].get("job_id",""))')" +WORKER_ID="$(curl -fsS "$API_BASE/poc/workers" | python3 -c 'import json,sys; data=json.load(sys.stdin); print((data.get("items") or [{}])[0].get("worker_id",""))')" + +[ -n "$JOB_ID" ] || { echo "no job id found"; exit 1; } +[ -n "$WORKER_ID" ] || { echo "no worker id found"; exit 1; } + +run_case() { + NAME="$1" + METHOD="$2" + URL="$3" + BODY_FILE="$4" + OUT_FILE="$5" + python3 - "$METHOD" "$URL" "$BODY_FILE" "$TOTAL_REQUESTS" "$CONCURRENCY" "$OUT_FILE" <<'PY' +import json +import sys +import time +import urllib.request +from concurrent.futures import ThreadPoolExecutor, as_completed + +method, url, body_file, total, concurrency, out_file = sys.argv[1:] +total = int(total) +concurrency = int(concurrency) +payload = None +if body_file != "-": + payload = open(body_file, "rb").read() + +durations = [] +success = 0 +fail = 0 + +def once(): + 
start = time.perf_counter() + req = urllib.request.Request(url=url, method=method) + req.add_header("Content-Type", "application/json") + try: + if payload is None: + with urllib.request.urlopen(req, timeout=20) as resp: + code = resp.getcode() + else: + with urllib.request.urlopen(req, data=payload, timeout=20) as resp: + code = resp.getcode() + ok = 200 <= code < 400 + except Exception: + ok = False + ms = (time.perf_counter() - start) * 1000 + return ok, ms + +bench_start = time.perf_counter() +with ThreadPoolExecutor(max_workers=concurrency) as ex: + futures = [ex.submit(once) for _ in range(total)] + for f in as_completed(futures): + ok, ms = f.result() + durations.append(ms) + if ok: + success += 1 + else: + fail += 1 +elapsed = time.perf_counter() - bench_start +durations.sort() +def pct(p): + if not durations: + return 0.0 + idx = min(len(durations) - 1, int(len(durations) * p)) + return round(durations[idx], 2) +result = { + "total": total, + "success": success, + "fail": fail, + "success_rate": round(success / total, 4) if total else 0.0, + "rps": round(total / elapsed, 2) if elapsed > 0 else 0.0, + "latency_ms_avg": round(sum(durations) / len(durations), 2) if durations else 0.0, + "latency_ms_p95": pct(0.95), + "latency_ms_p99": pct(0.99), +} +with open(out_file, "w", encoding="utf-8") as f: + json.dump(result, f, ensure_ascii=False, indent=2) +PY + echo "[BASELINE] done $NAME" +} + +printf '{"job_id":"%s","top_n":10}\n' "$JOB_ID" >"$TMP_DIR/match_workers.json" +printf '{"worker_id":"%s","top_n":10}\n' "$WORKER_ID" >"$TMP_DIR/match_jobs.json" + +run_case "health" "GET" "$API_BASE/health" "-" "$TMP_DIR/health.result.json" +run_case "jobs_list" "GET" "$API_BASE/poc/jobs" "-" "$TMP_DIR/jobs.result.json" +run_case "match_workers" "POST" "$API_BASE/poc/match/workers" "$TMP_DIR/match_workers.json" "$TMP_DIR/match_workers.result.json" +run_case "match_jobs" "POST" "$API_BASE/poc/match/jobs" "$TMP_DIR/match_jobs.json" "$TMP_DIR/match_jobs.result.json" +run_case 
"match_workers_cached" "POST" "$API_BASE/poc/match/workers" "$TMP_DIR/match_workers.json" "$TMP_DIR/match_workers_cached.result.json" +run_case "match_jobs_cached" "POST" "$API_BASE/poc/match/jobs" "$TMP_DIR/match_jobs.json" "$TMP_DIR/match_jobs_cached.result.json" +run_case "match_workers_async_enqueue" "POST" "$API_BASE/poc/match/workers/async" "$TMP_DIR/match_workers.json" "$TMP_DIR/match_workers_async.result.json" +run_case "match_jobs_async_enqueue" "POST" "$API_BASE/poc/match/jobs/async" "$TMP_DIR/match_jobs.json" "$TMP_DIR/match_jobs_async.result.json" + +NOW="$(date '+%Y-%m-%d %H:%M:%S %z')" +mkdir -p "$(dirname "$OUTPUT_PATH")" + +{ + echo "# 容量基线(自动生成)" + echo + echo "- 生成时间: $NOW" + echo "- API_BASE: $API_BASE" + echo "- TOTAL_REQUESTS: $TOTAL_REQUESTS" + echo "- CONCURRENCY: $CONCURRENCY" + echo + echo "| 场景 | 成功率 | RPS | 平均延迟(ms) | P95(ms) | P99(ms) |" + echo "| --- | --- | --- | --- | --- | --- |" + for case in health jobs match_workers match_jobs match_workers_cached match_jobs_cached match_workers_async match_jobs_async; do + FILE="$TMP_DIR/${case}.result.json" + python3 - "$case" "$FILE" <<'PY' +import json +import sys +case, path = sys.argv[1], sys.argv[2] +data = json.loads(open(path, "r", encoding="utf-8").read()) +print(f"| {case} | {data['success_rate']} | {data['rps']} | {data['latency_ms_avg']} | {data['latency_ms_p95']} | {data['latency_ms_p99']} |") +PY + done + echo + echo "> 建议:该基线仅代表当前单机/当前数据量下表现,发布前请在目标环境按 2x/5x 峰值复测。" +} >"$OUTPUT_PATH" + +echo "[BASELINE] report generated at $OUTPUT_PATH" diff --git a/gig-poc/infrastructure/scripts/prod-down.sh b/gig-poc/infrastructure/scripts/prod-down.sh new file mode 100755 index 0000000..48337db --- /dev/null +++ b/gig-poc/infrastructure/scripts/prod-down.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env sh +set -eu + +SCRIPT_DIR=$(CDPATH= cd -- "$(dirname "$0")" && pwd) +INFRA_DIR=$(CDPATH= cd -- "$SCRIPT_DIR/.." 
&& pwd) + +cd "$INFRA_DIR" +docker compose -f docker-compose.prod.yml down diff --git a/gig-poc/infrastructure/scripts/prod-up.sh b/gig-poc/infrastructure/scripts/prod-up.sh index 1a3dca5..6e69487 100755 --- a/gig-poc/infrastructure/scripts/prod-up.sh +++ b/gig-poc/infrastructure/scripts/prod-up.sh @@ -3,7 +3,21 @@ set -eu SCRIPT_DIR=$(CDPATH= cd -- "$(dirname "$0")" && pwd) INFRA_DIR=$(CDPATH= cd -- "$SCRIPT_DIR/.." && pwd) +API_PORT="${API_PORT:-8000}" +BOOTSTRAP_ON_UP="${BOOTSTRAP_ON_UP:-true}" cd "$INFRA_DIR" docker compose -f docker-compose.prod.yml up --build -d -echo "生产部署容器已启动。请按实际域名或端口访问 Web。" +echo "等待生产 API 健康检查..." +until curl -fsS "http://127.0.0.1:${API_PORT}/health" >/dev/null 2>&1; do + sleep 3 +done + +if [ "$BOOTSTRAP_ON_UP" = "true" ]; then + echo "执行 bootstrap 样本初始化..." + until curl -fsS -X POST "http://127.0.0.1:${API_PORT}/poc/ingest/bootstrap" >/dev/null 2>&1; do + sleep 3 + done +fi + +echo "生产环境已启动。Web: http://127.0.0.1:${WEB_PORT:-80} API: http://127.0.0.1:${API_PORT}/docs" diff --git a/start.sh b/start.sh new file mode 100644 index 0000000..d078736 --- /dev/null +++ b/start.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env sh +set -eu +cd gig-poc +sh infrastructure/scripts/dev-up.sh