feat: initialize the gig-work backend code

This commit is contained in:
Daniel
2026-04-01 14:19:25 +08:00
parent c6fabe262c
commit 84f8be7c0e
41 changed files with 2813 additions and 147 deletions

View File

@@ -1,23 +1,33 @@
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, Path
from sqlalchemy import text
from sqlalchemy.orm import Session
from app.core.config import get_settings
from app.db.session import get_db
from app.domain.schemas import (
AIObservabilityResponse,
BootstrapResponse,
ExplainResponse,
ExtractResponse,
ExtractTextRequest,
HealthStatus,
IngestJobRequest,
IngestAsyncResponse,
IngestWorkerRequest,
JobCard,
ListResponse,
MatchFeedbackRequest,
MatchAsyncJobsRequest,
MatchAsyncResponse,
MatchAsyncWorkersRequest,
MatchJobsRequest,
MatchResponse,
MatchWeightResponse,
MatchWorkersRequest,
QueueStatusResponse,
SystemOpsResponse,
WorkerCard,
)
from app.repositories.job_repository import JobRepository
@@ -25,14 +35,23 @@ from app.repositories.worker_repository import WorkerRepository
from app.services.card_mapper import job_to_card, worker_to_card
from app.services.extraction_service import ExtractionService
from app.services.ingest_service import IngestService
from app.services.llm_client import LLMClient
from app.services.matching_service import MatchingService
from app.services.cache_service import get_match_cache, get_query_cache
from app.services.runtime_state import get_ingest_queue, get_match_queue, get_traffic_guard
from app.services.rag.lightrag_adapter import LightRAGAdapter
router = APIRouter()
@router.get("/health", response_model=HealthStatus) @router.get(
"/health",
response_model=HealthStatus,
tags=["系统"],
summary="服务健康检查",
description="检查 API 服务、数据库与 RAG 检索组件状态。",
)
def health(db: Session = Depends(get_db)) -> HealthStatus:
settings = get_settings()
db_status = "ok"
@@ -48,32 +67,119 @@ def health(db: Session = Depends(get_db)) -> HealthStatus:
return HealthStatus(service="ok", database=db_status, rag=rag_status, timestamp=datetime.now().astimezone())
@router.post(
"/poc/extract/job",
response_model=ExtractResponse,
tags=["抽取"],
summary="岗位文本抽取",
description="将岗位自然语言文本抽取为结构化 JobCard。",
)
def extract_job(payload: ExtractTextRequest) -> ExtractResponse:
return ExtractionService().extract_job(payload.text)
@router.post(
"/poc/extract/worker",
response_model=ExtractResponse,
tags=["抽取"],
summary="工人文本抽取",
description="将工人自然语言文本抽取为结构化 WorkerCard。",
)
def extract_worker(payload: ExtractTextRequest) -> ExtractResponse:
return ExtractionService().extract_worker(payload.text)
@router.post(
"/poc/ingest/job",
response_model=JobCard,
tags=["入库"],
summary="岗位入库",
description="写入或更新岗位卡片,并同步更新检索索引。",
)
def ingest_job(payload: IngestJobRequest, db: Session = Depends(get_db)) -> JobCard:
return IngestService(db).ingest_job(payload.job)
@router.post(
"/poc/ingest/worker",
response_model=WorkerCard,
tags=["入库"],
summary="工人入库",
description="写入或更新工人卡片,并同步更新检索索引。",
)
def ingest_worker(payload: IngestWorkerRequest, db: Session = Depends(get_db)) -> WorkerCard:
return IngestService(db).ingest_worker(payload.worker)
@router.post(
"/poc/ingest/job/async",
response_model=IngestAsyncResponse,
tags=["入库"],
summary="岗位异步入库",
description="将岗位入库请求写入异步队列,快速返回任务 ID。",
)
def ingest_job_async(payload: IngestJobRequest) -> IngestAsyncResponse:
settings = get_settings()
if not settings.ingest_async_enabled:
raise HTTPException(status_code=400, detail="异步入库未开启")
queue = get_ingest_queue()
try:
task_id = queue.enqueue_job(payload.job)
except RuntimeError as exc:
raise HTTPException(status_code=503, detail=str(exc)) from exc
return IngestAsyncResponse(task_id=task_id, status=queue.task_status(task_id))
@router.post(
"/poc/ingest/worker/async",
response_model=IngestAsyncResponse,
tags=["入库"],
summary="工人异步入库",
description="将工人入库请求写入异步队列,快速返回任务 ID。",
)
def ingest_worker_async(payload: IngestWorkerRequest) -> IngestAsyncResponse:
settings = get_settings()
if not settings.ingest_async_enabled:
raise HTTPException(status_code=400, detail="异步入库未开启")
queue = get_ingest_queue()
try:
task_id = queue.enqueue_worker(payload.worker)
except RuntimeError as exc:
raise HTTPException(status_code=503, detail=str(exc)) from exc
return IngestAsyncResponse(task_id=task_id, status=queue.task_status(task_id))
@router.get(
"/poc/ingest/queue/{task_id}",
response_model=IngestAsyncResponse,
tags=["入库"],
summary="异步入库任务状态",
description="根据 task_id 查询异步入库任务状态。",
)
def ingest_task_status(task_id: str) -> IngestAsyncResponse:
queue = get_ingest_queue()
return IngestAsyncResponse(task_id=task_id, status=queue.task_status(task_id))
@router.post(
"/poc/ingest/bootstrap",
response_model=BootstrapResponse,
tags=["入库"],
summary="样本数据初始化",
description="导入内置样本数据(岗位、工人、技能、类目、区域)并构建检索数据。",
)
def bootstrap(db: Session = Depends(get_db)) -> BootstrapResponse:
return IngestService(db).bootstrap()
@router.post(
"/poc/match/workers",
response_model=MatchResponse,
tags=["匹配"],
summary="岗位匹配工人",
description="支持通过 job_id 或内联 job 进行匹配,返回 top_n 条结果。",
responses={404: {"description": "岗位不存在"}},
)
def match_workers(payload: MatchWorkersRequest, db: Session = Depends(get_db)) -> MatchResponse:
service = MatchingService(db)
source = payload.job
@@ -85,7 +191,14 @@ def match_workers(payload: MatchWorkersRequest, db: Session = Depends(get_db)) -
return MatchResponse(items=service.match_workers(source, payload.top_n))
@router.post(
"/poc/match/jobs",
response_model=MatchResponse,
tags=["匹配"],
summary="工人匹配岗位",
description="支持通过 worker_id 或内联 worker 进行匹配,返回 top_n 条结果。",
responses={404: {"description": "工人不存在"}},
)
def match_jobs(payload: MatchJobsRequest, db: Session = Depends(get_db)) -> MatchResponse:
service = MatchingService(db)
source = payload.worker
@@ -97,37 +210,245 @@ def match_jobs(payload: MatchJobsRequest, db: Session = Depends(get_db)) -> Matc
return MatchResponse(items=service.match_jobs(source, payload.top_n))
@router.post(
"/poc/match/workers/async",
response_model=MatchAsyncResponse,
tags=["匹配"],
summary="岗位异步匹配工人",
description="将匹配任务放入队列异步计算,适合高并发削峰。",
)
def match_workers_async(payload: MatchAsyncWorkersRequest) -> MatchAsyncResponse:
settings = get_settings()
if not settings.match_async_enabled:
raise HTTPException(status_code=400, detail="异步匹配未开启")
queue = get_match_queue()
try:
task_id = queue.enqueue_workers(payload.job_id, payload.top_n)
except RuntimeError as exc:
raise HTTPException(status_code=503, detail=str(exc)) from exc
return MatchAsyncResponse(task_id=task_id, status=queue.task_status(task_id))
@router.post(
"/poc/match/jobs/async",
response_model=MatchAsyncResponse,
tags=["匹配"],
summary="工人异步匹配岗位",
description="将匹配任务放入队列异步计算,适合高并发削峰。",
)
def match_jobs_async(payload: MatchAsyncJobsRequest) -> MatchAsyncResponse:
settings = get_settings()
if not settings.match_async_enabled:
raise HTTPException(status_code=400, detail="异步匹配未开启")
queue = get_match_queue()
try:
task_id = queue.enqueue_jobs(payload.worker_id, payload.top_n)
except RuntimeError as exc:
raise HTTPException(status_code=503, detail=str(exc)) from exc
return MatchAsyncResponse(task_id=task_id, status=queue.task_status(task_id))
@router.get(
"/poc/match/queue/{task_id}",
response_model=MatchAsyncResponse,
tags=["匹配"],
summary="异步匹配任务状态",
description="根据 task_id 查询异步匹配任务状态,完成后返回匹配结果。",
)
def match_task_status(task_id: str) -> MatchAsyncResponse:
queue = get_match_queue()
status = queue.task_status(task_id)
items = queue.task_result(task_id)
return MatchAsyncResponse(task_id=task_id, status=status, items=items)
@router.get(
"/poc/match/explain/{match_id}",
response_model=ExplainResponse,
tags=["匹配"],
summary="匹配结果解释",
description="根据 match_id 获取匹配明细和解释理由。",
responses={404: {"description": "匹配记录不存在"}},
)
def explain_match(
match_id: str = Path(..., description="匹配记录 ID"),
db: Session = Depends(get_db),
) -> ExplainResponse:
match = MatchingService(db).explain(match_id)
if match is None:
raise HTTPException(status_code=404, detail="匹配记录不存在")
return ExplainResponse(match=match)
@router.post(
"/poc/match/feedback",
response_model=MatchWeightResponse,
tags=["匹配"],
summary="匹配反馈学习",
description="提交单条匹配的接受/拒绝反馈,用于在线更新排序权重。",
responses={404: {"description": "匹配记录不存在"}},
)
def feedback_match(payload: MatchFeedbackRequest, db: Session = Depends(get_db)) -> MatchWeightResponse:
service = MatchingService(db)
weights = service.feedback(payload.match_id, payload.accepted)
if weights is None:
raise HTTPException(status_code=404, detail="匹配记录不存在")
return MatchWeightResponse(weights=weights, learning_enabled=get_settings().ranking_learning_enabled)
@router.get(
"/poc/match/weights",
response_model=MatchWeightResponse,
tags=["匹配"],
summary="当前排序权重",
description="查看当前生效的排序权重(默认权重或学习后的权重)。",
)
def get_match_weights(db: Session = Depends(get_db)) -> MatchWeightResponse:
service = MatchingService(db)
return MatchWeightResponse(weights=service.current_weights(), learning_enabled=get_settings().ranking_learning_enabled)
@router.get(
"/poc/ops/ai/metrics",
response_model=AIObservabilityResponse,
tags=["系统"],
summary="AI 观测指标",
description="返回 AI 调用的限流、熔断、降级与 fallback 命中率指标。",
)
def ai_metrics() -> AIObservabilityResponse:
metrics = LLMClient(get_settings()).metrics()
return AIObservabilityResponse(metrics=metrics)
@router.get(
"/poc/ops/system/metrics",
response_model=SystemOpsResponse,
tags=["系统"],
summary="系统运行指标",
description="返回全局流量护栏、缓存与异步队列指标。",
)
def system_metrics() -> SystemOpsResponse:
queue_stats = get_ingest_queue().stats()
match_queue_stats = get_match_queue().stats()
match_cache_stats = get_match_cache().stats()
query_cache_stats = get_query_cache().stats()
return SystemOpsResponse(
traffic=get_traffic_guard().snapshot(),
cache={
"backend": match_cache_stats.get("backend", "memory"),
"match_hit_rate": match_cache_stats.get("hit_rate", 0.0),
"query_hit_rate": query_cache_stats.get("hit_rate", 0.0),
"match_size": int(match_cache_stats.get("size", 0)),
"query_size": int(query_cache_stats.get("size", 0)),
},
ingest_queue=QueueStatusResponse(
queued=queue_stats["queued"],
processed=queue_stats["processed"],
failed=queue_stats["failed"],
),
match_queue=QueueStatusResponse(
queued=match_queue_stats["queued"],
processed=match_queue_stats["processed"],
failed=match_queue_stats["failed"],
),
)
@router.get(
"/poc/jobs",
response_model=ListResponse,
tags=["查询"],
summary="岗位列表查询",
description="查询岗位列表,当前返回全量数据。",
)
def list_jobs(db: Session = Depends(get_db)) -> ListResponse:
settings = get_settings()
cache = get_query_cache()
cache_key = "jobs:list"
if settings.query_cache_enabled:
cached = cache.get(cache_key)
if cached is not None:
return ListResponse(items=cached["items"], total=cached["total"])
items = [job_to_card(item).model_dump(mode="json") for item in JobRepository(db).list()]
result = ListResponse(items=items, total=len(items))
if settings.query_cache_enabled:
cache.set(cache_key, result.model_dump(mode="json"))
return result
@router.get("/poc/workers", response_model=ListResponse) @router.get(
"/poc/workers",
response_model=ListResponse,
tags=["查询"],
summary="工人列表查询",
description="查询工人列表,当前返回全量数据。",
)
def list_workers(db: Session = Depends(get_db)) -> ListResponse:
settings = get_settings()
cache = get_query_cache()
cache_key = "workers:list"
if settings.query_cache_enabled:
cached = cache.get(cache_key)
if cached is not None:
return ListResponse(items=cached["items"], total=cached["total"])
items = [worker_to_card(item).model_dump(mode="json") for item in WorkerRepository(db).list()]
result = ListResponse(items=items, total=len(items))
if settings.query_cache_enabled:
cache.set(cache_key, result.model_dump(mode="json"))
return result
@router.get("/poc/jobs/{job_id}", response_model=JobCard) @router.get(
def get_job(job_id: str, db: Session = Depends(get_db)) -> JobCard: "/poc/jobs/{job_id}",
response_model=JobCard,
tags=["查询"],
summary="岗位详情查询",
description="根据岗位 ID 查询单个岗位详情。",
responses={404: {"description": "岗位不存在"}},
)
def get_job(
job_id: str = Path(..., description="岗位 ID"),
db: Session = Depends(get_db),
) -> JobCard:
settings = get_settings()
cache = get_query_cache()
cache_key = f"jobs:detail:{job_id}"
if settings.query_cache_enabled:
cached = cache.get(cache_key)
if cached is not None:
return JobCard(**cached)
item = JobRepository(db).get(job_id)
if item is None:
raise HTTPException(status_code=404, detail="岗位不存在")
result = job_to_card(item)
if settings.query_cache_enabled:
cache.set(cache_key, result.model_dump(mode="json"))
return result
@router.get("/poc/workers/{worker_id}", response_model=WorkerCard) @router.get(
def get_worker(worker_id: str, db: Session = Depends(get_db)) -> WorkerCard: "/poc/workers/{worker_id}",
response_model=WorkerCard,
tags=["查询"],
summary="工人详情查询",
description="根据工人 ID 查询单个工人详情。",
responses={404: {"description": "工人不存在"}},
)
def get_worker(
worker_id: str = Path(..., description="工人 ID"),
db: Session = Depends(get_db),
) -> WorkerCard:
settings = get_settings()
cache = get_query_cache()
cache_key = f"workers:detail:{worker_id}"
if settings.query_cache_enabled:
cached = cache.get(cache_key)
if cached is not None:
return WorkerCard(**cached)
item = WorkerRepository(db).get(worker_id)
if item is None:
raise HTTPException(status_code=404, detail="工人不存在")
result = worker_to_card(item)
if settings.query_cache_enabled:
cache.set(cache_key, result.model_dump(mode="json"))
return result
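As a quick smoke test of the new async matching flow, the sketch below enqueues a task and polls its status. It is a hedged example: the base URL and the job_id value are assumptions, while the paths and payload fields come from the routes above.

import time
import httpx

BASE = "http://localhost:8000"  # assumed local dev address (app_port defaults to 8000)

with httpx.Client(base_url=BASE, timeout=10.0) as client:
    # enqueue an async match task for an already ingested job (job_id is a placeholder)
    created = client.post("/poc/match/workers/async", json={"job_id": "job_demo_001", "top_n": 5})
    created.raise_for_status()
    task_id = created.json()["task_id"]
    # poll the queue endpoint until the background worker finishes the task
    while True:
        status = client.get(f"/poc/match/queue/{task_id}").json()
        if status["status"] in {"done", "failed", "not_found"}:
            break
        time.sleep(0.5)
    print(status["status"], status.get("items"))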

View File

@@ -17,18 +17,39 @@ class Settings(BaseSettings):
app_host: str = "0.0.0.0"
app_port: int = 8000
log_level: str = "INFO"
app_rate_limit_per_minute: int = 1200
app_circuit_breaker_error_rate: float = 0.5
app_circuit_breaker_min_requests: int = 50
app_circuit_breaker_window_seconds: int = 60
app_circuit_breaker_cooldown_seconds: int = 30
alert_webhook_url: str | None = None
database_url: str = "postgresql+psycopg://gig:gig@postgres:5432/gig_poc"
database_pool_size: int = 20
database_max_overflow: int = 30
database_pool_timeout: int = 30
qdrant_url: str = "http://qdrant:6333"
qdrant_collection: str = "gig_poc_entities"
vector_size: int = 64
llm_enabled: bool = False
llm_base_url: str | None = None
llm_fallback_base_urls: list[str] = Field(default_factory=list)
llm_api_key: str | None = None
llm_model: str = "gpt-5.4"
extraction_llm_max_retries: int = 2
embedding_backend: str = "hash" # hash | openai_compatible
embedding_enabled: bool = False
embedding_base_url: str | None = None
embedding_fallback_base_urls: list[str] = Field(default_factory=list)
embedding_api_key: str | None = None
embedding_model: str = "text-embedding-3-small"
embedding_vector_size: int = 1536
ai_request_timeout_seconds: float = 30.0
ai_rate_limit_per_minute: int = 120
ai_circuit_breaker_fail_threshold: int = 5
ai_circuit_breaker_cooldown_seconds: int = 30
bootstrap_jobs: int = 100
bootstrap_workers: int = 300
@@ -38,12 +59,27 @@ class Settings(BaseSettings):
prompt_dir: Path = Field(default=ROOT_DIR / "packages" / "prompts")
sample_data_dir: Path = Field(default=ROOT_DIR / "packages" / "sample-data")
shared_types_dir: Path = Field(default=ROOT_DIR / "packages" / "shared-types")
data_dir: Path = Field(default=ROOT_DIR / "data")
match_weights_path: Path = Field(default=ROOT_DIR / "data" / "match_weights.json")
score_skill_weight: float = 0.35
score_region_weight: float = 0.20
score_time_weight: float = 0.15
score_experience_weight: float = 0.15
score_reliability_weight: float = 0.15
ranking_learning_enabled: bool = True
ranking_learning_rate: float = 0.08
cache_backend: str = "memory" # memory | redis
redis_url: str = "redis://redis:6379/0"
redis_prefix: str = "gig_poc"
match_cache_enabled: bool = True
match_cache_ttl_seconds: int = 30
query_cache_enabled: bool = True
query_cache_ttl_seconds: int = 20
ingest_async_enabled: bool = True
ingest_queue_max_size: int = 10000
match_async_enabled: bool = True
match_queue_max_size: int = 10000
@lru_cache
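Since Settings is a pydantic BaseSettings class, the new knobs can be overridden per environment. A minimal sketch, assuming the default env-var mapping (no custom env_prefix is visible in this hunk) and that the variables are set before the first get_settings() call, which is cached by @lru_cache:

import os

# assumed env names: field name upper-cased, default pydantic-settings behaviour
os.environ["CACHE_BACKEND"] = "redis"
os.environ["REDIS_URL"] = "redis://localhost:6379/0"
os.environ["MATCH_CACHE_ENABLED"] = "false"   # disable the 30s match result cache

from app.core.config import get_settings

settings = get_settings()
print(settings.cache_backend, settings.match_cache_enabled)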

View File

@@ -7,7 +7,14 @@ from app.core.config import get_settings
settings = get_settings()
engine = create_engine(
settings.database_url,
future=True,
pool_pre_ping=True,
pool_size=settings.database_pool_size,
max_overflow=settings.database_max_overflow,
pool_timeout=settings.database_pool_timeout,
)
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False, future=True)

View File

@@ -3,7 +3,7 @@ from __future__ import annotations
from datetime import datetime
from enum import Enum
from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
class SalaryType(str, Enum):
@@ -19,82 +19,89 @@ class SourceType(str, Enum):
class Salary(BaseModel):
type: SalaryType = Field(default=SalaryType.daily, description="薪资类型daily/hourly/monthly/task")
amount: float = Field(default=0, description="薪资金额")
currency: str = Field(default="CNY", description="货币类型,默认 CNY")
class SkillScore(BaseModel):
name: str = Field(description="技能名称")
score: float = Field(ge=0, le=1, description="技能熟练度,范围 0~1")
class JobCard(BaseModel):
job_id: str = Field(description="岗位唯一 ID")
title: str = Field(description="岗位标题")
category: str = Field(description="岗位类别")
description: str = Field(description="岗位描述")
skills: list[str] = Field(default_factory=list, description="岗位技能要求列表")
city: str = Field(description="城市")
region: str = Field(description="区域")
location_detail: str = Field(description="详细地点描述")
start_time: datetime = Field(description="岗位开始时间ISO-8601")
duration_hours: float = Field(gt=0, description="工时(小时),必须大于 0")
headcount: int = Field(gt=0, description="招聘人数,必须大于 0")
salary: Salary = Field(description="薪资信息")
work_mode: str = Field(description="工作模式,如兼职、全职、活动")
tags: list[str] = Field(default_factory=list, description="业务标签列表")
confidence: float = Field(ge=0, le=1, description="数据置信度,范围 0~1")
class WorkerCard(BaseModel):
worker_id: str = Field(description="工人唯一 ID")
name: str = Field(description="工人姓名或昵称")
description: str = Field(description="工人自我描述")
skills: list[SkillScore] = Field(default_factory=list, description="技能及熟练度列表")
cities: list[str] = Field(default_factory=list, description="可接单城市列表")
regions: list[str] = Field(default_factory=list, description="可接单区域列表")
availability: list[str] = Field(default_factory=list, description="可上岗时间描述")
experience_tags: list[str] = Field(default_factory=list, description="经验标签列表")
reliability_score: float = Field(ge=0, le=1, description="履约可靠性分,范围 0~1")
profile_completion: float = Field(ge=0, le=1, description="档案完善度,范围 0~1")
confidence: float = Field(ge=0, le=1, description="数据置信度,范围 0~1")
class MatchBreakdown(BaseModel):
skill_score: float = Field(ge=0, le=1, description="技能匹配分,范围 0~1")
region_score: float = Field(ge=0, le=1, description="地域匹配分,范围 0~1")
time_score: float = Field(ge=0, le=1, description="时间匹配分,范围 0~1")
experience_score: float = Field(ge=0, le=1, description="经验匹配分,范围 0~1")
reliability_score: float = Field(ge=0, le=1, description="可靠性匹配分,范围 0~1")
class MatchResult(BaseModel):
match_id: str = Field(description="匹配记录 ID")
source_type: SourceType = Field(description="匹配方向job_to_worker 或 worker_to_job")
source_id: str = Field(description="源实体 ID")
target_id: str = Field(description="目标实体 ID")
match_score: float = Field(ge=0, le=1, description="综合匹配分,范围 0~1")
breakdown: MatchBreakdown = Field(description="多维打分拆解")
reasons: list[str] = Field(default_factory=list, min_length=3, description="匹配理由,至少 3 条")
class ExtractTextRequest(BaseModel):
text: str = Field(min_length=5, description="待抽取的自然语言文本,最少 5 个字符")
model_config = ConfigDict(
json_schema_extra={
"example": {
"text": "明天下午南山会展中心需要2个签到协助5小时150/人,女生优先",
}
}
)
class IngestJobRequest(BaseModel):
job: JobCard = Field(description="岗位卡片对象")
class IngestWorkerRequest(BaseModel):
worker: WorkerCard = Field(description="工人卡片对象")
class MatchWorkersRequest(BaseModel):
job_id: str | None = Field(default=None, description="岗位 ID(与 job 二选一)")
job: JobCard | None = Field(default=None, description="内联岗位对象(与 job_id 二选一)")
top_n: int = Field(default=10, ge=1, le=50, description="返回条数,范围 1~50")
@model_validator(mode="after")
def validate_source(self) -> "MatchWorkersRequest":
@@ -104,9 +111,9 @@ class MatchWorkersRequest(BaseModel):
class MatchJobsRequest(BaseModel):
worker_id: str | None = Field(default=None, description="工人 ID(与 worker 二选一)")
worker: WorkerCard | None = Field(default=None, description="内联工人对象(与 worker_id 二选一)")
top_n: int = Field(default=10, ge=1, le=50, description="返回条数,范围 1~50")
@model_validator(mode="after")
def validate_source(self) -> "MatchJobsRequest":
@@ -116,38 +123,86 @@ class MatchJobsRequest(BaseModel):
class ExtractResponse(BaseModel):
success: bool = Field(description="抽取是否成功")
data: JobCard | WorkerCard | None = Field(default=None, description="抽取结果对象,可能为空")
errors: list[str] = Field(default_factory=list, description="错误信息列表")
missing_fields: list[str] = Field(default_factory=list, description="缺失字段列表")
class BootstrapResponse(BaseModel):
jobs: int = Field(description="导入岗位数量")
workers: int = Field(description="导入工人数量")
skills: int = Field(description="技能词条数量")
categories: int = Field(description="类目数量")
regions: int = Field(description="区域数量")
class HealthStatus(BaseModel):
service: str = Field(description="服务状态,通常为 ok")
database: str = Field(description="数据库状态ok 或 error")
rag: str = Field(description="RAG 组件状态ok 或 error")
timestamp: datetime = Field(description="服务端当前时间")
class ListResponse(BaseModel):
items: list[dict] = Field(description="列表项")
total: int = Field(description="总数")
class MatchResponse(BaseModel):
items: list[MatchResult] = Field(description="匹配结果列表")
class ExplainResponse(BaseModel):
match: MatchResult = Field(description="单条匹配结果详情")
class MatchFeedbackRequest(BaseModel):
match_id: str = Field(description="匹配记录 ID")
accepted: bool = Field(description="反馈是否接受该推荐")
class MatchWeightResponse(BaseModel):
weights: dict[str, float] = Field(description="当前生效的排序权重")
learning_enabled: bool = Field(description="是否开启在线学习")
class AIObservabilityResponse(BaseModel):
metrics: dict[str, float | int] = Field(description="AI 调用观测指标")
class IngestAsyncResponse(BaseModel):
task_id: str = Field(description="异步任务 ID")
status: str = Field(description="任务状态")
class QueueStatusResponse(BaseModel):
queued: int = Field(description="当前队列中任务数量")
processed: int = Field(description="历史处理成功数量")
failed: int = Field(description="历史处理失败数量")
class MatchAsyncWorkersRequest(BaseModel):
job_id: str = Field(description="岗位 ID")
top_n: int = Field(default=10, ge=1, le=50, description="返回条数,范围 1~50")
class MatchAsyncJobsRequest(BaseModel):
worker_id: str = Field(description="工人 ID")
top_n: int = Field(default=10, ge=1, le=50, description="返回条数,范围 1~50")
class MatchAsyncResponse(BaseModel):
task_id: str = Field(description="异步任务 ID")
status: str = Field(description="任务状态")
items: list[MatchResult] | None = Field(default=None, description="任务完成后返回的匹配结果")
class SystemOpsResponse(BaseModel):
traffic: dict[str, float | int] = Field(description="全局流量护栏与错误窗口指标")
cache: dict[str, float | int | str] = Field(description="缓存命中与大小")
ingest_queue: QueueStatusResponse = Field(description="异步入库队列状态")
match_queue: QueueStatusResponse = Field(description="异步匹配队列状态")
class PromptOutput(BaseModel):
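A small usage sketch for the request schemas; the field semantics come from the descriptions above, but the validator bodies are elided in this hunk, so the exact error raised when both job_id and job are missing is an assumption.

from app.domain.schemas import MatchAsyncWorkersRequest, MatchWorkersRequest

req = MatchWorkersRequest(job_id="job_demo_001", top_n=5)      # either job_id or an inline job
async_req = MatchAsyncWorkersRequest(job_id="job_demo_001")    # async variant takes an ID only
print(req.model_dump(exclude_none=True))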

View File

@@ -1,6 +1,8 @@
from contextlib import asynccontextmanager
from time import perf_counter
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from app.api.routes import router
@@ -9,6 +11,7 @@ from app.core.logging import configure_logging, logger
from app.db.base import Base
from app.db.session import engine
from app.services.rag.lightrag_adapter import LightRAGAdapter
from app.services.runtime_state import get_ingest_queue, get_match_queue, get_traffic_guard
settings = get_settings()
@@ -18,14 +21,33 @@ configure_logging(settings.log_level)
@asynccontextmanager
async def lifespan(_: FastAPI):
Base.metadata.create_all(bind=engine)
get_ingest_queue().start()
get_match_queue().start()
try:
LightRAGAdapter(settings).ensure_ready()
except Exception:
logger.exception("Qdrant initialization skipped during startup")
yield
get_ingest_queue().stop()
get_match_queue().stop()
app = FastAPI(
title=settings.app_name,
description=(
"Gig POC 接口文档。\n\n"
"接口分组:系统、抽取、入库、匹配、查询。\n"
"完整业务说明请参考项目文档 `docs/API.md`。"
),
openapi_tags=[
{"name": "系统", "description": "服务与依赖组件状态检查接口"},
{"name": "抽取", "description": "自然语言文本抽取为结构化卡片"},
{"name": "入库", "description": "结构化岗位/工人数据写入与初始化"},
{"name": "匹配", "description": "岗位与工人双向匹配及结果解释"},
{"name": "查询", "description": "岗位/工人列表与详情查询"},
],
lifespan=lifespan,
)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
@@ -33,4 +55,23 @@ app.add_middleware(
allow_methods=["*"], allow_methods=["*"],
allow_headers=["*"], allow_headers=["*"],
) )
@app.middleware("http")
async def traffic_guard_middleware(request: Request, call_next):
guard = get_traffic_guard()
allowed, reason = guard.allow(request.url.path)
if not allowed:
status_code = 429 if reason == "rate_limited" else 503
return JSONResponse(status_code=status_code, content={"detail": reason})
start = perf_counter()
try:
response = await call_next(request)
except Exception:
guard.record(500, (perf_counter() - start) * 1000)
raise
guard.record(response.status_code, (perf_counter() - start) * 1000)
return response
app.include_router(router)
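From the client side the traffic guard middleware shows up as plain HTTP errors. A hedged sketch (the thresholds live in get_traffic_guard() and Settings, not in this file):

import httpx

resp = httpx.get("http://localhost:8000/poc/jobs")  # assumed local dev address
if resp.status_code in (429, 503):
    # 429 when rate limited, 503 when the global circuit is open
    print("rejected by traffic guard:", resp.json()["detail"])
else:
    print("ok, total jobs:", resp.json()["total"])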

View File

@@ -0,0 +1,87 @@
from __future__ import annotations
import time
from dataclasses import dataclass
from threading import Lock
from app.core.config import Settings
@dataclass
class EndpointState:
current_minute: int = 0
minute_count: int = 0
consecutive_failures: int = 0
circuit_open_until: float = 0.0
class AIGuard:
_lock = Lock()
_endpoint_states: dict[str, EndpointState] = {}
_metrics = {
"requests_total": 0,
"success_total": 0,
"fail_total": 0,
"fallback_total": 0,
"rate_limited_total": 0,
"circuit_open_total": 0,
"endpoint_failover_total": 0,
}
def __init__(self, settings: Settings):
self.settings = settings
def allow_request(self, endpoint: str) -> tuple[bool, str]:
now = time.time()
now_minute = int(now // 60)
with self._lock:
state = self._endpoint_states.setdefault(endpoint, EndpointState())
if state.circuit_open_until > now:
self._metrics["circuit_open_total"] += 1
return False, "circuit_open"
if state.current_minute != now_minute:
state.current_minute = now_minute
state.minute_count = 0
if state.minute_count >= self.settings.ai_rate_limit_per_minute:
self._metrics["rate_limited_total"] += 1
return False, "rate_limited"
state.minute_count += 1
self._metrics["requests_total"] += 1
return True, "ok"
def record_success(self, endpoint: str) -> None:
with self._lock:
state = self._endpoint_states.setdefault(endpoint, EndpointState())
state.consecutive_failures = 0
state.circuit_open_until = 0.0
self._metrics["success_total"] += 1
def record_failure(self, endpoint: str) -> None:
with self._lock:
state = self._endpoint_states.setdefault(endpoint, EndpointState())
state.consecutive_failures += 1
self._metrics["fail_total"] += 1
if state.consecutive_failures >= self.settings.ai_circuit_breaker_fail_threshold:
state.circuit_open_until = time.time() + self.settings.ai_circuit_breaker_cooldown_seconds
state.consecutive_failures = 0
def record_fallback(self) -> None:
with self._lock:
self._metrics["fallback_total"] += 1
def record_failover(self) -> None:
with self._lock:
self._metrics["endpoint_failover_total"] += 1
def snapshot(self) -> dict:
with self._lock:
requests_total = self._metrics["requests_total"]
fallback_total = self._metrics["fallback_total"]
success_total = self._metrics["success_total"]
fail_total = self._metrics["fail_total"]
return {
**self._metrics,
"fallback_hit_rate": round(fallback_total / requests_total, 4) if requests_total else 0.0,
"success_rate": round(success_total / requests_total, 4) if requests_total else 0.0,
"failure_rate": round(fail_total / requests_total, 4) if requests_total else 0.0,
}
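Usage sketch for the guard (LLMClient below wires it in automatically); note that _metrics and _endpoint_states are class attributes, so every instance shares the same counters and the snapshot reflects process-wide totals.

from app.core.config import get_settings
from app.services.ai_guard import AIGuard

guard = AIGuard(get_settings())
endpoint = "https://llm.example.com/v1"  # placeholder endpoint key
allowed, reason = guard.allow_request(endpoint)
if allowed:
    guard.record_success(endpoint)
else:
    print("blocked:", reason)  # "rate_limited" or "circuit_open"
print(guard.snapshot()["success_rate"])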

View File

@@ -0,0 +1,146 @@
from __future__ import annotations
import json
import time
from functools import lru_cache
from threading import Lock
from typing import Any, Protocol
from app.core.config import get_settings
from app.core.logging import logger
try:
from redis import Redis
except Exception: # pragma: no cover
Redis = None # type: ignore[assignment]
class Cache(Protocol):
def get(self, key: str): ...
def set(self, key: str, value: Any) -> None: ...
def delete(self, key: str) -> None: ...
def clear(self) -> None: ...
def stats(self) -> dict[str, int | float | str]: ...
class TTLCache:
def __init__(self, ttl_seconds: int):
self.ttl_seconds = ttl_seconds
self._store: dict[str, tuple[float, Any]] = {}
self._lock = Lock()
self._hits = 0
self._misses = 0
def get(self, key: str):
now = time.time()
with self._lock:
item = self._store.get(key)
if item is None:
self._misses += 1
return None
expires_at, value = item
if expires_at < now:
self._store.pop(key, None)
self._misses += 1
return None
self._hits += 1
return value
def set(self, key: str, value: Any) -> None:
expires_at = time.time() + self.ttl_seconds
with self._lock:
self._store[key] = (expires_at, value)
def delete(self, key: str) -> None:
with self._lock:
self._store.pop(key, None)
def clear(self) -> None:
with self._lock:
self._store.clear()
def stats(self) -> dict[str, int | float | str]:
with self._lock:
requests = self._hits + self._misses
hit_rate = (self._hits / requests) if requests else 0.0
return {
"backend": "memory",
"size": len(self._store),
"hits": self._hits,
"misses": self._misses,
"hit_rate": round(hit_rate, 4),
}
class RedisCache:
def __init__(self, url: str, prefix: str, ttl_seconds: int):
if Redis is None:
raise RuntimeError("redis package is not installed")
self.client = Redis.from_url(url, decode_responses=True)
self.prefix = prefix
self.ttl_seconds = ttl_seconds
self._hits = 0
self._misses = 0
self._lock = Lock()
def get(self, key: str):
raw = self.client.get(self._key(key))
with self._lock:
if raw is None:
self._misses += 1
return None
self._hits += 1
return json.loads(raw)
def set(self, key: str, value: Any) -> None:
self.client.set(self._key(key), json.dumps(value, ensure_ascii=False), ex=self.ttl_seconds)
def delete(self, key: str) -> None:
self.client.delete(self._key(key))
def clear(self) -> None:
pattern = f"{self.prefix}:*"
cursor = 0
while True:
cursor, keys = self.client.scan(cursor=cursor, match=pattern, count=200)
if keys:
self.client.delete(*keys)
if cursor == 0:
break
def stats(self) -> dict[str, int | float | str]:
with self._lock:
requests = self._hits + self._misses
hit_rate = (self._hits / requests) if requests else 0.0
return {
"backend": "redis",
"size": int(self.client.dbsize()),
"hits": self._hits,
"misses": self._misses,
"hit_rate": round(hit_rate, 4),
}
def _key(self, key: str) -> str:
return f"{self.prefix}:{key}"
def _build_cache(namespace: str, ttl_seconds: int) -> Cache:
settings = get_settings()
if settings.cache_backend == "redis":
try:
return RedisCache(settings.redis_url, f"{settings.redis_prefix}:{namespace}", ttl_seconds=ttl_seconds)
except Exception:
logger.exception("failed to init redis cache namespace=%s fallback to memory cache", namespace)
return TTLCache(ttl_seconds=ttl_seconds)
@lru_cache
def get_match_cache() -> Cache:
settings = get_settings()
return _build_cache("match", settings.match_cache_ttl_seconds)
@lru_cache
def get_query_cache() -> Cache:
settings = get_settings()
return _build_cache("query", settings.query_cache_ttl_seconds)

View File

@@ -26,13 +26,9 @@ class ExtractionService:
def extract_job(self, text: str) -> ExtractResponse:
logger.info("extract_job request text=%s", text)
llm_card = self._llm_extract_with_retry(text, self.settings.prompt_dir / "job_extract.md", JobCard)
if llm_card:
return ExtractResponse(success=True, data=llm_card)
try:
card = self._extract_job_rule(text)
@@ -43,13 +39,9 @@ class ExtractionService:
def extract_worker(self, text: str) -> ExtractResponse:
logger.info("extract_worker request text=%s", text)
llm_card = self._llm_extract_with_retry(text, self.settings.prompt_dir / "worker_extract.md", WorkerCard)
if llm_card:
return ExtractResponse(success=True, data=llm_card)
try:
card = self._extract_worker_rule(text)
@@ -65,6 +57,57 @@ class ExtractionService:
logger.exception("LLM extraction failed, fallback to rule-based extraction") logger.exception("LLM extraction failed, fallback to rule-based extraction")
return None return None
def _llm_extract_with_retry(self, text: str, prompt_path: Path, schema_cls):
base_prompt = load_prompt(prompt_path)
llm_result = self._llm_extract(text, prompt_path)
if not llm_result:
return None
try:
return schema_cls(**llm_result.content)
except ValidationError as exc:
logger.warning("LLM extraction validation failed, trying schema-aware retry")
last_error = exc
last_output = llm_result.content
for _ in range(self.settings.extraction_llm_max_retries):
missing_fields = self._missing_fields(last_error)
repair_prompt = self._build_repair_prompt(base_prompt, schema_cls, missing_fields)
try:
repair_result = self.llm_client.extract_json(
repair_prompt,
self._build_repair_input(text, last_output, missing_fields),
)
except Exception:
logger.exception("LLM schema-aware retry failed")
return None
if not repair_result:
return None
last_output = repair_result.content
try:
return schema_cls(**repair_result.content)
except ValidationError as exc:
last_error = exc
logger.warning("LLM schema-aware retry still invalid missing_fields=%s", self._missing_fields(exc))
return None
def _build_repair_prompt(self, base_prompt: str, schema_cls, missing_fields: list[str]) -> str:
schema_json = json.dumps(schema_cls.model_json_schema(), ensure_ascii=False)
return (
f"{base_prompt}\n\n"
"你是结构化修复助手。请严格输出可被 JSON 解析的对象,不要输出解释文字。\n"
"目标是根据给定 schema 修复字段缺失和类型错误,优先保证必填字段完整。\n"
f"缺失或错误字段: {', '.join(missing_fields) if missing_fields else 'unknown'}\n"
f"JSON Schema: {schema_json}\n"
)
def _build_repair_input(self, original_text: str, last_output: dict, missing_fields: list[str]) -> str:
return (
f"原始文本:\n{original_text}\n\n"
f"上一次抽取结果:\n{json.dumps(last_output, ensure_ascii=False)}\n\n"
f"请重点修复字段:\n{json.dumps(missing_fields, ensure_ascii=False)}"
)
def _extract_job_rule(self, text: str) -> JobCard:
skill_hits = [item for item in self.skills if item in text]
category = next((item for item in self.categories if item in text), "活动执行")
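The entry point is used exactly as in the routes above; which path answers (LLM with the schema-aware retry, or the rule-based fallback) depends on llm_enabled and on whether the LLM output validates. A hedged sketch with sample text borrowed from the schema example:

from app.services.extraction_service import ExtractionService

resp = ExtractionService().extract_job("明天下午南山会展中心需要2个签到协助5小时150/人,女生优先")
if resp.success and resp.data is not None:
    print(resp.data.title, resp.data.skills)
else:
    print("extraction failed:", resp.errors, resp.missing_fields)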

View File

@@ -0,0 +1,105 @@
from __future__ import annotations
from dataclasses import dataclass
from queue import Empty, Full, Queue
from threading import Event, Lock, Thread
from typing import Any
from app.core.config import Settings
from app.core.logging import logger
from app.db.session import SessionLocal
from app.domain.schemas import JobCard, WorkerCard
from app.services.ingest_service import IngestService
from app.utils.ids import generate_id
@dataclass
class QueueTask:
task_id: str
kind: str
payload: dict[str, Any]
class IngestQueue:
def __init__(self, settings: Settings):
self.settings = settings
self.queue: Queue[QueueTask] = Queue(maxsize=settings.ingest_queue_max_size)
self._stop_event = Event()
self._thread: Thread | None = None
self._lock = Lock()
self._status: dict[str, str] = {}
self._processed = 0
self._failed = 0
def start(self) -> None:
if not self.settings.ingest_async_enabled:
return
if self._thread and self._thread.is_alive():
return
self._thread = Thread(target=self._run, daemon=True, name="ingest-queue-worker")
self._thread.start()
logger.info("ingest queue worker started")
def stop(self) -> None:
self._stop_event.set()
if self._thread and self._thread.is_alive():
self._thread.join(timeout=3)
def enqueue_job(self, card: JobCard) -> str:
return self._enqueue("job", card.model_dump(mode="json"))
def enqueue_worker(self, card: WorkerCard) -> str:
return self._enqueue("worker", card.model_dump(mode="json"))
def task_status(self, task_id: str) -> str:
with self._lock:
return self._status.get(task_id, "not_found")
def stats(self) -> dict[str, int]:
with self._lock:
return {
"queued": self.queue.qsize(),
"processed": self._processed,
"failed": self._failed,
}
def _enqueue(self, kind: str, payload: dict[str, Any]) -> str:
task_id = generate_id("queue")
task = QueueTask(task_id=task_id, kind=kind, payload=payload)
with self._lock:
self._status[task_id] = "queued"
try:
self.queue.put_nowait(task)
except Full as exc:
with self._lock:
self._status[task_id] = "rejected"
raise RuntimeError("ingest queue is full") from exc
return task_id
def _run(self) -> None:
while not self._stop_event.is_set():
try:
task = self.queue.get(timeout=0.5)
except Empty:
continue
try:
with self._lock:
self._status[task.task_id] = "processing"
with SessionLocal() as db:
service = IngestService(db)
if task.kind == "job":
service.ingest_job(JobCard(**task.payload))
elif task.kind == "worker":
service.ingest_worker(WorkerCard(**task.payload))
else:
raise ValueError(f"unknown task kind {task.kind}")
with self._lock:
self._status[task.task_id] = "done"
self._processed += 1
except Exception:
logger.exception("ingest queue task failed task_id=%s kind=%s", task.task_id, task.kind)
with self._lock:
self._status[task.task_id] = "failed"
self._failed += 1
finally:
self.queue.task_done()
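Usage sketch for the queue; the API layer goes through the shared instance from get_ingest_queue(), and the flow below assumes ingest_async_enabled is left at its default of True.

from app.domain.schemas import JobCard
from app.services.runtime_state import get_ingest_queue

card = JobCard(
    job_id="job_demo_001", title="会展签到协助", category="活动执行",
    description="签到引导", skills=["签到"], city="深圳", region="南山",
    location_detail="会展中心", start_time="2026-04-02T14:00:00+08:00",
    duration_hours=5, headcount=2, salary={"type": "daily", "amount": 150},
    work_mode="兼职", confidence=0.9,
)

queue = get_ingest_queue()
queue.start()                      # no-op if the worker thread is already running
task_id = queue.enqueue_job(card)  # raises RuntimeError when the queue is full
print(queue.task_status(task_id))  # queued -> processing -> done / failed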

View File

@@ -9,6 +9,7 @@ from app.core.logging import logger
from app.domain.schemas import BootstrapResponse, JobCard, WorkerCard
from app.repositories.job_repository import JobRepository
from app.repositories.worker_repository import WorkerRepository
from app.services.cache_service import get_match_cache, get_query_cache
from app.services.rag.lightrag_adapter import LightRAGAdapter
@@ -19,17 +20,27 @@ class IngestService:
self.job_repository = JobRepository(db)
self.worker_repository = WorkerRepository(db)
self.rag = LightRAGAdapter(self.settings)
self.match_cache = get_match_cache()
self.query_cache = get_query_cache()
def ingest_job(self, card: JobCard) -> JobCard:
logger.info("ingest_job job_id=%s", card.job_id)
self.job_repository.upsert(card)
self.rag.upsert_job(card)
if self.settings.match_cache_enabled:
self.match_cache.clear()
if self.settings.query_cache_enabled:
self.query_cache.clear()
return card
def ingest_worker(self, card: WorkerCard) -> WorkerCard:
logger.info("ingest_worker worker_id=%s", card.worker_id)
self.worker_repository.upsert(card)
self.rag.upsert_worker(card)
if self.settings.match_cache_enabled:
self.match_cache.clear()
if self.settings.query_cache_enabled:
self.query_cache.clear()
return card
def bootstrap(self) -> BootstrapResponse:
@@ -43,6 +54,10 @@ class IngestService:
self.ingest_job(JobCard(**item))
for item in workers:
self.ingest_worker(WorkerCard(**item))
if self.settings.match_cache_enabled:
self.match_cache.clear()
if self.settings.query_cache_enabled:
self.query_cache.clear()
return BootstrapResponse(
jobs=len(jobs),
workers=len(workers),

View File

@@ -6,14 +6,17 @@ import httpx
from app.core.config import Settings
from app.domain.schemas import PromptOutput
from app.services.ai_guard import AIGuard
class LLMClient:
def __init__(self, settings: Settings):
self.settings = settings
self.guard = AIGuard(settings)
def extract_json(self, system_prompt: str, user_text: str) -> PromptOutput | None:
if not self.settings.llm_enabled or not self.settings.llm_base_url or not self.settings.llm_api_key:
self.guard.record_fallback()
return None
payload = {
@@ -25,10 +28,77 @@ class LLMClient:
"temperature": 0.1, "temperature": 0.1,
"response_format": {"type": "json_object"}, "response_format": {"type": "json_object"},
} }
headers = {"Authorization": f"Bearer {self.settings.llm_api_key}"} endpoints = [self.settings.llm_base_url, *self.settings.llm_fallback_base_urls]
with httpx.Client(timeout=30.0) as client: raw_text = self._request_with_failover(
response = client.post(f"{self.settings.llm_base_url.rstrip('/')}/chat/completions", json=payload, headers=headers) endpoints=endpoints,
response.raise_for_status() path="/chat/completions",
data = response.json() payload=payload,
raw_text = data["choices"][0]["message"]["content"] api_key=self.settings.llm_api_key,
)
if raw_text is None:
self.guard.record_fallback()
return None
return PromptOutput(content=json.loads(raw_text), raw_text=raw_text)
def embedding(self, text: str) -> list[float] | None:
if not self.settings.embedding_enabled:
return None
base_url = self.settings.embedding_base_url or self.settings.llm_base_url
api_key = self.settings.embedding_api_key or self.settings.llm_api_key
if not base_url or not api_key:
self.guard.record_fallback()
return None
payload = {
"model": self.settings.embedding_model,
"input": text,
}
endpoints = [base_url, *self.settings.embedding_fallback_base_urls]
data = self._request_with_failover(
endpoints=endpoints,
path="/embeddings",
payload=payload,
api_key=api_key,
return_full_response=True,
)
if data is None:
self.guard.record_fallback()
return None
embedding = data["data"][0]["embedding"]
if not isinstance(embedding, list):
return None
return [float(item) for item in embedding]
def metrics(self) -> dict:
return self.guard.snapshot()
def _request_with_failover(
self,
endpoints: list[str],
path: str,
payload: dict,
api_key: str,
return_full_response: bool = False,
):
if not endpoints:
return None
for index, endpoint in enumerate([item for item in endpoints if item]):
allowed, _ = self.guard.allow_request(endpoint)
if not allowed:
continue
if index > 0:
self.guard.record_failover()
try:
headers = {"Authorization": f"Bearer {api_key}"}
with httpx.Client(timeout=self.settings.ai_request_timeout_seconds) as client:
response = client.post(f"{endpoint.rstrip('/')}{path}", json=payload, headers=headers)
response.raise_for_status()
data = response.json()
self.guard.record_success(endpoint)
if return_full_response:
return data
return data["choices"][0]["message"]["content"]
except Exception:
self.guard.record_failure(endpoint)
continue
return None
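Caller-side sketch of the failover behaviour: extract_json returns None (and counts a fallback) whenever the LLM is disabled, rate limited, or every configured endpoint fails, so the extraction service can drop to its rule-based path.

from app.core.config import get_settings
from app.services.llm_client import LLMClient

client = LLMClient(get_settings())
result = client.extract_json("只返回 JSON 对象。", "明天需要两名传单派发员")
print(result.content if result else "no LLM answer, caller falls back")
print(client.metrics())  # requests_total, fallback_hit_rate, endpoint_failover_total, ...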

View File

@@ -0,0 +1,121 @@
from __future__ import annotations
from dataclasses import dataclass
from queue import Empty, Full, Queue
from threading import Event, Lock, Thread
from typing import Any
from app.core.config import Settings
from app.core.logging import logger
from app.db.session import SessionLocal
from app.domain.schemas import MatchResult
from app.repositories.job_repository import JobRepository
from app.repositories.worker_repository import WorkerRepository
from app.services.card_mapper import job_to_card, worker_to_card
from app.services.matching_service import MatchingService
from app.utils.ids import generate_id
@dataclass
class MatchTask:
task_id: str
kind: str
source_id: str
top_n: int
class MatchQueue:
def __init__(self, settings: Settings):
self.settings = settings
self.queue: Queue[MatchTask] = Queue(maxsize=settings.match_queue_max_size)
self._stop_event = Event()
self._thread: Thread | None = None
self._lock = Lock()
self._status: dict[str, str] = {}
self._results: dict[str, list[dict[str, Any]]] = {}
self._processed = 0
self._failed = 0
def start(self) -> None:
if not self.settings.match_async_enabled:
return
if self._thread and self._thread.is_alive():
return
self._thread = Thread(target=self._run, daemon=True, name="match-queue-worker")
self._thread.start()
logger.info("match queue worker started")
def stop(self) -> None:
self._stop_event.set()
if self._thread and self._thread.is_alive():
self._thread.join(timeout=3)
def enqueue_workers(self, job_id: str, top_n: int) -> str:
return self._enqueue("workers", job_id, top_n)
def enqueue_jobs(self, worker_id: str, top_n: int) -> str:
return self._enqueue("jobs", worker_id, top_n)
def task_status(self, task_id: str) -> str:
with self._lock:
return self._status.get(task_id, "not_found")
def task_result(self, task_id: str) -> list[dict[str, Any]] | None:
with self._lock:
return self._results.get(task_id)
def stats(self) -> dict[str, int]:
with self._lock:
return {
"queued": self.queue.qsize(),
"processed": self._processed,
"failed": self._failed,
}
def _enqueue(self, kind: str, source_id: str, top_n: int) -> str:
task_id = generate_id("mq")
task = MatchTask(task_id=task_id, kind=kind, source_id=source_id, top_n=top_n)
with self._lock:
self._status[task_id] = "queued"
try:
self.queue.put_nowait(task)
except Full as exc:
with self._lock:
self._status[task_id] = "rejected"
raise RuntimeError("match queue is full") from exc
return task_id
def _run(self) -> None:
while not self._stop_event.is_set():
try:
task = self.queue.get(timeout=0.5)
except Empty:
continue
try:
with self._lock:
self._status[task.task_id] = "processing"
with SessionLocal() as db:
service = MatchingService(db)
if task.kind == "workers":
job = JobRepository(db).get(task.source_id)
if job is None:
raise ValueError("job not found")
items = service.match_workers(job_to_card(job), task.top_n)
elif task.kind == "jobs":
worker = WorkerRepository(db).get(task.source_id)
if worker is None:
raise ValueError("worker not found")
items = service.match_jobs(worker_to_card(worker), task.top_n)
else:
raise ValueError(f"unknown task kind {task.kind}")
with self._lock:
self._status[task.task_id] = "done"
self._results[task.task_id] = [item.model_dump(mode="json") for item in items]
self._processed += 1
except Exception:
logger.exception("match queue task failed task_id=%s kind=%s", task.task_id, task.kind)
with self._lock:
self._status[task.task_id] = "failed"
self._failed += 1
finally:
self.queue.task_done()

View File

@@ -10,8 +10,10 @@ from app.domain.schemas import JobCard, MatchBreakdown, MatchResult, QueryFilter
from app.repositories.job_repository import JobRepository
from app.repositories.match_repository import MatchRepository
from app.repositories.worker_repository import WorkerRepository
from app.services.cache_service import get_match_cache
from app.services.card_mapper import job_to_card, worker_to_card
from app.services.rag.lightrag_adapter import LightRAGAdapter
from app.services.weight_service import MatchWeightService
from app.utils.ids import generate_id
@@ -23,9 +25,16 @@ class MatchingService:
self.workers = WorkerRepository(db)
self.matches = MatchRepository(db)
self.rag = LightRAGAdapter(self.settings)
self.weight_service = MatchWeightService(self.settings)
self.cache = get_match_cache()
def match_workers(self, source: JobCard, top_n: int) -> list[MatchResult]:
logger.info("match_workers source_id=%s top_n=%s", source.job_id, top_n)
cache_key = f"match_workers:{source.job_id}:{top_n}"
if self.settings.match_cache_enabled:
cached = self.cache.get(cache_key)
if cached is not None:
return self._parse_cached_matches(cached)
query_text = " ".join([source.title, source.category, source.city, source.region, *source.skills, *source.tags]) query_text = " ".join([source.title, source.category, source.city, source.region, *source.skills, *source.tags])
candidate_ids = self.rag.search( candidate_ids = self.rag.search(
query_text=query_text, query_text=query_text,
@@ -36,10 +45,17 @@ class MatchingService:
results = [self._build_job_to_worker_match(source, worker_to_card(worker)) for worker in candidates] results = [self._build_job_to_worker_match(source, worker_to_card(worker)) for worker in candidates]
results = sorted(results, key=lambda item: item.match_score, reverse=True)[:top_n] results = sorted(results, key=lambda item: item.match_score, reverse=True)[:top_n]
self.matches.bulk_replace(results, SourceType.job_to_worker.value, source.job_id) self.matches.bulk_replace(results, SourceType.job_to_worker.value, source.job_id)
if self.settings.match_cache_enabled:
self.cache.set(cache_key, [item.model_dump(mode="json") for item in results])
return results
def match_jobs(self, source: WorkerCard, top_n: int) -> list[MatchResult]:
logger.info("match_jobs source_id=%s top_n=%s", source.worker_id, top_n)
cache_key = f"match_jobs:{source.worker_id}:{top_n}"
if self.settings.match_cache_enabled:
cached = self.cache.get(cache_key)
if cached is not None:
return self._parse_cached_matches(cached)
query_text = " ".join([source.name, *source.cities, *source.regions, *[item.name for item in source.skills], *source.experience_tags]) query_text = " ".join([source.name, *source.cities, *source.regions, *[item.name for item in source.skills], *source.experience_tags])
city = source.cities[0] if source.cities else None city = source.cities[0] if source.cities else None
candidate_ids = self.rag.search( candidate_ids = self.rag.search(
@@ -51,6 +67,8 @@ class MatchingService:
results = [self._build_worker_to_job_match(source, job_to_card(job)) for job in candidates]
results = sorted(results, key=lambda item: item.match_score, reverse=True)[:top_n]
self.matches.bulk_replace(results, SourceType.worker_to_job.value, source.worker_id)
if self.settings.match_cache_enabled:
self.cache.set(cache_key, [item.model_dump(mode="json") for item in results])
return results
def explain(self, match_id: str) -> MatchResult | None:
@@ -61,6 +79,20 @@ class MatchingService:
return match_record_to_schema(record)
def feedback(self, match_id: str, accepted: bool) -> dict[str, float] | None:
record = self.matches.get(match_id)
if record is None:
return None
from app.services.card_mapper import match_record_to_schema
match = match_record_to_schema(record)
if self.settings.ranking_learning_enabled:
return self.weight_service.update_from_feedback(match.breakdown, accepted)
return self.weight_service.get_weights()
def current_weights(self) -> dict[str, float]:
return self.weight_service.get_weights()
def _build_job_to_worker_match(self, job: JobCard, worker: WorkerCard) -> MatchResult:
job_skills = set(job.skills)
expanded_skills = self.rag.expand_skills(job.skills)
@@ -143,13 +175,14 @@ class MatchingService:
experience_score: float,
reliability_score: float,
) -> float:
breakdown = MatchBreakdown(
skill_score=skill_score,
region_score=region_score,
time_score=time_score,
experience_score=experience_score,
reliability_score=reliability_score,
)
return self.weight_service.score(breakdown)
def _build_reasons(
self,
@@ -176,3 +209,10 @@ class MatchingService:
while len(reasons) < 3:
reasons.append("岗位需求与候选画像存在基础匹配")
return reasons[:5]
def _parse_cached_matches(self, cached) -> list[MatchResult]:
if isinstance(cached, list) and cached and isinstance(cached[0], MatchResult):
return cached
if isinstance(cached, list):
return [MatchResult(**item) for item in cached]
return []

View File

@@ -10,6 +10,7 @@ from qdrant_client import QdrantClient, models
from app.core.config import Settings
from app.core.logging import logger
from app.domain.schemas import JobCard, QueryFilters, WorkerCard
from app.services.llm_client import LLMClient
class LightRAGAdapter:
@@ -17,13 +18,28 @@ class LightRAGAdapter:
self.settings = settings
self.client = QdrantClient(url=settings.qdrant_url)
self.skill_graph = self._load_skill_graph()
self.llm_client = LLMClient(settings)
self.collection_vector_size: int | None = None
def ensure_ready(self) -> None:
collections = {item.name for item in self.client.get_collections().collections}
expected_size = self._configured_vector_size()
if self.settings.qdrant_collection not in collections:
self.client.create_collection(
collection_name=self.settings.qdrant_collection,
vectors_config=models.VectorParams(size=expected_size, distance=models.Distance.COSINE),
)
self.collection_vector_size = expected_size
return
info = self.client.get_collection(self.settings.qdrant_collection)
configured_size = info.config.params.vectors.size
self.collection_vector_size = int(configured_size)
if self.collection_vector_size != expected_size:
logger.warning(
"qdrant vector size mismatch, collection=%s expected=%s actual=%s; using actual size",
self.settings.qdrant_collection,
expected_size,
self.collection_vector_size,
)
def health(self) -> str:
@@ -125,14 +141,40 @@ class LightRAGAdapter:
)
def _vectorize(self, text: str) -> list[float]:
if self.settings.embedding_enabled and self.settings.embedding_backend == "openai_compatible":
try:
embedding = self.llm_client.embedding(text)
if embedding:
return self._normalize_embedding(embedding)
except Exception:
logger.exception("embedding request failed, fallback to hash vector")
target_size = self._active_vector_size()
vector = [0.0 for _ in range(target_size)]
tokens = self._tokenize(text)
for token in tokens:
index = hash(token) % target_size
vector[index] += 1.0
norm = math.sqrt(sum(item * item for item in vector)) or 1.0
return [item / norm for item in vector]
def _normalize_embedding(self, embedding: list[float]) -> list[float]:
target_size = self._active_vector_size()
vector = embedding[:target_size]
if len(vector) < target_size:
vector.extend([0.0] * (target_size - len(vector)))
norm = math.sqrt(sum(item * item for item in vector)) or 1.0
return [item / norm for item in vector]
def _active_vector_size(self) -> int:
if self.collection_vector_size:
return self.collection_vector_size
return self._configured_vector_size()
def _configured_vector_size(self) -> int:
if self.settings.embedding_enabled and self.settings.embedding_backend == "openai_compatible":
return self.settings.embedding_vector_size
return self.settings.vector_size
def _tokenize(self, text: str) -> list[str]:
cleaned = [part.strip().lower() for part in text.replace("", " ").replace("", " ").replace("", " ").split()]
tokens = [part for part in cleaned if part]

View File

@@ -0,0 +1,23 @@
from __future__ import annotations
from functools import lru_cache
from app.core.config import get_settings
from app.services.ingest_queue import IngestQueue
from app.services.match_queue import MatchQueue
from app.services.traffic_guard import TrafficGuard
@lru_cache
def get_ingest_queue() -> IngestQueue:
return IngestQueue(get_settings())
@lru_cache
def get_match_queue() -> MatchQueue:
return MatchQueue(get_settings())
@lru_cache
def get_traffic_guard() -> TrafficGuard:
return TrafficGuard(get_settings())

View File

@@ -0,0 +1,108 @@
from __future__ import annotations
import time
from collections import deque
from threading import Lock
import httpx
from app.core.config import Settings
from app.core.logging import logger
class TrafficGuard:
def __init__(self, settings: Settings):
self.settings = settings
self._lock = Lock()
self._minute = 0
self._minute_count = 0
self._open_until = 0.0
self._events: deque[tuple[float, int]] = deque()
self._requests = 0
self._rate_limited = 0
self._circuit_blocked = 0
self._avg_latency_ms = 0.0
self._alert_last_sent = 0.0
def allow(self, path: str) -> tuple[bool, str]:
now = time.time()
with self._lock:
minute = int(now // 60)
if self._minute != minute:
self._minute = minute
self._minute_count = 0
if self._minute_count >= self.settings.app_rate_limit_per_minute:
self._rate_limited += 1
return False, "rate_limited"
if self._open_until > now and not self._is_exempt(path):
self._circuit_blocked += 1
return False, "circuit_open"
self._minute_count += 1
self._requests += 1
return True, "ok"
def record(self, status_code: int, latency_ms: float) -> None:
now = time.time()
with self._lock:
self._events.append((now, status_code))
self._avg_latency_ms = self._ema(self._avg_latency_ms, latency_ms)
self._trim(now)
total = len(self._events)
if total < self.settings.app_circuit_breaker_min_requests:
return
errors = sum(1 for _, code in self._events if code >= 500)
error_rate = errors / total
if error_rate >= self.settings.app_circuit_breaker_error_rate:
self._open_until = now + self.settings.app_circuit_breaker_cooldown_seconds
self._send_alert(
"app circuit opened",
{
"error_rate": round(error_rate, 4),
"window_requests": total,
"cooldown_seconds": self.settings.app_circuit_breaker_cooldown_seconds,
},
)
def snapshot(self) -> dict[str, float | int]:
now = time.time()
with self._lock:
self._trim(now)
total = len(self._events)
errors = sum(1 for _, code in self._events if code >= 500)
return {
"requests_total": self._requests,
"rate_limited_total": self._rate_limited,
"circuit_blocked_total": self._circuit_blocked,
"window_requests": total,
"window_errors": errors,
"window_error_rate": round((errors / total), 4) if total else 0.0,
"avg_latency_ms": round(self._avg_latency_ms, 2),
"circuit_open": 1 if self._open_until > now else 0,
}
def _trim(self, now: float) -> None:
lower = now - self.settings.app_circuit_breaker_window_seconds
while self._events and self._events[0][0] < lower:
self._events.popleft()
def _ema(self, prev: float, value: float, alpha: float = 0.2) -> float:
if prev <= 0:
return value
return alpha * value + (1 - alpha) * prev
def _is_exempt(self, path: str) -> bool:
return path in {"/health", "/docs", "/openapi.json", "/poc/ops/system/metrics", "/poc/ops/ai/metrics"}
def _send_alert(self, message: str, extra: dict) -> None:
now = time.time()
if now - self._alert_last_sent < 30:
return
self._alert_last_sent = now
logger.warning("%s extra=%s", message, extra)
if not self.settings.alert_webhook_url:
return
try:
with httpx.Client(timeout=2.0) as client:
client.post(self.settings.alert_webhook_url, json={"message": message, "extra": extra})
except Exception:
logger.exception("alert webhook send failed")

View File

@@ -0,0 +1,77 @@
from __future__ import annotations
import json
from pathlib import Path
from app.core.config import Settings
from app.core.logging import logger
from app.domain.schemas import MatchBreakdown
class MatchWeightService:
def __init__(self, settings: Settings):
self.settings = settings
self.path: Path = settings.match_weights_path
def default_weights(self) -> dict[str, float]:
return {
"skill": self.settings.score_skill_weight,
"region": self.settings.score_region_weight,
"time": self.settings.score_time_weight,
"experience": self.settings.score_experience_weight,
"reliability": self.settings.score_reliability_weight,
}
def get_weights(self) -> dict[str, float]:
weights = self.default_weights()
if not self.path.exists():
return self._normalize(weights)
try:
data = json.loads(self.path.read_text(encoding="utf-8"))
for key in weights:
value = data.get(key)
if isinstance(value, (int, float)):
weights[key] = float(value)
except Exception:
logger.exception("failed to read learned ranking weights, fallback to defaults")
return self._normalize(weights)
def score(self, breakdown: MatchBreakdown) -> float:
weights = self.get_weights()
return (
weights["skill"] * breakdown.skill_score
+ weights["region"] * breakdown.region_score
+ weights["time"] * breakdown.time_score
+ weights["experience"] * breakdown.experience_score
+ weights["reliability"] * breakdown.reliability_score
)
def update_from_feedback(self, breakdown: MatchBreakdown, accepted: bool) -> dict[str, float]:
weights = self.get_weights()
features = {
"skill": breakdown.skill_score,
"region": breakdown.region_score,
"time": breakdown.time_score,
"experience": breakdown.experience_score,
"reliability": breakdown.reliability_score,
}
target = 1.0 if accepted else 0.0
prediction = sum(weights[name] * value for name, value in features.items())
error = target - prediction
lr = self.settings.ranking_learning_rate
updated = {name: max(0.0, weights[name] + lr * error * value) for name, value in features.items()}
normalized = self._normalize(updated)
self._save_weights(normalized)
return normalized
def _save_weights(self, weights: dict[str, float]) -> None:
self.settings.data_dir.mkdir(parents=True, exist_ok=True)
self.path.write_text(json.dumps(weights, ensure_ascii=False, indent=2), encoding="utf-8")
def _normalize(self, weights: dict[str, float]) -> dict[str, float]:
total = sum(max(value, 0.0) for value in weights.values())
if total <= 0:
fallback = self.default_weights()
total = sum(fallback.values())
return {key: value / total for key, value in fallback.items()}
return {key: max(value, 0.0) / total for key, value in weights.items()}

View File

@@ -11,6 +11,7 @@ dependencies = [
"pydantic==2.11.3", "pydantic==2.11.3",
"pydantic-settings==2.8.1", "pydantic-settings==2.8.1",
"httpx==0.28.1", "httpx==0.28.1",
"redis==5.2.1",
"qdrant-client==1.14.2", "qdrant-client==1.14.2",
"python-dateutil==2.9.0.post0", "python-dateutil==2.9.0.post0",
"orjson==3.10.16", "orjson==3.10.16",

1
gig-poc/apps/web/src/vite-env.d.ts vendored Normal file
View File

@@ -0,0 +1 @@
/// <reference types="vite/client" />

View File

@@ -1,8 +1,98 @@
# API 文档(中文完整版)
## 机器可读文档(推荐作为联调基准)
- Swagger UI:`http://127.0.0.1:8000/docs`
- OpenAPI JSON:`http://127.0.0.1:8000/openapi.json`
- 一键导出:
```bash
cd gig-poc
sh infrastructure/scripts/export-openapi.sh
```
- 导出到自定义路径:
```bash
sh infrastructure/scripts/export-openapi.sh ./docs/openapi.prod.json
```
## 通用约定
- 基础路径:默认通过网关暴露为 `/api`,下文写的是服务内部路径(如 `/poc/...`)。
- 数据格式:`Content-Type: application/json`
- 时间字段:ISO-8601 格式(示例:`2026-03-30T12:00:00+08:00`)。
- 分数字段:大部分评分为 `0~1` 浮点数。
- 常见错误码:
- `400/422`:请求参数不合法(字段缺失、类型不匹配、取值超范围)。
- `404`:查询对象不存在(如岗位 ID、工人 ID、匹配记录 ID 不存在)。
- `500`:服务内部异常(数据库、向量检索、模型调用失败等)。
## 数据结构说明
### Salary薪资结构
| 字段 | 类型 | 说明 |
| --- | --- | --- |
| `type` | `daily/hourly/monthly/task` | 薪资类型:日薪/时薪/月薪/按单 |
| `amount` | `number` | 薪资金额 |
| `currency` | `string` | 货币,默认 `CNY` |
### SkillScore技能分
| 字段 | 类型 | 说明 |
| --- | --- | --- |
| `name` | `string` | 技能名称 |
| `score` | `number` | 技能熟练度,范围 `0~1` |
### JobCard岗位卡片
| 字段 | 类型 | 说明 |
| --- | --- | --- |
| `job_id` | `string` | 岗位唯一 ID |
| `title` | `string` | 岗位标题 |
| `category` | `string` | 岗位类别 |
| `description` | `string` | 岗位描述 |
| `skills` | `string[]` | 需要的技能列表 |
| `city` | `string` | 城市 |
| `region` | `string` | 区域 |
| `location_detail` | `string` | 详细地址或地标 |
| `start_time` | `datetime` | 开始时间 |
| `duration_hours` | `number` | 工时(小时,>0) |
| `headcount` | `number` | 需求人数(>0) |
| `salary` | `Salary` | 薪资信息 |
| `work_mode` | `string` | 工作模式(如兼职/全职/活动) |
| `tags` | `string[]` | 业务标签 |
| `confidence` | `number` | 抽取或录入置信度,范围 `0~1` |
### WorkerCard工人卡片
| 字段 | 类型 | 说明 |
| --- | --- | --- |
| `worker_id` | `string` | 工人唯一 ID |
| `name` | `string` | 姓名/昵称 |
| `description` | `string` | 自我描述 |
| `skills` | `SkillScore[]` | 技能及熟练度 |
| `cities` | `string[]` | 可接单城市 |
| `regions` | `string[]` | 可接单区域 |
| `availability` | `string[]` | 可上岗时段(自由文本) |
| `experience_tags` | `string[]` | 经验标签 |
| `reliability_score` | `number` | 履约可靠性分,范围 `0~1` |
| `profile_completion` | `number` | 档案完善度,范围 `0~1` |
| `confidence` | `number` | 抽取或录入置信度,范围 `0~1` |
### MatchResult匹配结果
| 字段 | 类型 | 说明 |
| --- | --- | --- |
| `match_id` | `string` | 匹配记录 ID |
| `source_type` | `job_to_worker/worker_to_job` | 匹配方向 |
| `source_id` | `string` | 发起匹配的实体 ID |
| `target_id` | `string` | 被匹配到的实体 ID |
| `match_score` | `number` | 综合匹配分(`0~1`) |
| `breakdown.skill_score` | `number` | 技能匹配分 |
| `breakdown.region_score` | `number` | 地域匹配分 |
| `breakdown.time_score` | `number` | 时间匹配分 |
| `breakdown.experience_score` | `number` | 经验匹配分 |
| `breakdown.reliability_score` | `number` | 可靠性匹配分 |
| `reasons` | `string[]` | 至少 3 条可解释理由 |
## 系统接口
### `GET /health`
用途:检查服务、数据库、RAG 组件是否可用。
返回示例:
```json
{
"service": "ok",
@@ -12,38 +102,242 @@
}
```
字段说明:
- `service`:API 进程状态。
- `database`:数据库连通状态(`ok/error`)。
- `rag`:检索增强组件状态(`ok/error`)。
- `timestamp`:服务端当前时间。
### `GET /poc/ops/ai/metrics`
用途:查看 AI 调用观测指标(限流、熔断、降级、fallback 命中率)。
返回示例:
```json
{
"metrics": {
"requests_total": 12,
"success_total": 10,
"fail_total": 2,
"fallback_total": 1,
"rate_limited_total": 0,
"circuit_open_total": 0,
"endpoint_failover_total": 1,
"fallback_hit_rate": 0.0833,
"success_rate": 0.8333,
"failure_rate": 0.1667
}
}
```
### `GET /poc/ops/system/metrics`
用途:查看全局系统护栏指标(流量限流、熔断状态、缓存命中率、异步队列状态)。
返回示例:
```json
{
"traffic": {
"requests_total": 1000,
"rate_limited_total": 0,
"circuit_blocked_total": 0,
"window_requests": 120,
"window_errors": 3,
"window_error_rate": 0.025,
"avg_latency_ms": 35.4,
"circuit_open": 0
},
"cache": {
"backend": "redis",
"match_hit_rate": 0.62,
"query_hit_rate": 0.73,
"match_size": 320,
"query_size": 800
},
"ingest_queue": {
"queued": 0,
"processed": 1200,
"failed": 2
},
"match_queue": {
"queued": 2,
"processed": 3400,
"failed": 7
}
}
```
## 抽取接口
### `POST /poc/extract/job`
用途:将岗位自然语言文本抽取为结构化 `JobCard`
请求体:
```json
{
"text": "明天下午南山会展中心需要2个签到协助5小时150/人,女生优先"
}
```
请求字段说明:
- `text`:待抽取文本,最小长度 5。
返回结构:`ExtractResponse`
```json
{
"success": true,
"data": {
"job_id": "job_demo_001",
"title": "活动签到协助",
"category": "会展活动",
"description": "南山会展中心活动签到协助",
"skills": ["签到", "引导"],
"city": "深圳",
"region": "南山",
"location_detail": "南山会展中心",
"start_time": "2026-04-01T14:00:00+08:00",
"duration_hours": 5,
"headcount": 2,
"salary": { "type": "daily", "amount": 150, "currency": "CNY" },
"work_mode": "兼职",
"tags": ["女生优先"],
"confidence": 0.88
},
"errors": [],
"missing_fields": []
}
```
字段说明:
- `success`:是否抽取成功。
- `data`:抽取出的结构化岗位对象;失败时可能为 `null`
- `errors`:错误信息列表。
- `missing_fields`:缺失字段列表,便于前端二次补录。
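下面是一段调用该接口的最小示意(用项目已有依赖 httpx 编写;`http://127.0.0.1:8000` 为本文默认的本地地址,属于示例假设,请按实际部署调整):
```python
import httpx

API_BASE = "http://127.0.0.1:8000"  # 假设:本地开发环境默认地址

def extract_job(text: str) -> dict:
    """调用岗位抽取接口;失败或缺字段时打印提示,便于前端二次补录。"""
    resp = httpx.post(f"{API_BASE}/poc/extract/job", json={"text": text}, timeout=30)
    resp.raise_for_status()
    body = resp.json()
    if not body.get("success"):
        print("抽取失败:", body.get("errors"))
    elif body.get("missing_fields"):
        print("建议补录字段:", body["missing_fields"])
    return body

if __name__ == "__main__":
    extract_job("明天下午南山会展中心需要2个签到协助5小时150/人,女生优先")
```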
### `POST /poc/extract/worker`
用途:将工人自然语言文本抽取为结构化 `WorkerCard`。
请求体:
```json
{
"text": "我做过商场促销和活动签到,也能做登记和引导,周末都能接,福田南山都方便。"
}
```
返回结构:同 `ExtractResponse`,其中 `data``WorkerCard`
说明:
- 适合把聊天文本/简历摘要快速转成可入库结构。
- 若模型无法识别关键字段,会在 `missing_fields` 中给出提示。
## 入库接口
### `POST /poc/ingest/job`
用途:写入或更新岗位卡片(同时更新检索索引)。
请求体:
```json
{
"job": {
"job_id": "job_001",
"title": "活动签到",
"category": "会展活动",
"description": "负责活动签到与引导",
"skills": ["签到", "沟通"],
"city": "深圳",
"region": "福田",
"location_detail": "会展中心",
"start_time": "2026-04-02T09:00:00+08:00",
"duration_hours": 8,
"headcount": 3,
"salary": { "type": "daily", "amount": 180, "currency": "CNY" },
"work_mode": "兼职",
"tags": ["展会"],
"confidence": 0.95
}
}
```
返回:返回入库后的 `JobCard`(通常与请求体一致)。
### `POST /poc/ingest/worker`
用途:写入或更新工人卡片(同时更新检索索引)。
请求体:
```json
{
"worker": {
"worker_id": "worker_001",
"name": "张三",
"description": "有活动执行经验",
"skills": [
{ "name": "签到", "score": 0.9 },
{ "name": "引导", "score": 0.8 }
],
"cities": ["深圳"],
"regions": ["福田", "南山"],
"availability": ["周末全天"],
"experience_tags": ["会展", "地推"],
"reliability_score": 0.92,
"profile_completion": 0.86,
"confidence": 0.93
}
}
```
返回:返回入库后的 `WorkerCard`(通常与请求体一致)。
### `POST /poc/ingest/job/async`
用途:异步岗位入库,快速返回任务 ID,不阻塞主请求。
返回示例:
```json
{
"task_id": "queue_xxx",
"status": "queued"
}
```
### `POST /poc/ingest/worker/async`
用途:异步工人入库,快速返回任务 ID,不阻塞主请求。
返回结构同 `POST /poc/ingest/job/async`
### `GET /poc/ingest/queue/{task_id}`
用途:查询异步入库任务状态。
可能状态:
- `queued`
- `processing`
- `done`
- `failed`
- `not_found`
### `POST /poc/ingest/bootstrap`
用途:导入内置样本数据(岗位、工人、技能、类目、区域)并构建检索数据。
请求体:无。
返回示例:
```json
{
"jobs": 100,
"workers": 300,
"skills": 120,
"categories": 20,
"regions": 50
}
```
说明:
- 适合开发环境初始化。
- 重复执行会触发 upsert 逻辑(覆盖同 ID 数据)。
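一个触发 bootstrap 并打印导入数量的示意脚本(假设本地服务已启动,字段名与上方返回示例一致):
```python
import httpx

# 开发环境初始化示意:重复执行走 upsert,幂等
counts = httpx.post("http://127.0.0.1:8000/poc/ingest/bootstrap", timeout=120).json()
print({key: counts.get(key) for key in ("jobs", "workers", "skills", "categories", "regions")})
```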
## 匹配接口
### `POST /poc/match/workers`
用途:以岗位为源,匹配合适工人。
请求体(二选一):
1. 传 `job_id`(按已入库岗位匹配):
```json
{
"job_id": "job_001",
@@ -51,8 +345,51 @@
}
```
2. 传内联 `job`(不依赖入库):
```json
{
"job": { "...": "JobCard" },
"top_n": 10
}
```
字段说明:
- `job_id`:岗位 ID。
- `job`:完整岗位对象。
- `top_n`:返回条数,范围 `1~50`,默认 `10`
- `job_id``job` 至少提供一个。
返回:
```json
{
"items": [
{
"match_id": "match_001",
"source_type": "job_to_worker",
"source_id": "job_001",
"target_id": "worker_007",
"match_score": 0.87,
"breakdown": {
"skill_score": 0.9,
"region_score": 1.0,
"time_score": 0.8,
"experience_score": 0.85,
"reliability_score": 0.8
},
"reasons": ["技能高度匹配", "同区域可到岗", "有同类活动经验"]
}
]
}
```
错误说明:
-`job_id` 不存在且未传 `job` 时,返回 `404`,提示“岗位不存在”。
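按 `job_id` 发起匹配并查看五维分的示意(假设已执行 bootstrap,`job_001` 仅为示例 ID):
```python
import httpx

resp = httpx.post(
    "http://127.0.0.1:8000/poc/match/workers",
    json={"job_id": "job_001", "top_n": 5},
    timeout=60,
)
resp.raise_for_status()
for item in resp.json()["items"]:
    # breakdown 为技能/地域/时间/经验/可靠性五维分,可用于核对排序依据
    print(item["target_id"], item["match_score"], item["breakdown"], item["reasons"][0])
```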
### `POST /poc/match/jobs`
用途:以工人为源,匹配合适岗位。
请求体(二选一):
1. 传 `worker_id`:
```json
{
"worker_id": "worker_001",
@@ -60,16 +397,212 @@
}
```
2. 传内联 `worker`
```json
{
"worker": { "...": "WorkerCard" },
"top_n": 10
}
```
字段约束:
- `worker_id``worker` 至少提供一个。
- `top_n` 范围 `1~50`,默认 `10`
返回:`MatchResponse`,结构同上,`source_type``worker_to_job`
错误说明:
-`worker_id` 不存在且未传 `worker` 时,返回 `404`,提示“工人不存在”。
### `POST /poc/match/workers/async`
用途:岗位异步匹配工人(削峰入口),快速返回任务 ID。
请求体:
```json
{
"job_id": "job_001",
"top_n": 10
}
```
返回示例:
```json
{
"task_id": "mq_xxx",
"status": "queued"
}
```
### `POST /poc/match/jobs/async`
用途:工人异步匹配岗位(削峰入口),快速返回任务 ID。
请求体:
```json
{
"worker_id": "worker_001",
"top_n": 10
}
```
返回结构同 `POST /poc/match/workers/async`
### `GET /poc/match/queue/{task_id}`
用途:查询异步匹配任务状态,完成后返回 `items` 结果集。
可能状态:
- `queued`
- `processing`
- `done`
- `failed`
- `not_found`
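异步链路的典型用法是“先拿 task_id,再轮询队列状态”。下面是一段轮询示意(假设 `MATCH_ASYNC_ENABLED=true`;轮询间隔、超时与返回字段按上文接口说明推断,请以实际联调为准):
```python
import time
import httpx

API_BASE = "http://127.0.0.1:8000"  # 假设:本地默认地址

task = httpx.post(f"{API_BASE}/poc/match/workers/async", json={"job_id": "job_001", "top_n": 10}, timeout=10).json()
task_id = task["task_id"]
deadline = time.time() + 30  # 假设:最多等待 30 秒
status, body = "queued", {}
while time.time() < deadline:
    body = httpx.get(f"{API_BASE}/poc/match/queue/{task_id}", timeout=10).json()
    status = body.get("status", "not_found")
    if status in {"done", "failed", "not_found"}:
        break
    time.sleep(0.5)  # 简单轮询;生产上建议加退避
print(status, len(body.get("items") or []))
```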
### `GET /poc/match/explain/{match_id}`
用途:查询单条匹配记录详情与解释理由。
路径参数:
- `match_id`:匹配记录 ID。
返回:
```json
{
"match": {
"match_id": "match_001",
"source_type": "job_to_worker",
"source_id": "job_001",
"target_id": "worker_007",
"match_score": 0.87,
"breakdown": {
"skill_score": 0.9,
"region_score": 1.0,
"time_score": 0.8,
"experience_score": 0.85,
"reliability_score": 0.8
},
"reasons": ["技能高度匹配", "同区域可到岗", "有同类活动经验"]
}
}
```
错误说明:
- 找不到匹配记录时返回 `404`,提示“匹配记录不存在”。
### `POST /poc/match/feedback`
用途:提交匹配反馈(接受/拒绝),用于在线更新排序权重。
请求体:
```json
{
"match_id": "match_001",
"accepted": true
}
```
返回:
```json
{
"weights": {
"skill": 0.36,
"region": 0.21,
"time": 0.14,
"experience": 0.14,
"reliability": 0.15
},
"learning_enabled": true
}
```
错误说明:
- `match_id` 不存在时返回 `404`,提示“匹配记录不存在”。
### `GET /poc/match/weights`
用途:查看当前生效排序权重(默认权重或学习后的权重)。
返回结构同 `POST /poc/match/feedback`
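反馈与权重查询的组合用法示意(假设 `RANKING_LEARNING_ENABLED=true`,`match_001` 沿用前文示例 ID):
```python
import httpx

API_BASE = "http://127.0.0.1:8000"  # 假设:本地默认地址

# 提交一次“接受”反馈,再读取当前生效权重,观察各维度的微调
httpx.post(f"{API_BASE}/poc/match/feedback", json={"match_id": "match_001", "accepted": True}, timeout=10).raise_for_status()
weights = httpx.get(f"{API_BASE}/poc/match/weights", timeout=10).json()["weights"]
print({name: round(value, 3) for name, value in weights.items()})
```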
## 查询接口
### `GET /poc/jobs`
用途:分页前的基础列表查询(当前返回全量)。
返回:
```json
{
"items": [{ "...": "JobCard(JSON)" }],
"total": 100
}
```
### `GET /poc/workers`
用途:查询工人列表(当前返回全量)。
返回:
```json
{
"items": [{ "...": "WorkerCard(JSON)" }],
"total": 300
}
```
### `GET /poc/jobs/{job_id}`
用途:根据 ID 查询单个岗位。
路径参数:
- `job_id`:岗位 ID。
返回:`JobCard`
错误说明:
- ID 不存在返回 `404`,提示“岗位不存在”。
### `GET /poc/workers/{worker_id}`
用途:根据 ID 查询单个工人。
路径参数:
- `worker_id`:工人 ID。
返回:`WorkerCard`
错误说明:
- ID 不存在返回 `404`,提示“工人不存在”。
## 交接建议
-`docs/openapi.json` 作为机器契约,`docs/API.md` 作为业务语义解释。
- 前端与测试联调时,优先校验:
- 抽取失败时 `errors/missing_fields` 是否按预期返回。
- 匹配结果 `breakdown` 五维分是否完整。
- `top_n` 边界值(`1``50``>50`)的校验行为。
## 升级配置说明
- 抽取增强(schema-aware 重试):
- `EXTRACTION_LLM_MAX_RETRIES`LLM 校验失败后的修复重试次数,默认 `2`
- 检索 embedding 可配置:
- `EMBEDDING_ENABLED`:是否启用正式 embedding默认 `false`
- `EMBEDDING_BACKEND``hash``openai_compatible`
- `EMBEDDING_BASE_URL` / `EMBEDDING_API_KEY` / `EMBEDDING_MODEL`:embedding 服务配置。
- `EMBEDDING_VECTOR_SIZE`embedding 维度(需与 Qdrant 集合维度一致)。
- 排序在线学习:
- `RANKING_LEARNING_ENABLED`:是否启用反馈学习,默认 `true`
- `RANKING_LEARNING_RATE`:在线更新学习率,默认 `0.08`(更新规则示意见本节末尾代码)。
- 权重持久化文件:`data/match_weights.json`
- 全局稳定性护栏:
- `APP_RATE_LIMIT_PER_MINUTE`:全局每分钟请求上限。
- `APP_CIRCUIT_BREAKER_ERROR_RATE`:窗口 5xx 错误率触发阈值。
- `APP_CIRCUIT_BREAKER_MIN_REQUESTS`:熔断判定最小请求数。
- `APP_CIRCUIT_BREAKER_WINDOW_SECONDS`:错误率统计窗口。
- `APP_CIRCUIT_BREAKER_COOLDOWN_SECONDS`:熔断冷却时长。
- `ALERT_WEBHOOK_URL`:告警 webhook 地址(可选)。
- 异步队列与缓存:
- `INGEST_ASYNC_ENABLED`:是否启用异步入库队列。
- `INGEST_QUEUE_MAX_SIZE`:队列最大长度。
- `MATCH_ASYNC_ENABLED`:是否启用异步匹配队列。
- `MATCH_QUEUE_MAX_SIZE`:异步匹配队列最大长度。
- `MATCH_CACHE_ENABLED`:是否启用匹配缓存。
- `MATCH_CACHE_TTL_SECONDS`:匹配缓存有效期。
- `QUERY_CACHE_ENABLED`:是否启用查询缓存。
- `QUERY_CACHE_TTL_SECONDS`:查询缓存有效期。
- `CACHE_BACKEND`:缓存后端 `memory/redis`
- `REDIS_URL`:Redis 连接地址。
- 数据库连接池:
- `DATABASE_POOL_SIZE`:连接池大小。
- `DATABASE_MAX_OVERFLOW`:溢出连接数。
- `DATABASE_POOL_TIMEOUT`:获取连接超时秒数。
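排序在线学习的更新规则可以用下面的简化示意理解(与上文 `MatchWeightService.update_from_feedback` 的逻辑一致,仅省略了权重落盘与总和为 0 时回退默认权重的分支):
```python
def update_weights(weights: dict[str, float], features: dict[str, float], accepted: bool, lr: float = 0.08) -> dict[str, float]:
    """单步在线更新:按预测误差调整各维权重后归一化。"""
    target = 1.0 if accepted else 0.0
    prediction = sum(weights[name] * value for name, value in features.items())
    error = target - prediction
    updated = {name: max(0.0, weights[name] + lr * error * value) for name, value in features.items()}
    total = sum(updated.values()) or 1.0  # 简化处理:真实实现会在总和为 0 时回退默认权重
    return {name: value / total for name, value in updated.items()}

# 示例:一次“接受”反馈会小幅抬高当前高分维度的权重
weights = {"skill": 0.35, "region": 0.2, "time": 0.15, "experience": 0.15, "reliability": 0.15}
features = {"skill": 0.9, "region": 1.0, "time": 0.8, "experience": 0.85, "reliability": 0.8}
print(update_weights(weights, features, accepted=True))
```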

View File

@@ -0,0 +1,19 @@
# 容量基线(自动生成)
- 生成时间: 2026-03-31 14:36:58 +0800
- API_BASE: http://127.0.0.1:8000
- TOTAL_REQUESTS: 80
- CONCURRENCY: 20
| 场景 | 成功率 | RPS | 平均延迟(ms) | P95(ms) | P99(ms) |
| --- | --- | --- | --- | --- | --- |
| health | 1.0 | 19.34 | 978.98 | 1434.66 | 1544.06 |
| jobs | 1.0 | 95.39 | 197.95 | 409.12 | 424.99 |
| match_workers | 1.0 | 20.81 | 913.73 | 1975.6 | 2118.65 |
| match_jobs | 1.0 | 19.88 | 975.29 | 2001.08 | 2147.74 |
| match_workers_cached | 1.0 | 23.52 | 819.62 | 1220.26 | 1331.26 |
| match_jobs_cached | 1.0 | 25.21 | 759.14 | 1077.45 | 1200.4 |
| match_workers_async | 1.0 | 211.09 | 89.04 | 151.04 | 158.89 |
| match_jobs_async | 1.0 | 221.04 | 83.96 | 143.35 | 162.95 |
> 建议:该基线仅代表当前单机/当前数据量下表现,发布前请在目标环境按 2x/5x 峰值复测。

View File

@@ -5,6 +5,12 @@
cd gig-poc
sh infrastructure/scripts/dev-up.sh
```
默认会自动完成:
- 启动容器并健康检查
- bootstrap 样本数据
- 闭环验收(抽取 -> 入库 -> 匹配 -> 解释)
- 导出 `docs/openapi.json`
- 可选压测并生成 `docs/CAPACITY_BASELINE.md`(`RUN_BASELINE_ON_UP=true`)
## 演示步骤
1. 打开 `http://127.0.0.1:5173`
@@ -21,6 +27,42 @@ cd gig-poc
sh infrastructure/scripts/prod-up.sh
```
## 生产环境停止
```bash
cd gig-poc
sh infrastructure/scripts/prod-down.sh
```
## OpenAPI 交接文件导出
```bash
cd gig-poc
sh infrastructure/scripts/export-openapi.sh
```
## OpenAPI 固化入库(离线)
```bash
cd gig-poc
sh infrastructure/scripts/freeze-openapi.sh
```
## 一键闭环验收(可单独执行)
```bash
cd gig-poc
sh infrastructure/scripts/acceptance-e2e.sh
```
## 容量基线压测(可单独执行)
```bash
cd gig-poc
sh infrastructure/scripts/load-baseline.sh
```
## 高并发演示建议
1. 同步匹配:调用 `POST /poc/match/workers` 观察实时结果。
2. 异步匹配削峰:调用 `POST /poc/match/workers/async` 获取 `task_id`
3. 轮询结果:调用 `GET /poc/match/queue/{task_id}` 直到 `status=done`
4. 打开 `GET /poc/ops/system/metrics` 观察缓存命中率、队列积压和限流熔断状态。
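演示时可以用下面的小脚本快速查看护栏指标(字段与上文 `GET /poc/ops/system/metrics` 返回示例一致,假设 API 跑在本机 8000 端口):
```python
import httpx

metrics = httpx.get("http://127.0.0.1:8000/poc/ops/system/metrics", timeout=10).json()
print("match_queue 积压:", metrics["match_queue"]["queued"])
print("匹配缓存命中率:", metrics["cache"]["match_hit_rate"])
print("熔断是否打开:", bool(metrics["traffic"]["circuit_open"]))
```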
## 演示建议
- 先演示系统状态页,确认健康与 bootstrap 正常
- 再演示岗位找人、人找岗位两个闭环

View File

@@ -39,10 +39,45 @@
- `LLM_BASE_URL`:OpenAI 兼容接口地址
- `LLM_API_KEY`:模型服务密钥
- `LLM_MODEL`:模型名称
- `LLM_FALLBACK_BASE_URLS`:LLM 备用端点列表(JSON 数组)
- `AI_RATE_LIMIT_PER_MINUTE`:AI 请求每分钟限流阈值
- `AI_CIRCUIT_BREAKER_FAIL_THRESHOLD`:熔断触发失败次数
- `AI_CIRCUIT_BREAKER_COOLDOWN_SECONDS`:熔断冷却秒数
- `EMBEDDING_ENABLED`:是否启用正式 embedding
- `EMBEDDING_BACKEND``hash``openai_compatible`
- `EMBEDDING_BASE_URL` / `EMBEDDING_API_KEY` / `EMBEDDING_MODEL`:embedding 配置
- `INGEST_ASYNC_ENABLED`:是否启用异步入库队列
- `INGEST_QUEUE_MAX_SIZE`:异步队列最大长度
- `MATCH_CACHE_ENABLED`:是否启用匹配缓存
- `MATCH_CACHE_TTL_SECONDS`:匹配缓存 TTL
- `QUERY_CACHE_ENABLED`:是否启用查询缓存(列表与详情)
- `QUERY_CACHE_TTL_SECONDS`:查询缓存 TTL
- `CACHE_BACKEND`:缓存后端,`memory``redis`
- `REDIS_URL`:Redis 连接串
- `APP_RATE_LIMIT_PER_MINUTE`:全局请求限流阈值
- `APP_CIRCUIT_BREAKER_*`:全局熔断参数(错误率、窗口、冷却)
- `ALERT_WEBHOOK_URL`:告警 webhook(可选)
- `DATABASE_POOL_SIZE` / `DATABASE_MAX_OVERFLOW` / `DATABASE_POOL_TIMEOUT`:数据库连接池参数
- `MATCH_ASYNC_ENABLED`:是否启用异步匹配队列
- `MATCH_QUEUE_MAX_SIZE`:异步匹配队列最大长度
## 启动方式
1. `cd gig-poc`
2. `sh infrastructure/scripts/dev-up.sh`
3. 默认会自动执行:
- 健康检查 + bootstrap
- 一键闭环验收脚本(抽取 -> 入库 -> 匹配 -> 解释)
- 导出 `docs/openapi.json`
4. 可选开启容量基线压测:
- `RUN_BASELINE_ON_UP=true sh infrastructure/scripts/dev-up.sh`
## 生产环境启动/停止
- 启动:`sh infrastructure/scripts/prod-up.sh`
- 停止:`sh infrastructure/scripts/prod-down.sh`
- 可选环境变量:
- `WEB_PORT`(默认 `80`)
- `API_PORT`(默认 `8000`)
- `BOOTSTRAP_ON_UP`(默认 `true`,可设置为 `false` 跳过样本初始化)
## 样本导入方式
`dev-up.sh` 会在健康检查通过后自动触发 `/poc/ingest/bootstrap`,导入 100 岗位、300 工人和词表。
@@ -50,6 +85,11 @@
## API 地址
- `http://127.0.0.1:8000`
- OpenAPI:`http://127.0.0.1:8000/docs`
- OpenAPI JSON 导出:`sh infrastructure/scripts/export-openapi.sh`
- OpenAPI 固化(离线生成并入库):`sh infrastructure/scripts/freeze-openapi.sh`
- AI 观测接口:`GET /poc/ops/ai/metrics`
- 系统观测接口:`GET /poc/ops/system/metrics`
- 异步匹配接口:`POST /poc/match/workers/async``POST /poc/match/jobs/async``GET /poc/match/queue/{task_id}`
## 前端访问地址
- `http://127.0.0.1:5173`
@@ -61,6 +101,53 @@
4. 点击入库并匹配岗位
5. 在系统状态页执行健康检查和样本导入
## 一键闭环验收
```bash
cd gig-poc
sh infrastructure/scripts/acceptance-e2e.sh
```
该脚本会自动验证两条链路:
- 岗位文本抽取 -> 岗位入库 -> 岗位匹配工人 -> 匹配解释
- 工人文本抽取 -> 工人入库 -> 工人匹配岗位 -> 匹配解释
## 容量基线压测
```bash
cd gig-poc
sh infrastructure/scripts/load-baseline.sh
```
输出文件:
- `docs/CAPACITY_BASELINE.md`
可选参数:
- `TOTAL_REQUESTS`(默认 `400`)
- `CONCURRENCY`(默认 `40`)
## 规模化建议(上线前)
- 应用层:开启多实例部署(建议至少 2 个 API 实例)并接入负载均衡。
- 数据层PostgreSQL、Qdrant 使用托管或主从/集群形态,避免单点。
- 链路层:优先走异步入库接口(`/poc/ingest/*/async`)吸收突发写流量。
- 匹配层:高峰请求优先走异步匹配接口(`/poc/match/*/async`)做削峰。
- 观测层:接入 `/poc/ops/system/metrics``/poc/ops/ai/metrics` 到监控告警系统。
- 发布层:每次发布前更新 `docs/openapi.json``docs/CAPACITY_BASELINE.md`
## K8s 扩容部署(基础模板)
目录:`infrastructure/k8s`
```bash
cd gig-poc
kubectl apply -k infrastructure/k8s
```
包含资源:
- API Deployment + Service + HPA默认 3~20 副本)
- Web Deployment + Service + HPA默认 2~10 副本)
- Redis Deployment + Service
- Ingress 示例路由
详细策略说明见:`docs/SCALING.md`
## 已实现范围
- 岗位抽取
- 工人抽取

19
gig-poc/docs/SCALING.md Normal file
View File

@@ -0,0 +1,19 @@
# 扩容与高 DAU 策略
## 当前能力
- API/Web 提供 K8s 多副本与 HPA 模板:`infrastructure/k8s`
- Redis 缓存后端支持:热点匹配与查询链路缓存
- 异步队列:入库与匹配都支持异步削峰
- 观测接口:`/poc/ops/system/metrics``/poc/ops/ai/metrics`
## 推荐上线形态
1. API 多副本(>=3)+ HPA(3~20)
2. Web 多副本(>=2)+ HPA(2~10)
3. Redis 独立高可用(哨兵或托管)
4. PostgreSQL、Qdrant 使用托管或主从/集群
5. 异步接口承接高峰写流量与匹配重算
## 发布前门槛
- 运行 `sh infrastructure/scripts/load-baseline.sh`
- 更新并提交 `docs/CAPACITY_BASELINE.md`
- 验证 P95/P99、成功率和队列积压指标

File diff suppressed because one or more lines are too long

View File

@@ -19,6 +19,13 @@ services:
volumes: volumes:
- qdrant_prod_data:/qdrant/storage - qdrant_prod_data:/qdrant/storage
redis:
image: docker.m.daocloud.io/library/redis:7-alpine
restart: unless-stopped
command: ["redis-server", "--appendonly", "yes"]
volumes:
- redis_prod_data:/data
api:
build:
context: ..
@@ -33,9 +40,28 @@ services:
LLM_BASE_URL: ${LLM_BASE_URL:-}
LLM_API_KEY: ${LLM_API_KEY:-}
LLM_MODEL: ${LLM_MODEL:-gpt-5.4}
CACHE_BACKEND: ${CACHE_BACKEND:-redis}
REDIS_URL: ${REDIS_URL:-redis://redis:6379/0}
INGEST_ASYNC_ENABLED: ${INGEST_ASYNC_ENABLED:-true}
MATCH_ASYNC_ENABLED: ${MATCH_ASYNC_ENABLED:-true}
MATCH_CACHE_ENABLED: ${MATCH_CACHE_ENABLED:-true}
MATCH_CACHE_TTL_SECONDS: ${MATCH_CACHE_TTL_SECONDS:-30}
QUERY_CACHE_ENABLED: ${QUERY_CACHE_ENABLED:-true}
QUERY_CACHE_TTL_SECONDS: ${QUERY_CACHE_TTL_SECONDS:-20}
APP_RATE_LIMIT_PER_MINUTE: ${APP_RATE_LIMIT_PER_MINUTE:-1200}
APP_CIRCUIT_BREAKER_ERROR_RATE: ${APP_CIRCUIT_BREAKER_ERROR_RATE:-0.5}
APP_CIRCUIT_BREAKER_MIN_REQUESTS: ${APP_CIRCUIT_BREAKER_MIN_REQUESTS:-50}
APP_CIRCUIT_BREAKER_WINDOW_SECONDS: ${APP_CIRCUIT_BREAKER_WINDOW_SECONDS:-60}
APP_CIRCUIT_BREAKER_COOLDOWN_SECONDS: ${APP_CIRCUIT_BREAKER_COOLDOWN_SECONDS:-30}
DATABASE_POOL_SIZE: ${DATABASE_POOL_SIZE:-20}
DATABASE_MAX_OVERFLOW: ${DATABASE_MAX_OVERFLOW:-30}
DATABASE_POOL_TIMEOUT: ${DATABASE_POOL_TIMEOUT:-30}
depends_on:
- postgres
- qdrant
- redis
ports:
- "${API_PORT:-8000}:8000"
web:
build:
@@ -50,3 +76,4 @@ services:
volumes:
postgres_prod_data:
qdrant_prod_data:
redis_prod_data:

View File

@@ -26,6 +26,14 @@ services:
ports: ports:
- "6333:6333" - "6333:6333"
redis:
image: docker.m.daocloud.io/library/redis:7-alpine
command: ["redis-server", "--appendonly", "yes"]
volumes:
- redis_data:/data
ports:
- "6379:6379"
api:
build:
context: ..
@@ -36,11 +44,29 @@ services:
QDRANT_URL: http://qdrant:6333
LOG_LEVEL: INFO
LLM_ENABLED: "false"
CACHE_BACKEND: "redis"
REDIS_URL: redis://redis:6379/0
INGEST_ASYNC_ENABLED: "true"
MATCH_ASYNC_ENABLED: "true"
MATCH_CACHE_ENABLED: "true"
MATCH_CACHE_TTL_SECONDS: "30"
QUERY_CACHE_ENABLED: "true"
QUERY_CACHE_TTL_SECONDS: "20"
APP_RATE_LIMIT_PER_MINUTE: "1200"
APP_CIRCUIT_BREAKER_ERROR_RATE: "0.5"
APP_CIRCUIT_BREAKER_MIN_REQUESTS: "50"
APP_CIRCUIT_BREAKER_WINDOW_SECONDS: "60"
APP_CIRCUIT_BREAKER_COOLDOWN_SECONDS: "30"
DATABASE_POOL_SIZE: "20"
DATABASE_MAX_OVERFLOW: "30"
DATABASE_POOL_TIMEOUT: "30"
depends_on:
postgres:
condition: service_healthy
qdrant:
condition: service_started
redis:
condition: service_started
ports:
- "8000:8000"
@@ -57,3 +83,4 @@ services:
volumes:
postgres_data:
qdrant_data:
redis_data:

View File

@@ -0,0 +1,90 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: gig-poc-api
namespace: gig-poc
spec:
replicas: 3
selector:
matchLabels:
app: gig-poc-api
template:
metadata:
labels:
app: gig-poc-api
spec:
containers:
- name: api
image: gig-poc-api:latest
imagePullPolicy: IfNotPresent
ports:
- containerPort: 8000
env:
- name: APP_ENV
value: production
- name: CACHE_BACKEND
value: redis
- name: REDIS_URL
value: redis://gig-poc-redis:6379/0
- name: INGEST_ASYNC_ENABLED
value: "true"
- name: MATCH_ASYNC_ENABLED
value: "true"
- name: MATCH_CACHE_ENABLED
value: "true"
- name: QUERY_CACHE_ENABLED
value: "true"
- name: APP_RATE_LIMIT_PER_MINUTE
value: "3000"
resources:
requests:
cpu: "500m"
memory: "512Mi"
limits:
cpu: "2"
memory: "2Gi"
readinessProbe:
httpGet:
path: /health
port: 8000
initialDelaySeconds: 10
periodSeconds: 10
livenessProbe:
httpGet:
path: /health
port: 8000
initialDelaySeconds: 30
periodSeconds: 15
---
apiVersion: v1
kind: Service
metadata:
name: gig-poc-api
namespace: gig-poc
spec:
selector:
app: gig-poc-api
ports:
- name: http
port: 8000
targetPort: 8000
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: gig-poc-api-hpa
namespace: gig-poc
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: gig-poc-api
minReplicas: 3
maxReplicas: 20
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70

View File

@@ -0,0 +1,24 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: gig-poc-ingress
namespace: gig-poc
spec:
rules:
- host: gig-poc.local
http:
paths:
- path: /api
pathType: Prefix
backend:
service:
name: gig-poc-api
port:
number: 8000
- path: /
pathType: Prefix
backend:
service:
name: gig-poc-web
port:
number: 80

View File

@@ -0,0 +1,9 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: gig-poc
resources:
- namespace.yaml
- redis.yaml
- api.yaml
- web.yaml
- ingress.yaml

View File

@@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
name: gig-poc

View File

@@ -0,0 +1,41 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: gig-poc-redis
namespace: gig-poc
spec:
replicas: 1
selector:
matchLabels:
app: gig-poc-redis
template:
metadata:
labels:
app: gig-poc-redis
spec:
containers:
- name: redis
image: redis:7-alpine
args: ["redis-server", "--appendonly", "yes"]
ports:
- containerPort: 6379
resources:
requests:
cpu: "100m"
memory: "128Mi"
limits:
cpu: "500m"
memory: "512Mi"
---
apiVersion: v1
kind: Service
metadata:
name: gig-poc-redis
namespace: gig-poc
spec:
selector:
app: gig-poc-redis
ports:
- name: redis
port: 6379
targetPort: 6379

View File

@@ -0,0 +1,61 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: gig-poc-web
namespace: gig-poc
spec:
replicas: 2
selector:
matchLabels:
app: gig-poc-web
template:
metadata:
labels:
app: gig-poc-web
spec:
containers:
- name: web
image: gig-poc-web:latest
imagePullPolicy: IfNotPresent
ports:
- containerPort: 80
resources:
requests:
cpu: "200m"
memory: "256Mi"
limits:
cpu: "1"
memory: "1Gi"
---
apiVersion: v1
kind: Service
metadata:
name: gig-poc-web
namespace: gig-poc
spec:
selector:
app: gig-poc-web
ports:
- name: http
port: 80
targetPort: 80
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: gig-poc-web-hpa
namespace: gig-poc
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: gig-poc-web
minReplicas: 2
maxReplicas: 10
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70

View File

@@ -0,0 +1,114 @@
#!/usr/bin/env sh
set -eu
API_BASE="${API_BASE:-http://127.0.0.1:8000}"
BOOTSTRAP_ON_RUN="${BOOTSTRAP_ON_RUN:-true}"
TMP_DIR="$(mktemp -d)"
trap 'rm -rf "$TMP_DIR"' EXIT
step() {
echo "[ACCEPTANCE] $1"
}
fail() {
echo "[ACCEPTANCE][FAIL] $1" >&2
exit 1
}
parse_json() {
PY_EXPR="$1"
INPUT_FILE="$2"
python3 - "$PY_EXPR" "$INPUT_FILE" <<'PY'
import json
import sys
expr = sys.argv[1]
path = sys.argv[2]
data = json.loads(open(path, "r", encoding="utf-8").read())
safe_builtins = {"bool": bool, "len": len, "str": str}
value = eval(expr, {"__builtins__": safe_builtins}, {"data": data})
if isinstance(value, (dict, list)):
print(json.dumps(value, ensure_ascii=False))
elif value is None:
print("")
else:
print(str(value))
PY
}
step "健康检查"
curl -fsS "${API_BASE}/health" >"$TMP_DIR/health.json" || fail "health 接口不可用"
if [ "$BOOTSTRAP_ON_RUN" = "true" ]; then
step "执行 bootstrap"
curl -fsS -X POST "${API_BASE}/poc/ingest/bootstrap" >"$TMP_DIR/bootstrap.json" || fail "bootstrap 失败"
fi
step "抽取岗位"
curl -fsS -X POST "${API_BASE}/poc/extract/job" \
-H "Content-Type: application/json" \
-d '{"text":"明天下午南山会展中心需要2个签到协助5小时150/人,女生优先"}' \
>"$TMP_DIR/extract_job.json" || fail "岗位抽取调用失败"
[ "$(parse_json "bool(data.get('success'))" "$TMP_DIR/extract_job.json")" = "True" ] || fail "岗位抽取失败"
parse_json "data.get('data')" "$TMP_DIR/extract_job.json" >"$TMP_DIR/job.json"
JOB_ID="$(parse_json "data.get('data', {}).get('job_id')" "$TMP_DIR/extract_job.json")"
[ -n "$JOB_ID" ] || fail "岗位抽取缺少 job_id"
step "岗位入库"
python3 - "$TMP_DIR/job.json" >"$TMP_DIR/ingest_job_payload.json" <<'PY'
import json
import sys
job = json.loads(open(sys.argv[1], "r", encoding="utf-8").read())
print(json.dumps({"job": job}, ensure_ascii=False))
PY
curl -fsS -X POST "${API_BASE}/poc/ingest/job" \
-H "Content-Type: application/json" \
--data @"$TMP_DIR/ingest_job_payload.json" \
>"$TMP_DIR/ingest_job.json" || fail "岗位入库失败"
step "岗位匹配工人"
curl -fsS -X POST "${API_BASE}/poc/match/workers" \
-H "Content-Type: application/json" \
-d "{\"job_id\":\"${JOB_ID}\",\"top_n\":3}" \
>"$TMP_DIR/match_workers.json" || fail "岗位匹配工人失败"
MATCH_ID_1="$(parse_json "((data.get('items') or [{}])[0]).get('match_id')" "$TMP_DIR/match_workers.json")"
[ -n "$MATCH_ID_1" ] || fail "岗位匹配工人未返回 match_id"
step "解释匹配(岗位->工人)"
curl -fsS "${API_BASE}/poc/match/explain/${MATCH_ID_1}" >"$TMP_DIR/explain_1.json" || fail "匹配解释失败(岗位->工人)"
step "抽取工人"
curl -fsS -X POST "${API_BASE}/poc/extract/worker" \
-H "Content-Type: application/json" \
-d '{"text":"我做过商场促销和活动签到,也能做登记和引导,周末都能接,福田南山都方便。"}' \
>"$TMP_DIR/extract_worker.json" || fail "工人抽取调用失败"
[ "$(parse_json "bool(data.get('success'))" "$TMP_DIR/extract_worker.json")" = "True" ] || fail "工人抽取失败"
parse_json "data.get('data')" "$TMP_DIR/extract_worker.json" >"$TMP_DIR/worker.json"
WORKER_ID="$(parse_json "data.get('data', {}).get('worker_id')" "$TMP_DIR/extract_worker.json")"
[ -n "$WORKER_ID" ] || fail "工人抽取缺少 worker_id"
step "工人入库"
python3 - "$TMP_DIR/worker.json" >"$TMP_DIR/ingest_worker_payload.json" <<'PY'
import json
import sys
worker = json.loads(open(sys.argv[1], "r", encoding="utf-8").read())
print(json.dumps({"worker": worker}, ensure_ascii=False))
PY
curl -fsS -X POST "${API_BASE}/poc/ingest/worker" \
-H "Content-Type: application/json" \
--data @"$TMP_DIR/ingest_worker_payload.json" \
>"$TMP_DIR/ingest_worker.json" || fail "工人入库失败"
step "工人匹配岗位"
curl -fsS -X POST "${API_BASE}/poc/match/jobs" \
-H "Content-Type: application/json" \
-d "{\"worker_id\":\"${WORKER_ID}\",\"top_n\":3}" \
>"$TMP_DIR/match_jobs.json" || fail "工人匹配岗位失败"
MATCH_ID_2="$(parse_json "((data.get('items') or [{}])[0]).get('match_id')" "$TMP_DIR/match_jobs.json")"
[ -n "$MATCH_ID_2" ] || fail "工人匹配岗位未返回 match_id"
step "解释匹配(工人->岗位)"
curl -fsS "${API_BASE}/poc/match/explain/${MATCH_ID_2}" >"$TMP_DIR/explain_2.json" || fail "匹配解释失败(工人->岗位)"
step "链路验收通过:抽取 -> 入库 -> 匹配 -> 解释"

View File

@@ -3,6 +3,10 @@ set -eu
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname "$0")" && pwd)
INFRA_DIR=$(CDPATH= cd -- "$SCRIPT_DIR/.." && pwd)
PROJECT_DIR=$(CDPATH= cd -- "$INFRA_DIR/.." && pwd)
RUN_ACCEPTANCE_ON_UP="${RUN_ACCEPTANCE_ON_UP:-true}"
EXPORT_OPENAPI_ON_UP="${EXPORT_OPENAPI_ON_UP:-true}"
RUN_BASELINE_ON_UP="${RUN_BASELINE_ON_UP:-false}"
cd "$INFRA_DIR" cd "$INFRA_DIR"
docker compose -f docker-compose.yml up --build -d docker compose -f docker-compose.yml up --build -d
@@ -13,4 +17,20 @@ done
until curl -fsS -X POST http://127.0.0.1:8000/poc/ingest/bootstrap >/dev/null 2>&1; do until curl -fsS -X POST http://127.0.0.1:8000/poc/ingest/bootstrap >/dev/null 2>&1; do
sleep 3 sleep 3
done done
if [ "$RUN_ACCEPTANCE_ON_UP" = "true" ]; then
echo "执行一键闭环验收脚本..."
sh "$SCRIPT_DIR/acceptance-e2e.sh"
fi
if [ "$EXPORT_OPENAPI_ON_UP" = "true" ]; then
echo "导出 OpenAPI 固化产物到 docs/openapi.json ..."
sh "$SCRIPT_DIR/export-openapi.sh" "$PROJECT_DIR/docs/openapi.json"
fi
if [ "$RUN_BASELINE_ON_UP" = "true" ]; then
echo "执行容量基线压测..."
sh "$SCRIPT_DIR/load-baseline.sh" "$PROJECT_DIR/docs/CAPACITY_BASELINE.md"
fi
echo "本地环境已启动。Web: http://127.0.0.1:5173 API: http://127.0.0.1:8000/docs" echo "本地环境已启动。Web: http://127.0.0.1:5173 API: http://127.0.0.1:8000/docs"

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env sh
set -eu
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname "$0")" && pwd)
PROJECT_DIR=$(CDPATH= cd -- "$SCRIPT_DIR/../.." && pwd)
API_PORT="${API_PORT:-8000}"
OUTPUT_PATH="${1:-$PROJECT_DIR/docs/openapi.json}"
mkdir -p "$(dirname "$OUTPUT_PATH")"
curl -fsS "http://127.0.0.1:${API_PORT}/openapi.json" -o "$OUTPUT_PATH"
echo "OpenAPI 已导出到: $OUTPUT_PATH"

View File

@@ -0,0 +1,37 @@
#!/usr/bin/env sh
set -eu
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname "$0")" && pwd)
PROJECT_DIR=$(CDPATH= cd -- "$SCRIPT_DIR/../.." && pwd)
OUTPUT_PATH="${1:-$PROJECT_DIR/docs/openapi.json}"
API_PORT="${API_PORT:-8000}"
mkdir -p "$(dirname "$OUTPUT_PATH")"
if PYTHONPATH="$PROJECT_DIR/apps/api" python3 - "$OUTPUT_PATH" <<'PY'
import json
import sys
from app.main import app
output = sys.argv[1]
spec = app.openapi()
with open(output, "w", encoding="utf-8") as f:
json.dump(spec, f, ensure_ascii=False, indent=2)
f.write("\n")
print(f"OpenAPI 已固化到: {output}")
PY
then
exit 0
fi
echo "本机缺少 API 依赖,尝试从已运行 API 导出..."
if curl -fsS "http://127.0.0.1:${API_PORT}/openapi.json" -o "$OUTPUT_PATH"; then
echo "OpenAPI 已固化到: $OUTPUT_PATH"
exit 0
fi
echo "本机 API 端口不可用,尝试通过 Docker 运行 API 镜像离线导出..."
docker compose -f "$PROJECT_DIR/infrastructure/docker-compose.yml" run --rm api \
python -c "import json; from app.main import app; print(json.dumps(app.openapi(), ensure_ascii=False, indent=2))" \
> "$OUTPUT_PATH"
echo "OpenAPI 已固化到: $OUTPUT_PATH"

View File

@@ -0,0 +1,137 @@
#!/usr/bin/env sh
set -eu
API_BASE="${API_BASE:-http://127.0.0.1:8000}"
TOTAL_REQUESTS="${TOTAL_REQUESTS:-400}"
CONCURRENCY="${CONCURRENCY:-40}"
OUTPUT_PATH="${1:-$(CDPATH= cd -- "$(dirname "$0")/../.." && pwd)/docs/CAPACITY_BASELINE.md}"
TMP_DIR="$(mktemp -d)"
trap 'rm -rf "$TMP_DIR"' EXIT
echo "[BASELINE] health check"
curl -fsS "$API_BASE/health" >/dev/null
echo "[BASELINE] ensure bootstrap data"
curl -fsS -X POST "$API_BASE/poc/ingest/bootstrap" >/dev/null
JOB_ID="$(curl -fsS "$API_BASE/poc/jobs" | python3 -c 'import json,sys; data=json.load(sys.stdin); print((data.get("items") or [{}])[0].get("job_id",""))')"
WORKER_ID="$(curl -fsS "$API_BASE/poc/workers" | python3 -c 'import json,sys; data=json.load(sys.stdin); print((data.get("items") or [{}])[0].get("worker_id",""))')"
[ -n "$JOB_ID" ] || { echo "no job id found"; exit 1; }
[ -n "$WORKER_ID" ] || { echo "no worker id found"; exit 1; }
run_case() {
NAME="$1"
METHOD="$2"
URL="$3"
BODY_FILE="$4"
OUT_FILE="$5"
python3 - "$METHOD" "$URL" "$BODY_FILE" "$TOTAL_REQUESTS" "$CONCURRENCY" "$OUT_FILE" <<'PY'
import json
import sys
import time
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed
method, url, body_file, total, concurrency, out_file = sys.argv[1:]
total = int(total)
concurrency = int(concurrency)
payload = None
if body_file != "-":
payload = open(body_file, "rb").read()
durations = []
success = 0
fail = 0
def once():
start = time.perf_counter()
req = urllib.request.Request(url=url, method=method)
req.add_header("Content-Type", "application/json")
try:
if payload is None:
with urllib.request.urlopen(req, timeout=20) as resp:
code = resp.getcode()
else:
with urllib.request.urlopen(req, data=payload, timeout=20) as resp:
code = resp.getcode()
ok = 200 <= code < 400
except Exception:
ok = False
ms = (time.perf_counter() - start) * 1000
return ok, ms
bench_start = time.perf_counter()
with ThreadPoolExecutor(max_workers=concurrency) as ex:
futures = [ex.submit(once) for _ in range(total)]
for f in as_completed(futures):
ok, ms = f.result()
durations.append(ms)
if ok:
success += 1
else:
fail += 1
elapsed = time.perf_counter() - bench_start
durations.sort()
def pct(p):
if not durations:
return 0.0
idx = min(len(durations) - 1, int(len(durations) * p))
return round(durations[idx], 2)
result = {
"total": total,
"success": success,
"fail": fail,
"success_rate": round(success / total, 4) if total else 0.0,
"rps": round(total / elapsed, 2) if elapsed > 0 else 0.0,
"latency_ms_avg": round(sum(durations) / len(durations), 2) if durations else 0.0,
"latency_ms_p95": pct(0.95),
"latency_ms_p99": pct(0.99),
}
with open(out_file, "w", encoding="utf-8") as f:
json.dump(result, f, ensure_ascii=False, indent=2)
PY
echo "[BASELINE] done $NAME"
}
printf '{"job_id":"%s","top_n":10}\n' "$JOB_ID" >"$TMP_DIR/match_workers.json"
printf '{"worker_id":"%s","top_n":10}\n' "$WORKER_ID" >"$TMP_DIR/match_jobs.json"
run_case "health" "GET" "$API_BASE/health" "-" "$TMP_DIR/health.result.json"
run_case "jobs_list" "GET" "$API_BASE/poc/jobs" "-" "$TMP_DIR/jobs.result.json"
run_case "match_workers" "POST" "$API_BASE/poc/match/workers" "$TMP_DIR/match_workers.json" "$TMP_DIR/match_workers.result.json"
run_case "match_jobs" "POST" "$API_BASE/poc/match/jobs" "$TMP_DIR/match_jobs.json" "$TMP_DIR/match_jobs.result.json"
run_case "match_workers_cached" "POST" "$API_BASE/poc/match/workers" "$TMP_DIR/match_workers.json" "$TMP_DIR/match_workers_cached.result.json"
run_case "match_jobs_cached" "POST" "$API_BASE/poc/match/jobs" "$TMP_DIR/match_jobs.json" "$TMP_DIR/match_jobs_cached.result.json"
run_case "match_workers_async_enqueue" "POST" "$API_BASE/poc/match/workers/async" "$TMP_DIR/match_workers.json" "$TMP_DIR/match_workers_async.result.json"
run_case "match_jobs_async_enqueue" "POST" "$API_BASE/poc/match/jobs/async" "$TMP_DIR/match_jobs.json" "$TMP_DIR/match_jobs_async.result.json"
NOW="$(date '+%Y-%m-%d %H:%M:%S %z')"
mkdir -p "$(dirname "$OUTPUT_PATH")"
{
echo "# 容量基线(自动生成)"
echo
echo "- 生成时间: $NOW"
echo "- API_BASE: $API_BASE"
echo "- TOTAL_REQUESTS: $TOTAL_REQUESTS"
echo "- CONCURRENCY: $CONCURRENCY"
echo
echo "| 场景 | 成功率 | RPS | 平均延迟(ms) | P95(ms) | P99(ms) |"
echo "| --- | --- | --- | --- | --- | --- |"
for case in health jobs match_workers match_jobs match_workers_cached match_jobs_cached match_workers_async match_jobs_async; do
FILE="$TMP_DIR/${case}.result.json"
python3 - "$case" "$FILE" <<'PY'
import json
import sys
case, path = sys.argv[1], sys.argv[2]
data = json.loads(open(path, "r", encoding="utf-8").read())
print(f"| {case} | {data['success_rate']} | {data['rps']} | {data['latency_ms_avg']} | {data['latency_ms_p95']} | {data['latency_ms_p99']} |")
PY
done
echo
echo "> 建议:该基线仅代表当前单机/当前数据量下表现,发布前请在目标环境按 2x/5x 峰值复测。"
} >"$OUTPUT_PATH"
echo "[BASELINE] report generated at $OUTPUT_PATH"

View File

@@ -0,0 +1,8 @@
#!/usr/bin/env sh
set -eu
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname "$0")" && pwd)
INFRA_DIR=$(CDPATH= cd -- "$SCRIPT_DIR/.." && pwd)
cd "$INFRA_DIR"
docker compose -f docker-compose.prod.yml down

View File

@@ -3,7 +3,21 @@ set -eu
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname "$0")" && pwd)
INFRA_DIR=$(CDPATH= cd -- "$SCRIPT_DIR/.." && pwd)
API_PORT="${API_PORT:-8000}"
BOOTSTRAP_ON_UP="${BOOTSTRAP_ON_UP:-true}"
cd "$INFRA_DIR" cd "$INFRA_DIR"
docker compose -f docker-compose.prod.yml up --build -d docker compose -f docker-compose.prod.yml up --build -d
echo "生产部署容器已启动。请按实际域名或端口访问 Web。" echo "等待生产 API 健康检查..."
until curl -fsS "http://127.0.0.1:${API_PORT}/health" >/dev/null 2>&1; do
sleep 3
done
if [ "$BOOTSTRAP_ON_UP" = "true" ]; then
echo "执行 bootstrap 样本初始化..."
until curl -fsS -X POST "http://127.0.0.1:${API_PORT}/poc/ingest/bootstrap" >/dev/null 2>&1; do
sleep 3
done
fi
echo "生产环境已启动。Web: http://127.0.0.1:${WEB_PORT:-80} API: http://127.0.0.1:${API_PORT}/docs"

2
start.sh Normal file
View File

@@ -0,0 +1,2 @@
cd gig-poc
sh infrastructure/scripts/dev-up.sh