feat: 初始化零工后端代码

This commit is contained in:
Daniel
2026-04-01 14:19:25 +08:00
parent c6fabe262c
commit 84f8be7c0e
41 changed files with 2813 additions and 147 deletions

View File

@@ -26,13 +26,9 @@ class ExtractionService:
def extract_job(self, text: str) -> ExtractResponse:
logger.info("extract_job request text=%s", text)
llm_result = self._llm_extract(text, self.settings.prompt_dir / "job_extract.md")
if llm_result:
try:
return ExtractResponse(success=True, data=JobCard(**llm_result.content))
except ValidationError as exc:
logger.exception("LLM job extraction validation failed")
return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))
llm_card = self._llm_extract_with_retry(text, self.settings.prompt_dir / "job_extract.md", JobCard)
if llm_card:
return ExtractResponse(success=True, data=llm_card)
try:
card = self._extract_job_rule(text)
@@ -43,13 +39,9 @@ class ExtractionService:
def extract_worker(self, text: str) -> ExtractResponse:
logger.info("extract_worker request text=%s", text)
llm_result = self._llm_extract(text, self.settings.prompt_dir / "worker_extract.md")
if llm_result:
try:
return ExtractResponse(success=True, data=WorkerCard(**llm_result.content))
except ValidationError as exc:
logger.exception("LLM worker extraction validation failed")
return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))
llm_card = self._llm_extract_with_retry(text, self.settings.prompt_dir / "worker_extract.md", WorkerCard)
if llm_card:
return ExtractResponse(success=True, data=llm_card)
try:
card = self._extract_worker_rule(text)
@@ -65,6 +57,57 @@ class ExtractionService:
logger.exception("LLM extraction failed, fallback to rule-based extraction")
return None
def _llm_extract_with_retry(self, text: str, prompt_path: Path, schema_cls):
base_prompt = load_prompt(prompt_path)
llm_result = self._llm_extract(text, prompt_path)
if not llm_result:
return None
try:
return schema_cls(**llm_result.content)
except ValidationError as exc:
logger.warning("LLM extraction validation failed, trying schema-aware retry")
last_error = exc
last_output = llm_result.content
for _ in range(self.settings.extraction_llm_max_retries):
missing_fields = self._missing_fields(last_error)
repair_prompt = self._build_repair_prompt(base_prompt, schema_cls, missing_fields)
try:
repair_result = self.llm_client.extract_json(
repair_prompt,
self._build_repair_input(text, last_output, missing_fields),
)
except Exception:
logger.exception("LLM schema-aware retry failed")
return None
if not repair_result:
return None
last_output = repair_result.content
try:
return schema_cls(**repair_result.content)
except ValidationError as exc:
last_error = exc
logger.warning("LLM schema-aware retry still invalid missing_fields=%s", self._missing_fields(exc))
return None
def _build_repair_prompt(self, base_prompt: str, schema_cls, missing_fields: list[str]) -> str:
schema_json = json.dumps(schema_cls.model_json_schema(), ensure_ascii=False)
return (
f"{base_prompt}\n\n"
"你是结构化修复助手。请严格输出可被 JSON 解析的对象,不要输出解释文字。\n"
"目标是根据给定 schema 修复字段缺失和类型错误,优先保证必填字段完整。\n"
f"缺失或错误字段: {', '.join(missing_fields) if missing_fields else 'unknown'}\n"
f"JSON Schema: {schema_json}\n"
)
def _build_repair_input(self, original_text: str, last_output: dict, missing_fields: list[str]) -> str:
return (
f"原始文本:\n{original_text}\n\n"
f"上一次抽取结果:\n{json.dumps(last_output, ensure_ascii=False)}\n\n"
f"请重点修复字段:\n{json.dumps(missing_fields, ensure_ascii=False)}"
)
def _extract_job_rule(self, text: str) -> JobCard:
skill_hits = [item for item in self.skills if item in text]
category = next((item for item in self.categories if item in text), "活动执行")