feat: 初始化零工后端代码
This commit is contained in:
@@ -26,13 +26,9 @@ class ExtractionService:
|
||||
|
||||
def extract_job(self, text: str) -> ExtractResponse:
|
||||
logger.info("extract_job request text=%s", text)
|
||||
llm_result = self._llm_extract(text, self.settings.prompt_dir / "job_extract.md")
|
||||
if llm_result:
|
||||
try:
|
||||
return ExtractResponse(success=True, data=JobCard(**llm_result.content))
|
||||
except ValidationError as exc:
|
||||
logger.exception("LLM job extraction validation failed")
|
||||
return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))
|
||||
llm_card = self._llm_extract_with_retry(text, self.settings.prompt_dir / "job_extract.md", JobCard)
|
||||
if llm_card:
|
||||
return ExtractResponse(success=True, data=llm_card)
|
||||
|
||||
try:
|
||||
card = self._extract_job_rule(text)
|
||||
@@ -43,13 +39,9 @@ class ExtractionService:
|
||||
|
||||
def extract_worker(self, text: str) -> ExtractResponse:
|
||||
logger.info("extract_worker request text=%s", text)
|
||||
llm_result = self._llm_extract(text, self.settings.prompt_dir / "worker_extract.md")
|
||||
if llm_result:
|
||||
try:
|
||||
return ExtractResponse(success=True, data=WorkerCard(**llm_result.content))
|
||||
except ValidationError as exc:
|
||||
logger.exception("LLM worker extraction validation failed")
|
||||
return ExtractResponse(success=False, errors=[str(exc)], missing_fields=self._missing_fields(exc))
|
||||
llm_card = self._llm_extract_with_retry(text, self.settings.prompt_dir / "worker_extract.md", WorkerCard)
|
||||
if llm_card:
|
||||
return ExtractResponse(success=True, data=llm_card)
|
||||
|
||||
try:
|
||||
card = self._extract_worker_rule(text)
|
||||
@@ -65,6 +57,57 @@ class ExtractionService:
|
||||
logger.exception("LLM extraction failed, fallback to rule-based extraction")
|
||||
return None
|
||||
|
||||
def _llm_extract_with_retry(self, text: str, prompt_path: Path, schema_cls):
    """Extract a card with the LLM, repairing invalid output via retries.

    The first attempt goes through ``self._llm_extract``. When its output
    fails validation against ``schema_cls``, up to
    ``self.settings.extraction_llm_max_retries`` repair rounds are run. Each
    round sends the original text, the previous output and the failing
    fields to ``self.llm_client.extract_json`` with a schema-aware prompt.

    Args:
        text: Raw text to extract from.
        prompt_path: Path of the extraction prompt file.
        schema_cls: Validation class; instantiated as
            ``schema_cls(**content)`` and handed to ``_build_repair_prompt``.

    Returns:
        A validated ``schema_cls`` instance, or ``None`` when the LLM
        returned nothing, a repair call failed, the prompt could not be
        loaded, or every retry still failed validation. Callers treat
        ``None`` as the signal to fall back to another strategy.
    """
    llm_result = self._llm_extract(text, prompt_path)
    if not llm_result:
        return None

    last_output = llm_result.content
    try:
        return schema_cls(**last_output)
    except ValidationError as exc:
        logger.warning("LLM extraction validation failed, trying schema-aware retry")
        # Keep a reference: the ``exc`` name is unbound when the handler exits.
        last_error = exc

    attempts = range(self.settings.extraction_llm_max_retries)
    if not attempts:
        # Retries disabled: nothing more to try.
        return None

    # The base prompt is only needed on the repair path, so it is loaded
    # here rather than up front. A load failure is handled like any other
    # retry failure so that it cannot escape this best-effort method.
    try:
        base_prompt = load_prompt(prompt_path)
    except Exception:
        logger.exception("LLM schema-aware retry failed")
        return None

    missing_fields = self._missing_fields(last_error)
    for _ in attempts:
        repair_prompt = self._build_repair_prompt(base_prompt, schema_cls, missing_fields)
        try:
            repair_result = self.llm_client.extract_json(
                repair_prompt,
                self._build_repair_input(text, last_output, missing_fields),
            )
        except Exception:
            logger.exception("LLM schema-aware retry failed")
            return None
        if not repair_result:
            return None

        # Feed the newest output into the next round, valid or not.
        last_output = repair_result.content
        try:
            return schema_cls(**last_output)
        except ValidationError as exc:
            # Computed once and reused for both the log line and the next round.
            missing_fields = self._missing_fields(exc)
            logger.warning("LLM schema-aware retry still invalid missing_fields=%s", missing_fields)
    return None
|
||||
|
||||
def _build_repair_prompt(self, base_prompt: str, schema_cls, missing_fields: list[str]) -> str:
    """Compose the prompt used for a schema-aware repair round.

    The result is the base prompt, a blank line, two fixed instruction
    lines, a line naming the failing fields (``unknown`` when the list is
    empty) and a line carrying the JSON schema of ``schema_cls``. Every
    line, including the last, ends with a newline.

    Args:
        base_prompt: Text of the original extraction prompt.
        schema_cls: Class exposing ``model_json_schema()``.
        missing_fields: Names of the fields that failed validation.

    Returns:
        The assembled prompt string.
    """
    schema_text = json.dumps(schema_cls.model_json_schema(), ensure_ascii=False)
    if missing_fields:
        field_summary = ", ".join(missing_fields)
    else:
        field_summary = "unknown"

    # The first section ends with "\n" so that joining yields the blank
    # line that separates the base prompt from the repair instructions.
    sections = [
        f"{base_prompt}\n",
        "你是结构化修复助手。请严格输出可被 JSON 解析的对象,不要输出解释文字。",
        "目标是根据给定 schema 修复字段缺失和类型错误,优先保证必填字段完整。",
        f"缺失或错误字段: {field_summary}",
        f"JSON Schema: {schema_text}",
    ]
    return "\n".join(sections) + "\n"
|
||||
|
||||
def _build_repair_input(self, original_text: str, last_output: dict, missing_fields: list[str]) -> str:
    """Compose the input message for a schema-aware repair round.

    The message has three labelled parts separated by blank lines: the
    original text, the previous extraction result as JSON, and the list
    of fields to repair as JSON. Non-ASCII characters are kept as-is in
    both JSON dumps.

    Args:
        original_text: Text the extraction was run on.
        last_output: Most recent (invalid) extraction result.
        missing_fields: Names of the fields that failed validation.

    Returns:
        The assembled message, with no trailing newline.
    """
    previous_json = json.dumps(last_output, ensure_ascii=False)
    fields_json = json.dumps(missing_fields, ensure_ascii=False)
    parts = (
        f"原始文本:\n{original_text}",
        f"上一次抽取结果:\n{previous_json}",
        f"请重点修复字段:\n{fields_json}",
    )
    return "\n\n".join(parts)
|
||||
|
||||
def _extract_job_rule(self, text: str) -> JobCard:
|
||||
skill_hits = [item for item in self.skills if item in text]
|
||||
category = next((item for item in self.categories if item in text), "活动执行")
|
||||
|
||||
Reference in New Issue
Block a user