fix:优化项目内容
This commit is contained in:
@@ -3,7 +3,7 @@ import json
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Tuple
|
||||
from typing import Any, Dict, Tuple, List
|
||||
|
||||
from openai import AsyncOpenAI
|
||||
from openai import NotFoundError as OpenAINotFoundError
|
||||
@@ -197,6 +197,65 @@ async def extract_invoice_metadata(image_bytes: bytes, mime: str = "image/jpeg")
|
||||
api_key = (config.get("api_key") or "").strip()
|
||||
if not api_key:
|
||||
return (None, None)
|
||||
|
||||
|
||||
def _normalize_finance_tags(raw_tags: Any, limit: int = 12) -> List[str]:
    """Return a cleaned, de-duplicated list of at most *limit* tag strings.

    Only strings and plain numbers are accepted as tags. JSON ``null``,
    booleans and nested containers are dropped instead of being stringified
    into junk tags such as ``"None"``. First-seen order is preserved.
    """
    if not isinstance(raw_tags, list):
        return []

    # A dict keeps insertion order, which gives ordered de-duplication.
    seen: Dict[str, None] = {}
    for item in raw_tags:
        # bool is a subclass of int, so it has to be excluded explicitly.
        if isinstance(item, bool) or not isinstance(item, (str, int, float)):
            continue
        text = str(item).strip()
        if text:
            seen.setdefault(text, None)
    return list(seen)[:limit]


def _parse_finance_tags_response(content: str) -> Tuple[List[str], Dict[str, Any]]:
    """Turn the raw model reply into ``(tags, meta)``.

    If the reply is not valid JSON, the tags are empty and the raw text is
    kept under ``meta["raw"]`` so the caller can still inspect it.
    """
    try:
        data: Any = json.loads(content)
    except (ValueError, RecursionError):
        # json.JSONDecodeError subclasses ValueError; RecursionError covers a
        # pathologically nested reply. Both stay best-effort, as before.
        return ([], {"summary": "", "raw": content})

    if not isinstance(data, dict):
        return ([], {})

    meta = data.get("meta")
    if not isinstance(meta, dict):
        meta = {}
    return (_normalize_finance_tags(data.get("tags")), meta)


async def extract_finance_tags(
    content_text: str,
    doc_type: str,
    filename: str = "",
) -> Tuple[List[str], Dict[str, Any]]:
    """Extract searchable tags and structured fields from attachment text.

    The text is sent to the configured chat model with a prompt asking for a
    JSON object that has a ``tags`` list and a ``meta`` object.

    Args:
        content_text: Text content of the attachment. Only the first 12000
            characters are included in the prompt.
        doc_type: Document type label, inserted into the prompt verbatim.
        filename: Original file name, inserted into the prompt verbatim.

    Returns:
        A ``(tags, meta)`` tuple. ``tags`` holds at most 12 unique, non-empty
        strings. ``meta`` is the model's ``meta`` object, or ``{}`` when it is
        missing or malformed. When the reply is not valid JSON, ``tags`` is
        empty and ``meta`` is ``{"summary": "", "raw": <reply text>}``.

    Raises:
        Whatever the client's ``chat.completions.create`` call raises; request
        errors are not handled here.
    """
    config = _load_ai_config()
    client = _client_from_config(config)
    model = config.get("model_name") or "gpt-4o-mini"
    try:
        temperature = float(config.get("temperature", 0.2))
    except (TypeError, ValueError):
        # A null or non-numeric setting falls back to the default. An explicit
        # 0 is kept, which is why this is not written as `value or 0.2`.
        temperature = 0.2

    # Guard against None so the slice below cannot raise.
    text_excerpt = (content_text or "")[:12000]

    prompt = (
        "你是一名财务助理。请根据附件的文本内容,为它生成可检索的标签,并抽取关键字段。\n"
        "只返回 JSON,不要任何解释文字。\n"
        "输入信息:\n"
        f"- 类型 doc_type: {doc_type}\n"
        f"- 文件名 filename: {filename}\n"
        "- 附件文本 content_text: (见下)\n\n"
        "返回 JSON 格式:\n"
        "{\n"
        ' "tags": ["标签1","标签2"],\n'
        ' "meta": {\n'
        ' "counterparty": "对方单位/收款方/付款方(如能识别)或 null",\n'
        ' "account": "账户/卡号后四位(如能识别)或 null",\n'
        ' "amount": "金额数字字符串或 null",\n'
        ' "date": "YYYY-MM-DD 或 null",\n'
        ' "summary": "一句话摘要"\n'
        " }\n"
        "}\n\n"
        "content_text:\n"
        f"{text_excerpt}\n"
    )

    completion = await client.chat.completions.create(
        model=model,
        response_format={"type": "json_object"},
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=500,
    )

    # An empty or missing choices list is treated like an empty JSON object
    # rather than raising IndexError.
    choices = completion.choices
    content = (choices[0].message.content if choices else None) or "{}"
    return _parse_finance_tags_response(content)
|
||||
try:
|
||||
client = _client_from_config(config)
|
||||
model = config.get("model_name") or "gpt-4o-mini"
|
||||
|
||||
Reference in New Issue
Block a user