fix: 优化后端数据更新机制
This commit is contained in:
@@ -15,31 +15,40 @@ CLEANER_AI_DISABLED = os.environ.get("CLEANER_AI_DISABLED", "0") == "1"
|
||||
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3.1")
|
||||
|
||||
|
||||
def _call_ollama_extract(text: str, timeout: int = 10) -> Optional[Dict[str, Any]]:
|
||||
"""调用 Ollama 提取结构化数据。输出 JSON,仅包含新闻中可明确推断的字段"""
|
||||
# 用于 AI 提取的原文最大长度(有正文时取更长以提取精确数据)
|
||||
EXTRACT_TEXT_MAX_LEN = int(os.environ.get("EXTRACT_TEXT_MAX_LEN", "4000"))
|
||||
|
||||
|
||||
def _call_ollama_extract(text: str, timeout: int = 15) -> Optional[Dict[str, Any]]:
|
||||
"""调用 Ollama 从新闻全文/摘要中提取精确结构化数据,仅填写报道中明确给出的数字与事实。"""
|
||||
if CLEANER_AI_DISABLED or not text or len(str(text).strip()) < 10:
|
||||
return None
|
||||
try:
|
||||
import requests
|
||||
prompt = f"""从以下美伊/中东新闻中提取可推断的数值,输出 JSON,仅包含有明确依据的字段。无依据则省略该字段。
|
||||
raw = str(text).strip()[:EXTRACT_TEXT_MAX_LEN]
|
||||
prompt = f"""从以下美伊/中东新闻**全文或摘要**中,提取**报道明确给出的数字与事实**,输出 JSON。规则:
|
||||
1. 仅填写报道中**直接出现、可核对**的数据,不要推测或估算。
|
||||
2. 无明确依据的字段**必须省略**,不要填 0 或猜。
|
||||
3. **战损一律按增量**:只填本则报道中「本次/此次/今日/本轮」**新增**的伤亡或损毁数量。若报道只给「累计总数」「迄今共」「total so far」等,**不要填写**该字段(避免与库内已有累计值重复叠加)。
|
||||
4. **攻击地点**:提取双方遭袭的具体地点。美军/盟军基地被打击 → side=us;伊朗/亲伊设施被打击 → side=iran。name_keywords 用「中文名|英文名」便于匹配,可填多处。
|
||||
|
||||
要求:
|
||||
- summary: 1-2句中文事实,≤80字
|
||||
字段说明:
|
||||
- summary: 1-2 句中文事实概括,≤80 字
|
||||
- category: deployment|alert|intel|diplomatic|other
|
||||
- severity: low|medium|high|critical
|
||||
- 战损(仅当新闻明确提及数字时填写,格式 us_XXX / iran_XXX):
|
||||
- 战损(**仅填本则报道的新增增量**,如「此次 5 人丧生」「今日又损 2 架」):
|
||||
us_personnel_killed, iran_personnel_killed, us_personnel_wounded, iran_personnel_wounded,
|
||||
us_civilian_killed, iran_civilian_killed, us_civilian_wounded, iran_civilian_wounded,
|
||||
us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged.
|
||||
重要:bases_* 仅指已确认损毁/受损的基地数量;"军事目标"/targets 等泛指不是基地,若报道只说"X个军事目标遭袭"而无具体基地名,不填写 bases_*
|
||||
us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged,
|
||||
us_aircraft, iran_aircraft, us_warships, iran_warships, us_armor, iran_armor, us_vehicles, iran_vehicles,
|
||||
us_drones, iran_drones, us_missiles, iran_missiles, us_helicopters, iran_helicopters, us_submarines, iran_submarines,
|
||||
us_tanks, iran_tanks, us_civilian_ships, iran_civilian_ships, us_airport_port, iran_airport_port
|
||||
- retaliation_sentiment: 0-100,仅当新闻涉及伊朗报复情绪时
|
||||
- wall_street_value: 0-100,仅当新闻涉及美股/市场反应时
|
||||
- key_location_updates: 当新闻提及具体基地/地点遭袭时,数组项 { "name_keywords": "asad|阿萨德|assad", "side": "us", "status": "attacked", "damage_level": 1-3 }
|
||||
us_carriers, iran_carriers, us_civilian_ships, iran_civilian_ships, us_airport_port, iran_airport_port
|
||||
- retaliation_sentiment: 0-100,仅当报道涉及伊朗报复/反击情绪时
|
||||
- wall_street_value: 0-100,仅当报道涉及美股/市场时
|
||||
- key_location_updates: **双方攻击地点**。每项 {{ "name_keywords": "阿萨德|asad|al-asad", "side": "us或iran(被打击方)", "status": "attacked", "damage_level": 1-3 }}。美军基地例:阿萨德|asad、乌代德|udeid、埃尔比勒|erbil、因吉尔利克|incirlik。伊朗例:德黑兰|tehran、布什尔|bushehr、伊斯法罕|isfahan、阿巴斯|abbas、纳坦兹|natanz
|
||||
|
||||
原文:{str(text)[:800]}
|
||||
原文:
|
||||
{raw}
|
||||
|
||||
直接输出 JSON,不要解释:"""
|
||||
r = requests.post(
|
||||
@@ -48,7 +57,7 @@ def _call_ollama_extract(text: str, timeout: int = 10) -> Optional[Dict[str, Any
|
||||
"model": OLLAMA_MODEL,
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"stream": False,
|
||||
"options": {"num_predict": 256},
|
||||
"options": {"num_predict": 384},
|
||||
},
|
||||
timeout=timeout,
|
||||
)
|
||||
@@ -82,7 +91,7 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
|
||||
# combat_losses 增量(仅数字字段)
|
||||
loss_us = {}
|
||||
loss_ir = {}
|
||||
for k in ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded", "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles", "drones", "missiles", "helicopters", "submarines", "tanks", "civilian_ships", "airport_port"]:
|
||||
for k in ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded", "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles", "drones", "missiles", "helicopters", "submarines", "carriers", "civilian_ships", "airport_port"]:
|
||||
uk = f"us_{k}"
|
||||
ik = f"iran_{k}"
|
||||
if uk in parsed and isinstance(parsed[uk], (int, float)):
|
||||
|
||||
Reference in New Issue
Block a user