fix:优化工作面板,支持动态字数

This commit is contained in:
Daniel
2026-04-21 13:14:53 +08:00
parent e69666dbb3
commit d76c7fa25a
6 changed files with 159 additions and 26 deletions

View File

@@ -292,7 +292,7 @@ async def rewrite(req: RewriteRequest, request: Request):
src = req.source_text or "" src = req.source_text or ""
logger.info( logger.info(
"api_rewrite_in rid=%s source_chars=%d title_hint_chars=%d tone=%s audience=%s " "api_rewrite_in rid=%s source_chars=%d title_hint_chars=%d tone=%s audience=%s "
"keep_points_chars=%d avoid_words_chars=%d", "keep_points_chars=%d avoid_words_chars=%d target_body_chars=%d",
rid, rid,
len(src), len(src),
len(req.title_hint or ""), len(req.title_hint or ""),
@@ -300,6 +300,7 @@ async def rewrite(req: RewriteRequest, request: Request):
req.audience, req.audience,
len(req.keep_points or ""), len(req.keep_points or ""),
len(req.avoid_words or ""), len(req.avoid_words or ""),
int(req.target_body_chars or 500),
) )
result = rewriter.rewrite(req, request_id=rid) result = rewriter.rewrite(req, request_id=rid)
tr = result.trace or {} tr = result.trace or {}

View File

@@ -11,6 +11,12 @@ class RewriteRequest(BaseModel):
audience: str = "公众号读者" audience: str = "公众号读者"
keep_points: str = "" keep_points: str = ""
avoid_words: str = "" avoid_words: str = ""
target_body_chars: int = Field(
default=500,
ge=180,
le=2200,
description="目标改写正文字数,前端可调(中文字符数近似)",
)
class RewriteResponse(BaseModel): class RewriteResponse(BaseModel):

View File

@@ -34,9 +34,9 @@ def _is_likely_timeout_error(exc: BaseException) -> bool:
return "timed out" in s or "timeout" in s return "timed out" in s or "timeout" in s
# 短文洗稿:正文目标约 500 字,优先完整性(软约束,不硬截断) # 短文洗稿:默认目标约 500 字,支持前端按需配置
MIN_BODY_CHARS = 80 MIN_BODY_CHARS = 80
TARGET_BODY_CHARS = 500 DEFAULT_TARGET_BODY_CHARS = 500
def _preview_for_log(text: str, limit: int = 400) -> str: def _preview_for_log(text: str, limit: int = 400) -> str:
@@ -46,7 +46,7 @@ def _preview_for_log(text: str, limit: int = 400) -> str:
return t[: limit - 1] + "" return t[: limit - 1] + ""
SYSTEM_PROMPT = """ SYSTEM_PROMPT_TEMPLATE = """
你是资深中文科普类公众号编辑,擅长把长文、线程贴改写成**极短、好读**的推送。 你是资深中文科普类公众号编辑,擅长把长文、线程贴改写成**极短、好读**的推送。
目标:在**不偏离原意**的前提下,用最少字数讲清一件事;不要写成技术方案、长文大纲或带很多小标题的文章。 目标:在**不偏离原意**的前提下,用最少字数讲清一件事;不要写成技术方案、长文大纲或带很多小标题的文章。
@@ -54,14 +54,14 @@ SYSTEM_PROMPT = """
1) **忠实原意**:只概括、转述原文已有信息,不编造事实,不偷换主题; 1) **忠实原意**:只概括、转述原文已有信息,不编造事实,不偷换主题;
2) 语气通俗、干脆,避免套话堆砌; 2) 语气通俗、干脆,避免套话堆砌;
3) 只输出合法 JSONtitle, summary, body_markdown 3) 只输出合法 JSONtitle, summary, body_markdown
4) **body_markdown 约束**:按内容密度使用 **4~6 个自然段**;段与段之间用一个空行分隔;**不要**使用 # / ## 标题符号;正文以 **约 500 字**为目标,优先完整表达并避免冗长重复; 4) **body_markdown 约束**:按内容密度使用 **4~6 个自然段**;段与段之间用一个空行分隔;**不要**使用 # / ## 标题符号;正文以 **约 {target_chars} 字**为目标,优先完整表达并避免冗长重复;
5) title、summary 也要短:标题约 818 字;摘要约 4080 字; 5) title、summary 也要短:标题约 818 字;摘要约 4080 字;
6) 关键观点需要加粗:请用 Markdown `**加粗**` 标出 2~4 个重点短语; 6) 关键观点需要加粗:请用 Markdown `**加粗**` 标出 2~4 个重点短语;
7) JSON 字符串内引号请用「」或『』,勿用未转义的英文 " 7) JSON 字符串内引号请用「」或『』,勿用未转义的英文 "
""".strip() """.strip()
REWRITE_SCHEMA_HINT = """ REWRITE_SCHEMA_HINT_TEMPLATE = """
请输出 JSON勿包在 ``` 里),例如: 请输出 JSON勿包在 ``` 里),例如:
{ {
"title": "短标题,点明主题", "title": "短标题,点明主题",
@@ -72,22 +72,38 @@ REWRITE_SCHEMA_HINT = """
body_markdown 写法: body_markdown 写法:
- 使用 **4~6 段**:每段若干完整句子,段之间 **\\n\\n**(空一行); - 使用 **4~6 段**:每段若干完整句子,段之间 **\\n\\n**(空一行);
- **禁止** markdown 标题(不要用 # - **禁止** markdown 标题(不要用 #
- 正文目标约 **500 字**(可上下浮动),以信息完整为先,避免冗长和重复; - 正文目标约 **{target_chars} 字**(可上下浮动),以信息完整为先,避免冗长和重复;
- 请用 `**...**` 加粗 2~4 个关键观点词; - 请用 `**...**` 加粗 2~4 个关键观点词;
- 内容顺序建议:首段交代在说什么;中间段展开关键信息;末段收束或提醒(均须紧扣原文,勿乱发挥)。 - 内容顺序建议:首段交代在说什么;中间段展开关键信息;末段收束或提醒(均须紧扣原文,勿乱发挥)。
""".strip() """.strip()
# 通义等模型若首次过短/结构不对,再要一次 # 通义等模型若首次过短/结构不对,再要一次
_JSON_BODY_TOO_SHORT_RETRY = """ _JSON_BODY_TOO_SHORT_RETRY_TEMPLATE = """
【系统复检】上一次 body_markdown 不符合要求。请重输出**完整** JSON 【系统复检】上一次 body_markdown 不符合要求。请重输出**完整** JSON
- 正文必须使用 **4~6 个自然段**(仅 \\n\\n 分段),无 # 标题;篇幅尽量收敛到约 500 字,同时保持信息完整; - 正文必须使用 **4~6 个自然段**(仅 \\n\\n 分段),无 # 标题;篇幅尽量收敛到约 {target_chars} 字,同时保持信息完整;
- 忠实原稿、简短高效; - 忠实原稿、简短高效;
- 引号只用「」『』; - 引号只用「」『』;
- 只输出 JSON。 - 只输出 JSON。
""".strip() """.strip()
def _target_chars(req: RewriteRequest) -> int:
    """Return the body-length target for *req*, clamped to 180..2200.

    A falsy ``target_body_chars`` (``None`` or ``0``) falls back to
    ``DEFAULT_TARGET_BODY_CHARS`` before the clamp is applied.
    """
    requested = int(req.target_body_chars or DEFAULT_TARGET_BODY_CHARS)
    capped = min(2200, requested)
    return max(180, capped)
def _system_prompt(target_chars: int) -> str:
    """Render the system prompt with *target_chars* substituted in."""
    template = SYSTEM_PROMPT_TEMPLATE
    return template.format(target_chars=target_chars)
def _rewrite_schema_hint(target_chars: int) -> str:
    """Return the JSON schema hint with the ``{target_chars}`` marker filled in.

    The hint text embeds a literal JSON example, so it is not a valid
    ``str.format`` template: the example's bare ``{`` would be parsed as a
    replacement field and raise ``KeyError``/``ValueError``. Substituting the
    single marker by plain text replacement leaves every other brace intact.
    """
    return REWRITE_SCHEMA_HINT_TEMPLATE.replace("{target_chars}", str(target_chars))
def _retry_hint(target_chars: int) -> str:
    """Render the retry instruction with *target_chars* substituted in."""
    template = _JSON_BODY_TOO_SHORT_RETRY_TEMPLATE
    return template.format(target_chars=target_chars)
class AIRewriter: class AIRewriter:
def __init__(self) -> None: def __init__(self) -> None:
self._client = None self._client = None
@@ -121,6 +137,7 @@ class AIRewriter:
"provider": "dashscope" if self._prefer_chat_first else "openai_compatible", "provider": "dashscope" if self._prefer_chat_first else "openai_compatible",
"source_chars_in": len(req.source_text or ""), "source_chars_in": len(req.source_text or ""),
"cleaned_chars": len(cleaned_source), "cleaned_chars": len(cleaned_source),
"target_body_chars": _target_chars(req),
"openai_timeout_env_sec": settings.openai_timeout, "openai_timeout_env_sec": settings.openai_timeout,
"steps": [], "steps": [],
} }
@@ -289,8 +306,15 @@ class AIRewriter:
def _model_rewrite( def _model_rewrite(
self, req: RewriteRequest, cleaned_source: str, timeout_sec: float, request_id: str = "" self, req: RewriteRequest, cleaned_source: str, timeout_sec: float, request_id: str = ""
) -> dict | None: ) -> dict | None:
user_prompt = self._build_user_prompt(req, cleaned_source) target_chars = _target_chars(req)
return self._call_model_json(user_prompt, timeout_sec=timeout_sec, request_id=request_id) user_prompt = self._build_user_prompt(req, cleaned_source, target_chars)
return self._call_model_json(
user_prompt,
system_prompt=_system_prompt(target_chars),
target_chars=target_chars,
timeout_sec=timeout_sec,
request_id=request_id,
)
def _model_polish( def _model_polish(
self, self,
@@ -301,6 +325,7 @@ class AIRewriter:
timeout_sec: float, timeout_sec: float,
request_id: str = "", request_id: str = "",
) -> dict | None: ) -> dict | None:
target_chars = _target_chars(req)
issue_text = "\n".join([f"- {i}" for i in issues]) issue_text = "\n".join([f"- {i}" for i in issues])
user_prompt = f""" user_prompt = f"""
你上一次的改写稿未通过质检,请针对下列问题重写;体裁仍为**科普介绍类公众号****忠实原稿**,不要写成技术方案或内部汇报。 你上一次的改写稿未通过质检,请针对下列问题重写;体裁仍为**科普介绍类公众号****忠实原稿**,不要写成技术方案或内部汇报。
@@ -323,11 +348,17 @@ class AIRewriter:
- 必须保留观点:{req.keep_points or ''} - 必须保留观点:{req.keep_points or ''}
- 避免词汇:{req.avoid_words or ''} - 避免词汇:{req.avoid_words or ''}
请输出一版全新稿件。{REWRITE_SCHEMA_HINT} 请输出一版全新稿件。{_rewrite_schema_hint(target_chars)}
""".strip() """.strip()
return self._call_model_json(user_prompt, timeout_sec=timeout_sec, request_id=request_id) return self._call_model_json(
user_prompt,
system_prompt=_system_prompt(target_chars),
target_chars=target_chars,
timeout_sec=timeout_sec,
request_id=request_id,
)
def _build_user_prompt(self, req: RewriteRequest, cleaned_source: str) -> str: def _build_user_prompt(self, req: RewriteRequest, cleaned_source: str, target_chars: int) -> str:
return f""" return f"""
原始内容(已清洗): 原始内容(已清洗):
{cleaned_source} {cleaned_source}
@@ -340,7 +371,7 @@ class AIRewriter:
- 必须保留观点:{req.keep_points or ''} - 必须保留观点:{req.keep_points or ''}
- 避免词汇:{req.avoid_words or ''} - 避免词汇:{req.avoid_words or ''}
任务:在**不偏离原帖主题与事实**的前提下,改写成科普介绍风格的公众号正文(好读、讲清楚,而非技术实施方案)。{REWRITE_SCHEMA_HINT} 任务:在**不偏离原帖主题与事实**的前提下,改写成科普介绍风格的公众号正文(好读、讲清楚,而非技术实施方案)。{_rewrite_schema_hint(target_chars)}
""".strip() """.strip()
def _fallback_rewrite( def _fallback_rewrite(
@@ -554,14 +585,21 @@ class AIRewriter:
escaped = False escaped = False
return "".join(out) return "".join(out)
def _chat_completions_json(self, user_prompt: str, timeout_sec: float, request_id: str) -> dict | None: def _chat_completions_json(
self,
user_prompt: str,
system_prompt: str,
target_chars: int,
timeout_sec: float,
request_id: str,
) -> dict | None:
"""chat.completions通义兼容层在 json_object 下易产出极短 JSON故 DashScope 不传 response_format并支持短文自动重试。""" """chat.completions通义兼容层在 json_object 下易产出极短 JSON故 DashScope 不传 response_format并支持短文自动重试。"""
max_attempts = 2 if self._prefer_chat_first else 1 max_attempts = 2 if self._prefer_chat_first else 1
deadline = time.monotonic() + max(0.0, timeout_sec) deadline = time.monotonic() + max(0.0, timeout_sec)
pe = user_prompt pe = user_prompt
for attempt in range(max_attempts): for attempt in range(max_attempts):
if attempt == 1: if attempt == 1:
pe = user_prompt + _JSON_BODY_TOO_SHORT_RETRY pe = user_prompt + _retry_hint(target_chars)
remaining = deadline - time.monotonic() remaining = deadline - time.monotonic()
if remaining <= 0: if remaining <= 0:
logger.warning( logger.warning(
@@ -585,7 +623,7 @@ class AIRewriter:
create_kwargs: dict[str, Any] = { create_kwargs: dict[str, Any] = {
"model": settings.openai_model, "model": settings.openai_model,
"messages": [ "messages": [
{"role": "system", "content": SYSTEM_PROMPT}, {"role": "system", "content": system_prompt},
{"role": "user", "content": pe}, {"role": "user", "content": pe},
], ],
"max_tokens": settings.openai_max_output_tokens, "max_tokens": settings.openai_max_output_tokens,
@@ -684,7 +722,14 @@ class AIRewriter:
return parsed return parsed
return None return None
def _call_model_json(self, user_prompt: str, timeout_sec: float, request_id: str = "") -> dict | None: def _call_model_json(
self,
user_prompt: str,
system_prompt: str,
target_chars: int,
timeout_sec: float,
request_id: str = "",
) -> dict | None:
methods = ["chat", "responses"] if self._prefer_chat_first else ["responses", "chat"] methods = ["chat", "responses"] if self._prefer_chat_first else ["responses", "chat"]
logger.info( logger.info(
"model_call_begin rid=%s model=%s timeout_s=%.1f prefer_chat_first=%s prompt_chars=%d " "model_call_begin rid=%s model=%s timeout_s=%.1f prefer_chat_first=%s prompt_chars=%d "
@@ -704,7 +749,7 @@ class AIRewriter:
completion = self._client.responses.create( completion = self._client.responses.create(
model=settings.openai_model, model=settings.openai_model,
input=[ input=[
{"role": "system", "content": SYSTEM_PROMPT}, {"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}, {"role": "user", "content": user_prompt},
], ],
text={"format": {"type": "json_object"}}, text={"format": {"type": "json_object"}},
@@ -734,7 +779,13 @@ class AIRewriter:
if method == "chat": if method == "chat":
try: try:
t_chat = time.monotonic() t_chat = time.monotonic()
out = self._chat_completions_json(user_prompt, timeout_sec, request_id) out = self._chat_completions_json(
user_prompt,
system_prompt=system_prompt,
target_chars=target_chars,
timeout_sec=timeout_sec,
request_id=request_id,
)
if out is not None: if out is not None:
return out return out
if self._prefer_chat_first: if self._prefer_chat_first:
@@ -814,11 +865,14 @@ class AIRewriter:
elif pc > max_p: elif pc > max_p:
issues.append(f"正文段落偏多(当前 {pc} 段),建议控制在 {min_p}-{max_p}") issues.append(f"正文段落偏多(当前 {pc} 段),建议控制在 {min_p}-{max_p}")
if len(body) < MIN_BODY_CHARS: target_chars = _target_chars(req)
issues.append(f"正文过短(当前阈值 ≥{MIN_BODY_CHARS} 字)") min_body_chars = max(MIN_BODY_CHARS, int(target_chars * 0.45))
elif len(body) > 900: max_body_chars = int(target_chars * 1.7)
if len(body) < min_body_chars:
issues.append(f"正文过短(当前阈值 ≥{min_body_chars} 字)")
elif len(body) > max_body_chars:
issues.append( issues.append(
f"正文偏长(当前 {len(body)} 字),建议收敛到约 {TARGET_BODY_CHARS} 字(可上下浮动)" f"正文偏长(当前 {len(body)} 字),建议收敛到约 {target_chars} 字(可上下浮动)"
) )
if re.search(r"(?m)^#+\s", body): if re.search(r"(?m)^#+\s", body):

View File

@@ -17,6 +17,15 @@ const wechatBtn = $("wechatBtn");
const imBtn = $("imBtn"); const imBtn = $("imBtn");
const coverUploadBtn = $("coverUploadBtn"); const coverUploadBtn = $("coverUploadBtn");
const logoutBtn = $("logoutBtn"); const logoutBtn = $("logoutBtn");
const targetBodyCharsInput = $("targetBodyChars");
/**
 * Mark as active the quick-pick chip whose data-target-chars equals the number
 * currently in the target-length input, and clear the mark on all other chips.
 */
function syncTargetCharChips() {
  const raw = targetBodyCharsInput ? targetBodyCharsInput.value : "";
  const current = Number(raw || 0);
  for (const chip of document.querySelectorAll(".target-char-chip")) {
    const chipValue = Number(chip.getAttribute("data-target-chars") || 0);
    const matches = Number.isFinite(current) && current === chipValue;
    chip.classList.toggle("is-active", matches);
  }
}
function countText(v) { function countText(v) {
return (v || "").trim().length; return (v || "").trim().length;
@@ -183,12 +192,30 @@ if (logoutBtn) {
}); });
} }
// Quick-pick chips: a click copies the chip's data-target-chars value into the
// target-length input and refreshes the chip highlight.
for (const chip of document.querySelectorAll(".target-char-chip")) {
  chip.addEventListener("click", () => {
    if (!targetBodyCharsInput) return;
    const picked = Number(chip.getAttribute("data-target-chars") || 0);
    if (!Number.isFinite(picked) || picked < 1) return;
    targetBodyCharsInput.value = String(picked);
    syncTargetCharChips();
  });
}

// Typing directly into the input also keeps the chip highlight in sync.
if (targetBodyCharsInput) {
  targetBodyCharsInput.addEventListener("input", syncTargetCharChips);
}
$("rewriteBtn").addEventListener("click", async () => { $("rewriteBtn").addEventListener("click", async () => {
const sourceText = $("sourceText").value.trim(); const sourceText = $("sourceText").value.trim();
const targetBodyChars = Number(($("targetBodyChars") && $("targetBodyChars").value) || 500);
if (sourceText.length < 20) { if (sourceText.length < 20) {
setStatus("原始内容太短,至少 20 个字符", true); setStatus("原始内容太短,至少 20 个字符", true);
return; return;
} }
if (!Number.isFinite(targetBodyChars) || targetBodyChars < 180 || targetBodyChars > 2200) {
setStatus("改写目标字数需在 180~2200 之间", true);
return;
}
setStatus("正在改写..."); setStatus("正在改写...");
setLoading(rewriteBtn, true, "改写并排版", "改写中..."); setLoading(rewriteBtn, true, "改写并排版", "改写中...");
@@ -202,6 +229,7 @@ $("rewriteBtn").addEventListener("click", async () => {
audience, audience,
keep_points: $("keepPoints").value, keep_points: $("keepPoints").value,
avoid_words: $("avoidWords").value, avoid_words: $("avoidWords").value,
target_body_chars: Math.round(targetBodyChars),
}); });
$("title").value = data.title || ""; $("title").value = data.title || "";
$("summary").value = data.summary || ""; $("summary").value = data.summary || "";
@@ -307,3 +335,4 @@ $("imBtn").addEventListener("click", async () => {
updateCounters(); updateCounters();
initMultiDropdowns(); initMultiDropdowns();
initWechatAccountSwitch(); initWechatAccountSwitch();
syncTargetCharChips();

View File

@@ -327,6 +327,36 @@ label {
margin-top: 4px; margin-top: 4px;
} }
.target-chars-quick {
display: flex;
gap: 8px;
margin-top: 8px;
flex-wrap: wrap;
}
.target-char-chip {
width: auto;
margin-top: 0;
padding: 4px 10px;
border-radius: 999px;
border: 1px solid #cbd5e1;
background: #fff;
color: #334155;
font-size: 12px;
font-weight: 700;
line-height: 1.5;
}
.target-char-chip:hover {
background: #f8fafc;
}
.target-char-chip.is-active {
border-color: var(--accent);
background: var(--accent-soft);
color: var(--accent-2);
}
.meta { .meta {
color: var(--muted); color: var(--muted);
font-size: 12px; font-size: 12px;

View File

@@ -94,6 +94,19 @@
<label>必须保留观点</label> <label>必须保留观点</label>
<input id="keepPoints" type="text" placeholder="逗号分隔" /> <input id="keepPoints" type="text" placeholder="逗号分隔" />
<div class="grid2">
<div>
<label>改写目标字数</label>
<input id="targetBodyChars" type="number" min="180" max="2200" step="10" value="500" placeholder="如500" />
<div class="target-chars-quick" aria-label="快捷字数">
<button type="button" class="target-char-chip" data-target-chars="300">300</button>
<button type="button" class="target-char-chip is-active" data-target-chars="500">500</button>
<button type="button" class="target-char-chip" data-target-chars="800">800</button>
<button type="button" class="target-char-chip" data-target-chars="1200">1200</button>
</div>
</div>
</div>
<button id="rewriteBtn" class="primary">改写并排版</button> <button id="rewriteBtn" class="primary">改写并排版</button>
<p id="status" class="status"></p> <p id="status" class="status"></p>
</section> </section>
@@ -121,7 +134,7 @@
<p id="coverHint" class="muted small">未上传时将使用后端默认封面策略。</p> <p id="coverHint" class="muted small">未上传时将使用后端默认封面策略。</p>
<div class="field-head"> <div class="field-head">
<label>正文(5 自然段,建议 ≤500 字</label> <label>正文(4~6 自然段,字数由左侧配置</label>
<span id="bodyCount" class="meta">0 字</span> <span id="bodyCount" class="meta">0 字</span>
</div> </div>
<div class="body-split"> <div class="body-split">