diff --git a/app/main.py b/app/main.py index 4bfc500..69339ef 100644 --- a/app/main.py +++ b/app/main.py @@ -292,7 +292,7 @@ async def rewrite(req: RewriteRequest, request: Request): src = req.source_text or "" logger.info( "api_rewrite_in rid=%s source_chars=%d title_hint_chars=%d tone=%s audience=%s " - "keep_points_chars=%d avoid_words_chars=%d", + "keep_points_chars=%d avoid_words_chars=%d target_body_chars=%d", rid, len(src), len(req.title_hint or ""), @@ -300,6 +300,7 @@ async def rewrite(req: RewriteRequest, request: Request): req.audience, len(req.keep_points or ""), len(req.avoid_words or ""), + int(req.target_body_chars or 500), ) result = rewriter.rewrite(req, request_id=rid) tr = result.trace or {} diff --git a/app/schemas.py b/app/schemas.py index e1eb183..546a582 100644 --- a/app/schemas.py +++ b/app/schemas.py @@ -11,6 +11,12 @@ class RewriteRequest(BaseModel): audience: str = "公众号读者" keep_points: str = "" avoid_words: str = "" + target_body_chars: int = Field( + default=500, + ge=180, + le=2200, + description="目标改写正文字数,前端可调(中文字符数近似)", + ) class RewriteResponse(BaseModel): diff --git a/app/services/ai_rewriter.py b/app/services/ai_rewriter.py index b0feaf3..ea48a77 100644 --- a/app/services/ai_rewriter.py +++ b/app/services/ai_rewriter.py @@ -34,9 +34,9 @@ def _is_likely_timeout_error(exc: BaseException) -> bool: return "timed out" in s or "timeout" in s -# 短文洗稿:正文目标约 500 字,优先完整性(软约束,不硬截断) +# 短文洗稿:默认目标约 500 字,支持前端按需配置 MIN_BODY_CHARS = 80 -TARGET_BODY_CHARS = 500 +DEFAULT_TARGET_BODY_CHARS = 500 def _preview_for_log(text: str, limit: int = 400) -> str: @@ -46,7 +46,7 @@ def _preview_for_log(text: str, limit: int = 400) -> str: return t[: limit - 1] + "…" -SYSTEM_PROMPT = """ +SYSTEM_PROMPT_TEMPLATE = """ 你是资深中文科普类公众号编辑,擅长把长文、线程贴改写成**极短、好读**的推送。 目标:在**不偏离原意**的前提下,用最少字数讲清一件事;不要写成技术方案、长文大纲或带很多小标题的文章。 @@ -54,14 +54,14 @@ SYSTEM_PROMPT = """ 1) **忠实原意**:只概括、转述原文已有信息,不编造事实,不偷换主题; 2) 语气通俗、干脆,避免套话堆砌; 3) 只输出合法 JSON:title, summary, body_markdown; -4) 
**body_markdown 约束**:按内容密度使用 **4~6 个自然段**;段与段之间用一个空行分隔;**不要**使用 # / ## 标题符号;正文以 **约 500 字**为目标,优先完整表达并避免冗长重复; +4) **body_markdown 约束**:按内容密度使用 **4~6 个自然段**;段与段之间用一个空行分隔;**不要**使用 # / ## 标题符号;正文以 **约 {target_chars} 字**为目标,优先完整表达并避免冗长重复; 5) title、summary 也要短:标题约 8~18 字;摘要约 40~80 字; 6) 关键观点需要加粗:请用 Markdown `**加粗**` 标出 2~4 个重点短语; 7) JSON 字符串内引号请用「」或『』,勿用未转义的英文 "。 """.strip() -REWRITE_SCHEMA_HINT = """ +REWRITE_SCHEMA_HINT_TEMPLATE = """ 请输出 JSON(勿包在 ``` 里),例如: { "title": "短标题,点明主题", @@ -72,22 +72,38 @@ REWRITE_SCHEMA_HINT = """ body_markdown 写法: - 使用 **4~6 段**:每段若干完整句子,段之间 **\\n\\n**(空一行); - **禁止** markdown 标题(不要用 #); -- 正文目标约 **500 字**(可上下浮动),以信息完整为先,避免冗长和重复; +- 正文目标约 **{target_chars} 字**(可上下浮动),以信息完整为先,避免冗长和重复; - 请用 `**...**` 加粗 2~4 个关键观点词; - 内容顺序建议:首段交代在说什么;中间段展开关键信息;末段收束或提醒(均须紧扣原文,勿乱发挥)。 """.strip() # 通义等模型若首次过短/结构不对,再要一次 -_JSON_BODY_TOO_SHORT_RETRY = """ +_JSON_BODY_TOO_SHORT_RETRY_TEMPLATE = """ 【系统复检】上一次 body_markdown 不符合要求。请重输出**完整** JSON: -- 正文必须使用 **4~6 个自然段**(仅 \\n\\n 分段),无 # 标题;篇幅尽量收敛到约 500 字,同时保持信息完整; +- 正文必须使用 **4~6 个自然段**(仅 \\n\\n 分段),无 # 标题;篇幅尽量收敛到约 {target_chars} 字,同时保持信息完整; - 忠实原稿、简短高效; - 引号只用「」『』; - 只输出 JSON。 """.strip() +def _target_chars(req: RewriteRequest) -> int: + return max(180, min(2200, int(req.target_body_chars or DEFAULT_TARGET_BODY_CHARS))) + + +def _system_prompt(target_chars: int) -> str: + return SYSTEM_PROMPT_TEMPLATE.replace("{target_chars}", str(target_chars)) + + +def _rewrite_schema_hint(target_chars: int) -> str: + return REWRITE_SCHEMA_HINT_TEMPLATE.replace("{target_chars}", str(target_chars))  # str.replace, not str.format: the template embeds a literal JSON example whose braces str.format would parse as fields (KeyError) + + +def _retry_hint(target_chars: int) -> str: + return _JSON_BODY_TOO_SHORT_RETRY_TEMPLATE.replace("{target_chars}", str(target_chars)) + + class AIRewriter: def __init__(self) -> None: self._client = None @@ -121,6 +137,7 @@ class AIRewriter: "provider": "dashscope" if self._prefer_chat_first else "openai_compatible", "source_chars_in": len(req.source_text or ""), "cleaned_chars": len(cleaned_source), + "target_body_chars": _target_chars(req), "openai_timeout_env_sec": 
settings.openai_timeout, "steps": [], } @@ -289,8 +306,15 @@ class AIRewriter: def _model_rewrite( self, req: RewriteRequest, cleaned_source: str, timeout_sec: float, request_id: str = "" ) -> dict | None: - user_prompt = self._build_user_prompt(req, cleaned_source) - return self._call_model_json(user_prompt, timeout_sec=timeout_sec, request_id=request_id) + target_chars = _target_chars(req) + user_prompt = self._build_user_prompt(req, cleaned_source, target_chars) + return self._call_model_json( + user_prompt, + system_prompt=_system_prompt(target_chars), + target_chars=target_chars, + timeout_sec=timeout_sec, + request_id=request_id, + ) def _model_polish( self, @@ -301,6 +325,7 @@ class AIRewriter: timeout_sec: float, request_id: str = "", ) -> dict | None: + target_chars = _target_chars(req) issue_text = "\n".join([f"- {i}" for i in issues]) user_prompt = f""" 你上一次的改写稿未通过质检,请针对下列问题重写;体裁仍为**科普介绍类公众号**,**忠实原稿**,不要写成技术方案或内部汇报。 @@ -323,11 +348,17 @@ class AIRewriter: - 必须保留观点:{req.keep_points or '无'} - 避免词汇:{req.avoid_words or '无'} -请输出一版全新稿件。{REWRITE_SCHEMA_HINT} +请输出一版全新稿件。{_rewrite_schema_hint(target_chars)} """.strip() - return self._call_model_json(user_prompt, timeout_sec=timeout_sec, request_id=request_id) + return self._call_model_json( + user_prompt, + system_prompt=_system_prompt(target_chars), + target_chars=target_chars, + timeout_sec=timeout_sec, + request_id=request_id, + ) - def _build_user_prompt(self, req: RewriteRequest, cleaned_source: str) -> str: + def _build_user_prompt(self, req: RewriteRequest, cleaned_source: str, target_chars: int) -> str: return f""" 原始内容(已清洗): {cleaned_source} @@ -340,7 +371,7 @@ class AIRewriter: - 必须保留观点:{req.keep_points or '无'} - 避免词汇:{req.avoid_words or '无'} -任务:在**不偏离原帖主题与事实**的前提下,改写成科普介绍风格的公众号正文(好读、讲清楚,而非技术实施方案)。{REWRITE_SCHEMA_HINT} +任务:在**不偏离原帖主题与事实**的前提下,改写成科普介绍风格的公众号正文(好读、讲清楚,而非技术实施方案)。{_rewrite_schema_hint(target_chars)} """.strip() def _fallback_rewrite( @@ -554,14 +585,21 @@ class AIRewriter: escaped = False 
return "".join(out) - def _chat_completions_json(self, user_prompt: str, timeout_sec: float, request_id: str) -> dict | None: + def _chat_completions_json( + self, + user_prompt: str, + system_prompt: str, + target_chars: int, + timeout_sec: float, + request_id: str, + ) -> dict | None: """chat.completions:通义兼容层在 json_object 下易产出极短 JSON,故 DashScope 不传 response_format,并支持短文自动重试。""" max_attempts = 2 if self._prefer_chat_first else 1 deadline = time.monotonic() + max(0.0, timeout_sec) pe = user_prompt for attempt in range(max_attempts): if attempt == 1: - pe = user_prompt + _JSON_BODY_TOO_SHORT_RETRY + pe = user_prompt + _retry_hint(target_chars) remaining = deadline - time.monotonic() if remaining <= 0: logger.warning( @@ -585,7 +623,7 @@ class AIRewriter: create_kwargs: dict[str, Any] = { "model": settings.openai_model, "messages": [ - {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "system", "content": system_prompt}, {"role": "user", "content": pe}, ], "max_tokens": settings.openai_max_output_tokens, @@ -684,7 +722,14 @@ class AIRewriter: return parsed return None - def _call_model_json(self, user_prompt: str, timeout_sec: float, request_id: str = "") -> dict | None: + def _call_model_json( + self, + user_prompt: str, + system_prompt: str, + target_chars: int, + timeout_sec: float, + request_id: str = "", + ) -> dict | None: methods = ["chat", "responses"] if self._prefer_chat_first else ["responses", "chat"] logger.info( "model_call_begin rid=%s model=%s timeout_s=%.1f prefer_chat_first=%s prompt_chars=%d " @@ -704,7 +749,7 @@ class AIRewriter: completion = self._client.responses.create( model=settings.openai_model, input=[ - {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, ], text={"format": {"type": "json_object"}}, @@ -734,7 +779,13 @@ class AIRewriter: if method == "chat": try: t_chat = time.monotonic() - out = self._chat_completions_json(user_prompt, timeout_sec, 
request_id) + out = self._chat_completions_json( + user_prompt, + system_prompt=system_prompt, + target_chars=target_chars, + timeout_sec=timeout_sec, + request_id=request_id, + ) if out is not None: return out if self._prefer_chat_first: @@ -814,11 +865,14 @@ class AIRewriter: elif pc > max_p: issues.append(f"正文段落偏多(当前 {pc} 段),建议控制在 {min_p}-{max_p} 段") - if len(body) < MIN_BODY_CHARS: - issues.append(f"正文过短(当前阈值 ≥{MIN_BODY_CHARS} 字)") - elif len(body) > 900: + target_chars = _target_chars(req) + min_body_chars = max(MIN_BODY_CHARS, int(target_chars * 0.45)) + max_body_chars = int(target_chars * 1.7) + if len(body) < min_body_chars: + issues.append(f"正文过短(当前阈值 ≥{min_body_chars} 字)") + elif len(body) > max_body_chars: issues.append( - f"正文偏长(当前 {len(body)} 字),建议收敛到约 {TARGET_BODY_CHARS} 字(可上下浮动)" + f"正文偏长(当前 {len(body)} 字),建议收敛到约 {target_chars} 字(可上下浮动)" ) if re.search(r"(?m)^#+\s", body): diff --git a/app/static/app.js b/app/static/app.js index cfaa368..f452603 100644 --- a/app/static/app.js +++ b/app/static/app.js @@ -17,6 +17,15 @@ const wechatBtn = $("wechatBtn"); const imBtn = $("imBtn"); const coverUploadBtn = $("coverUploadBtn"); const logoutBtn = $("logoutBtn"); +const targetBodyCharsInput = $("targetBodyChars"); + +function syncTargetCharChips() { + const val = Number((targetBodyCharsInput && targetBodyCharsInput.value) || 0); + document.querySelectorAll(".target-char-chip").forEach((btn) => { + const n = Number(btn.getAttribute("data-target-chars") || 0); + btn.classList.toggle("is-active", Number.isFinite(val) && val === n); + }); +} function countText(v) { return (v || "").trim().length; @@ -183,12 +192,30 @@ if (logoutBtn) { }); } +document.querySelectorAll(".target-char-chip").forEach((btn) => { + btn.addEventListener("click", () => { + const n = Number(btn.getAttribute("data-target-chars") || 0); + if (!targetBodyCharsInput || !Number.isFinite(n) || n < 1) return; + targetBodyCharsInput.value = String(n); + syncTargetCharChips(); + }); +}); + +if 
(targetBodyCharsInput) { + targetBodyCharsInput.addEventListener("input", syncTargetCharChips); +} + $("rewriteBtn").addEventListener("click", async () => { const sourceText = $("sourceText").value.trim(); + const targetBodyChars = Number(($("targetBodyChars") && $("targetBodyChars").value) || 500); if (sourceText.length < 20) { setStatus("原始内容太短,至少 20 个字符", true); return; } + if (!Number.isFinite(targetBodyChars) || targetBodyChars < 180 || targetBodyChars > 2200) { + setStatus("改写目标字数需在 180~2200 之间", true); + return; + } setStatus("正在改写..."); setLoading(rewriteBtn, true, "改写并排版", "改写中..."); @@ -202,6 +229,7 @@ $("rewriteBtn").addEventListener("click", async () => { audience, keep_points: $("keepPoints").value, avoid_words: $("avoidWords").value, + target_body_chars: Math.round(targetBodyChars), }); $("title").value = data.title || ""; $("summary").value = data.summary || ""; @@ -307,3 +335,4 @@ $("imBtn").addEventListener("click", async () => { updateCounters(); initMultiDropdowns(); initWechatAccountSwitch(); +syncTargetCharChips(); diff --git a/app/static/style.css b/app/static/style.css index aa2edab..c47f415 100644 --- a/app/static/style.css +++ b/app/static/style.css @@ -327,6 +327,36 @@ label { margin-top: 4px; } +.target-chars-quick { + display: flex; + gap: 8px; + margin-top: 8px; + flex-wrap: wrap; +} + +.target-char-chip { + width: auto; + margin-top: 0; + padding: 4px 10px; + border-radius: 999px; + border: 1px solid #cbd5e1; + background: #fff; + color: #334155; + font-size: 12px; + font-weight: 700; + line-height: 1.5; +} + +.target-char-chip:hover { + background: #f8fafc; +} + +.target-char-chip.is-active { + border-color: var(--accent); + background: var(--accent-soft); + color: var(--accent-2); +} + .meta { color: var(--muted); font-size: 12px; diff --git a/app/templates/index.html b/app/templates/index.html index 24e1165..11fa99c 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -94,6 +94,19 @@ +
+
+ + +
+ + + + +
+
+
+

@@ -121,7 +134,7 @@

未上传时将使用后端默认封面策略。

- + 0 字