fix: bug

2026-04-06 14:20:53 +08:00
parent babf24a0b0
commit 1d389767e6
14 changed files with 1079 additions and 179 deletions
--- a/.env.example
+++ b/.env.example
@@ -1,6 +1,20 @@
 # —— 通义千问（推荐）：阿里云 DashScope 的 OpenAI 兼容地址 + 模型名 + API Key
 # OPENAI_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
 # OPENAI_API_KEY=sk-你的DashScopeKey
 # OPENAI_MODEL=qwen3.5-plus
 OPENAI_API_KEY=
 OPENAI_BASE_URL=
 OPENAI_MODEL=gpt-4.1-mini
 # 通义长文 JSON 常需 60~120s+，过短会 APITimeout 后走兜底
 OPENAI_TIMEOUT=120
 # SDK 自动重试次数。设为 0 可避免单次请求被隐式重试拖长（例如 30s 变 60s+）
 OPENAI_MAX_RETRIES=0
 # 长文 JSON 建议 8192；通义等若正文仍偏短可适当再加大
 OPENAI_MAX_OUTPUT_TOKENS=8192
 OPENAI_SOURCE_MAX_CHARS=5000
 # 质检未通过时仍返回模型洗稿正文（quality_notes 记录问题）；设为 false 则严格退回保底稿
 AI_SOFT_ACCEPT=true
 LOG_LEVEL=INFO
 WECHAT_APPID=
 WECHAT_SECRET=
--- a/15
+++ b/15
@@ -1,9 +1,20 @@
-FROM python:3.11-slim
+# syntax=docker/dockerfile:1
 # 国内拉基础镜像慢时：docker compose build --build-arg PY_BASE=docker.m.daocloud.io/library/python:3.11-slim
 ARG PY_BASE=python:3.11-slim
 FROM ${PY_BASE}
 WORKDIR /app
 # 默认清华 PyPI；海外可：docker compose build --build-arg PIP_INDEX_URL=https://pypi.org/simple
 ARG PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
 ENV PIP_INDEX_URL=${PIP_INDEX_URL} \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_DEFAULT_TIMEOUT=120
 COPY requirements.txt ./
-RUN pip install --no-cache-dir -r requirements.txt
+# BuildKit 缓存加速重复构建；需 Docker 20.10+（compose 默认开 BuildKit）
 RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements.txt
 COPY . .
--- a/app/config.py
+++ b/app/config.py
@@ -9,8 +9,27 @@ class Settings(BaseSettings):
    openai_api_key: str | None = Field(default=None, alias="OPENAI_API_KEY")
    openai_base_url: str | None = Field(default=None, alias="OPENAI_BASE_URL")
    openai_model: str = Field(default="gpt-4.1-mini", alias="OPENAI_MODEL")
-    openai_timeout: float = Field(default=60.0, alias="OPENAI_TIMEOUT")
+    openai_timeout: float = Field(
        default=120.0,
        alias="OPENAI_TIMEOUT",
        description="HTTP 等待模型单轮响应的最长时间（秒）。通义长文 JSON 建议 120~180",
    )
    openai_max_retries: int = Field(
        default=0,
        alias="OPENAI_MAX_RETRIES",
        description="OpenAI SDK 自动重试次数。为避免单次请求被重试拉长，默认 0。",
    )
    openai_max_output_tokens: int = Field(
        default=8192,
        alias="OPENAI_MAX_OUTPUT_TOKENS",
        description="单次模型输出 token 上限；通义等长文 JSON 需足够大",
    )
    openai_source_max_chars: int = Field(default=5000, alias="OPENAI_SOURCE_MAX_CHARS")
    ai_soft_accept: bool = Field(
        default=True,
        alias="AI_SOFT_ACCEPT",
        description="质检未通过时仍输出模型洗稿正文（mode=ai，quality_notes 记录问题），仅模型完全失败时用保底稿",
    )
    wechat_appid: str | None = Field(default=None, alias="WECHAT_APPID")
    wechat_secret: str | None = Field(default=None, alias="WECHAT_SECRET")
--- a/app/logging_setup.py
+++ b/app/logging_setup.py
@@ -0,0 +1,25 @@
 """统一日志格式，便于 grep / 日志平台解析。"""
 from __future__ import annotations
 import logging
 import os
 def configure_logging() -> None:
    level_name = os.getenv("LOG_LEVEL", "INFO").upper()
    level = getattr(logging, level_name, logging.INFO)
    fmt = "%(asctime)s | %(levelname)s | %(name)s | %(message)s"
    datefmt = "%Y-%m-%d %H:%M:%S"
    root = logging.getLogger()
    if not root.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter(fmt=fmt, datefmt=datefmt))
        root.addHandler(handler)
    root.setLevel(level)
    # 降噪：第三方库默认 WARNING
    logging.getLogger("httpx").setLevel(logging.WARNING)
    logging.getLogger("httpcore").setLevel(logging.WARNING)
    logging.getLogger("openai").setLevel(logging.WARNING)
--- a/app/main.py
+++ b/app/main.py
@@ -1,17 +1,38 @@
 from __future__ import annotations
 import logging
 from urllib.parse import urlparse
 from fastapi import FastAPI, Request
 from fastapi.responses import HTMLResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
 from app.config import settings
 from app.logging_setup import configure_logging
 from app.middleware import RequestContextMiddleware
 from app.schemas import IMPublishRequest, RewriteRequest, WechatPublishRequest
 from app.services.ai_rewriter import AIRewriter
 from app.services.im import IMPublisher
 from app.services.wechat import WechatPublisher
 configure_logging()
 logger = logging.getLogger(__name__)
 app = FastAPI(title=settings.app_name)
@app.on_event("startup")
 async def _log_startup() -> None:
    logger.info(
        "app_start name=%s openai_configured=%s ai_soft_accept=%s",
        settings.app_name,
        bool(settings.openai_api_key),
        settings.ai_soft_accept,
    )
 app.add_middleware(RequestContextMiddleware)
 app.mount("/static", StaticFiles(directory="app/static"), name="static")
 templates = Jinja2Templates(directory="app/templates")
@@ -25,16 +46,82 @@ async def index(request: Request):
    return templates.TemplateResponse("index.html", {"request": request, "app_name": settings.app_name})
@app.get("/api/config")
 async def api_config():
    """供页面展示：当前是否接入模型、模型名、提供方（不含密钥）。"""
    base = settings.openai_base_url or ""
    provider = "dashscope" if "dashscope.aliyuncs.com" in base else "openai_compatible"
    host = urlparse(base).netloc if base else ""
    return {
        "openai_configured": bool(settings.openai_api_key),
        "openai_model": settings.openai_model,
        "provider": provider,
        "base_url_host": host or None,
        "openai_timeout_sec": settings.openai_timeout,
        "openai_max_output_tokens": settings.openai_max_output_tokens,
    }
@app.post("/api/rewrite")
-async def rewrite(req: RewriteRequest):
+async def rewrite(req: RewriteRequest, request: Request):
-    return rewriter.rewrite(req)
+    rid = getattr(request.state, "request_id", "")
    src = req.source_text or ""
    logger.info(
        "api_rewrite_in rid=%s source_chars=%d title_hint_chars=%d tone=%s audience=%s "
        "keep_points_chars=%d avoid_words_chars=%d",
        rid,
        len(src),
        len(req.title_hint or ""),
        req.tone,
        req.audience,
        len(req.keep_points or ""),
        len(req.avoid_words or ""),
    )
    result = rewriter.rewrite(req, request_id=rid)
    tr = result.trace or {}
    logger.info(
        "api_rewrite_out rid=%s mode=%s duration_ms=%s quality_notes=%d trace_steps=%s soft_accept=%s",
        rid,
        result.mode,
        tr.get("duration_ms"),
        len(result.quality_notes or []),
        len((tr.get("steps") or [])),
        tr.get("quality_soft_accept"),
    )
    return result
@app.post("/api/publish/wechat")
-async def publish_wechat(req: WechatPublishRequest):
+async def publish_wechat(req: WechatPublishRequest, request: Request):
-    return await wechat.publish_draft(req)
+    rid = getattr(request.state, "request_id", "")
    logger.info(
        "api_wechat_in rid=%s title_chars=%d summary_chars=%d body_md_chars=%d author_set=%s",
        rid,
        len(req.title or ""),
        len(req.summary or ""),
        len(req.body_markdown or ""),
        bool((req.author or "").strip()),
    )
    out = await wechat.publish_draft(req, request_id=rid)
    logger.info(
        "api_wechat_out rid=%s ok=%s detail=%s errcode=%s",
        rid,
        out.ok,
        (out.detail or "")[:120],
        (out.data or {}).get("errcode") if isinstance(out.data, dict) else None,
    )
    return out
@app.post("/api/publish/im")
-async def publish_im(req: IMPublishRequest):
+async def publish_im(req: IMPublishRequest, request: Request):
-    return await im.publish(req)
+    rid = getattr(request.state, "request_id", "")
    logger.info(
        "api_im_in rid=%s title_chars=%d body_md_chars=%d",
        rid,
        len(req.title or ""),
        len(req.body_markdown or ""),
    )
    out = await im.publish(req, request_id=rid)
    logger.info("api_im_out rid=%s ok=%s detail=%s", rid, out.ok, (out.detail or "")[:120])
    return out
--- a/app/middleware.py
+++ b/app/middleware.py
@@ -0,0 +1,61 @@
 from __future__ import annotations
 import logging
 import time
 import uuid
 from starlette.middleware.base import BaseHTTPMiddleware
 from starlette.requests import Request
 from starlette.responses import Response
 logger = logging.getLogger("app.http")
 class RequestContextMiddleware(BaseHTTPMiddleware):
    """注入 request_id，记录访问日志与耗时。"""
    async def dispatch(self, request: Request, call_next) -> Response:
        rid = request.headers.get("X-Request-ID") or str(uuid.uuid4())
        request.state.request_id = rid
        path = request.url.path
        if path.startswith("/static"):
            response = await call_next(request)
            response.headers["X-Request-ID"] = rid
            return response
        client = request.client.host if request.client else "-"
        if path.startswith("/api"):
            logger.info(
                "http_in method=%s path=%s rid=%s client=%s",
                request.method,
                path,
                rid,
                client,
            )
        started = time.perf_counter()
        try:
            response = await call_next(request)
        except Exception:
            duration_ms = (time.perf_counter() - started) * 1000
            logger.exception(
                "http_error method=%s path=%s duration_ms=%.1f rid=%s",
                request.method,
                path,
                duration_ms,
                rid,
            )
            raise
        duration_ms = (time.perf_counter() - started) * 1000
        response.headers["X-Request-ID"] = rid
        logger.info(
            "http_out method=%s path=%s status=%s duration_ms=%.1f rid=%s",
            request.method,
            path,
            response.status_code,
            duration_ms,
            rid,
        )
        return response
--- a/app/schemas.py
+++ b/app/schemas.py
@@ -1,3 +1,5 @@
 from typing import Any
 from pydantic import BaseModel, Field
@@ -16,6 +18,10 @@ class RewriteResponse(BaseModel):
    body_markdown: str
    mode: str = "ai"
    quality_notes: list[str] = []
    trace: dict[str, Any] | None = Field(
        default=None,
        description="改写链路追踪：请求 ID、耗时、模型、质检与降级原因，便于监测与回溯",
    )
 class WechatPublishRequest(BaseModel):
--- a/app/services/ai_rewriter.py
+++ b/app/services/ai_rewriter.py
@@ -5,7 +5,9 @@ import json
 import logging
 import re
 import time
 from typing import Any
 from textwrap import shorten
 from urllib.parse import urlparse
 from openai import OpenAI
@@ -15,47 +17,72 @@ from app.schemas import RewriteRequest, RewriteResponse
 logger = logging.getLogger(__name__)
 def _api_host(url: str | None) -> str:
    if not url:
        return ""
    try:
        return urlparse(url).netloc or ""
    except Exception:
        return ""
 def _is_likely_timeout_error(exc: BaseException) -> bool:
    n = type(exc).__name__.lower()
    if "timeout" in n:
        return True
    s = str(exc).lower()
    return "timed out" in s or "timeout" in s
 # 短文洗稿：5 个自然段、正文总字数上限（含标点）
 MAX_BODY_CHARS = 500
 MIN_BODY_CHARS = 80
 def _preview_for_log(text: str, limit: int = 400) -> str:
    t = (text or "").replace("\r\n", "\n").replace("\n", " ").strip()
    if len(t) <= limit:
        return t
    return t[: limit - 1] + "…"
 SYSTEM_PROMPT = """
-你是顶级中文公众号主编，擅长把 X/Twitter 的观点型内容改写成高质量公众号文章。
+你是资深中文科普类公众号编辑，擅长把长文、线程贴改写成**极短、好读**的推送。
-你的目标不是“同义替换”，而是“重构表达”，保证可读性、逻辑性和可发布性。
+目标：在**不偏离原意**的前提下，用最少字数讲清一件事；不要写成技术方案、长文大纲或带很多小标题的文章。
 硬性规则：
-1) 保留核心事实与关键观点，不编造数据，不夸大结论；
+1) **忠实原意**：只概括、转述原文已有信息，不编造事实，不偷换主题；
-2) 文章结构必须完整：导语 -> 核心观点 -> 深度分析 -> 落地建议 -> 结语；
+2) 语气通俗、干脆，避免套话堆砌；
-3) 风格自然，避免 AI 套话（如“首先其次最后”“赋能”“闭环”等空话）；
+3) 只输出合法 JSON：title, summary, body_markdown；
-4) 每节都要有信息增量，不要重复原文句式；
+4) **body_markdown 约束**：恰好 **5 个自然段**；段与段之间用一个空行分隔；**不要**使用 # / ## 标题符号；全文（正文）总字数 **不超过 500 字**（含标点）；
-5) 输出必须是合法 JSON，字段：title, summary, body_markdown。
+5) title、summary 也要短：标题约 8～18 字；摘要约 40～80 字；
 6) JSON 字符串内引号请用「」或『』，勿用未转义的英文 "。
 """.strip()
 REWRITE_SCHEMA_HINT = """
-请输出 JSON：
+请输出 JSON（勿包在 ``` 里），例如：
 {
-  "title": "20字内中文标题，明确价值点",
+  "title": "短标题，点明主题",
-  "summary": "80-120字中文摘要，说明读者收获",
+  "summary": "一句话到两句话摘要",
-  "body_markdown": "完整Markdown正文"
+  "body_markdown": "第一段内容…\\n\\n第二段…\\n\\n第三段…\\n\\n第四段…\\n\\n第五段…"
 }
-正文格式要求（必须遵循）：
+body_markdown 写法：
-## 导语
+- 必须且只能有 **5 段**：每段若干完整句子，段之间 **\\n\\n**（空一行）；
-2-3段，交代背景、冲突与阅读价值。
+- **禁止** markdown 标题（不要用 #）；
 - 正文总长 **≤500 字**，宁可短而清楚，不要写满废话；
 - 内容顺序建议：第 1 段交代在说什么；中间 3 段展开关键信息；最后 1 段收束或提醒（均须紧扣原文，勿乱发挥）。
 """.strip()
-## 核心观点
+# 通义等模型若首次过短/结构不对，再要一次
- 3~5条要点，每条是完整信息句，不要口号。
+_JSON_BODY_TOO_SHORT_RETRY = """
-## 深度分析
+【系统复检】上一次 body_markdown 不符合要求。请重输出**完整** JSON：
-### 1) 现象背后的原因
+- 正文必须 **恰好 5 个自然段**（仅 \\n\\n 分段），无 # 标题，总字数 **≤500 字**；
-2-3段
+- 忠实原稿、简短高效；
-### 2) 对行业/团队的影响
+- 引号只用「」『』；
-2-3段
+- 只输出 JSON。
 ### 3) 关键风险与边界
 2-3段
 ## 落地建议
 1. 三到五条可执行动作，尽量包含“谁在什么场景做什么”。
 ## 结语
 1段，收束观点并给出下一步建议。
 """.strip()
@@ -70,47 +97,198 @@ class AIRewriter:
                api_key=settings.openai_api_key,
                base_url=settings.openai_base_url,
                timeout=settings.openai_timeout,
-                max_retries=1,
+                max_retries=max(0, int(settings.openai_max_retries)),
            )
            logger.info(
                "AIRewriter_init model=%s api_host=%s prefer_chat_first=%s timeout_s=%s max_retries=%s",
                settings.openai_model,
                _api_host(settings.openai_base_url) or "(default)",
                self._prefer_chat_first,
                settings.openai_timeout,
                settings.openai_max_retries,
            )
        else:
            logger.warning("AIRewriter_init openai_key_missing=1 rewrite_will_use_fallback_only=1")
-    def rewrite(self, req: RewriteRequest) -> RewriteResponse:
+    def rewrite(self, req: RewriteRequest, request_id: str = "") -> RewriteResponse:
        cleaned_source = self._clean_source(req.source_text)
        started = time.monotonic()
        trace: dict[str, Any] = {
            "request_id": request_id or None,
            "model": settings.openai_model,
            "provider": "dashscope" if self._prefer_chat_first else "openai_compatible",
            "source_chars_in": len(req.source_text or ""),
            "cleaned_chars": len(cleaned_source),
            "openai_timeout_env_sec": settings.openai_timeout,
            "steps": [],
        }
        def _step(name: str, **extra: Any) -> None:
            elapsed_ms = round((time.monotonic() - started) * 1000, 1)
            trace["steps"].append({"name": name, "elapsed_ms": elapsed_ms, **extra})
            extra_fmt = ""
            if extra:
                parts: list[str] = []
                for k, v in extra.items():
                    s = repr(v)
                    if len(s) > 200:
                        s = s[:197] + "..."
                    parts.append(f"{k}={s}")
                extra_fmt = " " + " ".join(parts)
            logger.info(
                "rewrite_step rid=%s step=%s elapsed_ms=%s%s",
                request_id or "-",
                name,
                elapsed_ms,
                extra_fmt,
            )
        raw_in = (req.source_text or "").replace("\r\n", "\n").strip()
        _step("clean_source", truncated=len(cleaned_source) < len(raw_in))
        logger.info(
            "rewrite_enter rid=%s model=%s client_ok=%s prefer_chat_first=%s "
            "source_chars=%d cleaned_chars=%d ai_soft_accept=%s",
            request_id or "-",
            settings.openai_model,
            bool(self._client),
            self._prefer_chat_first,
            trace["source_chars_in"],
            len(cleaned_source),
            settings.ai_soft_accept,
        )
        # Primary: model rewrite + quality gate + optional second-pass polish.
        if self._client:
-            # DashScope/Qwen works better with a single stable call.
+            # 通义长文 JSON 常需 40~90s+。旧代码错误地将首轮 cap 在 30s → APITimeoutError → 仅走兜底。
            if self._prefer_chat_first:
-                first_pass_timeout = max(18.0, min(30.0, settings.openai_timeout))
+                first_pass_timeout = max(45.0, min(300.0, float(settings.openai_timeout)))
            else:
-                first_pass_timeout = max(20.0, min(50.0, settings.openai_timeout))
+                first_pass_timeout = max(20.0, min(120.0, float(settings.openai_timeout)))
-            draft = self._model_rewrite(req, cleaned_source, timeout_sec=first_pass_timeout)
+            trace["first_pass_http_timeout_sec"] = round(first_pass_timeout, 1)
            logger.info(
                "rewrite_model_first_pass rid=%s first_pass_http_timeout_s=%.1f openai_timeout_env_s=%.1f "
                "lenient_qa=%s note=dashscope_uses_full_openai_timeout_not_capped_30",
                request_id or "-",
                first_pass_timeout,
                settings.openai_timeout,
                self._prefer_chat_first,
            )
            t0 = time.monotonic()
            draft = self._model_rewrite(req, cleaned_source, timeout_sec=first_pass_timeout, request_id=request_id)
            _step(
                "model_first_pass",
                duration_ms=round((time.monotonic() - t0) * 1000, 1),
                ok=bool(draft),
                timeout_sec=first_pass_timeout,
            )
            if not draft:
                trace["quality_issues_final"] = ["模型未返回有效 JSON 或请求超时"]
                trace["model_unavailable_hint"] = (
                    "排查：① 日志是否 APITimeoutError → 提高 OPENAI_TIMEOUT（通义建议 120~180）并确认 "
                    "first_pass_http_timeout_sec 与 trace.openai_timeout_env_sec 一致；② 网络到 "
                    "dashscope.aliyuncs.com；③ 见 model_call_fail 的 is_likely_timeout。"
                )
                _step("model_first_pass_failed", detail="timeout_or_invalid_json")
            if draft:
                normalized = self._normalize_result(draft)
-                issues = self._quality_issues(req, cleaned_source, normalized)
+                issues = self._quality_issues(
                    req, cleaned_source, normalized, lenient=self._prefer_chat_first
                )
                trace["quality_issues_first"] = issues
                logger.info(
                    "rewrite quality check rid=%s first_issues=%s body_chars=%d",
                    request_id,
                    issues,
                    len(normalized.get("body_markdown", "") or ""),
                )
                elapsed = time.monotonic() - started
-                remaining_budget = max(0.0, (first_pass_timeout + 20.0) - elapsed)
+                remaining_budget = max(0.0, (first_pass_timeout + 25.0) - elapsed)
-                if issues and (not self._prefer_chat_first) and remaining_budget >= 10.0:
+                polish_budget = min(22.0, remaining_budget) if self._prefer_chat_first else min(30.0, remaining_budget)
                if issues and not (
                    remaining_budget >= 8.0 and polish_budget >= 6.0
                ):
                    logger.info(
                        "rewrite_polish_skipped rid=%s first_issues=%d remaining_budget_s=%.1f polish_budget_s=%.1f",
                        request_id or "-",
                        len(issues),
                        remaining_budget,
                        polish_budget,
                    )
                if issues and remaining_budget >= 8.0 and polish_budget >= 6.0:
                    t1 = time.monotonic()
                    polished = self._model_polish(
                        req,
                        cleaned_source,
                        normalized,
                        issues,
-                        timeout_sec=min(30.0, remaining_budget),
+                        timeout_sec=polish_budget,
                        request_id=request_id,
                    )
                    _step(
                        "model_polish",
                        duration_ms=round((time.monotonic() - t1) * 1000, 1),
                        ok=bool(polished),
                    )
                    if polished:
                        normalized = self._normalize_result(polished)
-                final_issues = self._quality_issues(req, cleaned_source, normalized)
+                final_issues = self._quality_issues(
                    req, cleaned_source, normalized, lenient=self._prefer_chat_first
                )
                trace["quality_issues_final"] = final_issues
                if not final_issues:
-                    return RewriteResponse(**normalized, mode="ai", quality_notes=[])
+                    trace["duration_ms"] = round((time.monotonic() - started) * 1000, 1)
-                logger.warning("rewrite quality gate fallback triggered: %s", final_issues)
+                    trace["mode"] = "ai"
                    logger.info(
                        "rewrite success rid=%s duration_ms=%.1f mode=ai",
                        request_id,
                        trace["duration_ms"],
                    )
                    return RewriteResponse(**normalized, mode="ai", quality_notes=[], trace=trace)
                # 模型已返回有效 JSON：默认「软接受」——仍视为 AI 洗稿，质检问题写入 quality_notes，避免误用模板稿
                if settings.ai_soft_accept and self._model_output_usable(normalized):
                    trace["duration_ms"] = round((time.monotonic() - started) * 1000, 1)
                    trace["mode"] = "ai"
                    trace["quality_soft_accept"] = True
                    trace["quality_warnings"] = final_issues
                    logger.warning(
                        "rewrite soft-accept rid=%s warnings=%s body_chars=%d",
                        request_id,
                        final_issues,
                        len(normalized.get("body_markdown", "") or ""),
                    )
                    return RewriteResponse(
                        **normalized,
                        mode="ai",
                        quality_notes=final_issues,
                        trace=trace,
                    )
                logger.warning(
                    "rewrite quality gate fallback rid=%s issues=%s",
                    request_id,
                    final_issues,
                )
                _step("quality_gate_failed", issues=final_issues)
        else:
            _step("skip_model", reason="OPENAI_API_KEY 未配置")
            trace["quality_issues_final"] = ["未配置 OPENAI_API_KEY，使用本地保底稿"]
        # Secondary: deterministic fallback with publishable structure.
-        return self._fallback_rewrite(req, cleaned_source, reason="模型超时或质量未达标，已使用结构化保底稿")
+        reason = "模型未返回有效 JSON、超时，或质量未达标，已使用结构化保底稿"
        trace["duration_ms"] = round((time.monotonic() - started) * 1000, 1)
        logger.info(
            "rewrite fallback rid=%s duration_ms=%.1f last_issues=%s",
            request_id,
            trace["duration_ms"],
            trace.get("quality_issues_final"),
        )
        return self._fallback_rewrite(req, cleaned_source, reason=reason, trace=trace)
-    def _model_rewrite(self, req: RewriteRequest, cleaned_source: str, timeout_sec: float) -> dict | None:
+    def _model_rewrite(
        self, req: RewriteRequest, cleaned_source: str, timeout_sec: float, request_id: str = ""
    ) -> dict | None:
        user_prompt = self._build_user_prompt(req, cleaned_source)
-        return self._call_model_json(user_prompt, timeout_sec=timeout_sec)
+        return self._call_model_json(user_prompt, timeout_sec=timeout_sec, request_id=request_id)
    def _model_polish(
        self,
@@ -119,10 +297,11 @@ class AIRewriter:
        normalized: dict,
        issues: list[str],
        timeout_sec: float,
        request_id: str = "",
    ) -> dict | None:
        issue_text = "\n".join([f"- {i}" for i in issues])
        user_prompt = f"""
-你上一次的改写稿质量未达标，请基于下面问题做彻底重写，不要只改几个词：
+你上一次的改写稿未通过质检，请针对下列问题重写；体裁仍为**科普介绍类公众号**，**忠实原稿**，不要写成技术方案或内部汇报。
 {issue_text}
 原始内容：
@@ -141,9 +320,9 @@ class AIRewriter:
 - 必须保留观点：{req.keep_points or '无'}
 - 避免词汇：{req.avoid_words or '无'}
-请输出一个全新且高质量版本。{REWRITE_SCHEMA_HINT}
+请输出一版全新稿件。{REWRITE_SCHEMA_HINT}
 """.strip()
-        return self._call_model_json(user_prompt, timeout_sec=timeout_sec)
+        return self._call_model_json(user_prompt, timeout_sec=timeout_sec, request_id=request_id)
    def _build_user_prompt(self, req: RewriteRequest, cleaned_source: str) -> str:
        return f"""
@@ -157,94 +336,95 @@ class AIRewriter:
 - 必须保留观点：{req.keep_points or '无'}
 - 避免词汇：{req.avoid_words or '无'}
-任务：请输出可直接用于公众号发布的文章。{REWRITE_SCHEMA_HINT}
+任务：在**不偏离原帖主题与事实**的前提下，改写成科普介绍风格的公众号正文（好读、讲清楚，而非技术实施方案）。{REWRITE_SCHEMA_HINT}
 """.strip()
-    def _fallback_rewrite(self, req: RewriteRequest, cleaned_source: str, reason: str) -> RewriteResponse:
+    def _fallback_rewrite(
        self, req: RewriteRequest, cleaned_source: str, reason: str, trace: dict[str, Any] | None = None
    ) -> RewriteResponse:
        sentences = self._extract_sentences(cleaned_source)
        points = self._pick_key_points(sentences, limit=5)
        title = req.title_hint.strip() or self._build_fallback_title(sentences)
        summary = self._build_fallback_summary(points, cleaned_source)
        intro = self._build_intro(points, cleaned_source)
        analysis = self._build_analysis(points)
-        actions = self._build_actions(points)
+        conclusion = "细节仍以原帖为准；若话题在更新，请对照出处核对。"
        conclusion = "如果你准备把这类内容持续做成栏目，建议建立固定模板：观点来源、关键证据、执行建议、复盘结论。"
-        body = (
+        def _one_line(s: str, n: int) -> str:
-            "## 导语\n"
+            t = re.sub(r"\s+", " ", (s or "").strip())
-            f"{intro}\n\n"
+            return t if len(t) <= n else t[: n - 1] + "…"
-            "## 核心观点\n"
+
-            + "\n".join([f"- {p}" for p in points])
+        paras = [
-            + "\n\n"
+            _one_line(self._build_intro(points, cleaned_source), 105),
-            "## 深度分析\n"
+            _one_line(analysis["cause"], 105),
-            "### 1) 现象背后的原因\n"
+            _one_line(analysis["impact"], 105),
-            f"{analysis['cause']}\n\n"
+            _one_line(analysis["risk"], 105),
-            "### 2) 对行业/团队的影响\n"
+            _one_line(conclusion, 105),
-            f"{analysis['impact']}\n\n"
+        ]
-            "### 3) 关键风险与边界\n"
+        body = "\n\n".join(paras)
-            f"{analysis['risk']}\n\n"
+        if len(body) > MAX_BODY_CHARS:
-            "## 落地建议\n"
+            body = body[: MAX_BODY_CHARS - 1] + "…"
            + "\n".join([f"{i + 1}. {a}" for i, a in enumerate(actions)])
            + "\n\n"
            "## 结语\n"
            f"{conclusion}"
        )
        normalized = {
            "title": title,
            "summary": summary,
            "body_markdown": self._format_markdown(body),
        }
-        return RewriteResponse(**normalized, mode="fallback", quality_notes=[reason])
+        if trace is not None:
            trace["mode"] = "fallback"
            trace["fallback_reason"] = reason
        rid = (trace or {}).get("request_id") or "-"
        logger.info(
            "rewrite_fallback_compose rid=%s reason=%s title_chars=%d summary_chars=%d body_chars=%d points=%d",
            rid,
            reason[:120],
            len(normalized["title"]),
            len(normalized["summary"]),
            len(normalized["body_markdown"]),
            len(points),
        )
        return RewriteResponse(**normalized, mode="fallback", quality_notes=[reason], trace=trace)
    def _build_fallback_title(self, sentences: list[str]) -> str:
-        seed = sentences[0] if sentences else "内容改写"
+        seed = sentences[0] if sentences else "内容导读"
        seed = shorten(seed, width=16, placeholder="")
-        return f"{seed}：给内容创作者的实战拆解"
+        return f"{seed}：一文读懂在说什么"
    def _build_fallback_summary(self, points: list[str], source: str) -> str:
        if len(points) >= 2:
-            return f"本文提炼了{points[0]}，并进一步分析{points[1]}，最后给出可直接执行的发布建议，帮助你把观点内容做成高质量公众号文章。"
+            return shorten(
-        return shorten(re.sub(r"\s+", " ", source), width=110, placeholder="...")
+                f"原帖在谈：{points[0]}；另一点：{points[1]}。",
                width=85,
                placeholder="…",
            )
        return shorten(re.sub(r"\s+", " ", source), width=85, placeholder="…")
    def _build_intro(self, points: list[str], source: str) -> str:
        focus = points[0] if points else shorten(source, width=42, placeholder="...")
        return (
-            f"这篇内容的价值不在“信息多”，而在于它点出了一个真正值得关注的问题：{focus}。\n\n"
+            f"原帖主要在谈：{focus}。下面用更适合公众号阅读的方式，把脉络和重点捋清楚，方便你快速抓住作者在表达什么。\n\n"
-            "对公众号读者来说，最关心的是这件事会带来什么变化、现在能做什么。"
+            "说明：这是基于原文的导读式整理，若需引用细节，请以原帖为准。"
            "因此本文不做逐句复述，而是按“观点-影响-动作”重组，方便直接落地。"
        )
    def _build_analysis(self, points: list[str]) -> dict[str, str]:
-        p1 = points[0] if points else "行业正在从信息堆叠转向结果导向"
+        p1 = points[0] if points else "原文讨论的核心现象"
-        p2 = points[1] if len(points) > 1 else "团队协作方式被自动化流程重塑"
+        p2 = points[1] if len(points) > 1 else "与读者日常能感知到的关联"
-        p3 = points[2] if len(points) > 2 else "内容质量会成为真正分水岭"
+        p3 = points[2] if len(points) > 2 else "原文可能提到的限制或尚未定论之处"
        return {
            "cause": (
-                f"从表面看是工具迭代，实质是生产逻辑变化。{p1}，意味着过去依赖经验的环节，正在被标准化流程替代。"
+                f"先把事情放在原文的语境里理解：{p1}。"
-                "谁先完成流程化改造，谁就更容易稳定产出。"
+                "这里侧重讲清楚「作者在说什么」，而不是替原文下结论。"
            ),
            "impact": (
-                f"短期影响体现在效率，中长期影响体现在品牌认知。{p2}。"
+                f"对大多数读者来说，更关心的是：这和自己有什么关系。{p2}。"
-                "如果只追求发布速度，内容会快速同质化；如果把洞察和表达打磨成体系，内容资产会持续增值。"
+                "若原帖偏专业，这里尽量用通俗说法转述，避免写成给决策层的公文。"
            ),
            "risk": (
-                f"最大的风险不是‘不用 AI’，而是‘只用 AI’。{p3}。"
+                f"任何公开讨论都有边界：{p3}。"
-                "没有事实校对与人工观点把关，文章容易出现空泛表达、错误引用和结论过度。"
+                "若话题仍在变化，结论可能更新，阅读时建议保留一点审慎，必要时回看原始出处。"
            ),
        }
    def _build_actions(self, points: list[str]) -> list[str]:
        anchor = points[0] if points else "核心观点"
        return [
            f"先确定本篇唯一主线：围绕“{anchor}”展开，删除与主线无关的段落。",
            "按“导语-观点-分析-建议-结语”五段式重排正文，每段只解决一个问题。",
            "为每个核心观点补一条可验证依据（数据、案例或公开来源），提升可信度。",
            "发布前做一次反 AI 味检查：删掉空话，替换为具体动作和明确对象。",
            "将高表现文章沉淀为模板，下次复用同样结构提高稳定性。",
        ]
    def _clean_source(self, text: str) -> str:
        src = (text or "").replace("\r\n", "\n").strip()
        src = re.sub(r"https?://\S+", "", src)
@@ -266,11 +446,11 @@ class AIRewriter:
    def _pick_key_points(self, sentences: list[str], limit: int) -> list[str]:
        points: list[str] = []
        templates = [
-            "核心变化：{}",
+            "值得关注：{}",
-            "关键问题：{}",
+            "背景要点：{}",
-            "方法调整：{}",
+            "原文强调：{}",
-            "结果反馈：{}",
+            "延伸信息：{}",
-            "结论启示：{}",
+            "阅读提示：{}",
        ]
        for s in sentences:
            if len(s) < 12:
@@ -309,18 +489,153 @@ class AIRewriter:
        raise ValueError("model output is not valid JSON")
-    def _call_model_json(self, user_prompt: str, timeout_sec: float) -> dict | None:
+    def _chat_completions_json(self, user_prompt: str, timeout_sec: float, request_id: str) -> dict | None:
        """chat.completions：通义兼容层在 json_object 下易产出极短 JSON，故 DashScope 不传 response_format，并支持短文自动重试。"""
        max_attempts = 2 if self._prefer_chat_first else 1
        deadline = time.monotonic() + max(0.0, timeout_sec)
        pe = user_prompt
        for attempt in range(max_attempts):
            if attempt == 1:
                pe = user_prompt + _JSON_BODY_TOO_SHORT_RETRY
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                logger.warning(
                    "model_call_budget_exhausted rid=%s api=chat.completions attempt=%d/%d",
                    request_id or "-",
                    attempt + 1,
                    max_attempts,
                )
                return None
            try:
                logger.info(
                    "model_call_try rid=%s api=chat.completions.create attempt=%d/%d max_tokens=%d json_object=%s timeout_s=%.1f",
                    request_id or "-",
                    attempt + 1,
                    max_attempts,
                    settings.openai_max_output_tokens,
                    not self._prefer_chat_first,
                    remaining,
                )
                t0 = time.monotonic()
                create_kwargs: dict[str, Any] = {
                    "model": settings.openai_model,
                    "messages": [
                        {"role": "system", "content": SYSTEM_PROMPT},
                        {"role": "user", "content": pe},
                    ],
                    "max_tokens": settings.openai_max_output_tokens,
                    "temperature": 0.4,
                    "extra_body": {"enable_thinking": False},
                    "timeout": remaining,
                }
                # OpenAI 官方 API 在 json_object 下表现稳定；通义兼容模式若开启则常出现正文被压成一两百字。
                if not self._prefer_chat_first:
                    create_kwargs["response_format"] = {"type": "json_object"}
                completion = self._client.chat.completions.create(**create_kwargs)
            except Exception as exc:
                is_to = _is_likely_timeout_error(exc)
                logger.warning(
                    "model_call_fail rid=%s api=chat.completions attempt=%d/%d exc_type=%s exc=%s "
                    "is_likely_timeout=%s http_timeout_budget_s=%.1f openai_timeout_env_s=%.1f max_tokens=%d "
                    "hint=%s",
                    request_id or "-",
                    attempt + 1,
                    max_attempts,
                    type(exc).__name__,
                    exc,
                    is_to,
                    remaining,
                    settings.openai_timeout,
                    settings.openai_max_output_tokens,
                    (
                        "典型原因：单轮 HTTP 等待短于模型生成长文 JSON 所需时间；已取消错误的 30s 上限，"
                        "请确认 OPENAI_TIMEOUT>=120 并重启进程。"
                    )
                    if is_to and self._prefer_chat_first
                    else (
                        "若为超时：增大 OPENAI_TIMEOUT；否则检查 Key/模型名/网络。"
                        if is_to
                        else ""
                    ),
                )
                if self._prefer_chat_first:
                    return None
                raise
            choice = completion.choices[0] if completion.choices else None
            msg = (choice.message.content if choice else "") or ""
            fr = getattr(choice, "finish_reason", None) if choice else None
            usage = getattr(completion, "usage", None)
            udump = (
                usage.model_dump()
                if usage is not None and hasattr(usage, "model_dump")
                else usage
            )
            ms = (time.monotonic() - t0) * 1000
            logger.info(
                "model_call_ok rid=%s api=chat.completions attempt=%d duration_ms=%.1f output_chars=%d "
                "finish_reason=%s usage=%s preview=%s",
                request_id or "-",
                attempt + 1,
                ms,
                len(msg),
                fr,
                udump,
                _preview_for_log(msg, 380),
            )
            logger.debug(
                "model_call_raw rid=%s api=chat.completions attempt=%d body=%s",
                request_id or "-",
                attempt + 1,
                msg,
            )
            try:
                parsed = self._parse_response_json(msg)
            except Exception as exc:
                logger.warning(
                    "model_json_parse_fail rid=%s attempt=%d err=%s",
                    request_id or "-",
                    attempt + 1,
                    exc,
                )
                if not self._prefer_chat_first:
                    raise
                if attempt == max_attempts - 1:
                    return None
                continue
            raw_body = str(parsed.get("body_markdown", "")).strip()
            bl = len(raw_body)
            pc = len([p for p in re.split(r"\n\s*\n", raw_body) if p.strip()])
            if self._prefer_chat_first and attempt == 0 and (bl < 40 or pc < 3):
                logger.warning(
                    "model_body_retry rid=%s body_chars=%d paragraphs=%d reason=too_thin_or_not_segmented",
                    request_id or "-",
                    bl,
                    pc,
                )
                continue
            return parsed
        return None
    def _call_model_json(self, user_prompt: str, timeout_sec: float, request_id: str = "") -> dict | None:
        methods = ["chat", "responses"] if self._prefer_chat_first else ["responses", "chat"]
        logger.info(
-            "AI request start model=%s timeout=%.1fs prefer_chat_first=%s prompt_chars=%d",
+            "model_call_begin rid=%s model=%s timeout_s=%.1f prefer_chat_first=%s prompt_chars=%d "
            "try_order=%s",
            request_id or "-",
            settings.openai_model,
            timeout_sec,
            self._prefer_chat_first,
            len(user_prompt),
            methods,
        )
        methods = ["chat", "responses"] if self._prefer_chat_first else ["responses", "chat"]
        for method in methods:
            t0 = time.monotonic()
            if method == "responses":
                try:
                    logger.info("model_call_try rid=%s api=OpenAI.responses.create", request_id or "-")
                    completion = self._client.responses.create(
                        model=settings.openai_model,
                        input=[
@@ -331,35 +646,59 @@ class AIRewriter:
                        timeout=timeout_sec,
                    )
                    output_text = completion.output_text or ""
-                    logger.info("AI raw output (responses): %s", output_text)
+                    ms = (time.monotonic() - t0) * 1000
                    logger.info(
                        "model_call_ok rid=%s api=responses duration_ms=%.1f output_chars=%d preview=%s",
                        request_id or "-",
                        ms,
                        len(output_text),
                        _preview_for_log(output_text, 380),
                    )
                    logger.debug("model_call_raw rid=%s api=responses body=%s", request_id or "-", output_text)
                    return self._parse_response_json(output_text)
                except Exception as exc:
-                    logger.warning("responses API failed: %s", exc)
+                    logger.warning(
                        "model_call_fail rid=%s api=responses duration_ms=%.1f exc_type=%s exc=%s",
                        request_id or "-",
                        (time.monotonic() - t0) * 1000,
                        type(exc).__name__,
                        exc,
                    )
                    continue
            if method == "chat":
                try:
-                    completion = self._client.chat.completions.create(
+                    t_chat = time.monotonic()
-                        model=settings.openai_model,
+                    out = self._chat_completions_json(user_prompt, timeout_sec, request_id)
-                        messages=[
+                    if out is not None:
-                            {"role": "system", "content": SYSTEM_PROMPT},
+                        return out
                            {"role": "user", "content": user_prompt},
                        ],
                        response_format={"type": "json_object"},
                        max_tokens=1800,
                        temperature=0.4,
                        extra_body={"enable_thinking": False},
                        timeout=timeout_sec,
                    )
                    msg = completion.choices[0].message.content if completion.choices else ""
                    logger.info("AI raw output (chat.completions): %s", msg or "")
                    return self._parse_response_json(msg or "")
                except Exception as exc:
                    logger.warning("chat.completions API failed: %s", exc)
                    # DashScope compatibility path: don't spend extra time on responses fallback.
                    if self._prefer_chat_first:
                        logger.info(
                            "model_call_stop rid=%s reason=dashscope_chat_no_valid_json duration_ms=%.1f",
                            request_id or "-",
                            (time.monotonic() - t_chat) * 1000,
                        )
                        return None
                except Exception as exc:
                    logger.warning(
                        "model_call_fail rid=%s api=chat.completions duration_ms=%.1f exc_type=%s exc=%s",
                        request_id or "-",
                        (time.monotonic() - t0) * 1000,
                        type(exc).__name__,
                        exc,
                    )
                    if self._prefer_chat_first:
                        logger.info(
                            "model_call_stop rid=%s reason=dashscope_chat_exception",
                            request_id or "-",
                        )
                        return None
                    continue
        logger.error(
            "model_call_exhausted rid=%s methods_tried=%s result=none",
            request_id or "-",
            methods,
        )
        return None
    def _normalize_result(self, data: dict) -> dict:
@@ -370,49 +709,57 @@ class AIRewriter:
        if not title:
            title = "公众号改写稿"
        if not summary:
-            summary = shorten(re.sub(r"\s+", " ", body), width=110, placeholder="...")
+            summary = shorten(re.sub(r"\s+", " ", body), width=90, placeholder="...")
-        body = self._ensure_sections(body)
+        body = re.sub(r"(?m)^#{1,6}\s+[^\n]*\n?", "", body).strip()
        body = self._normalize_body_length(body)
        body = self._format_markdown(body)
        return {"title": title, "summary": summary, "body_markdown": body}
-    def _ensure_sections(self, body: str) -> str:
+    def _normalize_body_length(self, body: str) -> str:
        text = (body or "").strip()
        required = ["## 导语", "## 核心观点", "## 深度分析", "## 落地建议", "## 结语"]
        missing = [h for h in required if h not in text]
        if not text:
-            text = "## 导语\n\n内容生成失败，请重试。\n"
+            text = "（正文生成失败，请重试。）"
-        if missing:
+        if len(text) > MAX_BODY_CHARS:
-            # Light touch: append missing sections to keep publish structure stable.
+            text = text[: MAX_BODY_CHARS - 1] + "…"
            pads = "\n\n".join([f"{h}\n\n（待补充）" for h in missing])
            text = f"{text}\n\n{pads}"
        return text
-    def _quality_issues(self, req: RewriteRequest, source: str, normalized: dict) -> list[str]:
+    def _quality_issues(
        self, req: RewriteRequest, source: str, normalized: dict, lenient: bool = False
    ) -> list[str]:
        issues: list[str] = []
        title = normalized.get("title", "")
        summary = normalized.get("summary", "")
        body = normalized.get("body_markdown", "")
-        if len(title) < 8 or len(title) > 34:
+        min_title, max_title = (4, 30) if lenient else (6, 24)
-            issues.append("标题长度不理想（建议 8-34 字）")
+        if len(title) < min_title or len(title) > max_title:
            issues.append(f"标题长度不理想（建议 {min_title}-{max_title} 字，短标题即可）")
-        if len(summary) < 60:
+        min_summary, max_summary = (20, 100) if lenient else (25, 90)
-            issues.append("摘要过短，信息量不足")
+        if len(summary) < min_summary:
            issues.append("摘要过短")
        elif len(summary) > max_summary:
            issues.append(f"摘要过长（建议 ≤{max_summary} 字）")
-        headings = re.findall(r"(?m)^##\s+.+$", body)
+        paragraphs = [p.strip() for p in re.split(r"\n\s*\n", body) if p.strip()]
-        if len(headings) < 5:
+        pc = len(paragraphs)
-            issues.append("二级标题不足，结构不完整")
+        need_p = 4 if lenient else 5
        if pc < need_p:
            issues.append(f"正文需约 5 个自然段、空行分隔（当前 {pc} 段）")
        elif not lenient and pc > 6:
            issues.append(f"正文段落过多（当前 {pc} 段），请合并为 5 段左右")
-        paragraphs = [p.strip() for p in body.split("\n\n") if p.strip()]
+        if len(body) > MAX_BODY_CHARS:
-        if len(paragraphs) < 10:
+            issues.append(f"正文超过 {MAX_BODY_CHARS} 字（当前 {len(body)} 字），请压缩")
-            issues.append("正文段落偏少，展开不充分")
+        elif len(body) < MIN_BODY_CHARS:
            issues.append(f"正文过短（当前阈值 ≥{MIN_BODY_CHARS} 字）")
-        if len(body) < 900:
+        if re.search(r"(?m)^#+\s", body):
-            issues.append("正文过短，无法支撑公众号发布")
+            issues.append("正文请勿使用 # 标题符号，只用自然段")
-        if self._looks_like_raw_copy(source, body):
+        if self._looks_like_raw_copy(source, body, lenient=lenient):
            issues.append("改写与原文相似度过高，疑似未充分重写")
        if req.avoid_words:
@@ -428,7 +775,7 @@ class AIRewriter:
        return issues
-    def _looks_like_raw_copy(self, source: str, rewritten: str) -> bool:
+    def _looks_like_raw_copy(self, source: str, rewritten: str, lenient: bool = False) -> bool:
        src = re.sub(r"\s+", "", source or "")
        dst = re.sub(r"\s+", "", rewritten or "")
        if not src or not dst:
@@ -436,10 +783,20 @@ class AIRewriter:
        if dst in src or src in dst:
            return True
        ratio = difflib.SequenceMatcher(a=src[:3500], b=dst[:3500]).ratio()
-        return ratio >= 0.80
+        threshold = 0.88 if lenient else 0.80
        return ratio >= threshold
    def _model_output_usable(self, normalized: dict) -> bool:
        """模型 JSON 可解析且正文有实质内容时，允许软接受（不走模板保底）。"""
        body = (normalized.get("body_markdown") or "").strip()
        title = (normalized.get("title") or "").strip()
        if len(title) < 4 or len(body) < 40:
            return False
        if len(body) > MAX_BODY_CHARS + 80:
            return False
        return True
    def _format_markdown(self, text: str) -> str:
        body = text.replace("\r\n", "\n").strip()
        body = re.sub(r"\n{3,}", "\n\n", body)
        body = re.sub(r"(?m)^(#{1,3}\s[^\n]+)\n(?!\n)", r"\1\n\n", body)
        return body.strip() + "\n"
--- a/app/services/im.py
+++ b/app/services/im.py
@@ -3,20 +3,36 @@ from __future__ import annotations
 import hashlib
 import hmac
 import base64
 import logging
 import time
 from urllib.parse import quote_plus
 import httpx
 from urllib.parse import urlparse
 from app.config import settings
 from app.schemas import IMPublishRequest, PublishResponse
 logger = logging.getLogger(__name__)
 class IMPublisher:
-    async def publish(self, req: IMPublishRequest) -> PublishResponse:
+    async def publish(self, req: IMPublishRequest, request_id: str = "") -> PublishResponse:
        rid = request_id or "-"
        if not settings.im_webhook_url:
            logger.warning("im_skipped rid=%s reason=empty_webhook_url", rid)
            return PublishResponse(ok=False, detail="缺少 IM_WEBHOOK_URL 配置")
        parsed = urlparse(settings.im_webhook_url)
        host = parsed.netloc or "(invalid_url)"
        logger.info(
            "im_publish_start rid=%s webhook_host=%s sign_enabled=%s title_chars=%d body_truncated_to=3800",
            rid,
            host,
            bool(settings.im_secret),
            len(req.title or ""),
        )
        webhook = self._with_signature(settings.im_webhook_url, settings.im_secret)
        payload = {
            "msg_type": "post",
@@ -31,15 +47,25 @@ class IMPublisher:
        }
        async with httpx.AsyncClient(timeout=20) as client:
            logger.info("im_http_post rid=%s method=POST timeout_s=20", rid)
            r = await client.post(webhook, json=payload)
            try:
                data = r.json()
            except Exception:
                data = {"status_code": r.status_code, "text": r.text}
        logger.info(
            "im_http_response rid=%s status=%s body_preview=%s",
            rid,
            r.status_code,
            str(data)[:500],
        )
        if r.status_code >= 400:
            logger.warning("im_push_failed rid=%s http_status=%s", rid, r.status_code)
            return PublishResponse(ok=False, detail=f"IM 推送失败: {data}", data=data)
        logger.info("im_push_ok rid=%s", rid)
        return PublishResponse(ok=True, detail="IM 推送成功", data=data)
    def _with_signature(self, webhook: str, secret: str | None) -> str:
--- a/app/services/wechat.py
+++ b/app/services/wechat.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 import logging
 import time
 import httpx
@@ -8,21 +9,60 @@ import markdown2
 from app.config import settings
 from app.schemas import PublishResponse, WechatPublishRequest
 logger = logging.getLogger(__name__)
 def _detail_for_token_error(data: dict | None) -> str:
    """把微信返回的 errcode 转成可操作的说明。"""
    if not data:
        return "获取微信 access_token 失败（无返回内容）"
    code = data.get("errcode")
    msg = (data.get("errmsg") or "").strip()
    if code == 40164:
        return (
            "微信 errcode=40164：当前请求使用的出口 IP 未在公众号「IP 白名单」中。"
            "请到 微信公众平台 → 设置与开发 → 基本配置 → IP 白名单，添加本服务对外的公网 IP"
            "（日志里 invalid ip 后面的地址）。若在本地/Docker 调试，出口 IP 常会变，需填当前出口或改用固定出口的服务器。"
            f" 微信原文：{msg}"
        )
    if code == 40013:
        return f"微信 errcode=40013：AppSecret 无效或已重置，请检查 WECHAT_SECRET。{msg}"
    if code == 40125:
        return f"微信 errcode=40125：AppSecret 配置错误。{msg}"
    return f"获取微信 access_token 失败：errcode={code} errmsg={msg}"
 class WechatPublisher:
    def __init__(self) -> None:
        self._access_token = None
        self._expires_at = 0
-    async def publish_draft(self, req: WechatPublishRequest) -> PublishResponse:
+    async def publish_draft(self, req: WechatPublishRequest, request_id: str = "") -> PublishResponse:
        rid = request_id or "-"
        if not settings.wechat_appid or not settings.wechat_secret:
            logger.warning("wechat skipped rid=%s reason=missing_appid_or_secret", rid)
            return PublishResponse(ok=False, detail="缺少 WECHAT_APPID / WECHAT_SECRET 配置")
-        token = await self._get_access_token()
+        token, token_from_cache, token_err_body = await self._get_access_token()
        if not token:
-            return PublishResponse(ok=False, detail="获取微信 access_token 失败")
+            detail = _detail_for_token_error(token_err_body)
            logger.error("wechat access_token_unavailable rid=%s detail=%s", rid, detail[:200])
            return PublishResponse(ok=False, detail=detail, data=token_err_body)
        logger.info(
            "wechat_token rid=%s cache_hit=%s",
            rid,
            token_from_cache,
        )
        html = markdown2.markdown(req.body_markdown)
        logger.info(
            "wechat_draft_build rid=%s title_chars=%d digest_chars=%d html_chars=%d",
            rid,
            len(req.title or ""),
            len(req.summary or ""),
            len(html or ""),
        )
        payload = {
            "articles": [
                {
@@ -39,19 +79,37 @@ class WechatPublisher:
        async with httpx.AsyncClient(timeout=25) as client:
            url = f"https://api.weixin.qq.com/cgi-bin/draft/add?access_token={token}"
            logger.info(
                "wechat_http_post rid=%s endpoint=cgi-bin/draft/add http_timeout_s=25",
                rid,
            )
            r = await client.post(url, json=payload)
            data = r.json()
        if data.get("errcode", 0) != 0:
            logger.warning(
                "wechat_draft_failed rid=%s errcode=%s errmsg=%s raw=%s",
                rid,
                data.get("errcode"),
                data.get("errmsg"),
                data,
            )
            return PublishResponse(ok=False, detail=f"微信发布失败: {data}", data=data)
        logger.info(
            "wechat_draft_ok rid=%s media_id=%s",
            rid,
            data.get("media_id", data),
        )
        return PublishResponse(ok=True, detail="已发布到公众号草稿箱", data=data)
-    async def _get_access_token(self) -> str | None:
+    async def _get_access_token(self) -> tuple[str | None, bool, dict | None]:
        """成功时第三项为 None；失败时为微信返回的 JSON（含 errcode/errmsg）。"""
        now = int(time.time())
        if self._access_token and now < self._expires_at - 60:
-            return self._access_token
+            return self._access_token, True, None
        logger.info("wechat_http_get endpoint=cgi-bin/token reason=refresh_access_token")
        async with httpx.AsyncClient(timeout=20) as client:
            r = await client.get(
                "https://api.weixin.qq.com/cgi-bin/token",
@@ -61,12 +119,17 @@ class WechatPublisher:
                    "secret": settings.wechat_secret,
                },
            )
-            data = r.json()
+            data = r.json() if r.content else {}
        token = data.get("access_token")
        if not token:
-            return None
+            logger.warning(
                "wechat_token_refresh_failed http_status=%s body=%s",
                r.status_code,
                data,
            )
            return None, False, data if isinstance(data, dict) else None
        self._access_token = token
        self._expires_at = now + int(data.get("expires_in", 7200))
-        return token
+        return token, False, None
--- a/app/static/app.js
+++ b/app/static/app.js
@@ -1,5 +1,16 @@
 const $ = (id) => document.getElementById(id);
 function renderBodyPreview() {
  const raw = ($("body") && $("body").value) || "";
  const el = $("bodyPreview");
  if (!el) return;
  if (typeof marked !== "undefined" && marked.parse) {
    el.innerHTML = marked.parse(raw, { breaks: true });
  } else {
    el.textContent = raw;
  }
 }
 const statusEl = $("status");
 const rewriteBtn = $("rewriteBtn");
 const wechatBtn = $("wechatBtn");
@@ -13,6 +24,7 @@ function updateCounters() {
  $("sourceCount").textContent = `${countText($("sourceText").value)} 字`;
  $("summaryCount").textContent = `${countText($("summary").value)} 字`;
  $("bodyCount").textContent = `${countText($("body").value)} 字`;
  renderBodyPreview();
 }
 function setLoading(button, loading, idleText, loadingText) {
@@ -34,9 +46,36 @@ async function postJSON(url, body) {
  });
  const data = await res.json();
  if (!res.ok) throw new Error(data.detail || "请求失败");
  data._requestId = res.headers.get("X-Request-ID") || "";
  return data;
 }
 function renderTrace(trace, headerRid) {
  const wrap = $("traceWrap");
  const pre = $("traceJson");
  const badge = $("traceBadge");
  if (!pre || !wrap) return;
  if (!trace || Object.keys(trace).length === 0) {
    pre.textContent = headerRid
      ? JSON.stringify({ request_id: headerRid, note: "响应中无 trace 字段" }, null, 2)
      : "（尚无数据）完成一次「AI 改写」后，这里会显示请求 ID、耗时、质检与降级原因。";
    if (badge) badge.textContent = "";
    return;
  }
  const merged = { ...trace };
  if (headerRid && !merged.request_id) merged.request_id = headerRid;
  pre.textContent = JSON.stringify(merged, null, 2);
  const mode = merged.mode || "";
  if (badge) {
    badge.textContent = mode === "ai" ? "AI" : mode === "fallback" ? "保底" : "";
    badge.className = "trace-badge " + (mode === "ai" ? "is-ai" : mode === "fallback" ? "is-fallback" : "");
  }
  wrap.open = true;
 }
 $("rewriteBtn").addEventListener("click", async () => {
  const sourceText = $("sourceText").value.trim();
  if (sourceText.length < 20) {
@@ -59,11 +98,22 @@ $("rewriteBtn").addEventListener("click", async () => {
    $("summary").value = data.summary || "";
    $("body").value = data.body_markdown || "";
    updateCounters();
    renderTrace(data.trace, data._requestId);
    const tr = data.trace || {};
    const modelLine = tr.model ? `模型 ${tr.model}` : "";
    if (data.mode === "fallback") {
      const note = (data.quality_notes || [])[0] || "当前为保底改写稿";
-      setStatus(`改写完成（保底模式）：${note}`, true);
+      setStatus(
        `改写完成（保底模式，未使用或未通过千问长文）：${note}${modelLine ? ` · ${modelLine}` : ""}`,
        true
      );
    } else if (tr.quality_soft_accept) {
      setStatus(
        `改写完成（AI，质检提示）：${(data.quality_notes || []).join("；") || "见 quality_notes"} · ${modelLine || "AI"}`
      );
      statusEl.style.color = "#9a3412";
    } else {
-      setStatus("改写完成，可直接发布。");
+      setStatus(`改写完成（AI 洗稿）${modelLine ? ` · ${modelLine}` : ""}`);
    }
  } catch (e) {
    setStatus(`改写失败: ${e.message}`, true);
@@ -111,4 +161,32 @@ $("imBtn").addEventListener("click", async () => {
  $(id).addEventListener("input", updateCounters);
 });
 async function loadBackendConfig() {
  const el = $("backendConfig");
  if (!el) return;
  try {
    const res = await fetch("/api/config");
    const c = await res.json();
    if (!c.openai_configured) {
      el.textContent =
        "后端未配置 OPENAI_API_KEY：改写将使用本地保底稿，千问不会参与。请在 .env 中配置并重启容器。";
      el.style.color = "#b42318";
      return;
    }
    const name =
      c.provider === "dashscope"
        ? "通义千问（DashScope 兼容接口）"
        : "OpenAI 兼容接口";
    const host = c.base_url_host ? ` · ${c.base_url_host}` : "";
    const to = c.openai_timeout_sec != null ? ` · 单轮最长等待 ${c.openai_timeout_sec}s` : "";
    el.textContent = `已接入：${c.openai_model} · ${name}${host}${to}`;
    el.style.color = "";
  } catch (e) {
    el.textContent = "无法读取 /api/config（请确认服务已启动）";
    el.style.color = "#b42318";
  }
 }
 loadBackendConfig();
 updateCounters();
 renderTrace(null, "");
--- a/app/static/style.css
+++ b/app/static/style.css
@@ -38,6 +38,11 @@ body {
  margin: 6px 0 0;
 }
 .backend-config {
  margin: 8px 0 0;
  line-height: 1.5;
 }
 .badge {
  font-size: 12px;
  font-weight: 700;
@@ -161,11 +166,130 @@ button:disabled {
  font-weight: 600;
 }
 .small {
  font-size: 13px;
  margin: 0 0 12px;
 }
 .flow-hint {
  margin: 0 0 14px 18px;
  padding: 0;
  font-size: 13px;
  line-height: 1.6;
 }
 .trace-wrap {
  margin-top: 12px;
  padding: 10px 12px;
  border: 1px dashed var(--line);
  border-radius: 10px;
  background: #f9fbf9;
 }
 .trace-wrap summary {
  cursor: pointer;
  font-weight: 700;
  color: var(--text);
 }
 .trace-badge {
  margin-left: 8px;
  font-size: 11px;
  padding: 2px 8px;
  border-radius: 999px;
  font-weight: 700;
 }
 .trace-badge.is-ai {
  background: #eaf7f0;
  color: #0f5f3d;
  border: 1px solid #cde6d7;
 }
 .trace-badge.is-fallback {
  background: #fff4e6;
  color: #9a3412;
  border: 1px solid #fed7aa;
 }
 .body-split {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 12px;
  align-items: stretch;
 }
 .body-split textarea {
  min-height: 280px;
 }
 .preview-panel {
  display: flex;
  flex-direction: column;
  min-width: 0;
 }
 .markdown-preview {
  flex: 1;
  min-height: 280px;
  max-height: 480px;
  overflow: auto;
  padding: 12px 14px;
  border: 1px solid var(--line);
  border-radius: 10px;
  background: #fafcfb;
  font-size: 14px;
  line-height: 1.65;
 }
 .markdown-preview h2 {
  font-size: 1.15rem;
  margin: 1em 0 0.5em;
  color: var(--accent-2);
 }
 .markdown-preview h3 {
  font-size: 1.05rem;
  margin: 0.9em 0 0.4em;
 }
 .markdown-preview p {
  margin: 0.5em 0;
 }
 .markdown-preview ul,
 .markdown-preview ol {
  margin: 0.4em 0 0.6em 1.2em;
  padding: 0;
 }
 .markdown-preview li {
  margin: 0.25em 0;
 }
 .trace-json {
  margin: 10px 0 0;
  padding: 10px;
  max-height: 220px;
  overflow: auto;
  font-size: 11px;
  line-height: 1.45;
  background: #fff;
  border-radius: 8px;
  border: 1px solid var(--line);
  white-space: pre-wrap;
  word-break: break-word;
 }
@media (max-width: 960px) {
  .layout {
    grid-template-columns: 1fr;
  }
  .body-split {
    grid-template-columns: 1fr;
  }
  .topbar {
    align-items: flex-start;
    gap: 8px;
--- a/app/templates/index.html
+++ b/app/templates/index.html
@@ -10,7 +10,8 @@
    <header class="topbar">
      <div class="brand">
        <h1>{{ app_name }}</h1>
-        <p class="muted">把 X 长文快速改写为公众号可发布稿，并一键分发到公众号/IM。</p>
+        <p class="muted">粘贴原文 → 洗成约 <strong>5 段、500 字内</strong> 的短文（无小标题）→ 右侧预览 → 满意后发布。</p>
        <p id="backendConfig" class="backend-config muted small" aria-live="polite"></p>
      </div>
      <div class="badge">Beta</div>
    </header>
@@ -18,12 +19,18 @@
    <main class="layout">
      <section class="panel input-panel">
        <h2>输入与改写策略</h2>
        <ol class="flow-hint muted">
          <li>粘贴原文并设置语气/读者</li>
          <li>点击改写 → 右侧为短标题、摘要与<strong>五段正文</strong>（段落间空一行）</li>
          <li>看「运行追踪」：<strong>模式为 AI</strong> 且模型名正确，即千问/接口已生效</li>
          <li>人工改好后 →「发布到公众号草稿箱」（需配置 WECHAT_*）</li>
        </ol>
        <div class="field-head">
          <label>原始内容</label>
          <span id="sourceCount" class="meta">0 字</span>
        </div>
-        <textarea id="sourceText" rows="14" placeholder="粘贴 X 长文/线程内容..."></textarea>
+        <textarea id="sourceText" rows="14" placeholder="粘贴原文（长帖、线程、摘录均可），洗稿会围绕原文主题展开…"></textarea>
        <div class="grid2">
          <div>
@@ -56,6 +63,7 @@
      <section class="panel output-panel">
        <h2>发布内容</h2>
        <p class="muted small">下方「运行追踪」会显示本次请求 ID、耗时、质检项与是否降级，便于与容器日志对照。</p>
        <label>标题</label>
        <input id="title" type="text" />
@@ -67,10 +75,24 @@
        <textarea id="summary" rows="3"></textarea>
        <div class="field-head">
-          <label>Markdown 正文</label>
+          <label>正文（5 自然段，建议 ≤500 字）</label>
          <span id="bodyCount" class="meta">0 字</span>
        </div>
-        <textarea id="body" rows="16"></textarea>
+        <div class="body-split">
          <textarea id="body" rows="10" placeholder="五段之间空一行；无需 # 标题"></textarea>
          <div class="preview-panel">
            <div class="field-head">
              <label>排版预览</label>
              <span class="meta">与公众号 HTML 渲染接近</span>
            </div>
            <div id="bodyPreview" class="markdown-preview"></div>
          </div>
        </div>
        <details id="traceWrap" class="trace-wrap">
          <summary>运行追踪 <span id="traceBadge" class="trace-badge"></span></summary>
          <pre id="traceJson" class="trace-json"></pre>
        </details>
        <div class="actions">
          <button id="wechatBtn">发布到公众号草稿箱</button>
@@ -79,6 +101,7 @@
      </section>
    </main>
    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
    <script src="/static/app.js"></script>
  </body>
 </html>
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,6 +1,12 @@
 services:
  x2wechat:
-    build: .
+    build:
      context: .
      args:
        # 海外 PyPI 可改为 https://pypi.org/simple
        PIP_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
        # 拉 python 镜像慢时取消下一行注释（DaoCloud 同步 Docker Hub）
        # PY_BASE: docker.m.daocloud.io/library/python:3.11-slim
    container_name: x2wechat-studio
    ports:
      - "18000:8000"