This commit is contained in:
Daniel
2026-04-06 14:20:53 +08:00
parent babf24a0b0
commit 1d389767e6
14 changed files with 1079 additions and 179 deletions

View File

@@ -1,6 +1,20 @@
# —— 通义千问(推荐):阿里云 DashScope 的 OpenAI 兼容地址 + 模型名 + API Key
# OPENAI_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
# OPENAI_API_KEY=sk-你的DashScopeKey
# OPENAI_MODEL=qwen3.5-plus
OPENAI_API_KEY=
OPENAI_BASE_URL=
OPENAI_MODEL=gpt-4.1-mini
# 通义长文 JSON 常需 60~120s+,过短会 APITimeout 后走兜底
OPENAI_TIMEOUT=120
# SDK 自动重试次数。设为 0 可避免单次请求被隐式重试拖长(例如 30s 变 60s+)
OPENAI_MAX_RETRIES=0
# 长文 JSON 建议 8192;通义等若正文仍偏短可适当再加大
OPENAI_MAX_OUTPUT_TOKENS=8192
OPENAI_SOURCE_MAX_CHARS=5000
# 质检未通过时仍返回模型洗稿正文(quality_notes 记录问题);设为 false 则严格退回保底稿
AI_SOFT_ACCEPT=true
LOG_LEVEL=INFO
WECHAT_APPID=
WECHAT_SECRET=

View File

@@ -1,9 +1,20 @@
FROM python:3.11-slim
# syntax=docker/dockerfile:1
# 国内拉基础镜像慢时docker compose build --build-arg PY_BASE=docker.m.daocloud.io/library/python:3.11-slim
ARG PY_BASE=python:3.11-slim
FROM ${PY_BASE}
WORKDIR /app
# 默认清华 PyPI海外可docker compose build --build-arg PIP_INDEX_URL=https://pypi.org/simple
ARG PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
ENV PIP_INDEX_URL=${PIP_INDEX_URL} \
PIP_DISABLE_PIP_VERSION_CHECK=1 \
PIP_DEFAULT_TIMEOUT=120
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
# BuildKit 缓存加速重复构建;需 Docker 20.10+compose 默认开 BuildKit
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements.txt
COPY . .

View File

@@ -9,8 +9,27 @@ class Settings(BaseSettings):
openai_api_key: str | None = Field(default=None, alias="OPENAI_API_KEY")
openai_base_url: str | None = Field(default=None, alias="OPENAI_BASE_URL")
openai_model: str = Field(default="gpt-4.1-mini", alias="OPENAI_MODEL")
openai_timeout: float = Field(default=60.0, alias="OPENAI_TIMEOUT")
openai_timeout: float = Field(
default=120.0,
alias="OPENAI_TIMEOUT",
description="HTTP 等待模型单轮响应的最长时间(秒)。通义长文 JSON 建议 120~180",
)
openai_max_retries: int = Field(
default=0,
alias="OPENAI_MAX_RETRIES",
description="OpenAI SDK 自动重试次数。为避免单次请求被重试拉长,默认 0。",
)
openai_max_output_tokens: int = Field(
default=8192,
alias="OPENAI_MAX_OUTPUT_TOKENS",
description="单次模型输出 token 上限;通义等长文 JSON 需足够大",
)
openai_source_max_chars: int = Field(default=5000, alias="OPENAI_SOURCE_MAX_CHARS")
ai_soft_accept: bool = Field(
default=True,
alias="AI_SOFT_ACCEPT",
description="质检未通过时仍输出模型洗稿正文mode=aiquality_notes 记录问题),仅模型完全失败时用保底稿",
)
wechat_appid: str | None = Field(default=None, alias="WECHAT_APPID")
wechat_secret: str | None = Field(default=None, alias="WECHAT_SECRET")

25
app/logging_setup.py Normal file
View File

@@ -0,0 +1,25 @@
"""统一日志格式,便于 grep / 日志平台解析。"""
from __future__ import annotations
import logging
import os
def configure_logging() -> None:
level_name = os.getenv("LOG_LEVEL", "INFO").upper()
level = getattr(logging, level_name, logging.INFO)
fmt = "%(asctime)s | %(levelname)s | %(name)s | %(message)s"
datefmt = "%Y-%m-%d %H:%M:%S"
root = logging.getLogger()
if not root.handlers:
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(fmt=fmt, datefmt=datefmt))
root.addHandler(handler)
root.setLevel(level)
# 降噪:第三方库默认 WARNING
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("httpcore").setLevel(logging.WARNING)
logging.getLogger("openai").setLevel(logging.WARNING)

View File

@@ -1,17 +1,38 @@
from __future__ import annotations
import logging
from urllib.parse import urlparse
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from app.config import settings
from app.logging_setup import configure_logging
from app.middleware import RequestContextMiddleware
from app.schemas import IMPublishRequest, RewriteRequest, WechatPublishRequest
from app.services.ai_rewriter import AIRewriter
from app.services.im import IMPublisher
from app.services.wechat import WechatPublisher
configure_logging()
logger = logging.getLogger(__name__)
app = FastAPI(title=settings.app_name)
@app.on_event("startup")
async def _log_startup() -> None:
logger.info(
"app_start name=%s openai_configured=%s ai_soft_accept=%s",
settings.app_name,
bool(settings.openai_api_key),
settings.ai_soft_accept,
)
app.add_middleware(RequestContextMiddleware)
app.mount("/static", StaticFiles(directory="app/static"), name="static")
templates = Jinja2Templates(directory="app/templates")
@@ -25,16 +46,82 @@ async def index(request: Request):
return templates.TemplateResponse("index.html", {"request": request, "app_name": settings.app_name})
@app.get("/api/config")
async def api_config():
"""供页面展示:当前是否接入模型、模型名、提供方(不含密钥)。"""
base = settings.openai_base_url or ""
provider = "dashscope" if "dashscope.aliyuncs.com" in base else "openai_compatible"
host = urlparse(base).netloc if base else ""
return {
"openai_configured": bool(settings.openai_api_key),
"openai_model": settings.openai_model,
"provider": provider,
"base_url_host": host or None,
"openai_timeout_sec": settings.openai_timeout,
"openai_max_output_tokens": settings.openai_max_output_tokens,
}
@app.post("/api/rewrite")
async def rewrite(req: RewriteRequest):
return rewriter.rewrite(req)
async def rewrite(req: RewriteRequest, request: Request):
rid = getattr(request.state, "request_id", "")
src = req.source_text or ""
logger.info(
"api_rewrite_in rid=%s source_chars=%d title_hint_chars=%d tone=%s audience=%s "
"keep_points_chars=%d avoid_words_chars=%d",
rid,
len(src),
len(req.title_hint or ""),
req.tone,
req.audience,
len(req.keep_points or ""),
len(req.avoid_words or ""),
)
result = rewriter.rewrite(req, request_id=rid)
tr = result.trace or {}
logger.info(
"api_rewrite_out rid=%s mode=%s duration_ms=%s quality_notes=%d trace_steps=%s soft_accept=%s",
rid,
result.mode,
tr.get("duration_ms"),
len(result.quality_notes or []),
len((tr.get("steps") or [])),
tr.get("quality_soft_accept"),
)
return result
@app.post("/api/publish/wechat")
async def publish_wechat(req: WechatPublishRequest):
return await wechat.publish_draft(req)
async def publish_wechat(req: WechatPublishRequest, request: Request):
rid = getattr(request.state, "request_id", "")
logger.info(
"api_wechat_in rid=%s title_chars=%d summary_chars=%d body_md_chars=%d author_set=%s",
rid,
len(req.title or ""),
len(req.summary or ""),
len(req.body_markdown or ""),
bool((req.author or "").strip()),
)
out = await wechat.publish_draft(req, request_id=rid)
logger.info(
"api_wechat_out rid=%s ok=%s detail=%s errcode=%s",
rid,
out.ok,
(out.detail or "")[:120],
(out.data or {}).get("errcode") if isinstance(out.data, dict) else None,
)
return out
@app.post("/api/publish/im")
async def publish_im(req: IMPublishRequest):
return await im.publish(req)
async def publish_im(req: IMPublishRequest, request: Request):
rid = getattr(request.state, "request_id", "")
logger.info(
"api_im_in rid=%s title_chars=%d body_md_chars=%d",
rid,
len(req.title or ""),
len(req.body_markdown or ""),
)
out = await im.publish(req, request_id=rid)
logger.info("api_im_out rid=%s ok=%s detail=%s", rid, out.ok, (out.detail or "")[:120])
return out

61
app/middleware.py Normal file
View File

@@ -0,0 +1,61 @@
from __future__ import annotations
import logging
import time
import uuid
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response
logger = logging.getLogger("app.http")
class RequestContextMiddleware(BaseHTTPMiddleware):
"""注入 request_id记录访问日志与耗时。"""
async def dispatch(self, request: Request, call_next) -> Response:
rid = request.headers.get("X-Request-ID") or str(uuid.uuid4())
request.state.request_id = rid
path = request.url.path
if path.startswith("/static"):
response = await call_next(request)
response.headers["X-Request-ID"] = rid
return response
client = request.client.host if request.client else "-"
if path.startswith("/api"):
logger.info(
"http_in method=%s path=%s rid=%s client=%s",
request.method,
path,
rid,
client,
)
started = time.perf_counter()
try:
response = await call_next(request)
except Exception:
duration_ms = (time.perf_counter() - started) * 1000
logger.exception(
"http_error method=%s path=%s duration_ms=%.1f rid=%s",
request.method,
path,
duration_ms,
rid,
)
raise
duration_ms = (time.perf_counter() - started) * 1000
response.headers["X-Request-ID"] = rid
logger.info(
"http_out method=%s path=%s status=%s duration_ms=%.1f rid=%s",
request.method,
path,
response.status_code,
duration_ms,
rid,
)
return response

View File

@@ -1,3 +1,5 @@
from typing import Any
from pydantic import BaseModel, Field
@@ -16,6 +18,10 @@ class RewriteResponse(BaseModel):
body_markdown: str
mode: str = "ai"
quality_notes: list[str] = []
trace: dict[str, Any] | None = Field(
default=None,
description="改写链路追踪:请求 ID、耗时、模型、质检与降级原因便于监测与回溯",
)
class WechatPublishRequest(BaseModel):

View File

@@ -5,7 +5,9 @@ import json
import logging
import re
import time
from typing import Any
from textwrap import shorten
from urllib.parse import urlparse
from openai import OpenAI
@@ -15,47 +17,72 @@ from app.schemas import RewriteRequest, RewriteResponse
logger = logging.getLogger(__name__)
def _api_host(url: str | None) -> str:
if not url:
return ""
try:
return urlparse(url).netloc or ""
except Exception:
return ""
def _is_likely_timeout_error(exc: BaseException) -> bool:
n = type(exc).__name__.lower()
if "timeout" in n:
return True
s = str(exc).lower()
return "timed out" in s or "timeout" in s
# 短文洗稿5 个自然段、正文总字数上限(含标点)
MAX_BODY_CHARS = 500
MIN_BODY_CHARS = 80
def _preview_for_log(text: str, limit: int = 400) -> str:
t = (text or "").replace("\r\n", "\n").replace("\n", " ").strip()
if len(t) <= limit:
return t
return t[: limit - 1] + ""
SYSTEM_PROMPT = """
你是顶级中文公众号编,擅长把 X/Twitter 的观点型内容改写成高质量公众号文章
你的目标不是“同义替换”,而是“重构表达”,保证可读性、逻辑性和可发布性
你是资深中文科普类公众号编,擅长把长文、线程贴改写成**极短、好读**的推送
目标:在**不偏离原意**的前提下,用最少字数讲清一件事;不要写成技术方案、长文大纲或带很多小标题的文章
硬性规则:
1) 保留核心事实与关键观点,不编造数据,不夸大结论
2) 文章结构必须完整:导语 -> 核心观点 -> 深度分析 -> 落地建议 -> 结语
3) 风格自然,避免 AI 套话(如“首先其次最后”“赋能”“闭环”等空话)
4) 每节都要有信息增量,不要重复原文句式
5) 输出必须是合法 JSON字段title, summary, body_markdown。
1) **忠实原意**:只概括、转述原文已有信息,不编造事实,不偷换主题
2) 语气通俗、干脆,避免套话堆砌
3) 只输出合法 JSONtitle, summary, body_markdown
4) **body_markdown 约束**:恰好 **5 个自然段**;段与段之间用一个空行分隔;**不要**使用 # / ## 标题符号;全文(正文)总字数 **不超过 500 字**(含标点)
5) titlesummary 也要短:标题约 818 字;摘要约 4080 字;
6) JSON 字符串内引号请用「」或『』,勿用未转义的英文 "
""".strip()
REWRITE_SCHEMA_HINT = """
请输出 JSON
请输出 JSON(勿包在 ``` 里),例如
{
"title": "20字内中文标题明确价值点",
"summary": "80-120字中文摘要说明读者收获",
"body_markdown": "完整Markdown正文"
"title": "短标题,点明主题",
"summary": "一句话到两句话摘要",
"body_markdown": "第一段内容…\\n\\n第二段…\\n\\n第三段…\\n\\n第四段…\\n\\n第五段…"
}
正文格式要求(必须遵循)
## 导语
2-3段交代背景、冲突与阅读价值。
body_markdown 写法
- 必须且只能有 **5 段**:每段若干完整句子,段之间 **\\n\\n**(空一行);
- **禁止** markdown 标题(不要用 #
- 正文总长 **≤500 字**,宁可短而清楚,不要写满废话;
- 内容顺序建议:第 1 段交代在说什么;中间 3 段展开关键信息;最后 1 段收束或提醒(均须紧扣原文,勿乱发挥)。
""".strip()
## 核心观点
- 3~5条要点每条是完整信息句不要口号。
# 通义等模型若首次过短/结构不对,再要一次
_JSON_BODY_TOO_SHORT_RETRY = """
## 深度分析
### 1) 现象背后的原因
2-3段
### 2) 对行业/团队的影响
2-3段
### 3) 关键风险与边界
2-3段
## 落地建议
1. 三到五条可执行动作,尽量包含“谁在什么场景做什么”。
## 结语
1段收束观点并给出下一步建议。
【系统复检】上一次 body_markdown 不符合要求。请重输出**完整** JSON
- 正文必须 **恰好 5 个自然段**(仅 \\n\\n 分段),无 # 标题,总字数 **≤500 字**
- 忠实原稿、简短高效;
- 引号只用「」『』;
- 只输出 JSON。
""".strip()
@@ -70,47 +97,198 @@ class AIRewriter:
api_key=settings.openai_api_key,
base_url=settings.openai_base_url,
timeout=settings.openai_timeout,
max_retries=1,
max_retries=max(0, int(settings.openai_max_retries)),
)
logger.info(
"AIRewriter_init model=%s api_host=%s prefer_chat_first=%s timeout_s=%s max_retries=%s",
settings.openai_model,
_api_host(settings.openai_base_url) or "(default)",
self._prefer_chat_first,
settings.openai_timeout,
settings.openai_max_retries,
)
else:
logger.warning("AIRewriter_init openai_key_missing=1 rewrite_will_use_fallback_only=1")
def rewrite(self, req: RewriteRequest) -> RewriteResponse:
def rewrite(self, req: RewriteRequest, request_id: str = "") -> RewriteResponse:
cleaned_source = self._clean_source(req.source_text)
started = time.monotonic()
trace: dict[str, Any] = {
"request_id": request_id or None,
"model": settings.openai_model,
"provider": "dashscope" if self._prefer_chat_first else "openai_compatible",
"source_chars_in": len(req.source_text or ""),
"cleaned_chars": len(cleaned_source),
"openai_timeout_env_sec": settings.openai_timeout,
"steps": [],
}
def _step(name: str, **extra: Any) -> None:
elapsed_ms = round((time.monotonic() - started) * 1000, 1)
trace["steps"].append({"name": name, "elapsed_ms": elapsed_ms, **extra})
extra_fmt = ""
if extra:
parts: list[str] = []
for k, v in extra.items():
s = repr(v)
if len(s) > 200:
s = s[:197] + "..."
parts.append(f"{k}={s}")
extra_fmt = " " + " ".join(parts)
logger.info(
"rewrite_step rid=%s step=%s elapsed_ms=%s%s",
request_id or "-",
name,
elapsed_ms,
extra_fmt,
)
raw_in = (req.source_text or "").replace("\r\n", "\n").strip()
_step("clean_source", truncated=len(cleaned_source) < len(raw_in))
logger.info(
"rewrite_enter rid=%s model=%s client_ok=%s prefer_chat_first=%s "
"source_chars=%d cleaned_chars=%d ai_soft_accept=%s",
request_id or "-",
settings.openai_model,
bool(self._client),
self._prefer_chat_first,
trace["source_chars_in"],
len(cleaned_source),
settings.ai_soft_accept,
)
# Primary: model rewrite + quality gate + optional second-pass polish.
if self._client:
# DashScope/Qwen works better with a single stable call.
# 通义长文 JSON 常需 40~90s+。旧代码错误地将首轮 cap 在 30s → APITimeoutError → 仅走兜底。
if self._prefer_chat_first:
first_pass_timeout = max(18.0, min(30.0, settings.openai_timeout))
first_pass_timeout = max(45.0, min(300.0, float(settings.openai_timeout)))
else:
first_pass_timeout = max(20.0, min(50.0, settings.openai_timeout))
draft = self._model_rewrite(req, cleaned_source, timeout_sec=first_pass_timeout)
first_pass_timeout = max(20.0, min(120.0, float(settings.openai_timeout)))
trace["first_pass_http_timeout_sec"] = round(first_pass_timeout, 1)
logger.info(
"rewrite_model_first_pass rid=%s first_pass_http_timeout_s=%.1f openai_timeout_env_s=%.1f "
"lenient_qa=%s note=dashscope_uses_full_openai_timeout_not_capped_30",
request_id or "-",
first_pass_timeout,
settings.openai_timeout,
self._prefer_chat_first,
)
t0 = time.monotonic()
draft = self._model_rewrite(req, cleaned_source, timeout_sec=first_pass_timeout, request_id=request_id)
_step(
"model_first_pass",
duration_ms=round((time.monotonic() - t0) * 1000, 1),
ok=bool(draft),
timeout_sec=first_pass_timeout,
)
if not draft:
trace["quality_issues_final"] = ["模型未返回有效 JSON 或请求超时"]
trace["model_unavailable_hint"] = (
"排查:① 日志是否 APITimeoutError → 提高 OPENAI_TIMEOUT通义建议 120~180并确认 "
"first_pass_http_timeout_sec 与 trace.openai_timeout_env_sec 一致;② 网络到 "
"dashscope.aliyuncs.com③ 见 model_call_fail 的 is_likely_timeout。"
)
_step("model_first_pass_failed", detail="timeout_or_invalid_json")
if draft:
normalized = self._normalize_result(draft)
issues = self._quality_issues(req, cleaned_source, normalized)
issues = self._quality_issues(
req, cleaned_source, normalized, lenient=self._prefer_chat_first
)
trace["quality_issues_first"] = issues
logger.info(
"rewrite quality check rid=%s first_issues=%s body_chars=%d",
request_id,
issues,
len(normalized.get("body_markdown", "") or ""),
)
elapsed = time.monotonic() - started
remaining_budget = max(0.0, (first_pass_timeout + 20.0) - elapsed)
if issues and (not self._prefer_chat_first) and remaining_budget >= 10.0:
remaining_budget = max(0.0, (first_pass_timeout + 25.0) - elapsed)
polish_budget = min(22.0, remaining_budget) if self._prefer_chat_first else min(30.0, remaining_budget)
if issues and not (
remaining_budget >= 8.0 and polish_budget >= 6.0
):
logger.info(
"rewrite_polish_skipped rid=%s first_issues=%d remaining_budget_s=%.1f polish_budget_s=%.1f",
request_id or "-",
len(issues),
remaining_budget,
polish_budget,
)
if issues and remaining_budget >= 8.0 and polish_budget >= 6.0:
t1 = time.monotonic()
polished = self._model_polish(
req,
cleaned_source,
normalized,
issues,
timeout_sec=min(30.0, remaining_budget),
timeout_sec=polish_budget,
request_id=request_id,
)
_step(
"model_polish",
duration_ms=round((time.monotonic() - t1) * 1000, 1),
ok=bool(polished),
)
if polished:
normalized = self._normalize_result(polished)
final_issues = self._quality_issues(req, cleaned_source, normalized)
final_issues = self._quality_issues(
req, cleaned_source, normalized, lenient=self._prefer_chat_first
)
trace["quality_issues_final"] = final_issues
if not final_issues:
return RewriteResponse(**normalized, mode="ai", quality_notes=[])
logger.warning("rewrite quality gate fallback triggered: %s", final_issues)
trace["duration_ms"] = round((time.monotonic() - started) * 1000, 1)
trace["mode"] = "ai"
logger.info(
"rewrite success rid=%s duration_ms=%.1f mode=ai",
request_id,
trace["duration_ms"],
)
return RewriteResponse(**normalized, mode="ai", quality_notes=[], trace=trace)
# 模型已返回有效 JSON默认「软接受」——仍视为 AI 洗稿,质检问题写入 quality_notes避免误用模板稿
if settings.ai_soft_accept and self._model_output_usable(normalized):
trace["duration_ms"] = round((time.monotonic() - started) * 1000, 1)
trace["mode"] = "ai"
trace["quality_soft_accept"] = True
trace["quality_warnings"] = final_issues
logger.warning(
"rewrite soft-accept rid=%s warnings=%s body_chars=%d",
request_id,
final_issues,
len(normalized.get("body_markdown", "") or ""),
)
return RewriteResponse(
**normalized,
mode="ai",
quality_notes=final_issues,
trace=trace,
)
logger.warning(
"rewrite quality gate fallback rid=%s issues=%s",
request_id,
final_issues,
)
_step("quality_gate_failed", issues=final_issues)
else:
_step("skip_model", reason="OPENAI_API_KEY 未配置")
trace["quality_issues_final"] = ["未配置 OPENAI_API_KEY使用本地保底稿"]
# Secondary: deterministic fallback with publishable structure.
return self._fallback_rewrite(req, cleaned_source, reason="模型超时或质量未达标,已使用结构化保底稿")
reason = "模型未返回有效 JSON、超时或质量未达标,已使用结构化保底稿"
trace["duration_ms"] = round((time.monotonic() - started) * 1000, 1)
logger.info(
"rewrite fallback rid=%s duration_ms=%.1f last_issues=%s",
request_id,
trace["duration_ms"],
trace.get("quality_issues_final"),
)
return self._fallback_rewrite(req, cleaned_source, reason=reason, trace=trace)
def _model_rewrite(self, req: RewriteRequest, cleaned_source: str, timeout_sec: float) -> dict | None:
def _model_rewrite(
self, req: RewriteRequest, cleaned_source: str, timeout_sec: float, request_id: str = ""
) -> dict | None:
user_prompt = self._build_user_prompt(req, cleaned_source)
return self._call_model_json(user_prompt, timeout_sec=timeout_sec)
return self._call_model_json(user_prompt, timeout_sec=timeout_sec, request_id=request_id)
def _model_polish(
self,
@@ -119,10 +297,11 @@ class AIRewriter:
normalized: dict,
issues: list[str],
timeout_sec: float,
request_id: str = "",
) -> dict | None:
issue_text = "\n".join([f"- {i}" for i in issues])
user_prompt = f"""
你上一次的改写稿质量未达标,请基于下面问题做彻底重写,不要只改几个词:
你上一次的改写稿未通过质检,请针对下列问题重写;体裁仍为**科普介绍类公众号****忠实原稿**,不要写成技术方案或内部汇报。
{issue_text}
原始内容:
@@ -141,9 +320,9 @@ class AIRewriter:
- 必须保留观点:{req.keep_points or ''}
- 避免词汇:{req.avoid_words or ''}
请输出一全新且高质量版本{REWRITE_SCHEMA_HINT}
请输出一全新稿件{REWRITE_SCHEMA_HINT}
""".strip()
return self._call_model_json(user_prompt, timeout_sec=timeout_sec)
return self._call_model_json(user_prompt, timeout_sec=timeout_sec, request_id=request_id)
def _build_user_prompt(self, req: RewriteRequest, cleaned_source: str) -> str:
return f"""
@@ -157,94 +336,95 @@ class AIRewriter:
- 必须保留观点:{req.keep_points or ''}
- 避免词汇:{req.avoid_words or ''}
任务:请输出可直接用于公众号发布的文章{REWRITE_SCHEMA_HINT}
任务:在**不偏离原帖主题与事实**的前提下,改写成科普介绍风格的公众号正文(好读、讲清楚,而非技术实施方案){REWRITE_SCHEMA_HINT}
""".strip()
def _fallback_rewrite(self, req: RewriteRequest, cleaned_source: str, reason: str) -> RewriteResponse:
def _fallback_rewrite(
self, req: RewriteRequest, cleaned_source: str, reason: str, trace: dict[str, Any] | None = None
) -> RewriteResponse:
sentences = self._extract_sentences(cleaned_source)
points = self._pick_key_points(sentences, limit=5)
title = req.title_hint.strip() or self._build_fallback_title(sentences)
summary = self._build_fallback_summary(points, cleaned_source)
intro = self._build_intro(points, cleaned_source)
analysis = self._build_analysis(points)
actions = self._build_actions(points)
conclusion = "如果你准备把这类内容持续做成栏目,建议建立固定模板:观点来源、关键证据、执行建议、复盘结论。"
conclusion = "细节仍以原帖为准;若话题在更新,请对照出处核对。"
body = (
"## 导语\n"
f"{intro}\n\n"
"## 核心观点\n"
+ "\n".join([f"- {p}" for p in points])
+ "\n\n"
"## 深度分析\n"
"### 1) 现象背后的原因\n"
f"{analysis['cause']}\n\n"
"### 2) 对行业/团队的影响\n"
f"{analysis['impact']}\n\n"
"### 3) 关键风险与边界\n"
f"{analysis['risk']}\n\n"
"## 落地建议\n"
+ "\n".join([f"{i + 1}. {a}" for i, a in enumerate(actions)])
+ "\n\n"
"## 结语\n"
f"{conclusion}"
)
def _one_line(s: str, n: int) -> str:
t = re.sub(r"\s+", " ", (s or "").strip())
return t if len(t) <= n else t[: n - 1] + ""
paras = [
_one_line(self._build_intro(points, cleaned_source), 105),
_one_line(analysis["cause"], 105),
_one_line(analysis["impact"], 105),
_one_line(analysis["risk"], 105),
_one_line(conclusion, 105),
]
body = "\n\n".join(paras)
if len(body) > MAX_BODY_CHARS:
body = body[: MAX_BODY_CHARS - 1] + ""
normalized = {
"title": title,
"summary": summary,
"body_markdown": self._format_markdown(body),
}
return RewriteResponse(**normalized, mode="fallback", quality_notes=[reason])
if trace is not None:
trace["mode"] = "fallback"
trace["fallback_reason"] = reason
rid = (trace or {}).get("request_id") or "-"
logger.info(
"rewrite_fallback_compose rid=%s reason=%s title_chars=%d summary_chars=%d body_chars=%d points=%d",
rid,
reason[:120],
len(normalized["title"]),
len(normalized["summary"]),
len(normalized["body_markdown"]),
len(points),
)
return RewriteResponse(**normalized, mode="fallback", quality_notes=[reason], trace=trace)
def _build_fallback_title(self, sentences: list[str]) -> str:
seed = sentences[0] if sentences else "内容改写"
seed = sentences[0] if sentences else "内容导读"
seed = shorten(seed, width=16, placeholder="")
return f"{seed}给内容创作者的实战拆解"
return f"{seed}一文读懂在说什么"
def _build_fallback_summary(self, points: list[str], source: str) -> str:
if len(points) >= 2:
return f"本文提炼了{points[0]},并进一步分析{points[1]},最后给出可直接执行的发布建议,帮助你把观点内容做成高质量公众号文章。"
return shorten(re.sub(r"\s+", " ", source), width=110, placeholder="...")
return shorten(
f"原帖在谈:{points[0]};另一点:{points[1]}",
width=85,
placeholder="",
)
return shorten(re.sub(r"\s+", " ", source), width=85, placeholder="")
def _build_intro(self, points: list[str], source: str) -> str:
focus = points[0] if points else shorten(source, width=42, placeholder="...")
return (
f"这篇内容的价值不在“信息多”,而在于它点出了一个真正值得关注的问题:{focus}\n\n"
"对公众号读者来说,最关心的是这件事会带来什么变化、现在能做什么"
"因此本文不做逐句复述,而是按“观点-影响-动作”重组,方便直接落地。"
f"原帖主要在谈:{focus}。下面用更适合公众号阅读的方式,把脉络和重点捋清楚,方便你快速抓住作者在表达什么\n\n"
"说明:这是基于原文的导读式整理,若需引用细节,请以原帖为准"
)
def _build_analysis(self, points: list[str]) -> dict[str, str]:
p1 = points[0] if points else "行业正在从信息堆叠转向结果导向"
p2 = points[1] if len(points) > 1 else "团队协作方式被自动化流程重塑"
p3 = points[2] if len(points) > 2 else "内容质量会成为真正分水岭"
p1 = points[0] if points else "原文讨论的核心现象"
p2 = points[1] if len(points) > 1 else "与读者日常能感知到的关联"
p3 = points[2] if len(points) > 2 else "原文可能提到的限制或尚未定论之处"
return {
"cause": (
f"从表面看是工具迭代,实质是生产逻辑变化。{p1},意味着过去依赖经验的环节,正在被标准化流程替代"
"谁先完成流程化改造,谁就更容易稳定产出"
f"先把事情放在原文的语境里理解:{p1}"
"这里侧重讲清楚「作者在说什么」,而不是替原文下结论"
),
"impact": (
f"短期影响体现在效率,中长期影响体现在品牌认知{p2}"
"如果只追求发布速度,内容会快速同质化;如果把洞察和表达打磨成体系,内容资产会持续增值"
f"对大多数读者来说,更关心的是:这和自己有什么关系{p2}"
"若原帖偏专业,这里尽量用通俗说法转述,避免写成给决策层的公文"
),
"risk": (
f"最大的风险不是‘不用 AI而是只用 AI{p3}"
"没有事实校对与人工观点把关,文章容易出现空泛表达、错误引用和结论过度"
f"任何公开讨论都有边界:{p3}"
"若话题仍在变化,结论可能更新,阅读时建议保留一点审慎,必要时回看原始出处"
),
}
def _build_actions(self, points: list[str]) -> list[str]:
anchor = points[0] if points else "核心观点"
return [
f"先确定本篇唯一主线:围绕“{anchor}”展开,删除与主线无关的段落。",
"按“导语-观点-分析-建议-结语”五段式重排正文,每段只解决一个问题。",
"为每个核心观点补一条可验证依据(数据、案例或公开来源),提升可信度。",
"发布前做一次反 AI 味检查:删掉空话,替换为具体动作和明确对象。",
"将高表现文章沉淀为模板,下次复用同样结构提高稳定性。",
]
def _clean_source(self, text: str) -> str:
src = (text or "").replace("\r\n", "\n").strip()
src = re.sub(r"https?://\S+", "", src)
@@ -266,11 +446,11 @@ class AIRewriter:
def _pick_key_points(self, sentences: list[str], limit: int) -> list[str]:
points: list[str] = []
templates = [
"核心变化{}",
"关键问题{}",
"方法调整{}",
"结果反馈{}",
"结论启示:{}",
"值得关注{}",
"背景要点{}",
"原文强调{}",
"延伸信息{}",
"阅读提示:{}",
]
for s in sentences:
if len(s) < 12:
@@ -309,18 +489,153 @@ class AIRewriter:
raise ValueError("model output is not valid JSON")
def _call_model_json(self, user_prompt: str, timeout_sec: float) -> dict | None:
def _chat_completions_json(self, user_prompt: str, timeout_sec: float, request_id: str) -> dict | None:
"""chat.completions通义兼容层在 json_object 下易产出极短 JSON故 DashScope 不传 response_format并支持短文自动重试。"""
max_attempts = 2 if self._prefer_chat_first else 1
deadline = time.monotonic() + max(0.0, timeout_sec)
pe = user_prompt
for attempt in range(max_attempts):
if attempt == 1:
pe = user_prompt + _JSON_BODY_TOO_SHORT_RETRY
remaining = deadline - time.monotonic()
if remaining <= 0:
logger.warning(
"model_call_budget_exhausted rid=%s api=chat.completions attempt=%d/%d",
request_id or "-",
attempt + 1,
max_attempts,
)
return None
try:
logger.info(
"AI request start model=%s timeout=%.1fs prefer_chat_first=%s prompt_chars=%d",
"model_call_try rid=%s api=chat.completions.create attempt=%d/%d max_tokens=%d json_object=%s timeout_s=%.1f",
request_id or "-",
attempt + 1,
max_attempts,
settings.openai_max_output_tokens,
not self._prefer_chat_first,
remaining,
)
t0 = time.monotonic()
create_kwargs: dict[str, Any] = {
"model": settings.openai_model,
"messages": [
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": pe},
],
"max_tokens": settings.openai_max_output_tokens,
"temperature": 0.4,
"extra_body": {"enable_thinking": False},
"timeout": remaining,
}
# OpenAI 官方 API 在 json_object 下表现稳定;通义兼容模式若开启则常出现正文被压成一两百字。
if not self._prefer_chat_first:
create_kwargs["response_format"] = {"type": "json_object"}
completion = self._client.chat.completions.create(**create_kwargs)
except Exception as exc:
is_to = _is_likely_timeout_error(exc)
logger.warning(
"model_call_fail rid=%s api=chat.completions attempt=%d/%d exc_type=%s exc=%s "
"is_likely_timeout=%s http_timeout_budget_s=%.1f openai_timeout_env_s=%.1f max_tokens=%d "
"hint=%s",
request_id or "-",
attempt + 1,
max_attempts,
type(exc).__name__,
exc,
is_to,
remaining,
settings.openai_timeout,
settings.openai_max_output_tokens,
(
"典型原因:单轮 HTTP 等待短于模型生成长文 JSON 所需时间;已取消错误的 30s 上限,"
"请确认 OPENAI_TIMEOUT>=120 并重启进程。"
)
if is_to and self._prefer_chat_first
else (
"若为超时:增大 OPENAI_TIMEOUT否则检查 Key/模型名/网络。"
if is_to
else ""
),
)
if self._prefer_chat_first:
return None
raise
choice = completion.choices[0] if completion.choices else None
msg = (choice.message.content if choice else "") or ""
fr = getattr(choice, "finish_reason", None) if choice else None
usage = getattr(completion, "usage", None)
udump = (
usage.model_dump()
if usage is not None and hasattr(usage, "model_dump")
else usage
)
ms = (time.monotonic() - t0) * 1000
logger.info(
"model_call_ok rid=%s api=chat.completions attempt=%d duration_ms=%.1f output_chars=%d "
"finish_reason=%s usage=%s preview=%s",
request_id or "-",
attempt + 1,
ms,
len(msg),
fr,
udump,
_preview_for_log(msg, 380),
)
logger.debug(
"model_call_raw rid=%s api=chat.completions attempt=%d body=%s",
request_id or "-",
attempt + 1,
msg,
)
try:
parsed = self._parse_response_json(msg)
except Exception as exc:
logger.warning(
"model_json_parse_fail rid=%s attempt=%d err=%s",
request_id or "-",
attempt + 1,
exc,
)
if not self._prefer_chat_first:
raise
if attempt == max_attempts - 1:
return None
continue
raw_body = str(parsed.get("body_markdown", "")).strip()
bl = len(raw_body)
pc = len([p for p in re.split(r"\n\s*\n", raw_body) if p.strip()])
if self._prefer_chat_first and attempt == 0 and (bl < 40 or pc < 3):
logger.warning(
"model_body_retry rid=%s body_chars=%d paragraphs=%d reason=too_thin_or_not_segmented",
request_id or "-",
bl,
pc,
)
continue
return parsed
return None
def _call_model_json(self, user_prompt: str, timeout_sec: float, request_id: str = "") -> dict | None:
methods = ["chat", "responses"] if self._prefer_chat_first else ["responses", "chat"]
logger.info(
"model_call_begin rid=%s model=%s timeout_s=%.1f prefer_chat_first=%s prompt_chars=%d "
"try_order=%s",
request_id or "-",
settings.openai_model,
timeout_sec,
self._prefer_chat_first,
len(user_prompt),
methods,
)
methods = ["chat", "responses"] if self._prefer_chat_first else ["responses", "chat"]
for method in methods:
t0 = time.monotonic()
if method == "responses":
try:
logger.info("model_call_try rid=%s api=OpenAI.responses.create", request_id or "-")
completion = self._client.responses.create(
model=settings.openai_model,
input=[
@@ -331,35 +646,59 @@ class AIRewriter:
timeout=timeout_sec,
)
output_text = completion.output_text or ""
logger.info("AI raw output (responses): %s", output_text)
ms = (time.monotonic() - t0) * 1000
logger.info(
"model_call_ok rid=%s api=responses duration_ms=%.1f output_chars=%d preview=%s",
request_id or "-",
ms,
len(output_text),
_preview_for_log(output_text, 380),
)
logger.debug("model_call_raw rid=%s api=responses body=%s", request_id or "-", output_text)
return self._parse_response_json(output_text)
except Exception as exc:
logger.warning("responses API failed: %s", exc)
logger.warning(
"model_call_fail rid=%s api=responses duration_ms=%.1f exc_type=%s exc=%s",
request_id or "-",
(time.monotonic() - t0) * 1000,
type(exc).__name__,
exc,
)
continue
if method == "chat":
try:
completion = self._client.chat.completions.create(
model=settings.openai_model,
messages=[
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": user_prompt},
],
response_format={"type": "json_object"},
max_tokens=1800,
temperature=0.4,
extra_body={"enable_thinking": False},
timeout=timeout_sec,
)
msg = completion.choices[0].message.content if completion.choices else ""
logger.info("AI raw output (chat.completions): %s", msg or "")
return self._parse_response_json(msg or "")
except Exception as exc:
logger.warning("chat.completions API failed: %s", exc)
# DashScope compatibility path: don't spend extra time on responses fallback.
t_chat = time.monotonic()
out = self._chat_completions_json(user_prompt, timeout_sec, request_id)
if out is not None:
return out
if self._prefer_chat_first:
logger.info(
"model_call_stop rid=%s reason=dashscope_chat_no_valid_json duration_ms=%.1f",
request_id or "-",
(time.monotonic() - t_chat) * 1000,
)
return None
except Exception as exc:
logger.warning(
"model_call_fail rid=%s api=chat.completions duration_ms=%.1f exc_type=%s exc=%s",
request_id or "-",
(time.monotonic() - t0) * 1000,
type(exc).__name__,
exc,
)
if self._prefer_chat_first:
logger.info(
"model_call_stop rid=%s reason=dashscope_chat_exception",
request_id or "-",
)
return None
continue
logger.error(
"model_call_exhausted rid=%s methods_tried=%s result=none",
request_id or "-",
methods,
)
return None
def _normalize_result(self, data: dict) -> dict:
@@ -370,49 +709,57 @@ class AIRewriter:
if not title:
title = "公众号改写稿"
if not summary:
summary = shorten(re.sub(r"\s+", " ", body), width=110, placeholder="...")
summary = shorten(re.sub(r"\s+", " ", body), width=90, placeholder="...")
body = self._ensure_sections(body)
body = re.sub(r"(?m)^#{1,6}\s+[^\n]*\n?", "", body).strip()
body = self._normalize_body_length(body)
body = self._format_markdown(body)
return {"title": title, "summary": summary, "body_markdown": body}
def _ensure_sections(self, body: str) -> str:
def _normalize_body_length(self, body: str) -> str:
text = (body or "").strip()
required = ["## 导语", "## 核心观点", "## 深度分析", "## 落地建议", "## 结语"]
missing = [h for h in required if h not in text]
if not text:
text = "## 导语\n\n内容生成失败,请重试。\n"
if missing:
# Light touch: append missing sections to keep publish structure stable.
pads = "\n\n".join([f"{h}\n\n(待补充)" for h in missing])
text = f"{text}\n\n{pads}"
text = "(正文生成失败,请重试。"
if len(text) > MAX_BODY_CHARS:
text = text[: MAX_BODY_CHARS - 1] + ""
return text
def _quality_issues(self, req: RewriteRequest, source: str, normalized: dict) -> list[str]:
def _quality_issues(
self, req: RewriteRequest, source: str, normalized: dict, lenient: bool = False
) -> list[str]:
issues: list[str] = []
title = normalized.get("title", "")
summary = normalized.get("summary", "")
body = normalized.get("body_markdown", "")
if len(title) < 8 or len(title) > 34:
issues.append("标题长度不理想(建议 8-34 字)")
min_title, max_title = (4, 30) if lenient else (6, 24)
if len(title) < min_title or len(title) > max_title:
issues.append(f"标题长度不理想(建议 {min_title}-{max_title} 字,短标题即可)")
if len(summary) < 60:
issues.append("摘要过短,信息量不足")
min_summary, max_summary = (20, 100) if lenient else (25, 90)
if len(summary) < min_summary:
issues.append("摘要过短")
elif len(summary) > max_summary:
issues.append(f"摘要过长(建议 ≤{max_summary} 字)")
headings = re.findall(r"(?m)^##\s+.+$", body)
if len(headings) < 5:
issues.append("二级标题不足,结构不完整")
paragraphs = [p.strip() for p in re.split(r"\n\s*\n", body) if p.strip()]
pc = len(paragraphs)
need_p = 4 if lenient else 5
if pc < need_p:
issues.append(f"正文需约 5 个自然段、空行分隔(当前 {pc} 段)")
elif not lenient and pc > 6:
issues.append(f"正文段落过多(当前 {pc} 段),请合并为 5 段左右")
paragraphs = [p.strip() for p in body.split("\n\n") if p.strip()]
if len(paragraphs) < 10:
issues.append("正文段落偏少,展开不充分")
if len(body) > MAX_BODY_CHARS:
issues.append(f"正文超过 {MAX_BODY_CHARS} 字(当前 {len(body)} 字),请压缩")
elif len(body) < MIN_BODY_CHARS:
issues.append(f"正文过短(当前阈值 ≥{MIN_BODY_CHARS} 字)")
if len(body) < 900:
issues.append("正文过短,无法支撑公众号发布")
if re.search(r"(?m)^#+\s", body):
issues.append("正文请勿使用 # 标题符号,只用自然段")
if self._looks_like_raw_copy(source, body):
if self._looks_like_raw_copy(source, body, lenient=lenient):
issues.append("改写与原文相似度过高,疑似未充分重写")
if req.avoid_words:
@@ -428,7 +775,7 @@ class AIRewriter:
return issues
def _looks_like_raw_copy(self, source: str, rewritten: str) -> bool:
def _looks_like_raw_copy(self, source: str, rewritten: str, lenient: bool = False) -> bool:
src = re.sub(r"\s+", "", source or "")
dst = re.sub(r"\s+", "", rewritten or "")
if not src or not dst:
@@ -436,10 +783,20 @@ class AIRewriter:
if dst in src or src in dst:
return True
ratio = difflib.SequenceMatcher(a=src[:3500], b=dst[:3500]).ratio()
return ratio >= 0.80
threshold = 0.88 if lenient else 0.80
return ratio >= threshold
def _model_output_usable(self, normalized: dict) -> bool:
    """Return True when the model's JSON is substantial enough to soft-accept.

    A usable result has a non-trivial title and body and does not blow far
    past the configured body-length ceiling; otherwise the caller falls back
    to the template draft.
    """
    title = (normalized.get("title") or "").strip()
    body = (normalized.get("body_markdown") or "").strip()
    if len(title) < 4 or len(body) < 40:
        return False
    # Allow a small grace margin over the hard cap; anything beyond is unusable.
    return len(body) <= MAX_BODY_CHARS + 80
def _format_markdown(self, text: str) -> str:
body = text.replace("\r\n", "\n").strip()
body = re.sub(r"\n{3,}", "\n\n", body)
body = re.sub(r"(?m)^(#{1,3}\s[^\n]+)\n(?!\n)", r"\1\n\n", body)
return body.strip() + "\n"

View File

@@ -3,20 +3,36 @@ from __future__ import annotations
import hashlib
import hmac
import base64
import logging
import time
from urllib.parse import quote_plus
import httpx
from urllib.parse import urlparse
from app.config import settings
from app.schemas import IMPublishRequest, PublishResponse
logger = logging.getLogger(__name__)
class IMPublisher:
async def publish(self, req: IMPublishRequest) -> PublishResponse:
async def publish(self, req: IMPublishRequest, request_id: str = "") -> PublishResponse:
rid = request_id or "-"
if not settings.im_webhook_url:
logger.warning("im_skipped rid=%s reason=empty_webhook_url", rid)
return PublishResponse(ok=False, detail="缺少 IM_WEBHOOK_URL 配置")
parsed = urlparse(settings.im_webhook_url)
host = parsed.netloc or "(invalid_url)"
logger.info(
"im_publish_start rid=%s webhook_host=%s sign_enabled=%s title_chars=%d body_truncated_to=3800",
rid,
host,
bool(settings.im_secret),
len(req.title or ""),
)
webhook = self._with_signature(settings.im_webhook_url, settings.im_secret)
payload = {
"msg_type": "post",
@@ -31,15 +47,25 @@ class IMPublisher:
}
async with httpx.AsyncClient(timeout=20) as client:
logger.info("im_http_post rid=%s method=POST timeout_s=20", rid)
r = await client.post(webhook, json=payload)
try:
data = r.json()
except Exception:
data = {"status_code": r.status_code, "text": r.text}
logger.info(
"im_http_response rid=%s status=%s body_preview=%s",
rid,
r.status_code,
str(data)[:500],
)
if r.status_code >= 400:
logger.warning("im_push_failed rid=%s http_status=%s", rid, r.status_code)
return PublishResponse(ok=False, detail=f"IM 推送失败: {data}", data=data)
logger.info("im_push_ok rid=%s", rid)
return PublishResponse(ok=True, detail="IM 推送成功", data=data)
def _with_signature(self, webhook: str, secret: str | None) -> str:

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import logging
import time
import httpx
@@ -8,21 +9,60 @@ import markdown2
from app.config import settings
from app.schemas import PublishResponse, WechatPublishRequest
logger = logging.getLogger(__name__)
def _detail_for_token_error(data: dict | None) -> str:
"""把微信返回的 errcode 转成可操作的说明。"""
if not data:
return "获取微信 access_token 失败(无返回内容)"
code = data.get("errcode")
msg = (data.get("errmsg") or "").strip()
if code == 40164:
return (
"微信 errcode=40164当前请求使用的出口 IP 未在公众号「IP 白名单」中。"
"请到 微信公众平台 → 设置与开发 → 基本配置 → IP 白名单,添加本服务对外的公网 IP"
"(日志里 invalid ip 后面的地址)。若在本地/Docker 调试,出口 IP 常会变,需填当前出口或改用固定出口的服务器。"
f" 微信原文:{msg}"
)
if code == 40013:
return f"微信 errcode=40013AppSecret 无效或已重置,请检查 WECHAT_SECRET。{msg}"
if code == 40125:
return f"微信 errcode=40125AppSecret 配置错误。{msg}"
return f"获取微信 access_token 失败errcode={code} errmsg={msg}"
class WechatPublisher:
def __init__(self) -> None:
    """Start with an empty access-token cache so the first call fetches fresh."""
    # Absolute epoch expiry; 0 means "no token cached / already expired".
    self._expires_at = 0
    self._access_token = None
async def publish_draft(self, req: WechatPublishRequest) -> PublishResponse:
async def publish_draft(self, req: WechatPublishRequest, request_id: str = "") -> PublishResponse:
rid = request_id or "-"
if not settings.wechat_appid or not settings.wechat_secret:
logger.warning("wechat skipped rid=%s reason=missing_appid_or_secret", rid)
return PublishResponse(ok=False, detail="缺少 WECHAT_APPID / WECHAT_SECRET 配置")
token = await self._get_access_token()
token, token_from_cache, token_err_body = await self._get_access_token()
if not token:
return PublishResponse(ok=False, detail="获取微信 access_token 失败")
detail = _detail_for_token_error(token_err_body)
logger.error("wechat access_token_unavailable rid=%s detail=%s", rid, detail[:200])
return PublishResponse(ok=False, detail=detail, data=token_err_body)
logger.info(
"wechat_token rid=%s cache_hit=%s",
rid,
token_from_cache,
)
html = markdown2.markdown(req.body_markdown)
logger.info(
"wechat_draft_build rid=%s title_chars=%d digest_chars=%d html_chars=%d",
rid,
len(req.title or ""),
len(req.summary or ""),
len(html or ""),
)
payload = {
"articles": [
{
@@ -39,19 +79,37 @@ class WechatPublisher:
async with httpx.AsyncClient(timeout=25) as client:
url = f"https://api.weixin.qq.com/cgi-bin/draft/add?access_token={token}"
logger.info(
"wechat_http_post rid=%s endpoint=cgi-bin/draft/add http_timeout_s=25",
rid,
)
r = await client.post(url, json=payload)
data = r.json()
if data.get("errcode", 0) != 0:
logger.warning(
"wechat_draft_failed rid=%s errcode=%s errmsg=%s raw=%s",
rid,
data.get("errcode"),
data.get("errmsg"),
data,
)
return PublishResponse(ok=False, detail=f"微信发布失败: {data}", data=data)
logger.info(
"wechat_draft_ok rid=%s media_id=%s",
rid,
data.get("media_id", data),
)
return PublishResponse(ok=True, detail="已发布到公众号草稿箱", data=data)
async def _get_access_token(self) -> str | None:
async def _get_access_token(self) -> tuple[str | None, bool, dict | None]:
"""成功时第三项为 None失败时为微信返回的 JSON含 errcode/errmsg"""
now = int(time.time())
if self._access_token and now < self._expires_at - 60:
return self._access_token
return self._access_token, True, None
logger.info("wechat_http_get endpoint=cgi-bin/token reason=refresh_access_token")
async with httpx.AsyncClient(timeout=20) as client:
r = await client.get(
"https://api.weixin.qq.com/cgi-bin/token",
@@ -61,12 +119,17 @@ class WechatPublisher:
"secret": settings.wechat_secret,
},
)
data = r.json()
data = r.json() if r.content else {}
token = data.get("access_token")
if not token:
return None
logger.warning(
"wechat_token_refresh_failed http_status=%s body=%s",
r.status_code,
data,
)
return None, False, data if isinstance(data, dict) else None
self._access_token = token
self._expires_at = now + int(data.get("expires_in", 7200))
return token
return token, False, None

View File

@@ -1,5 +1,16 @@
const $ = (id) => document.getElementById(id);
function renderBodyPreview() {
  // Mirror the Markdown editor content into the live preview pane.
  const preview = $("bodyPreview");
  if (!preview) return;
  const bodyField = $("body");
  const raw = (bodyField && bodyField.value) || "";
  // Use marked when the CDN script loaded; otherwise degrade to plain text.
  if (typeof marked !== "undefined" && marked.parse) {
    preview.innerHTML = marked.parse(raw, { breaks: true });
  } else {
    preview.textContent = raw;
  }
}
const statusEl = $("status");
const rewriteBtn = $("rewriteBtn");
const wechatBtn = $("wechatBtn");
@@ -13,6 +24,7 @@ function updateCounters() {
$("sourceCount").textContent = `${countText($("sourceText").value)}`;
$("summaryCount").textContent = `${countText($("summary").value)}`;
$("bodyCount").textContent = `${countText($("body").value)}`;
renderBodyPreview();
}
function setLoading(button, loading, idleText, loadingText) {
@@ -34,9 +46,36 @@ async function postJSON(url, body) {
});
const data = await res.json();
if (!res.ok) throw new Error(data.detail || "请求失败");
data._requestId = res.headers.get("X-Request-ID") || "";
return data;
}
function renderTrace(trace, headerRid) {
  // Fill the run-trace panel with pretty-printed JSON and set the AI/fallback badge.
  const wrap = $("traceWrap");
  const pre = $("traceJson");
  const badge = $("traceBadge");
  if (!pre || !wrap) return;

  const traceIsEmpty = !trace || Object.keys(trace).length === 0;
  if (traceIsEmpty) {
    // No trace yet: show a hint, including the request id when the header carried one.
    if (headerRid) {
      pre.textContent = JSON.stringify({ request_id: headerRid, note: "响应中无 trace 字段" }, null, 2);
    } else {
      pre.textContent = "尚无数据完成一次「AI 改写」后,这里会显示请求 ID、耗时、质检与降级原因。";
    }
    if (badge) badge.textContent = "";
    return;
  }

  const merged = { ...trace };
  if (headerRid && !merged.request_id) merged.request_id = headerRid;
  pre.textContent = JSON.stringify(merged, null, 2);

  if (badge) {
    const mode = merged.mode || "";
    let label = "";
    let cls = "";
    if (mode === "ai") {
      label = "AI";
      cls = "is-ai";
    } else if (mode === "fallback") {
      label = "保底";
      cls = "is-fallback";
    }
    badge.textContent = label;
    badge.className = "trace-badge " + cls;
  }
  wrap.open = true;
}
$("rewriteBtn").addEventListener("click", async () => {
const sourceText = $("sourceText").value.trim();
if (sourceText.length < 20) {
@@ -59,11 +98,22 @@ $("rewriteBtn").addEventListener("click", async () => {
$("summary").value = data.summary || "";
$("body").value = data.body_markdown || "";
updateCounters();
renderTrace(data.trace, data._requestId);
const tr = data.trace || {};
const modelLine = tr.model ? `模型 ${tr.model}` : "";
if (data.mode === "fallback") {
const note = (data.quality_notes || [])[0] || "当前为保底改写稿";
setStatus(`改写完成(保底模式):${note}`, true);
setStatus(
`改写完成(保底模式,未使用或未通过千问长文):${note}${modelLine ? ` · ${modelLine}` : ""}`,
true
);
} else if (tr.quality_soft_accept) {
setStatus(
`改写完成AI质检提示${(data.quality_notes || []).join("") || "见 quality_notes"} · ${modelLine || "AI"}`
);
statusEl.style.color = "#9a3412";
} else {
setStatus("改写完成,可直接发布。");
setStatus(`改写完成AI 洗稿)${modelLine ? ` · ${modelLine}` : ""}`);
}
} catch (e) {
setStatus(`改写失败: ${e.message}`, true);
@@ -111,4 +161,32 @@ $("imBtn").addEventListener("click", async () => {
$(id).addEventListener("input", updateCounters);
});
async function loadBackendConfig() {
  // Display which model/provider the backend is configured with, or a setup warning.
  const target = $("backendConfig");
  if (!target) return;
  const warn = (text) => {
    target.textContent = text;
    target.style.color = "#b42318";
  };
  try {
    const res = await fetch("/api/config");
    const cfg = await res.json();
    if (!cfg.openai_configured) {
      warn(
        "后端未配置 OPENAI_API_KEY改写将使用本地保底稿千问不会参与。请在 .env 中配置并重启容器。"
      );
      return;
    }
    // Build the "connected" summary line from optional config fields.
    let providerName = "OpenAI 兼容接口";
    if (cfg.provider === "dashscope") providerName = "通义千问DashScope 兼容接口)";
    const hostPart = cfg.base_url_host ? ` · ${cfg.base_url_host}` : "";
    const timeoutPart =
      cfg.openai_timeout_sec != null ? ` · 单轮最长等待 ${cfg.openai_timeout_sec}s` : "";
    target.textContent = `已接入:${cfg.openai_model} · ${providerName}${hostPart}${timeoutPart}`;
    target.style.color = "";
  } catch (e) {
    warn("无法读取 /api/config请确认服务已启动");
  }
}
loadBackendConfig();
updateCounters();
renderTrace(null, "");

View File

@@ -38,6 +38,11 @@ body {
margin: 6px 0 0;
}
.backend-config {
margin: 8px 0 0;
line-height: 1.5;
}
.badge {
font-size: 12px;
font-weight: 700;
@@ -161,11 +166,130 @@ button:disabled {
font-weight: 600;
}
.small {
font-size: 13px;
margin: 0 0 12px;
}
.flow-hint {
margin: 0 0 14px 18px;
padding: 0;
font-size: 13px;
line-height: 1.6;
}
.trace-wrap {
margin-top: 12px;
padding: 10px 12px;
border: 1px dashed var(--line);
border-radius: 10px;
background: #f9fbf9;
}
.trace-wrap summary {
cursor: pointer;
font-weight: 700;
color: var(--text);
}
.trace-badge {
margin-left: 8px;
font-size: 11px;
padding: 2px 8px;
border-radius: 999px;
font-weight: 700;
}
.trace-badge.is-ai {
background: #eaf7f0;
color: #0f5f3d;
border: 1px solid #cde6d7;
}
.trace-badge.is-fallback {
background: #fff4e6;
color: #9a3412;
border: 1px solid #fed7aa;
}
.body-split {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 12px;
align-items: stretch;
}
.body-split textarea {
min-height: 280px;
}
.preview-panel {
display: flex;
flex-direction: column;
min-width: 0;
}
.markdown-preview {
flex: 1;
min-height: 280px;
max-height: 480px;
overflow: auto;
padding: 12px 14px;
border: 1px solid var(--line);
border-radius: 10px;
background: #fafcfb;
font-size: 14px;
line-height: 1.65;
}
.markdown-preview h2 {
font-size: 1.15rem;
margin: 1em 0 0.5em;
color: var(--accent-2);
}
.markdown-preview h3 {
font-size: 1.05rem;
margin: 0.9em 0 0.4em;
}
.markdown-preview p {
margin: 0.5em 0;
}
.markdown-preview ul,
.markdown-preview ol {
margin: 0.4em 0 0.6em 1.2em;
padding: 0;
}
.markdown-preview li {
margin: 0.25em 0;
}
.trace-json {
margin: 10px 0 0;
padding: 10px;
max-height: 220px;
overflow: auto;
font-size: 11px;
line-height: 1.45;
background: #fff;
border-radius: 8px;
border: 1px solid var(--line);
white-space: pre-wrap;
word-break: break-word;
}
@media (max-width: 960px) {
.layout {
grid-template-columns: 1fr;
}
.body-split {
grid-template-columns: 1fr;
}
.topbar {
align-items: flex-start;
gap: 8px;

View File

@@ -10,7 +10,8 @@
<header class="topbar">
<div class="brand">
<h1>{{ app_name }}</h1>
<p class="muted">把 X 长文快速改写为公众号可发布稿,并一键分发到公众号/IM</p>
<p class="muted">粘贴原文 → 洗成约 <strong>5 段、500 字内</strong> 的短文(无小标题)→ 右侧预览 → 满意后发布</p>
<p id="backendConfig" class="backend-config muted small" aria-live="polite"></p>
</div>
<div class="badge">Beta</div>
</header>
@@ -18,12 +19,18 @@
<main class="layout">
<section class="panel input-panel">
<h2>输入与改写策略</h2>
<ol class="flow-hint muted">
<li>粘贴原文并设置语气/读者</li>
<li>点击改写 → 右侧为短标题、摘要与<strong>五段正文</strong>(段落间空一行)</li>
<li>看「运行追踪」:<strong>模式为 AI</strong> 且模型名正确,即千问/接口已生效</li>
<li>人工改好后 →「发布到公众号草稿箱」(需配置 WECHAT_*</li>
</ol>
<div class="field-head">
<label>原始内容</label>
<span id="sourceCount" class="meta">0 字</span>
</div>
<textarea id="sourceText" rows="14" placeholder="粘贴 X 长文/线程内容..."></textarea>
<textarea id="sourceText" rows="14" placeholder="粘贴原文(长帖、线程、摘录均可),洗稿会围绕原文主题展开…"></textarea>
<div class="grid2">
<div>
@@ -56,6 +63,7 @@
<section class="panel output-panel">
<h2>发布内容</h2>
<p class="muted small">下方「运行追踪」会显示本次请求 ID、耗时、质检项与是否降级便于与容器日志对照。</p>
<label>标题</label>
<input id="title" type="text" />
@@ -67,10 +75,24 @@
<textarea id="summary" rows="3"></textarea>
<div class="field-head">
<label>Markdown 正文</label>
<label>正文5 自然段,建议 ≤500 字)</label>
<span id="bodyCount" class="meta">0 字</span>
</div>
<textarea id="body" rows="16"></textarea>
<div class="body-split">
<textarea id="body" rows="10" placeholder="五段之间空一行;无需 # 标题"></textarea>
<div class="preview-panel">
<div class="field-head">
<label>排版预览</label>
<span class="meta">与公众号 HTML 渲染接近</span>
</div>
<div id="bodyPreview" class="markdown-preview"></div>
</div>
</div>
<details id="traceWrap" class="trace-wrap">
<summary>运行追踪 <span id="traceBadge" class="trace-badge"></span></summary>
<pre id="traceJson" class="trace-json"></pre>
</details>
<div class="actions">
<button id="wechatBtn">发布到公众号草稿箱</button>
@@ -79,6 +101,7 @@
</section>
</main>
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<script src="/static/app.js"></script>
</body>
</html>

View File

@@ -1,6 +1,12 @@
services:
x2wechat:
build: .
build:
context: .
args:
# 海外 PyPI 可改为 https://pypi.org/simple
PIP_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
# 拉 python 镜像慢时取消下一行注释DaoCloud 同步 Docker Hub
# PY_BASE: docker.m.daocloud.io/library/python:3.11-slim
container_name: x2wechat-studio
ports:
- "18000:8000"