fix:优化项目内容

2026-03-18 17:01:10 +08:00
parent da63282a10
commit 27dc89e251
64 changed files with 3421 additions and 4982 deletions
--- a/backend/app/deps.py
+++ b/backend/app/deps.py
@@ -0,0 +1,56 @@
+from __future__ import annotations
+
+import secrets
+from datetime import datetime, timezone
+
+from fastapi import Depends, Request, Response
+from sqlalchemy.orm import Session
+
+from backend.app.db import get_db
+from backend.app import models
+
+DEVICE_COOKIE_NAME = "opc_device_token"
+
+
+def _issue_new_device_user(db: Session, response: Response) -> models.User:
+  """Create a user with a fresh random device token and set it as a one-year HttpOnly cookie."""
+  token = secrets.token_hex(32)
+  # One timestamp for both columns so a new user has created_at == last_seen_at.
+  now = datetime.now(timezone.utc)
+  user = models.User(device_token=token, created_at=now, last_seen_at=now)
+  db.add(user)
+  db.commit()
+  db.refresh(user)
+  # NOTE(review): secure=False lets the cookie travel over plain HTTP — confirm for production.
+  response.set_cookie(
+      key=DEVICE_COOKIE_NAME,
+      value=token,
+      httponly=True,
+      secure=False,
+      samesite="Lax",
+      max_age=60 * 60 * 24 * 365,
+  )
+  return user
+
+
+def get_current_user(
+  request: Request,
+  response: Response,
+  db: Session = Depends(get_db),
+) -> models.User:
+  """Resolve the caller from the device cookie, issuing a new device user when none matches."""
+  cookie_token = request.cookies.get(DEVICE_COOKIE_NAME)
+  matched = None
+  if cookie_token:
+      lookup = db.query(models.User).filter(models.User.device_token == cookie_token)
+      matched = lookup.first()
+
+  # A missing cookie and an unknown token are treated alike: mint a fresh identity.
+  if not matched:
+      return _issue_new_device_user(db, response)
+
+  # Known device: stamp the activity time and persist it before returning.
+  matched.last_seen_at = datetime.now(timezone.utc)
+  db.commit()
+  return matched
+
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -32,27 +32,28 @@ def create_app() -> FastAPI:
    @app.on_event("startup")
    def on_startup() -> None:
        Base.metadata.create_all(bind=engine)
-        # Add new columns to finance_records if they don't exist (Module 6)
+        # Lightweight schema migrations for SQLite (add columns if missing)
        try:
            from sqlalchemy import text
            with engine.connect() as conn:
+                # finance_records: amount, billing_date
                r = conn.execute(text("PRAGMA table_info(finance_records)"))
                cols = [row[1] for row in r]
                if "amount" not in cols:
                    conn.execute(text("ALTER TABLE finance_records ADD COLUMN amount NUMERIC(12,2)"))
                if "billing_date" not in cols:
                    conn.execute(text("ALTER TABLE finance_records ADD COLUMN billing_date DATE"))
-                conn.commit()
-        except Exception:
-            pass
-        # Add customers.tags if missing (customer tags for project 收纳)
-        try:
-            from sqlalchemy import text
-            with engine.connect() as conn:
+                if "tags" not in cols:
+                    conn.execute(text("ALTER TABLE finance_records ADD COLUMN tags VARCHAR(512)"))
+                if "meta_json" not in cols:
+                    conn.execute(text("ALTER TABLE finance_records ADD COLUMN meta_json TEXT"))
+
+                # customers: tags
                r = conn.execute(text("PRAGMA table_info(customers)"))
                cols = [row[1] for row in r]
                if "tags" not in cols:
                    conn.execute(text("ALTER TABLE customers ADD COLUMN tags VARCHAR(512)"))
+
                conn.commit()
        except Exception:
            pass
--- a/backend/app/models.py
+++ b/backend/app/models.py
@@ -2,7 +2,6 @@ from datetime import date, datetime

 from sqlalchemy import (
    Date,
-    Column,
    DateTime,
    ForeignKey,
    Integer,
@@ -48,7 +47,6 @@ class Project(Base):
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=datetime.utcnow, nullable=False
    )
-
    customer: Mapped[Customer] = relationship("Customer", back_populates="projects")
    quotes: Mapped[list["Quote"]] = relationship(
        "Quote", back_populates="project", cascade="all, delete-orphan"
@@ -103,9 +101,10 @@ class FinanceRecord(Base):
    type: Mapped[str] = mapped_column(String(50), nullable=False)  # invoice / bank_receipt / manual / ...
    file_name: Mapped[str] = mapped_column(String(255), nullable=False)
    file_path: Mapped[str] = mapped_column(String(512), nullable=False)
+    tags: Mapped[str | None] = mapped_column(String(512), nullable=True)  # 逗号分隔标签
+    meta_json: Mapped[str | None] = mapped_column(Text, nullable=True)  # 结构化识别结果 JSON
    amount: Mapped[float | None] = mapped_column(Numeric(12, 2), nullable=True)
    billing_date: Mapped[date | None] = mapped_column(Date, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=datetime.utcnow, nullable=False
    )
-
--- a/backend/app/routers/finance.py
+++ b/backend/app/routers/finance.py
@@ -1,6 +1,9 @@
 from typing import List
+from datetime import date
+import os
+from pathlib import Path

-from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile
+from fastapi import APIRouter, Body, Depends, File, HTTPException, Query, UploadFile
 from fastapi.responses import FileResponse
 from sqlalchemy.orm import Session

@@ -9,6 +12,8 @@ from backend.app import models
 from backend.app.schemas import (
    FinanceRecordRead,
    FinanceRecordUpdate,
+    FinanceBatchDeleteRequest,
+    FinanceSyncRequest,
    FinanceSyncResponse,
    FinanceSyncResult,
    FinanceUploadResponse,
@@ -21,9 +26,14 @@ router = APIRouter(prefix="/finance", tags=["finance"])


@router.post("/sync", response_model=FinanceSyncResponse)
-async def sync_finance():
+async def sync_finance(payload: FinanceSyncRequest = Body(default=FinanceSyncRequest())):
    try:
-        items_raw = await sync_finance_emails()
+        items_raw = await sync_finance_emails(
+            mode=payload.mode,
+            start_date=payload.start_date,
+            end_date=payload.end_date,
+            doc_types=payload.doc_types,
+        )
    except RuntimeError as exc:
        # 邮箱配置/连接等问题属于可预期的业务错误，用 400 让前端直接展示原因，而不是泛化为 500。
        raise HTTPException(status_code=400, detail=str(exc)) from exc
@@ -108,6 +118,60 @@ async def update_finance_record(
    return record


+@router.delete("/records/{record_id}")
+async def delete_finance_record(
+    record_id: int,
+    db: Session = Depends(get_db),
+):
+    """Delete one finance record and, best-effort, its file; 404 if the id is unknown."""
+    # Session.get replaces the legacy Query.get (deprecated since SQLAlchemy 1.4).
+    record = db.get(models.FinanceRecord, record_id)
+    if not record:
+        raise HTTPException(404, "记录不存在")
+
+    file_path = Path(record.file_path)
+    db.delete(record)
+    db.commit()
+
+    # Remove the file only after the row is gone, so a failed commit cannot
+    # leave a record pointing at a file that no longer exists.
+    try:
+        file_path.unlink(missing_ok=True)
+    except OSError:
+        pass  # best-effort: an undeletable file must not fail the request
+    return {"status": "deleted", "id": record_id}
+
+
+@router.post("/records/batch-delete")
+async def batch_delete_finance_records(
+    payload: FinanceBatchDeleteRequest,
+    db: Session = Depends(get_db),
+):
+    """Delete several finance records and, best-effort, their files; returns the deleted count."""
+    if not payload.ids:
+        return {"status": "ok", "deleted": 0}
+
+    records = (
+        db.query(models.FinanceRecord)
+        .filter(models.FinanceRecord.id.in_(payload.ids))
+        .all()
+    )
+
+    # Commit the row deletions first; files are removed only afterwards so a
+    # failed commit cannot leave records pointing at files that are gone.
+    file_paths = [Path(record.file_path) for record in records]
+    for record in records:
+        db.delete(record)
+    db.commit()
+
+    for file_path in file_paths:
+        try:
+            file_path.unlink(missing_ok=True)
+        except OSError:
+            pass  # best-effort: an undeletable file must not fail the request
+    return {"status": "deleted", "deleted": len(records)}
+
+
@router.get("/download/{month}")
 async def download_finance_month(month: str):
    """
@@ -124,3 +188,53 @@ async def download_finance_month(month: str):
        filename=f"finance_{month}.zip",
    )

+
+@router.get("/download-range")
+async def download_finance_range(
+    start_date: date = Query(..., description="起始日期 YYYY-MM-DD"),
+    end_date: date = Query(..., description="结束日期 YYYY-MM-DD（含当日）"),
+    only_invoices: bool = Query(True, description="是否仅包含发票类型"),
+    db: Session = Depends(get_db),
+):
+    """
+    Zip and download the files of records whose billing_date lies in [start_date, end_date] (invoices only by default).
+    Raises HTTP 400 when end_date precedes start_date and 404 when no record matches.
+    """
+    if end_date < start_date:
+        raise HTTPException(status_code=400, detail="结束日期不能早于开始日期")
+
+    q = db.query(models.FinanceRecord).filter(
+        models.FinanceRecord.billing_date.isnot(None),
+        models.FinanceRecord.billing_date >= start_date,
+        models.FinanceRecord.billing_date <= end_date,
+    )
+    if only_invoices:
+        q = q.filter(models.FinanceRecord.type == "invoices")
+
+    records = q.all()
+    if not records:
+        raise HTTPException(status_code=404, detail="该时间段内没有可导出的记录")
+
+    base_dir = Path("data/finance")
+    base_dir.mkdir(parents=True, exist_ok=True)
+    zip_name = f"invoices_{start_date.isoformat()}_{end_date.isoformat()}.zip"
+    zip_path = base_dir / zip_name
+
+    import zipfile
+
+    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
+        for r in records:
+            file_path = Path(r.file_path)
+            if not file_path.exists():
+                continue
+            # Keep the layout below "data"; slicing parts (unlike relative_to) also works when "data" is not the first component.
+            parts = file_path.parts
+            rel = Path(*parts[parts.index("data") + 1:]) if "data" in parts else file_path.name
+            zf.write(file_path, arcname=rel)
+
+    return FileResponse(
+        path=str(zip_path),
+        media_type="application/zip",
+        filename=zip_name,
+    )
+
--- a/backend/app/routers/projects.py
+++ b/backend/app/routers/projects.py
@@ -3,7 +3,11 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Dict, List, Union

-from fastapi import APIRouter, Depends, HTTPException, status
+import json
+from typing import AsyncGenerator
+
+from fastapi import APIRouter, Depends, HTTPException, Query, status
+from fastapi.responses import StreamingResponse
 from sqlalchemy.orm import Session, joinedload

 from backend.app import models
@@ -105,6 +109,7 @@ def _build_markdown_from_analysis(data: Union[Dict[str, Any], List[Any]]) -> str
@router.get("/", response_model=list[ProjectRead])
 async def list_projects(
    customer_tag: str | None = None,
+    limit: int = Query(30, ge=1, le=200, description="默认只返回最近 N 条"),
    db: Session = Depends(get_db),
 ):
    """列表项目；customer_tag 不为空时只返回该客户标签下的项目（按客户 tags 筛选）。"""
@@ -127,7 +132,7 @@ async def list_projects(
                t.ilike(f"%,{tag}"),
            )
        )
-    return query.all()
+    return query.limit(limit).all()


@router.get("/{project_id}", response_model=ProjectRead)
@@ -200,6 +205,88 @@ async def analyze_project_requirement(
    )


+@router.post("/analyze_stream")
+async def analyze_project_requirement_stream(
+    payload: RequirementAnalyzeRequest,
+    db: Session = Depends(get_db),
+):
+    """
+    Stream the generated Markdown to the client as server-sent events (intended for live display in the editor).
+    data: {"type":"delta","content":"..."} / {"type":"done","project_id":1} / {"type":"error","message":"..."}
+    """
+    customer = db.query(models.Customer).get(payload.customer_id)
+    if not customer:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Customer not found")
+
+    # Create a project first so we can return project_id at end
+    project = models.Project(
+        customer_id=payload.customer_id,
+        raw_requirement=payload.raw_text,
+        ai_solution_md="",
+        status="draft",
+    )
+    db.add(project)
+    db.commit()
+    db.refresh(project)
+
+    async def gen() -> AsyncGenerator[str, None]:
+        from backend.app.services.ai_service import get_active_ai_config
+        from openai import AsyncOpenAI
+
+        config = get_active_ai_config()
+        api_key = (config.get("api_key") or "").strip()
+        if not api_key:
+            yield f"data: {json.dumps({'type':'error','message':'AI API Key 未配置'})}\n\n"
+            return
+        base_url = (config.get("base_url") or "").strip() or None
+        client = AsyncOpenAI(api_key=api_key, base_url=base_url)
+        model = config.get("model_name") or "gpt-4o-mini"
+        temperature = float(config.get("temperature", 0.2))
+
+        system_prompt = (
+            (config.get("system_prompt_override") or "").strip()
+            or "你是一名资深系统架构师，请输出可直接编辑的 Markdown 方案，不要输出 JSON。"
+        )
+        user_prompt = (
+            "请基于以下客户原始需求，输出一份可交付的项目方案草稿（Markdown）。\n"
+            "要求包含：概要、功能模块拆分、技术实现思路、工时与报价估算、备注。\n\n"
+            f"【客户原始需求】\n{payload.raw_text}"
+        )
+
+        full = ""
+        try:
+            stream = await client.chat.completions.create(
+                model=model,
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_prompt},
+                ],
+                temperature=temperature,
+                stream=True,
+            )
+            async for event in stream:
+                delta = (event.choices[0].delta.content or "") if event.choices else ""
+                if not delta:
+                    continue
+                full += delta
+                yield f"data: {json.dumps({'type':'delta','content':delta}, ensure_ascii=False)}\n\n"
+        except Exception as exc:
+            yield f"data: {json.dumps({'type':'error','message':str(exc)}, ensure_ascii=False)}\n\n"
+            return
+
+        # Save final markdown; a failed save is swallowed and the "done" event is still sent
+        try:
+            project.ai_solution_md = full
+            db.add(project)
+            db.commit()
+        except Exception:
+            pass
+
+        yield f"data: {json.dumps({'type':'done','project_id':project.id}, ensure_ascii=False)}\n\n"
+
+    return StreamingResponse(gen(), media_type="text/event-stream")
+
+
@router.post("/{project_id}/generate_quote", response_model=QuoteGenerateResponse)
 async def generate_project_quote(
    project_id: int,
--- a/backend/app/schemas.py
+++ b/backend/app/schemas.py
@@ -104,12 +104,31 @@ class FinanceSyncResponse(BaseModel):
    details: List[FinanceSyncResult] = Field(default_factory=list)


+class FinanceSyncRequest(BaseModel):
+    """
+    Mailbox attachment sync strategy:
+    - mode=incremental: the default. Full sync on the first run (no history), otherwise UNSEEN only.
+    - mode=all: sync all attachments (optionally within a date range).
+    - mode=latest: sync only the attachments of the single most recent message (optionally within a date range).
+    The date range is any start/end pair (both inclusive); internally it becomes IMAP SINCE/BEFORE.
+    """
+    mode: str = Field("incremental", description="incremental | all | latest")
+    start_date: Optional[date] = Field(None, description="YYYY-MM-DD")
+    end_date: Optional[date] = Field(None, description="YYYY-MM-DD")
+    doc_types: Optional[List[str]] = Field(
+        None,
+        description="要同步的附件类型：invoices/receipts/statements。为空表示默认全部类型。",
+    )
+
+
 class FinanceRecordRead(BaseModel):
    id: int
    month: str
    type: str
    file_name: str
    file_path: str
+    tags: Optional[str] = None
+    meta_json: Optional[str] = None
    amount: Optional[float] = None
    billing_date: Optional[date] = None
    created_at: datetime
@@ -123,6 +142,10 @@ class FinanceRecordUpdate(BaseModel):
    billing_date: Optional[date] = None


+class FinanceBatchDeleteRequest(BaseModel):  # request body carrying the finance record ids to delete in one call
+    ids: List[int] = Field(..., description="要删除的财务记录 ID 列表")
+
+
 class FinanceUploadResponse(BaseModel):
    id: int
    month: str
--- a/backend/app/services/ai_service.py
+++ b/backend/app/services/ai_service.py
@@ -3,7 +3,7 @@ import json
 import os
 import re
 from pathlib import Path
-from typing import Any, Dict, Tuple
+from typing import Any, Dict, Tuple, List

 from openai import AsyncOpenAI
 from openai import NotFoundError as OpenAINotFoundError
@@ -197,6 +197,65 @@ async def extract_invoice_metadata(image_bytes: bytes, mime: str = "image/jpeg")
    api_key = (config.get("api_key") or "").strip()
    if not api_key:
        return (None, None)
+
+
+async def extract_finance_tags(
+    content_text: str,
+    doc_type: str,
+    filename: str = "",
+) -> Tuple[List[str], Dict[str, Any]]:
+    """
+    Ask the configured AI model for tags and key fields of an attachment's text; returns (tags, meta) with at most 12 tags.
+    NOTE(review): per the hunk context this def appears to be inserted inside extract_invoice_metadata's body — confirm placement.
+    """
+    config = _load_ai_config()
+    client = _client_from_config(config)
+    model = config.get("model_name") or "gpt-4o-mini"
+    temperature = float(config.get("temperature", 0.2))
+
+    prompt = (
+        "你是一名财务助理。请根据附件的文本内容，为它生成可检索的标签，并抽取关键字段。\n"
+        "只返回 JSON，不要任何解释文字。\n"
+        "输入信息：\n"
+        f"- 类型 doc_type: {doc_type}\n"
+        f"- 文件名 filename: {filename}\n"
+        "- 附件文本 content_text: (见下)\n\n"
+        "返回 JSON 格式：\n"
+        "{\n"
+        '  "tags": ["标签1","标签2"],\n'
+        '  "meta": {\n'
+        '    "counterparty": "对方单位/收款方/付款方（如能识别）或 null",\n'
+        '    "account": "账户/卡号后四位（如能识别）或 null",\n'
+        '    "amount": "金额数字字符串或 null",\n'
+        '    "date": "YYYY-MM-DD 或 null",\n'
+        '    "summary": "一句话摘要"\n'
+        "  }\n"
+        "}\n\n"
+        "content_text:\n"
+        f"{content_text[:12000]}\n"
+    )
+
+    completion = await client.chat.completions.create(
+        model=model,
+        response_format={"type": "json_object"},
+        messages=[{"role": "user", "content": prompt}],
+        temperature=temperature,
+        max_tokens=500,
+    )
+    content = completion.choices[0].message.content or "{}"
+    try:
+        data: Any = json.loads(content)
+    except Exception:
+        return ([], {"summary": "", "raw": content})
+
+    tags = data.get("tags") if isinstance(data, dict) else None
+    meta = data.get("meta") if isinstance(data, dict) else None
+    if not isinstance(tags, list):
+        tags = []
+    tags = [str(t).strip() for t in tags if str(t).strip()][:12]
+    if not isinstance(meta, dict):
+        meta = {}
+    return (tags, meta)
    try:
        client = _client_from_config(config)
        model = config.get("model_name") or "gpt-4o-mini"
--- a/backend/app/services/email_service.py
+++ b/backend/app/services/email_service.py
@@ -7,7 +7,7 @@ import os
 import re
 import sqlite3
 import ssl
-from datetime import date, datetime
+from datetime import date, datetime, timedelta
 from email.header import decode_header
 from pathlib import Path
 from typing import Any, Dict, List, Tuple
@@ -109,6 +109,29 @@ def _run_invoice_ocr_sync(file_path: str, mime: str, raw_bytes: bytes) -> Tuple[
        loop.close()


+def _extract_text_for_tagging(file_path: str, mime: str, raw_bytes: bytes) -> str:
+    """
+    Extract best-effort text from an attachment for tagging.
+    - PDF (by suffix or MIME type): text of up to 5 pages via PyMuPDF, read from raw_bytes
+    - Anything else (images, spreadsheets): not parsed, returns ""
+    Any extraction failure also yields "".
+    """
+    suffix = Path(file_path).suffix.lower()
+    if suffix != ".pdf" and "pdf" not in (mime or "").lower():
+        return ""
+
+    try:
+        import fitz  # PyMuPDF
+        doc = fitz.open(stream=raw_bytes, filetype="pdf")
+        try:
+            texts = [doc.load_page(i).get_text("text") or "" for i in range(min(5, doc.page_count))]
+        finally:
+            doc.close()  # runs even when a page fails to load, so the document is never leaked
+        return "\n".join(texts).strip()
+    except Exception:
+        return ""
+
+
 def _rename_invoice_file(
    file_path: str,
    amount: float | None,
@@ -173,6 +196,7 @@ def _has_sync_history() -> bool:
 def _save_attachment(
    msg: email.message.Message,
    month_str: str,
+    allowed_doc_types: set[str] | None = None,
 ) -> List[Tuple[str, str, str, bytes, str]]:
    """
    Save PDF/image attachments.
@@ -193,17 +217,20 @@ def _save_attachment(
        _ensure_sync_history_table(conn)

        for part in msg.walk():
-            content_disposition = part.get("Content-Disposition", "")
-            if "attachment" not in content_disposition:
-                continue
+            # 许多邮件附件会以 inline 或缺失 Content-Disposition 的形式出现，
+            # 只要存在 filename 且扩展名符合，就视为可下载附件。
+            content_disposition = (part.get("Content-Disposition", "") or "").lower()

            filename = part.get_filename()
            filename = _decode_header_value(filename)
            if not filename:
                continue
+            if content_disposition and ("attachment" not in content_disposition and "inline" not in content_disposition):
+                # 明确的非附件 disposition，跳过
+                continue

            ext = Path(filename).suffix.lower()
-            if ext not in (".pdf", ".jpg", ".jpeg", ".png", ".xlsx"):
+            if ext not in (".pdf", ".jpg", ".jpeg", ".png", ".webp", ".xlsx", ".xls"):
                continue

            maintype = part.get_content_maintype()
@@ -216,6 +243,8 @@ def _save_attachment(

            # 分类：基于主题 + 文件名
            doc_type = _classify_type(subject, filename)
+            if allowed_doc_types is not None and doc_type not in allowed_doc_types:
+                continue
            base_dir = _ensure_month_dir(month_str, doc_type)

            # 增量去重：根据 (message_id, md5) 判断是否已同步过
@@ -421,7 +450,56 @@ def _select_mailbox(imap: imaplib.IMAP4_SSL, mailbox: str) -> bool:
    return False


-def _sync_one_account(config: Dict[str, Any], db: Session, results: List[Dict[str, Any]]) -> None:
+def _imap_date(d: date) -> str:
+    """Format d for IMAP SEARCH (e.g. 16-Mar-2026); month names are fixed English, not locale-dependent."""
+    months = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
+    return f"{d.day:02d}-{months[d.month - 1]}-{d.year}"
+
+
+def _pick_latest_msg_id(imap: imaplib.IMAP4_SSL, msg_ids: List[bytes]) -> bytes | None:
+    """Return the id with the newest INTERNALDATE among msg_ids, or None if none can be read."""
+    import time
+
+    newest_id: bytes | None = None
+    newest_ts: float = -1.0
+    for candidate in msg_ids:
+        try:
+            typ, data = imap.fetch(candidate, "(INTERNALDATE)")
+            if typ != "OK" or not data or not data[0]:
+                continue
+            # Internaldate2tuple needs the raw bytes response line; unwrap or encode other shapes.
+            line = data[0][0] if isinstance(data[0], tuple) else data[0]
+            if not isinstance(line, (bytes, bytearray)):
+                line = str(line).encode("utf-8", errors="ignore")
+            parsed = imaplib.Internaldate2tuple(line)
+            if not parsed:
+                continue
+            stamp = time.mktime(parsed)
+            if stamp > newest_ts:
+                newest_ts = stamp
+                newest_id = candidate
+        except Exception:
+            # A message whose date cannot be fetched or parsed is skipped.
+            continue
+    return newest_id
+
+
+def _sync_one_account(
+    config: Dict[str, Any],
+    db: Session,
+    results: List[Dict[str, Any]],
+    *,
+    mode: str = "incremental",
+    start_date: date | None = None,
+    end_date: date | None = None,
+    doc_types: list[str] | None = None,
+) -> None:
+    allowed: set[str] | None = None
+    if doc_types:
+        allowed = {d.strip().lower() for d in doc_types if d and d.strip()}
+        allowed = {d for d in allowed if d in ("invoices", "receipts", "statements")}
+        if not allowed:
+            allowed = None
    host = config.get("host")
    user = config.get("user")
    password = config.get("password")
@@ -461,20 +539,53 @@ def _sync_one_account(config: Dict[str, Any], db: Session, results: List[Dict[st
                f"无法选择邮箱「{mailbox}」，请检查该账户的 Mailbox 配置（如 163 使用 INBOX）"
            )

-        # 首次同步（历史库无记录）：拉取全部邮件中的附件，由 attachment_history 去重
-        # 已有历史：只拉取未读邮件，避免重复拉取
+        # 支持：
+        # - mode=incremental: 首次全量，否则 UNSEEN
+        # - mode=all: 全量（可加时间范围）
+        # - mode=latest: 仅最新一封（可加时间范围）
+        mode = (mode or "incremental").strip().lower()
+        if mode not in ("incremental", "all", "latest"):
+            mode = "incremental"
+
        is_first_sync = not _has_sync_history()
-        search_criterion = "ALL" if is_first_sync else "UNSEEN"
+        base_criterion = "ALL"
+        if mode == "incremental":
+            base_criterion = "ALL" if is_first_sync else "UNSEEN"
+        elif mode == "all":
+            base_criterion = "ALL"
+        elif mode == "latest":
+            base_criterion = "ALL"
+
+        criteria: List[str] = [base_criterion]
+        if start_date:
+            criteria += ["SINCE", _imap_date(start_date)]
+        if end_date:
+            # BEFORE is exclusive; add one day to make end_date inclusive
+            criteria += ["BEFORE", _imap_date(end_date + timedelta(days=1))]
+
        logging.getLogger(__name__).info(
-            "Finance sync: %s (criterion=%s)",
-            "全量" if is_first_sync else "增量",
-            search_criterion,
+            "Finance sync: mode=%s criterion=%s range=%s~%s",
+            mode,
+            base_criterion,
+            start_date,
+            end_date,
        )
-        status, data = imap.search(None, search_criterion)
+
+        status, data = imap.search(None, *criteria)
        if status != "OK":
            return

-        id_list = data[0].split()
+        id_list: List[bytes] = data[0].split() if data and data[0] else []
+        logging.getLogger(__name__).info(
+            "Finance sync: matched messages=%d (mode=%s)", len(id_list), mode
+        )
+        if not id_list:
+            return
+
+        if mode == "latest":
+            latest = _pick_latest_msg_id(imap, id_list)
+            id_list = [latest] if latest else []
+
        for msg_id in id_list:
            status, msg_data = imap.fetch(msg_id, "(RFC822)")
            if status != "OK":
@@ -485,7 +596,7 @@ def _sync_one_account(config: Dict[str, Any], db: Session, results: List[Dict[st
            dt = _parse_email_date(msg)
            month_str = dt.strftime("%Y-%m")

-            saved = _save_attachment(msg, month_str)
+            saved = _save_attachment(msg, month_str, allowed_doc_types=allowed)
            for file_name, file_path, mime, raw_bytes, doc_type in saved:
                final_name = file_name
                final_path = file_path
@@ -510,11 +621,28 @@ def _sync_one_account(config: Dict[str, Any], db: Session, results: List[Dict[st
                    type=doc_type,
                    file_name=final_name,
                    file_path=final_path,
+                    tags=None,
+                    meta_json=None,
                    amount=amount,
                    billing_date=billing_date,
                )
                db.add(record)
                db.flush()
+
+                # 自动识别打标签（同步后自动跑）
+                try:
+                    from backend.app.services.ai_service import extract_finance_tags
+                    content_text = _extract_text_for_tagging(final_path, mime, raw_bytes)
+                    tags, meta = asyncio.run(extract_finance_tags(content_text, doc_type, final_name))  # type: ignore[arg-type]
+                    if tags:
+                        record.tags = ",".join(tags)
+                    if meta:
+                        import json as _json
+                        record.meta_json = _json.dumps(meta, ensure_ascii=False)
+                    db.flush()
+                except Exception:
+                    pass
+
                results.append({
                    "id": record.id,
                    "month": record.month,
@@ -526,7 +654,13 @@ def _sync_one_account(config: Dict[str, Any], db: Session, results: List[Dict[st
            imap.store(msg_id, "+FLAGS", "\\Seen \\Flagged")


-async def sync_finance_emails() -> List[Dict[str, Any]]:
+async def sync_finance_emails(
+    *,
+    mode: str = "incremental",
+    start_date: date | None = None,
+    end_date: date | None = None,
+    doc_types: list[str] | None = None,
+) -> List[Dict[str, Any]]:
    """
    Sync from all active email configs (data/email_configs.json).
    Falls back to env vars if no configs. Classifies into invoices/, receipts/, statements/.
@@ -546,7 +680,15 @@ async def sync_finance_emails() -> List[Dict[str, Any]]:
        try:
            for config in configs:
                try:
-                    _sync_one_account(config, db, results)
+                    _sync_one_account(
+                        config,
+                        db,
+                        results,
+                        mode=mode,
+                        start_date=start_date,
+                        end_date=end_date,
+                        doc_types=doc_types,
+                    )
                except Exception as e:
                    # 不让单个账户的异常中断全部同步，记录错误并继续其他账户。
                    user = config.get("user", "") or config.get("id", "")