fix:优化项目内容
This commit is contained in:
56
backend/app/deps.py
Normal file
56
backend/app/deps.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import secrets
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import Depends, Request, Response
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from backend.app.db import get_db
|
||||
from backend.app import models
|
||||
|
||||
DEVICE_COOKIE_NAME = "opc_device_token"
|
||||
|
||||
|
||||
def _issue_new_device_user(db: Session, response: Response) -> models.User:
    """Create a new device-bound user and hand its token to the client.

    A random 64-hex-character token is generated, stored on a new
    ``models.User`` row, and set as the ``DEVICE_COOKIE_NAME`` cookie on
    *response* (HTTP-only, SameSite=Lax, one-year lifetime).

    Args:
        db: Session used to insert and commit the new user.
        response: Outgoing response that receives the device cookie.

    Returns:
        The persisted user, refreshed after the commit.
    """
    token = secrets.token_hex(32)
    # Take a single timestamp so created_at and last_seen_at are exactly
    # equal for a brand-new user instead of differing by microseconds.
    now = datetime.now(timezone.utc)
    user = models.User(device_token=token, created_at=now, last_seen_at=now)
    db.add(user)
    db.commit()
    db.refresh(user)
    response.set_cookie(
        key=DEVICE_COOKIE_NAME,
        value=token,
        httponly=True,
        # NOTE(review): secure=False lets the token travel over plain HTTP;
        # confirm whether production deployments should flip this to True.
        secure=False,
        samesite="Lax",
        max_age=60 * 60 * 24 * 365,  # one year, in seconds
    )
    return user
|
||||
|
||||
|
||||
def get_current_user(
    request: Request,
    response: Response,
    db: Session = Depends(get_db),
) -> models.User:
    """Resolve the user bound to the request's device cookie.

    If the cookie is missing, or no user row matches its token, a new
    device user is issued via ``_issue_new_device_user`` (which also sets
    the cookie on *response*). Otherwise the matching user's
    ``last_seen_at`` is updated to the current UTC time and committed.

    Returns:
        The existing or newly created user.
    """
    cookie_token = request.cookies.get(DEVICE_COOKIE_NAME)

    known_user = None
    if cookie_token:
        lookup = db.query(models.User).filter(
            models.User.device_token == cookie_token
        )
        known_user = lookup.first()

    # Missing cookie and unknown token are handled the same way.
    if not known_user:
        return _issue_new_device_user(db, response)

    known_user.last_seen_at = datetime.now(timezone.utc)
    db.commit()
    return known_user
|
||||
|
||||
@@ -32,27 +32,28 @@ def create_app() -> FastAPI:
|
||||
@app.on_event("startup")
|
||||
def on_startup() -> None:
|
||||
Base.metadata.create_all(bind=engine)
|
||||
# Add new columns to finance_records if they don't exist (Module 6)
|
||||
# Lightweight schema migrations for SQLite (add columns if missing)
|
||||
try:
|
||||
from sqlalchemy import text
|
||||
with engine.connect() as conn:
|
||||
# finance_records: amount, billing_date
|
||||
r = conn.execute(text("PRAGMA table_info(finance_records)"))
|
||||
cols = [row[1] for row in r]
|
||||
if "amount" not in cols:
|
||||
conn.execute(text("ALTER TABLE finance_records ADD COLUMN amount NUMERIC(12,2)"))
|
||||
if "billing_date" not in cols:
|
||||
conn.execute(text("ALTER TABLE finance_records ADD COLUMN billing_date DATE"))
|
||||
conn.commit()
|
||||
except Exception:
|
||||
pass
|
||||
# Add customers.tags if missing (customer tags for project 收纳)
|
||||
try:
|
||||
from sqlalchemy import text
|
||||
with engine.connect() as conn:
|
||||
if "tags" not in cols:
|
||||
conn.execute(text("ALTER TABLE finance_records ADD COLUMN tags VARCHAR(512)"))
|
||||
if "meta_json" not in cols:
|
||||
conn.execute(text("ALTER TABLE finance_records ADD COLUMN meta_json TEXT"))
|
||||
|
||||
# customers: tags
|
||||
r = conn.execute(text("PRAGMA table_info(customers)"))
|
||||
cols = [row[1] for row in r]
|
||||
if "tags" not in cols:
|
||||
conn.execute(text("ALTER TABLE customers ADD COLUMN tags VARCHAR(512)"))
|
||||
|
||||
conn.commit()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -2,7 +2,6 @@ from datetime import date, datetime
|
||||
|
||||
from sqlalchemy import (
|
||||
Date,
|
||||
Column,
|
||||
DateTime,
|
||||
ForeignKey,
|
||||
Integer,
|
||||
@@ -48,7 +47,6 @@ class Project(Base):
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), default=datetime.utcnow, nullable=False
|
||||
)
|
||||
|
||||
customer: Mapped[Customer] = relationship("Customer", back_populates="projects")
|
||||
quotes: Mapped[list["Quote"]] = relationship(
|
||||
"Quote", back_populates="project", cascade="all, delete-orphan"
|
||||
@@ -103,9 +101,10 @@ class FinanceRecord(Base):
|
||||
type: Mapped[str] = mapped_column(String(50), nullable=False) # invoice / bank_receipt / manual / ...
|
||||
file_name: Mapped[str] = mapped_column(String(255), nullable=False)
|
||||
file_path: Mapped[str] = mapped_column(String(512), nullable=False)
|
||||
tags: Mapped[str | None] = mapped_column(String(512), nullable=True) # 逗号分隔标签
|
||||
meta_json: Mapped[str | None] = mapped_column(Text, nullable=True) # 结构化识别结果 JSON
|
||||
amount: Mapped[float | None] = mapped_column(Numeric(12, 2), nullable=True)
|
||||
billing_date: Mapped[date | None] = mapped_column(Date, nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), default=datetime.utcnow, nullable=False
|
||||
)
|
||||
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
from typing import List
|
||||
from datetime import date
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile
|
||||
from fastapi import APIRouter, Body, Depends, File, HTTPException, Query, UploadFile
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -9,6 +12,8 @@ from backend.app import models
|
||||
from backend.app.schemas import (
|
||||
FinanceRecordRead,
|
||||
FinanceRecordUpdate,
|
||||
FinanceBatchDeleteRequest,
|
||||
FinanceSyncRequest,
|
||||
FinanceSyncResponse,
|
||||
FinanceSyncResult,
|
||||
FinanceUploadResponse,
|
||||
@@ -21,9 +26,14 @@ router = APIRouter(prefix="/finance", tags=["finance"])
|
||||
|
||||
|
||||
@router.post("/sync", response_model=FinanceSyncResponse)
|
||||
async def sync_finance():
|
||||
async def sync_finance(payload: FinanceSyncRequest = Body(default=FinanceSyncRequest())):
|
||||
try:
|
||||
items_raw = await sync_finance_emails()
|
||||
items_raw = await sync_finance_emails(
|
||||
mode=payload.mode,
|
||||
start_date=payload.start_date,
|
||||
end_date=payload.end_date,
|
||||
doc_types=payload.doc_types,
|
||||
)
|
||||
except RuntimeError as exc:
|
||||
# 邮箱配置/连接等问题属于可预期的业务错误,用 400 让前端直接展示原因,而不是泛化为 500。
|
||||
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
||||
@@ -108,6 +118,60 @@ async def update_finance_record(
|
||||
return record
|
||||
|
||||
|
||||
@router.delete("/records/{record_id}")
async def delete_finance_record(
    record_id: int,
    db: Session = Depends(get_db),
):
    """Delete one finance record and, best-effort, its file on disk.

    The database row is removed and committed first; the file is unlinked
    only afterwards, so a failed commit can no longer leave a row pointing
    at a file that has already been deleted.

    Args:
        record_id: Primary key of the finance record.
        db: Request-scoped database session.

    Returns:
        ``{"status": "deleted", "id": record_id}``.

    Raises:
        HTTPException: 404 when no record has the given id.
    """
    import logging

    record = db.get(models.FinanceRecord, record_id)
    if not record:
        raise HTTPException(404, "记录不存在")

    # Read the path before the row is deleted and the instance expires.
    file_path = Path(record.file_path)

    db.delete(record)
    db.commit()

    try:
        # missing_ok avoids the exists()/unlink() race of a separate check.
        file_path.unlink(missing_ok=True)
    except OSError:
        # File removal stays best-effort: the record is already gone.
        logging.getLogger(__name__).warning(
            "Could not remove finance file %s", file_path, exc_info=True
        )

    return {"status": "deleted", "id": record_id}
|
||||
|
||||
|
||||
@router.post("/records/batch-delete")
async def batch_delete_finance_records(
    payload: FinanceBatchDeleteRequest,
    db: Session = Depends(get_db),
):
    """Delete several finance records and, best-effort, their files.

    All matching rows are deleted in one transaction. Files are unlinked
    only after the commit succeeds, so a failed commit cannot leave rows
    that reference files which were already removed.

    Args:
        payload: Request body carrying the record ids to delete.
        db: Request-scoped database session.

    Returns:
        ``{"status": "ok", "deleted": 0}`` when no ids were given, otherwise
        ``{"status": "deleted", "deleted": <number of rows found>}``.
    """
    import logging

    if not payload.ids:
        return {"status": "ok", "deleted": 0}

    records = (
        db.query(models.FinanceRecord)
        .filter(models.FinanceRecord.id.in_(payload.ids))
        .all()
    )

    # Capture the paths before the rows are deleted and the instances expire.
    file_paths = [Path(record.file_path) for record in records]
    for record in records:
        db.delete(record)
    db.commit()

    log = logging.getLogger(__name__)
    for file_path in file_paths:
        try:
            # missing_ok avoids the exists()/unlink() race of a separate check.
            file_path.unlink(missing_ok=True)
        except OSError:
            # File removal stays best-effort: the rows are already gone.
            log.warning("Could not remove finance file %s", file_path, exc_info=True)

    return {"status": "deleted", "deleted": len(records)}
|
||||
|
||||
|
||||
@router.get("/download/{month}")
|
||||
async def download_finance_month(month: str):
|
||||
"""
|
||||
@@ -124,3 +188,53 @@ async def download_finance_month(month: str):
|
||||
filename=f"finance_{month}.zip",
|
||||
)
|
||||
|
||||
|
||||
@router.get("/download-range")
async def download_finance_range(
    start_date: date = Query(..., description="起始日期 YYYY-MM-DD"),
    end_date: date = Query(..., description="结束日期 YYYY-MM-DD(含当日)"),
    only_invoices: bool = Query(True, description="是否仅包含发票类型"),
    db: Session = Depends(get_db),
):
    """Package finance files whose billing date is in a range into a zip.

    Records are selected by ``billing_date`` within ``[start_date, end_date]``
    (both inclusive); with ``only_invoices`` they are further restricted to
    ``type == "invoices"``. Files missing on disk are skipped.

    Args:
        start_date: First billing date to include.
        end_date: Last billing date to include.
        only_invoices: Restrict the export to invoice records.
        db: Request-scoped database session.

    Returns:
        A ``FileResponse`` serving the generated zip archive.

    Raises:
        HTTPException: 400 when ``end_date`` precedes ``start_date``;
            404 when no record matches.
    """
    import zipfile

    if end_date < start_date:
        raise HTTPException(status_code=400, detail="结束日期不能早于开始日期")

    q = db.query(models.FinanceRecord).filter(
        models.FinanceRecord.billing_date.isnot(None),
        models.FinanceRecord.billing_date >= start_date,
        models.FinanceRecord.billing_date <= end_date,
    )
    if only_invoices:
        q = q.filter(models.FinanceRecord.type == "invoices")

    records = q.all()
    if not records:
        raise HTTPException(status_code=404, detail="该时间段内没有可导出的记录")

    base_dir = Path("data/finance")
    base_dir.mkdir(parents=True, exist_ok=True)
    zip_name = f"invoices_{start_date.isoformat()}_{end_date.isoformat()}.zip"
    zip_path = base_dir / zip_name

    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for r in records:
            file_path = Path(r.file_path)
            if not file_path.exists():
                continue
            # Keep the month/type layout below the "data" directory. Slicing
            # the parts (instead of relative_to("data")) also works when
            # "data" is not the first component, e.g. for absolute paths,
            # where relative_to would raise ValueError.
            parts = file_path.parts
            tail = parts[parts.index("data") + 1:] if "data" in parts else ()
            arcname = Path(*tail) if tail else file_path.name
            zf.write(file_path, arcname=arcname)

    return FileResponse(
        path=str(zip_path),
        media_type="application/zip",
        filename=zip_name,
    )
|
||||
|
||||
|
||||
@@ -3,7 +3,11 @@ from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
import json
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||||
from fastapi.responses import StreamingResponse
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
|
||||
from backend.app import models
|
||||
@@ -105,6 +109,7 @@ def _build_markdown_from_analysis(data: Union[Dict[str, Any], List[Any]]) -> str
|
||||
@router.get("/", response_model=list[ProjectRead])
|
||||
async def list_projects(
|
||||
customer_tag: str | None = None,
|
||||
limit: int = Query(30, ge=1, le=200, description="默认只返回最近 N 条"),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""列表项目;customer_tag 不为空时只返回该客户标签下的项目(按客户 tags 筛选)。"""
|
||||
@@ -127,7 +132,7 @@ async def list_projects(
|
||||
t.ilike(f"%,{tag}"),
|
||||
)
|
||||
)
|
||||
return query.all()
|
||||
return query.limit(limit).all()
|
||||
|
||||
|
||||
@router.get("/{project_id}", response_model=ProjectRead)
|
||||
@@ -200,6 +205,88 @@ async def analyze_project_requirement(
|
||||
)
|
||||
|
||||
|
||||
@router.post("/analyze_stream")
async def analyze_project_requirement_stream(
    payload: RequirementAnalyzeRequest,
    db: Session = Depends(get_db),
):
    """Stream an AI-generated Markdown proposal to the client over SSE.

    A draft project is created up front so its id can be reported at the
    end. Each event is one ``data: <json>`` line followed by a blank line:

    - ``{"type": "delta", "content": "..."}`` for each generated fragment
    - ``{"type": "error", "message": "..."}`` when the AI call cannot run
    - ``{"type": "done", "project_id": 1}`` once generation has finished

    Args:
        payload: Customer id and raw requirement text.
        db: Request-scoped database session.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream``.

    Raises:
        HTTPException: 404 when the customer does not exist.
    """
    import logging

    log = logging.getLogger(__name__)

    customer = db.get(models.Customer, payload.customer_id)
    if not customer:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Customer not found")

    # Create a project first so we can return project_id at the end.
    project = models.Project(
        customer_id=payload.customer_id,
        raw_requirement=payload.raw_text,
        ai_solution_md="",
        status="draft",
    )
    db.add(project)
    db.commit()
    db.refresh(project)
    # Read the id now: the final event must not depend on lazily reloading
    # the instance after a later commit or rollback.
    project_id = project.id

    def _sse(event: Dict[str, Any]) -> str:
        """Format one event dict as an SSE data frame."""
        return f"data: {json.dumps(event, ensure_ascii=False)}\n\n"

    async def gen() -> AsyncGenerator[str, None]:
        from backend.app.services.ai_service import get_active_ai_config
        from openai import AsyncOpenAI

        config = get_active_ai_config()
        api_key = (config.get("api_key") or "").strip()
        if not api_key:
            yield _sse({"type": "error", "message": "AI API Key 未配置"})
            return
        base_url = (config.get("base_url") or "").strip() or None
        client = AsyncOpenAI(api_key=api_key, base_url=base_url)
        model = config.get("model_name") or "gpt-4o-mini"
        temperature = float(config.get("temperature", 0.2))

        system_prompt = (
            (config.get("system_prompt_override") or "").strip()
            or "你是一名资深系统架构师,请输出可直接编辑的 Markdown 方案,不要输出 JSON。"
        )
        user_prompt = (
            "请基于以下客户原始需求,输出一份可交付的项目方案草稿(Markdown)。\n"
            "要求包含:概要、功能模块拆分、技术实现思路、工时与报价估算、备注。\n\n"
            f"【客户原始需求】\n{payload.raw_text}"
        )

        # Collect fragments and join once instead of repeated string +=.
        chunks: List[str] = []
        try:
            stream = await client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=temperature,
                stream=True,
            )
            async for event in stream:
                delta = (event.choices[0].delta.content or "") if event.choices else ""
                if not delta:
                    continue
                chunks.append(delta)
                yield _sse({"type": "delta", "content": delta})
        except Exception as exc:
            # Boundary handler: report the failure to the client as an event.
            yield _sse({"type": "error", "message": str(exc)})
            return

        # Save the final markdown. A failure here must not break the stream,
        # but it is logged rather than silently discarded.
        try:
            project.ai_solution_md = "".join(chunks)
            db.add(project)
            db.commit()
        except Exception:
            log.exception("Failed to save AI solution for project %s", project_id)
            try:
                db.rollback()
            except Exception:
                log.exception("Rollback failed for project %s", project_id)

        yield _sse({"type": "done", "project_id": project_id})

    return StreamingResponse(gen(), media_type="text/event-stream")
|
||||
|
||||
|
||||
@router.post("/{project_id}/generate_quote", response_model=QuoteGenerateResponse)
|
||||
async def generate_project_quote(
|
||||
project_id: int,
|
||||
|
||||
@@ -104,12 +104,31 @@ class FinanceSyncResponse(BaseModel):
|
||||
details: List[FinanceSyncResult] = Field(default_factory=list)
|
||||
|
||||
|
||||
class FinanceSyncRequest(BaseModel):
    """
    Mailbox attachment sync strategy:
    - mode=incremental: the default. The first run (no history) syncs everything, later runs only UNSEEN mail.
    - mode=all: sync all attachments (optionally within a date range).
    - mode=latest: sync only the attachments of the single latest message (optionally within a date range).
    The date range is any start/end date, both inclusive; internally it is converted to IMAP SINCE/BEFORE.
    """

    mode: str = Field(default="incremental", description="incremental | all | latest")
    start_date: Optional[date] = Field(default=None, description="YYYY-MM-DD")
    end_date: Optional[date] = Field(default=None, description="YYYY-MM-DD")
    doc_types: Optional[List[str]] = Field(
        default=None,
        description="要同步的附件类型:invoices/receipts/statements。为空表示默认全部类型。",
    )
|
||||
|
||||
|
||||
class FinanceRecordRead(BaseModel):
|
||||
id: int
|
||||
month: str
|
||||
type: str
|
||||
file_name: str
|
||||
file_path: str
|
||||
tags: Optional[str] = None
|
||||
meta_json: Optional[str] = None
|
||||
amount: Optional[float] = None
|
||||
billing_date: Optional[date] = None
|
||||
created_at: datetime
|
||||
@@ -123,6 +142,10 @@ class FinanceRecordUpdate(BaseModel):
|
||||
billing_date: Optional[date] = None
|
||||
|
||||
|
||||
class FinanceBatchDeleteRequest(BaseModel):
    """Request body listing the finance record ids to delete in one call."""

    # Required field: ``...`` marks it as having no default.
    ids: List[int] = Field(default=..., description="要删除的财务记录 ID 列表")
|
||||
|
||||
|
||||
class FinanceUploadResponse(BaseModel):
|
||||
id: int
|
||||
month: str
|
||||
|
||||
@@ -3,7 +3,7 @@ import json
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Tuple
|
||||
from typing import Any, Dict, Tuple, List
|
||||
|
||||
from openai import AsyncOpenAI
|
||||
from openai import NotFoundError as OpenAINotFoundError
|
||||
@@ -197,6 +197,65 @@ async def extract_invoice_metadata(image_bytes: bytes, mime: str = "image/jpeg")
|
||||
api_key = (config.get("api_key") or "").strip()
|
||||
if not api_key:
|
||||
return (None, None)
|
||||
|
||||
|
||||
async def extract_finance_tags(
    content_text: str,
    doc_type: str,
    filename: str = "",
) -> Tuple[List[str], Dict[str, Any]]:
    """Ask the configured AI model for tags and structured fields.

    The prompt carries the document type, the file name and at most the
    first 12000 characters of *content_text*, and requests a JSON object
    with ``tags`` and ``meta`` keys.

    Args:
        content_text: Text extracted from the attachment.
        doc_type: Document category passed through to the prompt.
        filename: Attachment file name passed through to the prompt.

    Returns:
        ``(tags, meta)``. ``tags`` holds at most 12 unique, non-empty,
        comma-free strings; ``meta`` is the model's ``meta`` object or ``{}``.
        If the reply is not valid JSON, returns
        ``([], {"summary": "", "raw": <reply>})``.
    """
    config = _load_ai_config()
    client = _client_from_config(config)
    model = config.get("model_name") or "gpt-4o-mini"
    temperature = float(config.get("temperature", 0.2))

    prompt = (
        "你是一名财务助理。请根据附件的文本内容,为它生成可检索的标签,并抽取关键字段。\n"
        "只返回 JSON,不要任何解释文字。\n"
        "输入信息:\n"
        f"- 类型 doc_type: {doc_type}\n"
        f"- 文件名 filename: {filename}\n"
        "- 附件文本 content_text: (见下)\n\n"
        "返回 JSON 格式:\n"
        "{\n"
        ' "tags": ["标签1","标签2"],\n'
        ' "meta": {\n'
        ' "counterparty": "对方单位/收款方/付款方(如能识别)或 null",\n'
        ' "account": "账户/卡号后四位(如能识别)或 null",\n'
        ' "amount": "金额数字字符串或 null",\n'
        ' "date": "YYYY-MM-DD 或 null",\n'
        ' "summary": "一句话摘要"\n'
        " }\n"
        "}\n\n"
        "content_text:\n"
        f"{content_text[:12000]}\n"
    )

    completion = await client.chat.completions.create(
        model=model,
        response_format={"type": "json_object"},
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=500,
    )
    content = completion.choices[0].message.content or "{}"
    try:
        data: Any = json.loads(content)
    except ValueError:
        # json.JSONDecodeError is a ValueError: the reply was not valid JSON.
        return ([], {"summary": "", "raw": content})

    if not isinstance(data, dict):
        return ([], {})

    raw_tags = data.get("tags")
    if not isinstance(raw_tags, list):
        raw_tags = []

    tags: List[str] = []
    seen = set()
    for item in raw_tags:
        if item is None:
            # A JSON null must not turn into the literal tag "None".
            continue
        # Tags are stored comma-joined by the caller, so a tag that itself
        # contains a comma is split into separate tags here.
        for piece in str(item).split(","):
            tag = piece.strip()
            if tag and tag not in seen:
                seen.add(tag)
                tags.append(tag)
    tags = tags[:12]

    meta = data.get("meta")
    if not isinstance(meta, dict):
        meta = {}
    return (tags, meta)
|
||||
try:
|
||||
client = _client_from_config(config)
|
||||
model = config.get("model_name") or "gpt-4o-mini"
|
||||
|
||||
@@ -7,7 +7,7 @@ import os
|
||||
import re
|
||||
import sqlite3
|
||||
import ssl
|
||||
from datetime import date, datetime
|
||||
from datetime import date, datetime, timedelta
|
||||
from email.header import decode_header
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Tuple
|
||||
@@ -109,6 +109,29 @@ def _run_invoice_ocr_sync(file_path: str, mime: str, raw_bytes: bytes) -> Tuple[
|
||||
loop.close()
|
||||
|
||||
|
||||
def _extract_text_for_tagging(file_path: str, mime: str, raw_bytes: bytes) -> str:
|
||||
"""
|
||||
Extract best-effort text from PDF/image/xlsx for tagging.
|
||||
- PDF: extract text via fitz; fallback to first page OCR image (handled elsewhere if needed)
|
||||
- Image: no local OCR here; return empty and let AI decide (optional)
|
||||
- XLSX: not parsed currently
|
||||
"""
|
||||
p = Path(file_path)
|
||||
suf = p.suffix.lower()
|
||||
if suf == ".pdf" or "pdf" in (mime or "").lower():
|
||||
try:
|
||||
import fitz # PyMuPDF
|
||||
doc = fitz.open(stream=raw_bytes, filetype="pdf")
|
||||
texts: list[str] = []
|
||||
for i in range(min(5, doc.page_count)):
|
||||
texts.append(doc.load_page(i).get_text("text") or "")
|
||||
doc.close()
|
||||
return "\n".join(texts).strip()
|
||||
except Exception:
|
||||
return ""
|
||||
return ""
|
||||
|
||||
|
||||
def _rename_invoice_file(
|
||||
file_path: str,
|
||||
amount: float | None,
|
||||
@@ -173,6 +196,7 @@ def _has_sync_history() -> bool:
|
||||
def _save_attachment(
|
||||
msg: email.message.Message,
|
||||
month_str: str,
|
||||
allowed_doc_types: set[str] | None = None,
|
||||
) -> List[Tuple[str, str, str, bytes, str]]:
|
||||
"""
|
||||
Save PDF/image attachments.
|
||||
@@ -193,17 +217,20 @@ def _save_attachment(
|
||||
_ensure_sync_history_table(conn)
|
||||
|
||||
for part in msg.walk():
|
||||
content_disposition = part.get("Content-Disposition", "")
|
||||
if "attachment" not in content_disposition:
|
||||
continue
|
||||
# 许多邮件附件会以 inline 或缺失 Content-Disposition 的形式出现,
|
||||
# 只要存在 filename 且扩展名符合,就视为可下载附件。
|
||||
content_disposition = (part.get("Content-Disposition", "") or "").lower()
|
||||
|
||||
filename = part.get_filename()
|
||||
filename = _decode_header_value(filename)
|
||||
if not filename:
|
||||
continue
|
||||
if content_disposition and ("attachment" not in content_disposition and "inline" not in content_disposition):
|
||||
# 明确的非附件 disposition,跳过
|
||||
continue
|
||||
|
||||
ext = Path(filename).suffix.lower()
|
||||
if ext not in (".pdf", ".jpg", ".jpeg", ".png", ".xlsx"):
|
||||
if ext not in (".pdf", ".jpg", ".jpeg", ".png", ".webp", ".xlsx", ".xls"):
|
||||
continue
|
||||
|
||||
maintype = part.get_content_maintype()
|
||||
@@ -216,6 +243,8 @@ def _save_attachment(
|
||||
|
||||
# 分类:基于主题 + 文件名
|
||||
doc_type = _classify_type(subject, filename)
|
||||
if allowed_doc_types is not None and doc_type not in allowed_doc_types:
|
||||
continue
|
||||
base_dir = _ensure_month_dir(month_str, doc_type)
|
||||
|
||||
# 增量去重:根据 (message_id, md5) 判断是否已同步过
|
||||
@@ -421,7 +450,56 @@ def _select_mailbox(imap: imaplib.IMAP4_SSL, mailbox: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _sync_one_account(config: Dict[str, Any], db: Session, results: List[Dict[str, Any]]) -> None:
|
||||
def _imap_date(d: date) -> str:
|
||||
# IMAP date format: 16-Mar-2026 (English month)
|
||||
import calendar
|
||||
return f"{d.day:02d}-{calendar.month_abbr[d.month]}-{d.year}"
|
||||
|
||||
|
||||
def _pick_latest_msg_id(imap: imaplib.IMAP4_SSL, msg_ids: List[bytes]) -> bytes | None:
|
||||
"""从一批 msg_id 中按 INTERNALDATE 选择最新的一封。"""
|
||||
latest_id: bytes | None = None
|
||||
latest_ts: float = -1.0
|
||||
for mid in msg_ids:
|
||||
try:
|
||||
typ, data = imap.fetch(mid, "(INTERNALDATE)")
|
||||
if typ != "OK" or not data or not data[0]:
|
||||
continue
|
||||
# imaplib.Internaldate2tuple expects a bytes response line
|
||||
raw = data[0]
|
||||
if isinstance(raw, tuple):
|
||||
raw = raw[0]
|
||||
if not isinstance(raw, (bytes, bytearray)):
|
||||
raw = str(raw).encode("utf-8", errors="ignore")
|
||||
t = imaplib.Internaldate2tuple(raw)
|
||||
if not t:
|
||||
continue
|
||||
import time
|
||||
ts = time.mktime(t)
|
||||
if ts > latest_ts:
|
||||
latest_ts = ts
|
||||
latest_id = mid
|
||||
except Exception:
|
||||
continue
|
||||
return latest_id
|
||||
|
||||
|
||||
def _sync_one_account(
|
||||
config: Dict[str, Any],
|
||||
db: Session,
|
||||
results: List[Dict[str, Any]],
|
||||
*,
|
||||
mode: str = "incremental",
|
||||
start_date: date | None = None,
|
||||
end_date: date | None = None,
|
||||
doc_types: list[str] | None = None,
|
||||
) -> None:
|
||||
allowed: set[str] | None = None
|
||||
if doc_types:
|
||||
allowed = {d.strip().lower() for d in doc_types if d and d.strip()}
|
||||
allowed = {d for d in allowed if d in ("invoices", "receipts", "statements")}
|
||||
if not allowed:
|
||||
allowed = None
|
||||
host = config.get("host")
|
||||
user = config.get("user")
|
||||
password = config.get("password")
|
||||
@@ -461,20 +539,53 @@ def _sync_one_account(config: Dict[str, Any], db: Session, results: List[Dict[st
|
||||
f"无法选择邮箱「{mailbox}」,请检查该账户的 Mailbox 配置(如 163 使用 INBOX)"
|
||||
)
|
||||
|
||||
# 首次同步(历史库无记录):拉取全部邮件中的附件,由 attachment_history 去重
|
||||
# 已有历史:只拉取未读邮件,避免重复拉取
|
||||
# 支持:
|
||||
# - mode=incremental: 首次全量,否则 UNSEEN
|
||||
# - mode=all: 全量(可加时间范围)
|
||||
# - mode=latest: 仅最新一封(可加时间范围)
|
||||
mode = (mode or "incremental").strip().lower()
|
||||
if mode not in ("incremental", "all", "latest"):
|
||||
mode = "incremental"
|
||||
|
||||
is_first_sync = not _has_sync_history()
|
||||
search_criterion = "ALL" if is_first_sync else "UNSEEN"
|
||||
base_criterion = "ALL"
|
||||
if mode == "incremental":
|
||||
base_criterion = "ALL" if is_first_sync else "UNSEEN"
|
||||
elif mode == "all":
|
||||
base_criterion = "ALL"
|
||||
elif mode == "latest":
|
||||
base_criterion = "ALL"
|
||||
|
||||
criteria: List[str] = [base_criterion]
|
||||
if start_date:
|
||||
criteria += ["SINCE", _imap_date(start_date)]
|
||||
if end_date:
|
||||
# BEFORE is exclusive; add one day to make end_date inclusive
|
||||
criteria += ["BEFORE", _imap_date(end_date + timedelta(days=1))]
|
||||
|
||||
logging.getLogger(__name__).info(
|
||||
"Finance sync: %s (criterion=%s)",
|
||||
"全量" if is_first_sync else "增量",
|
||||
search_criterion,
|
||||
"Finance sync: mode=%s criterion=%s range=%s~%s",
|
||||
mode,
|
||||
base_criterion,
|
||||
start_date,
|
||||
end_date,
|
||||
)
|
||||
status, data = imap.search(None, search_criterion)
|
||||
|
||||
status, data = imap.search(None, *criteria)
|
||||
if status != "OK":
|
||||
return
|
||||
|
||||
id_list = data[0].split()
|
||||
id_list: List[bytes] = data[0].split() if data and data[0] else []
|
||||
logging.getLogger(__name__).info(
|
||||
"Finance sync: matched messages=%d (mode=%s)", len(id_list), mode
|
||||
)
|
||||
if not id_list:
|
||||
return
|
||||
|
||||
if mode == "latest":
|
||||
latest = _pick_latest_msg_id(imap, id_list)
|
||||
id_list = [latest] if latest else []
|
||||
|
||||
for msg_id in id_list:
|
||||
status, msg_data = imap.fetch(msg_id, "(RFC822)")
|
||||
if status != "OK":
|
||||
@@ -485,7 +596,7 @@ def _sync_one_account(config: Dict[str, Any], db: Session, results: List[Dict[st
|
||||
dt = _parse_email_date(msg)
|
||||
month_str = dt.strftime("%Y-%m")
|
||||
|
||||
saved = _save_attachment(msg, month_str)
|
||||
saved = _save_attachment(msg, month_str, allowed_doc_types=allowed)
|
||||
for file_name, file_path, mime, raw_bytes, doc_type in saved:
|
||||
final_name = file_name
|
||||
final_path = file_path
|
||||
@@ -510,11 +621,28 @@ def _sync_one_account(config: Dict[str, Any], db: Session, results: List[Dict[st
|
||||
type=doc_type,
|
||||
file_name=final_name,
|
||||
file_path=final_path,
|
||||
tags=None,
|
||||
meta_json=None,
|
||||
amount=amount,
|
||||
billing_date=billing_date,
|
||||
)
|
||||
db.add(record)
|
||||
db.flush()
|
||||
|
||||
# 自动识别打标签(同步后自动跑)
|
||||
try:
|
||||
from backend.app.services.ai_service import extract_finance_tags
|
||||
content_text = _extract_text_for_tagging(final_path, mime, raw_bytes)
|
||||
tags, meta = asyncio.run(extract_finance_tags(content_text, doc_type, final_name)) # type: ignore[arg-type]
|
||||
if tags:
|
||||
record.tags = ",".join(tags)
|
||||
if meta:
|
||||
import json as _json
|
||||
record.meta_json = _json.dumps(meta, ensure_ascii=False)
|
||||
db.flush()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
results.append({
|
||||
"id": record.id,
|
||||
"month": record.month,
|
||||
@@ -526,7 +654,13 @@ def _sync_one_account(config: Dict[str, Any], db: Session, results: List[Dict[st
|
||||
imap.store(msg_id, "+FLAGS", "\\Seen \\Flagged")
|
||||
|
||||
|
||||
async def sync_finance_emails() -> List[Dict[str, Any]]:
|
||||
async def sync_finance_emails(
|
||||
*,
|
||||
mode: str = "incremental",
|
||||
start_date: date | None = None,
|
||||
end_date: date | None = None,
|
||||
doc_types: list[str] | None = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Sync from all active email configs (data/email_configs.json).
|
||||
Falls back to env vars if no configs. Classifies into invoices/, receipts/, statements/.
|
||||
@@ -546,7 +680,15 @@ async def sync_finance_emails() -> List[Dict[str, Any]]:
|
||||
try:
|
||||
for config in configs:
|
||||
try:
|
||||
_sync_one_account(config, db, results)
|
||||
_sync_one_account(
|
||||
config,
|
||||
db,
|
||||
results,
|
||||
mode=mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
doc_types=doc_types,
|
||||
)
|
||||
except Exception as e:
|
||||
# 不让单个账户的异常中断全部同步,记录错误并继续其他账户。
|
||||
user = config.get("user", "") or config.get("id", "")
|
||||
|
||||
Reference in New Issue
Block a user