This commit is contained in:
丹尼尔
2026-03-12 19:35:06 +08:00
commit ad96272ab6
40 changed files with 2645 additions and 0 deletions

View File

@@ -0,0 +1,215 @@
import asyncio
import email
import imaplib
import os
from datetime import datetime
from email.header import decode_header
from pathlib import Path
from typing import Any, Dict, List, Tuple
from backend.app.db import SessionLocal
from backend.app.models import FinanceRecord
FINANCE_BASE_DIR = Path("data/finance")
def _decode_header_value(value: str | None) -> str:
if not value:
return ""
parts = decode_header(value)
decoded = ""
for text, enc in parts:
if isinstance(text, bytes):
decoded += text.decode(enc or "utf-8", errors="ignore")
else:
decoded += text
return decoded
def _classify_type(subject: str) -> str:
"""
Classify finance document type based on subject keywords.
"""
subject_lower = subject.lower()
if any(k in subject for k in ["发票", "invoice"]):
return "invoices"
if any(k in subject for k in ["流水", "bank", "对账单", "statement"]):
return "bank_records"
if any(k in subject for k in ["回执", "receipt"]):
return "receipts"
return "others"
def _ensure_month_dir(month_str: str, doc_type: str) -> Path:
month_dir = FINANCE_BASE_DIR / month_str / doc_type
month_dir.mkdir(parents=True, exist_ok=True)
return month_dir
def _parse_email_date(msg: email.message.Message) -> datetime:
date_tuple = email.utils.parsedate_tz(msg.get("Date"))
if date_tuple:
dt = datetime.fromtimestamp(email.utils.mktime_tz(date_tuple))
else:
dt = datetime.utcnow()
return dt
def _save_attachment(
msg: email.message.Message,
month_str: str,
doc_type: str,
) -> List[Tuple[str, str]]:
"""
Save PDF/image attachments and return list of (file_name, file_path).
"""
saved: List[Tuple[str, str]] = []
base_dir = _ensure_month_dir(month_str, doc_type)
for part in msg.walk():
content_disposition = part.get("Content-Disposition", "")
if "attachment" not in content_disposition:
continue
filename = part.get_filename()
filename = _decode_header_value(filename)
if not filename:
continue
content_type = part.get_content_type()
maintype = part.get_content_maintype()
# Accept pdf and common images
if maintype not in ("application", "image"):
continue
data = part.get_payload(decode=True)
if not data:
continue
file_path = base_dir / filename
# Ensure unique filename
counter = 1
while file_path.exists():
stem = file_path.stem
suffix = file_path.suffix
file_path = base_dir / f"{stem}_{counter}{suffix}"
counter += 1
with open(file_path, "wb") as f:
f.write(data)
saved.append((filename, str(file_path)))
return saved
async def sync_finance_emails() -> List[Dict[str, Any]]:
"""
Connect to IMAP, fetch unread finance-related emails, download attachments,
save to filesystem and record FinanceRecord entries.
"""
def _sync() -> List[Dict[str, Any]]:
host = os.getenv("IMAP_HOST")
user = os.getenv("IMAP_USER")
password = os.getenv("IMAP_PASSWORD")
port = int(os.getenv("IMAP_PORT", "993"))
mailbox = os.getenv("IMAP_MAILBOX", "INBOX")
if not all([host, user, password]):
raise RuntimeError("IMAP_HOST, IMAP_USER, IMAP_PASSWORD must be set.")
results: List[Dict[str, Any]] = []
with imaplib.IMAP4_SSL(host, port) as imap:
imap.login(user, password)
imap.select(mailbox)
# Search for UNSEEN emails with finance related keywords in subject.
# Note: IMAP SEARCH is limited; here we search UNSEEN first then filter in Python.
status, data = imap.search(None, "UNSEEN")
if status != "OK":
return results
id_list = data[0].split()
db = SessionLocal()
try:
for msg_id in id_list:
status, msg_data = imap.fetch(msg_id, "(RFC822)")
if status != "OK":
continue
raw_email = msg_data[0][1]
msg = email.message_from_bytes(raw_email)
subject = _decode_header_value(msg.get("Subject"))
doc_type = _classify_type(subject)
# Filter by keywords first
if doc_type == "others":
continue
dt = _parse_email_date(msg)
month_str = dt.strftime("%Y-%m")
saved_files = _save_attachment(msg, month_str, doc_type)
for file_name, file_path in saved_files:
record = FinanceRecord(
month=month_str,
type=doc_type,
file_name=file_name,
file_path=file_path,
)
# NOTE: created_at defaults at DB layer
db.add(record)
db.flush()
results.append(
{
"id": record.id,
"month": record.month,
"type": record.type,
"file_name": record.file_name,
"file_path": record.file_path,
}
)
# Mark email as seen and flagged to avoid re-processing
imap.store(msg_id, "+FLAGS", "\\Seen \\Flagged")
db.commit()
finally:
db.close()
return results
return await asyncio.to_thread(_sync)
async def create_monthly_zip(month_str: str) -> str:
"""
Zip the finance folder for a given month (YYYY-MM) and return the zip path.
"""
import zipfile
def _zip() -> str:
month_dir = FINANCE_BASE_DIR / month_str
if not month_dir.exists():
raise FileNotFoundError(f"Finance directory for {month_str} not found.")
FINANCE_BASE_DIR.mkdir(parents=True, exist_ok=True)
zip_path = FINANCE_BASE_DIR / f"{month_str}.zip"
with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
for root, _, files in os.walk(month_dir):
for file in files:
full_path = Path(root) / file
rel_path = full_path.relative_to(FINANCE_BASE_DIR)
zf.write(full_path, arcname=rel_path)
return str(zip_path)
return await asyncio.to_thread(_zip)