feat: new file

This commit is contained in:
Daniel
2026-03-18 18:57:58 +08:00
commit d0ff049899
31 changed files with 1507 additions and 0 deletions

View File

@@ -0,0 +1,105 @@
from __future__ import annotations
import json
import logging
from typing import Any
import pandas as pd
from sqlalchemy import Engine, text
from ..db import get_engine
from ..services.schema_discovery import discover_schema
from ..settings import settings
# Module-level logger, registered under the fixed name "db_sample" (not __name__).
log = logging.getLogger("db_sample")
def _truncate_value(v: Any, max_len: int) -> Any:
if v is None:
return None
if isinstance(v, (int, float, bool)):
return v
s = str(v)
if len(s) <= max_len:
return s
return s[: max_len - 3] + "..."
def _df_to_records(df: pd.DataFrame, max_str_len: int) -> list[dict[str, Any]]:
    """Convert ``df`` into one dict per row with values made safe for logging.

    Column labels are converted to ``str`` and used as keys; every cell is
    passed through ``_truncate_value`` with ``max_str_len``. If two columns
    share the same label, the right-most one wins.

    Args:
        df: Frame holding the sampled rows.
        max_str_len: Maximum length of any stringified cell value.

    Returns:
        A list with one ``{column: value}`` dict per row, in row order.
    """
    columns = [str(label) for label in df.columns]
    # itertuples() reads every column with its own dtype. iterrows() would
    # build one Series per row and upcast mixed numeric columns, so integer
    # values in an all-numeric frame would be logged as floats (1 -> 1.0).
    return [
        {
            name: _truncate_value(value, max_str_len)
            for name, value in zip(columns, values)
        }
        for values in df.itertuples(index=False, name=None)
    ]
def _list_tables(engine: Engine) -> list[str]:
    """Return the table names of the connection's current database, sorted by name.

    The names are read from ``information_schema.tables`` filtered by
    ``DATABASE()``.
    NOTE(review): ``DATABASE()`` looks MySQL/MariaDB specific - confirm the
    target backend before reusing this helper elsewhere.

    Args:
        engine: SQLAlchemy engine used to open a short-lived connection.

    Returns:
        The first column of every result row, in the order returned by the query.
    """
    query = text(
        """
SELECT table_name
FROM information_schema.tables
WHERE table_schema = DATABASE()
ORDER BY table_name
"""
    )
    with engine.connect() as connection:
        result_rows = connection.execute(query).all()

    names: list[str] = []
    for record in result_rows:
        names.append(record[0])
    return names
def _table_row_count(engine: Engine, table: str) -> int | None:
    """Return the number of rows in ``table``, or ``None`` if it cannot be counted.

    Args:
        engine: SQLAlchemy engine used to open a short-lived connection.
        table: Unqualified table name (no ``schema.`` prefix).

    Returns:
        The ``COUNT(*)`` result as ``int``, or ``None`` when the query returns
        no value or raises for any reason.
    """
    try:
        # Identifiers cannot be sent as bind parameters, so let the dialect
        # quote the name; otherwise reserved words or names with special
        # characters would always fail and be reported as "unknown".
        quoted = engine.dialect.identifier_preparer.quote(table)
        with engine.connect() as conn:
            v = conn.execute(text(f"SELECT COUNT(*) FROM {quoted}")).scalar()
        return int(v) if v is not None else None
    except Exception:
        # Deliberately best effort: a failed count must not abort the caller.
        return None
def _sample_table(engine: Engine, table: str, limit: int) -> pd.DataFrame:
    """Read up to ``limit`` rows of ``table`` into a DataFrame.

    Args:
        engine: SQLAlchemy engine used to open a short-lived connection.
        table: Unqualified table name (no ``schema.`` prefix).
        limit: Maximum number of rows to fetch.

    Returns:
        The result of ``SELECT * ... LIMIT limit`` as a ``pandas.DataFrame``.

    Raises:
        ValueError: If ``limit`` cannot be converted to ``int``.
        Exception: Any database or driver error is propagated to the caller.
    """
    # Identifiers cannot be bound as parameters: quote the table name with the
    # dialect's own rules and force the limit to be a plain integer literal.
    quoted = engine.dialect.identifier_preparer.quote(table)
    query = text(f"SELECT * FROM {quoted} LIMIT {int(limit)}")
    with engine.connect() as conn:
        return pd.read_sql(query, conn)
def print_db_sample_to_logs(limit: int | None = None, *, max_tables: int = 8) -> None:
    """Log the database structure and sample rows to the backend log for analysis.

    Long string values are truncated so the log output does not explode.
    All messages are emitted at WARNING level with a ``DB SAMPLE:`` prefix.

    Args:
        limit: Maximum number of sample rows per table. ``None`` (or any other
            falsy value such as ``0``) falls back to
            ``settings.debug_db_sample_limit``.
        max_tables: Maximum number of tables to sample; values below zero are
            treated as zero. Defaults to 8.
    """
    engine = get_engine()
    schema = discover_schema(engine)
    eff_limit = int(limit or settings.debug_db_sample_limit)
    max_str_len = int(settings.debug_db_sample_max_str_len)
    tables = _list_tables(engine)
    log.warning("DB SAMPLE: discovered schema=%s", schema.model_dump())
    log.warning("DB SAMPLE: tables=%s", tables)

    # Discovered sales/products tables go first (only if they really exist),
    # followed by the remaining tables in listing order. dict.fromkeys drops
    # duplicates while keeping the first occurrence's position.
    preferred = [
        t for t in (schema.sales_table, schema.products_table) if t and t in tables
    ]
    prioritized = list(dict.fromkeys([*preferred, *tables]))

    for t in prioritized[: max(max_tables, 0)]:
        cnt = _table_row_count(engine, t)
        try:
            df = _sample_table(engine, t, eff_limit)
            recs = _df_to_records(df, max_str_len=max_str_len)
            log.warning(
                "DB SAMPLE: table=%s rows=%s cols=%s sample=%s",
                t,
                cnt,
                list(df.columns),
                json.dumps(recs, ensure_ascii=False),
            )
        except Exception as e:
            # One unreadable table must not stop the remaining tables from
            # being sampled; report the failure and move on.
            log.warning("DB SAMPLE: table=%s rows=%s sample_failed=%s", t, cnt, str(e))