feat: new file
This commit is contained in:
105
backend/app/services/db_sample.py
Normal file
105
backend/app/services/db_sample.py
Normal file
@@ -0,0 +1,105 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
from sqlalchemy import Engine, text
|
||||
|
||||
from ..db import get_engine
|
||||
from ..services.schema_discovery import discover_schema
|
||||
from ..settings import settings
|
||||
|
||||
# Module-level logger, registered under the fixed name "db_sample" rather than
# the module's import path (__name__).
log = logging.getLogger("db_sample")
|
||||
|
||||
|
||||
def _truncate_value(v: Any, max_len: int) -> Any:
|
||||
if v is None:
|
||||
return None
|
||||
if isinstance(v, (int, float, bool)):
|
||||
return v
|
||||
s = str(v)
|
||||
if len(s) <= max_len:
|
||||
return s
|
||||
return s[: max_len - 3] + "..."
|
||||
|
||||
|
||||
def _df_to_records(df: pd.DataFrame, max_str_len: int) -> list[dict[str, Any]]:
    """Convert ``df`` into a list of per-row dicts with truncated values.

    Each row becomes a dict keyed by the stringified column label; every
    value is passed through ``_truncate_value`` with ``max_str_len``. If two
    columns stringify to the same label, the later column wins.

    Args:
        df: The frame to convert.
        max_str_len: Character cap forwarded to ``_truncate_value``.

    Returns:
        One dict per row, in row order. Empty list for an empty frame.
    """
    labels = [str(col) for col in df.columns]

    # itertuples() is used instead of iterrows(): iterrows() builds one Series
    # per row and therefore does not preserve dtypes across columns (an int
    # id next to a float price would be logged as 1.0). itertuples() walks the
    # columns individually, so each value keeps its own column's type.
    return [
        {
            label: _truncate_value(value, max_str_len)
            for label, value in zip(labels, row)
        }
        for row in df.itertuples(index=False, name=None)
    ]
|
||||
|
||||
|
||||
def _list_tables(engine: Engine) -> list[str]:
    """Return the table names of the connection's current schema, ordered by name.

    The names are read from ``information_schema.tables`` filtered with
    ``DATABASE()``.
    NOTE(review): ``DATABASE()`` looks MySQL/MariaDB-specific - confirm the
    target backend before reusing this elsewhere.

    Args:
        engine: SQLAlchemy engine used to open a short-lived connection.

    Returns:
        The first column of every result row, in query order.
    """
    query = text(
        """
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = DATABASE()
        ORDER BY table_name
        """
    )

    with engine.connect() as connection:
        fetched = connection.execute(query).all()

    names: list[str] = []
    for record in fetched:
        names.append(record[0])
    return names
|
||||
|
||||
|
||||
def _table_row_count(engine: Engine, table: str) -> int | None:
    """Return ``COUNT(*)`` for ``table``, or ``None`` if it cannot be counted.

    This is deliberately best-effort: any failure (missing table, permission
    error, connection problem, ...) is logged at debug level and reported as
    ``None`` instead of being raised.

    Args:
        engine: SQLAlchemy engine used to open a short-lived connection.
        table: Unquoted table name.

    Returns:
        The row count as ``int``, or ``None`` when the query fails or yields
        no value.
    """
    try:
        # Identifiers cannot be sent as bound parameters, so the name has to
        # be interpolated; let the dialect quote it so reserved words and
        # names with special characters still form valid SQL.
        quoted = engine.dialect.identifier_preparer.quote(table)
        with engine.connect() as conn:
            value = conn.execute(text(f"SELECT COUNT(*) FROM {quoted}")).scalar()
        return int(value) if value is not None else None
    except Exception:
        # Keep the best-effort contract, but leave a trace for debugging
        # rather than swallowing the error silently.
        log.debug("DB SAMPLE: row count failed for table=%s", table, exc_info=True)
        return None
|
||||
|
||||
|
||||
def _sample_table(engine: Engine, table: str, limit: int) -> pd.DataFrame:
    """Read up to ``limit`` rows of ``table`` into a DataFrame.

    Args:
        engine: SQLAlchemy engine used to open a short-lived connection.
        table: Unquoted table name.
        limit: Maximum number of rows to fetch; coerced with ``int()``.

    Returns:
        The result of ``SELECT * FROM <table> LIMIT <limit>``.

    Raises:
        Whatever the driver / pandas raise on failure; errors are not caught
        here. ``int(limit)`` raises ``TypeError``/``ValueError`` for
        non-numeric input.
    """
    # Identifiers cannot be sent as bound parameters; have the dialect quote
    # the name so reserved words / special characters do not break the query.
    quoted = engine.dialect.identifier_preparer.quote(table)
    # Coerce so that only an integer literal can ever reach the SQL text.
    row_cap = int(limit)

    with engine.connect() as conn:
        return pd.read_sql(text(f"SELECT * FROM {quoted} LIMIT {row_cap}"), conn)
|
||||
|
||||
|
||||
def print_db_sample_to_logs(limit: int | None = None) -> None:
    """
    Log the database structure and sample rows to the backend log for analysis.

    Note: long string values are truncated so the log output stays bounded.

    Args:
        limit: Rows to sample per table. A falsy value (``None`` or ``0``)
            falls back to ``settings.debug_db_sample_limit``.
    """
    engine = get_engine()
    schema = discover_schema(engine)

    rows_per_table = int(limit or settings.debug_db_sample_limit)
    str_cap = int(settings.debug_db_sample_max_str_len)

    tables = _list_tables(engine)
    log.warning("DB SAMPLE: discovered schema=%s", schema.model_dump())
    log.warning("DB SAMPLE: tables=%s", tables)

    # Discovered sales/products tables come first (only if they really exist),
    # followed by every other table in listing order. dict.fromkeys drops
    # duplicates while keeping first-seen order.
    discovered = [
        name
        for name in (schema.sales_table, schema.products_table)
        if name and name in tables
    ]
    ordered = list(dict.fromkeys([*discovered, *tables]))

    # Only the first 8 tables are sampled.
    for table in ordered[:8]:
        row_count = _table_row_count(engine, table)
        try:
            frame = _sample_table(engine, table, rows_per_table)
            records = _df_to_records(frame, max_str_len=str_cap)
            log.warning(
                "DB SAMPLE: table=%s rows=%s cols=%s sample=%s",
                table,
                row_count,
                list(frame.columns),
                json.dumps(records, ensure_ascii=False),
            )
        except Exception as exc:
            # A failing table must not stop the remaining tables from being logged.
            log.warning(
                "DB SAMPLE: table=%s rows=%s sample_failed=%s",
                table,
                row_count,
                str(exc),
            )
|
||||
|
||||
Reference in New Issue
Block a user