feat: new file

This commit is contained in:
Daniel
2026-03-18 18:57:58 +08:00
commit d0ff049899
31 changed files with 1507 additions and 0 deletions

18
backend/Dockerfile Normal file
View File

@@ -0,0 +1,18 @@
# Slim Python 3.12 base keeps the image small while providing a recent runtime.
FROM python:3.12-slim
WORKDIR /app
# Don't write .pyc files; don't buffer stdout/stderr (logs appear immediately).
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
RUN pip install --no-cache-dir --upgrade pip
# Install dependencies before copying app code so the Docker layer cache
# skips re-installation when only application code changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt
COPY app /app/app
EXPOSE 8000
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

1
backend/app/__init__.py Normal file
View File

@@ -0,0 +1 @@

15
backend/app/db.py Normal file
View File

@@ -0,0 +1,15 @@
from __future__ import annotations
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine
from .settings import settings
def get_engine() -> Engine:
    """Build a SQLAlchemy engine for the configured MySQL database.

    ``pool_pre_ping`` validates pooled connections before use so stale
    MySQL connections are replaced transparently; ``pool_recycle`` retires
    connections after an hour to stay under typical server timeouts.
    """
    pool_options = {"pool_pre_ping": True, "pool_recycle": 3600}
    return create_engine(settings.mysql_url, **pool_options)

43
backend/app/main.py Normal file
View File

@@ -0,0 +1,43 @@
from __future__ import annotations
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from sqlalchemy.exc import OperationalError
from .settings import settings
from .routes import ai, debug, metrics, trend
from .services.db_sample import print_db_sample_to_logs
app = FastAPI(title="Crawl BI Backend", version="0.1.0")


@app.exception_handler(OperationalError)
async def db_operational_error_handler(_: Request, exc: OperationalError):
    """Translate database connectivity failures into a 503 JSON response.

    Keeps raw driver errors out of 500 stack traces while still exposing
    the underlying message for debugging.
    """
    return JSONResponse(
        status_code=503,
        content={
            "detail": "数据库连接失败(请检查 MYSQL_HOST/USER/PASSWORD 以及 MySQL 授权 host/IP 白名单)。",
            # exc.orig carries the DBAPI-level error when present.
            "error": str(exc.orig) if getattr(exc, "orig", None) else str(exc),
        },
    )


# Allowed origins come from settings (comma-separated APP_CORS_ORIGINS env var).
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.cors_origins_list,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(metrics.router, prefix="/api/metrics", tags=["metrics"])
app.include_router(trend.router, prefix="/api/trend", tags=["trend"])
app.include_router(ai.router, prefix="/api/ai", tags=["ai"])
app.include_router(debug.router, prefix="/api/debug", tags=["debug"])


@app.on_event("startup")
def _startup_print_sample() -> None:
    # Optionally dump database samples into the logs at boot (gated by the
    # DEBUG_PRINT_DB_SAMPLE setting) to aid schema exploration.
    if settings.debug_print_db_sample:
        print_db_sample_to_logs()

View File

@@ -0,0 +1 @@

24
backend/app/routes/ai.py Normal file
View File

@@ -0,0 +1,24 @@
from __future__ import annotations
from fastapi import APIRouter
from pydantic import BaseModel, Field
from ..services.ai_insight import generate_insight
router = APIRouter()


class InsightRequest(BaseModel):
    """Request body for POST /api/ai/insight."""

    # Free-text analyst question driving retrieval + summarisation.
    query: str = Field(..., min_length=1, max_length=2000)
    # Optional focus product; None means analyse across all products.
    product_id: str | None = None
    # Number of retrieved documents to feed into the answer.
    top_k: int = Field(6, ge=1, le=20)


@router.post("/insight")
def insight(req: InsightRequest):
    """Produce a "spot winners -> validate with data -> decide to follow-sell"
    recommendation, based on vector retrieval plus an optional LLM.
    """
    return generate_insight(query=req.query, product_id=req.product_id, top_k=req.top_k)

View File

@@ -0,0 +1,18 @@
from __future__ import annotations
from fastapi import APIRouter, Query
from ..services.db_sample import print_db_sample_to_logs
router = APIRouter()


@router.post("/db-sample")
def db_sample(limit: int = Query(10, ge=1, le=200)):
    """Trigger a one-off dump of database sample rows into the backend logs.

    Useful for quickly understanding the content and structure of the data.
    """
    print_db_sample_to_logs(limit=limit)
    return {"ok": True, "printed": True, "limit": limit}

View File

@@ -0,0 +1,56 @@
from __future__ import annotations
from datetime import datetime, timedelta
import pandas as pd
from fastapi import APIRouter, HTTPException, Query
from sqlalchemy import text
from ..db import get_engine
from ..services.schema_discovery import discover_schema
from ..services.timeseries import normalize_timeseries
router = APIRouter()


@router.get("/overview")
def overview():
    """Return the headline metrics for the BI dashboard top cards.

    The backing table/columns are auto-inferred from the live database
    schema; responds 422 when no usable sales table can be discovered.
    """
    engine = get_engine()
    schema = discover_schema(engine)
    if not schema.sales_table:
        raise HTTPException(status_code=422, detail="未发现可用销量/订单明细表(需要至少包含 product_id + 时间 + 数量/金额)")
    with engine.connect() as conn:
        row = conn.execute(text(schema.overview_sql)).mappings().first()
    metrics = dict(row) if row else {}
    return {"schema": schema.model_dump(), "metrics": metrics}
@router.get("/sales/timeseries")
def sales_timeseries(
    product_id: str = Query(..., min_length=1),
    days: int = Query(30, ge=1, le=365),
):
    """Return the per-day units/GMV series for one product over the window."""
    engine = get_engine()
    schema = discover_schema(engine)
    if not schema.sales_table:
        raise HTTPException(status_code=422, detail="未发现可用销量/订单明细表")
    window_start = datetime.utcnow() - timedelta(days=days)
    with engine.connect() as conn:
        frame = pd.read_sql(
            text(schema.timeseries_sql),
            conn,
            params={"product_id": product_id, "since": window_start},
        )
    if frame.empty:
        return {"product_id": product_id, "points": []}
    return {
        "product_id": product_id,
        "points": normalize_timeseries(frame, ts_col="ds", value_cols=["units", "gmv"]),
    }

View File

@@ -0,0 +1,64 @@
from __future__ import annotations
import math
from datetime import datetime, timedelta
import pandas as pd
from fastapi import APIRouter, HTTPException, Query
from sqlalchemy import text
from ..db import get_engine
from ..services.forecast import forecast_next_n
from ..services.schema_discovery import discover_schema
from ..services.trend_engine import compute_trend_scores
router = APIRouter()


@router.get("/potential-winners")
def potential_winners(days: int = Query(14, ge=3, le=60), limit: int = Query(50, ge=1, le=200)):
    """Rank products by "potential winner" score over the last *days* days.

    Fetches an over-sampled candidate set (5x limit) so the scorer has room
    to reorder before the final cut.
    """
    engine = get_engine()
    schema = discover_schema(engine)
    if not schema.sales_table:
        raise HTTPException(status_code=422, detail="未发现可用销量/订单明细表")
    since = datetime.utcnow() - timedelta(days=days)
    with engine.connect() as conn:
        candidates = pd.read_sql(
            text(schema.trend_candidates_sql),
            conn,
            params={"since": since, "limit": limit * 5},
        )
    if candidates.empty:
        return {"items": []}
    ranked = (
        compute_trend_scores(candidates)
        .sort_values("potential_score", ascending=False)
        .head(limit)
    )
    return {"items": ranked.to_dict(orient="records")}
@router.get("/forecast")
def forecast(
    product_id: str = Query(..., min_length=1),
    days: int = Query(30, ge=7, le=180),
    horizon: int = Query(14, ge=1, le=60),
):
    """Forecast daily units for one product *horizon* days past its history."""
    engine = get_engine()
    schema = discover_schema(engine)
    if not schema.sales_table:
        raise HTTPException(status_code=422, detail="未发现可用销量/订单明细表")
    since = datetime.utcnow() - timedelta(days=days)
    with engine.connect() as conn:
        history = pd.read_sql(
            text(schema.timeseries_sql),
            conn,
            params={"product_id": product_id, "since": since},
        )
    if history.empty:
        return {"product_id": product_id, "forecast": []}
    history = history.sort_values("ds")
    observed = history["units"].astype(float).fillna(0.0).values
    predictions = forecast_next_n(observed, n=horizon)
    last_day = pd.to_datetime(history["ds"]).max()
    forecast_points = [
        {
            "ds": (last_day + pd.Timedelta(days=step)).to_pydatetime().isoformat(),
            # Clamp negatives: unit sales cannot go below zero.
            "units_hat": float(max(0.0, value)),
        }
        for step, value in enumerate(predictions, start=1)
    ]
    return {"product_id": product_id, "forecast": forecast_points}

View File

@@ -0,0 +1,73 @@
from __future__ import annotations
from typing import Any
import httpx
from ..settings import settings
def _rule_based_summary(query: str, retrieved: list[dict[str, Any]]) -> dict[str, Any]:
top = retrieved[:3]
bullets = []
for r in top:
pid = r.get("product_id") or r.get("id") or "-"
title = r.get("title") or ""
follow = r.get("follow_score")
life = r.get("lifecycle")
bullets.append(f"- {pid} {title}(跟卖指数={follow} 生命周期={life}")
return {
"mode": "rules_only",
"query": query,
"retrieved": retrieved,
"answer": "基于当前向量库/指标的规则摘要:\n" + "\n".join(bullets),
}
def generate_insight(query: str, product_id: str | None, top_k: int) -> dict[str, Any]:
    """Generate a "discover winners -> validate with data -> follow-sell" answer.

    Minimal runnable loop for now:
    - vector retrieval (returns empty until a vector store is wired in)
    - when OPENAI_API_KEY is set, call the LLM for structured advice
    - otherwise return the rule-engine summary
    """
    # TODO: hook up vector-store retrieval (Milvus/Azure etc.). The response
    # contract is kept stable so the frontend already works.
    retrieved: list[dict[str, Any]] = []
    if not settings.openai_api_key:
        return _rule_based_summary(query, retrieved)
    prompt = f"""你是电商数据分析与选品决策助手。
用户问题:{query}
请输出一个“发现爆款 -> 数据验证 -> 决策跟卖”的闭环建议,包含:
1) 结论摘要3-5条
2) 数据证据(引用关键指标:销量/增速/竞争/生命周期)
3) 风险点与反例至少3条
4) 可执行动作(选品、备货、投流、供应链)
如果没有足够数据,请明确说明缺口,并给出最小补充数据清单。
"""
    headers = {"Authorization": f"Bearer {settings.openai_api_key}"}
    payload = {
        "model": settings.openai_model,
        "input": prompt,
    }
    try:
        with httpx.Client(timeout=30.0) as client:
            r = client.post("https://api.openai.com/v1/responses", headers=headers, json=payload)
            r.raise_for_status()
            data = r.json()
            # Concatenate every text fragment from the Responses API output items.
            text = ""
            for item in data.get("output", []):
                for c in item.get("content", []):
                    if c.get("type") in ("output_text", "text"):
                        text += c.get("text", "")
            return {"mode": "llm", "query": query, "retrieved": retrieved, "answer": text.strip()}
    except Exception as e:
        # Network/API failure: degrade to the rules summary, preserving the error.
        out = _rule_based_summary(query, retrieved)
        out["mode"] = "rules_fallback"
        out["error"] = str(e)
        return out

View File

@@ -0,0 +1,105 @@
from __future__ import annotations
import json
import logging
from typing import Any
import pandas as pd
from sqlalchemy import Engine, text
from ..db import get_engine
from ..services.schema_discovery import discover_schema
from ..settings import settings
log = logging.getLogger("db_sample")
def _truncate_value(v: Any, max_len: int) -> Any:
if v is None:
return None
if isinstance(v, (int, float, bool)):
return v
s = str(v)
if len(s) <= max_len:
return s
return s[: max_len - 3] + "..."
def _df_to_records(df: pd.DataFrame, max_str_len: int) -> list[dict[str, Any]]:
    """Convert a DataFrame into JSON-friendly row dicts with truncated strings."""
    return [
        {str(col): _truncate_value(val, max_str_len) for col, val in row.items()}
        for _, row in df.iterrows()
    ]
def _list_tables(engine: Engine) -> list[str]:
    """Return all table names in the current database, alphabetically sorted."""
    query = text(
        """
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = DATABASE()
        ORDER BY table_name
        """
    )
    with engine.connect() as conn:
        return [row[0] for row in conn.execute(query).all()]
def _table_row_count(engine: Engine, table: str) -> int | None:
    """Best-effort COUNT(*) for *table*; None when the query fails.

    NOTE: the table name is interpolated directly into the SQL string, so
    callers must only pass names obtained from information_schema.
    """
    try:
        with engine.connect() as conn:
            count = conn.execute(text(f"SELECT COUNT(*) FROM {table}")).scalar()
    except Exception:
        return None
    return None if count is None else int(count)
def _sample_table(engine: Engine, table: str, limit: int) -> pd.DataFrame:
    # Fetch up to `limit` raw rows. The table name is interpolated directly
    # (not parameterisable in SQL); callers pass names from information_schema
    # only, never user input.
    with engine.connect() as conn:
        return pd.read_sql(text(f"SELECT * FROM {table} LIMIT {limit}"), conn)
def print_db_sample_to_logs(limit: int | None = None) -> None:
    """Log the database structure plus sample rows for exploratory analysis.

    Long string values are truncated so the log volume stays bounded.
    """
    engine = get_engine()
    schema = discover_schema(engine)
    # Fall back to the configured defaults when no explicit limit is given.
    eff_limit = int(limit or settings.debug_db_sample_limit)
    max_str_len = int(settings.debug_db_sample_max_str_len)
    tables = _list_tables(engine)
    # WARNING level so the dump shows up under default logging configs.
    log.warning("DB SAMPLE: discovered schema=%s", schema.model_dump())
    log.warning("DB SAMPLE: tables=%s", tables)
    # prioritize discovered tables first
    prioritized: list[str] = []
    for t in [schema.sales_table, schema.products_table]:
        if t and t in tables and t not in prioritized:
            prioritized.append(t)
    for t in tables:
        if t not in prioritized:
            prioritized.append(t)
    # Cap at 8 tables to bound log output.
    for t in prioritized[: min(len(prioritized), 8)]:
        cnt = _table_row_count(engine, t)
        try:
            df = _sample_table(engine, t, eff_limit)
            recs = _df_to_records(df, max_str_len=max_str_len)
            log.warning(
                "DB SAMPLE: table=%s rows=%s cols=%s sample=%s",
                t,
                cnt,
                list(df.columns),
                json.dumps(recs, ensure_ascii=False),
            )
        except Exception as e:
            # One failing table must not abort the whole dump.
            log.warning("DB SAMPLE: table=%s rows=%s sample_failed=%s", t, cnt, str(e))

View File

@@ -0,0 +1,28 @@
from __future__ import annotations
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing
def forecast_next_n(y: np.ndarray, n: int) -> np.ndarray:
    """Lightweight forecast of the next *n* points.

    Prefers Holt-Winters (additive trend), which is reasonably stable even
    on short series; on any failure falls back to a simple moving average.
    """
    series = np.asarray(y, dtype=float)
    # Too little history for a model: repeat the last value (or zero).
    if series.size < 3:
        last = series[-1] if series.size else 0.0
        return np.repeat(last, n)
    try:
        fitted = ExponentialSmoothing(
            series,
            trend="add",
            seasonal=None,
            initialization_method="estimated",
        ).fit(optimized=True)
        return np.asarray(fitted.forecast(n), dtype=float)
    except Exception:
        # Fallback: mean of the most recent window (3..7 points).
        window = int(min(7, max(3, series.size // 2)))
        recent_mean = float(np.mean(series[-window:])) if series.size else 0.0
        return np.repeat(recent_mean, n)

View File

@@ -0,0 +1,223 @@
from __future__ import annotations
from dataclasses import dataclass
from pydantic import BaseModel
from sqlalchemy import Engine, text
class DiscoveredSchema(BaseModel):
    """Auto-discovered mapping from the live MySQL schema onto BI concepts.

    Every field is optional: discovery may fail to find a usable sales or
    products table, so callers must check ``sales_table`` before using the
    SQL-building properties below.
    """

    # discovered table names
    sales_table: str | None = None
    products_table: str | None = None
    # required columns (in sales_table)
    sales_product_id_col: str | None = None
    sales_time_col: str | None = None
    sales_units_col: str | None = None
    sales_amount_col: str | None = None
    # optional product cols
    product_title_col: str | None = None
    product_created_col: str | None = None
    product_rank_col: str | None = None
    product_category_col: str | None = None
    product_desc_col: str | None = None

    @property
    def overview_sql(self) -> str:
        """Last-30-day headline aggregates for the dashboard cards."""
        # minimal, safe aggregations
        t = self.sales_table
        pid = self.sales_product_id_col
        ts = self.sales_time_col
        units = self.sales_units_col
        amount = self.sales_amount_col
        return f"""
        SELECT
            COUNT(DISTINCT {pid}) AS products,
            SUM(COALESCE({units}, 0)) AS units_30d,
            SUM(COALESCE({amount}, 0)) AS gmv_30d,
            COUNT(*) AS rows_30d
        FROM {t}
        WHERE {ts} >= (UTC_TIMESTAMP() - INTERVAL 30 DAY)
        """

    @property
    def timeseries_sql(self) -> str:
        """Per-day units/GMV for one product (binds :product_id and :since)."""
        t = self.sales_table
        pid = self.sales_product_id_col
        ts = self.sales_time_col
        units = self.sales_units_col
        amount = self.sales_amount_col
        return f"""
        SELECT
            DATE({ts}) AS ds,
            SUM(COALESCE({units}, 0)) AS units,
            SUM(COALESCE({amount}, 0)) AS gmv
        FROM {t}
        WHERE {pid} = :product_id
          AND {ts} >= :since
        GROUP BY DATE({ts})
        ORDER BY ds ASC
        """

    @property
    def trend_candidates_sql(self) -> str:
        """Per-product roll-ups since :since, joined to products when possible."""
        # produce per-product last-N-day rollups; join products when available
        t = self.sales_table
        pid = self.sales_product_id_col
        ts = self.sales_time_col
        units = self.sales_units_col
        amount = self.sales_amount_col
        p = self.products_table
        title = self.product_title_col
        created = self.product_created_col
        rank = self.product_rank_col
        cat = self.product_category_col
        join = ""
        if p:
            # Only join when the products table shares the sales pid column
            # name; otherwise stick to sales-only metrics.
            # (Also dropped a pointless f-prefix on the empty-string literal.)
            join = f"LEFT JOIN {p} p ON p.{pid} = s.{pid}" if self._products_has_same_pid_name else ""
        # if we can't confidently join, still return sales-only metrics
        select_p = ""
        if p and join:
            title_expr = f"p.{title}" if title else "NULL"
            cat_expr = f"p.{cat}" if cat else "NULL"
            created_expr = f"p.{created}" if created else "NULL"
            rank_expr = f"p.{rank}" if rank else "NULL"
            select_p = f""",
            {title_expr} AS title,
            {cat_expr} AS category,
            {created_expr} AS created_at,
            {rank_expr} AS rank_now
            """
        return f"""
        SELECT
            s.{pid} AS product_id,
            SUM(COALESCE(s.{units}, 0)) AS units,
            SUM(COALESCE(s.{amount}, 0)) AS gmv,
            COUNT(*) AS records,
            MIN(s.{ts}) AS first_seen,
            MAX(s.{ts}) AS last_seen
            {select_p}
        FROM {t} s
        {join}
        WHERE s.{ts} >= :since
        GROUP BY s.{pid}
        ORDER BY units DESC
        LIMIT :limit
        """

    @property
    def _products_has_same_pid_name(self) -> bool:
        # Discovery stores this flag dynamically via set_products_pid_same();
        # it is not a declared pydantic field, so read it with a raw getattr.
        return getattr(self, "__products_has_same_pid_name", False)

    def set_products_pid_same(self, v: bool) -> None:
        # BUG FIX: a plain setattr() routes through pydantic's __setattr__,
        # which rejects attributes that are not declared model fields and can
        # raise at runtime. Bypass it with object.__setattr__ so this
        # discovery-internal flag can be stored on the instance.
        object.__setattr__(self, "__products_has_same_pid_name", v)
# Candidate column names used by discover_schema(). Matching is
# case-insensitive and ordered by preference (first match wins).
SALES_UNITS_CANDIDATES = ["units", "qty", "quantity", "sales", "sold", "order_qty", "num"]
SALES_AMOUNT_CANDIDATES = ["amount", "gmv", "revenue", "pay_amount", "total", "price", "order_amount"]
TIME_CANDIDATES = ["created_at", "create_time", "created", "ts", "timestamp", "date_time", "paid_at", "order_time"]
PID_CANDIDATES = ["product_id", "item_id", "sku_id", "goods_id", "asin"]
PRODUCT_TITLE_CANDIDATES = ["title", "name", "product_name", "item_title"]
PRODUCT_DESC_CANDIDATES = ["description", "desc", "detail"]
PRODUCT_CREATED_CANDIDATES = ["created_at", "create_time", "created"]
PRODUCT_RANK_CANDIDATES = ["rank", "bsr_rank", "position"]
PRODUCT_CATEGORY_CANDIDATES = ["category", "cat", "category_name"]
def _lower(s: str | None) -> str:
return (s or "").lower()
def _pick(cols: list[str], candidates: list[str]) -> str | None:
cols_l = {_lower(c): c for c in cols}
for cand in candidates:
if cand in cols_l:
return cols_l[cand]
return None
def discover_schema(engine: Engine) -> DiscoveredSchema:
    """Best-effort automatic discovery when the table layout is unknown.

    - prefers a table containing product_id + timestamp + quantity/amount
      columns as the sales_table
    - prefers a table with title/name-like columns as the products_table
    """
    with engine.connect() as conn:
        rows = conn.execute(
            text(
                """
                SELECT table_name, column_name
                FROM information_schema.columns
                WHERE table_schema = DATABASE()
                ORDER BY table_name, ordinal_position
                """
            )
        ).all()
    by_table: dict[str, list[str]] = {}
    for t, c in rows:
        by_table.setdefault(t, []).append(c)
    # Best candidates so far: (score, table_name, matched-column mapping).
    best_sales: tuple[int, str, dict[str, str]] | None = None
    best_products: tuple[int, str, dict[str, str]] | None = None
    for t, cols in by_table.items():
        pid = _pick(cols, PID_CANDIDATES)
        ts = _pick(cols, TIME_CANDIDATES)
        units = _pick(cols, SALES_UNITS_CANDIDATES)
        amount = _pick(cols, SALES_AMOUNT_CANDIDATES)
        # Identity and time columns weigh most; quantity over amount.
        score = 0
        if pid:
            score += 3
        if ts:
            score += 3
        if units:
            score += 2
        if amount:
            score += 1
        # Threshold 6 ~= pid+time, or time+units+amount.
        if score >= 6:
            if best_sales is None or score > best_sales[0]:
                best_sales = (score, t, {"pid": pid, "ts": ts, "units": units, "amount": amount})
        title = _pick(cols, PRODUCT_TITLE_CANDIDATES)
        if title:
            pscore = 2
            if _pick(cols, PID_CANDIDATES):
                pscore += 2
            if _pick(cols, PRODUCT_CATEGORY_CANDIDATES):
                pscore += 1
            if _pick(cols, PRODUCT_DESC_CANDIDATES):
                pscore += 1
            if best_products is None or pscore > best_products[0]:
                best_products = (pscore, t, {"title": title})
    schema = DiscoveredSchema()
    if best_sales:
        _, t, m = best_sales
        schema.sales_table = t
        schema.sales_product_id_col = m["pid"]
        schema.sales_time_col = m["ts"]
        schema.sales_units_col = m["units"] or m["amount"]  # last resort
        schema.sales_amount_col = m["amount"] or m["units"]
    if best_products:
        _, pt, _ = best_products
        schema.products_table = pt
        cols = by_table.get(pt, [])
        schema.product_title_col = _pick(cols, PRODUCT_TITLE_CANDIDATES)
        schema.product_desc_col = _pick(cols, PRODUCT_DESC_CANDIDATES)
        schema.product_created_col = _pick(cols, PRODUCT_CREATED_CANDIDATES)
        schema.product_rank_col = _pick(cols, PRODUCT_RANK_CANDIDATES)
        schema.product_category_col = _pick(cols, PRODUCT_CATEGORY_CANDIDATES)
        # Remember whether the products table shares the sales pid column name
        # so trend_candidates_sql can decide to emit a join.
        schema.set_products_pid_same(_pick(cols, PID_CANDIDATES) == schema.sales_product_id_col)
    return schema

View File

@@ -0,0 +1,21 @@
from __future__ import annotations
import pandas as pd
def normalize_timeseries(df: pd.DataFrame, ts_col: str, value_cols: list[str]) -> list[dict]:
    """Convert a raw SQL result frame into JSON-safe time-series points.

    Rows with unparseable timestamps are dropped; missing or non-numeric
    values become 0.0.

    Args:
        df: frame holding one row per timestamp.
        ts_col: name of the timestamp column.
        value_cols: numeric columns to include in each point.

    Returns:
        Chronologically sorted list of dicts: {"ds": iso-timestamp, <col>: float}.
    """
    frame = df.copy()
    frame[ts_col] = pd.to_datetime(frame[ts_col], errors="coerce")
    frame = frame.dropna(subset=[ts_col]).sort_values(ts_col)
    points: list[dict] = []
    for _, row in frame.iterrows():
        point = {"ds": row[ts_col].to_pydatetime().isoformat()}
        for col in value_cols:
            value = row.get(col)
            try:
                # BUG FIX: NaN previously slipped through float(v) unchanged;
                # NaN is not valid JSON and breaks the API response, so treat
                # it like a missing value.
                point[col] = 0.0 if value is None or pd.isna(value) else float(value)
            except Exception:
                point[col] = 0.0
        points.append(point)
    return points

View File

@@ -0,0 +1,108 @@
from __future__ import annotations
import math
import numpy as np
import pandas as pd
def _sigmoid(x: float) -> float:
return 1.0 / (1.0 + math.exp(-x))
def compute_trend_scores(df: pd.DataFrame) -> pd.DataFrame:
    """Score trend candidates with explainable heuristics despite schema gaps.

    - Latent-phase detection (Potential Winners): newly listed / recently
      appeared products with fast-growing metrics
    - Burst: blends tiktok/search style fields when present, otherwise uses a
      units/GMV proxy for acceleration
    - Decision support: follow-sell index plus a lifecycle warning (missing
      fields are automatically down-weighted)

    Expects columns produced by trend_candidates_sql (first_seen, units, gmv,
    records; optionally tiktok_hot, search_growth, margin, supply_difficulty)
    and adds potential_score, burst_score, follow_score, lifecycle columns.
    """
    out = df.copy()
    now = pd.Timestamp.now(tz="UTC")
    # Latent/new phase: a recent first_seen combined with relatively high
    # units/gmv implies potential.
    first_seen = pd.to_datetime(out.get("first_seen"), errors="coerce", utc=True)
    age_days = (now - first_seen).dt.total_seconds() / 86400.0
    # Unknown age defaults to "very old" (999 days) so it cannot look fresh.
    age_days = age_days.fillna(999.0).clip(lower=0.0)
    units = pd.to_numeric(out.get("units"), errors="coerce").fillna(0.0)
    gmv = pd.to_numeric(out.get("gmv"), errors="coerce").fillna(0.0)
    # Scale normalisation: log1p damps outlier noise.
    units_s = np.log1p(units)
    gmv_s = np.log1p(gmv)
    freshness = 1.0 / (1.0 + (age_days / 7.0))  # 0~1
    scale = (units_s.rank(pct=True) * 0.6 + gmv_s.rank(pct=True) * 0.4).clip(0.0, 1.0)
    out["potential_score"] = (freshness * 0.55 + scale * 0.45).clip(0.0, 1.0)
    # Burst: prefer blending the optional external-heat fields when present.
    tiktok_raw = out["tiktok_hot"] if "tiktok_hot" in out.columns else pd.Series(np.nan, index=out.index)
    search_raw = out["search_growth"] if "search_growth" in out.columns else pd.Series(np.nan, index=out.index)
    tiktok = pd.to_numeric(tiktok_raw, errors="coerce")
    search_g = pd.to_numeric(search_raw, errors="coerce")
    if tiktok.notna().any() or search_g.notna().any():
        # Impute missing values with the column median before ranking.
        tiktok_s = tiktok.fillna(tiktok.median() if tiktok.notna().any() else 0.0)
        search_s = search_g.fillna(search_g.median() if search_g.notna().any() else 0.0)
        burst = (
            pd.Series(tiktok_s).rank(pct=True) * 0.6
            + pd.Series(search_s).rank(pct=True) * 0.4
        ).clip(0.0, 1.0)
    else:
        # No external heat fields: use scale + freshness as a proxy.
        burst = (scale * 0.65 + freshness * 0.35).clip(0.0, 1.0)
    out["burst_score"] = burst
    # Follow-sell index: competition (more records) counts against; profit
    # margin / supply difficulty are down-weighted when missing.
    records = pd.to_numeric(out.get("records"), errors="coerce").fillna(0.0)
    competition = records.rank(pct=True).clip(0.0, 1.0)  # higher = more crowded
    margin_raw = out["margin"] if "margin" in out.columns else pd.Series(np.nan, index=out.index)
    margin = pd.to_numeric(margin_raw, errors="coerce")
    if margin.notna().any():
        margin_s = margin.fillna(margin.median()).rank(pct=True).clip(0.0, 1.0)
        margin_w = 0.35
    else:
        # Neutral 0.5 at reduced weight when margin data is absent.
        margin_s = pd.Series(0.5, index=out.index)
        margin_w = 0.15
    supply_raw = out["supply_difficulty"] if "supply_difficulty" in out.columns else pd.Series(np.nan, index=out.index)
    supply = pd.to_numeric(supply_raw, errors="coerce")
    if supply.notna().any():
        supply_s = (1.0 - supply.fillna(supply.median()).rank(pct=True)).clip(0.0, 1.0)  # harder supply = lower score
        supply_w = 0.20
    else:
        supply_s = pd.Series(0.5, index=out.index)
        supply_w = 0.10
    # Trend contributes positively.
    trend = (out["potential_score"] * 0.5 + out["burst_score"] * 0.5).clip(0.0, 1.0)
    trend_w = 0.45
    comp_w = 0.20
    follow = (
        trend * trend_w
        + margin_s * margin_w
        + supply_s * supply_w
        + (1.0 - competition) * comp_w
    )
    out["follow_score"] = follow.clip(0.0, 1.0)
    # Lifecycle warning (simplified): too old + no growth + crowded =>
    # red-ocean / decline.
    lifecycle = []
    for i in out.index:
        a = float(age_days.loc[i])
        comp = float(competition.loc[i])
        tr = float(trend.loc[i])
        if a > 120 and comp > 0.7 and tr < 0.4:
            lifecycle.append("decline_or_red_ocean")
        elif a > 60 and comp > 0.75:
            lifecycle.append("red_ocean")
        elif a < 21 and tr > 0.65:
            lifecycle.append("early_growth")
        else:
            lifecycle.append("normal")
    out["lifecycle"] = lifecycle
    return out

42
backend/app/settings.py Normal file
View File

@@ -0,0 +1,42 @@
from __future__ import annotations
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application configuration, loaded from environment variables / .env."""

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")

    # --- MySQL connection (host/user/password/database are required) ---
    mysql_host: str
    mysql_port: int = 3306
    mysql_user: str
    mysql_password: str
    mysql_database: str
    # --- OpenAI (optional: without a key, AI routes fall back to rules) ---
    openai_api_key: str | None = None
    openai_model: str = "gpt-4.1-mini"
    openai_embed_model: str = "text-embedding-3-small"
    # --- Milvus vector store (presumably reserved for future retrieval;
    # nothing in this backend connects to it yet — confirm before removing) ---
    milvus_host: str = "localhost"
    milvus_port: int = 19530
    milvus_collection: str = "products_v1"
    # --- App behaviour ---
    app_env: str = "dev"
    app_cors_origins: str = "http://localhost:3000"
    # Debug: dump DB samples into the logs on startup / via the debug route.
    debug_print_db_sample: bool = False
    debug_db_sample_limit: int = 10
    debug_db_sample_max_str_len: int = 120

    @property
    def mysql_url(self) -> str:
        """SQLAlchemy DSN for the PyMySQL driver (utf8mb4 charset)."""
        from urllib.parse import quote_plus

        # BUG FIX: percent-encode user and password so credentials containing
        # URL-reserved characters (@ : / ? #) do not corrupt the DSN.
        user = quote_plus(self.mysql_user)
        password = quote_plus(self.mysql_password)
        return (
            f"mysql+pymysql://{user}:{password}"
            f"@{self.mysql_host}:{self.mysql_port}/{self.mysql_database}?charset=utf8mb4"
        )

    @property
    def cors_origins_list(self) -> list[str]:
        """Split the comma-separated CORS origins, dropping empty entries."""
        return [o.strip() for o in self.app_cors_origins.split(",") if o.strip()]


# Single shared settings instance imported across the app.
settings = Settings()

13
backend/requirements.txt Normal file
View File

@@ -0,0 +1,13 @@
fastapi==0.115.11
uvicorn[standard]==0.34.0
pydantic==2.10.6
pydantic-settings==2.8.1
sqlalchemy==2.0.39
pymysql==1.1.1
pandas==2.2.3
numpy==2.2.3
scikit-learn==1.6.1
statsmodels==0.14.4
httpx==0.28.1
tenacity==9.0.0
python-dotenv==1.0.1