Files
Crawl_demo/backend/app/services/trend_engine.py
2026-03-18 18:57:58 +08:00

109 lines
4.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import math
import numpy as np
import pandas as pd
def _sigmoid(x: float) -> float:
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of *x*.

    The computation is split by sign so that ``math.exp`` is only ever called
    with a non-positive argument.  The naive form ``1 / (1 + exp(-x))`` raises
    ``OverflowError`` for very negative *x* (roughly ``x < -709``); this form
    saturates cleanly to ``0.0`` / ``1.0`` instead.
    """
    if x >= 0.0:
        return 1.0 / (1.0 + math.exp(-x))
    # For negative x use the algebraically equivalent e**x / (1 + e**x),
    # whose exponent is negative and therefore can only underflow to 0.0.
    exp_x = math.exp(x)
    return exp_x / (1.0 + exp_x)
def _numeric_column(frame: pd.DataFrame, name: str) -> pd.Series:
    """Return ``frame[name]`` coerced to numbers, or an all-NaN series if absent."""
    if name in frame.columns:
        return pd.to_numeric(frame[name], errors="coerce")
    return pd.Series(np.nan, index=frame.index, dtype="float64")


def _age_in_days(frame: pd.DataFrame, now: pd.Timestamp) -> pd.Series:
    """Return days elapsed since ``first_seen``; 999 where unknown, never negative."""
    if "first_seen" not in frame.columns:
        return pd.Series(999.0, index=frame.index, dtype="float64")
    first_seen = pd.to_datetime(frame["first_seen"], errors="coerce", utc=True)
    age = (now - first_seen).dt.total_seconds() / 86400.0
    # Unparseable dates are treated as "very old"; future dates as "just seen".
    return age.fillna(999.0).clip(lower=0.0)


def compute_trend_scores(df: pd.DataFrame) -> pd.DataFrame:
    """Score each row of *df* with interpretable trend metrics.

    The table schema is not assumed to be fixed: every input column is
    optional, and a missing or unparseable column falls back to a neutral
    default instead of raising.

    Columns read (all optional):
        ``first_seen``: timestamp of first appearance (missing -> age 999 days).
        ``units``, ``gmv``, ``records``: numeric, missing values count as 0.
        ``tiktok_hot``, ``search_growth``: external heat signals.
        ``margin``, ``supply_difficulty``: used by the follow score; when
        absent a neutral 0.5 is used with a reduced weight.

    Columns added to the returned copy:
        ``potential_score``: freshness (recency of ``first_seen``) blended
            with the percentile rank of log-scaled units / GMV.
        ``burst_score``: percentile blend of ``tiktok_hot`` and
            ``search_growth`` when either has data, otherwise a proxy built
            from scale and freshness.
        ``follow_score``: trend + margin + supply ease + (1 - competition),
            clipped to [0, 1].
        ``lifecycle``: one of ``"decline_or_red_ocean"``, ``"red_ocean"``,
            ``"early_growth"`` or ``"normal"``.

    Args:
        df: Input frame; it is not modified.

    Returns:
        A copy of *df* with the four columns above appended.
    """
    out = df.copy()
    now = pd.Timestamp.now(tz="UTC")

    # Potential: the more recent first_seen is, and the larger units/gmv are
    # relative to the other rows, the higher the score.
    age_days = _age_in_days(out, now)
    units = _numeric_column(out, "units").fillna(0.0)
    gmv = _numeric_column(out, "gmv").fillna(0.0)

    # log1p dampens the influence of very large sellers before ranking.
    units_s = np.log1p(units)
    gmv_s = np.log1p(gmv)
    freshness = 1.0 / (1.0 + (age_days / 7.0))  # in (0, 1]; 0.5 at 7 days old
    scale = (
        units_s.rank(pct=True) * 0.6 + gmv_s.rank(pct=True) * 0.4
    ).clip(0.0, 1.0)
    out["potential_score"] = (freshness * 0.55 + scale * 0.45).clip(0.0, 1.0)

    # Burst: prefer the optional external heat fields when any value exists.
    tiktok = _numeric_column(out, "tiktok_hot")
    search_g = _numeric_column(out, "search_growth")
    if tiktok.notna().any() or search_g.notna().any():
        # Fill gaps with the column median; a fully empty column becomes 0.
        tiktok_s = tiktok.fillna(tiktok.median() if tiktok.notna().any() else 0.0)
        search_s = search_g.fillna(
            search_g.median() if search_g.notna().any() else 0.0
        )
        burst = (
            tiktok_s.rank(pct=True) * 0.6 + search_s.rank(pct=True) * 0.4
        ).clip(0.0, 1.0)
    else:
        # No external heat fields: use scale + freshness as a proxy.
        burst = (scale * 0.65 + freshness * 0.35).clip(0.0, 1.0)
    out["burst_score"] = burst

    # Follow score: more records means more competition (negative signal).
    records = _numeric_column(out, "records").fillna(0.0)
    competition = records.rank(pct=True).clip(0.0, 1.0)  # higher = more crowded

    # Margin and supply difficulty get a smaller weight when they are missing.
    margin = _numeric_column(out, "margin")
    if margin.notna().any():
        margin_s = margin.fillna(margin.median()).rank(pct=True).clip(0.0, 1.0)
        margin_w = 0.35
    else:
        margin_s = pd.Series(0.5, index=out.index)
        margin_w = 0.15

    supply = _numeric_column(out, "supply_difficulty")
    if supply.notna().any():
        # Harder supply chain -> lower score.
        supply_s = (
            1.0 - supply.fillna(supply.median()).rank(pct=True)
        ).clip(0.0, 1.0)
        supply_w = 0.20
    else:
        supply_s = pd.Series(0.5, index=out.index)
        supply_w = 0.10

    # Trend contributes positively.
    trend = (
        out["potential_score"] * 0.5 + out["burst_score"] * 0.5
    ).clip(0.0, 1.0)
    trend_w = 0.45
    comp_w = 0.20
    follow = (
        trend * trend_w
        + margin_s * margin_w
        + supply_s * supply_w
        + (1.0 - competition) * comp_w
    )
    # The weights need not sum to 1; the final clip bounds the result.
    out["follow_score"] = follow.clip(0.0, 1.0)

    # Lifecycle warning (simplified). Evaluated positionally so a duplicated
    # index cannot break it; np.select picks the first matching condition,
    # which preserves the if/elif priority order.
    conditions = [
        ((age_days > 120) & (competition > 0.7) & (trend < 0.4)).to_numpy(),
        ((age_days > 60) & (competition > 0.75)).to_numpy(),
        ((age_days < 21) & (trend > 0.65)).to_numpy(),
    ]
    labels = ["decline_or_red_ocean", "red_ocean", "early_growth"]
    out["lifecycle"] = np.select(conditions, labels, default="normal").tolist()
    return out