Files
Airtep/gig-poc/infrastructure/scripts/load-baseline.sh
2026-04-01 14:19:25 +08:00

138 lines
5.1 KiB
Bash
Executable File

#!/usr/bin/env sh
# Capacity baseline: sends a fixed number of concurrent requests to a set of
# PoC API endpoints and renders the measurements as a Markdown report.
#
# Usage: load-baseline.sh [OUTPUT_PATH]
#   OUTPUT_PATH  report destination; defaults to docs/CAPACITY_BASELINE.md
#                two directories above the directory holding this script.
# Environment:
#   API_BASE        base URL of the API under test (default http://127.0.0.1:8000)
#   TOTAL_REQUESTS  requests issued per scenario (default 400)
#   CONCURRENCY     worker threads per scenario (default 40)
# Requires: curl, python3.
set -eu

API_BASE="${API_BASE:-http://127.0.0.1:8000}"
TOTAL_REQUESTS="${TOTAL_REQUESTS:-400}"
CONCURRENCY="${CONCURRENCY:-40}"
# `CDPATH=` keeps `cd` from printing or resolving through a user-defined CDPATH.
OUTPUT_PATH="${1:-$(CDPATH= cd -- "$(dirname "$0")/../.." && pwd)/docs/CAPACITY_BASELINE.md}"

# Scratch directory for request bodies and per-scenario results; removed on exit.
TMP_DIR="$(mktemp -d)"
trap 'rm -rf "$TMP_DIR"' EXIT

# Prints field $2 of the first entry of the "items" array returned by GET $1,
# or an empty line when the array is missing/empty or the field is absent.
first_item_field() {
  # Download first instead of piping: POSIX sh has no pipefail, so a curl
  # failure inside a pipeline would be hidden behind the parser's exit status.
  curl -fsS "$1" >"$TMP_DIR/listing.json" || return
  python3 -c 'import json,sys; data=json.load(sys.stdin); print((data.get("items") or [{}])[0].get(sys.argv[1],""))' "$2" <"$TMP_DIR/listing.json"
}

echo "[BASELINE] health check"
curl -fsS "$API_BASE/health" >/dev/null

echo "[BASELINE] ensure bootstrap data"
curl -fsS -X POST "$API_BASE/poc/ingest/bootstrap" >/dev/null

# The match scenarios need one existing job id and one existing worker id.
JOB_ID="$(first_item_field "$API_BASE/poc/jobs" job_id)"
WORKER_ID="$(first_item_field "$API_BASE/poc/workers" worker_id)"
[ -n "$JOB_ID" ] || { echo "no job id found" >&2; exit 1; }
[ -n "$WORKER_ID" ] || { echo "no worker id found" >&2; exit 1; }
# Benchmarks one endpoint and stores the aggregated metrics as JSON.
# Globals:
#   TOTAL_REQUESTS (read) - number of requests to issue
#   CONCURRENCY    (read) - size of the worker thread pool
# Arguments:
#   $1 - scenario name, used only in the completion log line
#   $2 - HTTP method
#   $3 - request URL
#   $4 - file holding the request body, or "-" to send no body
#   $5 - path the JSON result is written to
# Outputs: "[BASELINE] done <name>" on stdout after a successful run.
# Returns: non-zero (and logs nothing) when the embedded benchmark fails.
run_case() {
  # Positional parameters are used directly: POSIX sh has no `local`, and
  # copying them into NAME/METHOD/... would overwrite same-named globals.
  python3 - "$2" "$3" "$4" "$TOTAL_REQUESTS" "$CONCURRENCY" "$5" <<'PY' || return
import json
import sys
import time
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed

method, url, body_file, total, concurrency, out_file = sys.argv[1:]
try:
    total = int(total)
    concurrency = int(concurrency)
except ValueError:
    sys.exit("TOTAL_REQUESTS and CONCURRENCY must be integers")
if total < 0 or concurrency < 1:
    # ThreadPoolExecutor rejects max_workers <= 0; say so without a traceback.
    sys.exit("TOTAL_REQUESTS must be >= 0 and CONCURRENCY must be >= 1")

payload = None
if body_file != "-":
    with open(body_file, "rb") as fh:
        payload = fh.read()


def once():
    """Send one request and return (succeeded, elapsed milliseconds)."""
    start = time.perf_counter()
    req = urllib.request.Request(url=url, method=method)
    req.add_header("Content-Type", "application/json")
    try:
        # data=None leaves the request without a body, so one call covers
        # both the bodiless and the JSON-body scenarios.
        # The response body is not read; only the status code is inspected.
        with urllib.request.urlopen(req, data=payload, timeout=20) as resp:
            code = resp.getcode()
        ok = 200 <= code < 400
    except Exception:
        # Anything raised while sending or evaluating the request counts as
        # a failed request rather than aborting the whole run.
        ok = False
    return ok, (time.perf_counter() - start) * 1000


durations = []
success = 0
fail = 0
bench_start = time.perf_counter()
with ThreadPoolExecutor(max_workers=concurrency) as ex:
    futures = [ex.submit(once) for _ in range(total)]
    # Counters are only touched here, in the main thread.
    for f in as_completed(futures):
        ok, ms = f.result()
        durations.append(ms)
        if ok:
            success += 1
        else:
            fail += 1
elapsed = time.perf_counter() - bench_start
durations.sort()


def pct(p):
    """Return the sorted sample at index int(n * p), clamped to the last one."""
    if not durations:
        return 0.0
    idx = min(len(durations) - 1, int(len(durations) * p))
    return round(durations[idx], 2)


result = {
    "total": total,
    "success": success,
    "fail": fail,
    "success_rate": round(success / total, 4) if total else 0.0,
    # Throughput counts every issued request, failed ones included.
    "rps": round(total / elapsed, 2) if elapsed > 0 else 0.0,
    "latency_ms_avg": round(sum(durations) / len(durations), 2) if durations else 0.0,
    "latency_ms_p95": pct(0.95),
    "latency_ms_p99": pct(0.99),
}
with open(out_file, "w", encoding="utf-8") as f:
    json.dump(result, f, ensure_ascii=False, indent=2)
PY
  echo "[BASELINE] done $1"
}
# Writes the JSON object {"<key>":"<id>","top_n":10} plus a newline to a file.
# Arguments: $1 - key name, $2 - id value, $3 - destination file
# json.dumps escapes the id, so a quote or backslash in it cannot produce a
# malformed request body; the compact separators keep the layout unchanged
# for plain ids.
write_match_payload() {
  python3 -c 'import json,sys; print(json.dumps({sys.argv[1]: sys.argv[2], "top_n": 10}, separators=(",", ":")))' "$1" "$2" >"$3"
}

write_match_payload job_id "$JOB_ID" "$TMP_DIR/match_workers.json"
write_match_payload worker_id "$WORKER_ID" "$TMP_DIR/match_jobs.json"

# Read-only endpoints, no request body.
run_case "health" "GET" "$API_BASE/health" "-" "$TMP_DIR/health.result.json"
run_case "jobs_list" "GET" "$API_BASE/poc/jobs" "-" "$TMP_DIR/jobs.result.json"

# Synchronous matching.
run_case "match_workers" "POST" "$API_BASE/poc/match/workers" "$TMP_DIR/match_workers.json" "$TMP_DIR/match_workers.result.json"
run_case "match_jobs" "POST" "$API_BASE/poc/match/jobs" "$TMP_DIR/match_jobs.json" "$TMP_DIR/match_jobs.result.json"

# Same requests repeated; presumably answered from a cache warmed by the two
# runs above - confirm against the API implementation.
run_case "match_workers_cached" "POST" "$API_BASE/poc/match/workers" "$TMP_DIR/match_workers.json" "$TMP_DIR/match_workers_cached.result.json"
run_case "match_jobs_cached" "POST" "$API_BASE/poc/match/jobs" "$TMP_DIR/match_jobs.json" "$TMP_DIR/match_jobs_cached.result.json"

# Async variants of the match endpoints.
run_case "match_workers_async_enqueue" "POST" "$API_BASE/poc/match/workers/async" "$TMP_DIR/match_workers.json" "$TMP_DIR/match_workers_async.result.json"
run_case "match_jobs_async_enqueue" "POST" "$API_BASE/poc/match/jobs/async" "$TMP_DIR/match_jobs.json" "$TMP_DIR/match_jobs_async.result.json"
# Render the Markdown report from the per-scenario result files in TMP_DIR.
NOW="$(date '+%Y-%m-%d %H:%M:%S %z')"
mkdir -p "$(dirname "$OUTPUT_PATH")"

# The report is assembled in the scratch directory and copied afterwards, so a
# failure while reading a result file cannot leave a truncated report over an
# existing baseline at OUTPUT_PATH.
REPORT_DRAFT="$TMP_DIR/report.md"
{
  echo "# 容量基线(自动生成)"
  echo
  printf '%s\n' "- 生成时间: $NOW"
  printf '%s\n' "- API_BASE: $API_BASE"
  printf '%s\n' "- TOTAL_REQUESTS: $TOTAL_REQUESTS"
  printf '%s\n' "- CONCURRENCY: $CONCURRENCY"
  echo
  echo "| 场景 | 成功率 | RPS | 平均延迟(ms) | P95(ms) | P99(ms) |"
  echo "| --- | --- | --- | --- | --- | --- |"
  # One interpreter start emits every table row; each name is the stem of a
  # "<name>.result.json" file in TMP_DIR and doubles as the row label.
  python3 - "$TMP_DIR" health jobs match_workers match_jobs match_workers_cached match_jobs_cached match_workers_async match_jobs_async <<'PY' || exit 1
import json
import sys

tmp_dir, *cases = sys.argv[1:]
for case in cases:
    with open(f"{tmp_dir}/{case}.result.json", "r", encoding="utf-8") as fh:
        data = json.load(fh)
    print(f"| {case} | {data['success_rate']} | {data['rps']} | {data['latency_ms_avg']} | {data['latency_ms_p95']} | {data['latency_ms_p99']} |")
PY
  echo
  echo "> 建议:该基线仅代表当前单机/当前数据量下表现,发布前请在目标环境按 2x/5x 峰值复测。"
} >"$REPORT_DRAFT"

# Written through a redirect (not mv) so an existing destination keeps its
# mode and any symlink at OUTPUT_PATH is followed rather than replaced.
cat "$REPORT_DRAFT" >"$OUTPUT_PATH"
echo "[BASELINE] report generated at $OUTPUT_PATH"