fix:优化wsl环境下的边缘设备执行逻辑

This commit is contained in:
Daniel
2026-04-07 02:06:43 +08:00
parent afec0980d4
commit 6b9b9d60b5
7 changed files with 439 additions and 4 deletions

View File

@@ -6,6 +6,8 @@ WORKER_BASE_URL=http://127.0.0.1:8000
WS_POLL_INTERVAL_SEC=1.0
DISPATCH_WS_URL=ws://127.0.0.1:8060/ws/edge/edge-a4000-01
EDGE_POLL_INTERVAL_SEC=1.0
EDGE_ALLOW_REMOTE_UPDATE=true
EDGE_ALLOW_REMOTE_RESTART=true
EDGE_DISPATCH_HOST=0.0.0.0
EDGE_DISPATCH_PORT=8020
EDGE_MAX_DISPATCH_RECORDS=2000

View File

@@ -76,6 +76,34 @@ cd video_worker
bash scripts/start_edge_device_local.sh
```
Windows + WSL 一键边缘设备启动(推荐):
```powershell
cd video_worker
.\scripts\edge_device_wsl.ps1 -Action start
```
常用操作:
```powershell
.\scripts\edge_device_wsl.ps1 -Action status
.\scripts\edge_device_wsl.ps1 -Action restart
.\scripts\edge_device_wsl.ps1 -Action stop
```
多发行版时可指定:
```powershell
.\scripts\edge_device_wsl.ps1 -Action start -Distro Ubuntu-22.04
```
边缘设备停止 / 重启:
```bash
bash scripts/stop_edge_device_local.sh
bash scripts/restart_edge_device_local.sh
```
独立 WS 网关服务(远程推荐):
```bash
@@ -223,6 +251,46 @@ video_worker/
- 查看在线边缘设备
- `WS /ws/edge/{device_id}`edge_dispatch_service
- 边缘设备接入通道
- `POST /devices/{device_id}/command`edge_dispatch_service
- 通过 HTTP 下发设备运维指令(中心自动转 WS
- `GET /commands/{dispatch_id}`edge_dispatch_service
- 查询设备指令执行状态和结果
边缘设备 WS 控制指令(由上游下发到 `edge_device_client.py`
- `generate`: 下发生成任务
- `update_code`: 拉取最新代码(默认执行 `git fetch --all && git checkout <branch> && git pull --ff-only origin <branch>`,可通过 `command` 自定义)
- `restart_service`: 执行边缘本地重启脚本
- `ping`: 心跳探活,设备回 `pong`
HTTP 下发设备指令示例(推荐上游系统使用):
```bash
# 1) 更新边缘设备代码
curl -X POST http://<dispatch-host>:8020/devices/edge-a4000-01/command \
-H "Content-Type: application/json" \
-d '{
"command": "update_code",
"branch": "master"
}'
# 2) 重启边缘设备服务
curl -X POST http://<dispatch-host>:8020/devices/edge-a4000-01/command \
-H "Content-Type: application/json" \
-d '{
"command": "restart_service"
}'
# 3) 设备心跳检查
curl -X POST http://<dispatch-host>:8020/devices/edge-a4000-01/command \
-H "Content-Type: application/json" \
-d '{
"command": "ping"
}'
# 4) 查询指令执行状态(使用上一步返回的 dispatch_id
curl http://<dispatch-host>:8020/commands/<dispatch_id>
```
参数限制:

View File

@@ -29,6 +29,22 @@ class DispatchResponse(BaseModel):
created_at: str
class DeviceCommandRequest(BaseModel):
command: str
dispatch_id: Optional[str] = None
branch: Optional[str] = None
shell_command: Optional[str] = None
payload: Optional[dict[str, Any]] = None
class DeviceCommandResponse(BaseModel):
dispatch_id: str
device_id: str
command: str
status: str
created_at: str
@dataclass
class EdgeConnection:
device_id: str
@@ -42,6 +58,7 @@ class EdgeDispatchManager:
def __init__(self) -> None:
self.connections: dict[str, EdgeConnection] = {}
self.dispatches: dict[str, dict[str, Any]] = {}
self.commands: dict[str, dict[str, Any]] = {}
self.lock = asyncio.Lock()
async def register(self, device_id: str, websocket: WebSocket) -> EdgeConnection:
@@ -130,17 +147,68 @@ class EdgeDispatchManager:
raise
return record
async def create_command(self, conn: EdgeConnection, body: DeviceCommandRequest) -> dict[str, Any]:
await self._prune_if_needed()
command = body.command.strip().lower()
allowed = {"update_code", "restart_service", "ping"}
if command not in allowed:
raise HTTPException(status_code=400, detail=f"unsupported command: {command}")
dispatch_id = body.dispatch_id or uuid4().hex
now = utc_now_iso()
record = {
"dispatch_id": dispatch_id,
"device_id": conn.device_id,
"command": command,
"status": "DISPATCHED",
"created_at": now,
"updated_at": now,
"request": body.model_dump(),
"result": None,
"error": None,
}
payload: dict[str, Any] = {
"event": command,
"dispatch_id": dispatch_id,
}
if body.branch:
payload["branch"] = body.branch
if body.shell_command:
payload["command"] = body.shell_command
if body.payload:
payload.update(body.payload)
async with self.lock:
self.commands[dispatch_id] = record
target = self.connections.get(conn.device_id)
if target is None:
raise HTTPException(status_code=404, detail=f"device disconnected: {conn.device_id}")
target.last_seen = now
try:
await conn.websocket.send_json(payload)
except Exception as exc:
async with self.lock:
record["status"] = "FAILED"
record["error"] = f"command send failed: {exc}"
record["updated_at"] = utc_now_iso()
raise
return record
async def _prune_if_needed(self) -> None:
max_records = max(100, int(settings.edge_max_dispatch_records))
async with self.lock:
total = len(self.dispatches)
total = len(self.dispatches) + len(self.commands)
if total < max_records:
return
over = total - max_records + 1
done = [v for v in self.dispatches.values() if v.get("status") in {"SUCCEEDED", "FAILED"}]
done.extend([v for v in self.commands.values() if v.get("status") in {"SUCCEEDED", "FAILED"}])
done.sort(key=lambda x: x.get("updated_at", ""))
for rec in done[:over]:
self.dispatches.pop(rec["dispatch_id"], None)
self.commands.pop(rec["dispatch_id"], None)
async def mark_event(self, device_id: str, event: dict[str, Any]) -> None:
now = utc_now_iso()
@@ -154,6 +222,8 @@ class EdgeDispatchManager:
return
record = self.dispatches.get(dispatch_id)
if record is None:
record = self.commands.get(dispatch_id)
if record is None:
return
@@ -175,6 +245,18 @@ class EdgeDispatchManager:
record["error"] = event.get("error")
if conn:
conn.busy = False
elif evt == "command_status":
status_val = event.get("status") or "RUNNING"
record["status"] = status_val
elif evt == "command_result":
status_val = event.get("status") or "SUCCEEDED"
record["status"] = status_val
record["result"] = event
if status_val == "FAILED":
record["error"] = event.get("error") or event.get("stderr")
elif evt == "pong":
record["status"] = "SUCCEEDED"
record["result"] = event
record["updated_at"] = now
@@ -186,6 +268,11 @@ class EdgeDispatchManager:
record["status"] = "FAILED"
record["error"] = f"device disconnected: {device_id}"
record["updated_at"] = now
for record in self.commands.values():
if record["device_id"] == device_id and record["status"] in {"DISPATCHED", "RUNNING", "PENDING"}:
record["status"] = "FAILED"
record["error"] = f"device disconnected: {device_id}"
record["updated_at"] = now
manager = EdgeDispatchManager()
@@ -234,6 +321,34 @@ async def get_dispatch(dispatch_id: str) -> dict[str, Any]:
return record
@app.post("/devices/{device_id}/command", response_model=DeviceCommandResponse)
async def device_command(device_id: str, body: DeviceCommandRequest) -> DeviceCommandResponse:
conn = await manager.select_device(device_id)
try:
record = await manager.create_command(conn, body)
except WebSocketDisconnect as exc:
await manager.unregister(conn.device_id)
raise HTTPException(status_code=503, detail=f"device disconnected during command: {conn.device_id}") from exc
except RuntimeError as exc:
raise HTTPException(status_code=503, detail=str(exc)) from exc
return DeviceCommandResponse(
dispatch_id=record["dispatch_id"],
device_id=record["device_id"],
command=record["command"],
status=record["status"],
created_at=record["created_at"],
)
@app.get("/commands/{dispatch_id}")
async def get_command(dispatch_id: str) -> dict[str, Any]:
record = manager.commands.get(dispatch_id)
if record is None:
raise HTTPException(status_code=404, detail=f"command not found: {dispatch_id}")
return record
@app.post("/dispatch/{dispatch_id}/artifacts")
async def upload_artifacts(
dispatch_id: str,

View File

@@ -2,6 +2,7 @@ import asyncio
import json
import os
from pathlib import Path
import subprocess
import sys
import time
from urllib.parse import urlparse
@@ -13,6 +14,12 @@ DISPATCH_WS_URL = os.getenv("DISPATCH_WS_URL", "ws://127.0.0.1:8020/ws/edge/edge
WORKER_BASE_URL = os.getenv("WORKER_BASE_URL", "http://127.0.0.1:8000")
POLL_INTERVAL = float(os.getenv("EDGE_POLL_INTERVAL_SEC", "1.0"))
DISPATCH_HTTP_BASE = os.getenv("DISPATCH_HTTP_BASE", "")
ALLOW_REMOTE_UPDATE = os.getenv("EDGE_ALLOW_REMOTE_UPDATE", "true").lower() in {"1", "true", "yes", "on"}
ALLOW_REMOTE_RESTART = os.getenv("EDGE_ALLOW_REMOTE_RESTART", "true").lower() in {"1", "true", "yes", "on"}
SCRIPT_DIR = Path(__file__).resolve().parent
ROOT_DIR = SCRIPT_DIR.parent
RESTART_SCRIPT = ROOT_DIR / "scripts" / "restart_edge_device_local.sh"
if len(sys.argv) > 1:
DISPATCH_WS_URL = sys.argv[1]
@@ -41,6 +48,18 @@ def worker_get(path: str):
return r.json()
def run_shell(command: str, timeout_sec: int = 1200) -> tuple[int, str, str]:
proc = subprocess.run(
command,
cwd=str(ROOT_DIR),
shell=True,
text=True,
capture_output=True,
timeout=timeout_sec,
)
return proc.returncode, proc.stdout.strip(), proc.stderr.strip()
def upload_artifacts(dispatch_id: str, task_id: str, result: dict) -> dict:
candidate_fields = ["video_path", "first_frame_path", "metadata_path", "log_path"]
existing_paths = []
@@ -78,6 +97,12 @@ def upload_artifacts(dispatch_id: str, task_id: str, result: dict) -> dict:
fh.close()
def _short_text(text: str, limit: int = 1500) -> str:
if len(text) <= limit:
return text
return text[:limit] + "...(truncated)"
async def handle_generate(ws, data: dict):
dispatch_id = data["dispatch_id"]
req = data["request"]
@@ -114,13 +139,126 @@ async def handle_generate(ws, data: dict):
result_payload["artifact_urls"] = artifact_urls
else:
result_payload["error"] = result.get("error")
await ws.send(
json.dumps(result_payload, ensure_ascii=False)
)
await ws.send(json.dumps(result_payload, ensure_ascii=False))
return
await asyncio.sleep(POLL_INTERVAL)
async def handle_update_code(ws, data: dict):
dispatch_id = data.get("dispatch_id", "")
if not ALLOW_REMOTE_UPDATE:
await ws.send(
json.dumps(
{
"event": "command_result",
"dispatch_id": dispatch_id,
"command": "update_code",
"status": "FAILED",
"error": "EDGE_ALLOW_REMOTE_UPDATE=false",
},
ensure_ascii=False,
)
)
return
branch = data.get("branch", "master")
git_command = data.get("command") or f"git fetch --all && git checkout {branch} && git pull --ff-only origin {branch}"
await ws.send(
json.dumps(
{"event": "command_status", "dispatch_id": dispatch_id, "command": "update_code", "status": "RUNNING"},
ensure_ascii=False,
)
)
code, out, err = await asyncio.to_thread(run_shell, git_command, 1800)
payload = {
"event": "command_result",
"dispatch_id": dispatch_id,
"command": "update_code",
"status": "SUCCEEDED" if code == 0 else "FAILED",
"exit_code": code,
"stdout": _short_text(out),
"stderr": _short_text(err),
}
await ws.send(json.dumps(payload, ensure_ascii=False))
async def handle_restart_service(ws, data: dict):
dispatch_id = data.get("dispatch_id", "")
if not ALLOW_REMOTE_RESTART:
await ws.send(
json.dumps(
{
"event": "command_result",
"dispatch_id": dispatch_id,
"command": "restart_service",
"status": "FAILED",
"error": "EDGE_ALLOW_REMOTE_RESTART=false",
},
ensure_ascii=False,
)
)
return
if not RESTART_SCRIPT.exists():
await ws.send(
json.dumps(
{
"event": "command_result",
"dispatch_id": dispatch_id,
"command": "restart_service",
"status": "FAILED",
"error": f"restart script missing: {RESTART_SCRIPT}",
},
ensure_ascii=False,
)
)
return
await ws.send(
json.dumps(
{"event": "command_status", "dispatch_id": dispatch_id, "command": "restart_service", "status": "RUNNING"},
ensure_ascii=False,
)
)
await ws.send(
json.dumps(
{
"event": "command_result",
"dispatch_id": dispatch_id,
"command": "restart_service",
"status": "SUCCEEDED",
"message": "restart script launched",
},
ensure_ascii=False,
)
)
subprocess.Popen(
["bash", str(RESTART_SCRIPT)],
cwd=str(ROOT_DIR),
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
start_new_session=True,
)
raise SystemExit(0)
async def handle_ping(ws, data: dict):
await ws.send(
json.dumps(
{
"event": "pong",
"dispatch_id": data.get("dispatch_id", ""),
"status": "ok",
"worker_base_url": WORKER_BASE_URL,
},
ensure_ascii=False,
)
)
async def main() -> None:
while True:
try:
@@ -132,8 +270,16 @@ async def main() -> None:
event = data.get("event")
if event == "generate":
await handle_generate(ws, data)
elif event == "update_code":
await handle_update_code(ws, data)
elif event == "restart_service":
await handle_restart_service(ws, data)
elif event == "ping":
await handle_ping(ws, data)
elif event == "registered":
print("registered", data)
except SystemExit:
return
except Exception as exc:
print("connection error, retry in 3s:", exc)
time.sleep(3)

View File

@@ -0,0 +1,54 @@
param(
[ValidateSet("start", "stop", "restart", "status")]
[string]$Action = "start",
[string]$Distro = ""
)
$ErrorActionPreference = "Stop"
$Root = Split-Path -Parent (Split-Path -Parent $MyInvocation.MyCommand.Path)
Set-Location $Root
if (!(Get-Command wsl -ErrorAction SilentlyContinue)) {
throw "WSL command not found. Please install WSL first."
}
if ([string]::IsNullOrWhiteSpace($Distro)) {
$linuxPath = (wsl -- wslpath -a "$Root").Trim()
} else {
$linuxPath = (wsl -d $Distro -- wslpath -a "$Root").Trim()
}
if ([string]::IsNullOrWhiteSpace($linuxPath)) {
throw "Failed to resolve WSL path for project root: $Root"
}
switch ($Action) {
"start" { $targetScript = "scripts/start_edge_device_local.sh" }
"stop" { $targetScript = "scripts/stop_edge_device_local.sh" }
"restart" { $targetScript = "scripts/restart_edge_device_local.sh" }
"status" { $targetScript = "" }
}
if ($Action -eq "status") {
$bashCommand = @"
cd '$linuxPath' && \
if [ -f runtime/pids/worker.pid ] && kill -0 `$(cat runtime/pids/worker.pid) >/dev/null 2>&1; then
echo "[OK] worker running pid=`$(cat runtime/pids/worker.pid)"
else
echo "[INFO] worker not running"
fi && \
if [ -f runtime/pids/edge_client.pid ] && kill -0 `$(cat runtime/pids/edge_client.pid) >/dev/null 2>&1; then
echo "[OK] edge_client running pid=`$(cat runtime/pids/edge_client.pid)"
else
echo "[INFO] edge_client not running"
fi
"@
} else {
$bashCommand = "cd '$linuxPath' && chmod +x scripts/install_wsl_env.sh scripts/start_edge_device_local.sh scripts/stop_edge_device_local.sh scripts/restart_edge_device_local.sh && bash $targetScript"
}
if ([string]::IsNullOrWhiteSpace($Distro)) {
wsl -- bash -lc "$bashCommand"
} else {
wsl -d $Distro -- bash -lc "$bashCommand"
}

View File

@@ -0,0 +1,9 @@
#!/usr/bin/env bash
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"
bash scripts/stop_edge_device_local.sh
sleep 1
bash scripts/start_edge_device_local.sh

View File

@@ -0,0 +1,41 @@
#!/usr/bin/env bash
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"
PID_DIR="runtime/pids"
WORKER_PID_FILE="${PID_DIR}/worker.pid"
EDGE_CLIENT_PID_FILE="${PID_DIR}/edge_client.pid"
stop_by_pid_file() {
local name="$1"
local pid_file="$2"
if [ ! -f "$pid_file" ]; then
echo "[INFO] ${name} not running (no pid file)"
return
fi
local pid
pid="$(cat "$pid_file" 2>/dev/null || true)"
if [ -z "$pid" ]; then
rm -f "$pid_file"
echo "[INFO] ${name} pid file empty, cleaned"
return
fi
if kill -0 "$pid" >/dev/null 2>&1; then
kill "$pid" >/dev/null 2>&1 || true
sleep 1
if kill -0 "$pid" >/dev/null 2>&1; then
kill -9 "$pid" >/dev/null 2>&1 || true
fi
echo "[OK] ${name} stopped (pid ${pid})"
else
echo "[INFO] ${name} already stopped (stale pid ${pid})"
fi
rm -f "$pid_file"
}
stop_by_pid_file "edge client" "$EDGE_CLIENT_PID_FILE"
stop_by_pid_file "worker" "$WORKER_PID_FILE"