#!/usr/bin/env bash
#
# Start a vLLM worker serving Qwen/Qwen3-8B-FP8 inside the `vllm_node`
# container, then poll the worker's /v1/models endpoint until it answers.
#
# Usage:
#   <this-script> [PORT]
#
# Arguments:
#   PORT  TCP port the worker listens on (default: 8001). Must be numeric.
#
# Exit status:
#   0  the worker answered on /v1/models within the polling window
#   1  the worker did not answer in time
#   2  PORT is not a decimal number
#
# Requires `docker` and `curl` on the host; `python3` is optional and is only
# used to pretty-print the model list.
set -euo pipefail

readonly WORKER_PORT="${1:-8001}"
readonly CONTAINER="vllm_node"
# NOTE: this path is inside the container (the redirect happens in the
# `sh -c` run by `docker exec`), not on the host.
readonly WORKER_LOG="/tmp/qwen3-8b-worker.log"
readonly STARTUP_GRACE_SECONDS=10
readonly MAX_ATTEMPTS=30
readonly POLL_INTERVAL_SECONDS=2

# Reject anything that is not a plain number before it reaches a command line.
if [[ ! "${WORKER_PORT}" =~ ^[0-9]+$ ]]; then
  printf 'Invalid port: %s (expected a number)\n' "${WORKER_PORT}" >&2
  exit 2
fi

echo "Starting Qwen3-8B-FP8 worker on port ${WORKER_PORT}..."

# The inner script is single-quoted so the host shell expands nothing in it;
# the port and log path are handed over as positional parameters ($1, $2)
# instead of being spliced into the command text.
# `docker exec -d` already detaches, so the server is exec'd in the foreground
# of that detached session rather than backgrounded a second time with `&`.
# NOTE(review): the `cd` result is deliberately not checked, matching the
# previous behaviour (the server is started even if the directory is missing).
docker exec -d "${CONTAINER}" sh -c '
  cd /root/.cache/huggingface/hub
  exec vllm serve "Qwen/Qwen3-8B-FP8" \
    --served-model-name "Qwen3-8B-FP8" \
    --max-model-len 32768 \
    --max-num-seqs 8 \
    --gpu-memory-utilization 0.20 \
    --port "$1" \
    --host 0.0.0.0 \
    --enable-chunked-prefill \
    --enable-auto-tool-choice \
    --tool-call-parser hermes \
    --trust-remote-code \
    --kv-cache-dtype fp8 \
    -tp 1 \
    >> "$2" 2>&1
' sh "${WORKER_PORT}" "${WORKER_LOG}"

echo "Waiting for worker to start..."
sleep "${STARTUP_GRACE_SECONDS}"

readonly MODELS_URL="http://localhost:${WORKER_PORT}/v1/models"

for ((attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)); do
  # A single request serves as both the readiness probe and the payload to
  # display; --max-time keeps one stalled connection from blocking the loop.
  if models_json="$(curl -sf --max-time 5 "${MODELS_URL}" 2>/dev/null)"; then
    echo "✓ Worker ready on port ${WORKER_PORT}"
    # Pretty-printing is best-effort: a missing python3 or a non-JSON reply
    # must not turn a successful start into a failure.
    printf '%s\n' "${models_json}" | python3 -m json.tool 2>/dev/null || true
    exit 0
  fi
  echo " Waiting... (${attempt}/${MAX_ATTEMPTS})"
  sleep "${POLL_INTERVAL_SECONDS}"
done

echo "✗ Worker failed to start. Check ${WORKER_LOG}" >&2
exit 1