#!/usr/bin/env bash
#
# Start a detached Docker container named "vllm_gemma4" that runs
# `vllm serve` for the RedHatAI Gemma 4 26B FP8 checkpoint, listening on
# 0.0.0.0:8001.
#
# Usage: run this script on a host with Docker, the "vllm-node-tf5" image,
# and GPU support for Docker (required by `--gpus all`).
#
# All options live in arrays, one option per line. A backslash followed by
# anything other than a newline is NOT a line continuation -- it escapes the
# next character -- so a collapsed "\ --flag" turns into the single word
# " --flag" and the command breaks. Arrays avoid continuations entirely.

# Image to start and model passed to `vllm serve`.
image="vllm-node-tf5"
model="RedHatAI/gemma-4-26B-A4B-it-FP8-Dynamic"

# Options for `docker run` itself.
docker_opts=(
  -d
  --name vllm_gemma4
  --restart unless-stopped
  --gpus all
  --network host
  --ipc host
  --ulimit memlock=-1
  --ulimit stack=67108864
  # Empty entrypoint: the command given after the image name is what runs.
  --entrypoint ""
)

# Arguments for `vllm serve` inside the container.
serve_args=(
  "$model"
  --max-model-len 65536
  --gpu-memory-utilization 0.4
  --port 8001
  --host 0.0.0.0
  --enable-prefix-caching
  # JSON value stays a single argument; single quotes keep it literal.
  --override-generation-config '{"temperature": 0.2}'
  --enable-auto-tool-choice
  --tool-call-parser gemma4
  --reasoning-parser gemma4
  --kv-cache-dtype fp8
  --max-num-batched-tokens 8192
  -tp 1
)

# The serve arguments are handed to the in-container shell as positional
# parameters ("$@"), so no second layer of quoting/escaping is needed.
# `exec` replaces that shell with the vllm process. The word "bash" after
# the -c script only fills $0.
docker run "${docker_opts[@]}" "$image" \
  bash -c 'exec vllm serve "$@"' bash "${serve_args[@]}"