From 8e093d10efda7c8e74ec1f32b21caa5468b7dec0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?=
Date: Tue, 17 Mar 2026 11:52:55 +0100
Subject: [PATCH 1/2] fix: update vllm installation logic to support non-desktop model runners on macOS ARM64

---
 cmd/cli/commands/install-runner.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cmd/cli/commands/install-runner.go b/cmd/cli/commands/install-runner.go
index 0e5eee45..82b913b8 100644
--- a/cmd/cli/commands/install-runner.go
+++ b/cmd/cli/commands/install-runner.go
@@ -244,7 +244,8 @@ type runnerOptions struct {
 func runInstallOrStart(cmd *cobra.Command, opts runnerOptions, debug bool) error {
 	// On macOS ARM64, the vllm backend requires deferred installation
 	// (on-demand via the running model runner), not as a standalone container.
-	if opts.backend == vllm.Name && platform.SupportsVLLMMetal() {
+	if opts.backend == vllm.Name && platform.SupportsVLLMMetal() &&
+		modelRunner.EngineKind() != types.ModelRunnerEngineKindDesktop {
 		cmd.Println("Installing vllm backend...")
 		if err := desktopClient.InstallBackend(vllm.Name); err != nil {
 			return fmt.Errorf("failed to install vllm backend: %w", err)

From d8072b776e00eb0d8247f5f353ad725031988ebb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?=
Date: Tue, 17 Mar 2026 13:41:39 +0100
Subject: [PATCH 2/2] fix: implement installation strategy resolution for vllm and diffusers backends

---
 cmd/cli/commands/install-runner.go      | 120 ++++++++++++++----
 cmd/cli/commands/install_action_test.go | 167 ++++++++++++++++++++++++
 2 files changed, 261 insertions(+), 26 deletions(-)
 create mode 100644 cmd/cli/commands/install_action_test.go

diff --git a/cmd/cli/commands/install-runner.go b/cmd/cli/commands/install-runner.go
index 82b913b8..1ff9c722 100644
--- a/cmd/cli/commands/install-runner.go
+++ b/cmd/cli/commands/install-runner.go
@@ -224,6 +224,73 @@ func getStandaloneRunner(ctx context.Context) (*standaloneRunner, error) {
 	return inspectStandaloneRunner(ctr), nil
 }
 
+// installAction describes what install-runner should do for a given backend.
+type installAction int
+
+const (
+	// installActionDeferredVLLMMetal triggers on-demand vllm-metal installation
+	// via the running model runner's API. Used when the model runner runs
+	// natively on macOS ARM64 (Desktop or MobyManual contexts).
+	installActionDeferredVLLMMetal installAction = iota
+	// installActionDeferredDiffusers triggers on-demand diffusers installation
+	// via the running model runner's API.
+	installActionDeferredDiffusers
+	// installActionAlreadyInDesktop indicates that Docker Desktop's built-in
+	// model runner is in charge, so no standalone installation is performed.
+	installActionAlreadyInDesktop
+	// installActionCreateContainer creates a standalone model runner container.
+	installActionCreateContainer
+)
+
+// resolveInstallAction determines the installation strategy based on the
+// requested backend, engine kind, platform capabilities, and WSL context.
+// This is a pure function to enable comprehensive unit testing of all
+// platform/engine combinations.
+//
+// isWSL is only meaningful together with the Desktop engine kind. Desktop
+// outside WSL never yields installActionCreateContainer.
+func resolveInstallAction(
+	backend string,
+	engineKind types.ModelRunnerEngineKind,
+	supportsVLLMMetal bool,
+	isWSL bool,
+) installAction {
+	// nativeDesktop: Docker Desktop engine outside a WSL context.
+	nativeDesktop := engineKind == types.ModelRunnerEngineKindDesktop && !isWSL
+
+	switch backend {
+	case vllm.Name:
+		// On macOS ARM64, Desktop (non-WSL) and MobyManual contexts mean
+		// the model runner runs on the host and supports vllm-metal.
+		if supportsVLLMMetal &&
+			(nativeDesktop || engineKind == types.ModelRunnerEngineKindMobyManual) {
+			return installActionDeferredVLLMMetal
+		}
+		// Desktop outside WSL without vllm-metal: standalone installation
+		// is not supported with Docker Desktop, so never hand a Desktop
+		// engine kind to the container-creation path.
+		if nativeDesktop {
+			return installActionAlreadyInDesktop
+		}
+		// Moby, Cloud, or Desktop+WSL: the Docker daemon is remote Linux,
+		// so create a standalone container with vLLM.
+		return installActionCreateContainer
+
+	case diffusers.Name:
+		// Diffusers always uses deferred installation via the model
+		// runner API (the server downloads the Python environment).
+		return installActionDeferredDiffusers
+
+	default:
+		// For llamacpp and other default backends on Desktop, they are
+		// already included in Docker Desktop's built-in model runner.
+		if engineKind == types.ModelRunnerEngineKindDesktop {
+			return installActionAlreadyInDesktop
+		}
+		// Moby/Cloud: create a standalone container.
+		return installActionCreateContainer
+	}
+}
+
 // runnerOptions holds common configuration for install/start/reinstall commands
 type runnerOptions struct {
 	port uint16
@@ -242,26 +309,29 @@ type runnerOptions struct {
 
 // runInstallOrStart is shared logic for install-runner and start-runner commands
 func runInstallOrStart(cmd *cobra.Command, opts runnerOptions, debug bool) error {
-	// On macOS ARM64, the vllm backend requires deferred installation
-	// (on-demand via the running model runner), not as a standalone container.
-	if opts.backend == vllm.Name && platform.SupportsVLLMMetal() &&
-		modelRunner.EngineKind() != types.ModelRunnerEngineKindDesktop {
+	engineKind := modelRunner.EngineKind()
+	isWSL := engineKind == types.ModelRunnerEngineKindDesktop &&
+		desktop.IsDesktopWSLContext(cmd.Context(), dockerCLI)
+
+	action := resolveInstallAction(
+		opts.backend,
+		engineKind,
+		platform.SupportsVLLMMetal(),
+		isWSL,
+	)
+
+	switch action {
+	case installActionDeferredVLLMMetal:
 		cmd.Println("Installing vllm backend...")
 		if err := desktopClient.InstallBackend(vllm.Name); err != nil {
 			return fmt.Errorf("failed to install vllm backend: %w", err)
 		}
 		cmd.Println("vllm backend installed successfully")
 		return nil
-	}
 
-	// The diffusers backend uses deferred installation: it pulls a Docker
-	// image, extracts a self-contained Python environment, and installs it
-	// to a well-known local folder. Trigger installation via the running
-	// model runner's API, the same way vllm-metal is handled above.
-	if opts.backend == diffusers.Name && platform.SupportsDiffusers() {
+	case installActionDeferredDiffusers:
 		// For standalone contexts (Moby/Cloud), ensure a base runner is
 		// available first so we have an API endpoint to call.
-		engineKind := modelRunner.EngineKind()
 		if engineKind == types.ModelRunnerEngineKindMoby || engineKind == types.ModelRunnerEngineKindCloud {
 			if _, err := ensureStandaloneRunnerAvailable(cmd.Context(), asPrinter(cmd), debug); err != nil {
 				return fmt.Errorf("unable to initialize standalone model runner: %w", err)
@@ -274,23 +344,21 @@ func runInstallOrStart(cmd *cobra.Command, opts runnerOptions, debug bool) error
 		}
 		cmd.Println("diffusers backend installed successfully")
 		return nil
+
+	case installActionAlreadyInDesktop:
+		cmd.Println("Standalone installation not supported with Docker Desktop")
+		cmd.Println("Use `docker desktop enable model-runner` instead")
+		return nil
+
+	case installActionCreateContainer:
+		// Fall through to container creation below.
 	}
 
-	var vllmOnWSL bool
-	// Ensure that we're running in a supported model runner context.
-	engineKind := modelRunner.EngineKind()
-	if engineKind == types.ModelRunnerEngineKindDesktop {
-		if opts.backend == vllm.Name && desktop.IsDesktopWSLContext(cmd.Context(), dockerCLI) {
-			engineKind = types.ModelRunnerEngineKindMoby
-			vllmOnWSL = true
-		} else {
-			// TODO: We may eventually want to auto-forward this to
-			// docker desktop enable model-runner, but we should first make
-			// sure the CLI flags match.
-			cmd.Println("Standalone installation not supported with Docker Desktop")
-			cmd.Println("Use `docker desktop enable model-runner` instead")
-			return nil
-		}
+	// For Desktop+WSL with vllm, override engine kind to Moby.
+	vllmOnWSL := false
+	if isWSL && opts.backend == vllm.Name {
+		engineKind = types.ModelRunnerEngineKindMoby
+		vllmOnWSL = true
 	}
 
 	port := opts.port
diff --git a/cmd/cli/commands/install_action_test.go b/cmd/cli/commands/install_action_test.go
new file mode 100644
index 00000000..1a4d7f6d
--- /dev/null
+++ b/cmd/cli/commands/install_action_test.go
@@ -0,0 +1,167 @@
+package commands
+
+import (
+	"testing"
+
+	"github.com/docker/model-runner/cmd/cli/pkg/types"
+	"github.com/docker/model-runner/pkg/inference/backends/diffusers"
+	"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
+	"github.com/docker/model-runner/pkg/inference/backends/vllm"
+)
+
+func TestResolveInstallAction(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name              string
+		backend           string
+		engineKind        types.ModelRunnerEngineKind
+		supportsVLLMMetal bool
+		isWSL             bool
+		expected          installAction
+	}{
+		// === vllm backend ===
+		{
+			name:              "vllm + Desktop (darwin/arm64) → deferred vllm-metal",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindDesktop,
+			supportsVLLMMetal: true,
+			isWSL:             false,
+			expected:          installActionDeferredVLLMMetal,
+		},
+		{
+			name:              "vllm + Desktop+WSL (darwin/arm64) → create container",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindDesktop,
+			supportsVLLMMetal: true,
+			isWSL:             true,
+			expected:          installActionCreateContainer,
+		},
+		{
+			name:              "vllm + MobyManual (darwin/arm64) → deferred vllm-metal",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindMobyManual,
+			supportsVLLMMetal: true,
+			isWSL:             false,
+			expected:          installActionDeferredVLLMMetal,
+		},
+		{
+			name:              "vllm + Cloud → create container",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindCloud,
+			supportsVLLMMetal: false,
+			isWSL:             false,
+			expected:          installActionCreateContainer,
+		},
+		{
+			name:              "vllm + Moby → create container",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindMoby,
+			supportsVLLMMetal: false,
+			isWSL:             false,
+			expected:          installActionCreateContainer,
+		},
+		{
+			name:              "vllm + Moby (darwin/arm64, e.g. Colima) → create container",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindMoby,
+			supportsVLLMMetal: true,
+			isWSL:             false,
+			expected:          installActionCreateContainer,
+		},
+		{
+			name:              "vllm + Cloud (darwin/arm64) → create container",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindCloud,
+			supportsVLLMMetal: true,
+			isWSL:             false,
+			expected:          installActionCreateContainer,
+		},
+		{
+			name:              "vllm + Desktop (no vllm-metal, non-WSL) → not supported in Desktop",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindDesktop,
+			supportsVLLMMetal: false,
+			isWSL:             false,
+			expected:          installActionAlreadyInDesktop,
+		},
+		{
+			name:              "vllm + Desktop+WSL (no vllm-metal) → create container",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindDesktop,
+			supportsVLLMMetal: false,
+			isWSL:             true,
+			expected:          installActionCreateContainer,
+		},
+
+		// === diffusers backend ===
+		{
+			name:              "diffusers + Desktop → deferred diffusers",
+			backend:           diffusers.Name,
+			engineKind:        types.ModelRunnerEngineKindDesktop,
+			supportsVLLMMetal: true,
+			isWSL:             false,
+			expected:          installActionDeferredDiffusers,
+		},
+		{
+			name:              "diffusers + Moby → deferred diffusers",
+			backend:           diffusers.Name,
+			engineKind:        types.ModelRunnerEngineKindMoby,
+			supportsVLLMMetal: false,
+			isWSL:             false,
+			expected:          installActionDeferredDiffusers,
+		},
+		{
+			name:              "diffusers + Cloud → deferred diffusers",
+			backend:           diffusers.Name,
+			engineKind:        types.ModelRunnerEngineKindCloud,
+			supportsVLLMMetal: false,
+			isWSL:             false,
+			expected:          installActionDeferredDiffusers,
+		},
+
+		// === llamacpp backend ===
+		{
+			name:              "llamacpp + Desktop → already in Desktop",
+			backend:           llamacpp.Name,
+			engineKind:        types.ModelRunnerEngineKindDesktop,
+			supportsVLLMMetal: true,
+			isWSL:             false,
+			expected:          installActionAlreadyInDesktop,
+		},
+		{
+			name:              "llamacpp + Moby → create container",
+			backend:           llamacpp.Name,
+			engineKind:        types.ModelRunnerEngineKindMoby,
+			supportsVLLMMetal: false,
+			isWSL:             false,
+			expected:          installActionCreateContainer,
+		},
+		{
+			name:              "llamacpp + Cloud → create container",
+			backend:           llamacpp.Name,
+			engineKind:        types.ModelRunnerEngineKindCloud,
+			supportsVLLMMetal: false,
+			isWSL:             false,
+			expected:          installActionCreateContainer,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+
+			result := resolveInstallAction(
+				tt.backend,
+				tt.engineKind,
+				tt.supportsVLLMMetal,
+				tt.isWSL,
+			)
+
+			if result != tt.expected {
+				t.Errorf("resolveInstallAction(%q, %v, supportsVLLMMetal=%v, isWSL=%v) = %d, want %d",
+					tt.backend, tt.engineKind, tt.supportsVLLMMetal, tt.isWSL, result, tt.expected)
+			}
+		})
+	}
+}