From 8e093d10efda7c8e74ec1f32b21caa5468b7dec0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= <ignasi.lopez.luna@gmail.com>
Date: Tue, 17 Mar 2026 11:52:55 +0100
Subject: [PATCH 1/2] fix: update vllm installation logic to support
 non-desktop model runners on macOS ARM64

---
 cmd/cli/commands/install-runner.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cmd/cli/commands/install-runner.go b/cmd/cli/commands/install-runner.go
index 0e5eee45..82b913b8 100644
--- a/cmd/cli/commands/install-runner.go
+++ b/cmd/cli/commands/install-runner.go
@@ -244,7 +244,8 @@ type runnerOptions struct {
 func runInstallOrStart(cmd *cobra.Command, opts runnerOptions, debug bool) error {
 	// On macOS ARM64, the vllm backend requires deferred installation
 	// (on-demand via the running model runner), not as a standalone container.
-	if opts.backend == vllm.Name && platform.SupportsVLLMMetal() {
+	if opts.backend == vllm.Name && platform.SupportsVLLMMetal() &&
+		modelRunner.EngineKind() != types.ModelRunnerEngineKindDesktop {
 		cmd.Println("Installing vllm backend...")
 		if err := desktopClient.InstallBackend(vllm.Name); err != nil {
 			return fmt.Errorf("failed to install vllm backend: %w", err)

From d8072b776e00eb0d8247f5f353ad725031988ebb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= <ignasi.lopez.luna@gmail.com>
Date: Tue, 17 Mar 2026 13:41:39 +0100
Subject: [PATCH 2/2] fix: implement installation strategy resolution for vllm
 and diffusers backends

---
 cmd/cli/commands/install-runner.go      | 113 ++++++++++++++----
 cmd/cli/commands/install_action_test.go | 151 ++++++++++++++++++++++++
 2 files changed, 238 insertions(+), 26 deletions(-)
 create mode 100644 cmd/cli/commands/install_action_test.go

diff --git a/cmd/cli/commands/install-runner.go b/cmd/cli/commands/install-runner.go
index 82b913b8..1ff9c722 100644
--- a/cmd/cli/commands/install-runner.go
+++ b/cmd/cli/commands/install-runner.go
@@ -224,6 +224,66 @@ func getStandaloneRunner(ctx context.Context) (*standaloneRunner, error) {
 	return inspectStandaloneRunner(ctr), nil
 }
 
+// installAction is the install strategy runInstallOrStart executes for a backend.
+type installAction int
+
+const (
+	// installActionDeferredVLLMMetal triggers on-demand vllm-metal installation
+	// via the running model runner's API. resolveInstallAction selects it for
+	// Desktop (non-WSL) and MobyManual engines when vllm-metal is supported.
+	installActionDeferredVLLMMetal installAction = iota
+	// installActionDeferredDiffusers triggers on-demand diffusers installation
+	// via the running model runner's API.
+	installActionDeferredDiffusers
+	// installActionAlreadyInDesktop skips standalone installation on Docker
+	// Desktop and points the user at `docker desktop enable model-runner`.
+	installActionAlreadyInDesktop
+	// installActionCreateContainer creates a standalone model runner container.
+	installActionCreateContainer
+)
+
+// resolveInstallAction determines the installation strategy based on the
+// requested backend, engine kind, platform capabilities, and WSL context.
+// It only inspects its arguments, so every platform/engine combination
+// can be covered by unit tests.
+func resolveInstallAction(
+	backend string,
+	engineKind types.ModelRunnerEngineKind,
+	supportsVLLMMetal bool,
+	isWSL bool,
+) installAction {
+	onDesktop := engineKind == types.ModelRunnerEngineKindDesktop
+
+	switch backend {
+	case diffusers.Name:
+		// Diffusers always uses deferred installation via the model
+		// runner API (the server downloads the Python environment).
+		return installActionDeferredDiffusers
+
+	case vllm.Name:
+		switch {
+		case onDesktop && isWSL:
+			// Desktop+WSL: the Docker daemon is remote Linux, so create
+			// a standalone container with vLLM.
+			return installActionCreateContainer
+		case supportsVLLMMetal &&
+			(onDesktop || engineKind == types.ModelRunnerEngineKindMobyManual):
+			// On macOS ARM64, Desktop (non-WSL) and MobyManual contexts
+			// mean the model runner runs on the host: use vllm-metal.
+			return installActionDeferredVLLMMetal
+		}
+	}
+
+	// Anything else on Desktop, including vllm when neither WSL nor
+	// vllm-metal applies, must not create a standalone container:
+	// report that Docker Desktop's own model runner should be used.
+	if onDesktop {
+		return installActionAlreadyInDesktop
+	}
+	// Remaining engines (e.g. Moby, Cloud): create a standalone container.
+	return installActionCreateContainer
+}
+
 // runnerOptions holds common configuration for install/start/reinstall commands
 type runnerOptions struct {
 	port            uint16
@@ -242,26 +302,29 @@ type runnerOptions struct {
 
 // runInstallOrStart is shared logic for install-runner and start-runner commands
 func runInstallOrStart(cmd *cobra.Command, opts runnerOptions, debug bool) error {
-	// On macOS ARM64, the vllm backend requires deferred installation
-	// (on-demand via the running model runner), not as a standalone container.
-	if opts.backend == vllm.Name && platform.SupportsVLLMMetal() &&
-		modelRunner.EngineKind() != types.ModelRunnerEngineKindDesktop {
+	engineKind := modelRunner.EngineKind()
+	isWSL := engineKind == types.ModelRunnerEngineKindDesktop &&
+		desktop.IsDesktopWSLContext(cmd.Context(), dockerCLI)
+
+	action := resolveInstallAction(
+		opts.backend,
+		engineKind,
+		platform.SupportsVLLMMetal(),
+		isWSL,
+	)
+
+	switch action {
+	case installActionDeferredVLLMMetal:
 		cmd.Println("Installing vllm backend...")
 		if err := desktopClient.InstallBackend(vllm.Name); err != nil {
 			return fmt.Errorf("failed to install vllm backend: %w", err)
 		}
 		cmd.Println("vllm backend installed successfully")
 		return nil
-	}
 
-	// The diffusers backend uses deferred installation: it pulls a Docker
-	// image, extracts a self-contained Python environment, and installs it
-	// to a well-known local folder. Trigger installation via the running
-	// model runner's API, the same way vllm-metal is handled above.
-	if opts.backend == diffusers.Name && platform.SupportsDiffusers() {
+	case installActionDeferredDiffusers:
 		// For standalone contexts (Moby/Cloud), ensure a base runner is
 		// available first so we have an API endpoint to call.
-		engineKind := modelRunner.EngineKind()
 		if engineKind == types.ModelRunnerEngineKindMoby || engineKind == types.ModelRunnerEngineKindCloud {
 			if _, err := ensureStandaloneRunnerAvailable(cmd.Context(), asPrinter(cmd), debug); err != nil {
 				return fmt.Errorf("unable to initialize standalone model runner: %w", err)
@@ -274,23 +337,21 @@ func runInstallOrStart(cmd *cobra.Command, opts runnerOptions, debug bool) error
 		}
 		cmd.Println("diffusers backend installed successfully")
 		return nil
+
+	case installActionAlreadyInDesktop:
+		cmd.Println("Standalone installation not supported with Docker Desktop")
+		cmd.Println("Use `docker desktop enable model-runner` instead")
+		return nil
+
+	case installActionCreateContainer:
+		// Fall through to container creation below.
 	}
 
-	var vllmOnWSL bool
-	// Ensure that we're running in a supported model runner context.
-	engineKind := modelRunner.EngineKind()
-	if engineKind == types.ModelRunnerEngineKindDesktop {
-		if opts.backend == vllm.Name && desktop.IsDesktopWSLContext(cmd.Context(), dockerCLI) {
-			engineKind = types.ModelRunnerEngineKindMoby
-			vllmOnWSL = true
-		} else {
-			// TODO: We may eventually want to auto-forward this to
-			// docker desktop enable model-runner, but we should first make
-			// sure the CLI flags match.
-			cmd.Println("Standalone installation not supported with Docker Desktop")
-			cmd.Println("Use `docker desktop enable model-runner` instead")
-			return nil
-		}
+	// For Desktop+WSL with vllm, override engine kind to Moby.
+	vllmOnWSL := false
+	if isWSL && opts.backend == vllm.Name {
+		engineKind = types.ModelRunnerEngineKindMoby
+		vllmOnWSL = true
 	}
 
 	port := opts.port
diff --git a/cmd/cli/commands/install_action_test.go b/cmd/cli/commands/install_action_test.go
new file mode 100644
index 00000000..1a4d7f6d
--- /dev/null
+++ b/cmd/cli/commands/install_action_test.go
@@ -0,0 +1,151 @@
+package commands
+
+import (
+	"testing"
+
+	"github.com/docker/model-runner/cmd/cli/pkg/types"
+	"github.com/docker/model-runner/pkg/inference/backends/diffusers"
+	"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
+	"github.com/docker/model-runner/pkg/inference/backends/vllm"
+)
+
+// TestResolveInstallAction checks the install strategy chosen by
+// resolveInstallAction for each backend across engine kinds, vllm-metal
+// support, and WSL contexts.
+func TestResolveInstallAction(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name              string
+		backend           string
+		engineKind        types.ModelRunnerEngineKind
+		supportsVLLMMetal bool
+		isWSL             bool
+		want              installAction
+	}{
+		// === vllm backend ===
+		{
+			name:              "vllm + Desktop (darwin/arm64) → deferred vllm-metal",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindDesktop,
+			supportsVLLMMetal: true,
+			isWSL:             false,
+			want:              installActionDeferredVLLMMetal,
+		},
+		{
+			name:              "vllm + Desktop+WSL (darwin/arm64) → create container",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindDesktop,
+			supportsVLLMMetal: true,
+			isWSL:             true,
+			want:              installActionCreateContainer,
+		},
+		{
+			name:              "vllm + MobyManual (darwin/arm64) → deferred vllm-metal",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindMobyManual,
+			supportsVLLMMetal: true,
+			isWSL:             false,
+			want:              installActionDeferredVLLMMetal,
+		},
+		{
+			name:              "vllm + Cloud → create container",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindCloud,
+			supportsVLLMMetal: false,
+			isWSL:             false,
+			want:              installActionCreateContainer,
+		},
+		{
+			name:              "vllm + Moby → create container",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindMoby,
+			supportsVLLMMetal: false,
+			isWSL:             false,
+			want:              installActionCreateContainer,
+		},
+		{
+			name:              "vllm + Moby (darwin/arm64, e.g. Colima) → create container",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindMoby,
+			supportsVLLMMetal: true,
+			isWSL:             false,
+			want:              installActionCreateContainer,
+		},
+		{
+			name:              "vllm + Cloud (darwin/arm64) → create container",
+			backend:           vllm.Name,
+			engineKind:        types.ModelRunnerEngineKindCloud,
+			supportsVLLMMetal: true,
+			isWSL:             false,
+			want:              installActionCreateContainer,
+		},
+
+		// === diffusers backend ===
+		{
+			name:              "diffusers + Desktop → deferred diffusers",
+			backend:           diffusers.Name,
+			engineKind:        types.ModelRunnerEngineKindDesktop,
+			supportsVLLMMetal: true,
+			isWSL:             false,
+			want:              installActionDeferredDiffusers,
+		},
+		{
+			name:              "diffusers + Moby → deferred diffusers",
+			backend:           diffusers.Name,
+			engineKind:        types.ModelRunnerEngineKindMoby,
+			supportsVLLMMetal: false,
+			isWSL:             false,
+			want:              installActionDeferredDiffusers,
+		},
+		{
+			name:              "diffusers + Cloud → deferred diffusers",
+			backend:           diffusers.Name,
+			engineKind:        types.ModelRunnerEngineKindCloud,
+			supportsVLLMMetal: false,
+			isWSL:             false,
+			want:              installActionDeferredDiffusers,
+		},
+
+		// === llamacpp backend ===
+		{
+			name:              "llamacpp + Desktop → already in Desktop",
+			backend:           llamacpp.Name,
+			engineKind:        types.ModelRunnerEngineKindDesktop,
+			supportsVLLMMetal: true,
+			isWSL:             false,
+			want:              installActionAlreadyInDesktop,
+		},
+		{
+			name:              "llamacpp + Moby → create container",
+			backend:           llamacpp.Name,
+			engineKind:        types.ModelRunnerEngineKindMoby,
+			supportsVLLMMetal: false,
+			isWSL:             false,
+			want:              installActionCreateContainer,
+		},
+		{
+			name:              "llamacpp + Cloud → create container",
+			backend:           llamacpp.Name,
+			engineKind:        types.ModelRunnerEngineKindCloud,
+			supportsVLLMMetal: false,
+			isWSL:             false,
+			want:              installActionCreateContainer,
+		},
+	}
+
+	for _, tc := range cases {
+		// Each scenario runs as its own parallel subtest named after the case.
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			got := resolveInstallAction(tc.backend, tc.engineKind, tc.supportsVLLMMetal, tc.isWSL)
+			if got == tc.want {
+				return
+			}
+			// %d prints the numeric installAction value.
+			t.Errorf("resolveInstallAction(%q, %v, supportsVLLMMetal=%v, isWSL=%v) = %d, want %d",
+				tc.backend, tc.engineKind, tc.supportsVLLMMetal, tc.isWSL, got, tc.want)
+		})
+	}
+}