From 72809c5e70e3ee705f37c5c8f85de20dcca87a75 Mon Sep 17 00:00:00 2001 From: Dorin Geman Date: Fri, 20 Mar 2026 16:00:24 +0200 Subject: [PATCH] feat(backends): add Uninstall method to Backend interface and uninstall-backend endpoint Deferred backends (vllm-metal, diffusers) install to local directories but had no cleanup path; this lets the CLI trigger backend removal via the running model-runner. Signed-off-by: Dorin Geman --- cmd/cli/commands/uninstall-runner.go | 14 +++++++ cmd/cli/desktop/desktop.go | 24 ++++++++++++ .../docker_model_uninstall-runner.yaml | 9 +++++ .../docs/reference/model_uninstall-runner.md | 9 +++-- pkg/inference/backend.go | 4 ++ pkg/inference/backends/diffusers/diffusers.go | 10 +++++ pkg/inference/backends/llamacpp/llamacpp.go | 5 +++ pkg/inference/backends/mlx/mlx.go | 5 +++ pkg/inference/backends/sglang/sglang.go | 5 +++ pkg/inference/backends/vllm/vllm.go | 5 +++ pkg/inference/backends/vllm/vllm_metal.go | 10 +++++ pkg/inference/scheduling/http_handler.go | 38 +++++++++++++++++++ pkg/inference/scheduling/installer.go | 29 ++++++++++++++ pkg/inference/scheduling/loader.go | 19 ++++++++++ pkg/inference/scheduling/loader_test.go | 4 ++ pkg/inference/scheduling/scheduler.go | 7 ++++ 16 files changed, 193 insertions(+), 4 deletions(-) diff --git a/cmd/cli/commands/uninstall-runner.go b/cmd/cli/commands/uninstall-runner.go index 2fc9e2ac4..139a13f30 100644 --- a/cmd/cli/commands/uninstall-runner.go +++ b/cmd/cli/commands/uninstall-runner.go @@ -14,10 +14,21 @@ import ( type cleanupOptions struct { models bool removeImages bool + backend string } // runUninstallOrStop is shared logic for uninstall-runner and stop-runner commands func runUninstallOrStop(cmd *cobra.Command, opts cleanupOptions) error { + // Deferred backend uninstall is handled via the running model runner API + // and works in any context (Desktop, Moby, etc.), so handle it first. 
+ if opts.backend != "" { + if err := desktopClient.UninstallBackend(opts.backend); err != nil { + return fmt.Errorf("failed to uninstall %s backend: %w", opts.backend, err) + } + cmd.Printf("Uninstalled %s backend\n", opts.backend) + return nil + } + // Ensure that we're running in a supported model runner context. if kind := modelRunner.EngineKind(); kind == types.ModelRunnerEngineKindDesktop { if desktop.IsDesktopWSLContext(cmd.Context(), dockerCLI) { @@ -62,6 +73,7 @@ func runUninstallOrStop(cmd *cobra.Command, opts cleanupOptions) error { func newUninstallRunner() *cobra.Command { var models, images bool + var backend string c := &cobra.Command{ Use: "uninstall-runner", Short: "Uninstall Docker Model Runner (Docker Engine only)", @@ -69,11 +81,13 @@ func newUninstallRunner() *cobra.Command { return runUninstallOrStop(cmd, cleanupOptions{ models: models, removeImages: images, + backend: backend, }) }, ValidArgsFunction: completion.NoComplete, } c.Flags().BoolVar(&models, "models", false, "Remove model storage volume") c.Flags().BoolVar(&images, "images", false, "Remove "+standalone.ControllerImage+" images") + c.Flags().StringVar(&backend, "backend", "", "Uninstall a deferred backend (e.g. vllm, diffusers)") return c } diff --git a/cmd/cli/desktop/desktop.go b/cmd/cli/desktop/desktop.go index bb1fbcd5a..361779e51 100644 --- a/cmd/cli/desktop/desktop.go +++ b/cmd/cli/desktop/desktop.go @@ -895,6 +895,30 @@ func (c *Client) InstallBackend(backend string) error { return nil } +// UninstallBackend removes a backend's local installation via the model runner API. 
+func (c *Client) UninstallBackend(backend string) error { + uninstallPath := inference.InferencePrefix + "/uninstall-backend" + jsonData, err := json.Marshal(struct { + Backend string `json:"backend"` + }{Backend: backend}) + if err != nil { + return fmt.Errorf("error marshaling request: %w", err) + } + + resp, err := c.doRequest(http.MethodPost, uninstallPath, bytes.NewReader(jsonData)) + if err != nil { + return c.handleQueryError(err, uninstallPath) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("uninstall backend failed with status %s: %s", resp.Status, string(body)) + } + + return nil +} + func (c *Client) ConfigureBackend(request scheduling.ConfigureRequest) error { configureBackendPath := inference.InferencePrefix + "/_configure" jsonData, err := json.Marshal(request) diff --git a/cmd/cli/docs/reference/docker_model_uninstall-runner.yaml b/cmd/cli/docs/reference/docker_model_uninstall-runner.yaml index b8ec4c77b..e93363d53 100644 --- a/cmd/cli/docs/reference/docker_model_uninstall-runner.yaml +++ b/cmd/cli/docs/reference/docker_model_uninstall-runner.yaml @@ -5,6 +5,15 @@ usage: docker model uninstall-runner pname: docker model plink: docker_model.yaml options: + - option: backend + value_type: string + description: Uninstall a deferred backend (e.g. 
vllm, diffusers) + deprecated: false + hidden: false + experimental: false + experimentalcli: false + kubernetes: false + swarm: false - option: images value_type: bool default_value: "false" diff --git a/cmd/cli/docs/reference/model_uninstall-runner.md b/cmd/cli/docs/reference/model_uninstall-runner.md index ddd455573..8beb8744f 100644 --- a/cmd/cli/docs/reference/model_uninstall-runner.md +++ b/cmd/cli/docs/reference/model_uninstall-runner.md @@ -5,10 +5,11 @@ Uninstall Docker Model Runner (Docker Engine only) ### Options -| Name | Type | Default | Description | -|:-----------|:-------|:--------|:----------------------------------| -| `--images` | `bool` | | Remove docker/model-runner images | -| `--models` | `bool` | | Remove model storage volume | +| Name | Type | Default | Description | +|:------------|:---------|:--------|:----------------------------------------------------| +| `--backend` | `string` | | Uninstall a deferred backend (e.g. vllm, diffusers) | +| `--images` | `bool` | | Remove docker/model-runner images | +| `--models` | `bool` | | Remove model storage volume | diff --git a/pkg/inference/backend.go b/pkg/inference/backend.go index e7c678a3d..95f300743 100644 --- a/pkg/inference/backend.go +++ b/pkg/inference/backend.go @@ -314,6 +314,10 @@ type Backend interface { // instead load only the specified model. Backends should still respond to // OpenAI API requests for other models with a 421 error code. Run(ctx context.Context, socket, model string, modelRef string, mode BackendMode, config *BackendConfiguration) error + // Uninstall removes backend-specific local installations (e.g. files + // downloaded to ~/.docker/model-runner/). Backends with nothing to clean + // up should return nil. + Uninstall() error // Status returns a description of the backend's state. Status() string // GetDiskUsage returns the disk usage of the backend. 
diff --git a/pkg/inference/backends/diffusers/diffusers.go b/pkg/inference/backends/diffusers/diffusers.go
index 6fca54a7d..a082dc8ed 100644
--- a/pkg/inference/backends/diffusers/diffusers.go
+++ b/pkg/inference/backends/diffusers/diffusers.go
@@ -264,6 +264,16 @@ func (d *diffusers) Run(ctx context.Context, socket, model string, modelRef stri
 }
 
+// Uninstall implements inference.Backend.Uninstall; it removes installDir and resets pythonPath and status.
+func (d *diffusers) Uninstall() error {
+	if err := os.RemoveAll(d.installDir); err != nil {
+		return fmt.Errorf("failed to remove diffusers install directory: %w", err)
+	}
+	d.pythonPath = ""
+	d.status = inference.FormatNotInstalled("")
+	return nil
+}
+
 // Status implements inference.Backend.Status.
 func (d *diffusers) Status() string {
 	return d.status
 }
diff --git a/pkg/inference/backends/llamacpp/llamacpp.go b/pkg/inference/backends/llamacpp/llamacpp.go
index 62cfbd637..a8b14277f 100644
--- a/pkg/inference/backends/llamacpp/llamacpp.go
+++ b/pkg/inference/backends/llamacpp/llamacpp.go
@@ -183,6 +183,11 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, _ string, mode
 	})
 }
 
+// Uninstall implements inference.Backend.Uninstall.
+func (l *llamaCpp) Uninstall() error {
+	return nil
+}
+
 func (l *llamaCpp) Status() string {
 	return l.status
 }
diff --git a/pkg/inference/backends/mlx/mlx.go b/pkg/inference/backends/mlx/mlx.go
index e1b5f5552..ebd6c924b 100644
--- a/pkg/inference/backends/mlx/mlx.go
+++ b/pkg/inference/backends/mlx/mlx.go
@@ -147,6 +147,11 @@ func (m *mlx) Run(ctx context.Context, socket, model string, modelRef string, mo
 	})
 }
 
+// Uninstall implements inference.Backend.Uninstall.
+func (m *mlx) Uninstall() error { + return nil +} + func (m *mlx) Status() string { return m.status } diff --git a/pkg/inference/backends/sglang/sglang.go b/pkg/inference/backends/sglang/sglang.go index bf9bf9faa..bf02b45d6 100644 --- a/pkg/inference/backends/sglang/sglang.go +++ b/pkg/inference/backends/sglang/sglang.go @@ -176,6 +176,11 @@ func (s *sglang) Run(ctx context.Context, socket, model string, modelRef string, }) } +// Uninstall implements inference.Backend.Uninstall. +func (s *sglang) Uninstall() error { + return nil +} + func (s *sglang) Status() string { return s.status } diff --git a/pkg/inference/backends/vllm/vllm.go b/pkg/inference/backends/vllm/vllm.go index e08546cdc..53c32c52a 100644 --- a/pkg/inference/backends/vllm/vllm.go +++ b/pkg/inference/backends/vllm/vllm.go @@ -187,6 +187,11 @@ func (v *vLLM) Run(ctx context.Context, socket, model string, modelRef string, m }) } +// Uninstall implements inference.Backend.Uninstall. +func (v *vLLM) Uninstall() error { + return nil +} + func (v *vLLM) Status() string { return v.status } diff --git a/pkg/inference/backends/vllm/vllm_metal.go b/pkg/inference/backends/vllm/vllm_metal.go index 7d70290ec..f91a562bb 100644 --- a/pkg/inference/backends/vllm/vllm_metal.go +++ b/pkg/inference/backends/vllm/vllm_metal.go @@ -277,6 +277,16 @@ func (v *vllmMetal) buildArgs(bundle interface{ SafetensorsPath() string }, sock return args, nil } +// Uninstall implements inference.Backend.Uninstall. +func (v *vllmMetal) Uninstall() error { + if err := os.RemoveAll(v.installDir); err != nil { + return fmt.Errorf("failed to remove vllm-metal install directory: %w", err) + } + v.pythonPath = "" + v.status = inference.FormatNotInstalled("") + return nil +} + // Status implements inference.Backend.Status. 
func (v *vllmMetal) Status() string { return v.status diff --git a/pkg/inference/scheduling/http_handler.go b/pkg/inference/scheduling/http_handler.go index a9f3077b9..a283de1da 100644 --- a/pkg/inference/scheduling/http_handler.go +++ b/pkg/inference/scheduling/http_handler.go @@ -98,6 +98,7 @@ func (h *HTTPHandler) routeHandlers() map[string]http.HandlerFunc { m["GET "+inference.InferencePrefix+"/v1/models/{name...}"] = h.handleModels m["POST "+inference.InferencePrefix+"/install-backend"] = h.InstallBackend + m["POST "+inference.InferencePrefix+"/uninstall-backend"] = h.UninstallBackend m["GET "+inference.InferencePrefix+"/status"] = h.GetBackendStatus m["GET "+inference.InferencePrefix+"/ps"] = h.GetRunningBackends m["GET "+inference.InferencePrefix+"/df"] = h.GetDiskUsage @@ -400,6 +401,43 @@ func (h *HTTPHandler) InstallBackend(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) } +// uninstallBackendRequest is the JSON body for the uninstall-backend endpoint. +type uninstallBackendRequest struct { + Backend string `json:"backend"` +} + +// UninstallBackend handles POST /uninstall-backend requests. +// It removes a backend's local installation. 
+func (h *HTTPHandler) UninstallBackend(w http.ResponseWriter, r *http.Request) { + body, err := io.ReadAll(http.MaxBytesReader(w, r.Body, maximumOpenAIInferenceRequestSize)) + if err != nil { + var maxBytesError *http.MaxBytesError + if errors.As(err, &maxBytesError) { + http.Error(w, "request too large", http.StatusBadRequest) + } else { + http.Error(w, "failed to read request body", http.StatusInternalServerError) + } + return + } + + var req uninstallBackendRequest + if err := json.Unmarshal(body, &req); err != nil || req.Backend == "" { + http.Error(w, "invalid request: backend is required", http.StatusBadRequest) + return + } + + if err := h.scheduler.UninstallBackend(r.Context(), req.Backend); err != nil { + if errors.Is(err, ErrBackendNotFound) { + http.Error(w, err.Error(), http.StatusNotFound) + } else { + http.Error(w, fmt.Sprintf("backend uninstall failed: %v", err), http.StatusInternalServerError) + } + return + } + + w.WriteHeader(http.StatusOK) +} + // Configure handles POST /{backend}/_configure requests. func (h *HTTPHandler) Configure(w http.ResponseWriter, r *http.Request) { // Determine the requested backend and ensure that it's valid. diff --git a/pkg/inference/scheduling/installer.go b/pkg/inference/scheduling/installer.go index a31a975ed..3c8f36d56 100644 --- a/pkg/inference/scheduling/installer.go +++ b/pkg/inference/scheduling/installer.go @@ -257,6 +257,35 @@ func (i *installer) installBackend(ctx context.Context, name string) error { return nil } +// uninstallBackend removes a backend's local installation and resets its +// install status so that a subsequent wait() will trigger re-installation. 
+func (i *installer) uninstallBackend(_ context.Context, name string) error {
+	i.installMu.Lock()
+	defer i.installMu.Unlock()
+
+	backend, ok := i.backends[name]
+	if !ok {
+		return ErrBackendNotFound
+	}
+
+	if err := backend.Uninstall(); err != nil {
+		i.log.Warn("Backend uninstall failed", "backend", name, "error", err)
+		return err
+	}
+
+	i.log.Info("Backend uninstalled", "backend", name)
+
+	// Replace the status entry with fresh, unclosed installed/failed channels.
+	i.mu.Lock()
+	i.statuses[name] = &installStatus{
+		installed: make(chan struct{}),
+		failed:    make(chan struct{}),
+	}
+	i.mu.Unlock()
+
+	return nil
+}
+
 // isInstalled returns true if the given backend has completed installation.
 // It is non-blocking.
 func (i *installer) isInstalled(name string) bool {
diff --git a/pkg/inference/scheduling/loader.go b/pkg/inference/scheduling/loader.go
index be2a5f81b..e3fa24724 100644
--- a/pkg/inference/scheduling/loader.go
+++ b/pkg/inference/scheduling/loader.go
@@ -283,6 +283,25 @@ func (l *loader) evictRunner(backend, model string, mode inference.BackendMode)
 	return len(l.runners)
 }
 
+// UnloadBackend unloads the given backend's runners whose reference count is
+// zero and returns how many it unloaded (0 if the loader lock is not acquired).
+func (l *loader) UnloadBackend(ctx context.Context, backend string) int {
+	if !l.lock(ctx) {
+		return 0
+	}
+	defer l.unlock()
+
+	count := 0
+	for r, runnerInfo := range l.runners {
+		if r.backend == backend && l.references[runnerInfo.slot] == 0 {
+			l.log.Info("Evicting backend runner for uninstall", "backend", r.backend, "model", r.modelID, "modelRef", runnerInfo.modelRef, "mode", r.mode)
+			l.freeRunnerSlot(runnerInfo.slot, r)
+			count++
+		}
+	}
+	return count
+}
+
 // Unload unloads runners and returns the number of unloaded runners.
 func (l *loader) Unload(ctx context.Context, unload UnloadRequest) int {
 	if !l.lock(ctx) {
diff --git a/pkg/inference/scheduling/loader_test.go b/pkg/inference/scheduling/loader_test.go
index 9120fba6e..f5e4ca3b0 100644
--- a/pkg/inference/scheduling/loader_test.go
+++ b/pkg/inference/scheduling/loader_test.go
@@ -31,6 +31,10 @@ func (m *mockBackend) Run(ctx context.Context, socket, model string, modelRef st
 	return nil
 }
 
+func (m *mockBackend) Uninstall() error {
+	return nil
+}
+
 func (m *mockBackend) Status() string {
 	return "mock"
 }
diff --git a/pkg/inference/scheduling/scheduler.go b/pkg/inference/scheduling/scheduler.go
index a2f2dbdd1..daafc7bb2 100644
--- a/pkg/inference/scheduling/scheduler.go
+++ b/pkg/inference/scheduling/scheduler.go
@@ -168,6 +168,13 @@ func (s *Scheduler) InstallBackend(ctx context.Context, name string) error {
 	return s.installer.installBackend(ctx, name)
 }
 
+// UninstallBackend unloads the backend's unreferenced runners, then removes its
+// local installation. NOTE(review): runners still referenced are not stopped.
+func (s *Scheduler) UninstallBackend(ctx context.Context, name string) error {
+	s.loader.UnloadBackend(ctx, name)
+	return s.installer.uninstallBackend(ctx, name)
+}
+
 // GetRunningBackendsInfo returns information about all running backends as a slice
 func (s *Scheduler) GetRunningBackendsInfo(ctx context.Context) []BackendStatus {
 	return s.getLoaderStatus(ctx)