Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions cmd/cli/commands/uninstall-runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,21 @@ import (
type cleanupOptions struct {
models bool
removeImages bool
backend string
}

// runUninstallOrStop is shared logic for uninstall-runner and stop-runner commands
func runUninstallOrStop(cmd *cobra.Command, opts cleanupOptions) error {
// Deferred backend uninstall is handled via the running model runner API
// and works in any context (Desktop, Moby, etc.), so handle it first.
if opts.backend != "" {
if err := desktopClient.UninstallBackend(opts.backend); err != nil {
return fmt.Errorf("failed to uninstall %s backend: %w", opts.backend, err)
}
cmd.Printf("Uninstalled %s backend\n", opts.backend)
return nil
}

// Ensure that we're running in a supported model runner context.
if kind := modelRunner.EngineKind(); kind == types.ModelRunnerEngineKindDesktop {
if desktop.IsDesktopWSLContext(cmd.Context(), dockerCLI) {
Expand Down Expand Up @@ -62,18 +73,21 @@ func runUninstallOrStop(cmd *cobra.Command, opts cleanupOptions) error {

// newUninstallRunner builds the "uninstall-runner" cobra command. It wires
// the --models, --images, and --backend flags into a cleanupOptions value and
// delegates the actual work to runUninstallOrStop.
func newUninstallRunner() *cobra.Command {
	var (
		removeModels bool
		removeImages bool
		backendName  string
	)
	cmd := &cobra.Command{
		Use:   "uninstall-runner",
		Short: "Uninstall Docker Model Runner (Docker Engine only)",
		RunE: func(cmd *cobra.Command, args []string) error {
			opts := cleanupOptions{
				models:       removeModels,
				removeImages: removeImages,
				backend:      backendName,
			}
			return runUninstallOrStop(cmd, opts)
		},
		ValidArgsFunction: completion.NoComplete,
	}
	flags := cmd.Flags()
	flags.BoolVar(&removeModels, "models", false, "Remove model storage volume")
	flags.BoolVar(&removeImages, "images", false, "Remove "+standalone.ControllerImage+" images")
	flags.StringVar(&backendName, "backend", "", "Uninstall a deferred backend (e.g. vllm, diffusers)")
	return cmd
}
24 changes: 24 additions & 0 deletions cmd/cli/desktop/desktop.go
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,30 @@ func (c *Client) InstallBackend(backend string) error {
return nil
}

// UninstallBackend removes a backend's local installation via the model
// runner API. It POSTs the backend name to the uninstall-backend endpoint
// and reports any non-200 response as an error, including the response body
// for context.
func (c *Client) UninstallBackend(backend string) error {
	endpoint := inference.InferencePrefix + "/uninstall-backend"

	payload, err := json.Marshal(struct {
		Backend string `json:"backend"`
	}{Backend: backend})
	if err != nil {
		return fmt.Errorf("error marshaling request: %w", err)
	}

	resp, err := c.doRequest(http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return c.handleQueryError(err, endpoint)
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusOK {
		return nil
	}

	// Surface the server's explanation alongside the status line; a failed
	// body read still yields a useful error with an empty detail string.
	detail, _ := io.ReadAll(resp.Body)
	return fmt.Errorf("uninstall backend failed with status %s: %s", resp.Status, string(detail))
}

func (c *Client) ConfigureBackend(request scheduling.ConfigureRequest) error {
configureBackendPath := inference.InferencePrefix + "/_configure"
jsonData, err := json.Marshal(request)
Expand Down
9 changes: 9 additions & 0 deletions cmd/cli/docs/reference/docker_model_uninstall-runner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ usage: docker model uninstall-runner
pname: docker model
plink: docker_model.yaml
options:
- option: backend
value_type: string
description: Uninstall a deferred backend (e.g. vllm, diffusers)
deprecated: false
hidden: false
experimental: false
experimentalcli: false
kubernetes: false
swarm: false
- option: images
value_type: bool
default_value: "false"
Expand Down
9 changes: 5 additions & 4 deletions cmd/cli/docs/reference/model_uninstall-runner.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ Uninstall Docker Model Runner (Docker Engine only)

### Options

| Name | Type | Default | Description |
|:-----------|:-------|:--------|:----------------------------------|
| `--images` | `bool` | | Remove docker/model-runner images |
| `--models` | `bool` | | Remove model storage volume |
| Name | Type | Default | Description |
|:------------|:---------|:--------|:----------------------------------------------------|
| `--backend` | `string` | | Uninstall a deferred backend (e.g. vllm, diffusers) |
| `--images` | `bool` | | Remove docker/model-runner images |
| `--models` | `bool` | | Remove model storage volume |


<!---MARKER_GEN_END-->
Expand Down
4 changes: 4 additions & 0 deletions pkg/inference/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,10 @@ type Backend interface {
// instead load only the specified model. Backends should still respond to
// OpenAI API requests for other models with a 421 error code.
Run(ctx context.Context, socket, model string, modelRef string, mode BackendMode, config *BackendConfiguration) error
// Uninstall removes backend-specific local installations (e.g. files
// downloaded to ~/.docker/model-runner/). Backends with nothing to clean
// up should return nil.
Uninstall() error
// Status returns a description of the backend's state.
Status() string
// GetDiskUsage returns the disk usage of the backend.
Expand Down
10 changes: 10 additions & 0 deletions pkg/inference/backends/diffusers/diffusers.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,16 @@ func (d *diffusers) Run(ctx context.Context, socket, model string, modelRef stri
}

// Uninstall implements inference.Backend.Uninstall. It deletes the backend's
// on-disk installation directory and resets the cached Python path and status
// so the backend reads as not installed.
func (d *diffusers) Uninstall() error {
	if err := os.RemoveAll(d.installDir); err != nil {
		return fmt.Errorf("failed to remove diffusers install directory: %w", err)
	}
	// Clear cached state so a later install cycle starts fresh.
	d.pythonPath = ""
	d.status = inference.FormatNotInstalled("")
	return nil
}

// Status implements inference.Backend.Status.
func (d *diffusers) Status() string {
	return d.status
}
Expand Down
5 changes: 5 additions & 0 deletions pkg/inference/backends/llamacpp/llamacpp.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,11 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, _ string, mode
})
}

// Uninstall implements inference.Backend.Uninstall. This backend has no
// deferred local installation to clean up, so per the interface contract it
// simply returns nil.
func (l *llamaCpp) Uninstall() error {
	return nil
}

func (l *llamaCpp) Status() string {
return l.status
}
Expand Down
5 changes: 5 additions & 0 deletions pkg/inference/backends/mlx/mlx.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ func (m *mlx) Run(ctx context.Context, socket, model string, modelRef string, mo
})
}

// Uninstall implements inference.Backend.Uninstall. This backend has no
// deferred local installation to clean up, so per the interface contract it
// simply returns nil.
func (m *mlx) Uninstall() error {
	return nil
}

func (m *mlx) Status() string {
return m.status
}
Expand Down
5 changes: 5 additions & 0 deletions pkg/inference/backends/sglang/sglang.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,11 @@ func (s *sglang) Run(ctx context.Context, socket, model string, modelRef string,
})
}

// Uninstall implements inference.Backend.Uninstall. This backend has no
// deferred local installation to clean up, so per the interface contract it
// simply returns nil.
func (s *sglang) Uninstall() error {
	return nil
}

func (s *sglang) Status() string {
return s.status
}
Expand Down
5 changes: 5 additions & 0 deletions pkg/inference/backends/vllm/vllm.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,11 @@ func (v *vLLM) Run(ctx context.Context, socket, model string, modelRef string, m
})
}

// Uninstall implements inference.Backend.Uninstall. This (non-Metal) vLLM
// backend has no deferred local installation to clean up, so per the
// interface contract it simply returns nil.
func (v *vLLM) Uninstall() error {
	return nil
}

func (v *vLLM) Status() string {
return v.status
}
Expand Down
10 changes: 10 additions & 0 deletions pkg/inference/backends/vllm/vllm_metal.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,16 @@ func (v *vllmMetal) buildArgs(bundle interface{ SafetensorsPath() string }, sock
return args, nil
}

// Uninstall implements inference.Backend.Uninstall. It deletes the backend's
// on-disk installation directory and resets the cached Python path and status
// so the backend reads as not installed.
//
// NOTE(review): this mutates v.pythonPath / v.status without visible locking;
// presumably callers serialize uninstall with install/run — confirm.
func (v *vllmMetal) Uninstall() error {
	if err := os.RemoveAll(v.installDir); err != nil {
		return fmt.Errorf("failed to remove vllm-metal install directory: %w", err)
	}
	// Clear cached state so a later install cycle starts fresh.
	v.pythonPath = ""
	v.status = inference.FormatNotInstalled("")
	return nil
}

// Status implements inference.Backend.Status.
func (v *vllmMetal) Status() string {
return v.status
Expand Down
38 changes: 38 additions & 0 deletions pkg/inference/scheduling/http_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ func (h *HTTPHandler) routeHandlers() map[string]http.HandlerFunc {
m["GET "+inference.InferencePrefix+"/v1/models/{name...}"] = h.handleModels

m["POST "+inference.InferencePrefix+"/install-backend"] = h.InstallBackend
m["POST "+inference.InferencePrefix+"/uninstall-backend"] = h.UninstallBackend
m["GET "+inference.InferencePrefix+"/status"] = h.GetBackendStatus
m["GET "+inference.InferencePrefix+"/ps"] = h.GetRunningBackends
m["GET "+inference.InferencePrefix+"/df"] = h.GetDiskUsage
Expand Down Expand Up @@ -400,6 +401,43 @@ func (h *HTTPHandler) InstallBackend(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}

// uninstallBackendRequest is the JSON body for the uninstall-backend endpoint.
type uninstallBackendRequest struct {
	Backend string `json:"backend"`
}

// UninstallBackend handles POST <inference-prefix>/uninstall-backend requests.
// It removes a backend's local installation. Responses: 200 on success, 400
// for an oversized/malformed/incomplete request, 404 for an unknown backend,
// and 500 for read or uninstall failures.
func (h *HTTPHandler) UninstallBackend(w http.ResponseWriter, r *http.Request) {
	body, err := io.ReadAll(http.MaxBytesReader(w, r.Body, maximumOpenAIInferenceRequestSize))
	if err != nil {
		var maxBytesError *http.MaxBytesError
		if errors.As(err, &maxBytesError) {
			http.Error(w, "request too large", http.StatusBadRequest)
		} else {
			http.Error(w, "failed to read request body", http.StatusInternalServerError)
		}
		return
	}

	// Distinguish malformed JSON from a well-formed request that omits the
	// backend name so clients receive an accurate error message.
	var req uninstallBackendRequest
	if err := json.Unmarshal(body, &req); err != nil {
		http.Error(w, "invalid request: malformed JSON body", http.StatusBadRequest)
		return
	}
	if req.Backend == "" {
		http.Error(w, "invalid request: backend is required", http.StatusBadRequest)
		return
	}

	if err := h.scheduler.UninstallBackend(r.Context(), req.Backend); err != nil {
		// Map an unknown backend to 404; anything else is a server-side
		// failure during the uninstall itself.
		if errors.Is(err, ErrBackendNotFound) {
			http.Error(w, err.Error(), http.StatusNotFound)
		} else {
			http.Error(w, fmt.Sprintf("backend uninstall failed: %v", err), http.StatusInternalServerError)
		}
		return
	}

	w.WriteHeader(http.StatusOK)
}

// Configure handles POST <inference-prefix>/{backend}/_configure requests.
func (h *HTTPHandler) Configure(w http.ResponseWriter, r *http.Request) {
// Determine the requested backend and ensure that it's valid.
Expand Down
29 changes: 29 additions & 0 deletions pkg/inference/scheduling/installer.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,35 @@ func (i *installer) installBackend(ctx context.Context, name string) error {
return nil
}

// uninstallBackend removes a backend's local installation and resets its
// install status so that a subsequent wait() will trigger re-installation.
// The context parameter is currently unused.
func (i *installer) uninstallBackend(_ context.Context, name string) error {
	// Serialize with concurrent install operations.
	i.installMu.Lock()
	defer i.installMu.Unlock()

	b, known := i.backends[name]
	if !known {
		return ErrBackendNotFound
	}

	if err := b.Uninstall(); err != nil {
		i.log.Warn("Backend uninstall failed", "backend", name, "error", err)
		return err
	}
	i.log.Info("Backend uninstalled", "backend", name)

	// Swap in a fresh status (both channels unclosed) so the backend can be
	// re-installed later.
	fresh := &installStatus{
		installed: make(chan struct{}),
		failed:    make(chan struct{}),
	}
	i.mu.Lock()
	i.statuses[name] = fresh
	i.mu.Unlock()

	return nil
}

// isInstalled returns true if the given backend has completed installation.
// It is non-blocking.
func (i *installer) isInstalled(name string) bool {
Expand Down
19 changes: 19 additions & 0 deletions pkg/inference/scheduling/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,25 @@ func (l *loader) evictRunner(backend, model string, mode inference.BackendMode)
return len(l.runners)
}

// UnloadBackend unloads all runners for a specific backend.
// It returns the number of unloaded runners (0 if the loader lock could not
// be acquired before the context was cancelled).
func (l *loader) UnloadBackend(ctx context.Context, backend string) int {
	if !l.lock(ctx) {
		return 0
	}
	defer l.unlock()

	evicted := 0
	for key, info := range l.runners {
		// Only evict runners belonging to the target backend that have no
		// outstanding references. Deleting entries while ranging over a Go
		// map is well-defined.
		if key.backend != backend || l.references[info.slot] != 0 {
			continue
		}
		l.log.Info("Evicting backend runner for uninstall", "backend", key.backend, "model", key.modelID, "modelRef", info.modelRef, "mode", key.mode)
		l.freeRunnerSlot(info.slot, key)
		evicted++
	}
	return evicted
}

// Unload unloads runners and returns the number of unloaded runners.
func (l *loader) Unload(ctx context.Context, unload UnloadRequest) int {
if !l.lock(ctx) {
Expand Down
4 changes: 4 additions & 0 deletions pkg/inference/scheduling/loader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ func (m *mockBackend) Run(ctx context.Context, socket, model string, modelRef st
return nil
}

// Uninstall implements inference.Backend.Uninstall for the mock backend;
// there is nothing to remove, so it always succeeds.
func (m *mockBackend) Uninstall() error {
	return nil
}

func (m *mockBackend) Status() string {
return "mock"
}
Expand Down
7 changes: 7 additions & 0 deletions pkg/inference/scheduling/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,13 @@ func (s *Scheduler) InstallBackend(ctx context.Context, name string) error {
return s.installer.installBackend(ctx, name)
}

// UninstallBackend unloads all runners for the backend and then removes its
// local installation.
//
// Runner eviction is best-effort: UnloadBackend skips runners that still have
// outstanding references, and the uninstall proceeds regardless of how many
// runners were actually evicted. Returns ErrBackendNotFound (wrapped by the
// installer) for an unknown backend name.
func (s *Scheduler) UninstallBackend(ctx context.Context, name string) error {
	s.loader.UnloadBackend(ctx, name)
	return s.installer.uninstallBackend(ctx, name)
}

// GetRunningBackendsInfo returns information about all running backends as a slice
func (s *Scheduler) GetRunningBackendsInfo(ctx context.Context) []BackendStatus {
return s.getLoaderStatus(ctx)
Expand Down
Loading