From 47ff7ef340ad4baff2c89ed983c891514cd171d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20G=C3=BCttler?= Date: Wed, 8 Apr 2026 15:34:37 +0200 Subject: [PATCH 1/2] :seedling: Create Host Yaml --- .gitignore | 2 + README.md | 63 +- internal/cmd/check_bm_servers.go | 15 +- internal/cmd/createhosttemplate.go | 76 +++ internal/cmd/root.go | 1 + .../createhosttemplate/createhosttemplate.go | 627 ++++++++++++++++++ .../createhosttemplate_test.go | 84 +++ internal/tools/readmegen/main.go | 12 + 8 files changed, 865 insertions(+), 15 deletions(-) create mode 100644 internal/cmd/createhosttemplate.go create mode 100644 internal/createhosttemplate/createhosttemplate.go create mode 100644 internal/createhosttemplate/createhosttemplate_test.go diff --git a/.gitignore b/.gitignore index 90b4886..49dddba 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ +.envrc .vscode/ caphcli +/*.yaml diff --git a/README.md b/README.md index 1572af4..3ea99ce 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,28 @@ Depending on the command, these environment variables are needed. - One of `HETZNER_SSH_PUB_PATH` or `HETZNER_SSH_PUB` for the SSH public key. - One of `HETZNER_SSH_PRIV_PATH` or `HETZNER_SSH_PRIV` for the SSH private key. 
+## Common Usage + +If you have Go installed, the easiest way is to run the code like this: + +```console +go run github.com/syself/caphcli@latest -h +``` + +If you have new Hetzner Baremetal (Robot) Server, then create a HetznerBareMetalHost YAML file: + +```console +go run github.com/syself/caphcli@latest create-host-template 1234567 1234567.yaml +``` + +This will create a HetznerBareMetalHost YAML file: `1234567.yaml` + +After that you can check if the rescue system is reachable reliably: + +```console +go run github.com/syself/caphcli@latest check-bm-servers 1234567.yaml +``` + ## CLI Help @@ -25,9 +47,10 @@ Usage: caphcli [command] Available Commands: - check-bm-servers Validate rescue and provisioning reliability for one bare-metal server - completion Generate the autocompletion script for the specified shell - help Help about any command + check-bm-servers Validate rescue and provisioning reliability for one bare-metal server + completion Generate the autocompletion script for the specified shell + create-host-template Generate a HetznerBareMetalHost template for one Robot server + help Help about any command Flags: -h, --help help for caphcli @@ -45,15 +68,14 @@ HetznerBareMetalHost objects and then talks directly to Hetzner Robot plus the target server. 
Usage: - caphcli check-bm-servers [flags] + caphcli check-bm-servers FILE [flags] Examples: caphcli check-bm-servers \ - --file test/e2e/data/infrastructure-hetzner/v1beta1/bases/hetznerbaremetalhosts.yaml \ + test/e2e/data/infrastructure-hetzner/v1beta1/bases/hetznerbaremetalhosts.yaml \ --name bm-e2e-1731561 Flags: - --file string Path to a local YAML file containing HetznerBareMetalHost objects (required) --force Skip the destructive-action confirmation prompt -h, --help help for check-bm-servers --image-path string Installimage IMAGE path for operating system inside the Hetzner rescue system (default "/root/.oldroot/nfs/images/Ubuntu-2404-noble-amd64-base.tar.gz") @@ -71,4 +93,33 @@ Flags: --timeout-wait-rescue duration Timeout for waiting until rescue SSH is reachable (default 6m0s) ``` +### `caphcli create-host-template --help` + +```text +Generate a HetznerBareMetalHost YAML template for one Hetzner Robot server. + +The command talks directly to Hetzner Robot, ensures rescue SSH access, reboots +the target server into rescue once, inspects the available disks, and writes a +template YAML to the requested output file. Progress and confirmation prompts go to stderr. 
+ +Usage: + caphcli create-host-template SERVER_ID OUTPUT_FILE [flags] + +Examples: + caphcli create-host-template 1751550 host.yaml + caphcli create-host-template --force --name bm-e2e-1751550 1751550 host.yaml + +Flags: + --force Skip the reboot confirmation prompt + -h, --help help for create-host-template + --name string metadata.name for the generated HetznerBareMetalHost (default: bm-SERVER_ID) + --poll-interval duration Polling interval while waiting for rescue SSH (default 10s) + --timeout-activate-rescue duration Timeout for activating rescue boot (default 45s) + --timeout-ensure-ssh-key duration Timeout for ensuring SSH key in Robot (default 1m0s) + --timeout-fetch-server duration Timeout for fetching server details from Robot (default 30s) + --timeout-load-input duration Timeout for env loading + initial validation (default 30s) + --timeout-reboot-rescue duration Timeout for requesting reboot to rescue (default 45s) + --timeout-wait-rescue duration Timeout for waiting until rescue SSH is reachable (default 6m0s) +``` + diff --git a/internal/cmd/check_bm_servers.go b/internal/cmd/check_bm_servers.go index 4ea3df6..8da4b12 100644 --- a/internal/cmd/check_bm_servers.go +++ b/internal/cmd/check_bm_servers.go @@ -2,7 +2,6 @@ package cmd import ( "context" - "errors" "fmt" "os" @@ -17,7 +16,7 @@ func newCheckBMServersCommand() *cobra.Command { cfg.Output = os.Stdout cmd := &cobra.Command{ - Use: "check-bm-servers", + Use: "check-bm-servers FILE", Short: "Validate rescue and provisioning reliability for one bare-metal server", Long: `Validate rescue and provisioning reliability for one HetznerBareMetalHost from a local YAML file. @@ -25,15 +24,14 @@ The command does not talk to Kubernetes. 
It reads one local YAML file containing HetznerBareMetalHost objects and then talks directly to Hetzner Robot plus the target server.`, Example: ` caphcli check-bm-servers \ - --file test/e2e/data/infrastructure-hetzner/v1beta1/bases/hetznerbaremetalhosts.yaml \ + test/e2e/data/infrastructure-hetzner/v1beta1/bases/hetznerbaremetalhosts.yaml \ --name bm-e2e-1731561`, - RunE: func(_ *cobra.Command, _ []string) error { - if cfg.HbmhYAMLFile == "" { - return errors.New("--file is required") - } + Args: cobra.ExactArgs(1), + RunE: func(_ *cobra.Command, args []string) error { + cfg.HbmhYAMLFile = args[0] if _, err := os.Stat(cfg.HbmhYAMLFile); err != nil { - return fmt.Errorf("check --file: %w", err) + return fmt.Errorf("check FILE: %w", err) } if err := provisioncheck.Run(context.Background(), cfg); err != nil { @@ -45,7 +43,6 @@ target server.`, } flags := cmd.Flags() - flags.StringVar(&cfg.HbmhYAMLFile, "file", "", "Path to a local YAML file containing HetznerBareMetalHost objects (required)") flags.StringVar(&cfg.Name, "name", "", "HetznerBareMetalHost metadata.name. 
Optional if YAML contains exactly one host") flags.StringVar(&cfg.ImagePath, "image-path", provisioncheck.DefaultUbuntu2404ImagePath, "Installimage IMAGE path for operating system inside the Hetzner rescue system") flags.BoolVar(&cfg.Force, "force", false, "Skip the destructive-action confirmation prompt") diff --git a/internal/cmd/createhosttemplate.go b/internal/cmd/createhosttemplate.go new file mode 100644 index 0000000..a4a661d --- /dev/null +++ b/internal/cmd/createhosttemplate.go @@ -0,0 +1,76 @@ +package cmd + +import ( + "context" + "fmt" + "os" + "strconv" + + "github.com/spf13/cobra" + + "github.com/syself/caphcli/internal/createhosttemplate" + "github.com/syself/caphcli/internal/provisioncheck" +) + +func newCreateHostTemplateCommand() *cobra.Command { + cfg := createhosttemplate.DefaultConfig() + cfg.Input = os.Stdin + cfg.LogOutput = os.Stderr + + cmd := &cobra.Command{ + Use: "create-host-template SERVER_ID OUTPUT_FILE", + Short: "Generate a HetznerBareMetalHost template for one Robot server", + Long: `Generate a HetznerBareMetalHost YAML template for one Hetzner Robot server. + +The command talks directly to Hetzner Robot, ensures rescue SSH access, reboots +the target server into rescue once, inspects the available disks, and writes a +template YAML to the requested output file. 
Progress and confirmation prompts go to stderr.`, + Example: ` caphcli create-host-template 1751550 host.yaml + caphcli create-host-template --force --name bm-e2e-1751550 1751550 host.yaml`, + Args: cobra.ExactArgs(2), + RunE: func(_ *cobra.Command, args []string) error { + serverID, err := strconv.Atoi(args[0]) + if err != nil { + return fmt.Errorf("parse SERVER_ID %q: %w", args[0], err) + } + cfg.ServerID = serverID + outputFile := args[1] + + f, err := os.Create(outputFile) + if err != nil { + return fmt.Errorf("create output file %q: %w", outputFile, err) + } + defer func() { + if f != nil { + _ = f.Close() + } + }() + cfg.Output = f + + if err := createhosttemplate.Run(context.Background(), cfg); err != nil { + return fmt.Errorf("caphcli create-host-template failed for server %d: %w", cfg.ServerID, err) + } + + if err := f.Close(); err != nil { + return fmt.Errorf("close output file %q: %w", outputFile, err) + } + f = nil + _, _ = fmt.Fprintf(cfg.LogOutput, "✓ created %s\n", outputFile) + + return nil + }, + } + + flags := cmd.Flags() + flags.BoolVar(&cfg.Force, "force", false, "Skip the reboot confirmation prompt") + flags.StringVar(&cfg.Name, "name", "", "metadata.name for the generated HetznerBareMetalHost (default: bm-SERVER_ID)") + flags.DurationVar(&cfg.PollInterval, "poll-interval", provisioncheck.DefaultPollInterval, "Polling interval while waiting for rescue SSH") + flags.DurationVar(&cfg.Timeouts.LoadInput, "timeout-load-input", provisioncheck.DefaultLoadInputTimeout, "Timeout for env loading + initial validation") + flags.DurationVar(&cfg.Timeouts.EnsureSSHKey, "timeout-ensure-ssh-key", provisioncheck.DefaultEnsureSSHKeyTimeout, "Timeout for ensuring SSH key in Robot") + flags.DurationVar(&cfg.Timeouts.FetchServerDetails, "timeout-fetch-server", provisioncheck.DefaultFetchServerDetailsTimeout, "Timeout for fetching server details from Robot") + flags.DurationVar(&cfg.Timeouts.ActivateRescue, "timeout-activate-rescue", 
provisioncheck.DefaultActivateRescueTimeout, "Timeout for activating rescue boot") + flags.DurationVar(&cfg.Timeouts.RebootToRescue, "timeout-reboot-rescue", provisioncheck.DefaultRebootToRescueTimeout, "Timeout for requesting reboot to rescue") + flags.DurationVar(&cfg.Timeouts.WaitForRescue, "timeout-wait-rescue", provisioncheck.DefaultWaitForRescueTimeout, "Timeout for waiting until rescue SSH is reachable") + + return cmd +} diff --git a/internal/cmd/root.go b/internal/cmd/root.go index a0e57d0..b77eb94 100644 --- a/internal/cmd/root.go +++ b/internal/cmd/root.go @@ -21,6 +21,7 @@ func NewRootCommand() *cobra.Command { } rootCmd.AddCommand(newCheckBMServersCommand()) + rootCmd.AddCommand(newCreateHostTemplateCommand()) return rootCmd } diff --git a/internal/createhosttemplate/createhosttemplate.go b/internal/createhosttemplate/createhosttemplate.go new file mode 100644 index 0000000..c912267 --- /dev/null +++ b/internal/createhosttemplate/createhosttemplate.go @@ -0,0 +1,627 @@ +package createhosttemplate + +import ( + "bufio" + "context" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "sort" + "strconv" + "strings" + "time" + + "github.com/syself/hrobot-go/models" + + infrav1 "github.com/syself/cluster-api-provider-hetzner/api/v1beta1" + robotclient "github.com/syself/cluster-api-provider-hetzner/pkg/services/baremetal/client/robot" + sshclient "github.com/syself/cluster-api-provider-hetzner/pkg/services/baremetal/client/ssh" + + "github.com/syself/caphcli/internal/provisioncheck" +) + +const ( + minDiskSizeBytes int64 = 1_000_000_000 + rescueHostName = "rescue" + sshPort = 22 +) + +type Timeouts struct { + LoadInput time.Duration + EnsureSSHKey time.Duration + FetchServerDetails time.Duration + ActivateRescue time.Duration + RebootToRescue time.Duration + WaitForRescue time.Duration +} + +type Config struct { + ServerID int + Name string + Force bool + PollInterval time.Duration + Timeouts Timeouts + Input io.Reader + Output io.Writer 
+ LogOutput io.Writer +} + +type runner struct { + cfg Config + sshFactory sshclient.Factory + robotClient robotclient.Client + creds envCredentials + fingerprint string + server *models.Server +} + +type envCredentials struct { + robotUser string + robotPass string + sshKeyName string + sshPub string + sshPriv string +} + +type disk struct { + Name string + WWN string + SizeBytes int64 +} + +type storageDetails struct { + Name string `json:"name,omitempty"` + Type string `json:"type,omitempty"` + Size string `json:"size,omitempty"` + WWN string `json:"wwn,omitempty"` +} + +func DefaultConfig() Config { + return Config{ + PollInterval: provisioncheck.DefaultPollInterval, + Timeouts: Timeouts{ + LoadInput: provisioncheck.DefaultLoadInputTimeout, + EnsureSSHKey: provisioncheck.DefaultEnsureSSHKeyTimeout, + FetchServerDetails: provisioncheck.DefaultFetchServerDetailsTimeout, + ActivateRescue: provisioncheck.DefaultActivateRescueTimeout, + RebootToRescue: provisioncheck.DefaultRebootToRescueTimeout, + WaitForRescue: provisioncheck.DefaultWaitForRescueTimeout, + }, + Input: os.Stdin, + Output: os.Stdout, + LogOutput: os.Stderr, + } +} + +func (cfg Config) withDefaults() Config { + defaults := DefaultConfig() + + if cfg.PollInterval == 0 { + cfg.PollInterval = defaults.PollInterval + } + if cfg.Input == nil { + cfg.Input = defaults.Input + } + if cfg.Output == nil { + cfg.Output = defaults.Output + } + if cfg.LogOutput == nil { + cfg.LogOutput = defaults.LogOutput + } + if cfg.Timeouts.LoadInput == 0 { + cfg.Timeouts.LoadInput = defaults.Timeouts.LoadInput + } + if cfg.Timeouts.EnsureSSHKey == 0 { + cfg.Timeouts.EnsureSSHKey = defaults.Timeouts.EnsureSSHKey + } + if cfg.Timeouts.FetchServerDetails == 0 { + cfg.Timeouts.FetchServerDetails = defaults.Timeouts.FetchServerDetails + } + if cfg.Timeouts.ActivateRescue == 0 { + cfg.Timeouts.ActivateRescue = defaults.Timeouts.ActivateRescue + } + if cfg.Timeouts.RebootToRescue == 0 { + cfg.Timeouts.RebootToRescue = 
defaults.Timeouts.RebootToRescue + } + if cfg.Timeouts.WaitForRescue == 0 { + cfg.Timeouts.WaitForRescue = defaults.Timeouts.WaitForRescue + } + + return cfg +} + +func (cfg Config) Validate() error { + if cfg.ServerID <= 0 { + return fmt.Errorf("server id must be > 0, got %d", cfg.ServerID) + } + if cfg.Input == nil { + return errors.New("config Input must not be nil") + } + if cfg.Output == nil { + return errors.New("config Output must not be nil") + } + if cfg.LogOutput == nil { + return errors.New("config LogOutput must not be nil") + } + if cfg.PollInterval <= 0 { + return fmt.Errorf("--poll-interval must be > 0, got %s", cfg.PollInterval) + } + if err := validateTimeout("--timeout-load-input", cfg.Timeouts.LoadInput); err != nil { + return err + } + if err := validateTimeout("--timeout-ensure-ssh-key", cfg.Timeouts.EnsureSSHKey); err != nil { + return err + } + if err := validateTimeout("--timeout-fetch-server", cfg.Timeouts.FetchServerDetails); err != nil { + return err + } + if err := validateTimeout("--timeout-activate-rescue", cfg.Timeouts.ActivateRescue); err != nil { + return err + } + if err := validateTimeout("--timeout-reboot-rescue", cfg.Timeouts.RebootToRescue); err != nil { + return err + } + if err := validateTimeout("--timeout-wait-rescue", cfg.Timeouts.WaitForRescue); err != nil { + return err + } + + return nil +} + +func Run(ctx context.Context, cfg Config) error { + cfg = cfg.withDefaults() + if err := cfg.Validate(); err != nil { + return err + } + + r := &runner{ + cfg: cfg, + sshFactory: sshclient.NewFactory(), + } + + if err := runWithTimeout(ctx, cfg.Timeouts.LoadInput, func(context.Context) error { + creds, err := loadEnvCredentials() + if err != nil { + return err + } + r.creds = creds + return nil + }); err != nil { + return err + } + + r.robotClient = robotclient.NewFactory().NewClient(robotclient.Credentials{ + Username: r.creds.robotUser, + Password: r.creds.robotPass, + }) + + if err := r.ensureSSHKey(ctx); err != nil { + return 
err + } + if err := r.fetchServerDetails(ctx); err != nil { + return err + } + if err := r.confirmRescueReboot(); err != nil { + return err + } + if err := r.activateRescue(ctx); err != nil { + return err + } + if err := r.rebootToRescue(ctx); err != nil { + return err + } + + ssh, err := r.waitForRescue(ctx) + if err != nil { + return err + } + + disks, err := disksFromStorageOutput(ssh.GetHardwareDetailsStorage()) + if err != nil { + return err + } + + template := renderTemplate(r.server, effectiveName(cfg.Name, cfg.ServerID), disks) + if _, err := io.WriteString(cfg.Output, template); err != nil { + return fmt.Errorf("write template: %w", err) + } + + return nil +} + +func (r *runner) ensureSSHKey(ctx context.Context) error { + return runWithTimeout(ctx, r.cfg.Timeouts.EnsureSSHKey, func(context.Context) error { + r.logf("ensuring Robot SSH key %q", r.creds.sshKeyName) + + fingerprint, err := ensureRobotSSHKey(r.robotClient, r.creds.sshKeyName, r.creds.sshPub) + if err != nil { + return err + } + + r.fingerprint = fingerprint + r.logf("using Robot SSH key fingerprint %q", r.fingerprint) + return nil + }) +} + +func (r *runner) fetchServerDetails(ctx context.Context) error { + return runWithTimeout(ctx, r.cfg.Timeouts.FetchServerDetails, func(context.Context) error { + r.logf("fetching Robot server %d", r.cfg.ServerID) + + server, err := r.robotClient.GetBMServer(r.cfg.ServerID) + if err != nil { + return fmt.Errorf("get robot server %d: %w", r.cfg.ServerID, err) + } + if server.ServerIP == "" { + return fmt.Errorf("server %d has empty server_ip in Robot API", r.cfg.ServerID) + } + + r.server = server + r.logf("server %d name=%q ip=%s", r.cfg.ServerID, server.Name, server.ServerIP) + return nil + }) +} + +func (r *runner) confirmRescueReboot() error { + if r.cfg.Force { + r.logf("confirmation skipped because --force was provided") + return nil + } + + _, err := fmt.Fprintf( + r.cfg.LogOutput, + "WARNING: this will reboot server %d (%q, %s) into rescue to inspect 
its disks.\nType \"yes\" to continue: ", + r.cfg.ServerID, + r.server.Name, + r.server.ServerIP, + ) + if err != nil { + return fmt.Errorf("write confirmation prompt: %w", err) + } + + reader := bufio.NewReader(r.cfg.Input) + confirmation, err := reader.ReadString('\n') + if err != nil { + return fmt.Errorf("read confirmation: %w", err) + } + + confirmation = strings.TrimSpace(confirmation) + if confirmation != "yes" { + return fmt.Errorf("confirmation failed: expected %q, got %q", "yes", confirmation) + } + + r.logf("reboot confirmed for server %d", r.cfg.ServerID) + return nil +} + +func (r *runner) activateRescue(ctx context.Context) error { + return runWithTimeout(ctx, r.cfg.Timeouts.ActivateRescue, func(context.Context) error { + r.logf("activating rescue boot") + + _, deleteErr := r.robotClient.DeleteBootRescue(r.cfg.ServerID) + if deleteErr != nil && !models.IsError(deleteErr, models.ErrorCodeNotFound) { + return fmt.Errorf("delete boot rescue: %w", deleteErr) + } + if _, err := r.robotClient.SetBootRescue(r.cfg.ServerID, r.fingerprint); err != nil { + return fmt.Errorf("set boot rescue: %w", err) + } + + r.logf("rescue boot activated") + return nil + }) +} + +func (r *runner) rebootToRescue(ctx context.Context) error { + return runWithTimeout(ctx, r.cfg.Timeouts.RebootToRescue, func(context.Context) error { + r.logf("requesting hardware reboot into rescue") + + if _, err := r.robotClient.RebootBMServer(r.cfg.ServerID, infrav1.RebootTypeHardware); err != nil { + return fmt.Errorf("robot reboot hw: %w", err) + } + + return nil + }) +} + +func (r *runner) waitForRescue(ctx context.Context) (sshclient.Client, error) { + var ssh sshclient.Client + err := runWithTimeout(ctx, r.cfg.Timeouts.WaitForRescue, func(stepCtx context.Context) error { + ssh = r.sshFactory.NewClient(sshclient.Input{ + IP: r.server.ServerIP, + Port: sshPort, + PrivateKey: r.creds.sshPriv, + }) + + r.logf("waiting for rescue SSH on %s", r.server.ServerIP) + return waitUntil(stepCtx, 
r.cfg.PollInterval, func(format string, args ...any) { + r.logf(format, args...) + }, func() (bool, string, error) { + out := ssh.GetHostName() + if out.Err == nil { + hostName := strings.TrimSpace(out.StdOut) + if hostName == rescueHostName { + return true, fmt.Sprintf("rescue reachable (hostname=%q)", hostName), nil + } + if hostName == "" { + return false, "connected but empty hostname", nil + } + return false, fmt.Sprintf("host reachable but hostname=%q (want=%q)", hostName, rescueHostName), nil + } + return false, fmt.Sprintf("waiting for rescue ssh: %v", out.Err), nil + }) + }) + if err != nil { + return nil, err + } + + return ssh, nil +} + +func (r *runner) logf(format string, args ...any) { + _, _ = fmt.Fprintf(r.cfg.LogOutput, "%s\n", fmt.Sprintf(format, args...)) +} + +func runWithTimeout(ctx context.Context, timeout time.Duration, fn func(context.Context) error) error { + stepCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + if err := fn(stepCtx); err != nil { + if errors.Is(err, context.DeadlineExceeded) || errors.Is(stepCtx.Err(), context.DeadlineExceeded) { + return fmt.Errorf("timed out after %s: %w", timeout, err) + } + return err + } + + return nil +} + +func validateTimeout(flagName string, timeout time.Duration) error { + if timeout <= 0 { + return fmt.Errorf("%s must be > 0, got %s", flagName, timeout) + } + return nil +} + +func waitUntil(ctx context.Context, pollInterval time.Duration, progress func(format string, args ...any), check func() (done bool, message string, err error)) error { + ticker := time.NewTicker(pollInterval) + defer ticker.Stop() + + for { + done, message, err := check() + if err != nil { + return err + } + if message != "" { + progress("%s", message) + } + if done { + return nil + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + } + } +} + +func ensureRobotSSHKey(cli robotclient.Client, keyName, publicKey string) (string, error) { + keys, err := cli.ListSSHKeys() + if err != 
nil { + return "", fmt.Errorf("list ssh keys: %w", err) + } + for _, key := range keys { + if key.Name == keyName { + return key.Fingerprint, nil + } + } + + created, err := cli.SetSSHKey(keyName, publicKey) + if err != nil { + return "", fmt.Errorf("create ssh key %q: %w", keyName, err) + } + return created.Fingerprint, nil +} + +func loadEnvCredentials() (envCredentials, error) { + user := strings.TrimSpace(os.Getenv("HETZNER_ROBOT_USER")) + pass := strings.TrimSpace(os.Getenv("HETZNER_ROBOT_PASSWORD")) + if user == "" || pass == "" { + return envCredentials{}, errors.New("HETZNER_ROBOT_USER and HETZNER_ROBOT_PASSWORD are required") + } + + keyName := strings.TrimSpace(os.Getenv("SSH_KEY_NAME")) + if keyName == "" { + return envCredentials{}, errors.New("SSH_KEY_NAME is required") + } + + sshPub, err := loadKeyMaterial("HETZNER_SSH_PUB_PATH", "HETZNER_SSH_PUB") + if err != nil { + return envCredentials{}, fmt.Errorf("load public key: %w", err) + } + sshPriv, err := loadKeyMaterial("HETZNER_SSH_PRIV_PATH", "HETZNER_SSH_PRIV") + if err != nil { + return envCredentials{}, fmt.Errorf("load private key: %w", err) + } + + return envCredentials{ + robotUser: user, + robotPass: pass, + sshKeyName: keyName, + sshPub: strings.TrimSpace(sshPub), + sshPriv: strings.TrimSpace(sshPriv), + }, nil +} + +func loadKeyMaterial(pathVar, base64Var string) (string, error) { + path := strings.TrimSpace(os.Getenv(pathVar)) + if path != "" { + data, err := os.ReadFile(path) // #nosec G304,G703 -- file path is intentionally provided via environment variable. 
+ if err != nil { + return "", fmt.Errorf("read %s (%s): %w", pathVar, path, err) + } + if len(data) == 0 { + return "", fmt.Errorf("%s points to empty file: %s", pathVar, path) + } + return string(data), nil + } + + raw := strings.TrimSpace(os.Getenv(base64Var)) + if raw == "" { + return "", fmt.Errorf("set either %s or %s", pathVar, base64Var) + } + + decoded, err := base64.StdEncoding.DecodeString(raw) + if err == nil { + if len(decoded) == 0 { + return "", fmt.Errorf("%s decoded to empty value", base64Var) + } + return string(decoded), nil + } + + return raw, nil +} + +func disksFromStorageOutput(out sshclient.Output) ([]disk, error) { + if out.Err != nil { + return nil, fmt.Errorf("get hardware details storage: %w", out.Err) + } + if strings.TrimSpace(out.StdOut) == "" { + return nil, errors.New("storage output is empty") + } + + lines := strings.Split(strings.TrimSpace(out.StdOut), "\n") + disks := make([]disk, 0, len(lines)) + for _, line := range lines { + var diskInfo storageDetails + if err := json.Unmarshal([]byte(validJSONFromSSHOutput(line)), &diskInfo); err != nil { + return nil, fmt.Errorf("parse lsblk line %q: %w", line, err) + } + if diskInfo.Type != "disk" { + continue + } + + sizeBytes, err := strconv.ParseInt(strings.TrimSpace(diskInfo.Size), 10, 64) + if err != nil { + return nil, fmt.Errorf("parse disk size %q for %q: %w", diskInfo.Size, diskInfo.Name, err) + } + + wwn := strings.TrimSpace(diskInfo.WWN) + if wwn == "" { + continue + } + + disks = append(disks, disk{ + Name: strings.TrimSpace(diskInfo.Name), + WWN: wwn, + SizeBytes: sizeBytes, + }) + } + + sort.Slice(disks, func(i, j int) bool { + if disks[i].SizeBytes != disks[j].SizeBytes { + return disks[i].SizeBytes < disks[j].SizeBytes + } + return normalizeWWN(disks[i].WWN) < normalizeWWN(disks[j].WWN) + }) + + if len(disks) == 0 { + return nil, errors.New("no disk with WWN found") + } + if _, _, err := selectDisk(disks); err != nil { + return nil, err + } + + return disks, nil +} + +func 
selectDisk(disks []disk) (disk, int, error) { + for idx, disk := range disks { + if disk.SizeBytes > minDiskSizeBytes { + return disk, idx, nil + } + } + + return disk{}, -1, fmt.Errorf("no disk with WWN and size > %d bytes found", minDiskSizeBytes) +} + +func effectiveName(name string, serverID int) string { + name = strings.TrimSpace(name) + if name != "" { + return name + } + return fmt.Sprintf("bm-%d", serverID) +} + +func renderTemplate(server *models.Server, name string, disks []disk) string { + selected, selectedIndex, err := selectDisk(disks) + if err != nil { + panic(err) + } + + var b strings.Builder + fmt.Fprintf(&b, "# Generated from Hetzner Robot server %d.\n", server.ServerNumber) + b.WriteString("# Candidate WWNs are sorted by disk size, then WWN.\n") + fmt.Fprintf(&b, "# The selected WWN is the smallest disk above %d bytes.\n", minDiskSizeBytes) + b.WriteString("# Review the selected disk before applying this object.\n") + b.WriteString("apiVersion: infrastructure.cluster.x-k8s.io/v1beta1\n") + b.WriteString("kind: HetznerBareMetalHost\n") + b.WriteString("metadata:\n") + fmt.Fprintf(&b, " name: %q\n", name) + b.WriteString("spec:\n") + fmt.Fprintf(&b, " serverID: %d", server.ServerNumber) + if suffix := robotServerComment(server); suffix != "" { + fmt.Fprintf(&b, " # %s", suffix) + } + b.WriteString("\n") + b.WriteString(" rootDeviceHints:\n") + for idx, disk := range disks { + if idx == selectedIndex { + fmt.Fprintf(&b, " wwn: %q\n", selected.WWN) + continue + } + fmt.Fprintf(&b, " # wwn: %q\n", disk.WWN) + } + b.WriteString(" maintenanceMode: false\n") + fmt.Fprintf(&b, " description: %q\n", defaultDescription(server)) + return b.String() +} + +func robotServerComment(server *models.Server) string { + parts := make([]string, 0, 2) + if name := sanitizeComment(server.Name); name != "" { + parts = append(parts, fmt.Sprintf("Robot name: %s", name)) + } + if ip := sanitizeComment(server.ServerIP); ip != "" { + parts = append(parts, fmt.Sprintf("IP: 
%s", ip)) + } + return strings.Join(parts, ", ") +} + +func defaultDescription(server *models.Server) string { + if name := strings.TrimSpace(server.Name); name != "" { + return name + } + return fmt.Sprintf("Robot server %d", server.ServerNumber) +} + +func sanitizeComment(value string) string { + value = strings.ReplaceAll(value, "\n", " ") + value = strings.ReplaceAll(value, "\r", " ") + value = strings.ReplaceAll(value, "#", "") + return strings.TrimSpace(value) +} + +func validJSONFromSSHOutput(str string) string { + tempString1 := strings.ReplaceAll(str, `" `, `","`) + tempString2 := strings.ReplaceAll(tempString1, `="`, `":"`) + return fmt.Sprintf(`{"%s}`, strings.TrimSpace(tempString2)) +} + +func normalizeWWN(s string) string { + return strings.ToLower(strings.TrimSpace(s)) +} diff --git a/internal/createhosttemplate/createhosttemplate_test.go b/internal/createhosttemplate/createhosttemplate_test.go new file mode 100644 index 0000000..e7fda00 --- /dev/null +++ b/internal/createhosttemplate/createhosttemplate_test.go @@ -0,0 +1,84 @@ +package createhosttemplate + +import ( + "strings" + "testing" + + "github.com/syself/hrobot-go/models" + + sshclient "github.com/syself/cluster-api-provider-hetzner/pkg/services/baremetal/client/ssh" +) + +func TestDisksFromStorageOutput(t *testing.T) { + t.Parallel() + + out := sshclient.Output{ + StdOut: `NAME="loop0" TYPE="loop" SIZE="3068773888" WWN="" +NAME="sda" TYPE="disk" SIZE="1000000000" WWN="0x-too-small" +NAME="sdb" TYPE="disk" SIZE="2000000000" WWN="0x0002" +NAME="sdc" TYPE="disk" SIZE="2000000000" WWN="0x0001" +NAME="sdd" TYPE="disk" SIZE="4000000000" WWN="" +NAME="sde" TYPE="disk" SIZE="8000000000" WWN="0x0003"`, + } + + disks, err := disksFromStorageOutput(out) + if err != nil { + t.Fatalf("disksFromStorageOutput() error = %v", err) + } + + if len(disks) != 4 { + t.Fatalf("disksFromStorageOutput() len = %d, want 4", len(disks)) + } + + if disks[0].WWN != "0x-too-small" || disks[0].SizeBytes != 1000000000 { + 
t.Fatalf("first disk = %+v, want WWN 0x-too-small and size 1000000000", disks[0]) + } + if disks[1].WWN != "0x0001" || disks[1].SizeBytes != 2000000000 { + t.Fatalf("second disk = %+v, want WWN 0x0001 and size 2000000000", disks[1]) + } + if disks[2].WWN != "0x0002" || disks[2].SizeBytes != 2000000000 { + t.Fatalf("third disk = %+v, want WWN 0x0002 and size 2000000000", disks[2]) + } + if disks[3].WWN != "0x0003" || disks[3].SizeBytes != 8000000000 { + t.Fatalf("fourth disk = %+v, want WWN 0x0003 and size 8000000000", disks[3]) + } + + selected, selectedIndex, err := selectDisk(disks) + if err != nil { + t.Fatalf("selectDisk() error = %v", err) + } + if selectedIndex != 1 || selected.WWN != "0x0001" { + t.Fatalf("selectDisk() = (%+v, %d), want WWN 0x0001 at index 1", selected, selectedIndex) + } +} + +func TestRenderTemplate(t *testing.T) { + t.Parallel() + + server := &models.Server{ + ServerNumber: 1751550, + ServerIP: "144.76.74.13", + Name: "ci-box-1751550", + } + disks := []disk{ + {Name: "nvme1n1", WWN: "0x0001", SizeBytes: 2000000000}, + {Name: "nvme2n1", WWN: "0x0002", SizeBytes: 4000000000}, + } + + got := renderTemplate(server, effectiveName("", server.ServerNumber), disks) + + wantContains := []string{ + `name: "bm-1751550"`, + `serverID: 1751550 # Robot name: ci-box-1751550, IP: 144.76.74.13`, + `wwn: "0x0001"`, + `# wwn: "0x0002"`, + `maintenanceMode: false`, + `description: "ci-box-1751550"`, + } + + for _, want := range wantContains { + if !strings.Contains(got, want) { + t.Fatalf("renderTemplate() missing %q in output:\n%s", want, got) + } + } +} diff --git a/internal/tools/readmegen/main.go b/internal/tools/readmegen/main.go index 2d429a2..1687fe6 100644 --- a/internal/tools/readmegen/main.go +++ b/internal/tools/readmegen/main.go @@ -28,6 +28,12 @@ const generatedSectionTemplate = `## CLI Help ` + "```text" + ` {{CHECK_HELP}} ` + "```" + ` + +### ` + "`caphcli create-host-template --help`" + ` + +` + "```text" + ` +{{CREATE_HOST_TEMPLATE_HELP}} +` 
+ "```" + ` ` func main() { @@ -41,8 +47,14 @@ func main() { fail(err) } + createHostTemplateHelp, err := renderHelp("create-host-template") + if err != nil { + fail(err) + } + generatedSection := strings.ReplaceAll(generatedSectionTemplate, "{{ROOT_HELP}}", strings.TrimSpace(rootHelp)) generatedSection = strings.ReplaceAll(generatedSection, "{{CHECK_HELP}}", strings.TrimSpace(checkHelp)) + generatedSection = strings.ReplaceAll(generatedSection, "{{CREATE_HOST_TEMPLATE_HELP}}", strings.TrimSpace(createHostTemplateHelp)) readme, err := os.ReadFile(readmePath) if err != nil { From e4aeb8da1dde480f0104107757137b95e7ce7a0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20G=C3=BCttler?= Date: Wed, 8 Apr 2026 15:53:01 +0200 Subject: [PATCH 2/2] :seedling: Rename create-host-template to create-host-yaml --- README.md | 24 +++++++++---------- ...reatehosttemplate.go => createhostyaml.go} | 22 ++++++++--------- internal/cmd/root.go | 2 +- .../createhostyaml.go} | 2 +- .../createhostyaml_test.go} | 2 +- internal/provisioncheck/provisioncheck.go | 1 + internal/tools/readmegen/main.go | 8 +++---- 7 files changed, 31 insertions(+), 30 deletions(-) rename internal/cmd/{createhosttemplate.go => createhostyaml.go} (75%) rename internal/{createhosttemplate/createhosttemplate.go => createhostyaml/createhostyaml.go} (99%) rename internal/{createhosttemplate/createhosttemplate_test.go => createhostyaml/createhostyaml_test.go} (98%) diff --git a/README.md b/README.md index 3ea99ce..e0b4c27 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ go run github.com/syself/caphcli@latest -h If you have new Hetzner Baremetal (Robot) Server, then create a HetznerBareMetalHost YAML file: ```console -go run github.com/syself/caphcli@latest create-host-template 1234567 1234567.yaml +go run github.com/syself/caphcli@latest create-host-yaml 1234567 1234567.yaml ``` This will create a HetznerBareMetalHost YAML file: `1234567.yaml` @@ -47,10 +47,10 @@ Usage: caphcli [command] Available Commands: - check-bm-servers Validate rescue and provisioning 
reliability for one bare-metal server - completion Generate the autocompletion script for the specified shell - create-host-template Generate a HetznerBareMetalHost template for one Robot server - help Help about any command + check-bm-servers Validate rescue and provisioning reliability for one bare-metal server + completion Generate the autocompletion script for the specified shell + create-host-yaml Generate a HetznerBareMetalHost YAML file for one Robot server + help Help about any command Flags: -h, --help help for caphcli @@ -93,25 +93,25 @@ Flags: --timeout-wait-rescue duration Timeout for waiting until rescue SSH is reachable (default 6m0s) ``` -### `caphcli create-host-template --help` +### `caphcli create-host-yaml --help` ```text -Generate a HetznerBareMetalHost YAML template for one Hetzner Robot server. +Generate a HetznerBareMetalHost YAML file for one Hetzner Robot server. The command talks directly to Hetzner Robot, ensures rescue SSH access, reboots the target server into rescue once, inspects the available disks, and writes a -template YAML to the requested output file. Progress and confirmation prompts go to stderr. +YAML file to the requested output path. Progress and confirmation prompts go to stderr. 
Usage: - caphcli create-host-template SERVER_ID OUTPUT_FILE [flags] + caphcli create-host-yaml SERVER_ID OUTPUT_FILE [flags] Examples: - caphcli create-host-template 1751550 host.yaml - caphcli create-host-template --force --name bm-e2e-1751550 1751550 host.yaml + caphcli create-host-yaml 1751550 host.yaml + caphcli create-host-yaml --force --name bm-e2e-1751550 1751550 host.yaml Flags: --force Skip the reboot confirmation prompt - -h, --help help for create-host-template + -h, --help help for create-host-yaml --name string metadata.name for the generated HetznerBareMetalHost (default: bm-SERVER_ID) --poll-interval duration Polling interval while waiting for rescue SSH (default 10s) --timeout-activate-rescue duration Timeout for activating rescue boot (default 45s) diff --git a/internal/cmd/createhosttemplate.go b/internal/cmd/createhostyaml.go similarity index 75% rename from internal/cmd/createhosttemplate.go rename to internal/cmd/createhostyaml.go index a4a661d..8c5c7fc 100644 --- a/internal/cmd/createhosttemplate.go +++ b/internal/cmd/createhostyaml.go @@ -8,25 +8,25 @@ import ( "github.com/spf13/cobra" - "github.com/syself/caphcli/internal/createhosttemplate" + "github.com/syself/caphcli/internal/createhostyaml" "github.com/syself/caphcli/internal/provisioncheck" ) -func newCreateHostTemplateCommand() *cobra.Command { - cfg := createhosttemplate.DefaultConfig() +func newCreateHostYAMLCommand() *cobra.Command { + cfg := createhostyaml.DefaultConfig() cfg.Input = os.Stdin cfg.LogOutput = os.Stderr cmd := &cobra.Command{ - Use: "create-host-template SERVER_ID OUTPUT_FILE", - Short: "Generate a HetznerBareMetalHost template for one Robot server", - Long: `Generate a HetznerBareMetalHost YAML template for one Hetzner Robot server. + Use: "create-host-yaml SERVER_ID OUTPUT_FILE", + Short: "Generate a HetznerBareMetalHost YAML file for one Robot server", + Long: `Generate a HetznerBareMetalHost YAML file for one Hetzner Robot server. 
The command talks directly to Hetzner Robot, ensures rescue SSH access, reboots the target server into rescue once, inspects the available disks, and writes a -template YAML to the requested output file. Progress and confirmation prompts go to stderr.`, - Example: ` caphcli create-host-template 1751550 host.yaml - caphcli create-host-template --force --name bm-e2e-1751550 1751550 host.yaml`, +YAML file to the requested output path. Progress and confirmation prompts go to stderr.`, + Example: ` caphcli create-host-yaml 1751550 host.yaml + caphcli create-host-yaml --force --name bm-e2e-1751550 1751550 host.yaml`, Args: cobra.ExactArgs(2), RunE: func(_ *cobra.Command, args []string) error { serverID, err := strconv.Atoi(args[0]) @@ -47,8 +47,8 @@ template YAML to the requested output file. Progress and confirmation prompts go }() cfg.Output = f - if err := createhosttemplate.Run(context.Background(), cfg); err != nil { - return fmt.Errorf("caphcli create-host-template failed for server %d: %w", cfg.ServerID, err) + if err := createhostyaml.Run(context.Background(), cfg); err != nil { + return fmt.Errorf("caphcli create-host-yaml failed for server %d: %w", cfg.ServerID, err) } if err := f.Close(); err != nil { diff --git a/internal/cmd/root.go b/internal/cmd/root.go index b77eb94..07944b6 100644 --- a/internal/cmd/root.go +++ b/internal/cmd/root.go @@ -21,7 +21,7 @@ func NewRootCommand() *cobra.Command { } rootCmd.AddCommand(newCheckBMServersCommand()) - rootCmd.AddCommand(newCreateHostTemplateCommand()) + rootCmd.AddCommand(newCreateHostYAMLCommand()) return rootCmd } diff --git a/internal/createhosttemplate/createhosttemplate.go b/internal/createhostyaml/createhostyaml.go similarity index 99% rename from internal/createhosttemplate/createhosttemplate.go rename to internal/createhostyaml/createhostyaml.go index c912267..ddf7038 100644 --- a/internal/createhosttemplate/createhosttemplate.go +++ b/internal/createhostyaml/createhostyaml.go @@ -1,4 +1,4 @@ -package 
createhosttemplate +package createhostyaml import ( "bufio" diff --git a/internal/createhosttemplate/createhosttemplate_test.go b/internal/createhostyaml/createhostyaml_test.go similarity index 98% rename from internal/createhosttemplate/createhosttemplate_test.go rename to internal/createhostyaml/createhostyaml_test.go index e7fda00..66129cf 100644 --- a/internal/createhosttemplate/createhosttemplate_test.go +++ b/internal/createhostyaml/createhostyaml_test.go @@ -1,4 +1,4 @@ -package createhosttemplate +package createhostyaml import ( "strings" diff --git a/internal/provisioncheck/provisioncheck.go b/internal/provisioncheck/provisioncheck.go index d7442c7..e9bd3a3 100644 --- a/internal/provisioncheck/provisioncheck.go +++ b/internal/provisioncheck/provisioncheck.go @@ -361,6 +361,7 @@ func (r *runner) run(ctx context.Context) error { } } + _, _ = fmt.Fprintln(r.out) r.logf("all checks passed: machine %q (serverID=%d) completed two rescue+install+boot cycles", r.host.Name, r.host.Spec.ServerID) return nil } diff --git a/internal/tools/readmegen/main.go b/internal/tools/readmegen/main.go index 1687fe6..4ce3c21 100644 --- a/internal/tools/readmegen/main.go +++ b/internal/tools/readmegen/main.go @@ -29,10 +29,10 @@ const generatedSectionTemplate = `## CLI Help {{CHECK_HELP}} ` + "```" + ` -### ` + "`caphcli create-host-template --help`" + ` +### ` + "`caphcli create-host-yaml --help`" + ` ` + "```text" + ` -{{CREATE_HOST_TEMPLATE_HELP}} +{{CREATE_HOST_YAML_HELP}} ` + "```" + ` ` @@ -47,14 +47,14 @@ func main() { fail(err) } - createHostTemplateHelp, err := renderHelp("create-host-template") + createHostYAMLHelp, err := renderHelp("create-host-yaml") if err != nil { fail(err) } generatedSection := strings.ReplaceAll(generatedSectionTemplate, "{{ROOT_HELP}}", strings.TrimSpace(rootHelp)) generatedSection = strings.ReplaceAll(generatedSection, "{{CHECK_HELP}}", strings.TrimSpace(checkHelp)) - generatedSection = strings.ReplaceAll(generatedSection, 
"{{CREATE_HOST_TEMPLATE_HELP}}", strings.TrimSpace(createHostTemplateHelp)) + generatedSection = strings.ReplaceAll(generatedSection, "{{CREATE_HOST_YAML_HELP}}", strings.TrimSpace(createHostYAMLHelp)) readme, err := os.ReadFile(readmePath) if err != nil {