From 7a9a2f7061f23214fdca43a0a63a9ee5184bfcf9 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Tue, 10 Feb 2026 20:46:38 +0100 Subject: [PATCH 1/6] docs: add cluster-autoscaler guides for Hetzner and Azure Add comprehensive documentation for deploying cluster-autoscaler on Cozystack with Hetzner Cloud and Azure providers, covering Talos image creation, infrastructure setup, and troubleshooting. Co-Authored-By: Claude Signed-off-by: Andrei Kvapil --- .../operations/cluster/autoscaling/_index.md | 16 + .../operations/cluster/autoscaling/azure.md | 346 ++++++++++++++++ .../operations/cluster/autoscaling/hetzner.md | 376 ++++++++++++++++++ 3 files changed, 738 insertions(+) create mode 100644 content/en/docs/operations/cluster/autoscaling/_index.md create mode 100644 content/en/docs/operations/cluster/autoscaling/azure.md create mode 100644 content/en/docs/operations/cluster/autoscaling/hetzner.md diff --git a/content/en/docs/operations/cluster/autoscaling/_index.md b/content/en/docs/operations/cluster/autoscaling/_index.md new file mode 100644 index 00000000..66f22039 --- /dev/null +++ b/content/en/docs/operations/cluster/autoscaling/_index.md @@ -0,0 +1,16 @@ +--- +title: "Cluster Autoscaling" +linkTitle: "Autoscaling" +description: "Automatic node scaling for Cozystack management clusters using Kubernetes Cluster Autoscaler." +weight: 25 +--- + +The `cluster-autoscaler` system package enables automatic node scaling for Cozystack management clusters. +It monitors pending pods and automatically provisions or removes cloud nodes based on demand. + +Cozystack provides pre-configured variants for different cloud providers: + +- [Hetzner Cloud]({{% ref "hetzner" %}}) -- scale using Hetzner Cloud servers +- [Azure]({{% ref "azure" %}}) -- scale using Azure Virtual Machine Scale Sets + +Each variant is deployed as a separate Cozystack Package with provider-specific configuration. diff --git a/content/en/docs/operations/cluster/autoscaling/azure.md b/content/en/docs/operations/cluster/autoscaling/azure.md new file mode 100644 index 00000000..7c5e38d1 --- /dev/null +++ b/content/en/docs/operations/cluster/autoscaling/azure.md @@ -0,0 +1,346 @@ +--- +title: "Cluster Autoscaler for Azure" +linkTitle: "Azure" +description: "Configure automatic node scaling in Azure with Talos Linux and VMSS." +weight: 20 +--- + +This guide explains how to configure cluster-autoscaler for automatic node scaling in Azure with Talos Linux. 
+ +## Prerequisites + +- Azure subscription with Contributor Service Principal +- `az` CLI installed +- Existing Talos Kubernetes cluster with Kilo WireGuard mesh +- Talos worker machine config + +## Step 1: Create Azure Infrastructure + +### 1.1 Login with Service Principal + +```bash +az login --service-principal \ + --username "" \ + --password "" \ + --tenant "" +``` + +### 1.2 Create Resource Group + +```bash +az group create \ + --name \ + --location +``` + +### 1.3 Create VNet and Subnet + +```bash +az network vnet create \ + --resource-group \ + --name cozystack-vnet \ + --address-prefix 10.2.0.0/16 \ + --subnet-name workers \ + --subnet-prefix 10.2.0.0/24 \ + --location +``` + +### 1.4 Create Network Security Group + +```bash +az network nsg create \ + --resource-group \ + --name cozystack-nsg \ + --location + +# Allow WireGuard +az network nsg rule create \ + --resource-group \ + --nsg-name cozystack-nsg \ + --name AllowWireGuard \ + --priority 100 \ + --direction Inbound \ + --access Allow \ + --protocol Udp \ + --destination-port-ranges 51820 + +# Allow Talos API +az network nsg rule create \ + --resource-group \ + --nsg-name cozystack-nsg \ + --name AllowTalosAPI \ + --priority 110 \ + --direction Inbound \ + --access Allow \ + --protocol Tcp \ + --destination-port-ranges 50000 + +# Associate NSG with subnet +az network vnet subnet update \ + --resource-group \ + --vnet-name cozystack-vnet \ + --name workers \ + --network-security-group cozystack-nsg +``` + +## Step 2: Create Talos Image + +### 2.1 Generate Schematic ID + +Create a schematic at [factory.talos.dev](https://factory.talos.dev) with required extensions: + +```bash +curl -s -X POST https://factory.talos.dev/schematics \ + -H "Content-Type: application/json" \ + -d '{ + "customization": { + "systemExtensions": { + "officialExtensions": [ + "siderolabs/amd-ucode", + "siderolabs/amdgpu-firmware", + "siderolabs/bnx2-bnx2x", + "siderolabs/drbd", + "siderolabs/i915-ucode", + "siderolabs/intel-ice-firmware", + "siderolabs/intel-ucode", + "siderolabs/qlogic-firmware", + "siderolabs/zfs" + ] + } + } + }' +``` + +Save the returned `id` as `SCHEMATIC_ID`. 
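+
+For convenience, you can capture the returned ID directly into a shell variable. This is a small helper, assuming `jq` is installed and the customization JSON shown above is saved as `schematic.json`:
+
+```bash
+# Reuse the customization payload from above and extract the returned id
+SCHEMATIC_ID=$(curl -s -X POST https://factory.talos.dev/schematics \
+  -H "Content-Type: application/json" \
+  -d @schematic.json | jq -r '.id')
+echo "$SCHEMATIC_ID"
+```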
+ +### 2.2 Create Managed Image from VHD + +```bash +# Download Talos Azure image +curl -L -o azure-amd64.raw.xz \ + "https://factory.talos.dev/image/${SCHEMATIC_ID}//azure-amd64.raw.xz" + +# Decompress +xz -d azure-amd64.raw.xz + +# Convert to VHD +qemu-img convert -f raw -o subformat=fixed,force_size -O vpc \ + azure-amd64.raw azure-amd64.vhd + +# Get VHD size +VHD_SIZE=$(stat -f%z azure-amd64.vhd) # macOS +# VHD_SIZE=$(stat -c%s azure-amd64.vhd) # Linux + +# Create managed disk for upload +az disk create \ + --resource-group \ + --name talos- \ + --location \ + --upload-type Upload \ + --upload-size-bytes $VHD_SIZE \ + --sku Standard_LRS \ + --os-type Linux \ + --hyper-v-generation V2 + +# Get SAS URL for upload +SAS_URL=$(az disk grant-access \ + --resource-group \ + --name talos- \ + --access-level Write \ + --duration-in-seconds 3600 \ + --query accessSAS --output tsv) + +# Upload VHD +azcopy copy azure-amd64.vhd "$SAS_URL" --blob-type PageBlob + +# Revoke access +az disk revoke-access \ + --resource-group \ + --name talos- + +# Create managed image from disk +az image create \ + --resource-group \ + --name talos- \ + --location \ + --os-type Linux \ + --hyper-v-generation V2 \ + --source $(az disk show --resource-group \ + --name talos- --query id --output tsv) +``` + +## Step 3: Create Talos Machine Config for Azure + +Create a machine config similar to the Hetzner one, with these Azure-specific changes: + +```yaml +machine: + nodeLabels: + kilo.squat.ai/location: azure + topology.kubernetes.io/zone: azure + kubelet: + nodeIP: + validSubnets: + - 10.2.0.0/24 # Azure VNet subnet +``` + +All other settings (cluster tokens, control plane endpoint, extensions, etc.) remain the same as the Hetzner config. + +## Step 4: Create VMSS (Virtual Machine Scale Set) + +```bash +IMAGE_ID=$(az image show \ + --resource-group \ + --name talos- \ + --query id --output tsv) + +az vmss create \ + --resource-group \ + --name workers \ + --location \ + --orchestration-mode Uniform \ + --image "$IMAGE_ID" \ + --vm-sku Standard_D2s_v3 \ + --instance-count 0 \ + --vnet-name cozystack-vnet \ + --subnet workers \ + --public-ip-per-vm \ + --custom-data machineconfig-azure.yaml \ + --security-type Standard \ + --admin-username talos \ + --authentication-type ssh \ + --generate-ssh-keys \ + --upgrade-policy-mode Manual +``` + +{{% alert title="Important" color="warning" %}} +- Must use `--orchestration-mode Uniform` (cluster-autoscaler requires Uniform mode) +- Must use `--public-ip-per-vm` for WireGuard connectivity +- Check VM quota in your region: `az vm list-usage --location ` +- `--custom-data` passes the Talos machine config to new instances +{{% /alert %}} + +## Step 5: Deploy Cluster Autoscaler + +Create the Package resource: + +```yaml +apiVersion: cozystack.io/v1alpha1 +kind: Package +metadata: + name: cozystack.cluster-autoscaler-azure +spec: + variant: default + components: + cluster-autoscaler-azure: + values: + cluster-autoscaler: + azureClientID: "" + azureClientSecret: "" + azureTenantID: "" + azureSubscriptionID: "" + azureResourceGroup: "" + azureVMType: "vmss" + autoscalingGroups: + - name: workers + minSize: 0 + maxSize: 10 +``` + +Apply: +```bash +kubectl apply -f package.yaml +``` + +## Step 6: Kilo WireGuard Endpoint Configuration + +Azure nodes behind NAT need their public IP advertised as the WireGuard endpoint. Without this, the WireGuard tunnel between on-premises and Azure nodes will not be established. 
+ +Each new Azure node needs the annotation: + +```bash +kubectl annotate node \ + kilo.squat.ai/force-endpoint=:51820 +``` + +### Automated Endpoint Configuration + +For automated endpoint detection, create a DaemonSet that runs on Azure nodes (`topology.kubernetes.io/zone=azure`) and: + +1. Queries Azure Instance Metadata Service (IMDS) for the public IP: + ```bash + curl -s -H "Metadata: true" \ + "http://169.254.169.254/metadata/instance/network/interface/0/ipv4/ipAddress/0/publicIpAddress?api-version=2021-02-01&format=text" + ``` +2. Annotates the node with `kilo.squat.ai/force-endpoint=:51820` + +This ensures new autoscaled nodes automatically get proper WireGuard connectivity. + +## Testing + +### Manual scale test + +```bash +# Scale up +az vmss scale --resource-group --name workers --new-capacity 1 + +# Check node joined +kubectl get nodes -o wide + +# Check WireGuard tunnel +kubectl logs -n cozy-kilo + +# Scale down +az vmss scale --resource-group --name workers --new-capacity 0 +``` + +### Autoscaler test + +Deploy a workload to trigger autoscaling: + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: test-azure-autoscale +spec: + replicas: 3 + selector: + matchLabels: + app: test-azure + template: + metadata: + labels: + app: test-azure + spec: + nodeSelector: + topology.kubernetes.io/zone: azure + containers: + - name: pause + image: registry.k8s.io/pause:3.9 + resources: + requests: + cpu: "500m" + memory: "512Mi" +``` + +## Troubleshooting + +### Node doesn't join cluster +- Check that the Talos machine config control plane endpoint is reachable from Azure +- Verify NSG rules allow outbound traffic to port 6443 +- Check VMSS instance provisioning state: `az vmss list-instances --resource-group --name workers` + +### WireGuard tunnel not established +- Verify `kilo.squat.ai/force-endpoint` annotation is set with the public IP +- Check NSG allows inbound UDP 51820 +- Inspect kilo logs: `kubectl logs -n cozy-kilo ` + +### VM quota errors +- Check quota: `az vm list-usage --location ` +- Request quota increase via Azure portal +- Try a different VM family that has available quota + +### SkuNotAvailable errors +- Some VM sizes may have capacity restrictions in certain regions +- Try a different VM size: `az vm list-skus --location --size ` diff --git a/content/en/docs/operations/cluster/autoscaling/hetzner.md b/content/en/docs/operations/cluster/autoscaling/hetzner.md new file mode 100644 index 00000000..4f187fa3 --- /dev/null +++ b/content/en/docs/operations/cluster/autoscaling/hetzner.md @@ -0,0 +1,376 @@ +--- +title: "Cluster Autoscaler for Hetzner Cloud" +linkTitle: "Hetzner" +description: "Configure automatic node scaling in Hetzner Cloud with Talos Linux." +weight: 10 +--- + +This guide explains how to configure cluster-autoscaler for automatic node scaling in Hetzner Cloud with Talos Linux. + +## Prerequisites + +- Hetzner Cloud account with API token +- `hcloud` CLI installed +- Existing Talos Kubernetes cluster +- Talos worker machine config + +## Step 1: Create Talos Image in Hetzner Cloud + +Hetzner doesn't support direct image uploads, so we need to create a snapshot via a temporary server. 
+ +### 1.1 Configure hcloud CLI + +```bash +export HCLOUD_TOKEN="" +``` + +### 1.2 Create temporary server in rescue mode + +```bash +# Create server (without starting) +hcloud server create \ + --name talos-image-builder \ + --type cpx22 \ + --image ubuntu-24.04 \ + --location fsn1 \ + --ssh-key \ + --start-after-create=false + +# Enable rescue mode and start +hcloud server enable-rescue --type linux64 --ssh-key talos-image-builder +hcloud server poweron talos-image-builder +``` + +### 1.3 Get server IP and write Talos image + +```bash +# Get server IP +SERVER_IP=$(hcloud server ip talos-image-builder) + +# SSH into rescue mode and write image +ssh root@$SERVER_IP + +# Inside rescue mode: +wget -O- "https://factory.talos.dev/image///hcloud-amd64.raw.xz" \ + | xz -d \ + | dd of=/dev/sda bs=4M status=progress +sync +exit +``` + +Get your schematic ID from https://factory.talos.dev with required extensions: +- `siderolabs/qemu-guest-agent` (required for Hetzner) +- Other extensions as needed (zfs, drbd, etc.) + +### 1.4 Create snapshot and cleanup + +```bash +# Power off and create snapshot +hcloud server poweroff talos-image-builder +hcloud server create-image --type snapshot --description "Talos v1.11.6" talos-image-builder + +# Get snapshot ID (save this for later) +hcloud image list --type snapshot + +# Delete temporary server +hcloud server delete talos-image-builder +``` + +## Step 2: Create Hetzner vSwitch (Optional but Recommended) + +Create a private network for communication between nodes: + +```bash +# Create network +hcloud network create --name cozystack-vswitch --ip-range 10.100.0.0/16 + +# Add subnet for your region (eu-central covers FSN1, NBG1) +hcloud network add-subnet cozystack-vswitch \ + --type cloud \ + --network-zone eu-central \ + --ip-range 10.100.0.0/24 +``` + +## Step 3: Create Talos Machine Config + +Create a worker machine config for autoscaled nodes. Important fields: + +```yaml +version: v1alpha1 +machine: + type: worker + token: + ca: + crt: + # Node labels (applied automatically on join) + nodeLabels: + kilo.squat.ai/location: hetzner-cloud + topology.kubernetes.io/zone: hetzner-cloud + kubelet: + image: ghcr.io/siderolabs/kubelet:v1.33.1 + # Use vSwitch IP as internal IP + nodeIP: + validSubnets: + - 10.100.0.0/24 + # Required for external cloud provider + extraArgs: + cloud-provider: external + extraConfig: + maxPods: 512 + defaultRuntimeSeccompProfileEnabled: true + disableManifestsDirectory: true + # Registry mirrors (recommended to avoid rate limiting) + registries: + mirrors: + docker.io: + endpoints: + - https://mirror.gcr.io +cluster: + controlPlane: + endpoint: https://:6443 + clusterName: + network: + cni: + name: none + podSubnets: + - 10.244.0.0/16 + serviceSubnets: + - 10.96.0.0/16 + token: + ca: + crt: +``` + +{{% alert title="Important" color="warning" %}} +Ensure kubelet version matches your cluster version. Talos 1.11.6 doesn't support Kubernetes 1.35+. 
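+
+A quick way to confirm the versions in use, assuming `talosctl` and `kubectl` are already configured for this cluster:
+
+```bash
+# Talos version running on an existing node
+talosctl version --nodes <node-ip>
+# Kubernetes server version of the cluster
+kubectl version
+```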
+{{% /alert %}} + +## Step 4: Create Kubernetes Secrets + +### 4.1 Create secret with Hetzner API token + +```bash +kubectl -n cozy-cluster-autoscaler-hetzner create secret generic hetzner-credentials \ + --from-literal=token= +``` + +### 4.2 Create secret with Talos machine config + +The machine config must be base64-encoded: + +```bash +# Encode your worker.yaml (single line base64) +base64 -w 0 -i worker.yaml -o worker.b64 + +# Create secret +kubectl -n cozy-cluster-autoscaler-hetzner create secret generic talos-config \ + --from-file=cloud-init=worker.b64 +``` + +## Step 5: Deploy Cluster Autoscaler + +Create the Package resource: + +```yaml +apiVersion: cozystack.io/v1alpha1 +kind: Package +metadata: + name: cozystack.cluster-autoscaler-hetzner +spec: + variant: default + components: + cluster-autoscaler-hetzner: + values: + cluster-autoscaler: + autoscalingGroups: + - name: workers-fsn1 + minSize: 0 + maxSize: 10 + instanceType: cpx22 + region: FSN1 + extraEnv: + HCLOUD_IMAGE: "" + HCLOUD_SSH_KEY: "" + HCLOUD_NETWORK: "cozystack-vswitch" + HCLOUD_PUBLIC_IPV4: "true" + HCLOUD_PUBLIC_IPV6: "false" + extraEnvSecrets: + HCLOUD_TOKEN: + name: hetzner-credentials + key: token + HCLOUD_CLOUD_INIT: + name: talos-config + key: cloud-init +``` + +Apply: +```bash +kubectl apply -f package.yaml +``` + +## Step 6: Test Autoscaling + +Create a deployment with pod anti-affinity to force scale-up: + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: test-autoscaler +spec: + replicas: 5 + selector: + matchLabels: + app: test-autoscaler + template: + metadata: + labels: + app: test-autoscaler + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: test-autoscaler + topologyKey: kubernetes.io/hostname + containers: + - name: nginx + image: nginx + resources: + requests: + cpu: "100m" + memory: "128Mi" +``` + +If you have fewer nodes than replicas, the autoscaler will create new Hetzner servers. 
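+
+You can watch the scale-up as it happens, for example:
+
+```bash
+# Pending pods are scheduled as soon as the new nodes register
+kubectl get pods -l app=test-autoscaler -w
+
+# In a second terminal, watch the new nodes join
+kubectl get nodes -w
+```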
+ +## Step 7: Verify + +```bash +# Check autoscaler logs +kubectl -n cozy-cluster-autoscaler-hetzner logs \ + deployment/cluster-autoscaler-hetzner-hetzner-cluster-autoscaler -f + +# Check nodes +kubectl get nodes -o wide + +# Verify node labels and internal IP +kubectl get node --show-labels +``` + +Expected result for autoscaled nodes: +- Internal IP from vSwitch range (e.g., 10.100.0.2) +- Label `kilo.squat.ai/location=hetzner-cloud` + +## Configuration Reference + +### Environment Variables + +| Variable | Description | Required | +|----------|-------------|----------| +| `HCLOUD_TOKEN` | Hetzner API token | Yes | +| `HCLOUD_IMAGE` | Talos snapshot ID | Yes | +| `HCLOUD_CLOUD_INIT` | Base64-encoded machine config | Yes | +| `HCLOUD_NETWORK` | vSwitch network name/ID | No | +| `HCLOUD_SSH_KEY` | SSH key name/ID | No | +| `HCLOUD_FIREWALL` | Firewall name/ID | No | +| `HCLOUD_PUBLIC_IPV4` | Assign public IPv4 | No (default: true) | +| `HCLOUD_PUBLIC_IPV6` | Assign public IPv6 | No (default: false) | + +### Hetzner Server Types + +| Type | vCPU | RAM | Good for | +|------|------|-----|----------| +| cpx22 | 2 | 4GB | Small workloads | +| cpx32 | 4 | 8GB | General purpose | +| cpx42 | 8 | 16GB | Medium workloads | +| cpx52 | 16 | 32GB | Large workloads | +| ccx13 | 2 dedicated | 8GB | CPU-intensive | +| ccx23 | 4 dedicated | 16GB | CPU-intensive | +| ccx33 | 8 dedicated | 32GB | CPU-intensive | +| cax11 | 2 ARM | 4GB | ARM workloads | +| cax21 | 4 ARM | 8GB | ARM workloads | + +{{% alert title="Note" color="info" %}} +Some older server types (cpx11, cpx21, etc.) may be unavailable in certain regions. +{{% /alert %}} + +### Hetzner Regions + +| Code | Location | +|------|----------| +| FSN1 | Falkenstein, Germany | +| NBG1 | Nuremberg, Germany | +| HEL1 | Helsinki, Finland | +| ASH | Ashburn, USA | +| HIL | Hillsboro, USA | + +## Troubleshooting + +### Nodes not joining cluster + +1. Check VNC console via Hetzner Cloud Console or: + ```bash + hcloud server request-console + ``` +2. Common errors: + - **"unknown keys found during decoding"**: Check Talos config format. `nodeLabels` goes under `machine`, `nodeIP` goes under `machine.kubelet` + - **"kubelet image is not valid"**: Kubernetes version mismatch. Use kubelet version compatible with your Talos version + - **"failed to load config"**: Machine config syntax error + +### Nodes have wrong Internal IP + +Ensure `machine.kubelet.nodeIP.validSubnets` is set to your vSwitch subnet: +```yaml +machine: + kubelet: + nodeIP: + validSubnets: + - 10.100.0.0/24 +``` + +### Scale-up not triggered + +1. Check autoscaler logs for errors +2. Verify RBAC permissions (leases access required) +3. Check if pods are actually pending: + ```bash + kubectl get pods --field-selector=status.phase=Pending + ``` + +### Registry rate limiting (403 errors) + +Add registry mirrors to Talos config: +```yaml +machine: + registries: + mirrors: + docker.io: + endpoints: + - https://mirror.gcr.io + registry.k8s.io: + endpoints: + - https://registry.k8s.io +``` + +### Scale-down not working + +The autoscaler caches node information for up to 30 minutes. 
Wait or restart autoscaler: +```bash +kubectl -n cozy-cluster-autoscaler-hetzner rollout restart \ + deployment cluster-autoscaler-hetzner-hetzner-cluster-autoscaler +``` + +## Integration with Kilo + +For multi-location clusters using Kilo mesh networking, add location label to machine config: + +```yaml +machine: + nodeLabels: + kilo.squat.ai/location: hetzner-cloud + topology.kubernetes.io/zone: hetzner-cloud +``` + +This allows Kilo to create proper WireGuard tunnels between your bare-metal nodes and Hetzner Cloud nodes. From 43d9da0005ecbca425ef728095b3929d284fb109 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Tue, 10 Feb 2026 20:53:24 +0100 Subject: [PATCH 2/6] docs: add cloud-provider: external requirement to Azure autoscaler guide The kubelet cloud-provider: external flag is required for Azure cloud-controller-manager to assign ProviderID to nodes. Without it, cluster-autoscaler cannot match Kubernetes nodes to VMSS instances. Co-Authored-By: Claude Signed-off-by: Andrei Kvapil --- content/en/docs/operations/cluster/autoscaling/azure.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/content/en/docs/operations/cluster/autoscaling/azure.md b/content/en/docs/operations/cluster/autoscaling/azure.md index 7c5e38d1..ece92974 100644 --- a/content/en/docs/operations/cluster/autoscaling/azure.md +++ b/content/en/docs/operations/cluster/autoscaling/azure.md @@ -182,8 +182,17 @@ machine: nodeIP: validSubnets: - 10.2.0.0/24 # Azure VNet subnet + # Required for external cloud provider (ProviderID assignment) + extraArgs: + cloud-provider: external ``` +{{% alert title="Important" color="warning" %}} +The `cloud-provider: external` setting is required for the Azure cloud-controller-manager to assign ProviderID to nodes. +Without it, the cluster-autoscaler cannot match Kubernetes nodes to Azure VMSS instances. +This setting must be present on **all** nodes in the cluster, including control plane nodes. +{{% /alert %}} + All other settings (cluster tokens, control plane endpoint, extensions, etc.) remain the same as the Hetzner config. ## Step 4: Create VMSS (Virtual Machine Scale Set) From bc95e2cadec18fa868cd89a5a3f3d35d4fa7f050 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Wed, 11 Feb 2026 11:37:41 +0100 Subject: [PATCH 3/6] fix(docs): use nodeAnnotations for kilo location and persistent-keepalive Kilo reads kilo.squat.ai/location from node annotations, not labels. Using nodeLabels for this value does not work. Add kilo.squat.ai/persistent-keepalive annotation which is required for WireGuard NAT traversal on cloud nodes (especially Azure nodes behind NAT). Without it, Kilo's NAT endpoint discovery is disabled and tunnels will not stabilize. Replace force-endpoint approach in Azure docs with the simpler persistent-keepalive mechanism that enables automatic NAT traversal. 
Co-Authored-By: Claude Signed-off-by: Andrei Kvapil --- .../operations/cluster/autoscaling/azure.md | 43 +++++++++---------- .../operations/cluster/autoscaling/hetzner.md | 17 +++++--- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/content/en/docs/operations/cluster/autoscaling/azure.md b/content/en/docs/operations/cluster/autoscaling/azure.md index ece92974..9d07ebb9 100644 --- a/content/en/docs/operations/cluster/autoscaling/azure.md +++ b/content/en/docs/operations/cluster/autoscaling/azure.md @@ -175,8 +175,11 @@ Create a machine config similar to the Hetzner one, with these Azure-specific ch ```yaml machine: - nodeLabels: + # Kilo annotations for WireGuard mesh (applied automatically on join) + nodeAnnotations: kilo.squat.ai/location: azure + kilo.squat.ai/persistent-keepalive: "20" + nodeLabels: topology.kubernetes.io/zone: azure kubelet: nodeIP: @@ -187,6 +190,10 @@ machine: cloud-provider: external ``` +{{% alert title="Note" color="info" %}} +Kilo reads `kilo.squat.ai/location` from **node annotations**, not labels. The `persistent-keepalive` annotation is critical for Azure nodes behind NAT -- it enables WireGuard NAT traversal, allowing Kilo to discover the real public endpoint of the node automatically. +{{% /alert %}} + {{% alert title="Important" color="warning" %}} The `cloud-provider: external` setting is required for the Azure cloud-controller-manager to assign ProviderID to nodes. Without it, the cluster-autoscaler cannot match Kubernetes nodes to Azure VMSS instances. @@ -261,29 +268,19 @@ Apply: kubectl apply -f package.yaml ``` -## Step 6: Kilo WireGuard Endpoint Configuration +## Step 6: Kilo WireGuard Connectivity -Azure nodes behind NAT need their public IP advertised as the WireGuard endpoint. Without this, the WireGuard tunnel between on-premises and Azure nodes will not be established. +Azure nodes are behind NAT, so their initial WireGuard endpoint will be a private IP. Kilo handles this automatically through WireGuard's built-in NAT traversal when `persistent-keepalive` is configured (already included in the machine config from Step 3). -Each new Azure node needs the annotation: +The flow works as follows: +1. The Azure node initiates a WireGuard handshake to the on-premises leader (which has a public IP) +2. `persistent-keepalive` sends periodic keepalive packets, maintaining the NAT mapping +3. The on-premises Kilo leader discovers the real public endpoint of the Azure node through WireGuard +4. Kilo stores the discovered endpoint and uses it for subsequent connections -```bash -kubectl annotate node \ - kilo.squat.ai/force-endpoint=:51820 -``` - -### Automated Endpoint Configuration - -For automated endpoint detection, create a DaemonSet that runs on Azure nodes (`topology.kubernetes.io/zone=azure`) and: - -1. Queries Azure Instance Metadata Service (IMDS) for the public IP: - ```bash - curl -s -H "Metadata: true" \ - "http://169.254.169.254/metadata/instance/network/interface/0/ipv4/ipAddress/0/publicIpAddress?api-version=2021-02-01&format=text" - ``` -2. Annotates the node with `kilo.squat.ai/force-endpoint=:51820` - -This ensures new autoscaled nodes automatically get proper WireGuard connectivity. +{{% alert title="Note" color="info" %}} +No manual `force-endpoint` annotation is needed. The `kilo.squat.ai/persistent-keepalive: "20"` annotation in the machine config is sufficient for Kilo to discover NAT endpoints automatically. Without this annotation, Kilo's NAT traversal mechanism is disabled and the tunnel will not stabilize. 
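+
+You can confirm that an autoscaled node carries the expected annotations, for example:
+
+```bash
+# Both annotations come from the machine config in Step 3
+kubectl get node <azure-node-name> \
+  -o jsonpath='{.metadata.annotations.kilo\.squat\.ai/location}{" "}{.metadata.annotations.kilo\.squat\.ai/persistent-keepalive}{"\n"}'
+```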
+{{% /alert %}} ## Testing @@ -341,9 +338,11 @@ spec: - Check VMSS instance provisioning state: `az vmss list-instances --resource-group --name workers` ### WireGuard tunnel not established -- Verify `kilo.squat.ai/force-endpoint` annotation is set with the public IP +- Verify the node has `kilo.squat.ai/persistent-keepalive: "20"` annotation +- Verify the node has `kilo.squat.ai/location: azure` annotation (not just as a label) - Check NSG allows inbound UDP 51820 - Inspect kilo logs: `kubectl logs -n cozy-kilo ` +- Check for "WireGuard configurations are different" messages repeating every 30 seconds -- this indicates `persistent-keepalive` annotation is missing ### VM quota errors - Check quota: `az vm list-usage --location ` diff --git a/content/en/docs/operations/cluster/autoscaling/hetzner.md b/content/en/docs/operations/cluster/autoscaling/hetzner.md index 4f187fa3..f25f5113 100644 --- a/content/en/docs/operations/cluster/autoscaling/hetzner.md +++ b/content/en/docs/operations/cluster/autoscaling/hetzner.md @@ -102,9 +102,11 @@ machine: token: ca: crt: - # Node labels (applied automatically on join) - nodeLabels: + # Kilo annotations for WireGuard mesh (applied automatically on join) + nodeAnnotations: kilo.squat.ai/location: hetzner-cloud + kilo.squat.ai/persistent-keepalive: "20" + nodeLabels: topology.kubernetes.io/zone: hetzner-cloud kubelet: image: ghcr.io/siderolabs/kubelet:v1.33.1 @@ -364,13 +366,16 @@ kubectl -n cozy-cluster-autoscaler-hetzner rollout restart \ ## Integration with Kilo -For multi-location clusters using Kilo mesh networking, add location label to machine config: +For multi-location clusters using Kilo mesh networking, add location and persistent-keepalive as **node annotations** in the machine config: ```yaml machine: - nodeLabels: + nodeAnnotations: kilo.squat.ai/location: hetzner-cloud - topology.kubernetes.io/zone: hetzner-cloud + kilo.squat.ai/persistent-keepalive: "20" ``` -This allows Kilo to create proper WireGuard tunnels between your bare-metal nodes and Hetzner Cloud nodes. +{{% alert title="Important" color="warning" %}} +Kilo reads `kilo.squat.ai/location` from **node annotations**, not labels. Using `nodeLabels` for this value will not work. +The `persistent-keepalive` annotation enables WireGuard NAT traversal, which is required for nodes behind NAT and recommended for all cloud nodes to maintain stable tunnels. +{{% /alert %}} From 888967860265651a0f5de37b4b84619a2bc7b691 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Fri, 13 Feb 2026 12:55:56 +0100 Subject: [PATCH 4/6] docs(azure): add route table and IP forwarding requirements Add documentation for Azure UDR (User Defined Route) table required for Kilo non-leader node connectivity and VMSS IP forwarding setup. Without these, reply traffic from non-leader nodes to remote subnets is dropped by Azure SDN because it routes by destination IP, not Linux next-hop. 
Co-Authored-By: Claude Signed-off-by: Andrei Kvapil --- .../operations/cluster/autoscaling/azure.md | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/content/en/docs/operations/cluster/autoscaling/azure.md b/content/en/docs/operations/cluster/autoscaling/azure.md index 9d07ebb9..99625177 100644 --- a/content/en/docs/operations/cluster/autoscaling/azure.md +++ b/content/en/docs/operations/cluster/autoscaling/azure.md @@ -83,6 +83,54 @@ az network vnet subnet update \ --network-security-group cozystack-nsg ``` +### 1.5 Create Route Table for Kilo Routing + +Azure SDN routes packets based on destination IP, not the Linux next-hop set by Kilo. Without a custom route table, reply traffic from non-leader nodes to remote subnets (e.g. on-premises networks) is sent to the Internet route and dropped, making non-leader nodes unreachable from outside Azure. + +Create a route table that directs remote subnet traffic through the Kilo location leader: + +```bash +# Create route table +az network route-table create \ + --resource-group \ + --name kilo-routes \ + --location + +# Add routes for each remote subnet reachable via Kilo WireGuard mesh +# Replace with the internal IP of the Kilo leader node in this subnet +az network route-table route create \ + --resource-group \ + --route-table-name kilo-routes \ + --name to-onprem \ + --address-prefix \ + --next-hop-type VirtualAppliance \ + --next-hop-ip-address + +# Add route for WireGuard overlay IPs +az network route-table route create \ + --resource-group \ + --route-table-name kilo-routes \ + --name to-wireguard-ips \ + --address-prefix 100.66.0.0/16 \ + --next-hop-type VirtualAppliance \ + --next-hop-ip-address + +# Associate route table with worker subnet +az network vnet subnet update \ + --resource-group \ + --vnet-name cozystack-vnet \ + --name workers \ + --route-table kilo-routes +``` + +Add a route for each remote location's subnet (repeat the `route create` command for every on-premises or other cloud subnet that must be reachable through the WireGuard mesh). + +{{% alert title="Important" color="warning" %}} +- The `` is the internal IP of the Kilo location leader in this subnet. In a VMSS-based setup, this is typically the first instance that joins the cluster. You can find it by checking `kilo.squat.ai/leader: "true"` annotation on the nodes. +- IP forwarding must be enabled on the leader's NIC (see Step 4). +- If the leader node changes, the route table must be updated with the new leader's IP. 
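+
+For example, you can list the Azure-location nodes together with their leader annotation and internal IP:
+
+```bash
+kubectl get nodes -l topology.kubernetes.io/zone=azure \
+  -o custom-columns='NAME:.metadata.name,LEADER:.metadata.annotations.kilo\.squat\.ai/leader,IP:.status.addresses[?(@.type=="InternalIP")].address'
+```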
+{{% /alert %}} + ## Step 2: Create Talos Image ### 2.1 Generate Schematic ID @@ -227,11 +275,18 @@ az vmss create \ --authentication-type ssh \ --generate-ssh-keys \ --upgrade-policy-mode Manual + +# Enable IP forwarding on VMSS NICs (required for Kilo leader to forward traffic) +az vmss update \ + --resource-group \ + --name workers \ + --set virtualMachineProfile.networkProfile.networkInterfaceConfigurations[0].enableIPForwarding=true ``` {{% alert title="Important" color="warning" %}} - Must use `--orchestration-mode Uniform` (cluster-autoscaler requires Uniform mode) - Must use `--public-ip-per-vm` for WireGuard connectivity +- IP forwarding must be enabled on VMSS NICs so the Kilo leader can forward traffic between the WireGuard mesh and non-leader nodes in the same subnet - Check VM quota in your region: `az vm list-usage --location ` - `--custom-data` passes the Talos machine config to new instances {{% /alert %}} @@ -344,6 +399,33 @@ spec: - Inspect kilo logs: `kubectl logs -n cozy-kilo ` - Check for "WireGuard configurations are different" messages repeating every 30 seconds -- this indicates `persistent-keepalive` annotation is missing +### Non-leader nodes unreachable (kubectl logs/exec timeout) + +If `kubectl logs` or `kubectl exec` works for the Kilo leader node but times out for all other nodes in the same Azure subnet: + +1. **Verify IP forwarding** is enabled on the VMSS: + ```bash + az vmss show --resource-group --name workers \ + --query "virtualMachineProfile.networkProfile.networkInterfaceConfigurations[0].enableIPForwarding" + ``` + If `false`, enable it and apply to existing instances: + ```bash + az vmss update --resource-group --name workers \ + --set virtualMachineProfile.networkProfile.networkInterfaceConfigurations[0].enableIPForwarding=true + az vmss update-instances --resource-group --name workers --instance-ids "*" + ``` + +2. **Verify route table** is associated with the subnet and contains routes for all remote subnets pointing to the leader's IP as a Virtual Appliance (see Step 1.5). + +3. **Test the return path** from the leader node: + ```bash + # This should work (same subnet, direct) + kubectl exec -n cozy-kilo -- ping -c 2 + # This tests the return path through the route table + kubectl exec -n cozy-kilo -- ping -c 2 -I + ``` + If the first ping works but the second fails, the route table is missing or misconfigured. 
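+
+4. **Inspect the route table contents** as a quick sanity check (adjust the resource group and route table name to your environment):
+   ```bash
+   az network route-table route list \
+     --resource-group <resource-group> \
+     --route-table-name kilo-routes \
+     --output table
+   ```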
+ ### VM quota errors - Check quota: `az vm list-usage --location ` - Request quota increase via Azure portal From 1a848e8b78e63f54283ae6b621b6c755f691669b Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Sat, 14 Feb 2026 02:29:36 +0100 Subject: [PATCH 5/6] docs(azure): update machineconfig workflow and route sync setup --- .../operations/cluster/autoscaling/azure.md | 111 +++++++++++------- 1 file changed, 71 insertions(+), 40 deletions(-) diff --git a/content/en/docs/operations/cluster/autoscaling/azure.md b/content/en/docs/operations/cluster/autoscaling/azure.md index 99625177..c9d4fab1 100644 --- a/content/en/docs/operations/cluster/autoscaling/azure.md +++ b/content/en/docs/operations/cluster/autoscaling/azure.md @@ -14,6 +14,12 @@ This guide explains how to configure cluster-autoscaler for automatic node scali - Existing Talos Kubernetes cluster with Kilo WireGuard mesh - Talos worker machine config +Install required Cozystack packages before configuring autoscaling: + +```bash +cozypkg add cozystack.kilo cozystack.local-ccm +``` + ## Step 1: Create Azure Infrastructure ### 1.1 Login with Service Principal @@ -83,54 +89,39 @@ az network vnet subnet update \ --network-security-group cozystack-nsg ``` -### 1.5 Create Route Table for Kilo Routing +### 1.5 Configure Route Table for Kilo Routing -Azure SDN routes packets based on destination IP, not the Linux next-hop set by Kilo. Without a custom route table, reply traffic from non-leader nodes to remote subnets (e.g. on-premises networks) is sent to the Internet route and dropped, making non-leader nodes unreachable from outside Azure. +Azure SDN routes packets based on destination IP, not the Linux next-hop set by Kilo. +For traffic that reaches Azure through the Kilo location leader, non-leader nodes need a correct return path back to remote subnets. -Create a route table that directs remote subnet traffic through the Kilo location leader: +Create and attach a route table for the workers subnet: ```bash # Create route table az network route-table create \ --resource-group \ - --name kilo-routes \ + --name kilo-routes-workers-serverscom \ --location -# Add routes for each remote subnet reachable via Kilo WireGuard mesh -# Replace with the internal IP of the Kilo leader node in this subnet -az network route-table route create \ - --resource-group \ - --route-table-name kilo-routes \ - --name to-onprem \ - --address-prefix \ - --next-hop-type VirtualAppliance \ - --next-hop-ip-address - -# Add route for WireGuard overlay IPs -az network route-table route create \ - --resource-group \ - --route-table-name kilo-routes \ - --name to-wireguard-ips \ - --address-prefix 100.66.0.0/16 \ - --next-hop-type VirtualAppliance \ - --next-hop-ip-address - -# Associate route table with worker subnet +# Associate it with workers subnet az network vnet subnet update \ --resource-group \ --vnet-name cozystack-vnet \ - --name workers \ - --route-table kilo-routes + --name workers-serverscom \ + --route-table kilo-routes-workers-serverscom ``` -Add a route for each remote location's subnet (repeat the `route create` command for every on-premises or other cloud subnet that must be reachable through the WireGuard mesh). - {{% alert title="Important" color="warning" %}} -- The `` is the internal IP of the Kilo location leader in this subnet. In a VMSS-based setup, this is typically the first instance that joins the cluster. You can find it by checking `kilo.squat.ai/leader: "true"` annotation on the nodes. 
-- IP forwarding must be enabled on the leader's NIC (see Step 4). -- If the leader node changes, the route table must be updated with the new leader's IP. +- `nextHopIpAddress` in UDR routes must point to the current Azure Kilo leader internal IP. +- If leader changes, routes must be updated to the new leader. {{% /alert %}} +Automate route updates with the route-sync controller (watches Azure-location nodes and updates UDR routes when `kilo.squat.ai/leader=true` changes): + +```bash +kubectl apply -f manifests/kilo-azure-route-sync-deployment.yaml +``` + ## Step 2: Create Talos Image ### 2.1 Generate Schematic ID @@ -219,11 +210,41 @@ az image create \ ## Step 3: Create Talos Machine Config for Azure -Create a machine config similar to the Hetzner one, with these Azure-specific changes: +From your cluster repository, generate a worker config file: + +```bash +talm template -t templates/worker.yaml --offline --full > nodes/azure.yaml +``` + +Then edit `nodes/azure.yaml` for Azure workers: + +1. Add Azure location metadata: + ```yaml + machine: + nodeAnnotations: + kilo.squat.ai/location: azure + kilo.squat.ai/persistent-keepalive: "20" + nodeLabels: + topology.kubernetes.io/zone: azure + ``` +2. Set public Kubernetes API endpoint: + Change `cluster.controlPlane.endpoint` to the **public** API server address (for example `https://:6443`). +3. Remove discovered installer/network sections: + Delete `machine.install` and `machine.network` sections from this file. +4. Set external cloud provider for kubelet: + ```yaml + machine: + kubelet: + extraArgs: + cloud-provider: external + ``` +5. Fix node IP subnet detection: + Set `machine.kubelet.nodeIP.validSubnets` to the actual Azure subnet where autoscaled nodes run (for example `192.168.102.0/23`). + +Result should include at least: ```yaml machine: - # Kilo annotations for WireGuard mesh (applied automatically on join) nodeAnnotations: kilo.squat.ai/location: azure kilo.squat.ai/persistent-keepalive: "20" @@ -232,10 +253,12 @@ machine: kubelet: nodeIP: validSubnets: - - 10.2.0.0/24 # Azure VNet subnet - # Required for external cloud provider (ProviderID assignment) + - 192.168.102.0/23 # replace with your Azure workers subnet extraArgs: cloud-provider: external +cluster: + controlPlane: + endpoint: https://:6443 ``` {{% alert title="Note" color="info" %}} @@ -248,7 +271,7 @@ Without it, the cluster-autoscaler cannot match Kubernetes nodes to Azure VMSS i This setting must be present on **all** nodes in the cluster, including control plane nodes. {{% /alert %}} -All other settings (cluster tokens, control plane endpoint, extensions, etc.) remain the same as the Hetzner config. +All other settings (cluster tokens, CA, extensions, etc.) remain the same as the generated template. ## Step 4: Create VMSS (Virtual Machine Scale Set) @@ -269,7 +292,7 @@ az vmss create \ --vnet-name cozystack-vnet \ --subnet workers \ --public-ip-per-vm \ - --custom-data machineconfig-azure.yaml \ + --custom-data nodes/azure.yaml \ --security-type Standard \ --admin-username talos \ --authentication-type ssh \ @@ -415,16 +438,24 @@ If `kubectl logs` or `kubectl exec` works for the Kilo leader node but times out az vmss update-instances --resource-group --name workers --instance-ids "*" ``` -2. **Verify route table** is associated with the subnet and contains routes for all remote subnets pointing to the leader's IP as a Virtual Appliance (see Step 1.5). +2. 
**Verify UDR configuration** (Step 1.5): + - `workers-serverscom` subnet is associated with `kilo-routes-workers-serverscom`. + - Route entries exist for required remote prefixes. + - `nextHopIpAddress` points to current Azure leader internal IP. + - Route-sync controller is running and has no errors: + ```bash + kubectl -n cozy-cluster-autoscaler-azure get deploy kilo-azure-route-sync + kubectl -n cozy-cluster-autoscaler-azure logs deploy/kilo-azure-route-sync + ``` 3. **Test the return path** from the leader node: ```bash # This should work (same subnet, direct) kubectl exec -n cozy-kilo -- ping -c 2 - # This tests the return path through the route table + # This tests the return path through UDR + leader forwarding kubectl exec -n cozy-kilo -- ping -c 2 -I ``` - If the first ping works but the second fails, the route table is missing or misconfigured. + If the first ping works but the second fails, UDR/return-path configuration is missing or misconfigured. ### VM quota errors - Check quota: `az vm list-usage --location ` From c439266fc95ec43af25bb6f66155824517641124 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Sat, 14 Feb 2026 02:39:26 +0100 Subject: [PATCH 6/6] docs(azure): add kilo route-sync deployment manifest --- .../kilo-azure-route-sync-deployment.yaml | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 manifests/kilo-azure-route-sync-deployment.yaml diff --git a/manifests/kilo-azure-route-sync-deployment.yaml b/manifests/kilo-azure-route-sync-deployment.yaml new file mode 100644 index 00000000..0d93f1dd --- /dev/null +++ b/manifests/kilo-azure-route-sync-deployment.yaml @@ -0,0 +1,129 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kilo-azure-route-sync + namespace: cozy-cluster-autoscaler-azure +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kilo-azure-route-sync +rules: +- apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kilo-azure-route-sync +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kilo-azure-route-sync +subjects: +- kind: ServiceAccount + name: kilo-azure-route-sync + namespace: cozy-cluster-autoscaler-azure +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kilo-azure-route-sync + namespace: cozy-cluster-autoscaler-azure +spec: + replicas: 1 + selector: + matchLabels: + app: kilo-azure-route-sync + template: + metadata: + labels: + app: kilo-azure-route-sync + spec: + serviceAccountName: kilo-azure-route-sync + containers: + - name: sync + image: mcr.microsoft.com/azure-cli:2.67.0 + imagePullPolicy: IfNotPresent + env: + - name: AZURE_CLIENT_ID + valueFrom: + secretKeyRef: + name: cluster-autoscaler-azure-azure-cluster-autoscaler + key: ClientID + - name: AZURE_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: cluster-autoscaler-azure-azure-cluster-autoscaler + key: ClientSecret + - name: AZURE_TENANT_ID + valueFrom: + secretKeyRef: + name: cluster-autoscaler-azure-azure-cluster-autoscaler + key: TenantID + - name: AZURE_SUBSCRIPTION_ID + valueFrom: + secretKeyRef: + name: cluster-autoscaler-azure-azure-cluster-autoscaler + key: SubscriptionID + - name: AZURE_RESOURCE_GROUP + valueFrom: + secretKeyRef: + name: cluster-autoscaler-azure-azure-cluster-autoscaler + key: ResourceGroup + - name: AZURE_ROUTE_TABLE + value: kilo-routes-workers-serverscom + - name: AZURE_VNET_NAME + value: cozystack-vnet + - name: AZURE_SUBNET_NAME + value: 
workers-serverscom + - name: AZURE_ROUTES + value: to-serverscom=192.168.102.0/23 + command: ["/bin/sh","-ceu"] + args: + - | + az login --service-principal -u "$AZURE_CLIENT_ID" -p "$AZURE_CLIENT_SECRET" --tenant "$AZURE_TENANT_ID" >/dev/null + az account set --subscription "$AZURE_SUBSCRIPTION_ID" + + az aks install-cli --install-location /usr/local/bin/kubectl >/dev/null + + sync_route() { + route_name="$1" + route_prefix="$2" + leader_ip="$3" + az network route-table route create -g "$AZURE_RESOURCE_GROUP" --route-table-name "$AZURE_ROUTE_TABLE" \ + -n "$route_name" --address-prefix "$route_prefix" \ + --next-hop-type VirtualAppliance --next-hop-ip-address "$leader_ip" >/dev/null || true + az network route-table route update -g "$AZURE_RESOURCE_GROUP" --route-table-name "$AZURE_ROUTE_TABLE" \ + -n "$route_name" --address-prefix "$route_prefix" \ + --next-hop-type VirtualAppliance --next-hop-ip-address "$leader_ip" >/dev/null + } + + sync_all_routes() { + leader_ip="$1" + IFS=',' + for entry in $AZURE_ROUTES; do + route_name="${entry%%=*}" + route_prefix="${entry#*=}" + [ -n "$route_name" ] && [ -n "$route_prefix" ] || continue + sync_route "$route_name" "$route_prefix" "$leader_ip" + done + unset IFS + } + + kubectl get node -w -l topology.kubernetes.io/zone=azure --no-headers \ + -o 'custom-columns=NAME:.metadata.name,LEADER:.metadata.annotations.kilo\.squat\.ai/leader,IP:.status.addresses[?(@.type=="InternalIP")].address' \ + | while read -r n leader ip; do + echo "$(date -Iseconds) event node=${n} leader=${leader} ip=${ip}" + [ "$leader" = "true" ] || continue + az network vnet subnet update \ + -g "$AZURE_RESOURCE_GROUP" \ + --vnet-name "$AZURE_VNET_NAME" \ + -n "$AZURE_SUBNET_NAME" \ + --route-table "$AZURE_ROUTE_TABLE" >/dev/null + + sync_all_routes "$ip" + + echo "$(date -Iseconds) synced routes to leader ${n} (${ip})" + done