diff --git a/images/virtualization-artifact/cmd/virtualization-controller/main.go b/images/virtualization-artifact/cmd/virtualization-controller/main.go index 45bbc5efcc..a0c10616af 100644 --- a/images/virtualization-artifact/cmd/virtualization-controller/main.go +++ b/images/virtualization-artifact/cmd/virtualization-controller/main.go @@ -46,6 +46,7 @@ import ( "github.com/deckhouse/virtualization-controller/pkg/controller/evacuation" "github.com/deckhouse/virtualization-controller/pkg/controller/indexer" "github.com/deckhouse/virtualization-controller/pkg/controller/livemigration" + "github.com/deckhouse/virtualization-controller/pkg/controller/migrationiface" mc "github.com/deckhouse/virtualization-controller/pkg/controller/moduleconfig" mcapi "github.com/deckhouse/virtualization-controller/pkg/controller/moduleconfig/api" "github.com/deckhouse/virtualization-controller/pkg/controller/nodeusbdevice" @@ -96,6 +97,8 @@ const ( SdnEnabledEnv = "SDN_ENABLED" clusterUUIDEnv = "CLUSTER_UUID" + + migrationSystemNetworkNameEnv = "MIGRATION_SYSTEM_NETWORK_NAME" ) func main() { @@ -390,6 +393,12 @@ func main() { os.Exit(1) } + migrationIfaceLogger := logger.NewControllerLogger(migrationiface.ControllerName, logLevel, logOutput, logDebugVerbosity, logDebugControllerList) + if _, err = migrationiface.NewController(ctx, mgr, migrationIfaceLogger, os.Getenv(migrationSystemNetworkNameEnv)); err != nil { + log.Error(err.Error()) + os.Exit(1) + } + resourceSliceLogger := logger.NewControllerLogger(resourceslice.ControllerName, logLevel, logOutput, logDebugVerbosity, logDebugControllerList) if _, err = resourceslice.NewController(ctx, mgr, resourceSliceLogger); err != nil { log.Error(err.Error()) diff --git a/images/virtualization-artifact/pkg/common/annotations/annotations.go b/images/virtualization-artifact/pkg/common/annotations/annotations.go index c70c6542cb..d3161e88b7 100644 --- a/images/virtualization-artifact/pkg/common/annotations/annotations.go +++ 
b/images/virtualization-artifact/pkg/common/annotations/annotations.go @@ -193,6 +193,12 @@ const ( // AnnNetworksStatus is the annotation for view current network configuration into Pod. AnnNetworksStatus = "network.deckhouse.io/networks-status" + // AnnMigrationIface names the kernel interface that virt-handler binds + // live-migration traffic to. Written on Nodes by the migrationiface + // controller from a SystemNetwork CR (sdn module); read by virt-handler + // on startup. + AnnMigrationIface = AnnAPIGroupV + "/migration-iface" + // AnnVirtualDiskOriginalAnnotations is the annotation for storing original VirtualDisk annotations. AnnVirtualDiskOriginalAnnotations = AnnAPIGroupV + "/vd-original-annotations" // AnnVirtualDiskOriginalLabels is the annotation for storing original VirtualDisk labels. diff --git a/images/virtualization-artifact/pkg/controller/indexer/indexer.go b/images/virtualization-artifact/pkg/controller/indexer/indexer.go index fe9abac103..6a4003a460 100644 --- a/images/virtualization-artifact/pkg/controller/indexer/indexer.go +++ b/images/virtualization-artifact/pkg/controller/indexer/indexer.go @@ -76,6 +76,9 @@ const ( IndexFieldResourceSliceByPoolName = "spec.pool.name" IndexFieldResourceSliceByDriver = "spec.driver" + + IndexFieldSNNNIAByNodeName = "snnnia.status.nodeName" + IndexFieldSNNNIABySystemNetworkName = "snnnia.spec.systemNetworkName" ) var IndexGetters = []IndexGetter{ @@ -114,6 +117,11 @@ var IndexGettersUSB = []IndexGetter{ IndexResourceSliceByDriver, } +var IndexGettersSDN = []IndexGetter{ + IndexSNNNIAByNodeName, + IndexSNNNIABySystemNetworkName, +} + type IndexGetter func() (obj client.Object, field string, extractValue client.IndexerFunc) func IndexALL(ctx context.Context, mgr manager.Manager) error { @@ -126,6 +134,15 @@ func IndexALL(ctx context.Context, mgr manager.Manager) error { } } + if featuregates.Default().Enabled(featuregates.SDN) { + for _, fn := range IndexGettersSDN { + obj, field, indexFunc := fn() + if err 
:= mgr.GetFieldIndexer().IndexField(ctx, obj, field, indexFunc); err != nil { + return err + } + } + } + for _, fn := range IndexGetters { obj, field, indexFunc := fn() if err := mgr.GetFieldIndexer().IndexField(ctx, obj, field, indexFunc); err != nil {
diff --git a/images/virtualization-artifact/pkg/controller/indexer/snnnia_indexer.go b/images/virtualization-artifact/pkg/controller/indexer/snnnia_indexer.go
new file mode 100644
index 0000000000..f38a9e3dc5
--- /dev/null
+++ b/images/virtualization-artifact/pkg/controller/indexer/snnnia_indexer.go
@@ -0,0 +1,65 @@
+/*
+Copyright 2026 Flant JSC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package indexer
+
+import (
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// snnniaGVK identifies the sdn SystemNetworkNodeNetworkInterfaceAttachment kind.
+var snnniaGVK = schema.GroupVersionKind{
+	Group:   "network.deckhouse.io",
+	Version: "v1alpha1",
+	Kind:    "SystemNetworkNodeNetworkInterfaceAttachment",
+}
+
+// snnniaSeed returns an empty unstructured object typed as snnniaGVK. It is
+// the object each index getter below hands out alongside its field name.
+func snnniaSeed() client.Object {
+	seed := new(unstructured.Unstructured)
+	seed.SetGroupVersionKind(snnniaGVK)
+	return seed
+}
+
+// snnniaIndexer builds an index function that extracts the string stored at
+// path. Objects that are not unstructured, or whose field is missing, empty or
+// not a string, produce no index entry.
+func snnniaIndexer(path ...string) client.IndexerFunc {
+	return func(o client.Object) []string {
+		u, isUnstructured := o.(*unstructured.Unstructured)
+		if !isUnstructured || u == nil {
+			return nil
+		}
+		// The found flag and error are ignored: only a non-empty value is indexed.
+		if value, _, _ := unstructured.NestedString(u.Object, path...); value != "" {
+			return []string{value}
+		}
+		return nil
+	}
+}
+
+// IndexSNNNIAByNodeName returns the getter parts for the status.nodeName index.
+func IndexSNNNIAByNodeName() (client.Object, string, client.IndexerFunc) {
+	return snnniaSeed(), IndexFieldSNNNIAByNodeName, snnniaIndexer("status", "nodeName")
+}
+
+// IndexSNNNIABySystemNetworkName returns the getter parts for the spec.systemNetworkName index.
+func IndexSNNNIABySystemNetworkName() (client.Object, string, client.IndexerFunc) {
+	return snnniaSeed(), IndexFieldSNNNIABySystemNetworkName, snnniaIndexer("spec", "systemNetworkName")
+}
diff --git a/images/virtualization-artifact/pkg/controller/migrationiface/migrationiface_controller.go b/images/virtualization-artifact/pkg/controller/migrationiface/migrationiface_controller.go
new file mode 100644
index 0000000000..6f216131f2
--- /dev/null
+++ b/images/virtualization-artifact/pkg/controller/migrationiface/migrationiface_controller.go
@@ -0,0 +1,72 @@
+/*
+Copyright 2026 Flant JSC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package migrationiface annotates each Node with the kernel interface name
+// of a dedicated live-migration network, resolved from sdn's
+// SystemNetworkNodeNetworkInterfaceAttachment + NodeNetworkInterface.
+// virt-handler reads the annotation (see pkg/common/annotations.AnnMigrationIface)
+// at startup to bind migration traffic to that interface.
+package migrationiface
+
+import (
+	"context"
+	"time"
+
+	"k8s.io/utils/ptr"
+	"sigs.k8s.io/controller-runtime/pkg/controller"
+	"sigs.k8s.io/controller-runtime/pkg/manager"
+
+	"github.com/deckhouse/deckhouse/pkg/log"
+	"github.com/deckhouse/virtualization-controller/pkg/featuregates"
+	"github.com/deckhouse/virtualization-controller/pkg/logger"
+)
+
+// ControllerName is the name the migrationiface controller is registered under.
+const ControllerName = "migrationiface-controller"
+
+// NewController registers the migrationiface controller with mgr. It returns
+// (nil, nil) without registering anything when the SDN feature gate is
+// disabled or systemNetworkName is empty.
+func NewController(
+	ctx context.Context,
+	mgr manager.Manager,
+	log *log.Logger,
+	systemNetworkName string,
+) (controller.Controller, error) {
+	switch {
+	case !featuregates.Default().Enabled(featuregates.SDN):
+		log.Info("SDN feature gate is disabled, migrationiface controller is disabled")
+		return nil, nil
+	case systemNetworkName == "":
+		log.Info("MIGRATION_SYSTEM_NETWORK_NAME is empty, migrationiface controller is disabled")
+		return nil, nil
+	}
+	r := NewReconciler(mgr.GetClient(), systemNetworkName, log)
+	c, err := controller.New(ControllerName, mgr, controller.Options{
+		Reconciler:       r,
+		RecoverPanic:     ptr.To(true),
+		LogConstructor:   logger.NewConstructor(log),
+		CacheSyncTimeout: 10 * time.Minute,
+	})
+	if err != nil {
+		return nil, err
+	}
+	if err = r.SetupController(ctx, mgr, c); err != nil {
+		return nil, err
+	}
+	log.Info("Initialized migrationiface controller", "systemNetwork", systemNetworkName)
+	return c, nil
+}
diff --git a/images/virtualization-artifact/pkg/controller/migrationiface/migrationiface_reconciler.go b/images/virtualization-artifact/pkg/controller/migrationiface/migrationiface_reconciler.go
new file mode 100644
index 0000000000..c0d1886a02
--- /dev/null
+++ b/images/virtualization-artifact/pkg/controller/migrationiface/migrationiface_reconciler.go
@@ -0,0 +1,254 @@
+/*
+Copyright 2026 Flant JSC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package migrationiface
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/meta"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller"
+	"sigs.k8s.io/controller-runtime/pkg/event"
+	"sigs.k8s.io/controller-runtime/pkg/handler"
+	"sigs.k8s.io/controller-runtime/pkg/manager"
+	"sigs.k8s.io/controller-runtime/pkg/predicate"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+	"sigs.k8s.io/controller-runtime/pkg/source"
+
+	"github.com/deckhouse/deckhouse/pkg/log"
+	"github.com/deckhouse/virtualization-controller/pkg/common/annotations"
+	"github.com/deckhouse/virtualization-controller/pkg/controller/indexer"
+)
+
+const (
+	sdnGroup         = "network.deckhouse.io"
+	sdnVersion       = "v1alpha1"
+	sdnNodeNameLabel = sdnGroup + "/node-name"
+	sdnInterfaceType = sdnGroup + "/interface-type"
+	sdnInterfaceVLAN = "VLAN"
+)
+
+var (
+	snnniaGVK = schema.GroupVersionKind{Group: sdnGroup, Version: sdnVersion, Kind: "SystemNetworkNodeNetworkInterfaceAttachment"}
+	nniGVK    = schema.GroupVersionKind{Group: sdnGroup, Version: sdnVersion, Kind: "NodeNetworkInterface"}
+)
+
+// NewReconciler returns a Reconciler that maintains the AnnMigrationIface
+// annotation on Nodes for the SystemNetwork named systemNetworkName. Until
+// SetupController is called, sdn objects are read through c.
+func NewReconciler(c client.Client, systemNetworkName string, log *log.Logger) *Reconciler {
+	return &Reconciler{
+		client:            c,
+		reader:            c,
+		systemNetworkName: systemNetworkName,
+		log:               log,
+	}
+}
+
+// Reconciler sets, updates or removes the AnnMigrationIface annotation on a
+// Node according to the sdn objects that describe the migration network.
+//
+// Nodes are read and patched through client. The unstructured sdn objects are
+// read through reader, which SetupController points at the manager cache: the
+// field indexes used by resolveInterfaceForNode are cache indexes, and a
+// manager client serves unstructured reads from the cache only when
+// client.CacheOptions.Unstructured is enabled.
+type Reconciler struct {
+	client            client.Client
+	reader            client.Reader
+	systemNetworkName string
+	log               *log.Logger
+}
+
+// SetupController registers the event sources of ctr: Nodes (every create, and
+// updates that change the migration annotation) and the two sdn kinds, whose
+// events are mapped to the Node they refer to. Only a failure to watch Nodes
+// is returned; see watchSdnKind for the sdn watches.
+func (r *Reconciler) SetupController(_ context.Context, mgr manager.Manager, ctr controller.Controller) error {
+	// Read sdn objects from the same cache the watches below are served from.
+	r.reader = mgr.GetCache()
+
+	nodePredicate := predicate.TypedFuncs[*corev1.Node]{
+		CreateFunc: func(event.TypedCreateEvent[*corev1.Node]) bool { return true },
+		UpdateFunc: func(e event.TypedUpdateEvent[*corev1.Node]) bool {
+			return e.ObjectOld.Annotations[annotations.AnnMigrationIface] !=
+				e.ObjectNew.Annotations[annotations.AnnMigrationIface]
+		},
+		DeleteFunc:  func(event.TypedDeleteEvent[*corev1.Node]) bool { return false },
+		GenericFunc: func(event.TypedGenericEvent[*corev1.Node]) bool { return false },
+	}
+	if err := ctr.Watch(source.Kind(mgr.GetCache(),
+		&corev1.Node{},
+		&handler.TypedEnqueueRequestForObject[*corev1.Node]{},
+		nodePredicate,
+	)); err != nil {
+		return fmt.Errorf("watch Node: %w", err)
+	}
+
+	r.watchSdnKind(mgr, ctr, snnniaGVK, func(obj *unstructured.Unstructured) string {
+		// A missing or non-string status.nodeName yields "", i.e. no request.
+		n, _, _ := unstructured.NestedString(obj.Object, "status", "nodeName")
+		return n
+	})
+
+	r.watchSdnKind(mgr, ctr, nniGVK, func(obj *unstructured.Unstructured) string {
+		// Interfaces whose interface-type label is not VLAN are ignored.
+		if obj.GetLabels()[sdnInterfaceType] != sdnInterfaceVLAN {
+			return ""
+		}
+		// Prefer the node-name label, fall back to spec.nodeName.
+		if n := obj.GetLabels()[sdnNodeNameLabel]; n != "" {
+			return n
+		}
+		n, _, _ := unstructured.NestedString(obj.Object, "spec", "nodeName")
+		return n
+	})
+
+	return nil
+}
+
+// watchSdnKind makes ctr watch the sdn kind gvk and enqueue the Node named by
+// toNodeName for every event; objects for which toNodeName returns "" are
+// skipped. A failure to register the watch is logged and otherwise ignored.
+func (r *Reconciler) watchSdnKind(
+	mgr manager.Manager,
+	ctr controller.Controller,
+	gvk schema.GroupVersionKind,
+	toNodeName func(*unstructured.Unstructured) string,
+) {
+	obj := &unstructured.Unstructured{}
+	obj.SetGroupVersionKind(gvk)
+	err := ctr.Watch(source.Kind(mgr.GetCache(), obj,
+		handler.TypedEnqueueRequestsFromMapFunc(func(_ context.Context, o *unstructured.Unstructured) []reconcile.Request {
+			if n := toNodeName(o); n != "" {
+				return []reconcile.Request{{NamespacedName: types.NamespacedName{Name: n}}}
+			}
+			return nil
+		}),
+	))
+	if err != nil {
+		r.log.Warn("sdn watch failed; migration interface annotation will not track sdn changes",
+			"kind", gvk.Kind, "err", err.Error())
+	}
+}
+
+// Reconcile brings the AnnMigrationIface annotation of the requested Node in
+// line with the interface resolved for it: the annotation is set when an
+// interface name is found and removed when none is. A Node that no longer
+// exists is ignored.
+func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
+	var node corev1.Node
+	if err := r.client.Get(ctx, req.NamespacedName, &node); err != nil {
+		if apierrors.IsNotFound(err) {
+			return reconcile.Result{}, nil
+		}
+		return reconcile.Result{}, err
+	}
+
+	desired, err := r.resolveInterfaceForNode(ctx, node.Name)
+	if err != nil {
+		return reconcile.Result{}, err
+	}
+
+	// Covers both "already set to desired" and "absent and nothing to set".
+	if node.Annotations[annotations.AnnMigrationIface] == desired {
+		return reconcile.Result{}, nil
+	}
+
+	var value any // nil → annotation removed
+	if desired != "" {
+		value = desired
+	}
+	patch, err := json.Marshal(map[string]any{
+		"metadata": map[string]any{
+			"annotations": map[string]any{annotations.AnnMigrationIface: value},
+		},
+	})
+	if err != nil {
+		return reconcile.Result{}, err
+	}
+	if err := r.client.Patch(ctx, &node, client.RawPatch(types.StrategicMergePatchType, patch)); err != nil {
+		return reconcile.Result{}, fmt.Errorf("patch node %q annotation: %w", node.Name, err)
+	}
+
+	r.log.Info("updated migration interface annotation",
+		"node", node.Name,
+		"systemNetwork", r.systemNetworkName,
+		"interface", desired,
+	)
+	return reconcile.Result{}, nil
+}
+
+// resolveInterfaceForNode returns the interface name to annotate nodeName
+// with, or "" when there is none. It lists the attachments indexed under
+// nodeName and the configured SystemNetwork and returns the first interface
+// name that resolves; attachments without status.nodeNetworkInterfaceName, or
+// whose NodeNetworkInterface yields no name, are skipped. An unknown
+// attachment kind (no-match error) is treated as "no interface".
+func (r *Reconciler) resolveInterfaceForNode(ctx context.Context, nodeName string) (string, error) {
+	list := &unstructured.UnstructuredList{}
+	list.SetGroupVersionKind(schema.GroupVersionKind{Group: sdnGroup, Version: sdnVersion, Kind: snnniaGVK.Kind + "List"})
+	fields := client.MatchingFields{
+		indexer.IndexFieldSNNNIAByNodeName:          nodeName,
+		indexer.IndexFieldSNNNIABySystemNetworkName: r.systemNetworkName,
+	}
+	if err := r.reader.List(ctx, list, fields); err != nil {
+		if meta.IsNoMatchError(err) {
+			return "", nil
+		}
+		return "", fmt.Errorf("list %s: %w", snnniaGVK.Kind, err)
+	}
+
+	for i := range list.Items {
+		nniName, _, _ := unstructured.NestedString(list.Items[i].Object, "status", "nodeNetworkInterfaceName")
+		if nniName == "" {
+			continue
+		}
+		ifName, err := r.ifNameFromNNI(ctx, nniName)
+		if err != nil {
+			return "", err
+		}
+		// An unresolved interface must not hide a later usable attachment.
+		if ifName != "" {
+			return ifName, nil
+		}
+	}
+	return "", nil
+}
+
+// ifNameFromNNI returns status.ifName of the NodeNetworkInterface nniName. It
+// returns "" without an error when the object does not exist, its kind is
+// unknown, or the field is missing.
+func (r *Reconciler) ifNameFromNNI(ctx context.Context, nniName string) (string, error) {
+	nni := &unstructured.Unstructured{}
+	nni.SetGroupVersionKind(nniGVK)
+	if err := r.reader.Get(ctx, client.ObjectKey{Name: nniName}, nni); err != nil {
+		if apierrors.IsNotFound(err) || meta.IsNoMatchError(err) {
+			return "", nil
+		}
+		return "", fmt.Errorf("get %s %q: %w", nniGVK.Kind, nniName, err)
+	}
+	ifName, _, _ := unstructured.NestedString(nni.Object, "status", "ifName")
+	return ifName, nil
+}
diff --git a/openapi/config-values.yaml b/openapi/config-values.yaml index b34e7ec2c2..2f5e2575ac 100644 --- a/openapi/config-values.yaml +++ b/openapi/config-values.yaml @@ -252,6 +252,22 @@ properties: type: string minLength: 1 x-examples: ["sc-1", "sc-2"] + liveMigration: + type: object + description: | + Live migration network configuration. + properties: + systemNetworkName: + type: string + minLength: 1 + description: | + Name of a `SystemNetwork` (sdn module) whose per-host IP addresses + are used for VM live migration traffic between `virt-handler` pods. + + Requires the `sdn` module. The named `SystemNetwork` must be `Ready` + and provision an interface on every node that runs VMs. When unset, + live migration traffic flows over the default node network.
+ x-examples: ["migration"] logLevel: type: string description: | diff --git a/templates/virtualization-controller/_helpers.tpl b/templates/virtualization-controller/_helpers.tpl index 8db602dd1e..3a8a7dc613 100644 --- a/templates/virtualization-controller/_helpers.tpl +++ b/templates/virtualization-controller/_helpers.tpl @@ -88,16 +88,10 @@ true value: "24h" - name: GC_VM_POD_SCHEDULE value: "0 0 * * *" -{{- if (hasKey .Values.virtualization.internal.moduleConfig "liveMigration") }} -- name: LIVE_MIGRATION_BANDWIDTH_PER_NODE - value: {{ .Values.virtualization.internal.moduleConfig.liveMigration.bandwidthPerNode | quote }} -- name: LIVE_MIGRATION_MAX_MIGRATIONS_PER_NODE - value: {{ .Values.virtualization.internal.moduleConfig.liveMigration.maxMigrationsPerNode | quote }} -- name: LIVE_MIGRATION_NETWORK - value: {{ .Values.virtualization.internal.moduleConfig.liveMigration.network | quote }} -{{- if (hasKey .Values.virtualization.internal.moduleConfig.liveMigration "dedicated") }} -- name: LIVE_MIGRATION_DEDICATED_INTERFACE_NAME - value: {{ .Values.virtualization.internal.moduleConfig.liveMigration.dedicated.interfaceName | quote }} +{{- if (hasKey (.Values.virtualization | default dict) "liveMigration") }} +{{- if .Values.virtualization.liveMigration.systemNetworkName }} +- name: MIGRATION_SYSTEM_NETWORK_NAME + value: {{ .Values.virtualization.liveMigration.systemNetworkName | quote }} {{- end }} {{- end }} - name: METRICS_BIND_ADDRESS
diff --git a/templates/virtualization-controller/rbac-for-us.yaml b/templates/virtualization-controller/rbac-for-us.yaml
index 60e27392df..b3d5c2c580 100644
--- a/templates/virtualization-controller/rbac-for-us.yaml
+++ b/templates/virtualization-controller/rbac-for-us.yaml
@@ -100,6 +100,17 @@ rules:
   resources:
   - nodes
   verbs:
+  - get
+  - list
+  - watch
+  - patch
+- apiGroups:
+  - network.deckhouse.io
+  resources:
+  - systemnetworknodenetworkinterfaceattachments
+  - nodenetworkinterfaces
+  verbs:
+  - get
   - list
   - watch
 - apiGroups: