|
| 1 | +//go:build windows |
| 2 | + |
| 3 | +package network |
| 4 | + |
| 5 | +import ( |
| 6 | + "context" |
| 7 | + "errors" |
| 8 | + "fmt" |
| 9 | + "slices" |
| 10 | + "strings" |
| 11 | + "sync" |
| 12 | + |
| 13 | + "github.com/Microsoft/hcsshim/hcn" |
| 14 | + "github.com/Microsoft/hcsshim/internal/log" |
| 15 | + "github.com/Microsoft/hcsshim/internal/logfields" |
| 16 | + "github.com/Microsoft/hcsshim/internal/vm/guestmanager" |
| 17 | + "github.com/Microsoft/hcsshim/internal/vm/vmmanager" |
| 18 | + |
| 19 | + "github.com/Microsoft/go-winio/pkg/guid" |
| 20 | + "github.com/sirupsen/logrus" |
| 21 | +) |
| 22 | + |
| 23 | +// Manager is the concrete implementation of [Controller]. |
| 24 | +type Manager struct { |
| 25 | + mu sync.Mutex |
| 26 | + |
| 27 | + // podID is the identifier of the pod whose network this Controller manages. |
| 28 | + podID string |
| 29 | + |
| 30 | + // namespaceID is the HCN namespace ID in use after a successful Setup. |
| 31 | + namespaceID string |
| 32 | + |
| 33 | + // vmEndpoints maps nicID (ID within UVM) -> HCN endpoint. |
| 34 | + vmEndpoints map[string]*hcn.HostComputeEndpoint |
| 35 | + |
| 36 | + // netState is the current lifecycle state of the network. |
| 37 | + netState State |
| 38 | + |
| 39 | + // isNamespaceSupportedByGuest determines if network namespace is supported inside the guest |
| 40 | + isNamespaceSupportedByGuest bool |
| 41 | + |
| 42 | + // vmNetManager performs host-side NIC hot-add/remove on the UVM. |
| 43 | + vmNetManager vmmanager.NetworkManager |
| 44 | + |
| 45 | + // linuxGuestMgr performs guest-side NIC inject/remove for LCOW. |
| 46 | + linuxGuestMgr guestmanager.LCOWNetworkManager |
| 47 | + |
| 48 | + // winGuestMgr performs guest-side NIC/namespace operations for WCOW. |
| 49 | + winGuestMgr guestmanager.WCOWNetworkManager |
| 50 | + |
| 51 | + // capsProvider exposes the guest's declared capabilities. |
| 52 | + // Used to check IsNamespaceAddRequestSupported. |
| 53 | + capsProvider capabilitiesProvider |
| 54 | +} |
| 55 | + |
| 56 | +// Assert that Manager implements Controller. |
| 57 | +var _ Controller = (*Manager)(nil) |
| 58 | + |
| 59 | +// New creates a ready-to-use Manager in [StateNotConfigured]. |
| 60 | +// |
| 61 | +// This method is called from [VMController.CreateNetworkController()] |
| 62 | +// which injects the necessary dependencies. |
| 63 | +func New( |
| 64 | + vmNetManager vmmanager.NetworkManager, |
| 65 | + linuxGuestMgr guestmanager.LCOWNetworkManager, |
| 66 | + windowsGuestMgr guestmanager.WCOWNetworkManager, |
| 67 | + capsProvider capabilitiesProvider, |
| 68 | +) *Manager { |
| 69 | + m := &Manager{ |
| 70 | + vmNetManager: vmNetManager, |
| 71 | + linuxGuestMgr: linuxGuestMgr, |
| 72 | + winGuestMgr: windowsGuestMgr, |
| 73 | + capsProvider: capsProvider, |
| 74 | + netState: StateNotConfigured, |
| 75 | + vmEndpoints: make(map[string]*hcn.HostComputeEndpoint), |
| 76 | + } |
| 77 | + |
| 78 | + // Cache once at construction so hot-add paths can branch without re-querying. |
| 79 | + if caps := capsProvider.Capabilities(); caps != nil { |
| 80 | + m.isNamespaceSupportedByGuest = caps.IsNamespaceAddRequestSupported() |
| 81 | + } |
| 82 | + |
| 83 | + return m |
| 84 | +} |
| 85 | + |
| 86 | +// Setup attaches the requested HCN namespace to the guest VM |
| 87 | +// and hot-adds all endpoints found in that namespace. |
| 88 | +// It must be called only once; subsequent calls return an error. |
| 89 | +func (m *Manager) Setup(ctx context.Context, opts *SetupOptions) (err error) { |
| 90 | + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "Network Setup")) |
| 91 | + |
| 92 | + m.mu.Lock() |
| 93 | + defer m.mu.Unlock() |
| 94 | + |
| 95 | + log.G(ctx).WithFields(logrus.Fields{ |
| 96 | + logfields.PodID: opts.PodID, |
| 97 | + logfields.Namespace: opts.NetworkNamespace, |
| 98 | + }).Debug("starting network setup") |
| 99 | + |
| 100 | + // If Setup has already been called, then error out. |
| 101 | + if m.netState != StateNotConfigured { |
| 102 | + return fmt.Errorf("cannot set up network in state %s", m.netState) |
| 103 | + } |
| 104 | + |
| 105 | + defer func() { |
| 106 | + if err != nil { |
| 107 | + // If setup fails for any reason, move to invalid so no further |
| 108 | + // Setup calls are accepted. |
| 109 | + m.netState = StateInvalid |
| 110 | + log.G(ctx).WithError(err).Error("network setup failed, moving to invalid state") |
| 111 | + } |
| 112 | + }() |
| 113 | + |
| 114 | + if opts.NetworkNamespace == "" { |
| 115 | + return fmt.Errorf("network namespace must not be empty") |
| 116 | + } |
| 117 | + |
| 118 | + // Validate that the provided namespace exists. |
| 119 | + hcnNamespace, err := hcn.GetNamespaceByID(opts.NetworkNamespace) |
| 120 | + if err != nil { |
| 121 | + return fmt.Errorf("get network namespace %s: %w", opts.NetworkNamespace, err) |
| 122 | + } |
| 123 | + |
| 124 | + // Fetch all endpoints in the namespace. |
| 125 | + endpoints, err := m.fetchEndpointsInNamespace(ctx, hcnNamespace) |
| 126 | + if err != nil { |
| 127 | + return fmt.Errorf("fetch endpoints in namespace %s: %w", hcnNamespace.Id, err) |
| 128 | + } |
| 129 | + |
| 130 | + // Add the namespace to the guest. |
| 131 | + if err = m.addNetNSInsideGuest(ctx, hcnNamespace); err != nil { |
| 132 | + return fmt.Errorf("add network namespace to guest: %w", err) |
| 133 | + } |
| 134 | + |
| 135 | + // Hot-add all endpoints in the namespace to the guest. |
| 136 | + for _, endpoint := range endpoints { |
| 137 | + nicGUID, err := guid.NewV4() |
| 138 | + if err != nil { |
| 139 | + return fmt.Errorf("generate NIC GUID: %w", err) |
| 140 | + } |
| 141 | + if err = m.addEndpointToGuestNamespace(ctx, nicGUID.String(), endpoint, opts.PolicyBasedRouting); err != nil { |
| 142 | + return fmt.Errorf("add endpoint %s to guest: %w", endpoint.Name, err) |
| 143 | + } |
| 144 | + } |
| 145 | + |
| 146 | + m.podID = opts.PodID |
| 147 | + m.namespaceID = hcnNamespace.Id |
| 148 | + m.netState = StateConfigured |
| 149 | + |
| 150 | + log.G(ctx).WithFields(logrus.Fields{ |
| 151 | + logfields.PodID: opts.PodID, |
| 152 | + logfields.Namespace: hcnNamespace.Id, |
| 153 | + }).Info("network setup completed successfully") |
| 154 | + |
| 155 | + return nil |
| 156 | +} |
| 157 | + |
| 158 | +// Teardown removes all guest-side NICs and the HCN namespace from the UVM. |
| 159 | +// |
| 160 | +// It is idempotent: calling it when the network is already torn down or not yet |
| 161 | +// configured is a no-op. |
| 162 | +func (m *Manager) Teardown(ctx context.Context) error { |
| 163 | + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "Network Teardown")) |
| 164 | + |
| 165 | + m.mu.Lock() |
| 166 | + defer m.mu.Unlock() |
| 167 | + |
| 168 | + log.G(ctx).WithFields(logrus.Fields{ |
| 169 | + logfields.PodID: m.podID, |
| 170 | + logfields.Namespace: m.namespaceID, |
| 171 | + "State": m.netState, |
| 172 | + }).Debug("starting network teardown") |
| 173 | + |
| 174 | + if m.netState == StateTornDown { |
| 175 | + // Teardown is idempotent, so return nil if already torn down. |
| 176 | + log.G(ctx).Info("network already torn down, skipping") |
| 177 | + return nil |
| 178 | + } |
| 179 | + |
| 180 | + if m.netState == StateNotConfigured { |
| 181 | + // Nothing was configured; nothing to clean up. |
| 182 | + log.G(ctx).Info("network not configured, skipping") |
| 183 | + return nil |
| 184 | + } |
| 185 | + |
| 186 | + // Remove all endpoints from the guest. |
| 187 | + // Use a continue-on-error strategy: attempt every NIC regardless of individual |
| 188 | + // failures, then collect all errors. |
| 189 | + var teardownErrs []error |
| 190 | + for nicID, endpoint := range m.vmEndpoints { |
| 191 | + if err := m.removeEndpointFromGuestNamespace(ctx, nicID, endpoint); err != nil { |
| 192 | + teardownErrs = append(teardownErrs, fmt.Errorf("remove endpoint %s from guest: %w", endpoint.Name, err)) |
| 193 | + continue // continue attempting to remove other endpoints |
| 194 | + } |
| 195 | + |
| 196 | + delete(m.vmEndpoints, nicID) |
| 197 | + } |
| 198 | + |
| 199 | + if err := m.removeNetNSInsideGuest(ctx, m.namespaceID); err != nil { |
| 200 | + teardownErrs = append(teardownErrs, fmt.Errorf("remove network namespace from guest: %w", err)) |
| 201 | + } |
| 202 | + |
| 203 | + if len(teardownErrs) > 0 { |
| 204 | + // If any errors were encountered during teardown, mark the state as invalid. |
| 205 | + m.netState = StateInvalid |
| 206 | + return errors.Join(teardownErrs...) |
| 207 | + } |
| 208 | + |
| 209 | + // Mark as torn down if we do not encounter any errors. |
| 210 | + // No further Setup or Teardown calls are allowed. |
| 211 | + m.netState = StateTornDown |
| 212 | + |
| 213 | + log.G(ctx).WithFields(logrus.Fields{ |
| 214 | + logfields.PodID: m.podID, |
| 215 | + "networkNamespace": m.namespaceID, |
| 216 | + }).Info("network teardown completed successfully") |
| 217 | + |
| 218 | + return nil |
| 219 | +} |
| 220 | + |
| 221 | +// fetchEndpointsInNamespace retrieves all HCN endpoints present in |
| 222 | +// the given namespace. |
| 223 | +// Endpoints are sorted so that those with names ending in "eth0" appear first. |
| 224 | +func (m *Manager) fetchEndpointsInNamespace(ctx context.Context, ns *hcn.HostComputeNamespace) ([]*hcn.HostComputeEndpoint, error) { |
| 225 | + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Namespace, ns.Id)) |
| 226 | + log.G(ctx).Info("fetching endpoints from the network namespace") |
| 227 | + |
| 228 | + ids, err := hcn.GetNamespaceEndpointIds(ns.Id) |
| 229 | + if err != nil { |
| 230 | + return nil, fmt.Errorf("get endpoint IDs for namespace %s: %w", ns.Id, err) |
| 231 | + } |
| 232 | + endpoints := make([]*hcn.HostComputeEndpoint, 0, len(ids)) |
| 233 | + for _, id := range ids { |
| 234 | + ep, err := hcn.GetEndpointByID(id) |
| 235 | + if err != nil { |
| 236 | + return nil, fmt.Errorf("get endpoint %s: %w", id, err) |
| 237 | + } |
| 238 | + endpoints = append(endpoints, ep) |
| 239 | + } |
| 240 | + |
| 241 | + // Ensure the endpoint named "eth0" is added first when multiple endpoints are present, |
| 242 | + // so it maps to eth0 inside the guest. CNI results aren't available here, so we rely |
| 243 | + // on the endpoint name suffix as a heuristic. |
| 244 | + cmp := func(a, b *hcn.HostComputeEndpoint) int { |
| 245 | + if strings.HasSuffix(a.Name, "eth0") { |
| 246 | + return -1 |
| 247 | + } |
| 248 | + if strings.HasSuffix(b.Name, "eth0") { |
| 249 | + return 1 |
| 250 | + } |
| 251 | + return 0 |
| 252 | + } |
| 253 | + |
| 254 | + slices.SortStableFunc(endpoints, cmp) |
| 255 | + |
| 256 | + log.G(ctx).Tracef("fetched endpoints from the network namespace %+v", endpoints) |
| 257 | + |
| 258 | + return endpoints, nil |
| 259 | +} |
0 commit comments