diff --git a/api/nvidia/v1/clusterpolicy_types.go b/api/nvidia/v1/clusterpolicy_types.go index f79497e64..033fdd760 100644 --- a/api/nvidia/v1/clusterpolicy_types.go +++ b/api/nvidia/v1/clusterpolicy_types.go @@ -357,6 +357,13 @@ type ValidatorSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Environment Variables" // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced,urn:alm:descriptor:com.tectonic.ui:text" Env []EnvVar `json:"env,omitempty"` + + // HostNetwork indicates whether the Validator pod uses the host's network namespace. + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable hostNetwork for NVIDIA Operator Validator" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + HostNetwork *bool `json:"hostNetwork,omitempty"` } // PluginValidatorSpec defines validator spec for NVIDIA Device Plugin @@ -615,6 +622,13 @@ type DriverSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Name of the Kubernetes Secret with secret environment variables for the NVIDIA Driver" SecretEnv string `json:"secretEnv,omitempty"` + + // HostNetwork indicates whether the Driver pod uses the host's network namespace. + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable hostNetwork for NVIDIA Driver" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + HostNetwork *bool `json:"hostNetwork,omitempty"` } // VGPUManagerSpec defines the properties for the NVIDIA vGPU Manager deployment @@ -676,6 +690,13 @@ type VGPUManagerSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Kernel module configuration parameters for the vGPU manager" KernelModuleConfig *KernelModuleConfigSpec `json:"kernelModuleConfig,omitempty"` + + // HostNetwork indicates whether the vGPU Manager pod uses the host's network namespace. + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable hostNetwork for NVIDIA vGPU Manager" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + HostNetwork *bool `json:"hostNetwork,omitempty"` } // ToolkitSpec defines the properties for NVIDIA Container Toolkit deployment @@ -737,6 +758,13 @@ type ToolkitSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Toolkit install directory on the host" // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:text" InstallDir string `json:"installDir,omitempty"` + + // HostNetwork indicates whether the Container Toolkit pod uses the host's network namespace. + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable hostNetwork for NVIDIA Container Toolkit" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + HostNetwork *bool `json:"hostNetwork,omitempty"` } // DevicePluginSpec defines the properties for NVIDIA Device Plugin deployment @@ -800,6 +828,13 @@ type DevicePluginSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="MPS related configuration for the NVIDIA Device Plugin" MPS *MPSConfig `json:"mps,omitempty"` + + // HostNetwork indicates whether the Device Plugin pod uses the host's network namespace. + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable hostNetwork for NVIDIA Device Plugin" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + HostNetwork *bool `json:"hostNetwork,omitempty"` } // DevicePluginConfig defines ConfigMap name for NVIDIA Device Plugin config @@ -880,6 +915,13 @@ type SandboxDevicePluginSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Environment Variables" // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced,urn:alm:descriptor:com.tectonic.ui:text" Env []EnvVar `json:"env,omitempty"` + + // HostNetwork indicates whether the Sandbox Device Plugin pod uses the host's network namespace. + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable hostNetwork for NVIDIA Sandbox Device Plugin" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + HostNetwork *bool `json:"hostNetwork,omitempty"` } // DCGMExporterSpec defines the properties for NVIDIA DCGM Exporter deployment @@ -1104,6 +1146,13 @@ type DCGMSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Host port to bind for DCGM engine" // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:number" HostPort int32 `json:"hostPort,omitempty"` + + // HostNetwork indicates whether the DCGM pod uses the host's network namespace. + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable hostNetwork for NVIDIA DCGM" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + HostNetwork *bool `json:"hostNetwork,omitempty"` } // NodeStatusExporterSpec defines the properties for node-status-exporter state @@ -1157,6 +1206,13 @@ type NodeStatusExporterSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Environment Variables" // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced,urn:alm:descriptor:com.tectonic.ui:text" Env []EnvVar `json:"env,omitempty"` + + // HostNetwork indicates whether the Node Status Exporter pod uses the host's network namespace. + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable hostNetwork for NVIDIA Node Status Exporter" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + HostNetwork *bool `json:"hostNetwork,omitempty"` } // DriverRepoConfigSpec defines custom repo configuration for NVIDIA Driver container @@ -1279,6 +1335,13 @@ type GPUFeatureDiscoverySpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Environment Variables" // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced,urn:alm:descriptor:com.tectonic.ui:text" Env []EnvVar `json:"env,omitempty"` + + // HostNetwork indicates whether the GPU Feature Discovery pod uses the host's network namespace. + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable hostNetwork for GPU Feature Discovery" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + HostNetwork *bool `json:"hostNetwork,omitempty"` } // MIGManagerSpec defines the properties for deploying NVIDIA MIG Manager @@ -1342,6 +1405,13 @@ type MIGManagerSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Custom gpu-clients configuration for NVIDIA MIG Manager container" GPUClientsConfig *MIGGPUClientsConfigSpec `json:"gpuClientsConfig,omitempty"` + + // HostNetwork indicates whether the MIG Manager pod uses the host's network namespace. + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable hostNetwork for NVIDIA MIG Manager" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + HostNetwork *bool `json:"hostNetwork,omitempty"` } // GPUDirectRDMASpec defines the properties for nvidia-peermem deployment @@ -1541,6 +1611,13 @@ type ComponentCommonSpec struct { type KataDevicePluginSpec struct { ImageSpec `json:",inline"` ComponentCommonSpec `json:",inline"` + + // HostNetwork indicates whether the Kata Sandbox Device Plugin pod uses the host's network namespace. + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable hostNetwork for NVIDIA Kata Sandbox Device Plugin" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + HostNetwork *bool `json:"hostNetwork,omitempty"` } // KataManagerSpec defines the configuration for the kata-manager which prepares NVIDIA-specific kata runtimes @@ -1599,6 +1676,13 @@ type KataManagerSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Environment Variables" // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced,urn:alm:descriptor:com.tectonic.ui:text" Env []EnvVar `json:"env,omitempty"` + + // HostNetwork indicates whether the Kata Manager pod uses the host's network namespace. + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable hostNetwork for NVIDIA Kata Manager" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + HostNetwork *bool `json:"hostNetwork,omitempty"` } // CCManagerSpec defines the properties for deploying Confidential Containers (CC) manager @@ -1659,6 +1743,13 @@ type CCManagerSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Environment Variables" // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced,urn:alm:descriptor:com.tectonic.ui:text" Env []EnvVar `json:"env,omitempty"` + + // HostNetwork indicates whether the CC Manager pod uses the host's network namespace. + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable hostNetwork for NVIDIA CC Manager" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + HostNetwork *bool `json:"hostNetwork,omitempty"` } // VFIOManagerSpec defines the properties for deploying VFIO-PCI manager @@ -1715,6 +1806,13 @@ type VFIOManagerSpec struct { // DriverManager represents configuration for NVIDIA Driver Manager DriverManager DriverManagerSpec `json:"driverManager,omitempty"` + + // HostNetwork indicates whether the VFIO Manager pod uses the host's network namespace. + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable hostNetwork for NVIDIA VFIO Manager" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + HostNetwork *bool `json:"hostNetwork,omitempty"` } // VGPUDeviceManagerSpec defines the properties for deploying NVIDIA vGPU Device Manager @@ -1773,6 +1871,13 @@ type VGPUDeviceManagerSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="NVIDIA vGPU devices configuration for NVIDIA vGPU Device Manager container" Config *VGPUDevicesConfigSpec `json:"config,omitempty"` + + // HostNetwork indicates whether the vGPU Device Manager pod uses the host's network namespace. + // +kubebuilder:validation:Optional + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable hostNetwork for NVIDIA vGPU Device Manager" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + HostNetwork *bool `json:"hostNetwork,omitempty"` } // VGPUDevicesConfigSpec defines vGPU devices configuration for NVIDIA vGPU Device Manager container diff --git a/api/nvidia/v1/zz_generated.deepcopy.go b/api/nvidia/v1/zz_generated.deepcopy.go index f65e0648b..afd45f93e 100644 --- a/api/nvidia/v1/zz_generated.deepcopy.go +++ b/api/nvidia/v1/zz_generated.deepcopy.go @@ -57,6 +57,11 @@ func (in *CCManagerSpec) DeepCopyInto(out *CCManagerSpec) { *out = make([]EnvVar, len(*in)) copy(*out, *in) } + if in.HostNetwork != nil { + in, out := &in.HostNetwork, &out.HostNetwork + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CCManagerSpec. @@ -500,6 +505,11 @@ func (in *DCGMSpec) DeepCopyInto(out *DCGMSpec) { *out = make([]EnvVar, len(*in)) copy(*out, *in) } + if in.HostNetwork != nil { + in, out := &in.HostNetwork, &out.HostNetwork + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DCGMSpec. @@ -611,6 +621,11 @@ func (in *DevicePluginSpec) DeepCopyInto(out *DevicePluginSpec) { *out = new(MPSConfig) **out = **in } + if in.HostNetwork != nil { + in, out := &in.HostNetwork, &out.HostNetwork + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DevicePluginSpec. @@ -792,6 +807,11 @@ func (in *DriverSpec) DeepCopyInto(out *DriverSpec) { *out = new(KernelModuleConfigSpec) **out = **in } + if in.HostNetwork != nil { + in, out := &in.HostNetwork, &out.HostNetwork + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DriverSpec. @@ -962,6 +982,11 @@ func (in *GPUFeatureDiscoverySpec) DeepCopyInto(out *GPUFeatureDiscoverySpec) { *out = make([]EnvVar, len(*in)) copy(*out, *in) } + if in.HostNetwork != nil { + in, out := &in.HostNetwork, &out.HostNetwork + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUFeatureDiscoverySpec. @@ -1029,6 +1054,11 @@ func (in *KataDevicePluginSpec) DeepCopyInto(out *KataDevicePluginSpec) { *out = *in out.ImageSpec = in.ImageSpec in.ComponentCommonSpec.DeepCopyInto(&out.ComponentCommonSpec) + if in.HostNetwork != nil { + in, out := &in.HostNetwork, &out.HostNetwork + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KataDevicePluginSpec. @@ -1074,6 +1104,11 @@ func (in *KataManagerSpec) DeepCopyInto(out *KataManagerSpec) { *out = make([]EnvVar, len(*in)) copy(*out, *in) } + if in.HostNetwork != nil { + in, out := &in.HostNetwork, &out.HostNetwork + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KataManagerSpec. @@ -1154,6 +1189,11 @@ func (in *MIGManagerSpec) DeepCopyInto(out *MIGManagerSpec) { *out = new(MIGGPUClientsConfigSpec) **out = **in } + if in.HostNetwork != nil { + in, out := &in.HostNetwork, &out.HostNetwork + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MIGManagerSpec. @@ -1239,6 +1279,11 @@ func (in *NodeStatusExporterSpec) DeepCopyInto(out *NodeStatusExporterSpec) { *out = make([]EnvVar, len(*in)) copy(*out, *in) } + if in.HostNetwork != nil { + in, out := &in.HostNetwork, &out.HostNetwork + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeStatusExporterSpec. @@ -1418,6 +1463,11 @@ func (in *SandboxDevicePluginSpec) DeepCopyInto(out *SandboxDevicePluginSpec) { *out = make([]EnvVar, len(*in)) copy(*out, *in) } + if in.HostNetwork != nil { + in, out := &in.HostNetwork, &out.HostNetwork + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SandboxDevicePluginSpec. @@ -1478,6 +1528,11 @@ func (in *ToolkitSpec) DeepCopyInto(out *ToolkitSpec) { *out = make([]EnvVar, len(*in)) copy(*out, *in) } + if in.HostNetwork != nil { + in, out := &in.HostNetwork, &out.HostNetwork + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ToolkitSpec. @@ -1539,6 +1594,11 @@ func (in *VFIOManagerSpec) DeepCopyInto(out *VFIOManagerSpec) { copy(*out, *in) } in.DriverManager.DeepCopyInto(&out.DriverManager) + if in.HostNetwork != nil { + in, out := &in.HostNetwork, &out.HostNetwork + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VFIOManagerSpec. @@ -1604,6 +1664,11 @@ func (in *VGPUDeviceManagerSpec) DeepCopyInto(out *VGPUDeviceManagerSpec) { *out = new(VGPUDevicesConfigSpec) **out = **in } + if in.HostNetwork != nil { + in, out := &in.HostNetwork, &out.HostNetwork + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VGPUDeviceManagerSpec. @@ -1685,6 +1750,11 @@ func (in *VGPUManagerSpec) DeepCopyInto(out *VGPUManagerSpec) { *out = new(KernelModuleConfigSpec) **out = **in } + if in.HostNetwork != nil { + in, out := &in.HostNetwork, &out.HostNetwork + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VGPUManagerSpec. @@ -1747,6 +1817,11 @@ func (in *ValidatorSpec) DeepCopyInto(out *ValidatorSpec) { *out = make([]EnvVar, len(*in)) copy(*out, *in) } + if in.HostNetwork != nil { + in, out := &in.HostNetwork, &out.HostNetwork + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ValidatorSpec. diff --git a/bundle/manifests/nvidia.com_clusterpolicies.yaml b/bundle/manifests/nvidia.com_clusterpolicies.yaml index 030581fc1..297370279 100644 --- a/bundle/manifests/nvidia.com_clusterpolicies.yaml +++ b/bundle/manifests/nvidia.com_clusterpolicies.yaml @@ -82,6 +82,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the CC Manager pod + uses the host's network namespace. + type: boolean image: description: CC Manager image name pattern: '[a-zA-Z0-9\-]+' @@ -489,6 +493,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the DCGM pod uses the + host's network namespace. + type: boolean hostPort: description: 'Deprecated: HostPort represents host port that needs to be bound for DCGM engine (Default: 5555)' @@ -815,6 +823,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Device Plugin pod + uses the host's network namespace. + type: boolean image: description: NVIDIA Device Plugin image name pattern: '[a-zA-Z0-9\-]+' @@ -907,6 +919,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Driver pod uses + the host's network namespace. + type: boolean image: description: NVIDIA Driver image name pattern: '[a-zA-Z0-9\-]+' @@ -1419,6 +1435,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the GPU Feature Discovery + pod uses the host's network namespace. + type: boolean image: description: GFD image name pattern: '[a-zA-Z0-9\-]+' @@ -1562,6 +1582,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Kata Manager pod + uses the host's network namespace. + type: boolean image: description: Kata Manager image name pattern: '[a-zA-Z0-9\-]+' @@ -1638,6 +1662,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Kata Sandbox Device + Plugin pod uses the host's network namespace. + type: boolean image: description: NVIDIA component image name pattern: '[a-zA-Z0-9\-]+' @@ -1751,6 +1779,10 @@ spec: description: ConfigMap name type: string type: object + hostNetwork: + description: HostNetwork indicates whether the MIG Manager pod + uses the host's network namespace. + type: boolean image: description: NVIDIA MIG Manager image name pattern: '[a-zA-Z0-9\-]+' @@ -1827,6 +1859,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Node Status Exporter + pod uses the host's network namespace. + type: boolean image: description: Node Status Exporter image name pattern: '[a-zA-Z0-9\-]+' @@ -1976,6 +2012,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Sandbox Device + Plugin pod uses the host's network namespace. + type: boolean image: description: NVIDIA Sandbox Device Plugin image name pattern: '[a-zA-Z0-9\-]+' @@ -2081,6 +2121,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Container Toolkit + pod uses the host's network namespace. + type: boolean image: description: NVIDIA Container Toolkit image name pattern: '[a-zA-Z0-9\-]+' @@ -2197,6 +2241,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Validator pod uses + the host's network namespace. + type: boolean image: description: Validator image name pattern: '[a-zA-Z0-9\-]+' @@ -2415,6 +2463,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the VFIO Manager pod + uses the host's network namespace. + type: boolean image: description: VFIO Manager image name pattern: '[a-zA-Z0-9\-]+' @@ -2503,6 +2555,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the vGPU Device Manager + pod uses the host's network namespace. + type: boolean image: description: NVIDIA vGPU Device Manager image name pattern: '[a-zA-Z0-9\-]+' @@ -2621,6 +2677,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the vGPU Manager pod + uses the host's network namespace. + type: boolean image: description: NVIDIA vGPU Manager image name pattern: '[a-zA-Z0-9\-]+' diff --git a/config/crd/bases/nvidia.com_clusterpolicies.yaml b/config/crd/bases/nvidia.com_clusterpolicies.yaml index 030581fc1..297370279 100644 --- a/config/crd/bases/nvidia.com_clusterpolicies.yaml +++ b/config/crd/bases/nvidia.com_clusterpolicies.yaml @@ -82,6 +82,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the CC Manager pod + uses the host's network namespace. + type: boolean image: description: CC Manager image name pattern: '[a-zA-Z0-9\-]+' @@ -489,6 +493,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the DCGM pod uses the + host's network namespace. + type: boolean hostPort: description: 'Deprecated: HostPort represents host port that needs to be bound for DCGM engine (Default: 5555)' @@ -815,6 +823,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Device Plugin pod + uses the host's network namespace. + type: boolean image: description: NVIDIA Device Plugin image name pattern: '[a-zA-Z0-9\-]+' @@ -907,6 +919,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Driver pod uses + the host's network namespace. + type: boolean image: description: NVIDIA Driver image name pattern: '[a-zA-Z0-9\-]+' @@ -1419,6 +1435,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the GPU Feature Discovery + pod uses the host's network namespace. + type: boolean image: description: GFD image name pattern: '[a-zA-Z0-9\-]+' @@ -1562,6 +1582,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Kata Manager pod + uses the host's network namespace. + type: boolean image: description: Kata Manager image name pattern: '[a-zA-Z0-9\-]+' @@ -1638,6 +1662,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Kata Sandbox Device + Plugin pod uses the host's network namespace. + type: boolean image: description: NVIDIA component image name pattern: '[a-zA-Z0-9\-]+' @@ -1751,6 +1779,10 @@ spec: description: ConfigMap name type: string type: object + hostNetwork: + description: HostNetwork indicates whether the MIG Manager pod + uses the host's network namespace. + type: boolean image: description: NVIDIA MIG Manager image name pattern: '[a-zA-Z0-9\-]+' @@ -1827,6 +1859,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Node Status Exporter + pod uses the host's network namespace. + type: boolean image: description: Node Status Exporter image name pattern: '[a-zA-Z0-9\-]+' @@ -1976,6 +2012,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Sandbox Device + Plugin pod uses the host's network namespace. + type: boolean image: description: NVIDIA Sandbox Device Plugin image name pattern: '[a-zA-Z0-9\-]+' @@ -2081,6 +2121,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Container Toolkit + pod uses the host's network namespace. + type: boolean image: description: NVIDIA Container Toolkit image name pattern: '[a-zA-Z0-9\-]+' @@ -2197,6 +2241,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Validator pod uses + the host's network namespace. + type: boolean image: description: Validator image name pattern: '[a-zA-Z0-9\-]+' @@ -2415,6 +2463,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the VFIO Manager pod + uses the host's network namespace. + type: boolean image: description: VFIO Manager image name pattern: '[a-zA-Z0-9\-]+' @@ -2503,6 +2555,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the vGPU Device Manager + pod uses the host's network namespace. + type: boolean image: description: NVIDIA vGPU Device Manager image name pattern: '[a-zA-Z0-9\-]+' @@ -2621,6 +2677,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the vGPU Manager pod + uses the host's network namespace. + type: boolean image: description: NVIDIA vGPU Manager image name pattern: '[a-zA-Z0-9\-]+' diff --git a/controllers/object_controls.go b/controllers/object_controls.go index b436bcab1..788880064 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -53,6 +53,15 @@ import ( "github.com/NVIDIA/gpu-operator/internal/utils" ) +// applyHostNetworkConfig sets hostNetwork and the corresponding DNSPolicy on a pod spec +// if the provided hostNetwork bool pointer is non-nil and true. +func applyHostNetworkConfig(podSpec *corev1.PodSpec, hostNetwork *bool) { + if hostNetwork != nil && *hostNetwork { + podSpec.HostNetwork = true + podSpec.DNSPolicy = corev1.DNSClusterFirstWithHostNet + } +} + const ( // DefaultContainerdConfigFile indicates default config file path for containerd DefaultContainerdConfigFile = "/etc/containerd/config.toml" @@ -927,6 +936,9 @@ func TransformGPUDiscoveryPlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPol // update env required for MIG support applyMIGConfiguration(&(obj.Spec.Template.Spec.Containers[0]), config.MIG.Strategy) + // set hostNetwork for gpu-feature-discovery if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.GPUFeatureDiscovery.HostNetwork) + return nil } @@ -1033,6 +1045,9 @@ func TransformDriver(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n C setContainerEnv(driverToolkitContainer, "DRIVER_CONFIG_DIGEST", configDigest) } + // set hostNetwork for driver if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.Driver.HostNetwork) + return nil } @@ -1056,6 +1071,9 @@ func TransformVGPUManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec return fmt.Errorf("failed to transform the Driver Toolkit container: %s", err) } + // set hostNetwork for vgpu-manager if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.VGPUManager.HostNetwork) + return nil } @@ -1318,6 +1336,9 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n return fmt.Errorf("error transforming toolkit daemonset : %w", err) } + // set hostNetwork for toolkit if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.Toolkit.HostNetwork) + return nil } @@ -1552,6 +1573,9 @@ func TransformDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe } } + // set hostNetwork for device-plugin if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.DevicePlugin.HostNetwork) + return nil } @@ -1621,6 +1645,9 @@ func TransformMPSControlDaemon(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolic } } + // set hostNetwork for mps-control-daemon if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.DevicePlugin.HostNetwork) + return nil } @@ -1662,6 +1689,10 @@ func TransformSandboxDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPo setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), env.Name, env.Value) } } + + // set hostNetwork for sandbox-device-plugin if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.SandboxDevicePlugin.HostNetwork) + return nil } @@ -1696,6 +1727,10 @@ func TransformKataDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolic setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), env.Name, env.Value) } } + + // set hostNetwork for kata-device-plugin if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.KataSandboxDevicePlugin.HostNetwork) + return nil } @@ -1857,6 +1892,9 @@ func TransformDCGM(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n Clu setNRIPluginAnnotation(&obj.Spec.Template.ObjectMeta, &config.CDI, obj.Spec.Template.Spec.Containers[0].Name) setRuntimeClassName(&obj.Spec.Template.Spec, config, n.runtime) + // set hostNetwork for dcgm if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.DCGM.HostNetwork) + return nil } @@ -1950,6 +1988,9 @@ func TransformMIGManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, } } + // set hostNetwork for mig-manager if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.MIGManager.HostNetwork) + return nil } @@ -2024,6 +2065,9 @@ func TransformKataManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec return fmt.Errorf("error transforming kata-manager daemonset : %w", err) } + // set hostNetwork for kata-manager if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.KataManager.HostNetwork) + return nil } @@ -2071,6 +2115,9 @@ func TransformVFIOManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec } } + // set hostNetwork for vfio-manager if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.VFIOManager.HostNetwork) + return nil } @@ -2117,6 +2164,9 @@ func TransformCCManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, } } + // set hostNetwork for cc-manager if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.CCManager.HostNetwork) + return nil } @@ -2183,6 +2233,9 @@ func TransformVGPUDeviceManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPoli } setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "DEFAULT_VGPU_CONFIG", defaultConfig) + // set hostNetwork for vgpu-device-manager if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.VGPUDeviceManager.HostNetwork) + return nil } @@ -2234,6 +2287,9 @@ func TransformValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n.logger.Info("WARN: errors transforming the validator containers: %v", validatorErr) } + // set hostNetwork for validator if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.Validator.HostNetwork) + return nil } @@ -2263,6 +2319,9 @@ func TransformSandboxValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolic n.logger.Info("WARN: errors transforming the validator containers: %v", validatorErr) } + // set hostNetwork for sandbox-validator if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.Validator.HostNetwork) + return nil } @@ -2460,6 +2519,9 @@ func TransformNodeStatusExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPol // update the security context for the node status exporter container. transformValidatorSecurityContext(&obj.Spec.Template.Spec.Containers[0]) + // set hostNetwork for node-status-exporter if specified + applyHostNetworkConfig(&obj.Spec.Template.Spec, config.NodeStatusExporter.HostNetwork) + return nil } diff --git a/controllers/transforms_test.go b/controllers/transforms_test.go index 86ade02a2..9d6bbf99f 100644 --- a/controllers/transforms_test.go +++ b/controllers/transforms_test.go @@ -754,6 +754,43 @@ func TestApplyCommonDaemonSetConfig(t *testing.T) { } } +func TestApplyHostNetworkConfig(t *testing.T) { + tests := []struct { + name string + hostNetwork *bool + expectEnabled bool + expectDNSPolicy corev1.DNSPolicy + }{ + { + name: "hostNetwork nil, should not set hostNetwork", + hostNetwork: nil, + expectEnabled: false, + expectDNSPolicy: "", + }, + { + name: "hostNetwork true, should set hostNetwork and DNSPolicy", + hostNetwork: ptr.To(true), + expectEnabled: true, + expectDNSPolicy: corev1.DNSClusterFirstWithHostNet, + }, + { + name: "hostNetwork false, should not set hostNetwork", + hostNetwork: ptr.To(false), + expectEnabled: false, + expectDNSPolicy: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + podSpec := &corev1.PodSpec{} + applyHostNetworkConfig(podSpec, tt.hostNetwork) + require.Equal(t, tt.expectEnabled, podSpec.HostNetwork) + require.Equal(t, tt.expectDNSPolicy, podSpec.DNSPolicy) + }) + } +} + func TestApplyCommonDaemonsetMetadata(t *testing.T) { testCases := []struct { description string diff --git a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml index 030581fc1..297370279 100644 --- a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml +++ b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml @@ -82,6 +82,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the CC Manager pod + uses the host's network namespace. + type: boolean image: description: CC Manager image name pattern: '[a-zA-Z0-9\-]+' @@ -489,6 +493,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the DCGM pod uses the + host's network namespace. + type: boolean hostPort: description: 'Deprecated: HostPort represents host port that needs to be bound for DCGM engine (Default: 5555)' @@ -815,6 +823,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Device Plugin pod + uses the host's network namespace. + type: boolean image: description: NVIDIA Device Plugin image name pattern: '[a-zA-Z0-9\-]+' @@ -907,6 +919,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Driver pod uses + the host's network namespace. + type: boolean image: description: NVIDIA Driver image name pattern: '[a-zA-Z0-9\-]+' @@ -1419,6 +1435,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the GPU Feature Discovery + pod uses the host's network namespace. + type: boolean image: description: GFD image name pattern: '[a-zA-Z0-9\-]+' @@ -1562,6 +1582,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Kata Manager pod + uses the host's network namespace. + type: boolean image: description: Kata Manager image name pattern: '[a-zA-Z0-9\-]+' @@ -1638,6 +1662,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Kata Sandbox Device + Plugin pod uses the host's network namespace. + type: boolean image: description: NVIDIA component image name pattern: '[a-zA-Z0-9\-]+' @@ -1751,6 +1779,10 @@ spec: description: ConfigMap name type: string type: object + hostNetwork: + description: HostNetwork indicates whether the MIG Manager pod + uses the host's network namespace. + type: boolean image: description: NVIDIA MIG Manager image name pattern: '[a-zA-Z0-9\-]+' @@ -1827,6 +1859,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Node Status Exporter + pod uses the host's network namespace. + type: boolean image: description: Node Status Exporter image name pattern: '[a-zA-Z0-9\-]+' @@ -1976,6 +2012,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Sandbox Device + Plugin pod uses the host's network namespace. + type: boolean image: description: NVIDIA Sandbox Device Plugin image name pattern: '[a-zA-Z0-9\-]+' @@ -2081,6 +2121,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Container Toolkit + pod uses the host's network namespace. + type: boolean image: description: NVIDIA Container Toolkit image name pattern: '[a-zA-Z0-9\-]+' @@ -2197,6 +2241,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the Validator pod uses + the host's network namespace. + type: boolean image: description: Validator image name pattern: '[a-zA-Z0-9\-]+' @@ -2415,6 +2463,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the VFIO Manager pod + uses the host's network namespace. + type: boolean image: description: VFIO Manager image name pattern: '[a-zA-Z0-9\-]+' @@ -2503,6 +2555,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the vGPU Device Manager + pod uses the host's network namespace. + type: boolean image: description: NVIDIA vGPU Device Manager image name pattern: '[a-zA-Z0-9\-]+' @@ -2621,6 +2677,10 @@ spec: - name type: object type: array + hostNetwork: + description: HostNetwork indicates whether the vGPU Manager pod + uses the host's network namespace. + type: boolean image: description: NVIDIA vGPU Manager image name pattern: '[a-zA-Z0-9\-]+' diff --git a/deployments/gpu-operator/templates/clusterpolicy.yaml b/deployments/gpu-operator/templates/clusterpolicy.yaml index 978b5dc86..6e5c6a9c6 100644 --- a/deployments/gpu-operator/templates/clusterpolicy.yaml +++ b/deployments/gpu-operator/templates/clusterpolicy.yaml @@ -85,6 +85,9 @@ spec: {{- if .Values.validator.args }} args: {{ toYaml .Values.validator.args | nindent 6 }} {{- end }} + {{- if .Values.validator.hostNetwork }} + hostNetwork: {{ .Values.validator.hostNetwork }} + {{- end }} {{- if .Values.validator.plugin }} plugin: {{- if .Values.validator.plugin.env }} @@ -253,6 +256,9 @@ spec: timeoutSeconds: {{ .Values.driver.upgradePolicy.drain.timeoutSeconds }} deleteEmptyDir: {{ .Values.driver.upgradePolicy.drain.deleteEmptyDir | default false}} {{- end }} + {{- if .Values.driver.hostNetwork }} + hostNetwork: {{ .Values.driver.hostNetwork }} + {{- end }} vgpuManager: enabled: {{ .Values.vgpuManager.enabled }} {{- if .Values.vgpuManager.repository }} @@ -282,6 +288,9 @@ spec: {{- if .Values.vgpuManager.kernelModuleConfig }} kernelModuleConfig: {{ toYaml .Values.vgpuManager.kernelModuleConfig | nindent 6 }} {{- end }} + {{- if .Values.vgpuManager.hostNetwork }} + hostNetwork: {{ .Values.vgpuManager.hostNetwork }} + {{- end }} driverManager: {{- if .Values.vgpuManager.driverManager.repository }} repository: {{ .Values.vgpuManager.driverManager.repository }} @@ -327,6 +336,9 @@ spec: {{- if .Values.kataManager.args }} args: {{ toYaml .Values.kataManager.args | nindent 6 }} {{- end }} + {{- if .Values.kataManager.hostNetwork }} + hostNetwork: {{ .Values.kataManager.hostNetwork }} + {{- end }} vfioManager: enabled: {{ .Values.vfioManager.enabled }} {{- if .Values.vfioManager.repository }} @@ -369,6 +381,9 @@ spec: {{- if .Values.vfioManager.driverManager.env }} env: {{ toYaml .Values.vfioManager.driverManager.env | nindent 8 }} {{- end }} + {{- if .Values.vfioManager.hostNetwork }} + hostNetwork: {{ .Values.vfioManager.hostNetwork }} + {{- end }} vgpuDeviceManager: enabled: {{ .Values.vgpuDeviceManager.enabled }} {{- if .Values.vgpuDeviceManager.repository }} @@ -398,6 +413,9 @@ spec: {{- if .Values.vgpuDeviceManager.config }} config: {{ toYaml .Values.vgpuDeviceManager.config | nindent 6 }} {{- end }} + {{- if .Values.vgpuDeviceManager.hostNetwork }} + hostNetwork: {{ .Values.vgpuDeviceManager.hostNetwork }} + {{- end }} ccManager: enabled: {{ .Values.ccManager.enabled }} defaultMode: {{ .Values.ccManager.defaultMode | quote }} @@ -425,6 +443,9 @@ spec: {{- if .Values.ccManager.args }} args: {{ toYaml .Values.ccManager.args | nindent 6 }} {{- end }} + {{- if .Values.ccManager.hostNetwork }} + hostNetwork: {{ .Values.ccManager.hostNetwork }} + {{- end }} toolkit: enabled: {{ .Values.toolkit.enabled }} {{- if .Values.toolkit.repository }} @@ -451,6 +472,9 @@ spec: {{- if .Values.toolkit.installDir }} installDir: {{ .Values.toolkit.installDir }} {{- end }} + {{- if .Values.toolkit.hostNetwork }} + hostNetwork: {{ .Values.toolkit.hostNetwork }} + {{- end }} devicePlugin: enabled: {{ .Values.devicePlugin.enabled }} {{- if .Values.devicePlugin.repository }} @@ -482,6 +506,9 @@ spec: name: {{ .Values.devicePlugin.config.name | quote }} default: {{ .Values.devicePlugin.config.default | quote }} {{- end }} + {{- if .Values.devicePlugin.hostNetwork }} + hostNetwork: {{ .Values.devicePlugin.hostNetwork }} + {{- end }} dcgm: enabled: {{ .Values.dcgm.enabled }} {{- if .Values.dcgm.repository }} @@ -508,6 +535,9 @@ spec: {{- if .Values.dcgm.args }} args: {{ toYaml .Values.dcgm.args | nindent 6 }} {{- end }} + {{- if .Values.dcgm.hostNetwork }} + hostNetwork: {{ .Values.dcgm.hostNetwork }} + {{- end }} dcgmExporter: enabled: {{ .Values.dcgmExporter.enabled }} {{- if .Values.dcgmExporter.repository }} @@ -579,6 +609,9 @@ spec: {{- if .Values.gfd.args }} args: {{ toYaml .Values.gfd.args | nindent 6 }} {{- end }} + {{- if .Values.gfd.hostNetwork }} + hostNetwork: {{ .Values.gfd.hostNetwork }} + {{- end }} migManager: enabled: {{ .Values.migManager.enabled }} {{- if .Values.migManager.repository }} @@ -615,6 +648,9 @@ spec: {{- if .Values.migManager.gpuClientsConfig }} gpuClientsConfig: {{ toYaml .Values.migManager.gpuClientsConfig | nindent 6 }} {{- end }} + {{- if .Values.migManager.hostNetwork }} + hostNetwork: {{ .Values.migManager.hostNetwork }} + {{- end }} nodeStatusExporter: enabled: {{ .Values.nodeStatusExporter.enabled }} {{- if .Values.nodeStatusExporter.repository }} @@ -639,6 +675,9 @@ spec: {{- if .Values.nodeStatusExporter.args }} args: {{ toYaml .Values.nodeStatusExporter.args | nindent 6 }} {{- end }} + {{- if .Values.nodeStatusExporter.hostNetwork }} + hostNetwork: {{ .Values.nodeStatusExporter.hostNetwork }} + {{- end }} {{- if .Values.gds }} gds: enabled: {{ .Values.gds.enabled }} @@ -721,6 +760,9 @@ spec: {{- if .Values.sandboxDevicePlugin.args }} args: {{ toYaml .Values.sandboxDevicePlugin.args | nindent 6 }} {{- end }} + {{- if .Values.sandboxDevicePlugin.hostNetwork }} + hostNetwork: {{ .Values.sandboxDevicePlugin.hostNetwork }} + {{- end }} kataSandboxDevicePlugin: {{- if ne .Values.kataSandboxDevicePlugin.enabled nil }} enabled: {{ .Values.kataSandboxDevicePlugin.enabled }} @@ -749,3 +791,6 @@ spec: {{- if .Values.kataSandboxDevicePlugin.args }} args: {{ toYaml .Values.kataSandboxDevicePlugin.args | nindent 6 }} {{- end }} + {{- if .Values.kataSandboxDevicePlugin.hostNetwork }} + hostNetwork: {{ .Values.kataSandboxDevicePlugin.hostNetwork }} + {{- end }} diff --git a/deployments/gpu-operator/values.yaml b/deployments/gpu-operator/values.yaml index 222b0e389..256da30e8 100644 --- a/deployments/gpu-operator/values.yaml +++ b/deployments/gpu-operator/values.yaml @@ -61,6 +61,7 @@ validator: env: [] args: [] resources: {} + hostNetwork: false plugin: env: [] @@ -204,6 +205,7 @@ driver: name: "" # Name of Kubernetes Secret which contains secrets to be passed in as environment variables secretEnv: "" + hostNetwork: false toolkit: enabled: true @@ -215,6 +217,7 @@ toolkit: env: [] resources: {} installDir: "/usr/local/nvidia" + hostNetwork: false devicePlugin: enabled: true @@ -259,6 +262,7 @@ devicePlugin: mps: # MPS root path on the host root: "/run/nvidia/mps" + hostNetwork: false # standalone dcgm hostengine dcgm: @@ -271,6 +275,7 @@ dcgm: args: [] env: [] resources: {} + hostNetwork: false dcgmExporter: enabled: true @@ -330,6 +335,7 @@ gfd: imagePullSecrets: [] env: [] resources: {} + hostNetwork: false migManager: enabled: true @@ -392,6 +398,7 @@ migManager: data: {} gpuClientsConfig: name: "" + hostNetwork: false nodeStatusExporter: enabled: false @@ -402,6 +409,7 @@ nodeStatusExporter: imagePullPolicy: IfNotPresent imagePullSecrets: [] resources: {} + hostNetwork: false gds: enabled: false @@ -443,6 +451,7 @@ vgpuManager: # kernel module configuration for vGPU manager kernelModuleConfig: name: "" + hostNetwork: false vgpuDeviceManager: enabled: true @@ -455,6 +464,7 @@ vgpuDeviceManager: config: name: "" default: "default" + hostNetwork: false vfioManager: enabled: true @@ -473,6 +483,7 @@ vfioManager: version: v0.10.0 imagePullPolicy: IfNotPresent env: [] + hostNetwork: false kataManager: enabled: false @@ -481,6 +492,7 @@ kataManager: imagePullSecrets: [] env: [] resources: {} + hostNetwork: false sandboxDevicePlugin: enabled: true @@ -492,6 +504,7 @@ sandboxDevicePlugin: args: [] env: [] resources: {} + hostNetwork: false # Kata sandbox device plugin (used when sandboxWorkloads.mode is "kata"). kataSandboxDevicePlugin: @@ -504,6 +517,7 @@ kataSandboxDevicePlugin: args: [] env: [] resources: {} + hostNetwork: false ccManager: enabled: true @@ -517,6 +531,7 @@ ccManager: - name: CC_CAPABLE_DEVICE_IDS value: "0x2339,0x2331,0x2330,0x2324,0x2322,0x233d" resources: {} + hostNetwork: false # Array of extra K8s manifests to deploy # Supports use of custom Helm templates