@@ -164,11 +164,12 @@ type ClusterPolicyController struct {
164164 openshift string
165165 ocpDriverToolkit OpenShiftDriverToolkit
166166
167- runtime gpuv1.Runtime
168- gpuNodeOSTag string
169- hasGPUNodes bool
170- hasNFDLabels bool
171- sandboxEnabled bool
167+ runtime gpuv1.Runtime
168+ gpuNodeOSTag string
169+ gpuNodeOSRelease string
170+ hasGPUNodes bool
171+ hasNFDLabels bool
172+ sandboxEnabled bool
172173}
173174
174175func addState (n * ClusterPolicyController , path string ) {
@@ -637,7 +638,7 @@ func getRuntimeString(node corev1.Node) (gpuv1.Runtime, error) {
637638 return runtime , nil
638639}
639640
640- func (n * ClusterPolicyController ) getGPUNodeOSTag () (string , error ) {
641+ func (n * ClusterPolicyController ) getGPUNodeOSInfo () (string , string , error ) {
641642 ctx := n .ctx
642643 opts := []client.ListOption {
643644 client .MatchingLabels (map [string ]string {commonGPULabelKey : commonGPULabelValue }),
@@ -646,34 +647,54 @@ func (n *ClusterPolicyController) getGPUNodeOSTag() (string, error) {
646647 nodeList := & corev1.NodeList {}
647648 err := n .client .List (ctx , nodeList , opts ... )
648649 if err != nil {
649- return "" , fmt .Errorf ("unable to list nodes with GPU present: %w" , err )
650+ return "" , "" , fmt .Errorf ("unable to list nodes with GPU present: %w" , err )
650651 }
651652 if len (nodeList .Items ) == 0 {
652- return "" , fmt .Errorf ("no nodes found with GPU present" )
653+ return "" , "" , fmt .Errorf ("no nodes found with GPU present" )
653654 }
654655
655656 labels := nodeList .Items [0 ].Labels
656657 osName , ok := labels [nfdOSReleaseIDLabelKey ]
657658 if ! ok {
658- return "" , fmt .Errorf ("unable to retrieve OS name from label %s" , nfdOSReleaseIDLabelKey )
659+ return "" , "" , fmt .Errorf ("unable to retrieve OS name from label %s" , nfdOSReleaseIDLabelKey )
659660 }
660661 osVersion , ok := labels [nfdOSVersionIDLabelKey ]
661662 if ! ok {
662- return "" , fmt .Errorf ("unable to retrieve OS version from label %s" , nfdOSVersionIDLabelKey )
663+ return "" , "" , fmt .Errorf ("unable to retrieve OS version from label %s" , nfdOSVersionIDLabelKey )
663664 }
664665 osMajorVersion := strings .Split (osVersion , "." )[0 ]
665- osMajorNumber , err := strconv .Atoi (osMajorVersion )
666- if err != nil {
667- return "" , fmt .Errorf ("error processing OS major version %s: %w" , osMajorVersion , err )
668- }
669666
670667 // If the OS is RockyLinux or RHEL 10 & above, we will omit the minor version when constructing the os image tag
671- if osName == "rocky" || (osName == "rhel" && osMajorNumber >= 10 ) {
668+ switch osName {
669+ case "rocky" :
672670 osVersion = osMajorVersion
671+ case "rhel" :
672+ osMajorNumber , err := parseOSMajorVersion (osVersion )
673+ if err != nil {
674+ n .logger .Info ("Unable to parse RHEL major version, using full OS version for GPU node OS tag" , "osVersion" , osVersion , "error" , err )
675+ } else if osMajorNumber >= 10 {
676+ osVersion = osMajorVersion
677+ }
673678 }
674679 osTag := fmt .Sprintf ("%s%s" , osName , osVersion )
675680
676- return osTag , nil
681+ return osName , osTag , nil
682+ }
683+
// parseOSMajorVersion extracts the major version number from an OS version
// string such as "9.4", "10" or "v10.1".
//
// Only the text before the first "." is considered. Surrounding whitespace and
// a single leading "v" or "V" are ignored. An error is returned when nothing
// remains to parse, when the remainder is not a base-10 integer, or when the
// parsed value is negative.
func parseOSMajorVersion(osVersion string) (int, error) {
	// Everything before the first "." is the major component; a string
	// without a "." consists solely of the major component.
	major := osVersion
	if dot := strings.IndexByte(major, '.'); dot >= 0 {
		major = major[:dot]
	}
	major = strings.TrimSpace(major)

	// Drop at most one version prefix character, so that "v10" and "V10" are
	// handled identically and inputs such as "vV10" are rejected.
	if strings.HasPrefix(major, "v") || strings.HasPrefix(major, "V") {
		major = major[1:]
	}
	if major == "" {
		return 0, fmt.Errorf("empty OS major version")
	}

	number, err := strconv.Atoi(major)
	if err != nil {
		return 0, fmt.Errorf("error processing OS major version %s: %w", major, err)
	}
	// strconv.Atoi accepts a leading sign; a negative major version is never
	// meaningful, so report it instead of returning it to the caller.
	if number < 0 {
		return 0, fmt.Errorf("error processing OS major version %s: negative value", major)
	}

	return number, nil
}
678699
679700func (n * ClusterPolicyController ) setPodSecurityLabelsForNamespace () error {
@@ -939,10 +960,11 @@ func (n *ClusterPolicyController) init(ctx context.Context, reconciler *ClusterP
939960 n .hasNFDLabels = hasNFDLabels
940961
941962 if n .hasGPUNodes {
942- gpuNodeOSTag , err := n .getGPUNodeOSTag ()
963+ gpuNodeOSRelease , gpuNodeOSTag , err := n .getGPUNodeOSInfo ()
943964 if err != nil {
944965 return fmt .Errorf ("failed to retrieve GPU node OS tag: %w" , err )
945966 }
967+ n .gpuNodeOSRelease = gpuNodeOSRelease
946968 n .gpuNodeOSTag = gpuNodeOSTag
947969 }
948970 // fetch all nodes and annotate gpu nodes
0 commit comments