diff --git a/modules/enableit/profile/manifests/computing/slurm.pp b/modules/enableit/profile/manifests/computing/slurm.pp index f2610b98..5af20318 100644 --- a/modules/enableit/profile/manifests/computing/slurm.pp +++ b/modules/enableit/profile/manifests/computing/slurm.pp @@ -36,6 +36,8 @@ Boolean $hwloc_enabled = $::role::computing::slurm::hwloc_enabled, String $db_buffer_pool_size = $::role::computing::slurm::db_buffer_pool_size, String $db_log_file_size = $::role::computing::slurm::db_log_file_size, + Integer[1] $all_users_limit_nofile = $::role::computing::slurm::all_users_limit_nofile, + Integer[1] $slurmd_limit_nofile = $::role::computing::slurm::slurmd_limit_nofile, ) inherits ::profile::computing { # We manually install SLURM and munge packages because we're using packages @@ -156,11 +158,21 @@ }) class { '::profile::computing::slurm::slurmd': - interface => $interface, - node_cidrs => $node_cidrs, + interface => $interface, + node_cidrs => $node_cidrs, + slurmd_limit_nofile => $slurmd_limit_nofile, } } + # Configure PAM file descriptor limits for all users on SLURM nodes + pam::limits::fragment { 'slurm_nofile': + ensure => 'present', + list => [ + "* soft nofile ${all_users_limit_nofile}", + "* hard nofile ${all_users_limit_nofile}", + ], + } + if !$enable { package::remove( ['slurm', 'slurm-slurmdbd', 'slurm-slurmctld', 'slurm-slurmd', 'munge']) } diff --git a/modules/enableit/profile/manifests/computing/slurm/slurmd.pp b/modules/enableit/profile/manifests/computing/slurm/slurmd.pp index 64ba1caf..5c4e740c 100644 --- a/modules/enableit/profile/manifests/computing/slurm/slurmd.pp +++ b/modules/enableit/profile/manifests/computing/slurm/slurmd.pp @@ -2,6 +2,7 @@ class profile::computing::slurm::slurmd ( Eit_types::SimpleString $interface, Array[Eit_types::IPCIDR] $node_cidrs, + Integer[1] $slurmd_limit_nofile, ) { firewall_multi { @@ -21,5 +22,13 @@ ; } + # Configure systemd drop-in file to set file descriptor limits for slurmd service + systemd::dropin_file { 'slurmd-90-nofile.conf': + unit => 'slurmd.service', + filename => '90-nofile.conf', + ensure => 'present', + content => "[Service]\nLimitNOFILE=${slurmd_limit_nofile}\n", + } + include ::slurm::slurmd } diff --git a/modules/enableit/role/manifests/computing/slurm.pp b/modules/enableit/role/manifests/computing/slurm.pp index 8d92c90b..47324d0f 100644 --- a/modules/enableit/role/manifests/computing/slurm.pp +++ b/modules/enableit/role/manifests/computing/slurm.pp @@ -61,6 +61,10 @@ # # @param db_log_file_size Maximum size allocated for database log files. # +# @param all_users_limit_nofile PAM `nofile` value for all users. +# +# @param slurmd_limit_nofile Systemd `LimitNOFILE` value for slurmd service. +# # @groups authentication munge_key, jwt_key, encrypt_params # # @groups daemon_control slurmctld, slurmdbd, slurmd @@ -77,6 +81,8 @@ # # @groups db db_buffer_pool_size, db_log_file_size # +# @groups limits all_users_limit_nofile, slurmd_limit_nofile +# # @encrypt_params munge_key, jwt_key, slurm_gateway.*.bind_password # class role::computing::slurm ( @@ -112,6 +118,8 @@ Optional[Eit_types::Slurm::Policy] $slurm_policy = undef, String $db_buffer_pool_size = '256M', String $db_log_file_size = '24M', + Integer[1] $all_users_limit_nofile = 10000, + Integer[1] $slurmd_limit_nofile = 10000, Eit_types::Encrypt::Params $encrypt_params = ['munge_key', 'jwt_key','slurm_gateway.*.bind_password'] ) inherits ::role::computing {