NixOS configuration for HPC cluster https://docs.hpc.informatik.hs-fulda.de/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

65 lines
1.4 KiB

{ pkgs, lib, config, nodes, ... }:
with lib;
let
# Hardware description of a compute node, rendered as a single string of
# space-separated `Key=value` pairs.
# 4 sockets x 16 cores x 1 thread accounts for the 64 CPUs.
nodeConfig =
  let
    topology = {
      Boards = 1;
      SocketsPerBoard = 4;
      CoresPerSocket = 16;
      ThreadsPerCore = 1;
      CPUs = 64;
    };
    # Format one attribute as `Key=value`; toString converts the integer.
    asPair = name: amount: "${name}=${toString amount}";
  in
  concatStringsSep " " (mapAttrsToList asPair topology);
in
{
# Slurm cluster definition: cluster name, controller host, compute nodes,
# partitions and additional slurm.conf settings.
services.slurm =
  let
    # Machines from `nodes` whose configuration sets `hpc.node.enable`.
    computeNodes = filter (machine: machine.config.hpc.node.enable) (attrValues nodes);
    # One node entry: the machine's host name followed by the hardware description.
    nodeLine = machine: "${machine.config.networking.hostName} ${nodeConfig}";
  in
  {
    clusterName = "mogli";
    controlMachine = "manager";
    nodeName = map nodeLine computeNodes;
    # "all" is the default partition with a 6 hour time limit; "vip" has no
    # time limit and only admits members of the `vip` group.
    partitionName = [
      "all Nodes=ALL Default=YES MaxTime=6:00:00 State=UP OverSubscribe=NO"
      "vip Nodes=ALL AllowGroups=vip Default=NO MaxTime=INFINITE State=UP OverSubscribe=NO"
    ];
    # NOTE(review): recent Slurm releases presumably replace select/cons_res
    # with select/cons_tres — confirm against the deployed Slurm version.
    # NOTE(review): neither partition sets PriorityTier here; check whether
    # preempt/partition_prio can take effect without differing tiers.
    extraConfig = ''
      MailProg=${pkgs.mailutils}/bin/mail
      # Do not block whole nodes to allow multiple jobs to fill up nodes
      SelectType=select/cons_res
      SelectTypeParameters=CR_CPU
      # Free resources of halted jobs
      PreemptType=preempt/partition_prio
      PreemptMode=SUSPEND,GANG
    '';
  };
# Enable munge; its `password` option is set to the path of the
# "munge/password" sops secret.
services.munge.enable = true;
services.munge.password = config.sops.secrets."munge/password".path;
# Declare the "munge/password" secret: read from secrets.yaml next to this
# file and owned by the `munge` user.
sops.secrets = {
  "munge/password" = {
    owner = "munge";
    sopsFile = ./secrets.yaml;
  };
};
# Members of the `vip` group: users who get unlimited queues.
users.groups.vip.members = [ "fdai2856" "fdai0159" "fdai0231" ];
}