NixOS configuration for HPC cluster
https://docs.hpc.informatik.hs-fulda.de/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
65 lines
1.4 KiB
65 lines
1.4 KiB
# NixOS module configuring the Slurm scheduler for the cluster: the node and
# partition definitions, the munge authentication key (provisioned via sops)
# and the "vip" group referenced by the unrestricted partition.
{ pkgs, lib, config, nodes, ... }:

let
  inherit (lib) attrValues concatStringsSep filter mapAttrsToList;

  # Hardware layout announced for every compute node. The same description is
  # attached to each node line, so all nodes are declared with this topology.
  nodeHardware = {
    CPUs = 64;
    Boards = 1;
    SocketsPerBoard = 4;
    CoresPerSocket = 16;
    ThreadsPerCore = 1;
  };

  # Renders one attribute as a "Key=value" token.
  renderParam = key: value: "${key}=${toString value}";

  # Space-separated "Key=value" list built from `nodeHardware`.
  nodeConfig = concatStringsSep " " (mapAttrsToList renderParam nodeHardware);

  # Only hosts that have `hpc.node.enable` set take part as Slurm nodes.
  computeNodes = filter (node: node.config.hpc.node.enable) (attrValues nodes);

  # Node entry: the host name followed by the shared hardware description.
  slurmNodeLine = node: "${node.config.networking.hostName} ${nodeConfig}";
in
{
  services = {
    slurm = {
      clusterName = "mogli";

      controlMachine = "manager";

      nodeName = map slurmNodeLine computeNodes;

      # "all" is the default partition with a six hour limit; "vip" has no
      # time limit and is restricted to members of the `vip` group.
      partitionName = [
        "all Nodes=ALL Default=YES MaxTime=6:00:00 State=UP OverSubscribe=NO"
        "vip Nodes=ALL AllowGroups=vip Default=NO MaxTime=INFINITE State=UP OverSubscribe=NO"
      ];

      # NOTE(review): select/cons_res is deprecated upstream in favour of
      # select/cons_tres - confirm against the deployed Slurm version.
      # NOTE(review): preempt/partition_prio relies on differing PriorityTier
      # values between partitions; none are set above - confirm the intent.
      extraConfig = ''
        MailProg=${pkgs.mailutils}/bin/mail

        # Do not block whole nodes to allow multiple jobs to fill up nodes
        SelectType=select/cons_res
        SelectTypeParameters=CR_CPU

        # Free resources of halted jobs
        PreemptType=preempt/partition_prio
        PreemptMode=SUSPEND,GANG
      '';
    };

    # Munge is given the path of the sops-managed secret declared below.
    munge = {
      enable = true;
      password = config.sops.secrets."munge/password".path;
    };
  };

  # Secret taken from the local secrets file and owned by the munge user.
  sops.secrets."munge/password" = {
    sopsFile = ./secrets.yaml;
    owner = "munge";
  };

  # Group granting access to the "vip" partition (unlimited run time).
  users.groups.vip = {
    members = [
      "fdai2856"
      "fdai0159"
      "fdai0231"
    ];
  };
}