NixOS configuration for HPC cluster https://docs.hpc.informatik.hs-fulda.de/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

65 lines
1.4 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
7 months ago
2 months ago
7 months ago
1 year ago
1 year ago
1 year ago
  # NixOS module configuring Slurm, Munge (with its sops-managed key) and the
  # "vip" user group.
  { pkgs, lib, config, nodes, ... }:

  let
    inherit (lib) attrValues concatStringsSep filter mapAttrsToList;

    # Hardware attributes appended to every Slurm node definition line.
    hardware = {
      CPUs = 64;
      Boards = 1;
      SocketsPerBoard = 4;
      CoresPerSocket = 16;
      ThreadsPerCore = 1;
    };

    # The attributes above rendered as space-separated "Key=value" pairs.
    hardwareSpec = concatStringsSep " "
      (mapAttrsToList (name: val: "${name}=${toString val}") hardware);

    # Entries of `nodes` whose configuration sets `hpc.node.enable`.
    # NOTE(review): `nodes` is presumably supplied by the deployment tool —
    # confirm where it comes from.
    computeNodes = filter (n: n.config.hpc.node.enable) (attrValues nodes);

    # One node definition: the host name followed by the hardware attributes.
    mkNodeLine = n: "${n.config.networking.hostName} ${hardwareSpec}";
  in
  {
    services.slurm = {
      clusterName = "mogli";
      controlMachine = "manager";

      nodeName = map mkNodeLine computeNodes;

      # "all" is the default partition with a 6:00:00 MaxTime; "vip" has no
      # time limit and is restricted to the "vip" group.
      partitionName = [
        "all Nodes=ALL Default=YES MaxTime=6:00:00 State=UP OverSubscribe=NO"
        "vip Nodes=ALL AllowGroups=vip Default=NO MaxTime=INFINITE State=UP OverSubscribe=NO"
      ];

      extraConfig = ''
        MailProg=${pkgs.mailutils}/bin/mail
        # Do not block whole nodes to allow multiple jobs to fill up nodes
        SelectType=select/cons_tres
        SelectTypeParameters=CR_CPU
        # Free resources of halted jobs
        PreemptType=preempt/partition_prio
        PreemptMode=SUSPEND,GANG
      '';
    };

    # Munge reads its key from the path of the sops secret declared below.
    services.munge = {
      enable = true;
      password = config.sops.secrets."munge/password".path;
    };

    # The secret is owned by the "munge" user.
    sops.secrets."munge/password" = {
      sopsFile = ./secrets.yaml;
      owner = "munge";
    };

    # Members of this group may use the "vip" partition (AllowGroups=vip),
    # which has MaxTime=INFINITE.
    users.groups.vip.members = [
      "fdai2856"
      "fdai0159"
      "fdai0231"
    ];
  }