NixOS configuration for HPC cluster https://docs.hpc.informatik.hs-fulda.de/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

138 lines
3.5 KiB

11 months ago
11 months ago
  1. # TFTP boot with shared image
  2. # Requests store path to install from master
  3. # Runs disko and nixos-install
  4. { pkgs, lib, config, nodes, ... }@args:
  5. with lib;
  6. let
  # Names of every node whose configuration sets hpc.netinstall.enable,
  # i.e. the machines that are offered a network installation.
  targets = filter
    (name: nodes.${name}.config.hpc.netinstall.enable)
    (attrNames nodes);
  # NixOS system that is network-booted to perform the installation. It is
  # evaluated from ./installer.nix and receives a reduced `nodes` module
  # argument that contains only the "manager" node.
  installer =
    let
      # Inline module overriding the `nodes` argument seen by the installer.
      managerOnly = {
        _module.args.nodes = { inherit (nodes) manager; };
      };
    in
    pkgs.nixos [ ./installer.nix managerOnly ];
  # Builds the JSON file answered to pixiecore for the node called `name`.
  # The answer names the installer's kernel and netboot initrd and hands the
  # store path of a per-node install script to the booted system through the
  # `nixos.install=` kernel parameter.
  apiEntry = name:
    let
      # Build outputs of the node to be installed and of the installer image.
      targetBuild = nodes.${name}.config.system.build;
      installerBuild = installer.config.system.build;

      # Per-node script: run the node's disko script, install the node's
      # system closure into /mnt, set the chassis boot device to disk via
      # ipmitool and reboot.
      installScript = pkgs.writeScript "install-${name}" ''
        #!/usr/bin/env bash
        set -xeuo pipefail
        "${targetBuild.diskoScript}"
        "${targetBuild.nixos-install}/bin/nixos-install" \
          --root /mnt \
          --system "${targetBuild.toplevel}" \
          --no-channel-copy \
          --no-root-password \
          --verbose
        ${pkgs.ipmitool}/bin/ipmitool chassis bootdev disk
        reboot
      '';

      # Kernel command line of the installer, joined with spaces below.
      kernelArgs = [
        "init=${installerBuild.toplevel}/init"
        "loglevel=4"
        "nixos.install=${installScript}"
        "console=tty0"
        "console=ttyS1,57600n8"
      ];

      response = {
        kernel = "file://${installerBuild.kernel}/bzImage";
        initrd = [ "file://${installerBuild.netbootRamdisk}/initrd" ];
        cmdline = concatStringsSep " " kernelArgs;
        message = "NixOS Automatic Installer for ${name}";
      };
    in
    pkgs.writeText "pixieboot-api-${name}" (builtins.toJSON response);
  # Directory tree served by nginx under /pixiecore: one file per install
  # target at pixiecore/v1/boot/<mac>, containing that node's API response.
  # The MAC is taken from the node's "data" DHCP reservation and lower-cased,
  # because pixiecore requests the address in lower case and the file lookup
  # is case-sensitive.
  api = pkgs.linkFarm "pixiecore-api" (listToAttrs (map
    (name:
      let
        mac = nodes.${name}.config.hpc.dhcp.reservations."data".hwAddress;
      in
      nameValuePair "pixiecore/v1/boot/${toLower mac}" (apiEntry name))
    targets));
  # iPXE build with the CONSOLE_SERIAL option enabled and the boot.ipxe
  # script from the pixiecore sources embedded.
  ipxe-with-serial =
    let
      pixiecoreBootScript = "${pkgs.pixiecore.src}/pixiecore/boot.ipxe";
    in
    pkgs.ipxe.override {
      embedScript = pixiecoreBootScript;
      additionalOptions = [ "CONSOLE_SERIAL" ];
    };
  # `auto-install <node>`: switches the given node to PXE boot through its
  # management ("mngt") address, power-resets it, waits until its "data"
  # address answers a ping and then sets the boot device back to disk.
  # Exits with 255 when called without exactly one argument or with a node
  # name that is not an install target.
  #
  # NOTE(review): the IPMI user and password are hard-coded and passed on the
  # command line; consider ipmitool's password file (-f) or environment (-E)
  # options - confirm with the cluster's credential handling.
  trigger-script = pkgs.writeScriptBin "auto-install" ''
    #!/usr/bin/env bash
    set -euo pipefail

    # Give a usage hint instead of an "unbound variable" abort from set -u.
    if [ "$#" -ne 1 ]; then
      echo "Usage: auto-install <node>" >&2
      exit 255
    fi

    # One case arm per install target, generated from the node configuration.
    case "$1" in
    ${concatMapStringsSep "\n" (node: ''
      "${node}")
        MNGT_IP="${nodes.${node}.config.hpc.dhcp.reservations."mngt".ipAddress}"
        DATA_IP="${nodes.${node}.config.hpc.dhcp.reservations."data".ipAddress}"
        ;;
    '') targets}
      *)
        echo "No such node" >&2
        exit 255
        ;;
    esac

    echo "Switch boot device to PXE"
    ${pkgs.ipmitool}/bin/ipmitool -I lanplus -H "$MNGT_IP" -U admin -P admin chassis bootdev pxe

    sleep 1s

    echo -n "Resetting node "
    ${pkgs.ipmitool}/bin/ipmitool -I lanplus -H "$MNGT_IP" -U admin -P admin chassis power reset

    # Poll until the node answers; pause between attempts so that a ping
    # that fails immediately does not turn this into a busy loop.
    while ! ping -c 1 "$DATA_IP" -n > /dev/null; do
      echo -n "."
      sleep 1s
    done
    echo " done"

    echo "Reset boot device to disk"
    ${pkgs.ipmitool}/bin/ipmitool -I lanplus -H "$MNGT_IP" -U admin -P admin chassis bootdev disk
  '';
  84. in
  {
    # pixiecore in API mode: boot decisions are fetched from the static files
    # that nginx serves below /pixiecore on the boot.<domain> virtual host.
    services.pixiecore = {
      enable = true;
      mode = "api";
      apiServer = "http://boot.${config.networking.domain}/pixiecore";

      port = 5080;
      statusPort = 6080;
      openFirewall = true;
      dhcpNoBind = true;
      debug = true;

      # Use the serial-console-enabled iPXE build for BIOS clients.
      extraArguments = [ "--ipxe-bios" "${ipxe-with-serial}/undionly.kpxe" ];
    };

    # boot.<domain>: proxy to pixiecore and its status port, and serve the
    # generated API tree as plain files.
    services.nginx.virtualHosts."boot.${config.networking.domain}".locations = {
      "/".proxyPass = "http://localhost:${toString config.services.pixiecore.port}";
      "/status".proxyPass = "http://localhost:${toString config.services.pixiecore.statusPort}";
      "/pixiecore".root = api;
    };

    # Make the `auto-install` command available to root.
    users.users.root.packages = [ trigger-script ];

    hpc.hostFile.aliases = [ "boot.${config.networking.domain}" ];
  }