From a6a940d6922bcc5bcebe92617b4935a9ba5619c0 Mon Sep 17 00:00:00 2001 From: Dustin Frisch Date: Wed, 14 Jun 2023 13:30:49 +0200 Subject: [PATCH] update to 23.05 and other stuff --- flake.lock | 44 ++++++++++++++++++------------------ flake.nix | 16 +++++++++++-- gather.sh | 26 +++++++++++++++++++++ machines/manager/default.nix | 2 ++ machines/manager/ldap.nix | 11 --------- machines/manager/mpi.nix | 13 +++++++++++ machines/manager/rdma.nix | 19 ++++++++++++++++ machines/manager/users.nix | 2 +- machines/node/users.nix | 9 +++++--- modules/hostFile.nix | 10 +------- shared/default.nix | 13 +++++++++++ shared/rdma.nix | 11 +++++++++ shared/users.nix | 2 +- 13 files changed, 129 insertions(+), 49 deletions(-) create mode 100755 gather.sh create mode 100644 machines/manager/mpi.nix create mode 100644 machines/manager/rdma.nix create mode 100644 shared/rdma.nix diff --git a/flake.lock b/flake.lock index b90a67d..4088e0e 100644 --- a/flake.lock +++ b/flake.lock @@ -10,11 +10,11 @@ "stable": "stable" }, "locked": { - "lastModified": 1684127527, - "narHash": "sha256-tAzgb2jgmRaX9HETry38h2OvBf9YkHEH1fFvIJQV9A0=", + "lastModified": 1685163780, + "narHash": "sha256-tMwseHtEFDpO3WKeZKWqrKRAZI6TiEULidxEbzicuFg=", "owner": "zhaofengli", "repo": "colmena", - "rev": "caf33af7d854c8d9b88a8f3dae7adb1c24c1407b", + "rev": "c61bebae1dc1d57237577080b1ca1e37a3fbcebf", "type": "github" }, "original": { @@ -30,11 +30,11 @@ ] }, "locked": { - "lastModified": 1684783210, - "narHash": "sha256-hxRbwwBTu1G1u1EdI9nEo/n4HIsQIfNi+2BQ1nEoj/o=", + "lastModified": 1685450011, + "narHash": "sha256-/Az50GoWePZHL+Pkxy2ZuKW9zwIk+oVdzkR9xWomnpo=", "owner": "nix-community", "repo": "disko", - "rev": "f0b9f374bb42fdcd57baa7d4448ac5d4788226bd", + "rev": "0d270372b21818eba342954220c1a30a7bdaba19", "type": "github" }, "original": { @@ -167,11 +167,11 @@ }, "gather": { "locked": { - "lastModified": 1685458852, - "narHash": "sha256-w05QjsDxebdO6Hsfp60Run3tECsI6ucwoS1pPR1YLbk=", + "lastModified": 1685550922, + 
"narHash": "sha256-hShAmNFcHagm0tm+p8xaWEwl1XtKlPjS1fjO6HGdCFQ=", "owner": "fooker", "repo": "gather.nix", - "rev": "cd099d749f2050370d0cc42a496d537e61b9a6f4", + "rev": "c4cdc9b84053fe827b78c55d4f79e51c7fd15838", "type": "github" }, "original": { @@ -325,16 +325,16 @@ }, "nixpkgs": { "locked": { - "lastModified": 1684858140, - "narHash": "sha256-dQStox5GYrVlVNMvxxXs3xX9bXG7J7ttSjqUcVm8EaA=", + "lastModified": 1686513595, + "narHash": "sha256-H3JNqj7TEiMx5rd8lRiONvgFZvmf3kmwHI2umDdqgFY=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "a17f99dfcb9643200b3884ca195c69ae41d7f059", + "rev": "bb8b5735d6f7e06b9ddd27de115b0600c1ffbdb4", "type": "github" }, "original": { "owner": "NixOS", - "ref": "nixos-22.11", + "ref": "nixos-23.05", "repo": "nixpkgs", "type": "github" } @@ -366,11 +366,11 @@ "nixpkgs-stable": "nixpkgs-stable" }, "locked": { - "lastModified": 1684842236, - "narHash": "sha256-rYWsIXHvNhVQ15RQlBUv67W3YnM+Pd+DuXGMvCBq2IE=", + "lastModified": 1685361114, + "narHash": "sha256-4RjrlSb+OO+e1nzTExKW58o3WRwVGpXwj97iCta8aj4=", "owner": "cachix", "repo": "pre-commit-hooks.nix", - "rev": "61e567d6497bc9556f391faebe5e410e6623217f", + "rev": "ca2fdbf3edda2a38140184da6381d49f8206eaf4", "type": "github" }, "original": { @@ -401,11 +401,11 @@ ] }, "locked": { - "lastModified": 1684637723, - "narHash": "sha256-0vAxL7MVMhGbTkAyvzLvleELHjVsaS43p+PR1h9gzNQ=", + "lastModified": 1685434555, + "narHash": "sha256-aZl0yeaYX3T2L3W3yXOd3S9OfpS+8YUOT2b1KwrSf6E=", "owner": "Mic92", "repo": "sops-nix", - "rev": "4ccdfb573f323a108a44c13bb7730e42baf962a9", + "rev": "876846cde9762ae563f018c17993354875e2538e", "type": "github" }, "original": { @@ -450,11 +450,11 @@ "systems": "systems" }, "locked": { - "lastModified": 1681202837, - "narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=", + "lastModified": 1685518550, + "narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=", "owner": "numtide", "repo": "flake-utils", - "rev": "cfacdce06f30d2b68473a46042957675eebb3401", + 
"rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index d549d15..5e6a85c 100644 --- a/flake.nix +++ b/flake.nix @@ -4,7 +4,7 @@ type = "github"; owner = "NixOS"; repo = "nixpkgs"; - ref = "nixos-22.11"; + ref = "nixos-23.05"; }; colmena = { @@ -79,10 +79,22 @@ shellcheck.enable = true; }; }; + + gather = pkgs.writeShellScript "gather" '' + ROOT=${toString ./.} + + MACHINES=( "$(${colmena}/bin/colmena eval -E '{nodes, ...}: ')" ) + if [[ "$1" != "" ]]; then + + else + mapfile -t MACHINES < <() + fi + ''; in pkgs.mkShell { buildInputs = [ - inputs.colmena.defaultPackage.${system} + colmena + #gather ] ++ (with pkgs; [ bash gitAndTools.git diff --git a/gather.sh b/gather.sh new file mode 100755 index 0000000..e513fb3 --- /dev/null +++ b/gather.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +# Get all nodes with deployment data as a JSON object mapping node name to "targetUser@targetHost" +NODES="$(colmena eval -E ' + {nodes, lib, ...}: + with lib; + + mapAttrs + (_: node: with node.config.deployment; "${targetUser}@${targetHost}") + nodes +')" + +# Filter to single node if parameter is set; the name is handed to jq via --arg so it is treated as data, not as filter code, and an unknown name leaves an empty object +if [[ "$1" != "" ]]; then + NODES="$(jq --arg node "$1" 'with_entries(select(.key == $node))' <<< "$NODES")" +fi + +# Convert nodes to array of targetUser@targetHost strings +mapfile -t NODES < <(jq -r '.[]' <<< "$NODES") + +for NODE in "${NODES[@]}"; do + echo "-- ${NODE} --" + ssh "${NODE}" "/run/gather" \ + | tar xv --dereference --directory "." 
\ + || true # ignore a failed pipeline and carry on with the next node +done diff --git a/machines/manager/default.nix b/machines/manager/default.nix index 7fa2958..720f773 100644 --- a/machines/manager/default.nix +++ b/machines/manager/default.nix @@ -16,6 +16,8 @@ with lib; #./gateway.nix #./netinstall.nix ./cache.nix + ./rdma.nix + ./mpi.nix ]; deployment = { diff --git a/machines/manager/ldap.nix b/machines/manager/ldap.nix index be7fb17..e7d3f60 100644 --- a/machines/manager/ldap.nix +++ b/machines/manager/ldap.nix @@ -45,17 +45,6 @@ in }; }; }; - declarativeContents.${baseDN} = '' - dn: ${baseDN} - objectClass: top - objectClass: dcObject - objectClass: organization - o: ${config.networking.domain} - - dn: ou=users,${baseDN} - objectClass: top - objectClass: organizationalUnit - ''; }; sops.secrets."ldap/root/password" = { diff --git a/machines/manager/mpi.nix b/machines/manager/mpi.nix new file mode 100644 index 0000000..18afdc6 --- /dev/null +++ b/machines/manager/mpi.nix @@ -0,0 +1,13 @@ +{ pkgs, lib, config, nodes, ... }: + +with lib; + +{ + environment.etc."mpi/hosts" = { # one hostName per line for every node whose deployment.tags contains "node" + text = concatMapStringsSep "\n" + (node: "${node.config.networking.hostName}") + (filter + (node: elem "node" node.config.deployment.tags) + (attrValues nodes)); + }; +} diff --git a/machines/manager/rdma.nix b/machines/manager/rdma.nix new file mode 100644 index 0000000..3bcb409 --- /dev/null +++ b/machines/manager/rdma.nix @@ -0,0 +1,19 @@ +{ pkgs, lib, config, ... 
}: + +with lib; + +{ + systemd.services."opensm" = { + unitConfig = { + Before = [ "network.target" "remote-fs-pre.target" ]; + ConditionPathExists = [ "/sys/class/infiniband_mad/abi_version" ]; # unit is skipped when this sysfs path is missing + }; + + serviceConfig = { + Type = "simple"; + ExecStart = "${pkgs.opensm}/bin/opensm --guid '0x0002c9030052188b'"; # NOTE(review): GUID is hard-coded, presumably the local port of the manager's adapter — confirm + }; + + wantedBy = [ "network-online.target" ]; + }; +} diff --git a/machines/manager/users.nix b/machines/manager/users.nix index aacb625..5d42f54 100644 --- a/machines/manager/users.nix +++ b/machines/manager/users.nix @@ -13,7 +13,7 @@ with lib; extraConfig = '' EnableSSHKeysign yes - Host node-*.${config.networking.domain} + Host node-*.${config.networking.domain} node-* HostbasedAuthentication yes ''; }; diff --git a/machines/node/users.nix b/machines/node/users.nix index 68b8f40..cff5362 100644 --- a/machines/node/users.nix +++ b/machines/node/users.nix @@ -28,7 +28,10 @@ with lib; ''; }; - environment.etc."ssh/shosts.equiv".text = concatMapStringsSep "\n" - (node: node.config.networking.fqdn) - (attrValues nodes); + environment.etc."ssh/shosts.equiv".text = concatStringsSep "\n" (concatMap # two lines per node: hostName, then fqdn + (node: [ + node.config.networking.hostName + node.config.networking.fqdn + ]) + (attrValues nodes)); } diff --git a/modules/hostFile.nix b/modules/hostFile.nix index c0cec11..9c6985f 100644 --- a/modules/hostFile.nix +++ b/modules/hostFile.nix @@ -19,14 +19,6 @@ with lib; ]; }; - canonical = mkOption { - description = '' - Canonical host name - ''; - type = types.str; - default = config.networking.fqdn; - }; - aliases = mkOption { description = '' Aliases for this host @@ -41,7 +33,7 @@ with lib; ''; type = types.listOf types.str; readOnly = true; - default = (singleton config.hpc.hostFile.canonical) ++ config.hpc.hostFile.aliases; + default = [ config.networking.fqdn config.networking.hostName ] ++ config.hpc.hostFile.aliases; # fqdn first, then hostName, then the configured aliases }; }; diff --git a/shared/default.nix b/shared/default.nix index 9132699..f7a9e13 100644 --- a/shared/default.nix +++ b/shared/default.nix 
@@ -5,6 +5,7 @@ ./network.nix ./users.nix ./ssh.nix + ./rdma.nix "${modulesPath}/profiles/headless.nix" "${modulesPath}/profiles/all-hardware.nix" @@ -26,10 +27,22 @@ console.keyMap = "de"; environment.systemPackages = with pkgs; [ + openmpi vim wget curl tmux + fd + ripgrep + zlib + htop + iotop + iftop + ]; + + # Performance FTW! NOTE(review): mitigations=off turns off the kernel's CPU vulnerability mitigations on every machine importing this file — confirm the trade-off is intended + boot.kernelParams = [ + "mitigations=off" ]; services.haveged.enable = true; diff --git a/shared/rdma.nix b/shared/rdma.nix new file mode 100644 index 0000000..4767508 --- /dev/null +++ b/shared/rdma.nix @@ -0,0 +1,11 @@ +{ lib, pkgs, config, ... }: + +with lib; + +{ + boot.kernelModules = [ "ib_umad" "ib_ipoib" ]; # presumably the InfiniBand userspace-MAD and IP-over-IB drivers — verify + + environment.systemPackages = [ + pkgs.rdma-core + ]; +} diff --git a/shared/users.nix b/shared/users.nix index 7eb97b8..6f18cd8 100644 --- a/shared/users.nix +++ b/shared/users.nix @@ -21,7 +21,7 @@ in users.ldap = { enable = true; - server = "ldap://manager.${config.networking.domain}/"; + server = "ldap://ldap.${config.networking.domain}/"; # NOTE(review): needs an "ldap" host name resolvable in this domain, presumably an alias of the manager — verify base = baseDN; daemon.enable = true;