6 Commits
70cf1d8dd0
...
59c5a80f7d
Author | SHA1 | Message | Date |
---|---|---|---|
Dustin Frisch |
59c5a80f7d
|
update
|
6 months ago |
Dustin Frisch |
88d09e6a48
|
net: dhcp restruct
|
6 months ago |
Dustin Frisch |
2902a741c6
|
manager: better dhcp
|
6 months ago |
Dustin Frisch |
bac22cfb2b
|
manager: network mngt vlan and bond
|
6 months ago |
Dustin Frisch |
d4a12c7953
|
Mail
|
6 months ago |
Dustin Frisch |
dae245e5e6
|
More docs
|
6 months ago |
26 changed files with 538 additions and 152 deletions
-
57docs/config.nix
-
19docs/content/first_steps.md
-
1docs/content/index.md
-
79docs/content/internal/deployment.md
-
18docs/default.nix
-
37docs/mkdocs.yaml
-
19docs/module.nix
-
1docs/result
-
84flake.lock
-
7flake.nix
-
3machines/manager/default.nix
-
97machines/manager/dhcp.nix
-
20machines/manager/docs.nix
-
20machines/manager/mail.nix
-
2machines/manager/mpi.nix
-
36machines/manager/netinstall/default.nix
-
36machines/manager/network.nix
-
6machines/manager/secrets.yaml
-
11machines/node/default.nix
-
19machines/node/network.nix
-
30machines/nodes.nix
-
3modules/default.nix
-
29modules/dhcp.nix
-
20modules/netinstall.nix
-
30modules/node.nix
-
6shared/slurm.nix
@ -0,0 +1,57 @@ |
|||||
|
# MkDocs settings for the cluster documentation, expressed as a Nix attribute
# set. The only input read from the system configuration is
# `networking.domain`.
{ config, ... }:

let
  # Domain used for both the site URL and the `extra.manager.host` value.
  domain = config.networking.domain;
in
{
  site_name = "HPC @ HS-Fulda";
  site_description = ''
    User documentation for high performance cluster on University of Applied Sciences Fulda
  '';
  site_url = "http://${domain}/";
  repo_url = "https://gogs.informatik.hs-fulda.de/hpc/nixcfg.git";

  # Markdown sources live next to this file.
  docs_dir = ./content;

  use_directory_urls = false;
  strict = true;

  theme = {
    name = "readthedocs";
    locale = "de";
    prev_next_buttons_location = "none";
    highlightjs = true;
    hljs_languages = [ "bash" "yaml" "rust" ];
  };

  markdown_extensions = [ "extra" "admonition" ];

  plugins = [ "search" ];

  # Extra values made available under the `extra` key of the generated config.
  extra.manager.host = domain;

  # Navigation tree; list order is the order of the entries in the nav.
  nav = [
    { "Start" = "index.md"; }
    { "Erste Schritte" = "first_steps.md"; }
    { "Nutzung" = "usage.md"; }
    { "Software" = "environment.md"; }
    { "Daten" = "storage.md"; }
    { "Best Practices" = "best_practice.md"; }
    { "Hilfe" = "support.md"; }
    {
      "Internes" = [
        { "Deployment" = "internal/deployment.md"; }
        { "Netzwerk" = "internal/network.md"; }
      ];
    }
  ];
}
@ -0,0 +1,79 @@ |
|||||
|
# Infrastructure Deployment |
||||
|
|
||||
|
The whole cluster infrastructure is built using [NixOS](https://nixos.org/). |
||||
|
The configuration repository is hosted at {{ config.repo_url }} and is deployed using [colmena](https://github.com/zhaofengli/colmena). |
||||
|
|
||||
|
## Building the configuration |
||||
|
To build the configuration, a system with [Nix](https://nix.dev/install-nix) installed is required. |
||||
|
|
||||
|
To activate the environment, run `nix develop` inside the configuration folder. |
||||
|
This will fetch all required build dependencies and make them available in the environment. |
||||
|
|
||||
|
Building the whole configuration is as easy as running: |
||||
|
``` |
||||
|
colmena build --verbose --show-trace |
||||
|
``` |
||||
|
*Go grab a coffee, this can take a while* |
||||
|
|
||||
|
## Deploying |
||||
|
> Note: Deployment requires SSH access as the `root` user to all machines. |
||||
|
|
||||
|
To deploy a configuration change or updates to the cluster, run the following command: |
||||
|
``` |
||||
|
colmena apply switch |
||||
|
``` |
||||
|
|
||||
|
### Using the manager as a SSH jump host |
||||
|
SSH access to the nodes is limited. |
||||
|
Therefore the manager system can be used as a jump host. |
||||
|
To do so, add the following lines to your local `~/.ssh/config` file (before the `Host *` entry): |
||||
|
``` |
||||
|
Host 10.32.47.1?? |
||||
|
IdentitiesOnly yes |
||||
|
ProxyJump root@10.32.47.10 |
||||
|
``` |
||||
|
|
||||
|
## Updating |
||||
|
Updating all systems can be done by running the following command in the configuration repository: |
||||
|
``` |
||||
|
nix flake update |
||||
|
``` |
||||
|
|
||||
|
This will update all dependencies including the NixOS operating system. |
||||
|
|
||||
|
After doing the update, the changed config (with the updated dependencies) must be [deployed](#deploying). |
||||
|
|
||||
|
## Gather node information |
||||
|
The configuration repository relies on some information gathered from the machines themselves. |
||||
|
After bootstrapping a machine, this information needs to be gathered from the machines into the configuration repository. |
||||
|
|
||||
|
To gather this data, run the following command: |
||||
|
``` |
||||
|
./gather.sh |
||||
|
``` |
||||
|
|
||||
|
## Secret management |
||||
|
The config repository contains several secrets which are secured by [sops](https://github.com/getsops/sops) and the corresponding [Nix integration](https://github.com/Mic92/sops-nix). |
||||
|
|
||||
|
To edit a secrets file, run the following command: |
||||
|
``` |
||||
|
sops <path/to/secrets/file> |
||||
|
``` |
||||
|
|
||||
|
This requires the editor to have its PGP-key fingerprint be part of the `adminKeys` list in `sops.nix`. |
||||
|
|
||||
|
Altering the list requires one of the previous members to [update the keys](#update-keys). |
||||
|
|
||||
|
### Update keys |
||||
|
Whenever a key, either the SSH key of a machine or the PGP key of an administrator, changes, the secret files need updating. |
||||
|
To do so, run the following command: |
||||
|
``` |
||||
|
find . \( -name "secrets.yaml" -or -path "*/secrets/*" \) -type f -exec sops updatekeys {} \; |
||||
|
``` |
||||
|
|
||||
|
## Bootstrapping a node |
||||
|
Compute nodes can be bootstrapped using PXE boot. |
||||
|
The manager will provide a touchless boot image which will install the node with the current deployment automatically. |
||||
|
Booting the node from PXE (network boot) is enough to activate the bootstrapping process. |
||||
|
|
||||
|
After bootstrapping a node, make sure to [gather the node data](#gather-node-information) and [update the secret keys](#update-keys). |
@ -1,18 +0,0 @@ |
|||||
{ stdenv |
|
||||
, mkdocs |
|
||||
, ... |
|
||||
}: |
|
||||
|
|
||||
stdenv.mkDerivation { |
|
||||
name = "docs"; |
|
||||
|
|
||||
preferLocalBuild = true; |
|
||||
allowSubstitutes = false; |
|
||||
|
|
||||
src = ./.; |
|
||||
|
|
||||
buildCommand = '' |
|
||||
cd "$src" |
|
||||
${mkdocs}/bin/mkdocs build --site-dir "$out" |
|
||||
''; |
|
||||
} |
|
@ -1,37 +0,0 @@ |
|||||
site_name: HPC @ HS-Fulda |
|
||||
site_description: User documentation for high performance cluster on University of Applied Sciences Fulda |
|
||||
site_url: https://docs.hpc.informatik.hs-fulda.de/ |
|
||||
site_dir: public |
|
||||
use_directory_urls: false |
|
||||
strict: true |
|
||||
repo_url: https://gogs.informatik.hs-fulda.de/hpc/nixcfg.git |
|
||||
docs_dir: content |
|
||||
|
|
||||
theme: |
|
||||
name: readthedocs |
|
||||
locale: de |
|
||||
prev_next_buttons_location: none |
|
||||
highlightjs: true |
|
||||
hljs_languages: |
|
||||
- bash |
|
||||
- yaml |
|
||||
- rust |
|
||||
|
|
||||
markdown_extensions: |
|
||||
- extra |
|
||||
- admonition |
|
||||
|
|
||||
plugins: |
|
||||
- search |
|
||||
|
|
||||
nav: |
|
||||
- Start: index.md |
|
||||
- Erste Schritte: first_steps.md |
|
||||
- Nutzung: usage.md |
|
||||
- Software: environment.md |
|
||||
- Daten: storage.md |
|
||||
- Best Practices: best_practice.md |
|
||||
- Hilfe: support.md |
|
||||
- Internes: |
|
||||
- Netzwerk: internal/network.md |
|
||||
|
|
@ -0,0 +1,19 @@ |
|||||
|
# NixOS module that builds the MkDocs documentation site and exposes the
# result as `system.build.docs`.
{ pkgs, config, lib, ... }:

let
  # MkDocs settings as a Nix attribute set, evaluated against this system's config.
  settings = import ./config.nix { inherit config lib; };

  # The settings rendered to a YAML file in the Nix store.
  configFile = pkgs.writeText "mkdocs.yaml" (lib.generators.toYAML { } settings);
in
{
  system.build.docs = pkgs.runCommand "docs" { } ''
    ${pkgs.mkdocs}/bin/mkdocs build \
      --site-dir "$out" \
      --config-file "${configFile}"
  '';
}
@ -1 +0,0 @@ |
|||||
/nix/store/8v3r668x18fl49yx2s41yzs0qx9cn24d-docs |
|
@ -0,0 +1,97 @@ |
|||||
|
# Kea DHCPv4 server on the manager, serving the `mngt` and `data` interfaces.
# Static reservations are collected from the `hpc.dhcp.reservations` option of
# every node in `nodes`.
{ pkgs, lib, config, nodes, ... }:

let
  inherit (lib) concatLists hasAttr mapAttrsToList optional;

  domain = config.networking.domain;

  # DNS servers handed out on both subnets.
  nameServers = "10.0.0.53,10.1.1.10";

  # Build one Kea `option-data` entry.
  dhcpOption = name: data: { inherit name data; };

  # Collect the reservations of all nodes that declare one for network `net`.
  reservationsFor = net:
    concatLists (mapAttrsToList
      (_: node:
        let
          declared = node.config.hpc.dhcp.reservations;
        in
        optional (hasAttr net declared) {
          "hw-address" = declared.${net}.hwAddress;
          "ip-address" = declared.${net}.ipAddress;
        })
      nodes);
in
{
  services.kea.dhcp4 = {
    enable = true;

    settings = {
      "valid-lifetime" = 4000;
      "renew-timer" = 1000;
      "rebind-timer" = 2000;

      "interfaces-config"."interfaces" = [ "mngt" "data" ];

      "lease-database" = {
        "type" = "memfile";
        "persist" = true;
        "name" = "/var/lib/kea/dhcp4.leases";
      };

      "subnet4" = [
        # Management network.
        {
          "subnet" = "10.32.46.0/24";
          "interface" = "mngt";

          "option-data" = [
            # NOTE(review): this advertises the host's default gateway as the
            # router for the mngt subnet; confirm that address is reachable
            # from 10.32.46.0/24.
            (dhcpOption "routers" config.networking.defaultGateway.address)
            (dhcpOption "domain-name-servers" nameServers)
            (dhcpOption "domain-name" "mngt.${domain}")
            (dhcpOption "domain-search" "mngt.${domain}")
          ];

          "pools" = [
            { "pool" = "10.32.46.100-10.32.46.200"; }
          ];

          "reservations" = reservationsFor "mngt";
        }

        # Data network.
        {
          "subnet" = "10.32.47.0/24";
          "interface" = "data";

          "option-data" = [
            (dhcpOption "domain-name-servers" nameServers)
            (dhcpOption "domain-name" domain)
            (dhcpOption "domain-search" domain)
          ];

          "pools" = [
            { "pool" = "10.32.47.100-10.32.47.200"; }
          ];

          "reservations" = reservationsFor "data";
        }
      ];
    };
  };
}
@ -1,16 +1,22 @@ |
|||||
{ pkgs, lib, ... }: |
|
||||
|
{ config, pkgs, lib, ... }: |
||||
|
|
||||
with lib; |
with lib; |
||||
|
|
||||
let |
|
||||
docs = pkgs.callPackage ../../docs { }; |
|
||||
|
|
||||
in |
|
||||
{ |
{ |
||||
|
imports = [ |
||||
|
../../docs/module.nix |
||||
|
]; |
||||
|
|
||||
services.nginx = { |
services.nginx = { |
||||
virtualHosts = { |
virtualHosts = { |
||||
"docs.${config.networking.domain}" = { |
|
||||
locations."/".root = docs; |
|
||||
|
"${config.networking.domain}" = { |
||||
|
default = true; |
||||
|
|
||||
|
serverAliases = [ |
||||
|
"doku.${config.networking.domain}" |
||||
|
]; |
||||
|
|
||||
|
locations."/".root = config.system.build.docs; |
||||
}; |
}; |
||||
}; |
}; |
||||
}; |
}; |
||||
|
@ -0,0 +1,20 @@ |
|||||
|
# Outgoing mail for the manager via msmtp. The account password is read at
# send time from the sops-managed secret "mail/password".
{ config, ... }:

let
  # Path of the decrypted secret file.
  passwordFile = config.sops.secrets."mail/password".path;
in
{
  # Declare the secret so that its path is available above.
  sops.secrets."mail/password" = { };

  programs.msmtp = {
    enable = true;

    accounts.default = {
      host = "smtp.hs-fulda.de";
      port = 587;
      tls = true;
      auth = true;
      user = "fdhpc";
      from = "fdhpc@informatik.hs-fulda.de";
      passwordeval = "cat ${passwordFile}";
    };
  };
}
@ -1,13 +1,45 @@ |
|||||
{ |
{ |
||||
networking.interfaces."enp11s0f0" = { |
|
||||
|
networking.interfaces."enp11s0f0" = { }; |
||||
|
networking.interfaces."enp11s0f1" = { }; |
||||
|
|
||||
|
networking.bonds."data" = { |
||||
|
interfaces = [ "enp11s0f0" "enp11s0f1" ]; |
||||
|
driverOptions = { |
||||
|
miimon = "100"; |
||||
|
mode = "802.3ad"; |
||||
|
}; |
||||
|
}; |
||||
|
|
||||
|
networking.vlans."mngt" = { |
||||
|
id = 1032; |
||||
|
interface = "data"; |
||||
|
}; |
||||
|
|
||||
|
networking.interfaces."data" = { |
||||
ipv4.addresses = [{ |
ipv4.addresses = [{ |
||||
address = "10.32.47.10"; |
address = "10.32.47.10"; |
||||
prefixLength = 24; |
prefixLength = 24; |
||||
}]; |
}]; |
||||
}; |
}; |
||||
|
|
||||
|
# This is not our real management interface but the hosts interface to the |
||||
|
# management network |
||||
|
networking.interfaces."mngt" = { |
||||
|
ipv4.addresses = [{ |
||||
|
address = "10.32.46.253"; |
||||
|
prefixLength = 24; |
||||
|
}]; |
||||
|
}; |
||||
|
|
||||
networking.defaultGateway = { |
networking.defaultGateway = { |
||||
address = "10.32.47.1"; |
address = "10.32.47.1"; |
||||
interface = "enp11s0f0"; |
|
||||
|
interface = "data"; |
||||
|
}; |
||||
|
|
||||
|
hpc.dhcp.reservations = { |
||||
|
"mngt" = { |
||||
|
hwAddress = "e4:1f:13:28:c7:b9"; |
||||
|
ipAddress = "10.32.46.10"; |
||||
|
}; |
||||
}; |
}; |
||||
} |
} |
@ -0,0 +1,30 @@ |
|||||
|
{ |
||||
|
"node-00" = { |
||||
|
mngt = "50:46:5D:DA:0C:C9"; |
||||
|
data = "50:46:5d:da:0b:d6"; |
||||
|
}; |
||||
|
"node-01" = { |
||||
|
mngt = "50:46:5D:DA:0C:07"; |
||||
|
data = "50:46:5d:da:0c:56"; |
||||
|
}; |
||||
|
"node-02" = { |
||||
|
mngt = "10:BF:48:19:B0:04"; |
||||
|
data = "10:bf:48:1f:a6:8f"; |
||||
|
}; |
||||
|
"node-03" = { |
||||
|
mngt = "10:BF:48:19:A4:FE"; |
||||
|
data = "10:bf:48:1b:57:47"; |
||||
|
}; |
||||
|
"node-04" = { |
||||
|
mngt = "10:BF:48:19:A2:E2"; |
||||
|
data = "10:bf:48:19:a2:4d"; |
||||
|
}; |
||||
|
"node-05" = { |
||||
|
mngt = "10:BF:48:15:00:F5"; |
||||
|
data = "10:bf:48:1b:56:df"; |
||||
|
}; |
||||
|
"node-06" = { |
||||
|
mngt = "50:46:5D:DA:0C:09"; |
||||
|
data = "50:46:5d:da:0c:52"; |
||||
|
}; |
||||
|
} |
@ -1,6 +1,9 @@ |
|||||
{ |
{ |
||||
imports = [ |
imports = [ |
||||
|
./node.nix |
||||
./hostFile.nix |
./hostFile.nix |
||||
|
./dhcp.nix |
||||
|
./netinstall.nix |
||||
./beegfs.nix |
./beegfs.nix |
||||
]; |
]; |
||||
} |
} |
@ -0,0 +1,29 @@ |
|||||
|
# Declares `hpc.dhcp.reservations`: the DHCP reservations of this host, keyed
# by network name. Each entry pairs the MAC address of the host's interface in
# that network with the IP address to hand out.
{ lib, config, ... }:

with lib;

{
  options.hpc.dhcp = {
    reservations = mkOption {
      description = ''
        DHCP reservations for this host.
      '';
      type = types.attrsOf (types.submodule {
        options = {
          hwAddress = mkOption {
            description = ''
              MAC address of the interface in this network.
            '';
            type = types.str;
          };

          ipAddress = mkOption {
            description = ''
              IP address of the host.
            '';
            # Typed like `hwAddress` so that non-string values are rejected
            # when the configuration is evaluated.
            type = types.str;
          };
        };
      });
      # Hosts without reservations simply declare none.
      default = { };
    };
  };
}
@ -0,0 +1,20 @@ |
|||||
|
# Declares `hpc.netinstall.enable`. When enabled, the host is tagged
# "netinstall" for deployment and must have a DHCP reservation in the `data`
# network.
{ config, lib, ... }:

with lib;

{
  options.hpc.netinstall = {
    enable = mkEnableOption "NetInstall";
  };

  config = mkIf config.hpc.netinstall.enable {
    deployment.tags = [ "netinstall" ];

    assertions = [
      {
        # `hpc.dhcp.reservations` is an attribute set keyed by network name,
        # so membership is tested with `hasAttr`; `elem` only accepts lists.
        assertion = hasAttr "data" config.hpc.dhcp.reservations;
        message = "NetInstall needs DHCP reservation in data network";
      }
    ];
  };
}
@ -0,0 +1,30 @@ |
|||||
|
# Declares the `hpc.node` options. When enabled, the read-only node name is
# derived from the numeric id and the host is tagged "node" for deployment.
{ lib, config, ... }:

let
  inherit (lib) fixedWidthNumber mkEnableOption mkIf mkOption types;

  cfg = config.hpc.node;
in
{
  options.hpc.node = {
    enable = mkEnableOption "Compute Node";

    id = mkOption {
      type = types.ints.unsigned;
      description = ''
        ID of the compute node.
      '';
    };

    name = mkOption {
      type = types.str;
      # Set only by this module (see `config` below).
      readOnly = true;
      description = ''
        Name of the node.
      '';
    };
  };

  config = mkIf cfg.enable {
    deployment.tags = [ "node" ];

    # The id is zero-padded to two digits, e.g. id 3 becomes "node-03".
    hpc.node.name = "node-${fixedWidthNumber 2 cfg.id}";
  };
}
Write
Preview
Loading…
Cancel
Save
Reference in new issue