6 Commits
70cf1d8dd0
...
59c5a80f7d
Author | SHA1 | Message | Date |
---|---|---|---|
Dustin Frisch |
59c5a80f7d
|
update
|
6 months ago |
Dustin Frisch |
88d09e6a48
|
net: dhcp restruct
|
6 months ago |
Dustin Frisch |
2902a741c6
|
manager: better dhcp
|
6 months ago |
Dustin Frisch |
bac22cfb2b
|
manager: network mngt vlan and bond
|
6 months ago |
Dustin Frisch |
d4a12c7953
|
Mail
|
7 months ago |
Dustin Frisch |
dae245e5e6
|
More docs
|
7 months ago |
26 changed files with 538 additions and 152 deletions
-
57docs/config.nix
-
19docs/content/first_steps.md
-
1docs/content/index.md
-
79docs/content/internal/deployment.md
-
18docs/default.nix
-
37docs/mkdocs.yaml
-
19docs/module.nix
-
1docs/result
-
84flake.lock
-
7flake.nix
-
3machines/manager/default.nix
-
97machines/manager/dhcp.nix
-
20machines/manager/docs.nix
-
20machines/manager/mail.nix
-
2machines/manager/mpi.nix
-
36machines/manager/netinstall/default.nix
-
36machines/manager/network.nix
-
6machines/manager/secrets.yaml
-
11machines/node/default.nix
-
19machines/node/network.nix
-
30machines/nodes.nix
-
3modules/default.nix
-
29modules/dhcp.nix
-
20modules/netinstall.nix
-
30modules/node.nix
-
6shared/slurm.nix
@ -0,0 +1,57 @@ |
|||
{ config, ... }: |
|||
|
|||
{ |
|||
site_name = "HPC @ HS-Fulda"; |
|||
site_description = '' |
|||
User documentation for high performance cluster on University of Applied Sciences Fulda |
|||
''; |
|||
site_url = "http://${config.networking.domain}/"; |
|||
|
|||
use_directory_urls = false; |
|||
strict = true; |
|||
|
|||
repo_url = "https://gogs.informatik.hs-fulda.de/hpc/nixcfg.git"; |
|||
|
|||
docs_dir = ./content; |
|||
|
|||
theme = { |
|||
name = "readthedocs"; |
|||
locale = "de"; |
|||
prev_next_buttons_location = "none"; |
|||
highlightjs = true; |
|||
hljs_languages = [ |
|||
"bash" |
|||
"yaml" |
|||
"rust" |
|||
]; |
|||
}; |
|||
|
|||
markdown_extensions = [ |
|||
"extra" |
|||
"admonition" |
|||
]; |
|||
|
|||
plugins = [ |
|||
"search" |
|||
]; |
|||
|
|||
extra = { |
|||
"manager"."host" = config.networking.domain; |
|||
}; |
|||
|
|||
nav = [ |
|||
{ "Start" = "index.md"; } |
|||
{ "Erste Schritte" = "first_steps.md"; } |
|||
{ "Nutzung" = "usage.md"; } |
|||
{ "Software" = "environment.md"; } |
|||
{ "Daten" = "storage.md"; } |
|||
{ "Best Practices" = "best_practice.md"; } |
|||
{ "Hilfe" = "support.md"; } |
|||
{ |
|||
"Internes" = [ |
|||
{ "Deployment" = "internal/deployment.md"; } |
|||
{ "Netzwerk" = "internal/network.md"; } |
|||
]; |
|||
} |
|||
]; |
|||
} |
@ -0,0 +1,79 @@ |
|||
# Infrastructure Deployment |
|||
|
|||
The whole cluster infrastructure is built using [NixOS](https://nixos.org/). |
|||
The configuration repository is hosted at {{ config.repo_url }} and is deployed using [colmena](https://github.com/zhaofengli/colmena). |
|||
|
|||
## Building the configuration |
|||
To build the configuration, a system with [Nix](https://nix.dev/install-nix) installed is required. |
|||
|
|||
To activate the environment, run `nix develop` inside the configuration folder. |
|||
This will fetch all required build dependencies and make them available in the environment. |
|||
|
|||
Building the whole configuration is as easy as running: |
|||
``` |
|||
colmena build --verbose --show-trace |
|||
``` |
|||
*Go grab a coffee, this can take a while* |
|||
|
|||
## Deploying |
|||
> Note: Deployment requires SSH access as the `root` user to all machines. |
|||
|
|||
To deploy a configuration change or updates to the cluster, run the following command: |
|||
``` |
|||
colmena apply switch |
|||
``` |
|||
|
|||
### Using the manager as a SSH jump host |
|||
SSH access to the nodes is limited. |
|||
Therefore, the manager system can be used as a jump host. |
|||
To do so, add the following lines to your local `~/.ssh/config` file (before the `Host *` entry): |
|||
``` |
|||
Host 10.32.47.1?? |
|||
IdentitiesOnly yes |
|||
ProxyJump root@10.32.47.10 |
|||
``` |
|||
|
|||
## Updating |
|||
Updating all systems can be done by running the following command in the configuration repository: |
|||
``` |
|||
nix flake update |
|||
``` |
|||
|
|||
This will update all dependencies including the NixOS operating system. |
|||
|
|||
After doing the update, the changed config (with the updated dependencies) must be [deployed](#deploying). |
|||
|
|||
## Gather node information |
|||
The configuration repository relies on some information gathered from the machines themselves. |
|||
After bootstrapping a machine, this information needs to be gathered from the machine into the configuration repository. |
|||
|
|||
To gather this data, run the following command: |
|||
``` |
|||
./gather.sh |
|||
``` |
|||
|
|||
## Secret management |
|||
The config repository contains several secrets which are secured by [sops](https://github.com/getsops/sops) and the corresponding [Nix integration](https://github.com/Mic92/sops-nix). |
|||
|
|||
To edit a secrets file, run the following command: |
|||
``` |
|||
sops <path/to/secrets/file> |
|||
``` |
|||
|
|||
This requires the editor's PGP key fingerprint to be part of the `adminKeys` list in `sops.nix`. |
|||
|
|||
Altering the list requires one of the previous members to [update the keys](#update-keys). |
|||
|
|||
### Update keys |
|||
Whenever a key, either the SSH key of a machine or the PGP key of an administrator, changes, the secret files need updating. |
|||
To do so, run the following command: |
|||
``` |
|||
find . \( -name "secrets.yaml" -o -path "*/secrets/*" \) -type f -exec sops updatekeys {} \; |
|||
``` |
|||
|
|||
## Bootstrapping a node |
|||
Compute nodes can be bootstrapped using PXE boot. |
|||
The manager will provide a touchless boot image which will install the node with the current deployment automatically. |
|||
Booting the node from PXE (network boot) is enough to activate the bootstrapping process. |
|||
|
|||
After bootstrapping a node, make sure to [gather the node data](#gather-node-information) and [update the secret keys](#update-keys). |
@ -1,18 +0,0 @@ |
|||
{ stdenv |
|||
, mkdocs |
|||
, ... |
|||
}: |
|||
|
|||
stdenv.mkDerivation { |
|||
name = "docs"; |
|||
|
|||
preferLocalBuild = true; |
|||
allowSubstitutes = false; |
|||
|
|||
src = ./.; |
|||
|
|||
buildCommand = '' |
|||
cd "$src" |
|||
${mkdocs}/bin/mkdocs build --site-dir "$out" |
|||
''; |
|||
} |
@ -1,37 +0,0 @@ |
|||
site_name: HPC @ HS-Fulda |
|||
site_description: User documentation for high performance cluster on University of Applied Sciences Fulda |
|||
site_url: https://docs.hpc.informatik.hs-fulda.de/ |
|||
site_dir: public |
|||
use_directory_urls: false |
|||
strict: true |
|||
repo_url: https://gogs.informatik.hs-fulda.de/hpc/nixcfg.git |
|||
docs_dir: content |
|||
|
|||
theme: |
|||
name: readthedocs |
|||
locale: de |
|||
prev_next_buttons_location: none |
|||
highlightjs: true |
|||
hljs_languages: |
|||
- bash |
|||
- yaml |
|||
- rust |
|||
|
|||
markdown_extensions: |
|||
- extra |
|||
- admonition |
|||
|
|||
plugins: |
|||
- search |
|||
|
|||
nav: |
|||
- Start: index.md |
|||
- Erste Schritte: first_steps.md |
|||
- Nutzung: usage.md |
|||
- Software: environment.md |
|||
- Daten: storage.md |
|||
- Best Practices: best_practice.md |
|||
- Hilfe: support.md |
|||
- Internes: |
|||
- Netzwerk: internal/network.md |
|||
|
@ -0,0 +1,19 @@ |
|||
{ pkgs, config, lib, ... }: |
|||
|
|||
with lib; |
|||
|
|||
let |
|||
mkdocsConfig = import ./config.nix { |
|||
inherit config lib; |
|||
}; |
|||
|
|||
mkdocsConfigYaml = pkgs.writeText "mkdocs.yaml" (generators.toYAML { } mkdocsConfig); |
|||
|
|||
in |
|||
{ |
|||
system.build.docs = pkgs.runCommand "docs" { } '' |
|||
${pkgs.mkdocs}/bin/mkdocs build \ |
|||
--site-dir "$out" \ |
|||
--config-file "${mkdocsConfigYaml}" |
|||
''; |
|||
} |
@ -1 +0,0 @@ |
|||
/nix/store/8v3r668x18fl49yx2s41yzs0qx9cn24d-docs |
@ -0,0 +1,97 @@ |
|||
{ pkgs, lib, config, nodes, ... }: |
|||
|
|||
with lib; |
|||
|
|||
let |
|||
# Build the Kea reservation list for the network `net`: one entry for every |
|||
# node that declares a reservation under hpc.dhcp.reservations.<net>. |
|||
mkReservations = net: |
|||
let |
|||
# Keep only the nodes that have a reservation in this network. |
|||
reserved = filterAttrs |
|||
(_: node: hasAttr net node.config.hpc.dhcp.reservations) |
|||
nodes; |
|||
|
|||
# Translate a node's reservation into Kea's reservation attributes. |
|||
toReservation = _: node: |
|||
let |
|||
entry = node.config.hpc.dhcp.reservations.${net}; |
|||
in |
|||
{ |
|||
"hw-address" = entry.hwAddress; |
|||
"ip-address" = entry.ipAddress; |
|||
}; |
|||
in |
|||
mapAttrsToList toReservation reserved; |
|||
|
|||
in |
|||
{ |
|||
services.kea = { |
|||
dhcp4 = { |
|||
enable = true; |
|||
settings = { |
|||
"valid-lifetime" = 4000; |
|||
"renew-timer" = 1000; |
|||
"rebind-timer" = 2000; |
|||
|
|||
"interfaces-config" = { |
|||
"interfaces" = [ "mngt" "data" ]; |
|||
}; |
|||
|
|||
"lease-database" = { |
|||
"type" = "memfile"; |
|||
"persist" = true; |
|||
"name" = "/var/lib/kea/dhcp4.leases"; |
|||
}; |
|||
|
|||
"subnet4" = [ |
|||
{ |
|||
"subnet" = "10.32.46.0/24"; |
|||
"interface" = "mngt"; |
|||
|
|||
"option-data" = [ |
|||
{ |
|||
"name" = "routers"; |
|||
"data" = config.networking.defaultGateway.address; |
|||
} |
|||
{ |
|||
"name" = "domain-name-servers"; |
|||
"data" = "10.0.0.53,10.1.1.10"; |
|||
} |
|||
{ |
|||
"name" = "domain-name"; |
|||
"data" = "mngt.${config.networking.domain}"; |
|||
} |
|||
{ |
|||
"name" = "domain-search"; |
|||
"data" = "mngt.${config.networking.domain}"; |
|||
} |
|||
]; |
|||
|
|||
"pools" = [ |
|||
{ |
|||
"pool" = "10.32.46.100-10.32.46.200"; |
|||
} |
|||
]; |
|||
|
|||
"reservations" = mkReservations "mngt"; |
|||
} |
|||
|
|||
{ |
|||
"subnet" = "10.32.47.0/24"; |
|||
"interface" = "data"; |
|||
|
|||
"option-data" = [ |
|||
{ |
|||
"name" = "domain-name-servers"; |
|||
"data" = "10.0.0.53,10.1.1.10"; |
|||
} |
|||
{ |
|||
"name" = "domain-name"; |
|||
"data" = config.networking.domain; |
|||
} |
|||
{ |
|||
"name" = "domain-search"; |
|||
"data" = config.networking.domain; |
|||
} |
|||
]; |
|||
|
|||
"pools" = [ |
|||
{ |
|||
"pool" = "10.32.47.100-10.32.47.200"; |
|||
} |
|||
]; |
|||
|
|||
"reservations" = mkReservations "data"; |
|||
} |
|||
]; |
|||
}; |
|||
}; |
|||
}; |
|||
} |
@ -1,16 +1,22 @@ |
|||
{ pkgs, lib, ... }: |
|||
{ config, pkgs, lib, ... }: |
|||
|
|||
with lib; |
|||
|
|||
let |
|||
docs = pkgs.callPackage ../../docs { }; |
|||
|
|||
in |
|||
{ |
|||
imports = [ |
|||
../../docs/module.nix |
|||
]; |
|||
|
|||
services.nginx = { |
|||
virtualHosts = { |
|||
"docs.${config.networking.domain}" = { |
|||
locations."/".root = docs; |
|||
"${config.networking.domain}" = { |
|||
default = true; |
|||
|
|||
serverAliases = [ |
|||
"doku.${config.networking.domain}" |
|||
]; |
|||
|
|||
locations."/".root = config.system.build.docs; |
|||
}; |
|||
}; |
|||
}; |
|||
|
@ -0,0 +1,20 @@ |
|||
{ config, ... }: |
|||
|
|||
{ |
|||
programs.msmtp = { |
|||
enable = true; |
|||
accounts = { |
|||
default = { |
|||
auth = true; |
|||
tls = true; |
|||
port = 587; |
|||
from = "fdhpc@informatik.hs-fulda.de"; |
|||
host = "smtp.hs-fulda.de"; |
|||
user = "fdhpc"; |
|||
passwordeval = "cat ${config.sops.secrets."mail/password".path}"; |
|||
}; |
|||
}; |
|||
}; |
|||
|
|||
sops.secrets."mail/password" = { }; |
|||
} |
@ -1,13 +1,45 @@ |
|||
{ |
|||
networking.interfaces."enp11s0f0" = { |
|||
networking.interfaces."enp11s0f0" = { }; |
|||
networking.interfaces."enp11s0f1" = { }; |
|||
|
|||
networking.bonds."data" = { |
|||
interfaces = [ "enp11s0f0" "enp11s0f1" ]; |
|||
driverOptions = { |
|||
miimon = "100"; |
|||
mode = "802.3ad"; |
|||
}; |
|||
}; |
|||
|
|||
networking.vlans."mngt" = { |
|||
id = 1032; |
|||
interface = "data"; |
|||
}; |
|||
|
|||
networking.interfaces."data" = { |
|||
ipv4.addresses = [{ |
|||
address = "10.32.47.10"; |
|||
prefixLength = 24; |
|||
}]; |
|||
}; |
|||
|
|||
# This is not our real management interface but the hosts interface to the |
|||
# management network |
|||
networking.interfaces."mngt" = { |
|||
ipv4.addresses = [{ |
|||
address = "10.32.46.253"; |
|||
prefixLength = 24; |
|||
}]; |
|||
}; |
|||
|
|||
networking.defaultGateway = { |
|||
address = "10.32.47.1"; |
|||
interface = "enp11s0f0"; |
|||
interface = "data"; |
|||
}; |
|||
|
|||
hpc.dhcp.reservations = { |
|||
"mngt" = { |
|||
hwAddress = "e4:1f:13:28:c7:b9"; |
|||
ipAddress = "10.32.46.10"; |
|||
}; |
|||
}; |
|||
} |
@ -0,0 +1,30 @@ |
|||
{ |
|||
"node-00" = { |
|||
mngt = "50:46:5D:DA:0C:C9"; |
|||
data = "50:46:5d:da:0b:d6"; |
|||
}; |
|||
"node-01" = { |
|||
mngt = "50:46:5D:DA:0C:07"; |
|||
data = "50:46:5d:da:0c:56"; |
|||
}; |
|||
"node-02" = { |
|||
mngt = "10:BF:48:19:B0:04"; |
|||
data = "10:bf:48:1f:a6:8f"; |
|||
}; |
|||
"node-03" = { |
|||
mngt = "10:BF:48:19:A4:FE"; |
|||
data = "10:bf:48:1b:57:47"; |
|||
}; |
|||
"node-04" = { |
|||
mngt = "10:BF:48:19:A2:E2"; |
|||
data = "10:bf:48:19:a2:4d"; |
|||
}; |
|||
"node-05" = { |
|||
mngt = "10:BF:48:15:00:F5"; |
|||
data = "10:bf:48:1b:56:df"; |
|||
}; |
|||
"node-06" = { |
|||
mngt = "50:46:5D:DA:0C:09"; |
|||
data = "50:46:5d:da:0c:52"; |
|||
}; |
|||
} |
@ -1,6 +1,9 @@ |
|||
{ |
|||
imports = [ |
|||
./node.nix |
|||
./hostFile.nix |
|||
./dhcp.nix |
|||
./netinstall.nix |
|||
./beegfs.nix |
|||
]; |
|||
} |
@ -0,0 +1,29 @@ |
|||
{ lib, config, ... }: |
|||
|
|||
with lib; |
|||
|
|||
{ |
|||
options.hpc.dhcp = { |
|||
reservations = mkOption { |
|||
description = '' |
|||
DHCP reservations for this host. |
|||
''; |
|||
type = types.attrsOf (types.submodule { |
|||
options = { |
|||
hwAddress = mkOption { |
|||
description = '' |
|||
MAC address of the interface in this network. |
|||
''; |
|||
type = types.str; |
|||
}; |
|||
ipAddress = mkOption { |
|||
description = '' |
|||
IP address of the host. |
|||
''; |
|||
# Declared as a string like hwAddress; without an explicit type the |
|||
# option would fall back to the unspecified type. |
|||
type = types.str; |
|||
}; |
|||
}; |
|||
}); |
|||
default = { }; |
|||
}; |
|||
}; |
|||
} |
@ -0,0 +1,20 @@ |
|||
{ config, lib, ... }: |
|||
|
|||
with lib; |
|||
|
|||
{ |
|||
options.hpc.netinstall = { |
|||
enable = mkEnableOption "NetInstall"; |
|||
}; |
|||
|
|||
config = mkIf config.hpc.netinstall.enable { |
|||
deployment.tags = [ "netinstall" ]; |
|||
|
|||
# NetInstall only works for hosts that have a DHCP reservation in the |
|||
# "data" network. |
|||
assertions = [ |
|||
{ |
|||
# hpc.dhcp.reservations is an attribute set keyed by network name, so |
|||
# test for the key with hasAttr; elem only works on lists. |
|||
assertion = hasAttr "data" config.hpc.dhcp.reservations; |
|||
message = "NetInstall needs DHCP reservation in data network"; |
|||
} |
|||
]; |
|||
}; |
|||
} |
@ -0,0 +1,30 @@ |
|||
{ lib, config, ... }: |
|||
|
|||
with lib; |
|||
|
|||
{ |
|||
options.hpc.node = { |
|||
enable = mkEnableOption "Compute Node"; |
|||
|
|||
id = mkOption { |
|||
description = '' |
|||
ID of the compute node. |
|||
''; |
|||
type = types.ints.unsigned; |
|||
}; |
|||
|
|||
name = mkOption { |
|||
description = '' |
|||
Name of the node. |
|||
''; |
|||
type = types.str; |
|||
readOnly = true; |
|||
}; |
|||
}; |
|||
|
|||
config = mkIf config.hpc.node.enable { |
|||
hpc.node.name = "node-${fixedWidthNumber 2 config.hpc.node.id}"; |
|||
|
|||
deployment.tags = [ "node" ]; |
|||
}; |
|||
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue