commit 5808ac15b056f06bb5d7f98f6416daaf15650e03
Author: Marcel Nijenhof
Date:   Sun May 31 06:58:17 2020 -0400

    Basis slurm cluster in docker

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8d07b1c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.gitsecret/keys/random_seed
+!*.secret
+slurm-base/files/munge.key
diff --git a/.gitsecret/keys/pubring.kbx b/.gitsecret/keys/pubring.kbx
new file mode 100644
index 0000000..25f8dda
Binary files /dev/null and b/.gitsecret/keys/pubring.kbx differ
diff --git a/.gitsecret/keys/pubring.kbx~ b/.gitsecret/keys/pubring.kbx~
new file mode 100644
index 0000000..c44bd18
Binary files /dev/null and b/.gitsecret/keys/pubring.kbx~ differ
diff --git a/.gitsecret/keys/trustdb.gpg b/.gitsecret/keys/trustdb.gpg
new file mode 100644
index 0000000..48d82a3
Binary files /dev/null and b/.gitsecret/keys/trustdb.gpg differ
diff --git a/.gitsecret/paths/mapping.cfg b/.gitsecret/paths/mapping.cfg
new file mode 100644
index 0000000..9efd278
--- /dev/null
+++ b/.gitsecret/paths/mapping.cfg
@@ -0,0 +1 @@
+slurm-base/files/munge.key:c1969b6105adce0e62d71877a77bb7a69762d1be8dae8d7ffe92156663f3ee22
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9ef560b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,75 @@
+# Een mini slurm cluster als docker compose omgeving
+
+## Introductie
+
+Op dit moment zijn we aan het onderzoeken of we via docker containers
+een slurm omgeving kunnen maken waarin de Deltares waqua en d-hydro
+modellen kunnen draaien.
+
+Dit cluster is een POC van slurm in docker containers.
+De doelen hiervan zijn:
+
+ - Uitzoeken of slurm in docker kan draaien
+ - Uitzoeken of we de Deltares modellen hierin kunnen draaien
+ - Kennisoverdracht van slurm
+
+## Build instructies
+
+De submit en reken nodes zijn afhankelijk van een basis image slurm-base.
+Hierin staan al een aantal files die op zowel reken nodes als submit node
+aanwezig moeten zijn.
+
+Deze moet eerst gebouwd worden met een docker commando:
+```
+docker build -t slurm-base:latest slurm-base
+```
+
+Hierna kan het cluster gebouwd worden via:
+```
+docker-compose build
+```
+
+En gestart worden via:
+```
+docker-compose up -d
+```
+
+## Testen
+
+### De status van het cluster
+
+Met de volgende commando's kun je statusinformatie krijgen:
+ - sinfo
+ - squeue
+ - scontrol ping
+ - scontrol show nodes
+ - scontrol show partition
+ - scontrol show job
+
+### Een simpel shell script
+
+Plaats het volgende shell script ergens in `/home`:
+```
+#!/bin/sh
+
+hostname
+sleep $(( ${RANDOM}%40+40 ))
+```
+
+Vervolgens kun je dit script met sbatch 8 keer submitten.
+Wat je ziet is dat elke node 2 scripts start.
+
+De overige 4 blijven in de queue staan.
+
+## Todo
+### Nu
+
+ - Integratie mpi.
+ - Integratie waqua/d-hydro.
+ - Workshop schrijven.
+
+### Voor productie
+
+ - Persistent maken job administratie.
+ - Redundante master nodes.
+ - Submit nodes die geen master node zijn.
diff --git a/cal/Dockerfile b/cal/Dockerfile
new file mode 100644
index 0000000..0d28b1a
--- /dev/null
+++ b/cal/Dockerfile
@@ -0,0 +1,23 @@
+# Start with docker base
+FROM slurm-base
+
+LABEL maintainer="Marcel Nijenhof "
+
+# Install slurmd; the yum cache is removed in the same layer so it never
+# ends up in the image.
+RUN "/usr/bin/yum" "-y" "install" \
+        slurm-slurmd \
+    && "/usr/bin/yum" "clean" "all"
+
+#
+# Startup
+#
+# COPY rather than ADD: this is a plain local file.
+COPY files/startup /sbin/startup
+RUN chown root:root /sbin/startup \
+    && chmod 700 /sbin/startup
+
+# Healthy as long as "ps -e" lists a slurmd process.
+HEALTHCHECK CMD ps -e | grep -q slurmd
+
+CMD ["/sbin/startup"]
diff --git a/cal/files/startup b/cal/files/startup
new file mode 100644
index 0000000..89fccba
--- /dev/null
+++ b/cal/files/startup
@@ -0,0 +1,7 @@
+#!/bin/sh
+# Start munged as the munge user, then run slurmd as the main process.
+
+su -s /bin/sh munge -c /usr/sbin/munged
+# -D keeps slurmd in the foreground; -f names the configuration file.
+# exec replaces this shell with slurmd.
+exec /opt/slurm/sbin/slurmd -D -f /opt/slurm/etc/slurm.conf
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..a201f0f
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,21 @@
+---
+version: '3.7'
+services:
+  submit:
+    build: submit
+    hostname: submit
+    volumes:
+      - "/dev/log:/dev/log"
+      - "/var/lib/docker/bindmounts/test/home:/home"
+  cal01:
+    build: cal
+    hostname: cal01
+    volumes:
+      - "/dev/log:/dev/log"
+      - "/var/lib/docker/bindmounts/test/home:/home"
+  cal02:
+    build: cal
+    hostname: cal02
+    volumes:
+      - "/dev/log:/dev/log"
+      - "/var/lib/docker/bindmounts/test/home:/home"
diff --git a/slurm-base/Dockerfile b/slurm-base/Dockerfile
new file mode 100644
index 0000000..f6706c9
--- /dev/null
+++ b/slurm-base/Dockerfile
@@ -0,0 +1,46 @@
+# Start with docker base
+FROM centos:7
+
+LABEL maintainer="Marcel Nijenhof "
+
+#
+# Install and update
+#
+COPY files/slurm.repo /etc/yum.repos.d/slurm.repo
+
+# Update, install the EPEL release package and then slurm in a single layer,
+# and remove the yum cache before that layer is committed.
+RUN "/usr/bin/yum" "-y" "update" \
+    && "/usr/bin/yum" "-y" "install" \
+        https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm \
+    && "/usr/bin/yum" "-y" "install" \
+        slurm \
+    && "/usr/bin/yum" "clean" "all"
+
+# NOTE(review): the password hash of a wheel-group account is baked into the
+# image; fine for a proof of concept, not for a shared or public image.
+RUN "/usr/sbin/groupadd" "-g" "1000" "marceln" \
+    && "/usr/sbin/useradd" \
+        "-c" "Marcel Nijenhof" \
+        "-u" "1000" \
+        "-g" "marceln" \
+        "-G" "wheel" \
+        "-p" '$6$noVPG3snbYoJqcpO$7ii6A0GJPLzKS1cwjypUkSSID8uHG2rA3plQQifLONh9gtHpq1QY08Wako7wzFE7jMbkbFSgB3a3xlhQkvTQ00' \
+        "marceln"
+
+#
+# Munge config
+#
+# NOTE(review): the munge key is stored in an image layer; treat the image
+# itself as a secret.
+COPY files/munge.key /etc/munge/munge.key
+RUN chown munge:munge /etc/munge/munge.key \
+    && chmod 600 /etc/munge/munge.key
+
+#
+# Slurm config
+#
+# -p: do not fail when a directory already exists.
+RUN mkdir -p /opt/slurm/etc /var/log/slurm/
+COPY files/slurm.conf /opt/slurm/etc/slurm.conf
+COPY files/slurm.sh /etc/profile.d/slurm.sh
diff --git a/slurm-base/files/munge.key.secret b/slurm-base/files/munge.key.secret
new file mode 100644
index 0000000..81d3c5b
Binary files /dev/null and b/slurm-base/files/munge.key.secret differ
diff --git a/slurm-base/files/slurm.conf b/slurm-base/files/slurm.conf
new file mode 100644
index 0000000..ce0a7b5
--- /dev/null
+++ b/slurm-base/files/slurm.conf
@@ -0,0 +1,36 @@
+#
+# https://slurm.schedmd.com/slurm.conf.html
+#
+ClusterName=slurmcluster
+SlurmctldHost=submit
+#
+AuthType=auth/munge
+InactiveLimit=120
+JobCompType=jobcomp/filetxt
+JobCompLoc=/var/log/slurm/jobcomp
+ProctrackType=proctrack/linuxproc
+KillWait=30
+MaxJobCount=10000
+MinJobAge=3600
+ReturnToService=0
+SchedulerType=sched/backfill
+SelectType=select/cons_res
+SelectTypeParameters=CR_CPU
+SlurmctldLogFile=/var/log/slurm/slurmctld.log
+SlurmdLogFile=/var/log/slurm/slurmd.log
+SlurmctldPort=7002
+SlurmdPort=7003
+SlurmdSpoolDir=/var/spool/slurmd.spool
+StateSaveLocation=/var/spool/slurm.state
+SwitchType=switch/none
+TmpFS=/tmp
+WaitTime=30
+#
+# Node Configurations
+#
+NodeName=cal01 CPUs=2 RealMemory=2000 TmpDisk=64000
+NodeName=cal02 CPUs=2 RealMemory=2000 TmpDisk=64000
+#
+# Partition Configurations
+#
+PartitionName=queue Nodes=ALL Default=YES
diff --git a/slurm-base/files/slurm.repo b/slurm-base/files/slurm.repo
new file mode 100644
index 0000000..106c9aa
--- /dev/null
+++ b/slurm-base/files/slurm.repo
@@ -0,0 +1,5 @@
+[slurm]
+name=Slurm CentOS7
+baseurl=https://marceln.org/CentOS7
+gpgcheck=0
+enabled=1
diff --git a/slurm-base/files/slurm.sh b/slurm-base/files/slurm.sh
new file mode 100644
index 0000000..a67abef
--- /dev/null
+++ b/slurm-base/files/slurm.sh
@@ -0,0 +1 @@
+PATH=${PATH}:/opt/slurm/bin
diff --git a/slurm-base/files/startup b/slurm-base/files/startup
new file mode 100644
index 0000000..d856f31
--- /dev/null
+++ b/slurm-base/files/startup
@@ -0,0 +1,7 @@
+#!/bin/sh
+# Start munged as the munge user, then run slurmctld as the main process.
+
+su -s /bin/sh munge -c /usr/sbin/munged
+# -D keeps slurmctld in the foreground; -f names the configuration file.
+# exec replaces this shell with slurmctld.
+exec /opt/slurm/sbin/slurmctld -D -f /opt/slurm/etc/slurm.conf
diff --git a/slurm-base/files/wait b/slurm-base/files/wait
new file mode 100755
index 0000000..e21a71b
Binary files /dev/null and b/slurm-base/files/wait differ
diff --git a/submit/Dockerfile b/submit/Dockerfile
new file mode 100644
index 0000000..9a1b2b9
--- /dev/null
+++ b/submit/Dockerfile
@@ -0,0 +1,24 @@
+# Start with docker base
+FROM slurm-base
+
+LABEL maintainer="Marcel Nijenhof "
+
+# Install slurm-slurmctld and slurm-torque; the yum cache is removed in the
+# same layer so it never ends up in the image.
+RUN "/usr/bin/yum" "-y" "install" \
+        slurm-slurmctld \
+        slurm-torque \
+    && "/usr/bin/yum" "clean" "all"
+
+#
+# Startup
+#
+# COPY rather than ADD: this is a plain local file.
+COPY files/startup /sbin/startup
+RUN chown root:root /sbin/startup \
+    && chmod 700 /sbin/startup
+
+# Healthy as long as the output of "scontrol ping" contains UP.
+HEALTHCHECK CMD /opt/slurm/bin/scontrol ping | grep -q UP
+
+CMD ["/sbin/startup"]
diff --git a/submit/files/startup b/submit/files/startup
new file mode 100644
index 0000000..d856f31
--- /dev/null
+++ b/submit/files/startup
@@ -0,0 +1,7 @@
+#!/bin/sh
+# Start munged as the munge user, then run slurmctld as the main process.
+
+su -s /bin/sh munge -c /usr/sbin/munged
+# -D keeps slurmctld in the foreground; -f names the configuration file.
+# exec replaces this shell with slurmctld.
+exec /opt/slurm/sbin/slurmctld -D -f /opt/slurm/etc/slurm.conf