Skip to content

Commit afc0400

Browse files
Merge pull request #67 from scalableminds/port-slurm-in-docker-to-debian
Port Slurm Docker Cluster to Debian
2 parents f2908ad + f51ed84 commit afc0400

File tree

2 files changed

+66
-134
lines changed

2 files changed

+66
-134
lines changed

slurm-docker-cluster/Dockerfile

Lines changed: 35 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1,108 +1,51 @@
1-
FROM rockylinux:9.2
2-
3-
ENV LC_ALL=en_US.utf8
4-
ENV LANG=en_US.utf8
5-
6-
LABEL org.opencontainers.image.source="https://github.com/giovtorres/slurm-docker-cluster" \
7-
org.opencontainers.image.title="slurm-docker-cluster" \
8-
org.opencontainers.image.description="Slurm Docker cluster on CentOS 7" \
9-
org.label-schema.docker.cmd="docker-compose up -d" \
10-
maintainer="Giovanni Torres"
11-
12-
ARG SLURM_TAG=slurm-22-05-9-1
13-
ARG GOSU_VERSION=1.11
14-
15-
COPY --from=ghcr.io/astral-sh/uv:0.4.20 /uv /bin/uv
16-
17-
RUN set -ex \
18-
&& dnf makecache \
19-
&& dnf -y update \
20-
&& dnf -y install dnf-plugins-core https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm \
21-
&& dnf config-manager --enable epel \
22-
&& dnf config-manager --enable crb \
23-
&& dnf -y install \
24-
wget \
25-
bzip2 \
26-
perl \
27-
gcc \
28-
gcc-c++\
29-
git \
30-
glibc-langpack-en \
31-
gnupg \
32-
make \
33-
munge \
34-
munge-devel \
35-
python39 \
36-
python3-devel \
37-
python3-pip \
38-
mariadb-server \
39-
mariadb-devel \
40-
psmisc \
41-
slurm-contribs \
42-
bash-completion \
43-
vim-enhanced \
44-
&& dnf clean all \
45-
&& rm -rf /var/cache/yum \
46-
&& ln -s /usr/bin/python3 /usr/bin/python
47-
48-
49-
50-
RUN set -ex \
51-
&& wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" \
52-
&& wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64.asc" \
53-
&& export GNUPGHOME="$(mktemp -d)" \
54-
# See https://github.com/tianon/gosu/issues/17#issuecomment-348464529
55-
&& ( gpg --keyserver ha.pool.sks-keyservers.net --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
56-
|| gpg --keyserver pgp.mit.edu --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
57-
|| gpg --keyserver keyserver.pgp.com --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
58-
|| gpg --keyserver keyserver.ubuntu.com --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
59-
) \
60-
&& gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu \
61-
&& rm -rf "${GNUPGHOME}" /usr/local/bin/gosu.asc \
62-
&& chmod +x /usr/local/bin/gosu \
63-
&& gosu nobody true
64-
65-
RUN set -x \
66-
&& git clone --depth 1 --branch $SLURM_TAG https://github.com/SchedMD/slurm.git \
67-
&& pushd slurm \
68-
&& ./configure --enable-debug --prefix=/usr --sysconfdir=/etc/slurm \
69-
--with-mysql_config=/usr/bin --libdir=/usr/lib64 \
70-
&& make install \
71-
&& install -D -m644 etc/cgroup.conf.example /etc/slurm/cgroup.conf.example \
72-
&& install -D -m644 etc/slurm.conf.example /etc/slurm/slurm.conf.example \
73-
&& install -D -m644 etc/slurmdbd.conf.example /etc/slurm/slurmdbd.conf.example \
74-
&& install -D -m644 contribs/slurm_completion_help/slurm_completion.sh /etc/profile.d/slurm_completion.sh \
75-
&& popd \
76-
&& rm -rf slurm \
77-
&& groupadd -r --gid=1001 slurm \
78-
&& useradd -r -g slurm --uid=1001 slurm \
79-
&& mkdir /etc/sysconfig/slurm \
1+
ARG UV_VERSION="0.7.0"
2+
3+
FROM ghcr.io/astral-sh/uv:$UV_VERSION AS uv
4+
5+
FROM debian:bookworm
6+
7+
ARG SLURM_VERSION="24.05.4"
8+
ARG SLURM_USER_UID=990
9+
ARG SLURM_USER_GID=990
10+
ARG MYSQL_CONFIG_VERSION="0.8.34"
11+
12+
COPY --from=uv /uv /bin/uv
13+
14+
RUN set -xe \
15+
&& apt-get update \
16+
&& apt-get install -y curl gnupg lsb-release wget
17+
# Download the MySQL APT repository configuration package.
# -f makes curl exit non-zero on HTTP errors (404/500) instead of saving the
# error page as the .deb, so a bad MYSQL_CONFIG_VERSION fails here rather than
# later in `dpkg -i` with a confusing "not a Debian format archive" error.
RUN curl -fLo /tmp/mysql-apt-config.deb "https://dev.mysql.com/get/mysql-apt-config_$MYSQL_CONFIG_VERSION-1_all.deb"
18+
RUN set -xe \
19+
&& dpkg -i /tmp/mysql-apt-config.deb \
20+
&& env DEBIAN_FRONTEND="noninteractive" dpkg-reconfigure mysql-apt-config \
21+
&& apt-get update \
22+
&& apt-get install --yes mysql-server
23+
24+
RUN bash -c "set -xe && for package in 'smd' 'smd-client' 'smd-slurmd' 'smd-slurmctld' 'smd-slurmdbd' 'smd-sview'; do \
25+
curl -fLo \"/tmp/slurm-\${package}.deb\" https://github.com/scalableminds/slurm-packages/releases/download/${SLURM_VERSION}/slurm-\${package}_${SLURM_VERSION}-1_amd64.deb; \
26+
done"
27+
RUN apt-get install --yes -f /tmp/*.deb
28+
RUN rm /tmp/*
29+
30+
RUN set -xe \
31+
&& addgroup --gid="$SLURM_USER_GID" slurm \
32+
&& adduser --system --uid="$SLURM_USER_UID" --ingroup slurm slurm \
33+
&& mkdir -p /etc/sysconfig/slurm \
8034
/var/spool/slurmd \
8135
/var/run/slurmd \
8236
/var/run/slurmdbd \
8337
/var/lib/slurmd \
8438
/var/log/slurm \
8539
/data \
86-
&& touch /var/lib/slurmd/node_state \
87-
/var/lib/slurmd/front_end_state \
88-
/var/lib/slurmd/job_state \
89-
/var/lib/slurmd/resv_state \
90-
/var/lib/slurmd/trigger_state \
91-
/var/lib/slurmd/assoc_mgr_state \
92-
/var/lib/slurmd/assoc_usage \
93-
/var/lib/slurmd/qos_usage \
94-
/var/lib/slurmd/fed_mgr_state \
40+
/run/munge \
9541
&& chown -R slurm:slurm /var/*/slurm* \
96-
&& /sbin/create-munge-key
42+
&& chown -R munge:munge /run/munge
9743

9844
COPY --chown=slurm:slurm slurm.conf /etc/slurm/slurm.conf
9945
COPY --chown=slurm:slurm slurmdbd.conf /etc/slurm/slurmdbd.conf
100-
10146
RUN chmod 600 /etc/slurm/slurm.conf
10247
RUN chmod 600 /etc/slurm/slurmdbd.conf
10348

10449
COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
105-
10650
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
107-
10851
CMD ["slurmdbd"]

slurm-docker-cluster/docker-entrypoint.sh

Lines changed: 31 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -3,58 +3,47 @@ set -e
33

44
if [ "$1" = "slurmdbd" ]
55
then
6-
echo "---> Starting the MUNGE Authentication service (munged) ..."
7-
gosu munge /usr/sbin/munged
8-
9-
echo "---> Starting the Slurm Database Daemon (slurmdbd) ..."
10-
11-
{
12-
. /etc/slurm/slurmdbd.conf
13-
until echo "SELECT 1" | mysql -h $StorageHost -u$StorageUser -p$StoragePass 2>&1 > /dev/null
14-
do
15-
echo "-- Waiting for database to become active ..."
16-
sleep 2
17-
done
18-
}
19-
echo "-- Database is now active ..."
20-
21-
exec gosu slurm /usr/sbin/slurmdbd -Dvvv
6+
set -x
7+
8+
{ echo "---> Starting the MUNGE Authentication service (munged) ..."; } 2>/dev/null
9+
setpriv --reuid=munge --regid=munge --init-groups /usr/sbin/munged
10+
{ echo "---> Starting the SlurmDBD Manager Daemon ..."; } 2>/dev/null
11+
exec setpriv --reuid=slurm --regid=slurm --init-groups /usr/sbin/slurmdbd -D
2212
fi
2313

2414
if [ "$1" = "slurmctld" ]
2515
then
26-
echo "---> Starting the MUNGE Authentication service (munged) ..."
27-
gosu munge /usr/sbin/munged
28-
29-
echo "---> Waiting for slurmdbd to become active before starting slurmctld ..."
16+
set -x
3017

31-
until 2>/dev/null >/dev/tcp/slurmdbd/6819
32-
do
33-
echo "-- slurmdbd is not available. Sleeping ..."
34-
sleep 2
35-
done
36-
echo "-- slurmdbd is now active ..."
18+
{ echo "---> Starting the MUNGE Authentication service (munged) ..."; } 2>/dev/null
19+
setpriv --reuid=munge --regid=munge --init-groups /usr/sbin/munged
3720

38-
echo "---> Starting the Slurm Controller Daemon (slurmctld) ..."
39-
exec gosu slurm /usr/sbin/slurmctld -Dvvv
21+
{ echo "---> Starting the Slurm Controller Daemon (slurmctld) ..."; } 2>/dev/null
22+
exec setpriv --reuid=slurm --regid=slurm --init-groups /usr/sbin/slurmctld -D
4023
fi
4124

4225
if [ "$1" = "slurmd" ]
4326
then
44-
echo "---> Starting the MUNGE Authentication service (munged) ..."
45-
gosu munge /usr/sbin/munged
46-
47-
echo "---> Waiting for slurmctld to become active before starting slurmd..."
48-
49-
until 2>/dev/null >/dev/tcp/slurmctld/6817
50-
do
51-
echo "-- slurmctld is not available. Sleeping ..."
52-
sleep 2
53-
done
54-
echo "-- slurmctld is now active ..."
55-
56-
echo "---> Starting the Slurm Node Daemon (slurmd) ..."
57-
exec /usr/sbin/slurmd -Dvvv
27+
set -x
28+
29+
{ echo "---> Setup Cgroup v2 ..."; } 2>/dev/null
30+
mkdir /sys/fs/cgroup/init.scope
31+
mkdir /sys/fs/cgroup/system.slice
32+
# Move Root Process to new cgroup
33+
echo "1" > /sys/fs/cgroup/init.scope/cgroup.procs
34+
if [ ! "$$" = "1" ]; then
35+
echo "$$" > /sys/fs/cgroup/init.scope/cgroup.procs
36+
fi
37+
# Add cpu and memory controller to system.slice namespace
38+
echo "+cpuset +cpu +io +memory +pids" > /sys/fs/cgroup/cgroup.subtree_control
39+
echo "+cpuset +cpu +io +memory +pids" > /sys/fs/cgroup/system.slice/cgroup.subtree_control
40+
41+
{ echo "---> Starting the MUNGE Authentication service (munged) ..."; } 2>/dev/null
42+
setpriv --reuid=munge --regid=munge --init-groups /usr/sbin/munged
43+
44+
# Silence only stderr: with `set -x` active the xtrace line goes to stderr,
# while the status message itself must still reach stdout (same pattern as
# the other status messages in this script).
{ echo "---> Starting the Slurm Node Daemon (slurmd) ..."; } 2>/dev/null
45+
exec /usr/sbin/slurmd -D
5846
fi
5947

48+
set -x
6049
exec "$@"

0 commit comments

Comments
 (0)