Skip to content

Commit 81ddf87

Browse files
authored
Slurm support (#26)
Add Slurm support. The assumption here is that any Slurm class needs a GPU. Class definitions will now be searched for in all cluster.d files, not just those of Kubernetes clusters. The cluster file in which a class is defined determines the cluster the job is submitted to.
1 parent 0541f68 commit 81ddf87

File tree

3 files changed

+51
-30
lines changed

3 files changed

+51
-30
lines changed

form.yml.erb

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,42 +3,44 @@
33
groups.unshift(groups.delete(OodSupport::Process.group))
44
end.map(&:name).grep(/^P./)
55

6-
classrooms = OodAppkit.clusters.each_with_object({}) do |cluster, hash|
7-
if cluster.kubernetes?
8-
hash.merge!(cluster.custom_config[:classrooms].to_h)
9-
end
10-
end.select do |k,v|
11-
k.start_with?('jupyter') && groups.include?(v)
12-
end.map do |k,v|
13-
tokens = k.gsub('jupyter/', '').split('/')
6+
classrooms = OodAppkit.clusters.map do |cluster|
7+
[ cluster.id.to_s, cluster.custom_config[:classrooms].to_h ]
8+
end.to_h.map do |cluster_id, classrooms|
9+
classes = classrooms.select do |k,v|
10+
k.start_with?('jupyter') && groups.include?(v)
11+
end.map do |k,v|
12+
tokens = k.gsub('jupyter/', '').split('/')
1413

15-
{
16-
name: tokens[0].nil? ? 'unknown' : tokens[0],
17-
size: tokens[1].nil? ? 1 : tokens[1].to_i,
18-
time: tokens[2].nil? ? 1 : tokens[2].to_i,
19-
compute_cluster: tokens[3].nil? ? 'pitzer' : tokens[3].to_s,
20-
module_type: tokens[4].nil? ? 'default' : tokens[4].to_s,
21-
account: v,
22-
}
23-
end
14+
{
15+
name: tokens[0].nil? ? 'unknown' : tokens[0],
16+
size: tokens[1].nil? ? 1 : tokens[1].to_i,
17+
time: tokens[2].nil? ? 1 : tokens[2].to_i,
18+
cluster_fs: tokens[3].nil? ? 'pitzer' : tokens[3].to_s,
19+
module_type: tokens[4].nil? ? 'default' : tokens[4].to_s,
20+
cluster: cluster_id.to_s,
21+
account: v,
22+
}
23+
end.reject do |arr|
24+
arr.empty?
25+
end
26+
end.flatten
2427
-%>
2528
---
26-
cluster:
27-
- "kubernetes"
28-
- "kubernetes-test"
29-
- "kubernetes-dev"
3029
form:
30+
- cluster
3131
- classroom
3232
- account
3333
- jupyterlab_switch
3434
- size
3535
- time
36-
- compute_cluster
36+
- cluster_fs
3737
- module_type
3838
attributes:
39+
cluster:
40+
widget: "hidden_field"
3941
account:
4042
widget: "hidden_field"
41-
compute_cluster:
43+
cluster_fs:
4244
widget: "hidden_field"
4345
module_type:
4446
widget: "hidden_field"
@@ -117,8 +119,9 @@ attributes:
117119
- [
118120
"<%= cr[:name].gsub('_', ' ') %>", "<%= cr[:name] %>",
119121
data-set-account: "<%= cr[:account] %>",
120-
data-set-compute-cluster: "<%= cr[:compute_cluster] %>",
121-
data-set-module-type: "<%= cr[:module_type] %>"
122+
data-set-cluster-fs: "<%= cr[:cluster_fs] %>",
123+
data-set-module-type: "<%= cr[:module_type] %>",
124+
data-set-cluster: "<%= cr[:cluster] %>"
122125
]
123126
<%- end %>
124127
<%- else -%>

submit.yml.erb

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
<%
32
err_msg = "You are not a part of any classroom project. Restarting your web \
43
server in the help menu may fix the issue. You may also have to wait if you've \
@@ -34,8 +33,10 @@
3433
},
3534
}
3635

37-
apps_path = compute_cluster == 'pitzer' ? '/apps' : '/usr/local'
36+
apps_path = cluster_fs == 'pitzer' ? '/apps' : '/usr/local'
3837
-%>
38+
39+
<% if cluster =~ /kubernetes/ -%>
3940
---
4041
script:
4142
accounting_id: "<%= account %>"
@@ -90,15 +91,21 @@ script:
9091
- type: host
9192
name: lmod-init
9293
host_type: File
93-
path: /apps/<%= compute_cluster %>/lmod/lmod.sh
94+
path: /apps/<%= cluster_fs %>/lmod/lmod.sh
9495
destination_path: /etc/profile.d/lmod.sh
9596
- type: host
9697
name: intel
9798
host_type: Directory
98-
path: /nfsroot/<%= compute_cluster %>/opt/intel
99+
path: /nfsroot/<%= cluster_fs %>/opt/intel
99100
destination_path: /opt/intel
100101
- type: host
101102
name: apps
102103
host_type: Directory
103-
path: "/apps/<%= compute_cluster %>"
104+
path: "/apps/<%= cluster_fs %>"
104105
destination_path: "<%= apps_path %>"
106+
<%- else -%>
107+
script:
108+
accounting_id: "<%= account %>"
109+
wall_time: "<%= time.to_f * 3600 %>"
110+
gpus_per_node: 1
111+
<%- end -%>

template/before.sh.erb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,23 @@
11
#!/bin/bash
22

3+
<%- if context.cluster =~ /kubernetes/ -%>
34
exec &> >(tee -a "pod.log")
45

56
source /etc/profile.d/lmod.sh
67
source /bin/find_host_port
78
source /bin/save_passwd_as_secret
89
source /bin/create_salt_and_sha1
10+
<%- else -%>
11+
[[ $(type -t module) == "function" ]] && export -f module
912

13+
# Find available port to run server on
14+
port=$(find_port ${host})
15+
16+
# Generate SHA1 encrypted password (requires OpenSSL installed)
17+
SALT="$(create_passwd 16)"
18+
password="$(create_passwd 16)"
19+
PASSWORD_SHA1="$(echo -n "${password}${SALT}" | openssl dgst -sha1 | awk '{print $NF}')"
20+
<%- end -%>
1021

1122
export OSC_CLASS_ID="<%= context.classroom %>"
1223
export OSC_PROJECT_ID="<%= context.account %>"

0 commit comments

Comments
 (0)