|
14 | 14 | # See the License for the specific language governing permissions and |
15 | 15 | # limitations under the License. |
16 | 16 |
|
| 17 | +# Simplified build script for CI using nixl-deps-base image |
| 18 | +# Dependencies (UCX, libfabric, etcd, aws-sdk, rust, DOCA) are pre-installed in the base image |
| 19 | + |
17 | 20 | # shellcheck disable=SC1091 |
18 | 21 | . "$(dirname "$0")/../.ci/scripts/common.sh" |
19 | 22 |
|
20 | 23 | set -e |
21 | 24 | set -x |
22 | 25 | set -o pipefail |
23 | 26 |
|
24 | | -# Parse commandline arguments with first argument being the install directory |
25 | | -# and second argument being the UCX installation directory. |
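| 27 | +# Parse command-line arguments: $1 is the install directory (required), $2 is optional extra arguments passed to the NIXL meson setup; LIBFABRIC_INSTALL_DIR may be overridden via the environment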
26 | 28 | INSTALL_DIR=$1 |
27 | | -UCX_INSTALL_DIR=$2 |
28 | | -EXTRA_BUILD_ARGS=${3:-""} |
29 | | -# UCX_VERSION is the version of UCX to build override default with env variable. |
30 | | -UCX_VERSION=${UCX_VERSION:-v1.19.0} |
31 | | -# LIBFABRIC_VERSION is the version of libfabric to build override default with env variable. |
32 | | -LIBFABRIC_VERSION=${LIBFABRIC_VERSION:-v2.3.0} |
33 | | -# LIBFABRIC_INSTALL_DIR can be set via environment variable, defaults to INSTALL_DIR |
34 | | -LIBFABRIC_INSTALL_DIR=${LIBFABRIC_INSTALL_DIR:-$INSTALL_DIR} |
| 29 | +EXTRA_BUILD_ARGS=${2:-""} |
| 30 | +LIBFABRIC_INSTALL_DIR=${LIBFABRIC_INSTALL_DIR:-/usr/local} |
35 | 31 |
|
36 | 32 | if [ -z "$INSTALL_DIR" ]; then |
37 | | - echo "Usage: $0 <install_dir> <ucx_install_dir>" |
| 33 | + echo "Usage: $0 <install_dir> [extra_build_args]" |
38 | 34 | exit 1 |
39 | 35 | fi |
40 | 36 |
|
41 | | -if [ -z "$UCX_INSTALL_DIR" ]; then |
42 | | - UCX_INSTALL_DIR=$INSTALL_DIR |
43 | | -fi |
44 | | - |
45 | | - |
46 | | -# For running as user - check if running as root, if not set sudo variable |
| 37 | +# Support running as a non-root user: set SUDO to "sudo" when the effective user ID is not 0
47 | 38 | if [ "$(id -u)" -ne 0 ]; then |
48 | 39 | SUDO=sudo |
49 | 40 | else |
|
53 | 44 | ARCH=$(uname -m) |
54 | 45 | [ "$ARCH" = "arm64" ] && ARCH="aarch64" |
55 | 46 |
|
56 | | -# Some docker images are with broken installations: |
57 | | -$SUDO rm -rf /usr/lib/cmake/grpc /usr/lib/cmake/protobuf |
58 | | - |
59 | | -$SUDO apt-get -qq update |
60 | | -$SUDO apt-get -qq install -y python3-dev \ |
61 | | - python3-pip \ |
62 | | - curl \ |
63 | | - wget \ |
64 | | - libnuma-dev \ |
65 | | - numactl \ |
66 | | - autotools-dev \ |
67 | | - automake \ |
68 | | - git \ |
69 | | - libtool \ |
70 | | - libz-dev \ |
71 | | - libiberty-dev \ |
72 | | - flex \ |
73 | | - build-essential \ |
74 | | - cmake \ |
75 | | - libgoogle-glog-dev \ |
76 | | - libgtest-dev \ |
77 | | - libgmock-dev \ |
78 | | - libjsoncpp-dev \ |
79 | | - libpython3-dev \ |
80 | | - libboost-all-dev \ |
81 | | - libssl-dev \ |
82 | | - libgrpc-dev \ |
83 | | - libgrpc++-dev \ |
84 | | - libprotobuf-dev \ |
85 | | - libcpprest-dev \ |
86 | | - libaio-dev \ |
87 | | - liburing-dev \ |
88 | | - meson \ |
89 | | - ninja-build \ |
90 | | - pkg-config \ |
91 | | - protobuf-compiler-grpc \ |
92 | | - pybind11-dev \ |
93 | | - etcd-server \ |
94 | | - net-tools \ |
95 | | - iproute2 \ |
96 | | - pciutils \ |
97 | | - libpci-dev \ |
98 | | - uuid-dev \ |
99 | | - libibmad-dev \ |
100 | | - doxygen \ |
101 | | - clang \ |
102 | | - hwloc \ |
103 | | - libhwloc-dev \ |
104 | | - libcurl4-openssl-dev zlib1g-dev # aws-sdk-cpp dependencies |
105 | | - |
106 | | -# Ubuntu 22.04 specific setup |
107 | | -if grep -q "Ubuntu 22.04" /etc/os-release 2>/dev/null; then |
108 | | - # Upgrade pip for '--break-system-packages' support |
109 | | - $SUDO pip3 install --upgrade pip |
110 | | - |
111 | | - # Upgrade meson (distro version 0.61.2 is too old, project requires >= 0.64.0) |
112 | | - $SUDO pip3 install --upgrade meson |
113 | | - # Ensure pip3's meson takes precedence over apt's version |
114 | | - export PATH="$HOME/.local/bin:/usr/local/bin:$PATH" |
115 | | -fi |
116 | | - |
117 | | -# Add DOCA repository and install packages |
118 | | -ARCH_SUFFIX=$(if [ "${ARCH}" = "aarch64" ]; then echo "arm64"; else echo "amd64"; fi) |
119 | | -MELLANOX_OS="$(. /etc/lsb-release; echo ${DISTRIB_ID}${DISTRIB_RELEASE} | tr A-Z a-z | tr -d .)" |
120 | | -wget --tries=3 --waitretry=5 --no-verbose https://www.mellanox.com/downloads/DOCA/DOCA_v3.1.0/host/doca-host_3.1.0-091000-25.07-${MELLANOX_OS}_${ARCH_SUFFIX}.deb -O doca-host.deb |
121 | | -$SUDO dpkg -i doca-host.deb |
122 | | -$SUDO apt-get update |
123 | | -$SUDO apt-get upgrade -y |
124 | | -$SUDO apt-get install -y --no-install-recommends doca-sdk-gpunetio libdoca-sdk-gpunetio-dev libdoca-sdk-verbs-dev |
125 | | - |
126 | | -# Force reinstall of RDMA packages from DOCA repository |
127 | | -# Reinstall needed to fix broken libibverbs-dev, which may lead to lack of Infiniband support. |
128 | | -# Upgrade is not sufficient if the version is the same since apt skips the installation. |
129 | | -$SUDO apt-get -qq -y install \ |
130 | | - --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev \ |
131 | | - libnuma-dev librdmacm-dev ibverbs-providers |
132 | | - |
133 | | -wget --tries=3 --waitretry=5 https://static.rust-lang.org/rustup/dist/${ARCH}-unknown-linux-gnu/rustup-init |
134 | | -chmod +x rustup-init |
135 | | -./rustup-init -y --default-toolchain 1.86.0 |
136 | | -export PATH="$HOME/.cargo/bin:$PATH" |
137 | | - |
138 | | -wget --tries=3 --waitretry=5 "https://astral.sh/uv/install.sh" -O install_uv.sh |
139 | | -chmod +x install_uv.sh |
140 | | -./install_uv.sh |
141 | | -export PATH="$HOME/.local/bin:$PATH" |
142 | | - |
143 | | -curl -fSsL "https://github.com/openucx/ucx/tarball/${UCX_VERSION}" | tar xz |
144 | | -( \ |
145 | | - cd openucx-ucx* && \ |
146 | | - ./autogen.sh && \ |
147 | | - ./configure \ |
148 | | - --prefix="${UCX_INSTALL_DIR}" \ |
149 | | - --enable-shared \ |
150 | | - --disable-static \ |
151 | | - --disable-doxygen-doc \ |
152 | | - --enable-optimizations \ |
153 | | - --enable-cma \ |
154 | | - --enable-devel-headers \ |
155 | | - --with-verbs \ |
156 | | - --with-dm \ |
157 | | - ${UCX_CUDA_BUILD_ARGS} \ |
158 | | - --enable-mt && \ |
159 | | - make -j && \ |
160 | | - make -j install-strip && \ |
161 | | - $SUDO ldconfig \ |
162 | | -) |
163 | | - |
164 | | -wget --tries=3 --waitretry=5 -O "libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" "https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" |
165 | | -tar xjf "libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" |
166 | | -rm "libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" |
167 | | -( \ |
168 | | - cd libfabric-* && \ |
169 | | - ./autogen.sh && \ |
170 | | - ./configure --prefix="${LIBFABRIC_INSTALL_DIR}" \ |
171 | | - --disable-verbs \ |
172 | | - --disable-psm3 \ |
173 | | - --disable-opx \ |
174 | | - --disable-usnic \ |
175 | | - --disable-rstream \ |
176 | | - --enable-efa && \ |
177 | | - make -j && \ |
178 | | - make install && \ |
179 | | - $SUDO ldconfig \ |
180 | | -) |
181 | | - |
182 | | -( \ |
183 | | - cd /tmp && \ |
184 | | - git clone --depth 1 https://github.com/etcd-cpp-apiv3/etcd-cpp-apiv3.git && \ |
185 | | - cd etcd-cpp-apiv3 && \ |
186 | | - mkdir build && cd build && \ |
187 | | - cmake .. && \ |
188 | | - make -j"$NPROC" && \ |
189 | | - $SUDO make install && \ |
190 | | - $SUDO ldconfig \ |
191 | | -) |
192 | | - |
193 | | -( \ |
194 | | - cd /tmp && \ |
195 | | - git clone --recurse-submodules --depth 1 --shallow-submodules https://github.com/aws/aws-sdk-cpp.git --branch 1.11.581 && \ |
196 | | - mkdir aws_sdk_build && \ |
197 | | - cd aws_sdk_build && \ |
198 | | - cmake ../aws-sdk-cpp/ -DCMAKE_BUILD_TYPE=Release -DBUILD_ONLY="s3" -DENABLE_TESTING=OFF -DCMAKE_INSTALL_PREFIX=/usr/local && \ |
199 | | - make -j"$NPROC" && \ |
200 | | - $SUDO make install |
201 | | -) |
202 | | - |
203 | | -( \ |
204 | | - cd /tmp && \ |
205 | | - git clone https://github.com/nvidia/gusli.git && \ |
206 | | - cd gusli && \ |
207 | | - $SUDO make all BUILD_RELEASE=1 BUILD_FOR_UNITEST=0 VERBOSE=1 ALLOW_USE_URING=0 && \ |
208 | | - $SUDO ldconfig |
209 | | -) |
210 | | - |
211 | | -( \ |
212 | | - cd /tmp && |
213 | | - git clone --depth 1 https://github.com/google/gtest-parallel.git && |
214 | | - mkdir -p ${INSTALL_DIR}/bin && |
215 | | - cp gtest-parallel/* ${INSTALL_DIR}/bin/ |
216 | | -) |
217 | | - |
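| 47 | +# Export search paths (libraries, headers, binaries, pkg-config, NIXL plugins, CMake) for the install directory and the libfabric prefix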
218 | 48 | export LD_LIBRARY_PATH="${INSTALL_DIR}/lib:${INSTALL_DIR}/lib/$ARCH-linux-gnu:${INSTALL_DIR}/lib64:$LD_LIBRARY_PATH:${LIBFABRIC_INSTALL_DIR}/lib" |
219 | 49 | export CPATH="${INSTALL_DIR}/include:${LIBFABRIC_INSTALL_DIR}/include:$CPATH" |
220 | 50 | export PATH="${INSTALL_DIR}/bin:$PATH" |
221 | 51 | export PKG_CONFIG_PATH="${INSTALL_DIR}/lib/pkgconfig:${INSTALL_DIR}/lib64/pkgconfig:${INSTALL_DIR}:${LIBFABRIC_INSTALL_DIR}/lib/pkgconfig:$PKG_CONFIG_PATH" |
222 | 52 | export NIXL_PLUGIN_DIR="${INSTALL_DIR}/lib/$ARCH-linux-gnu/plugins" |
223 | 53 | export CMAKE_PREFIX_PATH="${INSTALL_DIR}:${CMAKE_PREFIX_PATH}" |
224 | 54 |
|
225 | | -# Disabling CUDA IPC not to use NVLINK, as it slows down local |
226 | | -# UCX transfers and can cause contention with local collectives. |
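| 55 | +# Disable the UCX cuda_ipc transport so NVLINK is not used, as it slows down local UCX transfers and can cause contention with local collectives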
227 | 56 | export UCX_TLS=^cuda_ipc |
228 | 57 |
|
| 58 | +# Configure, build, and install NIXL, then copy the generated Python wheel into dist/
229 | 59 | # shellcheck disable=SC2086 |
230 | | -meson setup nixl_build --prefix=${INSTALL_DIR} -Ducx_path=${UCX_INSTALL_DIR} -Dbuild_docs=true -Drust=false ${EXTRA_BUILD_ARGS} -Dlibfabric_path="${LIBFABRIC_INSTALL_DIR}" |
| 60 | +meson setup nixl_build --prefix=${INSTALL_DIR} -Ducx_path=/usr -Dbuild_docs=true -Drust=false ${EXTRA_BUILD_ARGS} -Dlibfabric_path="${LIBFABRIC_INSTALL_DIR}" |
| 61 | +ninja -j"$NPROC" -C nixl_build && ninja -j"$NPROC" -C nixl_build install |
231 | 62 | mkdir -p dist && cp nixl_build/src/bindings/python/nixl-meta/nixl-*.whl dist/ |
232 | | -ninja -j${NPROC:-$(nproc)} -C nixl_build && ninja -j${NPROC:-$(nproc)} -C nixl_build install |
233 | | - |
234 | | -# TODO(kapila): Copy the nixl.pc file to the install directory if needed. |
235 | | -# cp ${BUILD_DIR}/nixl.pc ${INSTALL_DIR}/lib/pkgconfig/nixl.pc |
236 | 63 |
|
| 64 | +# Configure, build, and install nixlbench against the NIXL installation in INSTALL_DIR
237 | 65 | cd benchmark/nixlbench |
238 | 66 | meson setup nixlbench_build -Dnixl_path=${INSTALL_DIR} -Dprefix=${INSTALL_DIR} |
239 | 67 | ninja -j"$NPROC" -C nixlbench_build && ninja -j"$NPROC" -C nixlbench_build install |
0 commit comments