2 changes: 0 additions & 2 deletions .ci/helion/install.sh
@@ -36,5 +36,3 @@ tritonbench_dir=$(dirname "$(readlink -f "$0")")/../..
cd ${tritonbench_dir}

python install.py --helion
-# Helion requires tritonbench installed as a library
-pip install -e .
2 changes: 1 addition & 1 deletion .github/workflows/docker.yaml
@@ -57,12 +57,12 @@ jobs:
if: github.event_name != 'pull_request'
run: |
# Extract pytorch version from the docker
-set -x
PYTORCH_VERSION=$(docker run -e SETUP_SCRIPT="${SETUP_SCRIPT}" ghcr.io/meta-pytorch/tritonbench:latest bash -c '. "${SETUP_SCRIPT}"; python -c "import torch; print(torch.__version__)"')
export DOCKER_TAG=$(awk '{match($0, /dev[0-9]+/, arr); print arr[0]}' <<< "${PYTORCH_VERSION}")
docker tag ghcr.io/meta-pytorch/tritonbench:latest ghcr.io/meta-pytorch/tritonbench:${DOCKER_TAG}
docker push ghcr.io/meta-pytorch/tritonbench:${DOCKER_TAG}
docker push ghcr.io/meta-pytorch/tritonbench:latest
+set -x
if [[ -n "${{ github.event.inputs.tags }}" ]]; then
IFS=',' read -ra tags <<< "${{ github.event.inputs.tags }}"
for tag in "${tags[@]}"; do
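Note: DOCKER_TAG above is just the dev<date> suffix pulled out of the nightly version string. A minimal Python sketch of the same extraction, using a made-up version string for illustration:

import re

# Hypothetical nightly version string as reported by torch.__version__ inside the image
pytorch_version = "2.6.0.dev20241105+cu128"

# Same idea as the awk match(/dev[0-9]+/) in the workflow: keep only the dev<date> token
match = re.search(r"dev[0-9]+", pytorch_version)
docker_tag = match.group(0) if match else "latest"
print(docker_tag)  # dev20241105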
32 changes: 12 additions & 20 deletions docker/tritonbench-nightly.dockerfile
@@ -26,15 +26,14 @@ RUN sudo chmod +x /usr/bin/switch-cuda.sh
RUN sudo mkdir -p /workspace; sudo chown runner:runner /workspace

# We assume that the host NVIDIA driver binaries and libraries are mapped to the docker filesystem
-# Install CUDA 12.8 build toolchains
-RUN cd /workspace; mkdir -p pytorch-ci; cd pytorch-ci; wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cuda.sh
-RUN cd /workspace/pytorch-ci; wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cudnn.sh || true && \
-wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_nccl.sh && \
-wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cusparselt.sh && \
-mkdir ci_commit_pins && cd ci_commit_pins && \
-wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/ci_commit_pins/nccl-cu12.txt
-
-RUN sudo bash -c "set -x;export OVERRIDE_GENCODE=\"${OVERRIDE_GENCODE}\" OVERRIDE_GENCODE_CUDNN=\"${OVERRIDE_GENCODE_CUDNN}\"; cd /workspace/pytorch-ci; bash install_cuda.sh 12.8"
+# Install CUDA 12.8 build toolchains (only useful for bisection)
+# RUN cd /workspace; mkdir -p pytorch-ci; cd pytorch-ci; wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cuda.sh
+# RUN cd /workspace/pytorch-ci; wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cudnn.sh || true && \
+# wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_nccl.sh && \
+# wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cusparselt.sh && \
+# mkdir ci_commit_pins && cd ci_commit_pins && \
+# wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/ci_commit_pins/nccl-cu12.txt
+# RUN sudo bash -c "set -x;export OVERRIDE_GENCODE=\"${OVERRIDE_GENCODE}\" OVERRIDE_GENCODE_CUDNN=\"${OVERRIDE_GENCODE_CUDNN}\"; cd /workspace/pytorch-ci; bash install_cuda.sh 12.8"

# Checkout TritonBench and submodules
RUN git clone --recurse-submodules -b "${TRITONBENCH_BRANCH}" --single-branch \
@@ -47,30 +46,22 @@ RUN echo "\
. /workspace/miniconda3/etc/profile.d/conda.sh\n\
conda activate base\n\
export CONDA_HOME=/workspace/miniconda3\n\
-export CUDA_HOME=/usr/local/cuda\n\
-export PATH=\${CUDA_HOME}/bin:/home/runner/bin\${PATH:+:\${PATH}}\n\
-export LD_LIBRARY_PATH=\${CUDA_HOME}/lib64\${LD_LIBRARY_PATH:+:\${LD_LIBRARY_PATH}}\n\
-export LIBRARY_PATH=\${CUDA_HOME}/lib64\${LIBRARY_PATHPATH:+:\${LIBRARY_PATHPATH}}\n" >> /workspace/setup_instance.sh
+export PATH=/home/runner/bin\${PATH:+:\${PATH}}\n" >> /workspace/setup_instance.sh

RUN echo ". /workspace/setup_instance.sh\n" >> ${HOME}/.bashrc

-# Setup conda env and CUDA
+# Setup conda env
RUN cd /workspace/tritonbench && \
. ${SETUP_SCRIPT} && \
python tools/python_utils.py --create-conda-env ${CONDA_ENV} && \
echo "if [ -z \${CONDA_ENV} ]; then export CONDA_ENV=${CONDA_ENV}; fi" >> /workspace/setup_instance.sh && \
echo "conda activate \${CONDA_ENV}" >> /workspace/setup_instance.sh

-# Preserve env in sudo
-RUN cd /workspace/tritonbench && \
-. ${SETUP_SCRIPT} && \
-sudo -E python -m tools.cuda_utils --setup-cuda-softlink
-
# Install PyTorch nightly and verify the date is correct
RUN cd /workspace/tritonbench && \
. ${SETUP_SCRIPT} && \
python -m tools.cuda_utils --install-torch-deps && \
-python -m tools.cuda_utils --install-torch-nightly
+python -m tools.cuda_utils --install-torch-nightly --cuda

# Check the installed version of nightly if needed
RUN cd /workspace/tritonbench && \
@@ -106,6 +97,7 @@ RUN cd /workspace/tritonbench && \
bash .ci/triton/install.sh --conda-env "${CONDA_ENV_META_TRITON}" \
--repo facebookexperimental/triton --commit b939601a9a376342985ab27bc649e02d4288afc6 --side single \
--install-dir /workspace/meta-triton
+
# Install Helion in the meta-triton conda env
RUN cd /workspace/tritonbench && \
bash .ci/helion/install.sh --conda-env "${CONDA_ENV_META_TRITON}"
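With the CUDA 12.8 toolchain install commented out, the image relies on the CUDA runtime libraries bundled in the PyTorch nightly wheel installed via tools.cuda_utils --install-torch-nightly --cuda above. A small sanity check that could be run inside the container, using only standard torch APIs; the exact version strings depend on whichever nightly wheel gets installed:

import torch

# The nightly wheel ships its own CUDA runtime, so no /usr/local/cuda toolchain is needed at run time.
print(torch.__version__)          # e.g. a dev<date> nightly build
print(torch.version.cuda)         # CUDA version the wheel was built against
print(torch.cuda.is_available())  # True once the host NVIDIA driver is mapped into the container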
2 changes: 1 addition & 1 deletion docker/tritonbench-rocm-nightly.dockerfile
@@ -43,7 +43,7 @@ RUN cd /workspace/tritonbench && \
RUN cd /workspace/tritonbench && \
. ${SETUP_SCRIPT} && \
python -m tools.cuda_utils --install-torch-deps && \
-python -m tools.cuda_utils --install-torch-nightly
+python -m tools.cuda_utils --install-torch-nightly --hip


# Install Tritonbench
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -10,6 +10,8 @@ dependencies = [
    "triton",
    "psutil",
    "tabulate",
+    "matplotlib",
+    "packaging",
]

[tool.setuptools.packages.find]
29 changes: 27 additions & 2 deletions tools/cuda_utils.py
@@ -133,9 +133,19 @@ def get_toolkit_version_from_torch(key="pytorch_url") -> str:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--toolkit-version",
-        default=DEFAULT_TOOLKIT_VERSION,
+        default=None,
        help="Specify the default CUDA/HIP version",
    )
+    parser.add_argument(
+        "--cuda",
+        action="store_true",
+        help="Setup the environment for CUDA",
+    )
+    parser.add_argument(
+        "--hip",
+        action="store_true",
+        help="Setup the environment for ROCm",
+    )
    parser.add_argument(
        "--setup-cuda-softlink",
        action="store_true",
@@ -166,6 +176,21 @@ def get_toolkit_version_from_torch(key="pytorch_url") -> str:
        help="Force Pytorch nightly release date version. Date string format: YYmmdd",
    )
    args = parser.parse_args()
+    if args.toolkit_version is None:
+        if args.cuda:
+            args.toolkit_version = DEFAULT_CUDA_VERSION
+            toolkit_mapping = CUDA_VERSION_MAP
+        elif args.hip:
+            args.toolkit_version = DEFAULT_HIP_VERSION
+            toolkit_mapping = HIP_VERSION_MAP
+        elif IS_CUDA:
+            args.toolkit_version = DEFAULT_CUDA_VERSION
+            toolkit_mapping = CUDA_VERSION_MAP
+        else:
+            args.toolkit_version = DEFAULT_HIP_VERSION
+            toolkit_mapping = HIP_VERSION_MAP
+    else:
+        toolkit_mapping = CUDA_VERSION_MAP if args.cuda or IS_CUDA else HIP_VERSION_MAP
    if args.setup_cuda_softlink:
        assert IS_CUDA, "Error: CUDA is not available on this machine."
        setup_cuda_softlink(cuda_version=args.toolkit_version)
@@ -177,7 +202,7 @@ def get_toolkit_version_from_torch(key="pytorch_url") -> str:
        install_torch_deps()
        install_torch_build_deps()
    if args.install_torch_nightly:
-        toolkit_version = TOOLKIT_MAPPING[args.toolkit_version]["pytorch_url"]
+        toolkit_version = toolkit_mapping[args.toolkit_version]["pytorch_url"]
        install_pytorch_nightly(toolkit_version=toolkit_version, env=os.environ)
    if args.check_torch_nightly_version:
        from .torch_utils import check_torch_nightly_version
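The new flags make the CUDA/ROCm choice explicit instead of relying only on auto-detection, and --toolkit-version still overrides the default when given. A hedged usage sketch mirroring how the dockerfiles above call the module (run from the tritonbench checkout; flag names are taken from the diff):

import subprocess
import sys

# Explicitly select the CUDA toolkit mapping; "--hip" would select the ROCm mapping instead,
# and omitting both falls back to auto-detection via IS_CUDA.
subprocess.check_call([sys.executable, "-m", "tools.cuda_utils", "--install-torch-deps"])
subprocess.check_call([sys.executable, "-m", "tools.cuda_utils", "--install-torch-nightly", "--cuda"])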
6 changes: 5 additions & 1 deletion tools/helion/install.py
@@ -10,16 +10,20 @@
HELION_INSTALL_PATH = REPO_PATH.joinpath(".install")
HELION_REPO = "https://github.com/pytorch/helion.git"
HEION_COMMIT = "51580b43bd65978a28b6e5bcd6f625485f02cba1"
+BUILD_CONSTRAINTS_FILE = REPO_PATH.joinpath("build", "constraints.txt")


def install_helion():
    HELION_INSTALL_PATH.mkdir(parents=True, exist_ok=True)
    HELION_PATH = HELION_INSTALL_PATH.joinpath("helion")
+    constraints_parameters = ["-c", str(BUILD_CONSTRAINTS_FILE.resolve())]
    if HELION_PATH.exists():
        shutil.rmtree(HELION_PATH)
    git_clone_cmd = ["git", "clone", HELION_REPO]
    subprocess.check_call(git_clone_cmd, cwd=HELION_INSTALL_PATH)
    git_checkout_cmd = ["git", "checkout", HEION_COMMIT]
    subprocess.check_call(git_checkout_cmd, cwd=HELION_PATH)
-    install_helion_cmd = ["pip", "install", "-e", ".[dev]"]
+    install_tritonbench_cmd = ["pip", "install", "--no-deps", "-e", "."] + constraints_parameters
+    subprocess.check_call(install_tritonbench_cmd, cwd=REPO_PATH)
+    install_helion_cmd = ["pip", "install", "-e", ".[dev]"] + constraints_parameters
    subprocess.check_call(install_helion_cmd, cwd=HELION_PATH)
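Both the Helion and Quack install steps now pass a shared build/constraints.txt so the editable installs cannot resolve torch/triton to versions other than the ones already set up in the environment. A minimal sketch of the same pip constraints pattern; the pins shown are made-up examples, not the real file contents:

import subprocess

# A constraints file only bounds resolution; nothing in it is installed unless requested.
# build/constraints.txt might contain pins such as:
#   torch==2.6.0.dev20241105+cu128
#   triton==3.2.0
subprocess.check_call(["pip", "install", "--no-deps", "-e", ".", "-c", "build/constraints.txt"])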
13 changes: 5 additions & 8 deletions tools/quack/install.py
@@ -9,24 +9,21 @@
CURRENT_DIR = Path(os.path.abspath(__file__)).parent

QUACK_REPO = "https://github.com/Dao-AILab/quack.git"
-QUACK_SHA = "bceb632dbac9bb0b55d48a7ed3ad204bd952fcb2"
+QUACK_SHA = "21f1c053b1c35a3aaf20002107d0704f364de10a"

QUACK_INSTALL_PATH = REPO_PATH.joinpath(".install")
-
-
-def install_quack():
-    cmd = ["pip", "install", "-e", "."]
-    subprocess.check_call(cmd, cwd=QUACK_PATH)
+BUILD_CONSTRAINTS_FILE = REPO_PATH.joinpath("build", "constraints.txt")


def install_quack():
    QUACK_INSTALL_PATH.mkdir(parents=True, exist_ok=True)
    constraints_parameters = ["-c", str(BUILD_CONSTRAINTS_FILE.resolve())]
    quack_path = QUACK_INSTALL_PATH.joinpath("quack")
    if quack_path.exists():
        shutil.rmtree(quack_path)
    git_clone_cmd = ["git", "clone", QUACK_REPO]
    subprocess.check_call(git_clone_cmd, cwd=QUACK_INSTALL_PATH)
    git_checkout_cmd = ["git", "checkout", QUACK_SHA]
    subprocess.check_call(git_checkout_cmd, cwd=quack_path)
-    install_helion_cmd = ["pip", "install", "-e", ".[dev]"]
-    subprocess.check_call(install_helion_cmd, cwd=quack_path)
+    install_quack_cmd = ["pip", "install", "-e", ".[dev]"] + constraints_parameters
+    subprocess.check_call(install_quack_cmd, cwd=quack_path)
3 changes: 1 addition & 2 deletions tritonbench/utils/gpu_utils.py
@@ -22,8 +22,6 @@
MTIA_COMPUTE_SPECS = {}
MTIA_MEMORY_SPECS = {}

-from tritonbench.utils.env_utils import is_hip
-

# NVIDIA A100 GPU Spec:
# https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf
@@ -263,6 +261,7 @@ def has_nvidia_smi() -> bool:

def get_amd_device_name() -> str:
    import torch
+    from tritonbench.utils.env_utils import is_hip

    assert is_hip(), "get_amd_device_name() is only supported on AMD GPUs"
    current_device = torch.cuda.current_device()