2 changes: 0 additions & 2 deletions .ci/helion/install.sh
@@ -36,5 +36,3 @@ tritonbench_dir=$(dirname "$(readlink -f "$0")")/../..
cd ${tritonbench_dir}

python install.py --helion
-# Helion requires tritonbench installed as a library
-pip install -e .
2 changes: 1 addition & 1 deletion .github/workflows/docker.yaml
@@ -57,12 +57,12 @@ jobs:
if: github.event_name != 'pull_request'
run: |
# Extract pytorch version from the docker
-set -x
PYTORCH_VERSION=$(docker run -e SETUP_SCRIPT="${SETUP_SCRIPT}" ghcr.io/meta-pytorch/tritonbench:latest bash -c '. "${SETUP_SCRIPT}"; python -c "import torch; print(torch.__version__)"')
export DOCKER_TAG=$(awk '{match($0, /dev[0-9]+/, arr); print arr[0]}' <<< "${PYTORCH_VERSION}")
docker tag ghcr.io/meta-pytorch/tritonbench:latest ghcr.io/meta-pytorch/tritonbench:${DOCKER_TAG}
docker push ghcr.io/meta-pytorch/tritonbench:${DOCKER_TAG}
docker push ghcr.io/meta-pytorch/tritonbench:latest
+set -x
if [[ -n "${{ github.event.inputs.tags }}" ]]; then
IFS=',' read -ra tags <<< "${{ github.event.inputs.tags }}"
for tag in "${tags[@]}"; do
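Note: DOCKER_TAG above is just the dev<date> suffix pulled out of the nightly version string. A minimal Python sketch of the same extraction, using a made-up version string for illustration:

import re

# Hypothetical nightly version string as reported by torch.__version__ inside the image
pytorch_version = "2.6.0.dev20241105+cu128"

# Same idea as the awk match(/dev[0-9]+/) in the workflow: keep only the dev<date> token
match = re.search(r"dev[0-9]+", pytorch_version)
docker_tag = match.group(0) if match else "latest"
print(docker_tag)  # dev20241105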
32 changes: 12 additions & 20 deletions docker/tritonbench-nightly.dockerfile
@@ -26,15 +26,14 @@ RUN sudo chmod +x /usr/bin/switch-cuda.sh
RUN sudo mkdir -p /workspace; sudo chown runner:runner /workspace

# We assume that the host NVIDIA driver binaries and libraries are mapped to the docker filesystem
-# Install CUDA 12.8 build toolchains
-RUN cd /workspace; mkdir -p pytorch-ci; cd pytorch-ci; wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cuda.sh
-RUN cd /workspace/pytorch-ci; wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cudnn.sh || true && \
-wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_nccl.sh && \
-wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cusparselt.sh && \
-mkdir ci_commit_pins && cd ci_commit_pins && \
-wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/ci_commit_pins/nccl-cu12.txt
-
-RUN sudo bash -c "set -x;export OVERRIDE_GENCODE=\"${OVERRIDE_GENCODE}\" OVERRIDE_GENCODE_CUDNN=\"${OVERRIDE_GENCODE_CUDNN}\"; cd /workspace/pytorch-ci; bash install_cuda.sh 12.8"
+# Install CUDA 12.8 build toolchains (only useful for bisection)
+# RUN cd /workspace; mkdir -p pytorch-ci; cd pytorch-ci; wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cuda.sh
+# RUN cd /workspace/pytorch-ci; wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cudnn.sh || true && \
+# wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_nccl.sh && \
+# wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cusparselt.sh && \
+# mkdir ci_commit_pins && cd ci_commit_pins && \
+# wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/ci_commit_pins/nccl-cu12.txt
+# RUN sudo bash -c "set -x;export OVERRIDE_GENCODE=\"${OVERRIDE_GENCODE}\" OVERRIDE_GENCODE_CUDNN=\"${OVERRIDE_GENCODE_CUDNN}\"; cd /workspace/pytorch-ci; bash install_cuda.sh 12.8"

# Checkout TritonBench and submodules
RUN git clone --recurse-submodules -b "${TRITONBENCH_BRANCH}" --single-branch \
@@ -47,30 +46,22 @@ RUN echo "\
. /workspace/miniconda3/etc/profile.d/conda.sh\n\
conda activate base\n\
export CONDA_HOME=/workspace/miniconda3\n\
-export CUDA_HOME=/usr/local/cuda\n\
-export PATH=\${CUDA_HOME}/bin:/home/runner/bin\${PATH:+:\${PATH}}\n\
-export LD_LIBRARY_PATH=\${CUDA_HOME}/lib64\${LD_LIBRARY_PATH:+:\${LD_LIBRARY_PATH}}\n\
-export LIBRARY_PATH=\${CUDA_HOME}/lib64\${LIBRARY_PATHPATH:+:\${LIBRARY_PATHPATH}}\n" >> /workspace/setup_instance.sh
+export PATH=/home/runner/bin\${PATH:+:\${PATH}}\n" >> /workspace/setup_instance.sh

RUN echo ". /workspace/setup_instance.sh\n" >> ${HOME}/.bashrc

-# Setup conda env and CUDA
+# Setup conda env
RUN cd /workspace/tritonbench && \
. ${SETUP_SCRIPT} && \
python tools/python_utils.py --create-conda-env ${CONDA_ENV} && \
echo "if [ -z \${CONDA_ENV} ]; then export CONDA_ENV=${CONDA_ENV}; fi" >> /workspace/setup_instance.sh && \
echo "conda activate \${CONDA_ENV}" >> /workspace/setup_instance.sh

-# Preserve env in sudo
-RUN cd /workspace/tritonbench && \
-. ${SETUP_SCRIPT} && \
-sudo -E python -m tools.cuda_utils --setup-cuda-softlink
-
# Install PyTorch nightly and verify the date is correct
RUN cd /workspace/tritonbench && \
. ${SETUP_SCRIPT} && \
python -m tools.cuda_utils --install-torch-deps && \
-python -m tools.cuda_utils --install-torch-nightly
+python -m tools.cuda_utils --install-torch-nightly --cuda

# Check the installed version of nightly if needed
RUN cd /workspace/tritonbench && \
@@ -106,6 +97,7 @@ RUN cd /workspace/tritonbench && \
bash .ci/triton/install.sh --conda-env "${CONDA_ENV_META_TRITON}" \
--repo facebookexperimental/triton --commit b939601a9a376342985ab27bc649e02d4288afc6 --side single \
--install-dir /workspace/meta-triton
+
# Install Helion in the meta-triton conda env
RUN cd /workspace/tritonbench && \
bash .ci/helion/install.sh --conda-env "${CONDA_ENV_META_TRITON}"
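With the CUDA 12.8 toolchain install commented out, the image relies on the CUDA runtime libraries bundled in the PyTorch nightly wheel installed via tools.cuda_utils --install-torch-nightly --cuda above. A small sanity check that could be run inside the container, using only standard torch APIs; the exact version strings depend on whichever nightly wheel gets installed:

import torch

# The nightly wheel ships its own CUDA runtime, so no /usr/local/cuda toolchain is needed at run time.
print(torch.__version__)          # e.g. a dev<date> nightly build
print(torch.version.cuda)         # CUDA version the wheel was built against
print(torch.cuda.is_available())  # True once the host NVIDIA driver is mapped into the container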
2 changes: 1 addition & 1 deletion docker/tritonbench-rocm-nightly.dockerfile
@@ -43,7 +43,7 @@ RUN cd /workspace/tritonbench && \
RUN cd /workspace/tritonbench && \
. ${SETUP_SCRIPT} && \
python -m tools.cuda_utils --install-torch-deps && \
-python -m tools.cuda_utils --install-torch-nightly
+python -m tools.cuda_utils --install-torch-nightly --hip


# Install Tritonbench
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -10,6 +10,8 @@ dependencies = [
    "triton",
    "psutil",
    "tabulate",
+    "matplotlib",
+    "packaging",
]

[tool.setuptools.packages.find]
29 changes: 27 additions & 2 deletions tools/cuda_utils.py
@@ -133,9 +133,19 @@ def get_toolkit_version_from_torch(key="pytorch_url") -> str:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--toolkit-version",
-        default=DEFAULT_TOOLKIT_VERSION,
+        default=None,
        help="Specify the default CUDA/HIP version",
    )
+    parser.add_argument(
+        "--cuda",
+        action="store_true",
+        help="Setup the environment for CUDA",
+    )
+    parser.add_argument(
+        "--hip",
+        action="store_true",
+        help="Setup the environment for ROCm",
+    )
    parser.add_argument(
        "--setup-cuda-softlink",
        action="store_true",
@@ -166,6 +176,21 @@ def get_toolkit_version_from_torch(key="pytorch_url") -> str:
        help="Force Pytorch nightly release date version. Date string format: YYmmdd",
    )
    args = parser.parse_args()
+    if args.toolkit_version is None:
+        if args.cuda:
+            args.toolkit_version = DEFAULT_CUDA_VERSION
+            toolkit_mapping = CUDA_VERSION_MAP
+        elif args.hip:
+            args.toolkit_version = DEFAULT_HIP_VERSION
+            toolkit_mapping = HIP_VERSION_MAP
+        elif IS_CUDA:
+            args.toolkit_version = DEFAULT_CUDA_VERSION
+            toolkit_mapping = CUDA_VERSION_MAP
+        else:
+            args.toolkit_version = DEFAULT_HIP_VERSION
+            toolkit_mapping = HIP_VERSION_MAP
+    else:
+        toolkit_mapping = CUDA_VERSION_MAP if args.cuda or IS_CUDA else HIP_VERSION_MAP
    if args.setup_cuda_softlink:
        assert IS_CUDA, "Error: CUDA is not available on this machine."
        setup_cuda_softlink(cuda_version=args.toolkit_version)
@@ -177,7 +202,7 @@ def get_toolkit_version_from_torch(key="pytorch_url") -> str:
        install_torch_deps()
        install_torch_build_deps()
    if args.install_torch_nightly:
-        toolkit_version = TOOLKIT_MAPPING[args.toolkit_version]["pytorch_url"]
+        toolkit_version = toolkit_mapping[args.toolkit_version]["pytorch_url"]
        install_pytorch_nightly(toolkit_version=toolkit_version, env=os.environ)
    if args.check_torch_nightly_version:
        from .torch_utils import check_torch_nightly_version
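The new flags make the CUDA/ROCm choice explicit instead of relying only on auto-detection, and --toolkit-version still overrides the default when given. A hedged usage sketch mirroring how the dockerfiles above call the module (run from the tritonbench checkout; flag names are taken from the diff):

import subprocess
import sys

# Explicitly select the CUDA toolkit mapping; "--hip" would select the ROCm mapping instead,
# and omitting both falls back to auto-detection via IS_CUDA.
subprocess.check_call([sys.executable, "-m", "tools.cuda_utils", "--install-torch-deps"])
subprocess.check_call([sys.executable, "-m", "tools.cuda_utils", "--install-torch-nightly", "--cuda"])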
6 changes: 5 additions & 1 deletion tools/helion/install.py
@@ -10,16 +10,20 @@
HELION_INSTALL_PATH = REPO_PATH.joinpath(".install")
HELION_REPO = "https://github.com/pytorch/helion.git"
HEION_COMMIT = "51580b43bd65978a28b6e5bcd6f625485f02cba1"
+BUILD_CONSTRAINTS_FILE = REPO_PATH.joinpath("build", "constraints.txt")


def install_helion():
    HELION_INSTALL_PATH.mkdir(parents=True, exist_ok=True)
    HELION_PATH = HELION_INSTALL_PATH.joinpath("helion")
+    constraints_parameters = ["-c", str(BUILD_CONSTRAINTS_FILE.resolve())]
    if HELION_PATH.exists():
        shutil.rmtree(HELION_PATH)
    git_clone_cmd = ["git", "clone", HELION_REPO]
    subprocess.check_call(git_clone_cmd, cwd=HELION_INSTALL_PATH)
    git_checkout_cmd = ["git", "checkout", HEION_COMMIT]
    subprocess.check_call(git_checkout_cmd, cwd=HELION_PATH)
-    install_helion_cmd = ["pip", "install", "-e", ".[dev]"]
+    install_tritonbench_cmd = ["pip", "install", "--no-deps", "-e", "."] + constraints_parameters
+    subprocess.check_call(install_tritonbench_cmd, cwd=REPO_PATH)
+    install_helion_cmd = ["pip", "install", "-e", ".[dev]"] + constraints_parameters
    subprocess.check_call(install_helion_cmd, cwd=HELION_PATH)
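Both the Helion and Quack install steps now pass a shared build/constraints.txt so the editable installs cannot resolve torch/triton to versions other than the ones already set up in the environment. A minimal sketch of the same pip constraints pattern; the pins shown are made-up examples, not the real file contents:

import subprocess

# A constraints file only bounds resolution; nothing in it is installed unless requested.
# build/constraints.txt might contain pins such as:
#   torch==2.6.0.dev20241105+cu128
#   triton==3.2.0
subprocess.check_call(["pip", "install", "--no-deps", "-e", ".", "-c", "build/constraints.txt"])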
13 changes: 5 additions & 8 deletions tools/quack/install.py
@@ -9,24 +9,21 @@
CURRENT_DIR = Path(os.path.abspath(__file__)).parent

QUACK_REPO = "https://github.com/Dao-AILab/quack.git"
-QUACK_SHA = "bceb632dbac9bb0b55d48a7ed3ad204bd952fcb2"
+QUACK_SHA = "21f1c053b1c35a3aaf20002107d0704f364de10a"

QUACK_INSTALL_PATH = REPO_PATH.joinpath(".install")
-
-
-def install_quack():
-    cmd = ["pip", "install", "-e", "."]
-    subprocess.check_call(cmd, cwd=QUACK_PATH)
+BUILD_CONSTRAINTS_FILE = REPO_PATH.joinpath("build", "constraints.txt")


def install_quack():
    QUACK_INSTALL_PATH.mkdir(parents=True, exist_ok=True)
    constraints_parameters = ["-c", str(BUILD_CONSTRAINTS_FILE.resolve())]
    quack_path = QUACK_INSTALL_PATH.joinpath("quack")
    if quack_path.exists():
        shutil.rmtree(quack_path)
    git_clone_cmd = ["git", "clone", QUACK_REPO]
    subprocess.check_call(git_clone_cmd, cwd=QUACK_INSTALL_PATH)
    git_checkout_cmd = ["git", "checkout", QUACK_SHA]
    subprocess.check_call(git_checkout_cmd, cwd=quack_path)
-    install_helion_cmd = ["pip", "install", "-e", ".[dev]"]
-    subprocess.check_call(install_helion_cmd, cwd=quack_path)
+    install_quack_cmd = ["pip", "install", "-e", ".[dev]"] + constraints_parameters
+    subprocess.check_call(install_quack_cmd, cwd=quack_path)
3 changes: 1 addition & 2 deletions tritonbench/utils/gpu_utils.py
@@ -22,8 +22,6 @@
MTIA_COMPUTE_SPECS = {}
MTIA_MEMORY_SPECS = {}

-from tritonbench.utils.env_utils import is_hip
-

# NVIDIA A100 GPU Spec:
# https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf
@@ -263,6 +261,7 @@ def has_nvidia_smi() -> bool:

def get_amd_device_name() -> str:
    import torch
+    from tritonbench.utils.env_utils import is_hip

    assert is_hip(), "get_amd_device_name() is only supported on AMD GPUs"
    current_device = torch.cuda.current_device()