Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions tests/models/quantization/test_bitsandbytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,13 @@
from ...utils import compare_two_settings, multi_gpu_test
from ..utils import check_embeddings_close, check_logprobs_close

pytestmark = pytest.mark.skipif(
current_platform.is_rocm(),
reason="bitsandbytes quantization not supported on ROCm (CUDA-only kernels)",
)
if current_platform.is_rocm():
from vllm.platforms.rocm import on_gfx9

pytestmark = pytest.mark.skipif(
on_gfx9(),
reason="bitsandbytes quantization not supported on gfx9 "
"(warp size 64 limitation)",

Check failure on line 22 in tests/models/quantization/test_bitsandbytes.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

tests/models/quantization/test_bitsandbytes.py:22:89: E501 Line too long (91 > 88)
)

models_4bit_to_test = [
("facebook/opt-125m", "quantize opt model inflight"),
Expand Down
3 changes: 3 additions & 0 deletions vllm/platforms/rocm.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ class RocmPlatform(Platform):
"petit_nvfp4",
"torchao",
]
# bitsandbytes quantization not supported on gfx9 (warp size 64 limitation)
if not on_gfx9():
supported_quantization += ["bitsandbytes"]

@classmethod
def get_vit_attn_backend(
Expand Down
Loading