@@ -71,7 +71,7 @@ struct NvlsAdapter {
7171                          mscclpp::DeviceHandle<mscclpp::SwitchChannel>* nvlsOutChannels, size_t  channelInOffset,
7272                          size_t  channelOutOffset, size_t , int  rank, int  nRanksPerNode, int , size_t  nelems,
7373                          cudaStream_t stream, uint32_t *, uint32_t *, uint32_t *, uint32_t ) {
74- #if  defined(__CUDA_ARCH__) //  Skip the __CUDA_ARCH__ < 1000 since FP8 has not been supported for NVLS
74+ #if  defined(__CUDA_ARCH__)   //  Skip the __CUDA_ARCH__ < 1000 since FP8 has not been supported for NVLS
7575    if  constexpr  (std::is_same_v<T, __fp8_e4m3> || std::is_same_v<T, __fp8_e5m2>) {
7676      return  cudaErrorNotSupported;
7777    } else 
@@ -95,7 +95,7 @@ struct NvlsWithCopyAdapter {
9595                          mscclpp::DeviceHandle<mscclpp::SwitchChannel>*, size_t , size_t , size_t  scratchBufferSize,
9696                          int  rank, int  nRanksPerNode, int , size_t  nelems, cudaStream_t stream, uint32_t *, uint32_t *,
9797                          uint32_t *, uint32_t ) {
98- #if  defined(__CUDA_ARCH__) //  Skip the __CUDA_ARCH__ < 1000 since FP8 has not been supported for NVLS
98+ #if  defined(__CUDA_ARCH__)   //  Skip the __CUDA_ARCH__ < 1000 since FP8 has not been supported for NVLS
9999    if  constexpr  (std::is_same_v<T, __fp8_e4m3> || std::is_same_v<T, __fp8_e5m2>) {
100100      return  cudaErrorNotSupported;
101101    } else 
0 commit comments