Skip to content

Commit 6bcd8be

Browse files
pciolkosz, davebayer
authored and committed
Move launch API from cudax to libcu++ (#6667)
* Move launch API from cudax to libcu++
* Review feedback and test fixes
* More fixes
* GCC7 fix
* Update libcudacxx/include/cuda/__launch/configuration.h
* Fix old GCC

---------

Co-authored-by: David Bayer <[email protected]>
(cherry picked from commit 4ab39a7)
1 parent a093d47 commit 6bcd8be

File tree

21 files changed

+974
-322
lines changed

21 files changed

+974
-322
lines changed

cudax/examples/simple_p2p.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ void test_cross_device_access_from_kernel(
130130
dev1_stream.wait(dev0_stream);
131131

132132
// Kernel launch configuration
133-
auto config = cudax::distribute<512>(dev0_buffer.size());
133+
auto config = cuda::distribute<512>(dev0_buffer.size());
134134

135135
// Run kernel on GPU 1, reading input from the GPU 0 buffer, writing output to the GPU 1 buffer
136136
printf("Run kernel on GPU%d, taking source data from GPU%d and writing to "

cudax/examples/vector_add.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ try
9292

9393
// Define the kernel launch parameters
9494
constexpr int threadsPerBlock = 256;
95-
auto config = cudax::distribute<threadsPerBlock>(numElements);
95+
auto config = cuda::distribute<threadsPerBlock>(numElements);
9696

9797
// Launch the vectorAdd kernel
9898
printf(

cudax/include/cuda/experimental/__execution/bulk.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#endif // no system header
2323

2424
#include <cuda/__cmath/ceil_div.h>
25+
#include <cuda/__launch/configuration.h>
2526
#include <cuda/__utility/immovable.h>
2627
#include <cuda/std/__concepts/arithmetic.h>
2728
#include <cuda/std/__concepts/same_as.h>
@@ -45,7 +46,6 @@
4546
#include <cuda/experimental/__execution/transform_completion_signatures.cuh>
4647
#include <cuda/experimental/__execution/transform_sender.cuh>
4748
#include <cuda/experimental/__execution/type_traits.cuh>
48-
#include <cuda/experimental/__launch/configuration.cuh>
4949

5050
#include <cuda/experimental/__execution/prologue.cuh>
5151

@@ -73,7 +73,7 @@ struct _CCCL_TYPE_VISIBILITY_DEFAULT __attrs_t
7373
{
7474
constexpr int __block_threads = 256;
7575
const int __grid_blocks = ::cuda::ceil_div(static_cast<int>(__shape), __block_threads);
76-
return experimental::make_config(block_dims<__block_threads>(), grid_dims(__grid_blocks));
76+
return make_config(block_dims<__block_threads>(), grid_dims(__grid_blocks));
7777
}
7878

7979
using __launch_config_t = decltype(__get_launch_config(_Shape()));

cudax/include/cuda/experimental/__execution/queries.cuh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ _CCCL_SUPPRESS_DEPRECATED_PUSH
2525
#include <cuda/std/__memory/allocator.h>
2626
_CCCL_SUPPRESS_DEPRECATED_POP
2727

28+
#include <cuda/__launch/configuration.h>
2829
#include <cuda/std/__execution/env.h>
2930
#include <cuda/std/__type_traits/enable_if.h>
3031
#include <cuda/std/__type_traits/is_callable.h>
@@ -39,7 +40,6 @@ _CCCL_SUPPRESS_DEPRECATED_POP
3940
#include <cuda/experimental/__execution/stop_token.cuh>
4041
#include <cuda/experimental/__execution/type_traits.cuh>
4142
#include <cuda/experimental/__execution/utility.cuh>
42-
#include <cuda/experimental/__launch/configuration.cuh>
4343

4444
#include <cuda/experimental/__execution/prologue.cuh>
4545

@@ -314,14 +314,14 @@ _CCCL_GLOBAL_CONSTANT struct get_forward_progress_guarantee_t
314314
} get_forward_progress_guarantee{};
315315

316316
// By default, CUDA kernels are launched with a single thread and a single block.
317-
using __single_threaded_config_base_t = decltype(experimental::make_config(grid_dims<1>(), block_dims<1>()));
317+
using __single_threaded_config_base_t = decltype(make_config(grid_dims<1>(), block_dims<1>()));
318318

319319
// We hide the complicated type of the default launch configuration so diagnostics are
320320
// easier to read.
321321
struct __single_threaded_config_t : __single_threaded_config_base_t
322322
{
323323
_CCCL_HOST_API constexpr __single_threaded_config_t() noexcept
324-
: __single_threaded_config_base_t{experimental::make_config(grid_dims<1>(), block_dims<1>())}
324+
: __single_threaded_config_base_t{make_config(grid_dims<1>(), block_dims<1>())}
325325
{}
326326
};
327327

cudax/include/cuda/experimental/__execution/stream/adaptor.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
# pragma system_header
2222
#endif // no system header
2323

24+
#include <cuda/__launch/configuration.h>
2425
#include <cuda/__utility/immovable.h>
2526
#include <cuda/std/__concepts/concept_macros.h>
2627
#include <cuda/std/__memory/unique_ptr.h>
@@ -36,7 +37,6 @@
3637
#include <cuda/experimental/__execution/utility.cuh>
3738
#include <cuda/experimental/__execution/variant.cuh>
3839
#include <cuda/experimental/__execution/visit.cuh>
39-
#include <cuda/experimental/__launch/configuration.cuh>
4040
#include <cuda/experimental/__launch/launch.cuh>
4141
#include <cuda/experimental/__stream/stream_ref.cuh>
4242

0 commit comments

Comments
 (0)