Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
6ded2ab
Remove code duplication in fwd conv builder tests.
vpietila-amd Nov 3, 2025
9fc3d64
Rename test config file.
vpietila-amd Nov 3, 2025
0d68298
Separate test configs from other testing assets.
vpietila-amd Nov 3, 2025
15f0101
Fix namespace.
vpietila-amd Nov 3, 2025
1893079
WIP: Builder for expected test results.
vpietila-amd Nov 3, 2025
0ac48ab
Improve ckb fwd conv instance tests.
vpietila-amd Nov 4, 2025
adf0a80
clang-format
vpietila-amd Nov 4, 2025
69a93a5
Change if-else statements into switch in conv factory.
vpietila-amd Nov 4, 2025
930dcaa
Merge branch 'develop' into vpietila/ckb-fwd-instance-test-improvements
vpietila-amd Nov 4, 2025
c1db749
Fix clang-formatting.
vpietila-amd Nov 4, 2025
5d6427a
Removed unnecessary includes.
vpietila-amd Nov 4, 2025
30df335
Merge remote-tracking branch 'origin/develop' into vpietila/ckb-fwd-i…
vpietila-amd Nov 5, 2025
f0291b7
Added missing copyright.
vpietila-amd Nov 5, 2025
deec3a0
Remove explicit device op flag from convolution signature.
vpietila-amd Nov 5, 2025
1213ef7
Add missing concept.
vpietila-amd Nov 5, 2025
d9bb5c3
Fix build.
vpietila-amd Nov 5, 2025
9c64167
Merge remote-tracking branch 'origin/develop' into vpietila/ckb-fwd-i…
vpietila-amd Nov 6, 2025
13b980c
clang-format
vpietila-amd Nov 6, 2025
beb1165
Merge branch 'vpietila/ckb-fwd-instance-test-improvements' into vpiet…
vpietila-amd Nov 6, 2025
59c7caf
Merge branch 'develop' into vpietila/ckb-fwd-instance-test-improvements
vpietila-amd Nov 6, 2025
4c7c133
Add test for building conv fwd FP8 instances.
vpietila-amd Nov 6, 2025
9fb1d16
Merge remote-tracking branch 'origin/develop' into vpietila/ckb-fwd-i…
vpietila-amd Nov 7, 2025
8028ff6
Add missing header to instance traits.
vpietila-amd Nov 7, 2025
ea9d100
Merge branch 'vpietila/ckb-fwd-instance-test-improvements' into vpiet…
vpietila-amd Nov 7, 2025
c959c11
Clean-up recently added instances.
vpietila-amd Nov 7, 2025
d4a419c
Introduce inheritance and specialization.
vpietila-amd Nov 7, 2025
bbc6d18
Use builder to build conv algorithm templates and types.
vpietila-amd Nov 7, 2025
1c43563
clang-format
vpietila-amd Nov 7, 2025
87977c2
Fix conv description tests.
vpietila-amd Nov 7, 2025
a04d1c7
Merge branch 'develop' into vpietila/ckb-fwd-conv-builder-improvements
shumway Nov 10, 2025
442c930
Merge branch 'develop' into vpietila/ckb-fwd-conv-builder-improvements
shumway Nov 11, 2025
c807c9d
Merge branch 'develop' into vpietila/ckb-fwd-conv-builder-improvements
shumway Nov 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ concept AccessOrderDescriptor = requires(T t) {
{ t.order } -> std::convertible_to<std::array<size_t, 3>>;
};

// Root requirement shared by every ConvAlgorithm concept: the algorithm descriptor must be a
// class type. All more specific conv algorithm concepts are expected to include this one.
template <typename T>
concept ConvAlgorithmDescriptor = std::is_class<T>::value;

Expand Down Expand Up @@ -183,6 +184,12 @@ concept SpecifiesLoopScheduler = requires {
{ T::loop_scheduler } -> std::convertible_to<PipelineScheduler>;
};

// Satisfied only when `T::specialization` is a valid expression convertible to
// ConvAlgorithmSpecialization and it compares equal to ConvAlgorithmSpecialization::LARGE_TENSOR.
template <typename T>
concept SpecifiesLargeTensorSupport = requires {
    // Same check a `{ expr } -> convertible_to<...>` compound requirement performs.
    requires std::convertible_to<decltype((T::specialization)), ConvAlgorithmSpecialization>;
    requires T::specialization == ConvAlgorithmSpecialization::LARGE_TENSOR;
};

/******************************************** */
/* DL-specific descriptors and requirements */
/******************************************** */
Expand All @@ -204,21 +211,9 @@ concept DlThreadClusterDescriptor = requires(T t) {
{ t.n1_xs } -> std::convertible_to<std::array<size_t, 2>>;
};

// Descriptor requirements for a DL block transfer in K0_M0_M1_K1 format: every member listed
// below must exist and be convertible to a 4-element std::array of size_t.
template <typename T>
concept DlBlockTransferK0M0M1K1Descriptor = requires(T xfer) {
    { xfer.thread_slice_lengths } -> std::convertible_to<std::array<size_t, 4>>;
    { xfer.thread_cluster_lengths } -> std::convertible_to<std::array<size_t, 4>>;
    { xfer.thread_cluster_arrange_order } -> std::convertible_to<std::array<size_t, 4>>;
    { xfer.src_access_order } -> std::convertible_to<std::array<size_t, 4>>;
    { xfer.src_vector_tensor_lengths } -> std::convertible_to<std::array<size_t, 4>>;
    { xfer.src_vector_tensor_contiguous_dim_order } -> std::convertible_to<std::array<size_t, 4>>;
    { xfer.dst_vector_tensor_lengths } -> std::convertible_to<std::array<size_t, 4>>;
};

// Concept for DL block transfer K0_N0_N1_K1 format
// Concept for DL block transfer
template <typename T>
concept DlBlockTransferK0N0N1K1Descriptor = requires(T t) {
concept DlBlockTransferDescriptor = requires(T t) {
{ t.thread_slice_lengths } -> std::convertible_to<std::array<size_t, 4>>;
{ t.thread_cluster_lengths } -> std::convertible_to<std::array<size_t, 4>>;
{ t.thread_cluster_arrange_order } -> std::convertible_to<std::array<size_t, 4>>;
Expand All @@ -228,9 +223,9 @@ concept DlBlockTransferK0N0N1K1Descriptor = requires(T t) {
{ t.dst_vector_tensor_lengths } -> std::convertible_to<std::array<size_t, 4>>;
};

// Concept for DL C thread transfer
// Concept for DL epilogue
template <typename T>
concept DlCThreadTransferDescriptor = requires(T t) {
concept DlEpilogueDescriptor = requires(T t) {
{ t.src_dst_access_order } -> std::convertible_to<std::array<size_t, 6>>;
{ t.src_dst_vector_dim } -> std::convertible_to<size_t>;
{ t.dst_scalar_per_vector } -> std::convertible_to<size_t>;
Expand All @@ -239,31 +234,63 @@ concept DlCThreadTransferDescriptor = requires(T t) {
// Concept to check if algorithm specifies DL thread config
template <typename T>
concept SpecifiesDlThreadConfig = requires {
{ T::dl_thread_config } -> DlThreadConfigDescriptor;
{ T::thread_config } -> DlThreadConfigDescriptor;
};

// Concept to check if algorithm specifies DL thread cluster
template <typename T>
concept SpecifiesDlThreadCluster = requires {
{ T::dl_thread_cluster } -> DlThreadClusterDescriptor;
{ T::thread_cluster } -> DlThreadClusterDescriptor;
};

// Concept to check if algorithm specifies DL A block transfer
// Concept to check if algorithm specifies DL block transfer
template <typename T>
concept SpecifiesDlBlockTransferA = requires {
{ T::dl_block_transfer_a } -> DlBlockTransferK0M0M1K1Descriptor;
concept SpecifiesDlBlockTransfer = requires {
{ T::block_transfer_a } -> DlBlockTransferDescriptor;
{ T::block_transfer_b } -> DlBlockTransferDescriptor;
};

// Concept to check if algorithm specifies DL B block transfer
// Concept to check if algorithm specifies DL epilogue
template <typename T>
concept SpecifiesDlBlockTransferB = requires {
{ T::dl_block_transfer_b } -> DlBlockTransferK0N0N1K1Descriptor;
concept SpecifiesDlEpilogue = requires {
{ T::epilogue_c } -> DlEpilogueDescriptor;
};

// Concept to check if algorithm specifies DL C thread transfer
/******************************************** */
/* Concepts for the different device ops */
/******************************************** */

template <typename T>
concept SpecifiesDlCThreadTransfer = requires {
{ T::dl_c_thread_transfer } -> DlCThreadTransferDescriptor;
};
concept DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3 =
ConvAlgorithmDescriptor<T> && SpecifiesThreadBlock<T> && SpecifiesGridwiseXdlGemm<T> &&
SpecifiesBlockTransfer<T> && SpecifiesLdsTransfer<T> && SpecifiesThreadClusterAccessOrder<T> &&
SpecifiesSourceAccessOrder<T> && SpecifiesFwdConcSpecialization<T> &&
SpecifiesGemmSpecialization<T> && SpecifiesBlockGemm<T>;

// Algorithm requirements for DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle: the conjunction of
// every constraint listed below, checked in the order written.
template <typename T>
concept DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle =
    ConvAlgorithmDescriptor<T> &&
    SpecifiesThreadBlock<T> &&
    SpecifiesGridwiseXdlGemm<T> &&
    SpecifiesBlockTransfer<T> &&
    SpecifiesLdsTransfer<T> &&
    SpecifiesThreadClusterAccessOrder<T> &&
    SpecifiesSourceAccessOrder<T> &&
    SpecifiesFwdConcSpecialization<T> &&
    SpecifiesGemmSpecialization<T> &&
    SpecifiesNumPrefetchStages<T> &&
    SpecifiesNumGroupsToMerge<T> &&
    SpecifiesLoopScheduler<T>;

// Algorithm requirements for DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle: the conjunction of
// every constraint listed below, checked in the order written. Uses the WMMA gridwise GEMM
// constraint (SpecifiesGridwiseWmmaGemm).
template <typename T>
concept DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle =
    ConvAlgorithmDescriptor<T> &&
    SpecifiesThreadBlock<T> &&
    SpecifiesGridwiseWmmaGemm<T> &&
    SpecifiesBlockTransfer<T> &&
    SpecifiesLdsTransfer<T> &&
    SpecifiesThreadClusterAccessOrder<T> &&
    SpecifiesSourceAccessOrder<T> &&
    SpecifiesFwdConcSpecialization<T> &&
    SpecifiesGemmSpecialization<T> &&
    SpecifiesNumPrefetchStages<T> &&
    SpecifiesLoopScheduler<T>;

// Algorithm requirements for DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK: the conjunction of
// the generic thread-block/specialization constraints and the DL-specific constraints below,
// checked in the order written.
template <typename T>
concept DeviceGroupedConvFwdDlMultipleD_NHWC_KYXC_NHWK =
    ConvAlgorithmDescriptor<T> &&
    SpecifiesThreadBlock<T> &&
    SpecifiesFwdConcSpecialization<T> &&
    SpecifiesGemmSpecialization<T> &&
    SpecifiesDlThreadConfig<T> &&
    SpecifiesDlThreadCluster<T> &&
    SpecifiesDlBlockTransfer<T> &&
    SpecifiesDlEpilogue<T>;

// Large-tensor variant: the type of `T::base_algorithm` must satisfy
// DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle, and T itself must satisfy
// SpecifiesLargeTensorSupport.
template <typename T>
concept DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor =
    DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<decltype(T::base_algorithm)> &&
    SpecifiesLargeTensorSupport<T>;

} // namespace ck_tile::builder
Loading