24 commits
c69ccdb  Prototype: Add support for fp16 iGEMM with SME2  (gmiodice, Jul 8, 2025)
a3537a1  Include missing files  (gmiodice, Jul 9, 2025)
232826c  Update FP16 iGEMM based on review comments  (gmiodice, Sep 8, 2025)
03bccaa  Updated FP16 iGemm Review with Fixes  (JonathanC-ARM, Oct 7, 2025)
9cd6e88  Fix rebase issues  (JonathanC-ARM, Oct 20, 2025)
7eb618d  Added multiple_of to handle all multiples in reductions simply.  (Aelphy, Oct 20, 2025)
e5cb8c0  Changed K1_1 strategy for f32 to go with single accumulator and maxim…  (Aelphy, Oct 20, 2025)
aeeca5d  Remove threadpool library and just build threadpool.cc as part of sub…  (dsharletg, Oct 20, 2025)
7304027  Disable SME when msan is enabled  (dsharletg, Oct 20, 2025)
89a72e3  Don't bother disabling KleidiAI if using YNNPACK  (dsharletg, Oct 20, 2025)
0c5edfc  Disable SME on older Apple compilers  (dsharletg, Oct 20, 2025)
9b29972  Fix usage of `sv{ld,st}1_hor_vnum_za32`  (dsharletg, Oct 20, 2025)
0d3dc09  Fix correctness of dot benchmarks for transpose_a kernels  (dsharletg, Oct 20, 2025)
4b73eb1  Update `pthreadpool` dependency.  (gonnet, Oct 20, 2025)
66d084b  Fix flaky quantize tests  (dsharletg, Oct 21, 2025)
6fc5696  Add missing `gemm_config` `.element_size` initializations.  (qukhan, Oct 21, 2025)
923b7f9  Fix build issues and guard against sme2 specific path  (JonathanC-ARM, Oct 21, 2025)
22beb50  Merge remote-tracking branch 'origin/master' into f16_igemm  (JonathanC-ARM, Oct 22, 2025)
06a44d2  Refactor Convolution to new structure and fix build failures  (JonathanC-ARM, Oct 23, 2025)
175903d  Remove unused gemm config structure init  (JonathanC-ARM, Oct 25, 2025)
9efa3d6  Merge branch 'google:master' into f16_igemm  (JonathanC-ARM, Oct 29, 2025)
999f4e3  Updated code with sme variants of kernels and fixed tests  (JonathanC-ARM, Oct 29, 2025)
892eee1  Merge branch 'f16_igemm' of github.com:JonathanC-ARM/XNNPACK into f16…  (JonathanC-ARM, Oct 29, 2025)
a2bd7aa  Updated ifdef guards and yml file  (JonathanC-ARM, Oct 29, 2025)
1 change: 1 addition & 0 deletions build_srcs.bzl
@@ -284,6 +284,7 @@ MICROKERNEL_DEFS = [
"src/x64-transposec/x64-transposec.inc",
"src/x8-pack-lh/x8-pack-lh.inc",
"src/x8-pack-lh/x8-pack-lh-igemm.inc",
"src/x16-pack-lh/x16-pack-lh-igemm.inc",
"src/x8-packq/x8-packq.inc",
"src/x8-packw/x8-packw.inc",
"src/x8-transposec/x8-transposec.inc",
2 changes: 2 additions & 0 deletions cmake/gen/neonsme2_microkernels.cmake
@@ -14,13 +14,15 @@ SET(PROD_NEONSME2_MICROKERNEL_SRCS
src/pf16-gemm/pf16-gemm-32x32c2-minmax-neonsme2.c
src/pf32-gemm/pf32-gemm-1x32-minmax-neonsme2.c
src/pf32-gemm/pf32-gemm-32x32-minmax-neonsme2.c
src/pf16-f16-f16-igemm/pf16-f16-f16-igemm-32x32c2-minmax-neonsme2.c
src/pqs8-f32-qc8w-igemm/pqs8-f32-qc8w-igemm-32x32c4-minmax-neonsme2.c
src/pqs8-qc8w-gemm/pqs8-qc8w-gemm-1x32c4-minmax-neonsme2.c
src/pqs8-qc8w-gemm/pqs8-qc8w-gemm-32x32c4-minmax-neonsme2.c
src/qp8-f32-qc4w-gemm/qp8-f32-qc4w-gemm-minmax-1x64c4-neonsme2.c
src/qp8-f32-qc4w-gemm/qp8-f32-qc4w-gemm-minmax-16x64c4-neonsme2.c
src/qp8-f32-qc8w-gemm/qp8-f32-qc8w-gemm-minmax-1x64c4-neonsme2.c
src/qp8-f32-qc8w-gemm/qp8-f32-qc8w-gemm-minmax-16x64c4-neonsme2.c
src/x16-pack-lh/x16-packlh-igemm-neonsme2.c
src/x8-pack-lh/x8-packlh-igemm-neonsme2.c
src/x8-pack-lh/x8-packlh-neonsme2.c
src/x16-pack-lh/x16-packlh-neonsme2.c)
2 changes: 2 additions & 0 deletions gen/neonsme2_microkernels.bzl
@@ -10,13 +10,15 @@ PROD_NEONSME2_MICROKERNEL_SRCS = [
"src/pf16-gemm/pf16-gemm-32x32c2-minmax-neonsme2.c",
"src/pf32-gemm/pf32-gemm-1x32-minmax-neonsme2.c",
"src/pf32-gemm/pf32-gemm-32x32-minmax-neonsme2.c",
"src/pf16-f16-f16-igemm/pf16-f16-f16-igemm-32x32c2-minmax-neonsme2.c",
"src/pqs8-f32-qc8w-igemm/pqs8-f32-qc8w-igemm-32x32c4-minmax-neonsme2.c",
"src/pqs8-qc8w-gemm/pqs8-qc8w-gemm-1x32c4-minmax-neonsme2.c",
"src/pqs8-qc8w-gemm/pqs8-qc8w-gemm-32x32c4-minmax-neonsme2.c",
"src/qp8-f32-qc4w-gemm/qp8-f32-qc4w-gemm-minmax-1x64c4-neonsme2.c",
"src/qp8-f32-qc4w-gemm/qp8-f32-qc4w-gemm-minmax-16x64c4-neonsme2.c",
"src/qp8-f32-qc8w-gemm/qp8-f32-qc8w-gemm-minmax-1x64c4-neonsme2.c",
"src/qp8-f32-qc8w-gemm/qp8-f32-qc8w-gemm-minmax-16x64c4-neonsme2.c",
"src/x16-pack-lh/x16-packlh-igemm-neonsme2.c",
"src/x8-pack-lh/x8-packlh-igemm-neonsme2.c",
"src/x8-pack-lh/x8-packlh-neonsme2.c",
"src/x16-pack-lh/x16-packlh-neonsme2.c",
40 changes: 40 additions & 0 deletions include/xnnpack.h
@@ -3049,6 +3049,46 @@ enum xnn_status xnn_create_convolution2d_nhwc_f16(
xnn_weights_cache_t weights_cache,
xnn_operator_t* convolution_op_out);

enum xnn_status xnn_create_convolution2d_nhwc_pf16(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t subsampling_height,
uint32_t subsampling_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_channel_stride,
size_t output_channel_stride,
const void* kernel,
const void* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_weights_cache_t weights_cache,
xnn_operator_t* convolution_op_out);

enum xnn_status xnn_reshape_convolution2d_nhwc_pf16(
xnn_operator_t convolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t* workspace_size,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);

enum xnn_status xnn_setup_convolution2d_nhwc_pf16(
xnn_operator_t convolution_op,
void* workspace,
const void* input,
void* output);

enum xnn_status xnn_reshape_convolution2d_nhwc_f16(
xnn_operator_t convolution_op,
size_t batch_size,
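The three new entry points follow the same create/reshape/setup flow as the existing `xnn_*_convolution2d_nhwc_f16` operators, with `reshape` additionally reporting the workspace needed for the packed (pf16) left-hand side. A minimal usage sketch; the shapes, padding, and channel counts are illustrative only, and error checking, workspace alignment, and the SME2 availability check are omitted:

```c
#include <math.h>    // INFINITY
#include <stdlib.h>  // malloc, free
#include <xnnpack.h>

// Hypothetical 3x3, stride-1, same-padding fp16 convolution on a 1x56x56x64 input.
void run_pf16_conv(const void* kernel, const void* bias, const void* input,
                   void* output, pthreadpool_t threadpool) {
  xnn_initialize(/*allocator=*/NULL);

  xnn_operator_t conv = NULL;
  xnn_create_convolution2d_nhwc_pf16(
      /*input_padding_top=*/1, /*input_padding_right=*/1,
      /*input_padding_bottom=*/1, /*input_padding_left=*/1,
      /*kernel_height=*/3, /*kernel_width=*/3,
      /*subsampling_height=*/1, /*subsampling_width=*/1,
      /*dilation_height=*/1, /*dilation_width=*/1,
      /*groups=*/1, /*group_input_channels=*/64, /*group_output_channels=*/64,
      /*input_channel_stride=*/64, /*output_channel_stride=*/64,
      kernel, bias, /*output_min=*/-INFINITY, /*output_max=*/INFINITY,
      /*flags=*/0, /*weights_cache=*/NULL, &conv);

  // Reshape computes the output size and the scratch space for the packed LHS.
  size_t workspace_size = 0;
  size_t output_height = 0, output_width = 0;
  xnn_reshape_convolution2d_nhwc_pf16(conv, /*batch_size=*/1,
                                      /*input_height=*/56, /*input_width=*/56,
                                      &workspace_size, &output_height,
                                      &output_width, threadpool);

  void* workspace = malloc(workspace_size);  // real code should over-align this
  xnn_setup_convolution2d_nhwc_pf16(conv, workspace, input, output);
  xnn_run_operator(conv, threadpool);

  xnn_delete_operator(conv);
  free(workspace);
}
```

On hardware without SME2 the pf16 create call is expected to fail, so callers would normally fall back to `xnn_create_convolution2d_nhwc_f16`.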
1 change: 1 addition & 0 deletions scripts/generate-tests.sh
@@ -49,6 +49,7 @@ tools/generate-gemm-test.py --spec test/qs8-qc4w-gemm-minmax-fp32.yaml --output-
tools/generate-gemm-test.py --spec test/qs8-qc8w-gemm-minmax-fp32.yaml --output-test test/qs8-qc8w-gemm-minmax-fp32.cc --output-test test/qs8-qc8w-gemm-minmax-fp32-2.cc --output-test test/qs8-qc8w-gemm-minmax-fp32-3.cc --output-bench bench/qs8-qc8w-gemm-fp32.cc &

### Tests for IGEMM micro-kernels
tools/generate-gemm-test.py --spec test/pf16-f16-igemm-minmax.yaml --output-test test/pf16-f16-igemm-minmax.cc &
tools/generate-gemm-test.py --spec test/f16-igemm-minmax.yaml --output-test test/f16-igemm-minmax.cc &
tools/generate-gemm-test.py --spec test/f16-f32acc-igemm-minmax.yaml --output-test test/f16-f32acc-igemm-minmax.cc &

9 changes: 9 additions & 0 deletions src/configs/gemm-config.c
@@ -333,9 +333,17 @@ static void init_pf16_gemm_config(void) {
pf16_gemm_config.arch = xnn_arch_arm_sme2;
pf16_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = XNN_INIT_HMP_GEMM_UKERNEL(xnn_pf16_gemm_minmax_ukernel_1x32c2__neonsme2);
pf16_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(mr)] = XNN_INIT_HMP_GEMM_UKERNEL(xnn_pf16_gemm_minmax_ukernel_32x32c2__neonsme2);
pf16_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(mr)] =
xnn_init_hmp_packed_igemm_ukernel(
(xnn_packed_lhs_igemm_ukernel_fn)
xnn_pf16_f16_igemm_minmax_fp16_ukernel_32x32c2__neonsme2);
pf16_gemm_config.init.f16 = xnn_init_f16_minmax_scalar_params;
pf16_gemm_config.pack_weights_and_biases = xnn_pack_kai_f16_weights_and_biases;
pf16_gemm_config.packed_stride_weights_and_biases = xnn_packed_stride_kai_f16_weights_and_biases;
pf16_gemm_config.pack_igemm_goki =
(xnn_pack_conv_goki_w_fn)xnn_pack_kai_f16_conv_goki_w_sme2;
pf16_gemm_config.pack_igemm_kgo =
(xnn_pack_conv_kgo_w_fn)xnn_pack_f16_conv_kgo_w;
pf16_gemm_config.mr = mr;
pf16_gemm_config.mr_packed = mr;
pf16_gemm_config.nr = nr;
@@ -5586,6 +5594,7 @@ const struct xnn_gemm_config* xnn_init_pf16_gemm_config() {
return NULL;
}
XNN_INIT_ONCE(pf16_gemm);

return pf16_gemm_config.mr ? &pf16_gemm_config : NULL;
}

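The accessor keeps the existing init-once pattern and reports availability through `mr`: `xnn_init_pf16_gemm_config()` hands back the config only when an SME2 kernel was registered. A hedged sketch of how an internal caller would gate on the new packed-LHS IGEMM path (internal API; names taken from the diff above):

```c
// Sketch only: the result is NULL unless init succeeded and mr was set,
// i.e. the SME2 kernels registered above are actually usable.
const struct xnn_gemm_config* pf16 = xnn_init_pf16_gemm_config();
if (pf16 != NULL) {
  // The packed-LHS fp16 IGEMM ukernel added above lives at
  // XNN_MR_TO_INDEX(pf16->mr); mr_packed equals mr for the 32x32c2 SME2 tile.
}
```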
123 changes: 89 additions & 34 deletions src/configs/pack-lh-config.c
@@ -20,21 +20,27 @@ static struct xnn_pack_lh_config x8_pack_lh_config = {0};
static struct xnn_pack_lh_config x16_pack_lh_config = {0};
static struct xnn_pack_lh_config x32_pack_lh_config = {0};
static struct xnn_pack_lh_config x8_igemm_pack_lh_config = {0};
static struct xnn_pack_lh_config x16_igemm_pack_lh_config = {0};

XNN_INIT_ONCE_GUARD(qp8_pack_lh);
XNN_INIT_ONCE_GUARD(x8_pack_lh);
XNN_INIT_ONCE_GUARD(x16_pack_lh);
XNN_INIT_ONCE_GUARD(x32_pack_lh);
XNN_INIT_ONCE_GUARD(x8_igemm_pack_lh);
XNN_INIT_ONCE_GUARD(x16_igemm_pack_lh);

static void init_qp8_pack_lh_config(void) {
#if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
qp8_pack_lh_config.pack_lh_fn = (xnn_pack_lh_ukernel_fn)xnn_x8_packq_f32qp8_ukernel__aarch64_neon_u2;
qp8_pack_lh_config.pack_lh_fn =
(xnn_pack_lh_ukernel_fn)xnn_x8_packq_f32qp8_ukernel__aarch64_neon_u2;
#else
qp8_pack_lh_config.pack_lh_fn = (xnn_pack_lh_ukernel_fn)xnn_x8_packq_f32qp8_ukernel__scalar_u1;
qp8_pack_lh_config.pack_lh_fn =
(xnn_pack_lh_ukernel_fn)xnn_x8_packq_f32qp8_ukernel__scalar_u1;
#endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
qp8_pack_lh_config.size_fn = (xnn_pack_lh_size_fn)xnn_x8_packq_f32qp8_packed_size;
qp8_pack_lh_config.offset_fn = (xnn_pack_lh_offset_fn)xnn_x8_packq_f32qp8_packed_offset;
qp8_pack_lh_config.size_fn =
(xnn_pack_lh_size_fn)xnn_x8_packq_f32qp8_packed_size;
qp8_pack_lh_config.offset_fn =
(xnn_pack_lh_offset_fn)xnn_x8_packq_f32qp8_packed_offset;
qp8_pack_lh_config.log2_input_element_size = XNN_LOG2_SIZEOF_FLOAT;
qp8_pack_lh_config.log2_packed_element_size = 0;
}
@@ -51,13 +57,17 @@ const struct xnn_pack_lh_config* xnn_init_qp8_pack_lh_config() {

static void init_x32_pack_lh_config(void) {
#if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
#if XNN_ENABLE_ARM_SME2 || XNN_ENABLE_ARM_SME
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
#if XNN_ENABLE_ARM_SME2
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
assert(hardware_config != NULL);
if (hardware_config->arch_flags & xnn_arch_arm_sme) {
x32_pack_lh_config.pack_lh_fn = (xnn_pack_lh_ukernel_fn) xnn_x32_pack_lh_ukernel__neonsme;
x32_pack_lh_config.size_fn = (xnn_pack_lh_size_fn) xnn_x32_pack_lh_size__neonsme;
x32_pack_lh_config.offset_fn = (xnn_pack_lh_offset_fn) xnn_x32_pack_lh_offset__neonsme;
if ((hardware_config->arch_flags & xnn_arch_arm_sme2)) {
x32_pack_lh_config.pack_lh_fn =
(xnn_pack_lh_ukernel_fn)xnn_x32_pack_lh_ukernel__neonsme;
x32_pack_lh_config.size_fn =
(xnn_pack_lh_size_fn)xnn_x32_pack_lh_size__neonsme;
x32_pack_lh_config.offset_fn =
(xnn_pack_lh_offset_fn)xnn_x32_pack_lh_offset__neonsme;
}
#endif // XNN_ENABLE_ARM_SME2 || XNN_ENABLE_ARM_SME
#endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
Expand All @@ -67,7 +77,8 @@ static void init_x32_pack_lh_config(void) {
}

const struct xnn_pack_lh_config* xnn_init_x32_pack_lh_config() {
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
if (hardware_config == NULL) {
return NULL;
}
@@ -78,12 +89,16 @@ const struct xnn_pack_lh_config* xnn_init_x32_pack_lh_config() {
static void init_x16_pack_lh_config(void) {
#if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
#if XNN_ENABLE_ARM_SME2
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
assert(hardware_config != NULL);
if (hardware_config->arch_flags & xnn_arch_arm_sme2) {
x16_pack_lh_config.pack_lh_fn = (xnn_pack_lh_ukernel_fn) xnn_x16_pack_lh_ukernel__neonsme2;
x16_pack_lh_config.size_fn = (xnn_pack_lh_size_fn) xnn_x16_pack_lh_size__neonsme2;
x16_pack_lh_config.offset_fn = (xnn_pack_lh_offset_fn) xnn_x16_pack_lh_offset__neonsme2;
if ((hardware_config->arch_flags & xnn_arch_arm_sme2)) {
x16_pack_lh_config.pack_lh_fn =
(xnn_pack_lh_ukernel_fn)xnn_x16_pack_lh_ukernel__neonsme2;
x16_pack_lh_config.size_fn =
(xnn_pack_lh_size_fn)xnn_x16_pack_lh_size__neonsme2;
x16_pack_lh_config.offset_fn =
(xnn_pack_lh_offset_fn)xnn_x16_pack_lh_offset__neonsme2;
}
#endif // XNN_ENABLE_ARM_SME2
#endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
@@ -93,7 +108,8 @@ static void init_x16_pack_lh_config(void) {
}

const struct xnn_pack_lh_config* xnn_init_x16_pack_lh_config() {
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
if (hardware_config == NULL) {
return NULL;
}
@@ -104,12 +120,16 @@ const struct xnn_pack_lh_config* xnn_init_x16_pack_lh_config() {
static void init_x8_pack_lh_config(void) {
#if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
#if XNN_ENABLE_ARM_SME2
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
assert(hardware_config != NULL);
if (hardware_config->arch_flags & xnn_arch_arm_sme2) {
x8_pack_lh_config.pack_lh_fn = (xnn_pack_lh_ukernel_fn) xnn_x8_pack_lh_ukernel__neonsme2;
x8_pack_lh_config.size_fn = (xnn_pack_lh_size_fn) xnn_x8_pack_lh_size__neonsme2;
x8_pack_lh_config.offset_fn = (xnn_pack_lh_offset_fn) xnn_x8_pack_lh_offset__neonsme2;
if ((hardware_config->arch_flags & xnn_arch_arm_sme2)) {
x8_pack_lh_config.pack_lh_fn =
(xnn_pack_lh_ukernel_fn)xnn_x8_pack_lh_ukernel__neonsme2;
x8_pack_lh_config.size_fn =
(xnn_pack_lh_size_fn)xnn_x8_pack_lh_size__neonsme2;
x8_pack_lh_config.offset_fn =
(xnn_pack_lh_offset_fn)xnn_x8_pack_lh_offset__neonsme2;
}
#endif // XNN_ENABLE_ARM_SME2
#endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
@@ -119,7 +139,8 @@ static void init_x8_pack_lh_config(void) {
}

const struct xnn_pack_lh_config* xnn_init_x8_pack_lh_config() {
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
if (hardware_config == NULL) {
return NULL;
}
@@ -128,17 +149,21 @@ const struct xnn_pack_lh_config* xnn_init_x8_pack_lh_config() {
}

static void init_x8_igemm_pack_lh_config(void) {
#if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
#if XNN_ENABLE_ARM_SME2
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
assert(hardware_config != NULL);
if (hardware_config->arch_flags & xnn_arch_arm_sme2) {
x8_igemm_pack_lh_config.pack_lh_for_igemm_fn = (xnn_pack_lh_igemm_ukernel_fn) xnn_x8_pack_lh_ukernel__igemm_neonsme2;
x8_igemm_pack_lh_config.size_for_igemm_fn = (xnn_pack_lh_igemm_size_fn) xnn_x8_pack_lh_size__igemm_neonsme2;
x8_igemm_pack_lh_config.offset_for_igemm_fn = (xnn_pack_lh_igemm_offset_fn) xnn_x8_pack_lh_offset__igemm_neonsme2;
}
#endif // XNN_ENABLE_ARM_SME2
#endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
#if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
#if XNN_ENABLE_ARM_SME2
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
assert(hardware_config != NULL);
if ((hardware_config->arch_flags & xnn_arch_arm_sme2)) {
x8_igemm_pack_lh_config.pack_lh_for_igemm_fn =
(xnn_pack_lh_igemm_ukernel_fn)xnn_x8_pack_lh_ukernel__igemm_neonsme2;
x8_igemm_pack_lh_config.size_for_igemm_fn =
(xnn_pack_lh_igemm_size_fn)xnn_x8_pack_lh_size__igemm_neonsme2;
x8_igemm_pack_lh_config.offset_for_igemm_fn =
(xnn_pack_lh_igemm_offset_fn)xnn_x8_pack_lh_offset__igemm_neonsme2;
}
#endif // XNN_ENABLE_ARM_SME2
#endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
x8_igemm_pack_lh_config.log2_input_element_size = 0;
x8_igemm_pack_lh_config.log2_packed_element_size = 0;
}
@@ -152,3 +177,33 @@ const struct xnn_pack_lh_config* xnn_init_x8_igemm_pack_lh_config() {
XNN_INIT_ONCE(x8_igemm_pack_lh);
return &x8_igemm_pack_lh_config;
}

static void init_x16_igemm_pack_lh_config(void) {
#if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
#if XNN_ENABLE_ARM_SME2
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
assert(hardware_config != NULL);
if ((hardware_config->arch_flags & xnn_arch_arm_sme2)) {
x16_igemm_pack_lh_config.pack_lh_for_igemm_fn =
(xnn_pack_lh_igemm_ukernel_fn)xnn_x16_pack_lh_ukernel__igemm_neonsme2;
x16_igemm_pack_lh_config.size_for_igemm_fn =
(xnn_pack_lh_igemm_size_fn)xnn_x16_pack_lh_size__igemm_neonsme2;
x16_igemm_pack_lh_config.offset_for_igemm_fn =
(xnn_pack_lh_igemm_offset_fn)xnn_x16_pack_lh_offset__igemm_neonsme2;
}
#endif // XNN_ENABLE_ARM_SME2
#endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
x16_igemm_pack_lh_config.log2_input_element_size = 1;
x16_igemm_pack_lh_config.log2_packed_element_size = 1;
}

const struct xnn_pack_lh_config* xnn_init_x16_igemm_pack_lh_config() {
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
if (hardware_config == NULL) {
return NULL;
}
XNN_INIT_ONCE(x16_igemm_pack_lh);
return &x16_igemm_pack_lh_config;
}
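
The new x16 config mirrors the x8 IGEMM pack-lh config above, with `log2_input_element_size` and `log2_packed_element_size` set to 1 for 2-byte (fp16) elements. A small sketch of how a caller would probe it (internal API; field names as declared in this file):

```c
// Sketch only: the accessor returns NULL only if hardware detection fails;
// the kernel pointers stay NULL when SME2 (or KleidiAI) is not enabled.
const struct xnn_pack_lh_config* cfg = xnn_init_x16_igemm_pack_lh_config();
if (cfg != NULL && cfg->pack_lh_for_igemm_fn != NULL) {
  // Packed-LHS fp16 IGEMM path is available; each input element is
  // (1 << cfg->log2_input_element_size) == 2 bytes.
}
```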