Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build_srcs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ MICROKERNEL_DEFS = [
"src/x64-transposec/x64-transposec.inc",
"src/x8-pack-lh/x8-pack-lh.inc",
"src/x8-pack-lh/x8-pack-lh-igemm.inc",
"src/x16-pack-lh/x16-pack-lh-igemm.inc",
"src/x8-packq/x8-packq.inc",
"src/x8-packw/x8-packw.inc",
"src/x8-transposec/x8-transposec.inc",
Expand Down
2 changes: 2 additions & 0 deletions cmake/gen/neonsme2_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@ SET(PROD_NEONSME2_MICROKERNEL_SRCS
src/pf16-gemm/pf16-gemm-32x32c2-minmax-neonsme2.c
src/pf32-gemm/pf32-gemm-1x32-minmax-neonsme2.c
src/pf32-gemm/pf32-gemm-32x32-minmax-neonsme2.c
src/pf16-f16-f16-igemm/pf16-f16-f16-igemm-32x32c2-minmax-neonsme2.c
src/pqs8-f32-qc8w-igemm/pqs8-f32-qc8w-igemm-32x32c4-minmax-neonsme2.c
src/pqs8-qc8w-gemm/pqs8-qc8w-gemm-1x32c4-minmax-neonsme2.c
src/pqs8-qc8w-gemm/pqs8-qc8w-gemm-32x32c4-minmax-neonsme2.c
src/qp8-f32-qc4w-gemm/qp8-f32-qc4w-gemm-minmax-1x64c4-neonsme2.c
src/qp8-f32-qc4w-gemm/qp8-f32-qc4w-gemm-minmax-16x64c4-neonsme2.c
src/qp8-f32-qc8w-gemm/qp8-f32-qc8w-gemm-minmax-1x64c4-neonsme2.c
src/qp8-f32-qc8w-gemm/qp8-f32-qc8w-gemm-minmax-16x64c4-neonsme2.c
src/x16-pack-lh/x16-packlh-igemm-neonsme2.c
src/x8-pack-lh/x8-packlh-igemm-neonsme2.c
src/x8-pack-lh/x8-packlh-neonsme2.c
src/x16-pack-lh/x16-packlh-neonsme2.c
Expand Down
2 changes: 2 additions & 0 deletions gen/neonsme2_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@ PROD_NEONSME2_MICROKERNEL_SRCS = [
"src/pf16-gemm/pf16-gemm-32x32c2-minmax-neonsme2.c",
"src/pf32-gemm/pf32-gemm-1x32-minmax-neonsme2.c",
"src/pf32-gemm/pf32-gemm-32x32-minmax-neonsme2.c",
"src/pf16-f16-f16-igemm/pf16-f16-f16-igemm-32x32c2-minmax-neonsme2.c",
"src/pqs8-f32-qc8w-igemm/pqs8-f32-qc8w-igemm-32x32c4-minmax-neonsme2.c",
"src/pqs8-qc8w-gemm/pqs8-qc8w-gemm-1x32c4-minmax-neonsme2.c",
"src/pqs8-qc8w-gemm/pqs8-qc8w-gemm-32x32c4-minmax-neonsme2.c",
"src/qp8-f32-qc4w-gemm/qp8-f32-qc4w-gemm-minmax-1x64c4-neonsme2.c",
"src/qp8-f32-qc4w-gemm/qp8-f32-qc4w-gemm-minmax-16x64c4-neonsme2.c",
"src/qp8-f32-qc8w-gemm/qp8-f32-qc8w-gemm-minmax-1x64c4-neonsme2.c",
"src/qp8-f32-qc8w-gemm/qp8-f32-qc8w-gemm-minmax-16x64c4-neonsme2.c",
"src/x16-pack-lh/x16-packlh-igemm-neonsme2.c",
"src/x8-pack-lh/x8-packlh-igemm-neonsme2.c",
"src/x8-pack-lh/x8-packlh-neonsme2.c",
"src/x16-pack-lh/x16-packlh-neonsme2.c",
Expand Down
1 change: 1 addition & 0 deletions scripts/generate-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ tools/generate-gemm-test.py --spec test/qs8-qc4w-gemm-minmax-fp32.yaml --output-
tools/generate-gemm-test.py --spec test/qs8-qc8w-gemm-minmax-fp32.yaml --output-test test/qs8-qc8w-gemm-minmax-fp32.cc --output-test test/qs8-qc8w-gemm-minmax-fp32-2.cc --output-test test/qs8-qc8w-gemm-minmax-fp32-3.cc --output-bench bench/qs8-qc8w-gemm-fp32.cc &

### Tests for IGEMM micro-kernels
tools/generate-gemm-test.py --spec test/pf16-f16-igemm-minmax.yaml --output-test test/pf16-f16-igemm-minmax.cc &
tools/generate-gemm-test.py --spec test/f16-igemm-minmax.yaml --output-test test/f16-igemm-minmax.cc &
tools/generate-gemm-test.py --spec test/f16-f32acc-igemm-minmax.yaml --output-test test/f16-f32acc-igemm-minmax.cc &

Expand Down
9 changes: 9 additions & 0 deletions src/configs/gemm-config.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,17 @@ static void init_pf16_gemm_config(void) {
pf16_gemm_config.arch = xnn_arch_arm_sme2;
pf16_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = XNN_INIT_HMP_GEMM_UKERNEL(xnn_pf16_gemm_minmax_ukernel_1x32c2__neonsme2);
pf16_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(mr)] = XNN_INIT_HMP_GEMM_UKERNEL(xnn_pf16_gemm_minmax_ukernel_32x32c2__neonsme2);
pf16_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(mr)] =
xnn_init_hmp_packed_igemm_ukernel(
(xnn_packed_lhs_igemm_ukernel_fn)
xnn_pf16_f16_igemm_minmax_fp16_ukernel_32x32c2__neonsme2);
pf16_gemm_config.init.f16 = xnn_init_f16_minmax_scalar_params;
pf16_gemm_config.pack_weights_and_biases = xnn_pack_kai_f16_weights_and_biases;
pf16_gemm_config.packed_stride_weights_and_biases = xnn_packed_stride_kai_f16_weights_and_biases;
pf16_gemm_config.pack_igemm_goki =
(xnn_pack_conv_goki_w_fn)xnn_pack_kai_f16_conv_goki_w_sme2;
pf16_gemm_config.pack_igemm_kgo =
(xnn_pack_conv_kgo_w_fn)xnn_pack_f16_conv_kgo_w;
pf16_gemm_config.mr = mr;
pf16_gemm_config.mr_packed = mr;
pf16_gemm_config.nr = nr;
Expand Down Expand Up @@ -5028,6 +5036,7 @@ const struct xnn_gemm_config* xnn_init_pf16_gemm_config() {
return NULL;
}
XNN_INIT_ONCE(pf16_gemm);

return pf16_gemm_config.mr ? &pf16_gemm_config : NULL;
}

Expand Down
152 changes: 109 additions & 43 deletions src/configs/pack-lh-config.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ static struct xnn_pack_lh_config x8_pack_lh_config = {0};
static struct xnn_pack_lh_config x16_pack_lh_config = {0};
static struct xnn_pack_lh_config x32_pack_lh_config = {0};
static struct xnn_pack_lh_config x8_igemm_pack_lh_config = {0};
static struct xnn_pack_lh_config x16_igemm_pack_lh_config = {0};

XNN_INIT_ONCE_GUARD(f16_qdint8_pack_lh);
XNN_INIT_ONCE_GUARD(f16_qduint8_pack_lh);
Expand All @@ -34,11 +35,15 @@ XNN_INIT_ONCE_GUARD(x8_pack_lh);
XNN_INIT_ONCE_GUARD(x16_pack_lh);
XNN_INIT_ONCE_GUARD(x32_pack_lh);
XNN_INIT_ONCE_GUARD(x8_igemm_pack_lh);
XNN_INIT_ONCE_GUARD(x16_igemm_pack_lh);

// Populates f16_qdint8_pack_lh_config with its pack, size and offset function
// pointers and the log2 element sizes of the input and packed data.
// Each field is assigned exactly once.
static void init_f16_qdint8_pack_lh_config(void) {
  f16_qdint8_pack_lh_config.pack_lh_fn =
      (xnn_pack_lh_ukernel_fn)xnn_pack_lh_f16_qdint8;
  f16_qdint8_pack_lh_config.size_fn =
      (xnn_pack_lh_size_fn)xnn_pack_lh_fx_qd8_packed_size;
  f16_qdint8_pack_lh_config.offset_fn =
      (xnn_pack_lh_offset_fn)xnn_pack_lh_fx_qd8_packed_offset;
  f16_qdint8_pack_lh_config.log2_input_element_size = XNN_LOG2_SIZEOF_HALF;
  // log2 == 0, i.e. a packed element size of 1.
  f16_qdint8_pack_lh_config.log2_packed_element_size = 0;
}
Expand All @@ -54,9 +59,12 @@ const struct xnn_pack_lh_config* xnn_init_f16_qdint8_pack_lh_config() {
}

// Populates f16_qduint8_pack_lh_config with its pack, size and offset function
// pointers and the log2 element sizes of the input and packed data.
// Each field is assigned exactly once.
static void init_f16_qduint8_pack_lh_config(void) {
  f16_qduint8_pack_lh_config.pack_lh_fn =
      (xnn_pack_lh_ukernel_fn)xnn_pack_lh_f16_qduint8;
  f16_qduint8_pack_lh_config.size_fn =
      (xnn_pack_lh_size_fn)xnn_pack_lh_fx_qd8_packed_size;
  f16_qduint8_pack_lh_config.offset_fn =
      (xnn_pack_lh_offset_fn)xnn_pack_lh_fx_qd8_packed_offset;
  f16_qduint8_pack_lh_config.log2_input_element_size = XNN_LOG2_SIZEOF_HALF;
  // log2 == 0, i.e. a packed element size of 1.
  f16_qduint8_pack_lh_config.log2_packed_element_size = 0;
}
Expand All @@ -72,9 +80,12 @@ const struct xnn_pack_lh_config* xnn_init_f16_qduint8_pack_lh_config() {
}

// Populates f32_qdint8_pack_lh_config with its pack, size and offset function
// pointers and the log2 element sizes of the input and packed data.
// Each field is assigned exactly once.
static void init_f32_qdint8_pack_lh_config(void) {
  f32_qdint8_pack_lh_config.pack_lh_fn =
      (xnn_pack_lh_ukernel_fn)xnn_pack_lh_f32_qdint8;
  f32_qdint8_pack_lh_config.size_fn =
      (xnn_pack_lh_size_fn)xnn_pack_lh_fx_qd8_packed_size;
  f32_qdint8_pack_lh_config.offset_fn =
      (xnn_pack_lh_offset_fn)xnn_pack_lh_fx_qd8_packed_offset;
  f32_qdint8_pack_lh_config.log2_input_element_size = XNN_LOG2_SIZEOF_FLOAT;
  // log2 == 0, i.e. a packed element size of 1.
  f32_qdint8_pack_lh_config.log2_packed_element_size = 0;
}
Expand All @@ -90,9 +101,12 @@ const struct xnn_pack_lh_config* xnn_init_f32_qdint8_pack_lh_config() {
}

// Populates f32_qduint8_pack_lh_config with its pack, size and offset function
// pointers and the log2 element sizes of the input and packed data.
// Each field is assigned exactly once.
static void init_f32_qduint8_pack_lh_config(void) {
  f32_qduint8_pack_lh_config.pack_lh_fn =
      (xnn_pack_lh_ukernel_fn)xnn_pack_lh_f32_qduint8;
  f32_qduint8_pack_lh_config.size_fn =
      (xnn_pack_lh_size_fn)xnn_pack_lh_fx_qd8_packed_size;
  f32_qduint8_pack_lh_config.offset_fn =
      (xnn_pack_lh_offset_fn)xnn_pack_lh_fx_qd8_packed_offset;
  f32_qduint8_pack_lh_config.log2_input_element_size = XNN_LOG2_SIZEOF_FLOAT;
  // log2 == 0, i.e. a packed element size of 1.
  f32_qduint8_pack_lh_config.log2_packed_element_size = 0;
}
Expand All @@ -109,12 +123,16 @@ const struct xnn_pack_lh_config* xnn_init_f32_qduint8_pack_lh_config() {

// Populates qp8_pack_lh_config. The packing kernel is chosen at compile time:
// the aarch64 NEON variant when building for ARM64 with KleidiAI enabled,
// otherwise the scalar variant. The size/offset helpers and element sizes are
// the same in both configurations. Each field is assigned exactly once.
static void init_qp8_pack_lh_config(void) {
#if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
  qp8_pack_lh_config.pack_lh_fn =
      (xnn_pack_lh_ukernel_fn)xnn_x8_packq_f32qp8_ukernel__aarch64_neon_u2;
#else
  qp8_pack_lh_config.pack_lh_fn =
      (xnn_pack_lh_ukernel_fn)xnn_x8_packq_f32qp8_ukernel__scalar_u1;
#endif  // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
  qp8_pack_lh_config.size_fn =
      (xnn_pack_lh_size_fn)xnn_x8_packq_f32qp8_packed_size;
  qp8_pack_lh_config.offset_fn =
      (xnn_pack_lh_offset_fn)xnn_x8_packq_f32qp8_packed_offset;
  qp8_pack_lh_config.log2_input_element_size = XNN_LOG2_SIZEOF_FLOAT;
  // log2 == 0, i.e. a packed element size of 1.
  qp8_pack_lh_config.log2_packed_element_size = 0;
}
Expand All @@ -132,12 +150,16 @@ const struct xnn_pack_lh_config* xnn_init_qp8_pack_lh_config() {
static void init_x32_pack_lh_config(void) {
#if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
#if XNN_ENABLE_ARM_SME2
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
assert(hardware_config != NULL);
if ((hardware_config->arch_flags & xnn_arch_arm_sme2)) {
x32_pack_lh_config.pack_lh_fn = (xnn_pack_lh_ukernel_fn) xnn_x32_pack_lh_ukernel__neonsme2;
x32_pack_lh_config.size_fn = (xnn_pack_lh_size_fn) xnn_x32_pack_lh_size__neonsme2;
x32_pack_lh_config.offset_fn = (xnn_pack_lh_offset_fn) xnn_x32_pack_lh_offset__neonsme2;
x32_pack_lh_config.pack_lh_fn =
(xnn_pack_lh_ukernel_fn)xnn_x32_pack_lh_ukernel__neonsme2;
x32_pack_lh_config.size_fn =
(xnn_pack_lh_size_fn)xnn_x32_pack_lh_size__neonsme2;
x32_pack_lh_config.offset_fn =
(xnn_pack_lh_offset_fn)xnn_x32_pack_lh_offset__neonsme2;
}
#endif // XNN_ENABLE_ARM_SME2
#endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
Expand All @@ -147,7 +169,8 @@ static void init_x32_pack_lh_config(void) {
}

const struct xnn_pack_lh_config* xnn_init_x32_pack_lh_config() {
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
if (hardware_config == NULL) {
return NULL;
}
Expand All @@ -158,12 +181,16 @@ const struct xnn_pack_lh_config* xnn_init_x32_pack_lh_config() {
static void init_x16_pack_lh_config(void) {
#if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
#if XNN_ENABLE_ARM_SME2
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
assert(hardware_config != NULL);
if ((hardware_config->arch_flags & xnn_arch_arm_sme2)) {
x16_pack_lh_config.pack_lh_fn = (xnn_pack_lh_ukernel_fn) xnn_x16_pack_lh_ukernel__neonsme2;
x16_pack_lh_config.size_fn = (xnn_pack_lh_size_fn) xnn_x16_pack_lh_size__neonsme2;
x16_pack_lh_config.offset_fn = (xnn_pack_lh_offset_fn) xnn_x16_pack_lh_offset__neonsme2;
x16_pack_lh_config.pack_lh_fn =
(xnn_pack_lh_ukernel_fn)xnn_x16_pack_lh_ukernel__neonsme2;
x16_pack_lh_config.size_fn =
(xnn_pack_lh_size_fn)xnn_x16_pack_lh_size__neonsme2;
x16_pack_lh_config.offset_fn =
(xnn_pack_lh_offset_fn)xnn_x16_pack_lh_offset__neonsme2;
}
#endif // XNN_ENABLE_ARM_SME2
#endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
Expand All @@ -173,7 +200,8 @@ static void init_x16_pack_lh_config(void) {
}

const struct xnn_pack_lh_config* xnn_init_x16_pack_lh_config() {
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
if (hardware_config == NULL) {
return NULL;
}
Expand All @@ -184,12 +212,16 @@ const struct xnn_pack_lh_config* xnn_init_x16_pack_lh_config() {
static void init_x8_pack_lh_config(void) {
#if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
#if XNN_ENABLE_ARM_SME2
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
assert(hardware_config != NULL);
if ((hardware_config->arch_flags & xnn_arch_arm_sme2)) {
x8_pack_lh_config.pack_lh_fn = (xnn_pack_lh_ukernel_fn) xnn_x8_pack_lh_ukernel__neonsme2;
x8_pack_lh_config.size_fn = (xnn_pack_lh_size_fn) xnn_x8_pack_lh_size__neonsme2;
x8_pack_lh_config.offset_fn = (xnn_pack_lh_offset_fn) xnn_x8_pack_lh_offset__neonsme2;
x8_pack_lh_config.pack_lh_fn =
(xnn_pack_lh_ukernel_fn)xnn_x8_pack_lh_ukernel__neonsme2;
x8_pack_lh_config.size_fn =
(xnn_pack_lh_size_fn)xnn_x8_pack_lh_size__neonsme2;
x8_pack_lh_config.offset_fn =
(xnn_pack_lh_offset_fn)xnn_x8_pack_lh_offset__neonsme2;
}
#endif // XNN_ENABLE_ARM_SME2
#endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
Expand All @@ -199,7 +231,8 @@ static void init_x8_pack_lh_config(void) {
}

const struct xnn_pack_lh_config* xnn_init_x8_pack_lh_config() {
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
if (hardware_config == NULL) {
return NULL;
}
Expand All @@ -208,17 +241,21 @@ const struct xnn_pack_lh_config* xnn_init_x8_pack_lh_config() {
}

// Populates x8_igemm_pack_lh_config.
//
// The IGEMM pack/size/offset function pointers are only set when the build
// targets ARM64 with KleidiAI and SME2 enabled AND the hardware config reports
// the xnn_arch_arm_sme2 flag at run time; otherwise they keep their previous
// values. The log2 element sizes are always set to 0.
//
// The guarded section appears exactly once so that `hardware_config` is
// declared a single time in this scope.
static void init_x8_igemm_pack_lh_config(void) {
#if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
#if XNN_ENABLE_ARM_SME2
  const struct xnn_hardware_config* hardware_config =
      xnn_init_hardware_config();
  assert(hardware_config != NULL);
  if ((hardware_config->arch_flags & xnn_arch_arm_sme2)) {
    x8_igemm_pack_lh_config.pack_lh_for_igemm_fn =
        (xnn_pack_lh_igemm_ukernel_fn)xnn_x8_pack_lh_ukernel__igemm_neonsme2;
    x8_igemm_pack_lh_config.size_for_igemm_fn =
        (xnn_pack_lh_igemm_size_fn)xnn_x8_pack_lh_size__igemm_neonsme2;
    x8_igemm_pack_lh_config.offset_for_igemm_fn =
        (xnn_pack_lh_igemm_offset_fn)xnn_x8_pack_lh_offset__igemm_neonsme2;
  }
#endif  // XNN_ENABLE_ARM_SME2
#endif  // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
  x8_igemm_pack_lh_config.log2_input_element_size = 0;
  x8_igemm_pack_lh_config.log2_packed_element_size = 0;
}
Expand All @@ -232,4 +269,33 @@ const struct xnn_pack_lh_config* xnn_init_x8_igemm_pack_lh_config() {
XNN_INIT_ONCE(x8_igemm_pack_lh);
return &x8_igemm_pack_lh_config;
}


static void init_x16_igemm_pack_lh_config(void) {
#if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
#if XNN_ENABLE_ARM_SME2
const struct xnn_hardware_config* hardware_config =
xnn_init_hardware_config();
assert(hardware_config != NULL);
if ((hardware_config->arch_flags & xnn_arch_arm_sme2)) {
x16_igemm_pack_lh_config.pack_lh_for_igemm_fn =
(xnn_pack_lh_igemm_ukernel_fn)xnn_x16_pack_lh_ukernel__igemm_neonsme2;
x16_igemm_pack_lh_config.size_for_igemm_fn =
(xnn_pack_lh_igemm_size_fn)xnn_x16_pack_lh_size__igemm_neonsme2;
x16_igemm_pack_lh_config.offset_for_igemm_fn =
(xnn_pack_lh_igemm_offset_fn)xnn_x16_pack_lh_offset__igemm_neonsme2;
}
#endif // XNN_ENABLE_ARM_SME2
#endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI
x16_igemm_pack_lh_config.log2_input_element_size = 1;
x16_igemm_pack_lh_config.log2_packed_element_size = 1;
}

// Returns a pointer to x16_igemm_pack_lh_config, or NULL when
// xnn_init_hardware_config() returns NULL.
//
// XNN_INIT_ONCE(x16_igemm_pack_lh) is invoked before the pointer is returned;
// presumably it runs init_x16_igemm_pack_lh_config() a single time (the macro
// is defined elsewhere, so confirm there).
const struct xnn_pack_lh_config* xnn_init_x16_igemm_pack_lh_config() {
  if (xnn_init_hardware_config() == NULL) {
    return NULL;
  }
  XNN_INIT_ONCE(x16_igemm_pack_lh);
  return &x16_igemm_pack_lh_config;
}
Loading