From 0387e997ec188205dbc21028a52897e4d4dc6701 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Fri, 30 Jan 2026 20:22:18 +0100 Subject: [PATCH 1/4] test the `vld1*` functions --- crates/core_arch/src/aarch64/neon/mod.rs | 864 +++++++++++++++++++++++ 1 file changed, 864 insertions(+) diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index bac4574239..feaf94a7f9 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -993,6 +993,870 @@ mod tests { assert_eq!(vals[1], 1.); assert_eq!(vals[2], 2.); } + + #[simd_test(enable = "neon,fp16")] + #[cfg(not(target_arch = "arm64ec"))] + unsafe fn test_vld1_f16_x2() { + let vals: [f16; 8] = crate::array::from_fn(|i| i as f16); + let a: float16x4x2_t = transmute(vals); + let mut tmp = [0_f16; 8]; + vst1_f16_x2(tmp.as_mut_ptr().cast(), a); + let r: float16x4x2_t = vld1_f16_x2(tmp.as_ptr().cast()); + let out: [f16; 8] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon,fp16")] + #[cfg(not(target_arch = "arm64ec"))] + unsafe fn test_vld1_f16_x3() { + let vals: [f16; 12] = crate::array::from_fn(|i| i as f16); + let a: float16x4x3_t = transmute(vals); + let mut tmp = [0_f16; 12]; + vst1_f16_x3(tmp.as_mut_ptr().cast(), a); + let r: float16x4x3_t = vld1_f16_x3(tmp.as_ptr().cast()); + let out: [f16; 12] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon,fp16")] + #[cfg(not(target_arch = "arm64ec"))] + unsafe fn test_vld1_f16_x4() { + let vals: [f16; 16] = crate::array::from_fn(|i| i as f16); + let a: float16x4x4_t = transmute(vals); + let mut tmp = [0_f16; 16]; + vst1_f16_x4(tmp.as_mut_ptr().cast(), a); + let r: float16x4x4_t = vld1_f16_x4(tmp.as_ptr().cast()); + let out: [f16; 16] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon,fp16")] + #[cfg(not(target_arch = "arm64ec"))] + unsafe fn test_vld1q_f16_x2() { + let vals: [f16; 16] = crate::array::from_fn(|i| i as f16); + let a: float16x8x2_t = transmute(vals); + let mut tmp = [0_f16; 16]; + vst1q_f16_x2(tmp.as_mut_ptr().cast(), a); + let r: float16x8x2_t = vld1q_f16_x2(tmp.as_ptr().cast()); + let out: [f16; 16] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon,fp16")] + #[cfg(not(target_arch = "arm64ec"))] + unsafe fn test_vld1q_f16_x3() { + let vals: [f16; 24] = crate::array::from_fn(|i| i as f16); + let a: float16x8x3_t = transmute(vals); + let mut tmp = [0_f16; 24]; + vst1q_f16_x3(tmp.as_mut_ptr().cast(), a); + let r: float16x8x3_t = vld1q_f16_x3(tmp.as_ptr().cast()); + let out: [f16; 24] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon,fp16")] + #[cfg(not(target_arch = "arm64ec"))] + unsafe fn test_vld1q_f16_x4() { + let vals: [f16; 32] = crate::array::from_fn(|i| i as f16); + let a: float16x8x4_t = transmute(vals); + let mut tmp = [0_f16; 32]; + vst1q_f16_x4(tmp.as_mut_ptr().cast(), a); + let r: float16x8x4_t = vld1q_f16_x4(tmp.as_ptr().cast()); + let out: [f16; 32] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_f32_x2() { + let vals: [f32; 4] = crate::array::from_fn(|i| i as f32); + let a: float32x2x2_t = transmute(vals); + let mut tmp = [0_f32; 4]; + vst1_f32_x2(tmp.as_mut_ptr().cast(), a); + let r: float32x2x2_t = vld1_f32_x2(tmp.as_ptr().cast()); + let out: [f32; 4] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_f32_x3() { + let vals: [f32; 6] = crate::array::from_fn(|i| 
i as f32); + let a: float32x2x3_t = transmute(vals); + let mut tmp = [0_f32; 6]; + vst1_f32_x3(tmp.as_mut_ptr().cast(), a); + let r: float32x2x3_t = vld1_f32_x3(tmp.as_ptr().cast()); + let out: [f32; 6] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_f32_x4() { + let vals: [f32; 8] = crate::array::from_fn(|i| i as f32); + let a: float32x2x4_t = transmute(vals); + let mut tmp = [0_f32; 8]; + vst1_f32_x4(tmp.as_mut_ptr().cast(), a); + let r: float32x2x4_t = vld1_f32_x4(tmp.as_ptr().cast()); + let out: [f32; 8] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_f32_x2() { + let vals: [f32; 8] = crate::array::from_fn(|i| i as f32); + let a: float32x4x2_t = transmute(vals); + let mut tmp = [0_f32; 8]; + vst1q_f32_x2(tmp.as_mut_ptr().cast(), a); + let r: float32x4x2_t = vld1q_f32_x2(tmp.as_ptr().cast()); + let out: [f32; 8] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_f32_x3() { + let vals: [f32; 12] = crate::array::from_fn(|i| i as f32); + let a: float32x4x3_t = transmute(vals); + let mut tmp = [0_f32; 12]; + vst1q_f32_x3(tmp.as_mut_ptr().cast(), a); + let r: float32x4x3_t = vld1q_f32_x3(tmp.as_ptr().cast()); + let out: [f32; 12] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_f32_x4() { + let vals: [f32; 16] = crate::array::from_fn(|i| i as f32); + let a: float32x4x4_t = transmute(vals); + let mut tmp = [0_f32; 16]; + vst1q_f32_x4(tmp.as_mut_ptr().cast(), a); + let r: float32x4x4_t = vld1q_f32_x4(tmp.as_ptr().cast()); + let out: [f32; 16] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon,aes")] + unsafe fn test_vld1_p64_x2() { + let vals: [p64; 2] = crate::array::from_fn(|i| i as p64); + let a: poly64x1x2_t = transmute(vals); + let mut tmp = [0 as p64; 2]; + vst1_p64_x2(tmp.as_mut_ptr().cast(), a); + let r: poly64x1x2_t = vld1_p64_x2(tmp.as_ptr().cast()); + let out: [p64; 2] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon,aes")] + unsafe fn test_vld1_p64_x3() { + let vals: [p64; 3] = crate::array::from_fn(|i| i as p64); + let a: poly64x1x3_t = transmute(vals); + let mut tmp = [0 as p64; 3]; + vst1_p64_x3(tmp.as_mut_ptr().cast(), a); + let r: poly64x1x3_t = vld1_p64_x3(tmp.as_ptr().cast()); + let out: [p64; 3] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon,aes")] + unsafe fn test_vld1_p64_x4() { + let vals: [p64; 4] = crate::array::from_fn(|i| i as p64); + let a: poly64x1x4_t = transmute(vals); + let mut tmp = [0 as p64; 4]; + vst1_p64_x4(tmp.as_mut_ptr().cast(), a); + let r: poly64x1x4_t = vld1_p64_x4(tmp.as_ptr().cast()); + let out: [p64; 4] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon,aes")] + unsafe fn test_vld1q_p64_x2() { + let vals: [p64; 4] = crate::array::from_fn(|i| i as p64); + let a: poly64x2x2_t = transmute(vals); + let mut tmp = [0 as p64; 4]; + vst1q_p64_x2(tmp.as_mut_ptr().cast(), a); + let r: poly64x2x2_t = vld1q_p64_x2(tmp.as_ptr().cast()); + let out: [p64; 4] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon,aes")] + unsafe fn test_vld1q_p64_x3() { + let vals: [p64; 6] = crate::array::from_fn(|i| i as p64); + let a: poly64x2x3_t = transmute(vals); + let mut tmp = [0 as p64; 6]; + vst1q_p64_x3(tmp.as_mut_ptr().cast(), a); + let r: poly64x2x3_t = vld1q_p64_x3(tmp.as_ptr().cast()); + let out: [p64; 6] = transmute(r); + 
assert_eq!(out, vals); + } + + #[simd_test(enable = "neon,aes")] + unsafe fn test_vld1q_p64_x4() { + let vals: [p64; 8] = crate::array::from_fn(|i| i as p64); + let a: poly64x2x4_t = transmute(vals); + let mut tmp = [0 as p64; 8]; + vst1q_p64_x4(tmp.as_mut_ptr().cast(), a); + let r: poly64x2x4_t = vld1q_p64_x4(tmp.as_ptr().cast()); + let out: [p64; 8] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s8_x2() { + let vals: [i8; 16] = crate::array::from_fn(|i| i as i8); + let a: int8x8x2_t = transmute(vals); + let mut tmp = [0_i8; 16]; + vst1_s8_x2(tmp.as_mut_ptr().cast(), a); + let r: int8x8x2_t = vld1_s8_x2(tmp.as_ptr().cast()); + let out: [i8; 16] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s8_x3() { + let vals: [i8; 24] = crate::array::from_fn(|i| i as i8); + let a: int8x8x3_t = transmute(vals); + let mut tmp = [0_i8; 24]; + vst1_s8_x3(tmp.as_mut_ptr().cast(), a); + let r: int8x8x3_t = vld1_s8_x3(tmp.as_ptr().cast()); + let out: [i8; 24] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s8_x4() { + let vals: [i8; 32] = crate::array::from_fn(|i| i as i8); + let a: int8x8x4_t = transmute(vals); + let mut tmp = [0_i8; 32]; + vst1_s8_x4(tmp.as_mut_ptr().cast(), a); + let r: int8x8x4_t = vld1_s8_x4(tmp.as_ptr().cast()); + let out: [i8; 32] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s8_x2() { + let vals: [i8; 32] = crate::array::from_fn(|i| i as i8); + let a: int8x16x2_t = transmute(vals); + let mut tmp = [0_i8; 32]; + vst1q_s8_x2(tmp.as_mut_ptr().cast(), a); + let r: int8x16x2_t = vld1q_s8_x2(tmp.as_ptr().cast()); + let out: [i8; 32] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s8_x3() { + let vals: [i8; 48] = crate::array::from_fn(|i| i as i8); + let a: int8x16x3_t = transmute(vals); + let mut tmp = [0_i8; 48]; + vst1q_s8_x3(tmp.as_mut_ptr().cast(), a); + let r: int8x16x3_t = vld1q_s8_x3(tmp.as_ptr().cast()); + let out: [i8; 48] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s8_x4() { + let vals: [i8; 64] = crate::array::from_fn(|i| i as i8); + let a: int8x16x4_t = transmute(vals); + let mut tmp = [0_i8; 64]; + vst1q_s8_x4(tmp.as_mut_ptr().cast(), a); + let r: int8x16x4_t = vld1q_s8_x4(tmp.as_ptr().cast()); + let out: [i8; 64] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s16_x2() { + let vals: [i16; 8] = crate::array::from_fn(|i| i as i16); + let a: int16x4x2_t = transmute(vals); + let mut tmp = [0_i16; 8]; + vst1_s16_x2(tmp.as_mut_ptr().cast(), a); + let r: int16x4x2_t = vld1_s16_x2(tmp.as_ptr().cast()); + let out: [i16; 8] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s16_x3() { + let vals: [i16; 12] = crate::array::from_fn(|i| i as i16); + let a: int16x4x3_t = transmute(vals); + let mut tmp = [0_i16; 12]; + vst1_s16_x3(tmp.as_mut_ptr().cast(), a); + let r: int16x4x3_t = vld1_s16_x3(tmp.as_ptr().cast()); + let out: [i16; 12] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s16_x4() { + let vals: [i16; 16] = crate::array::from_fn(|i| i as i16); + let a: int16x4x4_t = transmute(vals); + let mut tmp = [0_i16; 16]; + vst1_s16_x4(tmp.as_mut_ptr().cast(), a); + let r: int16x4x4_t = 
vld1_s16_x4(tmp.as_ptr().cast()); + let out: [i16; 16] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s16_x2() { + let vals: [i16; 16] = crate::array::from_fn(|i| i as i16); + let a: int16x8x2_t = transmute(vals); + let mut tmp = [0_i16; 16]; + vst1q_s16_x2(tmp.as_mut_ptr().cast(), a); + let r: int16x8x2_t = vld1q_s16_x2(tmp.as_ptr().cast()); + let out: [i16; 16] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s16_x3() { + let vals: [i16; 24] = crate::array::from_fn(|i| i as i16); + let a: int16x8x3_t = transmute(vals); + let mut tmp = [0_i16; 24]; + vst1q_s16_x3(tmp.as_mut_ptr().cast(), a); + let r: int16x8x3_t = vld1q_s16_x3(tmp.as_ptr().cast()); + let out: [i16; 24] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s16_x4() { + let vals: [i16; 32] = crate::array::from_fn(|i| i as i16); + let a: int16x8x4_t = transmute(vals); + let mut tmp = [0_i16; 32]; + vst1q_s16_x4(tmp.as_mut_ptr().cast(), a); + let r: int16x8x4_t = vld1q_s16_x4(tmp.as_ptr().cast()); + let out: [i16; 32] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s32_x2() { + let vals: [i32; 4] = crate::array::from_fn(|i| i as i32); + let a: int32x2x2_t = transmute(vals); + let mut tmp = [0_i32; 4]; + vst1_s32_x2(tmp.as_mut_ptr().cast(), a); + let r: int32x2x2_t = vld1_s32_x2(tmp.as_ptr().cast()); + let out: [i32; 4] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s32_x3() { + let vals: [i32; 6] = crate::array::from_fn(|i| i as i32); + let a: int32x2x3_t = transmute(vals); + let mut tmp = [0_i32; 6]; + vst1_s32_x3(tmp.as_mut_ptr().cast(), a); + let r: int32x2x3_t = vld1_s32_x3(tmp.as_ptr().cast()); + let out: [i32; 6] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s32_x4() { + let vals: [i32; 8] = crate::array::from_fn(|i| i as i32); + let a: int32x2x4_t = transmute(vals); + let mut tmp = [0_i32; 8]; + vst1_s32_x4(tmp.as_mut_ptr().cast(), a); + let r: int32x2x4_t = vld1_s32_x4(tmp.as_ptr().cast()); + let out: [i32; 8] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s32_x2() { + let vals: [i32; 8] = crate::array::from_fn(|i| i as i32); + let a: int32x4x2_t = transmute(vals); + let mut tmp = [0_i32; 8]; + vst1q_s32_x2(tmp.as_mut_ptr().cast(), a); + let r: int32x4x2_t = vld1q_s32_x2(tmp.as_ptr().cast()); + let out: [i32; 8] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s32_x3() { + let vals: [i32; 12] = crate::array::from_fn(|i| i as i32); + let a: int32x4x3_t = transmute(vals); + let mut tmp = [0_i32; 12]; + vst1q_s32_x3(tmp.as_mut_ptr().cast(), a); + let r: int32x4x3_t = vld1q_s32_x3(tmp.as_ptr().cast()); + let out: [i32; 12] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s32_x4() { + let vals: [i32; 16] = crate::array::from_fn(|i| i as i32); + let a: int32x4x4_t = transmute(vals); + let mut tmp = [0_i32; 16]; + vst1q_s32_x4(tmp.as_mut_ptr().cast(), a); + let r: int32x4x4_t = vld1q_s32_x4(tmp.as_ptr().cast()); + let out: [i32; 16] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s64_x2() { + let vals: [i64; 2] = crate::array::from_fn(|i| i as i64); + let a: int64x1x2_t = transmute(vals); + let mut tmp 
= [0_i64; 2]; + vst1_s64_x2(tmp.as_mut_ptr().cast(), a); + let r: int64x1x2_t = vld1_s64_x2(tmp.as_ptr().cast()); + let out: [i64; 2] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s64_x3() { + let vals: [i64; 3] = crate::array::from_fn(|i| i as i64); + let a: int64x1x3_t = transmute(vals); + let mut tmp = [0_i64; 3]; + vst1_s64_x3(tmp.as_mut_ptr().cast(), a); + let r: int64x1x3_t = vld1_s64_x3(tmp.as_ptr().cast()); + let out: [i64; 3] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s64_x4() { + let vals: [i64; 4] = crate::array::from_fn(|i| i as i64); + let a: int64x1x4_t = transmute(vals); + let mut tmp = [0_i64; 4]; + vst1_s64_x4(tmp.as_mut_ptr().cast(), a); + let r: int64x1x4_t = vld1_s64_x4(tmp.as_ptr().cast()); + let out: [i64; 4] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s64_x2() { + let vals: [i64; 4] = crate::array::from_fn(|i| i as i64); + let a: int64x2x2_t = transmute(vals); + let mut tmp = [0_i64; 4]; + vst1q_s64_x2(tmp.as_mut_ptr().cast(), a); + let r: int64x2x2_t = vld1q_s64_x2(tmp.as_ptr().cast()); + let out: [i64; 4] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s64_x3() { + let vals: [i64; 6] = crate::array::from_fn(|i| i as i64); + let a: int64x2x3_t = transmute(vals); + let mut tmp = [0_i64; 6]; + vst1q_s64_x3(tmp.as_mut_ptr().cast(), a); + let r: int64x2x3_t = vld1q_s64_x3(tmp.as_ptr().cast()); + let out: [i64; 6] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s64_x4() { + let vals: [i64; 8] = crate::array::from_fn(|i| i as i64); + let a: int64x2x4_t = transmute(vals); + let mut tmp = [0_i64; 8]; + vst1q_s64_x4(tmp.as_mut_ptr().cast(), a); + let r: int64x2x4_t = vld1q_s64_x4(tmp.as_ptr().cast()); + let out: [i64; 8] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u8_x2() { + let vals: [u8; 16] = crate::array::from_fn(|i| i as u8); + let a: uint8x8x2_t = transmute(vals); + let mut tmp = [0_u8; 16]; + vst1_u8_x2(tmp.as_mut_ptr().cast(), a); + let r: uint8x8x2_t = vld1_u8_x2(tmp.as_ptr().cast()); + let out: [u8; 16] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u8_x3() { + let vals: [u8; 24] = crate::array::from_fn(|i| i as u8); + let a: uint8x8x3_t = transmute(vals); + let mut tmp = [0_u8; 24]; + vst1_u8_x3(tmp.as_mut_ptr().cast(), a); + let r: uint8x8x3_t = vld1_u8_x3(tmp.as_ptr().cast()); + let out: [u8; 24] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u8_x4() { + let vals: [u8; 32] = crate::array::from_fn(|i| i as u8); + let a: uint8x8x4_t = transmute(vals); + let mut tmp = [0_u8; 32]; + vst1_u8_x4(tmp.as_mut_ptr().cast(), a); + let r: uint8x8x4_t = vld1_u8_x4(tmp.as_ptr().cast()); + let out: [u8; 32] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u8_x2() { + let vals: [u8; 32] = crate::array::from_fn(|i| i as u8); + let a: uint8x16x2_t = transmute(vals); + let mut tmp = [0_u8; 32]; + vst1q_u8_x2(tmp.as_mut_ptr().cast(), a); + let r: uint8x16x2_t = vld1q_u8_x2(tmp.as_ptr().cast()); + let out: [u8; 32] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u8_x3() { + let vals: [u8; 48] = crate::array::from_fn(|i| i as u8); + let a: 
uint8x16x3_t = transmute(vals); + let mut tmp = [0_u8; 48]; + vst1q_u8_x3(tmp.as_mut_ptr().cast(), a); + let r: uint8x16x3_t = vld1q_u8_x3(tmp.as_ptr().cast()); + let out: [u8; 48] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u8_x4() { + let vals: [u8; 64] = crate::array::from_fn(|i| i as u8); + let a: uint8x16x4_t = transmute(vals); + let mut tmp = [0_u8; 64]; + vst1q_u8_x4(tmp.as_mut_ptr().cast(), a); + let r: uint8x16x4_t = vld1q_u8_x4(tmp.as_ptr().cast()); + let out: [u8; 64] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u16_x2() { + let vals: [u16; 8] = crate::array::from_fn(|i| i as u16); + let a: uint16x4x2_t = transmute(vals); + let mut tmp = [0_u16; 8]; + vst1_u16_x2(tmp.as_mut_ptr().cast(), a); + let r: uint16x4x2_t = vld1_u16_x2(tmp.as_ptr().cast()); + let out: [u16; 8] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u16_x3() { + let vals: [u16; 12] = crate::array::from_fn(|i| i as u16); + let a: uint16x4x3_t = transmute(vals); + let mut tmp = [0_u16; 12]; + vst1_u16_x3(tmp.as_mut_ptr().cast(), a); + let r: uint16x4x3_t = vld1_u16_x3(tmp.as_ptr().cast()); + let out: [u16; 12] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u16_x4() { + let vals: [u16; 16] = crate::array::from_fn(|i| i as u16); + let a: uint16x4x4_t = transmute(vals); + let mut tmp = [0_u16; 16]; + vst1_u16_x4(tmp.as_mut_ptr().cast(), a); + let r: uint16x4x4_t = vld1_u16_x4(tmp.as_ptr().cast()); + let out: [u16; 16] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u16_x2() { + let vals: [u16; 16] = crate::array::from_fn(|i| i as u16); + let a: uint16x8x2_t = transmute(vals); + let mut tmp = [0_u16; 16]; + vst1q_u16_x2(tmp.as_mut_ptr().cast(), a); + let r: uint16x8x2_t = vld1q_u16_x2(tmp.as_ptr().cast()); + let out: [u16; 16] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u16_x3() { + let vals: [u16; 24] = crate::array::from_fn(|i| i as u16); + let a: uint16x8x3_t = transmute(vals); + let mut tmp = [0_u16; 24]; + vst1q_u16_x3(tmp.as_mut_ptr().cast(), a); + let r: uint16x8x3_t = vld1q_u16_x3(tmp.as_ptr().cast()); + let out: [u16; 24] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u16_x4() { + let vals: [u16; 32] = crate::array::from_fn(|i| i as u16); + let a: uint16x8x4_t = transmute(vals); + let mut tmp = [0_u16; 32]; + vst1q_u16_x4(tmp.as_mut_ptr().cast(), a); + let r: uint16x8x4_t = vld1q_u16_x4(tmp.as_ptr().cast()); + let out: [u16; 32] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u32_x2() { + let vals: [u32; 4] = crate::array::from_fn(|i| i as u32); + let a: uint32x2x2_t = transmute(vals); + let mut tmp = [0_u32; 4]; + vst1_u32_x2(tmp.as_mut_ptr().cast(), a); + let r: uint32x2x2_t = vld1_u32_x2(tmp.as_ptr().cast()); + let out: [u32; 4] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u32_x3() { + let vals: [u32; 6] = crate::array::from_fn(|i| i as u32); + let a: uint32x2x3_t = transmute(vals); + let mut tmp = [0_u32; 6]; + vst1_u32_x3(tmp.as_mut_ptr().cast(), a); + let r: uint32x2x3_t = vld1_u32_x3(tmp.as_ptr().cast()); + let out: [u32; 6] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vld1_u32_x4() { + let vals: [u32; 8] = crate::array::from_fn(|i| i as u32); + let a: uint32x2x4_t = transmute(vals); + let mut tmp = [0_u32; 8]; + vst1_u32_x4(tmp.as_mut_ptr().cast(), a); + let r: uint32x2x4_t = vld1_u32_x4(tmp.as_ptr().cast()); + let out: [u32; 8] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u32_x2() { + let vals: [u32; 8] = crate::array::from_fn(|i| i as u32); + let a: uint32x4x2_t = transmute(vals); + let mut tmp = [0_u32; 8]; + vst1q_u32_x2(tmp.as_mut_ptr().cast(), a); + let r: uint32x4x2_t = vld1q_u32_x2(tmp.as_ptr().cast()); + let out: [u32; 8] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u32_x3() { + let vals: [u32; 12] = crate::array::from_fn(|i| i as u32); + let a: uint32x4x3_t = transmute(vals); + let mut tmp = [0_u32; 12]; + vst1q_u32_x3(tmp.as_mut_ptr().cast(), a); + let r: uint32x4x3_t = vld1q_u32_x3(tmp.as_ptr().cast()); + let out: [u32; 12] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u32_x4() { + let vals: [u32; 16] = crate::array::from_fn(|i| i as u32); + let a: uint32x4x4_t = transmute(vals); + let mut tmp = [0_u32; 16]; + vst1q_u32_x4(tmp.as_mut_ptr().cast(), a); + let r: uint32x4x4_t = vld1q_u32_x4(tmp.as_ptr().cast()); + let out: [u32; 16] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u64_x2() { + let vals: [u64; 2] = crate::array::from_fn(|i| i as u64); + let a: uint64x1x2_t = transmute(vals); + let mut tmp = [0_u64; 2]; + vst1_u64_x2(tmp.as_mut_ptr().cast(), a); + let r: uint64x1x2_t = vld1_u64_x2(tmp.as_ptr().cast()); + let out: [u64; 2] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u64_x3() { + let vals: [u64; 3] = crate::array::from_fn(|i| i as u64); + let a: uint64x1x3_t = transmute(vals); + let mut tmp = [0_u64; 3]; + vst1_u64_x3(tmp.as_mut_ptr().cast(), a); + let r: uint64x1x3_t = vld1_u64_x3(tmp.as_ptr().cast()); + let out: [u64; 3] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u64_x4() { + let vals: [u64; 4] = crate::array::from_fn(|i| i as u64); + let a: uint64x1x4_t = transmute(vals); + let mut tmp = [0_u64; 4]; + vst1_u64_x4(tmp.as_mut_ptr().cast(), a); + let r: uint64x1x4_t = vld1_u64_x4(tmp.as_ptr().cast()); + let out: [u64; 4] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u64_x2() { + let vals: [u64; 4] = crate::array::from_fn(|i| i as u64); + let a: uint64x2x2_t = transmute(vals); + let mut tmp = [0_u64; 4]; + vst1q_u64_x2(tmp.as_mut_ptr().cast(), a); + let r: uint64x2x2_t = vld1q_u64_x2(tmp.as_ptr().cast()); + let out: [u64; 4] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u64_x3() { + let vals: [u64; 6] = crate::array::from_fn(|i| i as u64); + let a: uint64x2x3_t = transmute(vals); + let mut tmp = [0_u64; 6]; + vst1q_u64_x3(tmp.as_mut_ptr().cast(), a); + let r: uint64x2x3_t = vld1q_u64_x3(tmp.as_ptr().cast()); + let out: [u64; 6] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u64_x4() { + let vals: [u64; 8] = crate::array::from_fn(|i| i as u64); + let a: uint64x2x4_t = transmute(vals); + let mut tmp = [0_u64; 8]; + vst1q_u64_x4(tmp.as_mut_ptr().cast(), a); + let r: uint64x2x4_t = vld1q_u64_x4(tmp.as_ptr().cast()); + let out: [u64; 8] = 
transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_p8_x2() { + let vals: [p8; 16] = crate::array::from_fn(|i| i as p8); + let a: poly8x8x2_t = transmute(vals); + let mut tmp = [0 as p8; 16]; + vst1_p8_x2(tmp.as_mut_ptr().cast(), a); + let r: poly8x8x2_t = vld1_p8_x2(tmp.as_ptr().cast()); + let out: [p8; 16] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_p8_x3() { + let vals: [p8; 24] = crate::array::from_fn(|i| i as p8); + let a: poly8x8x3_t = transmute(vals); + let mut tmp = [0 as p8; 24]; + vst1_p8_x3(tmp.as_mut_ptr().cast(), a); + let r: poly8x8x3_t = vld1_p8_x3(tmp.as_ptr().cast()); + let out: [p8; 24] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_p8_x4() { + let vals: [p8; 32] = crate::array::from_fn(|i| i as p8); + let a: poly8x8x4_t = transmute(vals); + let mut tmp = [0 as p8; 32]; + vst1_p8_x4(tmp.as_mut_ptr().cast(), a); + let r: poly8x8x4_t = vld1_p8_x4(tmp.as_ptr().cast()); + let out: [p8; 32] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_p8_x2() { + let vals: [p8; 32] = crate::array::from_fn(|i| i as p8); + let a: poly8x16x2_t = transmute(vals); + let mut tmp = [0 as p8; 32]; + vst1q_p8_x2(tmp.as_mut_ptr().cast(), a); + let r: poly8x16x2_t = vld1q_p8_x2(tmp.as_ptr().cast()); + let out: [p8; 32] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_p8_x3() { + let vals: [p8; 48] = crate::array::from_fn(|i| i as p8); + let a: poly8x16x3_t = transmute(vals); + let mut tmp = [0 as p8; 48]; + vst1q_p8_x3(tmp.as_mut_ptr().cast(), a); + let r: poly8x16x3_t = vld1q_p8_x3(tmp.as_ptr().cast()); + let out: [p8; 48] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_p8_x4() { + let vals: [p8; 64] = crate::array::from_fn(|i| i as p8); + let a: poly8x16x4_t = transmute(vals); + let mut tmp = [0 as p8; 64]; + vst1q_p8_x4(tmp.as_mut_ptr().cast(), a); + let r: poly8x16x4_t = vld1q_p8_x4(tmp.as_ptr().cast()); + let out: [p8; 64] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_p16_x2() { + let vals: [p16; 8] = crate::array::from_fn(|i| i as p16); + let a: poly16x4x2_t = transmute(vals); + let mut tmp = [0 as p16; 8]; + vst1_p16_x2(tmp.as_mut_ptr().cast(), a); + let r: poly16x4x2_t = vld1_p16_x2(tmp.as_ptr().cast()); + let out: [p16; 8] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_p16_x3() { + let vals: [p16; 12] = crate::array::from_fn(|i| i as p16); + let a: poly16x4x3_t = transmute(vals); + let mut tmp = [0 as p16; 12]; + vst1_p16_x3(tmp.as_mut_ptr().cast(), a); + let r: poly16x4x3_t = vld1_p16_x3(tmp.as_ptr().cast()); + let out: [p16; 12] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_p16_x4() { + let vals: [p16; 16] = crate::array::from_fn(|i| i as p16); + let a: poly16x4x4_t = transmute(vals); + let mut tmp = [0 as p16; 16]; + vst1_p16_x4(tmp.as_mut_ptr().cast(), a); + let r: poly16x4x4_t = vld1_p16_x4(tmp.as_ptr().cast()); + let out: [p16; 16] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_p16_x2() { + let vals: [p16; 16] = crate::array::from_fn(|i| i as p16); + let a: poly16x8x2_t = transmute(vals); + let mut tmp = [0 as p16; 16]; + vst1q_p16_x2(tmp.as_mut_ptr().cast(), 
a); + let r: poly16x8x2_t = vld1q_p16_x2(tmp.as_ptr().cast()); + let out: [p16; 16] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_p16_x3() { + let vals: [p16; 24] = crate::array::from_fn(|i| i as p16); + let a: poly16x8x3_t = transmute(vals); + let mut tmp = [0 as p16; 24]; + vst1q_p16_x3(tmp.as_mut_ptr().cast(), a); + let r: poly16x8x3_t = vld1q_p16_x3(tmp.as_ptr().cast()); + let out: [p16; 24] = transmute(r); + assert_eq!(out, vals); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_p16_x4() { + let vals: [p16; 32] = crate::array::from_fn(|i| i as p16); + let a: poly16x8x4_t = transmute(vals); + let mut tmp = [0 as p16; 32]; + vst1q_p16_x4(tmp.as_mut_ptr().cast(), a); + let r: poly16x8x4_t = vld1q_p16_x4(tmp.as_ptr().cast()); + let out: [p16; 32] = transmute(r); + assert_eq!(out, vals); + } } #[cfg(test)] From e3feb19a9edf7530e550d064f5485c99d5abeca9 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Fri, 30 Jan 2026 21:35:56 +0100 Subject: [PATCH 2/4] maybe fix aarch64be unsigned vector tuple loads --- .../src/arm_shared/neon/generated.rs | 1352 ++--------------- .../spec/neon/arm_shared.spec.yml | 2 + 2 files changed, 102 insertions(+), 1252 deletions(-) diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index 3b67208182..2f52e3b52b 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -17067,7 +17067,6 @@ pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t { #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -17087,38 +17086,10 @@ pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t { transmute(vld1q_s64_x2(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t { - let mut ret_val: poly64x2x2_t = transmute(vld1q_s64_x2(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; - ret_val -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] @@ -17138,39 +17109,10 @@ pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t { transmute(vld1q_s64_x3(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t { - let mut ret_val: poly64x2x3_t = transmute(vld1q_s64_x3(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; - ret_val -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -17189,35 +17131,6 @@ pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t { pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t { transmute(vld1q_s64_x4(transmute(a))) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t { - let mut ret_val: poly64x2x4_t = transmute(vld1q_s64_x4(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; - ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) }; - ret_val -} #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8)"] #[doc = "## Safety"] @@ -18071,7 +17984,6 @@ pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t { #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] 
#[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -18091,11 +18003,10 @@ pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { transmute(vld1_s8_x2(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -18111,18 +18022,14 @@ pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { - let mut ret_val: uint8x8x2_t = transmute(vld1_s8_x2(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val +pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { + transmute(vld1_s8_x3(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -18138,15 +18045,14 @@ pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { - transmute(vld1_s8_x3(transmute(a))) +pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { + transmute(vld1_s8_x4(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -18162,19 +18068,14 @@ pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { - let mut ret_val: uint8x8x3_t = transmute(vld1_s8_x3(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.2 = unsafe { 
simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val +pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { + transmute(vld1q_s8_x2(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -18190,15 +18091,14 @@ pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { - transmute(vld1_s8_x4(transmute(a))) +pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { + transmute(vld1q_s8_x3(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -18214,20 +18114,14 @@ pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { - let mut ret_val: uint8x8x4_t = transmute(vld1_s8_x4(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val +pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { + transmute(vld1q_s8_x4(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -18243,15 +18137,14 @@ pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { - transmute(vld1q_s8_x2(transmute(a))) +pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { + transmute(vld1_s16_x2(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -18267,30 +18160,14 @@ pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { - let mut ret_val: uint8x16x2_t = transmute(vld1q_s8_x2(transmute(a))); - ret_val.0 = unsafe { - simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val.1 = unsafe { - simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val +pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t { + transmute(vld1_s16_x3(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -18306,15 +18183,14 @@ pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { - transmute(vld1q_s8_x3(transmute(a))) +pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t { + transmute(vld1_s16_x4(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -18330,37 +18206,14 @@ pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { - let mut ret_val: uint8x16x3_t = transmute(vld1q_s8_x3(transmute(a))); - ret_val.0 = unsafe { - simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val.1 = unsafe { - simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val.2 = unsafe { - simd_shuffle!( - ret_val.2, - ret_val.2, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val +pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t { + transmute(vld1q_s16_x2(transmute(a))) } #[doc = "Load 
multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -18376,15 +18229,14 @@ pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { - transmute(vld1q_s8_x4(transmute(a))) +pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t { + transmute(vld1q_s16_x3(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -18400,44 +18252,14 @@ pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { - let mut ret_val: uint8x16x4_t = transmute(vld1q_s8_x4(transmute(a))); - ret_val.0 = unsafe { - simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val.1 = unsafe { - simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val.2 = unsafe { - simd_shuffle!( - ret_val.2, - ret_val.2, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val.3 = unsafe { - simd_shuffle!( - ret_val.3, - ret_val.3, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val +pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t { + transmute(vld1q_s16_x4(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -18453,15 +18275,14 @@ pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { - transmute(vld1_s16_x2(transmute(a))) +pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t { + transmute(vld1_s32_x2(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc 
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -18477,18 +18298,14 @@ pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { - let mut ret_val: uint16x4x2_t = transmute(vld1_s16_x2(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; - ret_val +pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t { + transmute(vld1_s32_x3(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -18504,840 +18321,14 @@ pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t { - transmute(vld1_s16_x3(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t { - let mut ret_val: uint16x4x3_t = transmute(vld1_s16_x3(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; - ret_val -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t { - transmute(vld1_s16_x4(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t { - let mut ret_val: uint16x4x4_t = transmute(vld1_s16_x4(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; - ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) }; - ret_val -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t { - transmute(vld1q_s16_x2(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t { - let mut ret_val: uint16x8x2_t = transmute(vld1q_s16_x2(transmute(a))); - ret_val.0 = 
unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t { - transmute(vld1q_s16_x3(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t { - let mut ret_val: uint16x8x3_t = transmute(vld1q_s16_x3(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t { - transmute(vld1q_s16_x4(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] 
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t { - let mut ret_val: uint16x8x4_t = transmute(vld1q_s16_x4(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t { - transmute(vld1_s32_x2(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t { - let mut ret_val: uint32x2x2_t = transmute(vld1_s32_x2(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; - ret_val -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = 
"arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t { - transmute(vld1_s32_x3(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t { - let mut ret_val: uint32x2x3_t = transmute(vld1_s32_x3(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; - ret_val -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t { - transmute(vld1_s32_x4(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t { - let mut ret_val: uint32x2x4_t = transmute(vld1_s32_x4(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; - ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) }; - ret_val -} 
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t { - transmute(vld1q_s32_x2(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t { - let mut ret_val: uint32x4x2_t = transmute(vld1q_s32_x2(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; - ret_val -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t { - transmute(vld1q_s32_x3(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = 
"neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t { - let mut ret_val: uint32x4x3_t = transmute(vld1q_s32_x3(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; - ret_val -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t { - transmute(vld1q_s32_x4(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t { - let mut ret_val: uint32x4x4_t = transmute(vld1q_s32_x4(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; - ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) }; - ret_val -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t { - transmute(vld1_s64_x2(transmute(a))) -} -#[doc = 
"Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t { - transmute(vld1_s64_x3(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t { - transmute(vld1_s64_x4(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t { - transmute(vld1q_s64_x2(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t { - let mut ret_val: uint64x2x2_t = 
transmute(vld1q_s64_x2(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; - ret_val -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t { - transmute(vld1q_s64_x3(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t { - let mut ret_val: uint64x2x3_t = transmute(vld1q_s64_x3(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; - ret_val -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t { - transmute(vld1q_s64_x4(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t { - let mut ret_val: uint64x2x4_t = transmute(vld1q_s64_x4(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) }; - ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) }; - ret_val -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t { - transmute(vld1_s8_x2(transmute(a))) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t { - let mut ret_val: poly8x8x2_t = transmute(vld1_s8_x2(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val +pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t { + transmute(vld1_s32_x4(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19353,15 
+18344,14 @@ pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { - transmute(vld1_s8_x3(transmute(a))) +pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t { + transmute(vld1q_s32_x2(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19377,19 +18367,14 @@ pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { - let mut ret_val: poly8x8x3_t = transmute(vld1_s8_x3(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val +pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t { + transmute(vld1q_s32_x3(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19405,15 +18390,14 @@ pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { - transmute(vld1_s8_x4(transmute(a))) +pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t { + transmute(vld1q_s32_x4(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19429,20 +18413,14 @@ pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { - let mut ret_val: poly8x8x4_t = transmute(vld1_s8_x4(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 
1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val +pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t { + transmute(vld1_s64_x2(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19458,15 +18436,14 @@ pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { - transmute(vld1q_s8_x2(transmute(a))) +pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t { + transmute(vld1_s64_x3(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19482,30 +18459,14 @@ pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { - let mut ret_val: poly8x16x2_t = transmute(vld1q_s8_x2(transmute(a))); - ret_val.0 = unsafe { - simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val.1 = unsafe { - simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val +pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t { + transmute(vld1_s64_x4(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19521,15 +18482,14 @@ pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { - transmute(vld1q_s8_x3(transmute(a))) +pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t { + 
transmute(vld1q_s64_x2(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19545,37 +18505,14 @@ pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { - let mut ret_val: poly8x16x3_t = transmute(vld1q_s8_x3(transmute(a))); - ret_val.0 = unsafe { - simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val.1 = unsafe { - simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val.2 = unsafe { - simd_shuffle!( - ret_val.2, - ret_val.2, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val +pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t { + transmute(vld1q_s64_x3(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19591,15 +18528,14 @@ pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { - transmute(vld1q_s8_x4(transmute(a))) +pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t { + transmute(vld1q_s64_x4(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19615,44 +18551,14 @@ pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { - let mut ret_val: poly8x16x4_t = transmute(vld1q_s8_x4(transmute(a))); - ret_val.0 = unsafe { - simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val.1 = unsafe { - simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 
2, 1, 0] - ) - }; - ret_val.2 = unsafe { - simd_shuffle!( - ret_val.2, - ret_val.2, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val.3 = unsafe { - simd_shuffle!( - ret_val.3, - ret_val.3, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - }; - ret_val +pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t { + transmute(vld1_s8_x2(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19668,15 +18574,14 @@ pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t { - transmute(vld1_s16_x2(transmute(a))) +pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { + transmute(vld1_s8_x3(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19692,18 +18597,14 @@ pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t { - let mut ret_val: poly16x4x2_t = transmute(vld1_s16_x2(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; - ret_val +pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { + transmute(vld1_s8_x4(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19719,15 +18620,14 @@ pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t { - transmute(vld1_s16_x3(transmute(a))) +pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { + transmute(vld1q_s8_x2(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four 
registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19743,19 +18643,14 @@ pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t { - let mut ret_val: poly16x4x3_t = transmute(vld1_s16_x3(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; - ret_val +pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { + transmute(vld1q_s8_x3(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19771,15 +18666,14 @@ pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { - transmute(vld1_s16_x4(transmute(a))) +pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { + transmute(vld1q_s8_x4(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19795,20 +18689,14 @@ pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { - let mut ret_val: poly16x4x4_t = transmute(vld1_s16_x4(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) }; - ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) }; - ret_val +pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t { + transmute(vld1_s16_x2(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19824,15 +18712,14 @@ pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { - transmute(vld1q_s16_x2(transmute(a))) +pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t { + transmute(vld1_s16_x3(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19848,18 +18735,14 @@ pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { - let mut ret_val: poly16x8x2_t = transmute(vld1q_s16_x2(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val +pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { + transmute(vld1_s16_x4(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19875,15 +18758,14 @@ pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t { - transmute(vld1q_s16_x3(transmute(a))) +pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { + transmute(vld1q_s16_x2(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19900,18 +18782,13 @@ pub unsafe fn 
vld1q_p16_x3(a: *const p16) -> poly16x8x3_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t { - let mut ret_val: poly16x8x3_t = transmute(vld1q_s16_x3(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val + transmute(vld1q_s16_x3(transmute(a))) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] @@ -19930,35 +18807,6 @@ pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t { pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t { transmute(vld1q_s16_x4(transmute(a))) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t { - let mut ret_val: poly16x8x4_t = transmute(vld1q_s16_x4(transmute(a))); - ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; - ret_val -} #[inline(always)] #[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index 52748a4cc0..3ec7ba8814 100644 --- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -2681,6 +2681,7 @@ intrinsics: - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable + big_endian_inverse: false safety: unsafe: [neon] types: @@ -2740,6 +2741,7 @@ intrinsics: - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable + big_endian_inverse: false safety: unsafe: [neon] types: From fe9aa9cc5a39291aff9c86c24bff1eba8a1b668b Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 1 Feb 2026 13:50:20 +0100 Subject: [PATCH 3/4] use macro for wide store/load roundtrip tests --- 
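Note for reviewers: the store/load roundtrip tests below all share the same shape, so this patch folds them into a local `wide_store_load_roundtrip!` macro. For illustration only (the `test_vld1q_s32_x2_expanded` name is invented here for the sketch; the other identifiers are taken from the diff below), a converted test and roughly what its macro call expands to look like this:

    #[simd_test(enable = "neon")]
    unsafe fn test_vld1q_s32_x2() {
        wide_store_load_roundtrip!(i32, 8, int32x4x2_t, vst1q_s32_x2, vld1q_s32_x2);
    }

    // Roughly the expansion of the macro call above: build a known element
    // array, reinterpret it as the multi-vector tuple, store it with the
    // vst1* intrinsic, load it back with the vld1* intrinsic, and check that
    // the flat element order is preserved.
    #[simd_test(enable = "neon")]
    unsafe fn test_vld1q_s32_x2_expanded() {
        let vals: [i32; 8] = crate::array::from_fn(|i| i as i32);
        let a: int32x4x2_t = transmute(vals);
        let mut tmp = [0 as i32; 8];
        vst1q_s32_x2(tmp.as_mut_ptr().cast(), a);
        let r: int32x4x2_t = vld1q_s32_x2(tmp.as_ptr().cast());
        let out: [i32; 8] = transmute(r);
        assert_eq!(out, vals);
    }

This keeps each test to a single line while still exercising every store/load pair with distinct element values.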
crates/core_arch/src/aarch64/neon/mod.rs | 636 ++++------------------- 1 file changed, 90 insertions(+), 546 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index feaf94a7f9..ee27bef973 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -994,868 +994,412 @@ mod tests { assert_eq!(vals[2], 2.); } + macro_rules! wide_store_load_roundtrip { + ($elem_ty:ty, $len:expr, $vec_ty:ty, $store:expr, $load:expr) => { + let vals: [$elem_ty; $len] = crate::array::from_fn(|i| i as $elem_ty); + let a: $vec_ty = transmute(vals); + let mut tmp = [0 as $elem_ty; $len]; + $store(tmp.as_mut_ptr().cast(), a); + let r: $vec_ty = $load(tmp.as_ptr().cast()); + let out: [$elem_ty; $len] = transmute(r); + assert_eq!(out, vals); + }; + } + #[simd_test(enable = "neon,fp16")] #[cfg(not(target_arch = "arm64ec"))] unsafe fn test_vld1_f16_x2() { - let vals: [f16; 8] = crate::array::from_fn(|i| i as f16); - let a: float16x4x2_t = transmute(vals); - let mut tmp = [0_f16; 8]; - vst1_f16_x2(tmp.as_mut_ptr().cast(), a); - let r: float16x4x2_t = vld1_f16_x2(tmp.as_ptr().cast()); - let out: [f16; 8] = transmute(r); - assert_eq!(out, vals); + wide_store_load_roundtrip!(f16, 8, float16x4x2_t, vst1_f16_x2, vld1_f16_x2); } #[simd_test(enable = "neon,fp16")] #[cfg(not(target_arch = "arm64ec"))] unsafe fn test_vld1_f16_x3() { - let vals: [f16; 12] = crate::array::from_fn(|i| i as f16); - let a: float16x4x3_t = transmute(vals); - let mut tmp = [0_f16; 12]; - vst1_f16_x3(tmp.as_mut_ptr().cast(), a); - let r: float16x4x3_t = vld1_f16_x3(tmp.as_ptr().cast()); - let out: [f16; 12] = transmute(r); - assert_eq!(out, vals); + wide_store_load_roundtrip!(f16, 12, float16x4x3_t, vst1_f16_x3, vld1_f16_x3); } #[simd_test(enable = "neon,fp16")] #[cfg(not(target_arch = "arm64ec"))] unsafe fn test_vld1_f16_x4() { - let vals: [f16; 16] = crate::array::from_fn(|i| i as f16); - let a: float16x4x4_t = transmute(vals); - let mut tmp = [0_f16; 16]; - vst1_f16_x4(tmp.as_mut_ptr().cast(), a); - let r: float16x4x4_t = vld1_f16_x4(tmp.as_ptr().cast()); - let out: [f16; 16] = transmute(r); - assert_eq!(out, vals); + wide_store_load_roundtrip!(f16, 16, float16x4x4_t, vst1_f16_x4, vld1_f16_x4); } #[simd_test(enable = "neon,fp16")] #[cfg(not(target_arch = "arm64ec"))] unsafe fn test_vld1q_f16_x2() { - let vals: [f16; 16] = crate::array::from_fn(|i| i as f16); - let a: float16x8x2_t = transmute(vals); - let mut tmp = [0_f16; 16]; - vst1q_f16_x2(tmp.as_mut_ptr().cast(), a); - let r: float16x8x2_t = vld1q_f16_x2(tmp.as_ptr().cast()); - let out: [f16; 16] = transmute(r); - assert_eq!(out, vals); + wide_store_load_roundtrip!(f16, 16, float16x8x2_t, vst1q_f16_x2, vld1q_f16_x2); } #[simd_test(enable = "neon,fp16")] #[cfg(not(target_arch = "arm64ec"))] unsafe fn test_vld1q_f16_x3() { - let vals: [f16; 24] = crate::array::from_fn(|i| i as f16); - let a: float16x8x3_t = transmute(vals); - let mut tmp = [0_f16; 24]; - vst1q_f16_x3(tmp.as_mut_ptr().cast(), a); - let r: float16x8x3_t = vld1q_f16_x3(tmp.as_ptr().cast()); - let out: [f16; 24] = transmute(r); - assert_eq!(out, vals); + wide_store_load_roundtrip!(f16, 24, float16x8x3_t, vst1q_f16_x3, vld1q_f16_x3); } #[simd_test(enable = "neon,fp16")] #[cfg(not(target_arch = "arm64ec"))] unsafe fn test_vld1q_f16_x4() { - let vals: [f16; 32] = crate::array::from_fn(|i| i as f16); - let a: float16x8x4_t = transmute(vals); - let mut tmp = [0_f16; 32]; - vst1q_f16_x4(tmp.as_mut_ptr().cast(), a); - let r: float16x8x4_t = 
vld1q_f16_x4(tmp.as_ptr().cast());
-        let out: [f16; 32] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(f16, 32, float16x8x4_t, vst1q_f16_x4, vld1q_f16_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_f32_x2() {
-        let vals: [f32; 4] = crate::array::from_fn(|i| i as f32);
-        let a: float32x2x2_t = transmute(vals);
-        let mut tmp = [0_f32; 4];
-        vst1_f32_x2(tmp.as_mut_ptr().cast(), a);
-        let r: float32x2x2_t = vld1_f32_x2(tmp.as_ptr().cast());
-        let out: [f32; 4] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(f32, 4, float32x2x2_t, vst1_f32_x2, vld1_f32_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_f32_x3() {
-        let vals: [f32; 6] = crate::array::from_fn(|i| i as f32);
-        let a: float32x2x3_t = transmute(vals);
-        let mut tmp = [0_f32; 6];
-        vst1_f32_x3(tmp.as_mut_ptr().cast(), a);
-        let r: float32x2x3_t = vld1_f32_x3(tmp.as_ptr().cast());
-        let out: [f32; 6] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(f32, 6, float32x2x3_t, vst1_f32_x3, vld1_f32_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_f32_x4() {
-        let vals: [f32; 8] = crate::array::from_fn(|i| i as f32);
-        let a: float32x2x4_t = transmute(vals);
-        let mut tmp = [0_f32; 8];
-        vst1_f32_x4(tmp.as_mut_ptr().cast(), a);
-        let r: float32x2x4_t = vld1_f32_x4(tmp.as_ptr().cast());
-        let out: [f32; 8] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(f32, 8, float32x2x4_t, vst1_f32_x4, vld1_f32_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_f32_x2() {
-        let vals: [f32; 8] = crate::array::from_fn(|i| i as f32);
-        let a: float32x4x2_t = transmute(vals);
-        let mut tmp = [0_f32; 8];
-        vst1q_f32_x2(tmp.as_mut_ptr().cast(), a);
-        let r: float32x4x2_t = vld1q_f32_x2(tmp.as_ptr().cast());
-        let out: [f32; 8] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(f32, 8, float32x4x2_t, vst1q_f32_x2, vld1q_f32_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_f32_x3() {
-        let vals: [f32; 12] = crate::array::from_fn(|i| i as f32);
-        let a: float32x4x3_t = transmute(vals);
-        let mut tmp = [0_f32; 12];
-        vst1q_f32_x3(tmp.as_mut_ptr().cast(), a);
-        let r: float32x4x3_t = vld1q_f32_x3(tmp.as_ptr().cast());
-        let out: [f32; 12] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(f32, 12, float32x4x3_t, vst1q_f32_x3, vld1q_f32_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_f32_x4() {
-        let vals: [f32; 16] = crate::array::from_fn(|i| i as f32);
-        let a: float32x4x4_t = transmute(vals);
-        let mut tmp = [0_f32; 16];
-        vst1q_f32_x4(tmp.as_mut_ptr().cast(), a);
-        let r: float32x4x4_t = vld1q_f32_x4(tmp.as_ptr().cast());
-        let out: [f32; 16] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(f32, 16, float32x4x4_t, vst1q_f32_x4, vld1q_f32_x4);
     }

     #[simd_test(enable = "neon,aes")]
     unsafe fn test_vld1_p64_x2() {
-        let vals: [p64; 2] = crate::array::from_fn(|i| i as p64);
-        let a: poly64x1x2_t = transmute(vals);
-        let mut tmp = [0 as p64; 2];
-        vst1_p64_x2(tmp.as_mut_ptr().cast(), a);
-        let r: poly64x1x2_t = vld1_p64_x2(tmp.as_ptr().cast());
-        let out: [p64; 2] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p64, 2, poly64x1x2_t, vst1_p64_x2, vld1_p64_x2);
     }

     #[simd_test(enable = "neon,aes")]
     unsafe fn test_vld1_p64_x3() {
-        let vals: [p64; 3] = crate::array::from_fn(|i| i as p64);
-        let a: poly64x1x3_t = transmute(vals);
-        let mut tmp = [0 as p64; 3];
-        vst1_p64_x3(tmp.as_mut_ptr().cast(), a);
-        let r: poly64x1x3_t = vld1_p64_x3(tmp.as_ptr().cast());
-        let out: [p64; 3] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p64, 3, poly64x1x3_t, vst1_p64_x3, vld1_p64_x3);
     }

     #[simd_test(enable = "neon,aes")]
     unsafe fn test_vld1_p64_x4() {
-        let vals: [p64; 4] = crate::array::from_fn(|i| i as p64);
-        let a: poly64x1x4_t = transmute(vals);
-        let mut tmp = [0 as p64; 4];
-        vst1_p64_x4(tmp.as_mut_ptr().cast(), a);
-        let r: poly64x1x4_t = vld1_p64_x4(tmp.as_ptr().cast());
-        let out: [p64; 4] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p64, 4, poly64x1x4_t, vst1_p64_x4, vld1_p64_x4);
     }

     #[simd_test(enable = "neon,aes")]
     unsafe fn test_vld1q_p64_x2() {
-        let vals: [p64; 4] = crate::array::from_fn(|i| i as p64);
-        let a: poly64x2x2_t = transmute(vals);
-        let mut tmp = [0 as p64; 4];
-        vst1q_p64_x2(tmp.as_mut_ptr().cast(), a);
-        let r: poly64x2x2_t = vld1q_p64_x2(tmp.as_ptr().cast());
-        let out: [p64; 4] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p64, 4, poly64x2x2_t, vst1q_p64_x2, vld1q_p64_x2);
     }

     #[simd_test(enable = "neon,aes")]
     unsafe fn test_vld1q_p64_x3() {
-        let vals: [p64; 6] = crate::array::from_fn(|i| i as p64);
-        let a: poly64x2x3_t = transmute(vals);
-        let mut tmp = [0 as p64; 6];
-        vst1q_p64_x3(tmp.as_mut_ptr().cast(), a);
-        let r: poly64x2x3_t = vld1q_p64_x3(tmp.as_ptr().cast());
-        let out: [p64; 6] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p64, 6, poly64x2x3_t, vst1q_p64_x3, vld1q_p64_x3);
     }

     #[simd_test(enable = "neon,aes")]
     unsafe fn test_vld1q_p64_x4() {
-        let vals: [p64; 8] = crate::array::from_fn(|i| i as p64);
-        let a: poly64x2x4_t = transmute(vals);
-        let mut tmp = [0 as p64; 8];
-        vst1q_p64_x4(tmp.as_mut_ptr().cast(), a);
-        let r: poly64x2x4_t = vld1q_p64_x4(tmp.as_ptr().cast());
-        let out: [p64; 8] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p64, 8, poly64x2x4_t, vst1q_p64_x4, vld1q_p64_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_s8_x2() {
-        let vals: [i8; 16] = crate::array::from_fn(|i| i as i8);
-        let a: int8x8x2_t = transmute(vals);
-        let mut tmp = [0_i8; 16];
-        vst1_s8_x2(tmp.as_mut_ptr().cast(), a);
-        let r: int8x8x2_t = vld1_s8_x2(tmp.as_ptr().cast());
-        let out: [i8; 16] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i8, 16, int8x8x2_t, vst1_s8_x2, vld1_s8_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_s8_x3() {
-        let vals: [i8; 24] = crate::array::from_fn(|i| i as i8);
-        let a: int8x8x3_t = transmute(vals);
-        let mut tmp = [0_i8; 24];
-        vst1_s8_x3(tmp.as_mut_ptr().cast(), a);
-        let r: int8x8x3_t = vld1_s8_x3(tmp.as_ptr().cast());
-        let out: [i8; 24] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i8, 24, int8x8x3_t, vst1_s8_x3, vld1_s8_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_s8_x4() {
-        let vals: [i8; 32] = crate::array::from_fn(|i| i as i8);
-        let a: int8x8x4_t = transmute(vals);
-        let mut tmp = [0_i8; 32];
-        vst1_s8_x4(tmp.as_mut_ptr().cast(), a);
-        let r: int8x8x4_t = vld1_s8_x4(tmp.as_ptr().cast());
-        let out: [i8; 32] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i8, 32, int8x8x4_t, vst1_s8_x4, vld1_s8_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_s8_x2() {
-        let vals: [i8; 32] = crate::array::from_fn(|i| i as i8);
-        let a: int8x16x2_t = transmute(vals);
-        let mut tmp = [0_i8; 32];
-        vst1q_s8_x2(tmp.as_mut_ptr().cast(), a);
-        let r: int8x16x2_t = vld1q_s8_x2(tmp.as_ptr().cast());
-        let out: [i8; 32] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i8, 32, int8x16x2_t, vst1q_s8_x2, vld1q_s8_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_s8_x3() {
-        let vals: [i8; 48] = crate::array::from_fn(|i| i as i8);
-        let a: int8x16x3_t = transmute(vals);
-        let mut tmp = [0_i8; 48];
-        vst1q_s8_x3(tmp.as_mut_ptr().cast(), a);
-        let r: int8x16x3_t = vld1q_s8_x3(tmp.as_ptr().cast());
-        let out: [i8; 48] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i8, 48, int8x16x3_t, vst1q_s8_x3, vld1q_s8_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_s8_x4() {
-        let vals: [i8; 64] = crate::array::from_fn(|i| i as i8);
-        let a: int8x16x4_t = transmute(vals);
-        let mut tmp = [0_i8; 64];
-        vst1q_s8_x4(tmp.as_mut_ptr().cast(), a);
-        let r: int8x16x4_t = vld1q_s8_x4(tmp.as_ptr().cast());
-        let out: [i8; 64] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i8, 64, int8x16x4_t, vst1q_s8_x4, vld1q_s8_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_s16_x2() {
-        let vals: [i16; 8] = crate::array::from_fn(|i| i as i16);
-        let a: int16x4x2_t = transmute(vals);
-        let mut tmp = [0_i16; 8];
-        vst1_s16_x2(tmp.as_mut_ptr().cast(), a);
-        let r: int16x4x2_t = vld1_s16_x2(tmp.as_ptr().cast());
-        let out: [i16; 8] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i16, 8, int16x4x2_t, vst1_s16_x2, vld1_s16_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_s16_x3() {
-        let vals: [i16; 12] = crate::array::from_fn(|i| i as i16);
-        let a: int16x4x3_t = transmute(vals);
-        let mut tmp = [0_i16; 12];
-        vst1_s16_x3(tmp.as_mut_ptr().cast(), a);
-        let r: int16x4x3_t = vld1_s16_x3(tmp.as_ptr().cast());
-        let out: [i16; 12] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i16, 12, int16x4x3_t, vst1_s16_x3, vld1_s16_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_s16_x4() {
-        let vals: [i16; 16] = crate::array::from_fn(|i| i as i16);
-        let a: int16x4x4_t = transmute(vals);
-        let mut tmp = [0_i16; 16];
-        vst1_s16_x4(tmp.as_mut_ptr().cast(), a);
-        let r: int16x4x4_t = vld1_s16_x4(tmp.as_ptr().cast());
-        let out: [i16; 16] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i16, 16, int16x4x4_t, vst1_s16_x4, vld1_s16_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_s16_x2() {
-        let vals: [i16; 16] = crate::array::from_fn(|i| i as i16);
-        let a: int16x8x2_t = transmute(vals);
-        let mut tmp = [0_i16; 16];
-        vst1q_s16_x2(tmp.as_mut_ptr().cast(), a);
-        let r: int16x8x2_t = vld1q_s16_x2(tmp.as_ptr().cast());
-        let out: [i16; 16] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i16, 16, int16x8x2_t, vst1q_s16_x2, vld1q_s16_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_s16_x3() {
-        let vals: [i16; 24] = crate::array::from_fn(|i| i as i16);
-        let a: int16x8x3_t = transmute(vals);
-        let mut tmp = [0_i16; 24];
-        vst1q_s16_x3(tmp.as_mut_ptr().cast(), a);
-        let r: int16x8x3_t = vld1q_s16_x3(tmp.as_ptr().cast());
-        let out: [i16; 24] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i16, 24, int16x8x3_t, vst1q_s16_x3, vld1q_s16_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_s16_x4() {
-        let vals: [i16; 32] = crate::array::from_fn(|i| i as i16);
-        let a: int16x8x4_t = transmute(vals);
-        let mut tmp = [0_i16; 32];
-        vst1q_s16_x4(tmp.as_mut_ptr().cast(), a);
-        let r: int16x8x4_t = vld1q_s16_x4(tmp.as_ptr().cast());
-        let out: [i16; 32] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i16, 32, int16x8x4_t, vst1q_s16_x4, vld1q_s16_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_s32_x2() {
-        let vals: [i32; 4] = crate::array::from_fn(|i| i as i32);
-        let a: int32x2x2_t = transmute(vals);
-        let mut tmp = [0_i32; 4];
-        vst1_s32_x2(tmp.as_mut_ptr().cast(), a);
-        let r: int32x2x2_t = vld1_s32_x2(tmp.as_ptr().cast());
-        let out: [i32; 4] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i32, 4, int32x2x2_t, vst1_s32_x2, vld1_s32_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_s32_x3() {
-        let vals: [i32; 6] = crate::array::from_fn(|i| i as i32);
-        let a: int32x2x3_t = transmute(vals);
-        let mut tmp = [0_i32; 6];
-        vst1_s32_x3(tmp.as_mut_ptr().cast(), a);
-        let r: int32x2x3_t = vld1_s32_x3(tmp.as_ptr().cast());
-        let out: [i32; 6] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i32, 6, int32x2x3_t, vst1_s32_x3, vld1_s32_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_s32_x4() {
-        let vals: [i32; 8] = crate::array::from_fn(|i| i as i32);
-        let a: int32x2x4_t = transmute(vals);
-        let mut tmp = [0_i32; 8];
-        vst1_s32_x4(tmp.as_mut_ptr().cast(), a);
-        let r: int32x2x4_t = vld1_s32_x4(tmp.as_ptr().cast());
-        let out: [i32; 8] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i32, 8, int32x2x4_t, vst1_s32_x4, vld1_s32_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_s32_x2() {
-        let vals: [i32; 8] = crate::array::from_fn(|i| i as i32);
-        let a: int32x4x2_t = transmute(vals);
-        let mut tmp = [0_i32; 8];
-        vst1q_s32_x2(tmp.as_mut_ptr().cast(), a);
-        let r: int32x4x2_t = vld1q_s32_x2(tmp.as_ptr().cast());
-        let out: [i32; 8] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i32, 8, int32x4x2_t, vst1q_s32_x2, vld1q_s32_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_s32_x3() {
-        let vals: [i32; 12] = crate::array::from_fn(|i| i as i32);
-        let a: int32x4x3_t = transmute(vals);
-        let mut tmp = [0_i32; 12];
-        vst1q_s32_x3(tmp.as_mut_ptr().cast(), a);
-        let r: int32x4x3_t = vld1q_s32_x3(tmp.as_ptr().cast());
-        let out: [i32; 12] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i32, 12, int32x4x3_t, vst1q_s32_x3, vld1q_s32_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_s32_x4() {
-        let vals: [i32; 16] = crate::array::from_fn(|i| i as i32);
-        let a: int32x4x4_t = transmute(vals);
-        let mut tmp = [0_i32; 16];
-        vst1q_s32_x4(tmp.as_mut_ptr().cast(), a);
-        let r: int32x4x4_t = vld1q_s32_x4(tmp.as_ptr().cast());
-        let out: [i32; 16] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i32, 16, int32x4x4_t, vst1q_s32_x4, vld1q_s32_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_s64_x2() {
-        let vals: [i64; 2] = crate::array::from_fn(|i| i as i64);
-        let a: int64x1x2_t = transmute(vals);
-        let mut tmp = [0_i64; 2];
-        vst1_s64_x2(tmp.as_mut_ptr().cast(), a);
-        let r: int64x1x2_t = vld1_s64_x2(tmp.as_ptr().cast());
-        let out: [i64; 2] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i64, 2, int64x1x2_t, vst1_s64_x2, vld1_s64_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_s64_x3() {
-        let vals: [i64; 3] = crate::array::from_fn(|i| i as i64);
-        let a: int64x1x3_t = transmute(vals);
-        let mut tmp = [0_i64; 3];
-        vst1_s64_x3(tmp.as_mut_ptr().cast(), a);
-        let r: int64x1x3_t = vld1_s64_x3(tmp.as_ptr().cast());
-        let out: [i64; 3] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i64, 3, int64x1x3_t, vst1_s64_x3, vld1_s64_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_s64_x4() {
-        let vals: [i64; 4] = crate::array::from_fn(|i| i as i64);
-        let a: int64x1x4_t = transmute(vals);
-        let mut tmp = [0_i64; 4];
-        vst1_s64_x4(tmp.as_mut_ptr().cast(), a);
-        let r: int64x1x4_t = vld1_s64_x4(tmp.as_ptr().cast());
-        let out: [i64; 4] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i64, 4, int64x1x4_t, vst1_s64_x4, vld1_s64_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_s64_x2() {
-        let vals: [i64; 4] = crate::array::from_fn(|i| i as i64);
-        let a: int64x2x2_t = transmute(vals);
-        let mut tmp = [0_i64; 4];
-        vst1q_s64_x2(tmp.as_mut_ptr().cast(), a);
-        let r: int64x2x2_t = vld1q_s64_x2(tmp.as_ptr().cast());
-        let out: [i64; 4] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i64, 4, int64x2x2_t, vst1q_s64_x2, vld1q_s64_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_s64_x3() {
-        let vals: [i64; 6] = crate::array::from_fn(|i| i as i64);
-        let a: int64x2x3_t = transmute(vals);
-        let mut tmp = [0_i64; 6];
-        vst1q_s64_x3(tmp.as_mut_ptr().cast(), a);
-        let r: int64x2x3_t = vld1q_s64_x3(tmp.as_ptr().cast());
-        let out: [i64; 6] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i64, 6, int64x2x3_t, vst1q_s64_x3, vld1q_s64_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_s64_x4() {
-        let vals: [i64; 8] = crate::array::from_fn(|i| i as i64);
-        let a: int64x2x4_t = transmute(vals);
-        let mut tmp = [0_i64; 8];
-        vst1q_s64_x4(tmp.as_mut_ptr().cast(), a);
-        let r: int64x2x4_t = vld1q_s64_x4(tmp.as_ptr().cast());
-        let out: [i64; 8] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(i64, 8, int64x2x4_t, vst1q_s64_x4, vld1q_s64_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_u8_x2() {
-        let vals: [u8; 16] = crate::array::from_fn(|i| i as u8);
-        let a: uint8x8x2_t = transmute(vals);
-        let mut tmp = [0_u8; 16];
-        vst1_u8_x2(tmp.as_mut_ptr().cast(), a);
-        let r: uint8x8x2_t = vld1_u8_x2(tmp.as_ptr().cast());
-        let out: [u8; 16] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u8, 16, uint8x8x2_t, vst1_u8_x2, vld1_u8_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_u8_x3() {
-        let vals: [u8; 24] = crate::array::from_fn(|i| i as u8);
-        let a: uint8x8x3_t = transmute(vals);
-        let mut tmp = [0_u8; 24];
-        vst1_u8_x3(tmp.as_mut_ptr().cast(), a);
-        let r: uint8x8x3_t = vld1_u8_x3(tmp.as_ptr().cast());
-        let out: [u8; 24] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u8, 24, uint8x8x3_t, vst1_u8_x3, vld1_u8_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_u8_x4() {
-        let vals: [u8; 32] = crate::array::from_fn(|i| i as u8);
-        let a: uint8x8x4_t = transmute(vals);
-        let mut tmp = [0_u8; 32];
-        vst1_u8_x4(tmp.as_mut_ptr().cast(), a);
-        let r: uint8x8x4_t = vld1_u8_x4(tmp.as_ptr().cast());
-        let out: [u8; 32] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u8, 32, uint8x8x4_t, vst1_u8_x4, vld1_u8_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_u8_x2() {
-        let vals: [u8; 32] = crate::array::from_fn(|i| i as u8);
-        let a: uint8x16x2_t = transmute(vals);
-        let mut tmp = [0_u8; 32];
-        vst1q_u8_x2(tmp.as_mut_ptr().cast(), a);
-        let r: uint8x16x2_t = vld1q_u8_x2(tmp.as_ptr().cast());
-        let out: [u8; 32] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u8, 32, uint8x16x2_t, vst1q_u8_x2, vld1q_u8_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_u8_x3() {
-        let vals: [u8; 48] = crate::array::from_fn(|i| i as u8);
-        let a: uint8x16x3_t = transmute(vals);
-        let mut tmp = [0_u8; 48];
-        vst1q_u8_x3(tmp.as_mut_ptr().cast(), a);
-        let r: uint8x16x3_t = vld1q_u8_x3(tmp.as_ptr().cast());
-        let out: [u8; 48] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u8, 48, uint8x16x3_t, vst1q_u8_x3, vld1q_u8_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_u8_x4() {
-        let vals: [u8; 64] = crate::array::from_fn(|i| i as u8);
-        let a: uint8x16x4_t = transmute(vals);
-        let mut tmp = [0_u8; 64];
-        vst1q_u8_x4(tmp.as_mut_ptr().cast(), a);
-        let r: uint8x16x4_t = vld1q_u8_x4(tmp.as_ptr().cast());
-        let out: [u8; 64] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u8, 64, uint8x16x4_t, vst1q_u8_x4, vld1q_u8_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_u16_x2() {
-        let vals: [u16; 8] = crate::array::from_fn(|i| i as u16);
-        let a: uint16x4x2_t = transmute(vals);
-        let mut tmp = [0_u16; 8];
-        vst1_u16_x2(tmp.as_mut_ptr().cast(), a);
-        let r: uint16x4x2_t = vld1_u16_x2(tmp.as_ptr().cast());
-        let out: [u16; 8] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u16, 8, uint16x4x2_t, vst1_u16_x2, vld1_u16_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_u16_x3() {
-        let vals: [u16; 12] = crate::array::from_fn(|i| i as u16);
-        let a: uint16x4x3_t = transmute(vals);
-        let mut tmp = [0_u16; 12];
-        vst1_u16_x3(tmp.as_mut_ptr().cast(), a);
-        let r: uint16x4x3_t = vld1_u16_x3(tmp.as_ptr().cast());
-        let out: [u16; 12] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u16, 12, uint16x4x3_t, vst1_u16_x3, vld1_u16_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_u16_x4() {
-        let vals: [u16; 16] = crate::array::from_fn(|i| i as u16);
-        let a: uint16x4x4_t = transmute(vals);
-        let mut tmp = [0_u16; 16];
-        vst1_u16_x4(tmp.as_mut_ptr().cast(), a);
-        let r: uint16x4x4_t = vld1_u16_x4(tmp.as_ptr().cast());
-        let out: [u16; 16] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u16, 16, uint16x4x4_t, vst1_u16_x4, vld1_u16_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_u16_x2() {
-        let vals: [u16; 16] = crate::array::from_fn(|i| i as u16);
-        let a: uint16x8x2_t = transmute(vals);
-        let mut tmp = [0_u16; 16];
-        vst1q_u16_x2(tmp.as_mut_ptr().cast(), a);
-        let r: uint16x8x2_t = vld1q_u16_x2(tmp.as_ptr().cast());
-        let out: [u16; 16] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u16, 16, uint16x8x2_t, vst1q_u16_x2, vld1q_u16_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_u16_x3() {
-        let vals: [u16; 24] = crate::array::from_fn(|i| i as u16);
-        let a: uint16x8x3_t = transmute(vals);
-        let mut tmp = [0_u16; 24];
-        vst1q_u16_x3(tmp.as_mut_ptr().cast(), a);
-        let r: uint16x8x3_t = vld1q_u16_x3(tmp.as_ptr().cast());
-        let out: [u16; 24] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u16, 24, uint16x8x3_t, vst1q_u16_x3, vld1q_u16_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_u16_x4() {
-        let vals: [u16; 32] = crate::array::from_fn(|i| i as u16);
-        let a: uint16x8x4_t = transmute(vals);
-        let mut tmp = [0_u16; 32];
-        vst1q_u16_x4(tmp.as_mut_ptr().cast(), a);
-        let r: uint16x8x4_t = vld1q_u16_x4(tmp.as_ptr().cast());
-        let out: [u16; 32] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u16, 32, uint16x8x4_t, vst1q_u16_x4, vld1q_u16_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_u32_x2() {
-        let vals: [u32; 4] = crate::array::from_fn(|i| i as u32);
-        let a: uint32x2x2_t = transmute(vals);
-        let mut tmp = [0_u32; 4];
-        vst1_u32_x2(tmp.as_mut_ptr().cast(), a);
-        let r: uint32x2x2_t = vld1_u32_x2(tmp.as_ptr().cast());
-        let out: [u32; 4] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u32, 4, uint32x2x2_t, vst1_u32_x2, vld1_u32_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_u32_x3() {
-        let vals: [u32; 6] = crate::array::from_fn(|i| i as u32);
-        let a: uint32x2x3_t = transmute(vals);
-        let mut tmp = [0_u32; 6];
-        vst1_u32_x3(tmp.as_mut_ptr().cast(), a);
-        let r: uint32x2x3_t = vld1_u32_x3(tmp.as_ptr().cast());
-        let out: [u32; 6] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u32, 6, uint32x2x3_t, vst1_u32_x3, vld1_u32_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_u32_x4() {
-        let vals: [u32; 8] = crate::array::from_fn(|i| i as u32);
-        let a: uint32x2x4_t = transmute(vals);
-        let mut tmp = [0_u32; 8];
-        vst1_u32_x4(tmp.as_mut_ptr().cast(), a);
-        let r: uint32x2x4_t = vld1_u32_x4(tmp.as_ptr().cast());
-        let out: [u32; 8] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u32, 8, uint32x2x4_t, vst1_u32_x4, vld1_u32_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_u32_x2() {
-        let vals: [u32; 8] = crate::array::from_fn(|i| i as u32);
-        let a: uint32x4x2_t = transmute(vals);
-        let mut tmp = [0_u32; 8];
-        vst1q_u32_x2(tmp.as_mut_ptr().cast(), a);
-        let r: uint32x4x2_t = vld1q_u32_x2(tmp.as_ptr().cast());
-        let out: [u32; 8] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u32, 8, uint32x4x2_t, vst1q_u32_x2, vld1q_u32_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_u32_x3() {
-        let vals: [u32; 12] = crate::array::from_fn(|i| i as u32);
-        let a: uint32x4x3_t = transmute(vals);
-        let mut tmp = [0_u32; 12];
-        vst1q_u32_x3(tmp.as_mut_ptr().cast(), a);
-        let r: uint32x4x3_t = vld1q_u32_x3(tmp.as_ptr().cast());
-        let out: [u32; 12] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u32, 12, uint32x4x3_t, vst1q_u32_x3, vld1q_u32_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_u32_x4() {
-        let vals: [u32; 16] = crate::array::from_fn(|i| i as u32);
-        let a: uint32x4x4_t = transmute(vals);
-        let mut tmp = [0_u32; 16];
-        vst1q_u32_x4(tmp.as_mut_ptr().cast(), a);
-        let r: uint32x4x4_t = vld1q_u32_x4(tmp.as_ptr().cast());
-        let out: [u32; 16] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u32, 16, uint32x4x4_t, vst1q_u32_x4, vld1q_u32_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_u64_x2() {
-        let vals: [u64; 2] = crate::array::from_fn(|i| i as u64);
-        let a: uint64x1x2_t = transmute(vals);
-        let mut tmp = [0_u64; 2];
-        vst1_u64_x2(tmp.as_mut_ptr().cast(), a);
-        let r: uint64x1x2_t = vld1_u64_x2(tmp.as_ptr().cast());
-        let out: [u64; 2] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u64, 2, uint64x1x2_t, vst1_u64_x2, vld1_u64_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_u64_x3() {
-        let vals: [u64; 3] = crate::array::from_fn(|i| i as u64);
-        let a: uint64x1x3_t = transmute(vals);
-        let mut tmp = [0_u64; 3];
-        vst1_u64_x3(tmp.as_mut_ptr().cast(), a);
-        let r: uint64x1x3_t = vld1_u64_x3(tmp.as_ptr().cast());
-        let out: [u64; 3] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u64, 3, uint64x1x3_t, vst1_u64_x3, vld1_u64_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_u64_x4() {
-        let vals: [u64; 4] = crate::array::from_fn(|i| i as u64);
-        let a: uint64x1x4_t = transmute(vals);
-        let mut tmp = [0_u64; 4];
-        vst1_u64_x4(tmp.as_mut_ptr().cast(), a);
-        let r: uint64x1x4_t = vld1_u64_x4(tmp.as_ptr().cast());
-        let out: [u64; 4] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u64, 4, uint64x1x4_t, vst1_u64_x4, vld1_u64_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_u64_x2() {
-        let vals: [u64; 4] = crate::array::from_fn(|i| i as u64);
-        let a: uint64x2x2_t = transmute(vals);
-        let mut tmp = [0_u64; 4];
-        vst1q_u64_x2(tmp.as_mut_ptr().cast(), a);
-        let r: uint64x2x2_t = vld1q_u64_x2(tmp.as_ptr().cast());
-        let out: [u64; 4] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u64, 4, uint64x2x2_t, vst1q_u64_x2, vld1q_u64_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_u64_x3() {
-        let vals: [u64; 6] = crate::array::from_fn(|i| i as u64);
-        let a: uint64x2x3_t = transmute(vals);
-        let mut tmp = [0_u64; 6];
-        vst1q_u64_x3(tmp.as_mut_ptr().cast(), a);
-        let r: uint64x2x3_t = vld1q_u64_x3(tmp.as_ptr().cast());
-        let out: [u64; 6] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u64, 6, uint64x2x3_t, vst1q_u64_x3, vld1q_u64_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_u64_x4() {
-        let vals: [u64; 8] = crate::array::from_fn(|i| i as u64);
-        let a: uint64x2x4_t = transmute(vals);
-        let mut tmp = [0_u64; 8];
-        vst1q_u64_x4(tmp.as_mut_ptr().cast(), a);
-        let r: uint64x2x4_t = vld1q_u64_x4(tmp.as_ptr().cast());
-        let out: [u64; 8] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(u64, 8, uint64x2x4_t, vst1q_u64_x4, vld1q_u64_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_p8_x2() {
-        let vals: [p8; 16] = crate::array::from_fn(|i| i as p8);
-        let a: poly8x8x2_t = transmute(vals);
-        let mut tmp = [0 as p8; 16];
-        vst1_p8_x2(tmp.as_mut_ptr().cast(), a);
-        let r: poly8x8x2_t = vld1_p8_x2(tmp.as_ptr().cast());
-        let out: [p8; 16] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p8, 16, poly8x8x2_t, vst1_p8_x2, vld1_p8_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_p8_x3() {
-        let vals: [p8; 24] = crate::array::from_fn(|i| i as p8);
-        let a: poly8x8x3_t = transmute(vals);
-        let mut tmp = [0 as p8; 24];
-        vst1_p8_x3(tmp.as_mut_ptr().cast(), a);
-        let r: poly8x8x3_t = vld1_p8_x3(tmp.as_ptr().cast());
-        let out: [p8; 24] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p8, 24, poly8x8x3_t, vst1_p8_x3, vld1_p8_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_p8_x4() {
-        let vals: [p8; 32] = crate::array::from_fn(|i| i as p8);
-        let a: poly8x8x4_t = transmute(vals);
-        let mut tmp = [0 as p8; 32];
-        vst1_p8_x4(tmp.as_mut_ptr().cast(), a);
-        let r: poly8x8x4_t = vld1_p8_x4(tmp.as_ptr().cast());
-        let out: [p8; 32] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p8, 32, poly8x8x4_t, vst1_p8_x4, vld1_p8_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_p8_x2() {
-        let vals: [p8; 32] = crate::array::from_fn(|i| i as p8);
-        let a: poly8x16x2_t = transmute(vals);
-        let mut tmp = [0 as p8; 32];
-        vst1q_p8_x2(tmp.as_mut_ptr().cast(), a);
-        let r: poly8x16x2_t = vld1q_p8_x2(tmp.as_ptr().cast());
-        let out: [p8; 32] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p8, 32, poly8x16x2_t, vst1q_p8_x2, vld1q_p8_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_p8_x3() {
-        let vals: [p8; 48] = crate::array::from_fn(|i| i as p8);
-        let a: poly8x16x3_t = transmute(vals);
-        let mut tmp = [0 as p8; 48];
-        vst1q_p8_x3(tmp.as_mut_ptr().cast(), a);
-        let r: poly8x16x3_t = vld1q_p8_x3(tmp.as_ptr().cast());
-        let out: [p8; 48] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p8, 48, poly8x16x3_t, vst1q_p8_x3, vld1q_p8_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_p8_x4() {
-        let vals: [p8; 64] = crate::array::from_fn(|i| i as p8);
-        let a: poly8x16x4_t = transmute(vals);
-        let mut tmp = [0 as p8; 64];
-        vst1q_p8_x4(tmp.as_mut_ptr().cast(), a);
-        let r: poly8x16x4_t = vld1q_p8_x4(tmp.as_ptr().cast());
-        let out: [p8; 64] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p8, 64, poly8x16x4_t, vst1q_p8_x4, vld1q_p8_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_p16_x2() {
-        let vals: [p16; 8] = crate::array::from_fn(|i| i as p16);
-        let a: poly16x4x2_t = transmute(vals);
-        let mut tmp = [0 as p16; 8];
-        vst1_p16_x2(tmp.as_mut_ptr().cast(), a);
-        let r: poly16x4x2_t = vld1_p16_x2(tmp.as_ptr().cast());
-        let out: [p16; 8] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p16, 8, poly16x4x2_t, vst1_p16_x2, vld1_p16_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_p16_x3() {
-        let vals: [p16; 12] = crate::array::from_fn(|i| i as p16);
-        let a: poly16x4x3_t = transmute(vals);
-        let mut tmp = [0 as p16; 12];
-        vst1_p16_x3(tmp.as_mut_ptr().cast(), a);
-        let r: poly16x4x3_t = vld1_p16_x3(tmp.as_ptr().cast());
-        let out: [p16; 12] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p16, 12, poly16x4x3_t, vst1_p16_x3, vld1_p16_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_p16_x4() {
-        let vals: [p16; 16] = crate::array::from_fn(|i| i as p16);
-        let a: poly16x4x4_t = transmute(vals);
-        let mut tmp = [0 as p16; 16];
-        vst1_p16_x4(tmp.as_mut_ptr().cast(), a);
-        let r: poly16x4x4_t = vld1_p16_x4(tmp.as_ptr().cast());
-        let out: [p16; 16] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p16, 16, poly16x4x4_t, vst1_p16_x4, vld1_p16_x4);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_p16_x2() {
-        let vals: [p16; 16] = crate::array::from_fn(|i| i as p16);
-        let a: poly16x8x2_t = transmute(vals);
-        let mut tmp = [0 as p16; 16];
-        vst1q_p16_x2(tmp.as_mut_ptr().cast(), a);
-        let r: poly16x8x2_t = vld1q_p16_x2(tmp.as_ptr().cast());
-        let out: [p16; 16] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p16, 16, poly16x8x2_t, vst1q_p16_x2, vld1q_p16_x2);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_p16_x3() {
-        let vals: [p16; 24] = crate::array::from_fn(|i| i as p16);
-        let a: poly16x8x3_t = transmute(vals);
-        let mut tmp = [0 as p16; 24];
-        vst1q_p16_x3(tmp.as_mut_ptr().cast(), a);
-        let r: poly16x8x3_t = vld1q_p16_x3(tmp.as_ptr().cast());
-        let out: [p16; 24] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p16, 24, poly16x8x3_t, vst1q_p16_x3, vld1q_p16_x3);
     }

     #[simd_test(enable = "neon")]
     unsafe fn test_vld1q_p16_x4() {
-        let vals: [p16; 32] = crate::array::from_fn(|i| i as p16);
-        let a: poly16x8x4_t = transmute(vals);
-        let mut tmp = [0 as p16; 32];
-        vst1q_p16_x4(tmp.as_mut_ptr().cast(), a);
-        let r: poly16x8x4_t = vld1q_p16_x4(tmp.as_ptr().cast());
-        let out: [p16; 32] = transmute(r);
-        assert_eq!(out, vals);
+        wide_store_load_roundtrip!(p16, 32, poly16x8x4_t, vst1q_p16_x4, vld1q_p16_x4);
     }
 }

From 5787838c05d0f2805ca9562fdab340f6a33dd15a Mon Sep 17 00:00:00 2001
From: Folkert de Vries
Date: Sun, 1 Feb 2026 14:17:35 +0100
Subject: [PATCH 4/4] use more capable macro for wide store/load roundtrip tests

---
 crates/core_arch/src/aarch64/neon/mod.rs | 470 ++++++-----------------
 1 file changed, 109 insertions(+), 361 deletions(-)

diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs
index ee27bef973..580f203ef0 100644
--- a/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/crates/core_arch/src/aarch64/neon/mod.rs
@@ -1006,400 +1006,148 @@ mod tests {
         };
     }

-    #[simd_test(enable = "neon,fp16")]
-    #[cfg(not(target_arch = "arm64ec"))]
-    unsafe fn test_vld1_f16_x2() {
-        wide_store_load_roundtrip!(f16, 8, float16x4x2_t, vst1_f16_x2, vld1_f16_x2);
-    }
-
-    #[simd_test(enable = "neon,fp16")]
-    #[cfg(not(target_arch = "arm64ec"))]
-    unsafe fn test_vld1_f16_x3() {
-        wide_store_load_roundtrip!(f16, 12, float16x4x3_t, vst1_f16_x3, vld1_f16_x3);
-    }
-
-    #[simd_test(enable = "neon,fp16")]
-    #[cfg(not(target_arch = "arm64ec"))]
-    unsafe fn test_vld1_f16_x4() {
-        wide_store_load_roundtrip!(f16, 16, float16x4x4_t, vst1_f16_x4, vld1_f16_x4);
-    }
-
-    #[simd_test(enable = "neon,fp16")]
-    #[cfg(not(target_arch = "arm64ec"))]
-    unsafe fn test_vld1q_f16_x2() {
-        wide_store_load_roundtrip!(f16, 16, float16x8x2_t, vst1q_f16_x2, vld1q_f16_x2);
-    }
-
-    #[simd_test(enable = "neon,fp16")]
-    #[cfg(not(target_arch = "arm64ec"))]
-    unsafe fn test_vld1q_f16_x3() {
-        wide_store_load_roundtrip!(f16, 24, float16x8x3_t, vst1q_f16_x3, vld1q_f16_x3);
-    }
-
-    #[simd_test(enable = "neon,fp16")]
-    #[cfg(not(target_arch = "arm64ec"))]
-    unsafe fn test_vld1q_f16_x4() {
-        wide_store_load_roundtrip!(f16, 32, float16x8x4_t, vst1q_f16_x4, vld1q_f16_x4);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_f32_x2() {
-        wide_store_load_roundtrip!(f32, 4, float32x2x2_t, vst1_f32_x2, vld1_f32_x2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_f32_x3() {
-        wide_store_load_roundtrip!(f32, 6, float32x2x3_t, vst1_f32_x3, vld1_f32_x3);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_f32_x4() {
-        wide_store_load_roundtrip!(f32, 8, float32x2x4_t, vst1_f32_x4, vld1_f32_x4);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_f32_x2() {
-        wide_store_load_roundtrip!(f32, 8, float32x4x2_t, vst1q_f32_x2, vld1q_f32_x2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_f32_x3() {
-        wide_store_load_roundtrip!(f32, 12, float32x4x3_t, vst1q_f32_x3, vld1q_f32_x3);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_f32_x4() {
-        wide_store_load_roundtrip!(f32, 16, float32x4x4_t, vst1q_f32_x4, vld1q_f32_x4);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vld1_p64_x2() {
-        wide_store_load_roundtrip!(p64, 2, poly64x1x2_t, vst1_p64_x2, vld1_p64_x2);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vld1_p64_x3() {
-        wide_store_load_roundtrip!(p64, 3, poly64x1x3_t, vst1_p64_x3, vld1_p64_x3);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vld1_p64_x4() {
-        wide_store_load_roundtrip!(p64, 4, poly64x1x4_t, vst1_p64_x4, vld1_p64_x4);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vld1q_p64_x2() {
-        wide_store_load_roundtrip!(p64, 4, poly64x2x2_t, vst1q_p64_x2, vld1q_p64_x2);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vld1q_p64_x3() {
-        wide_store_load_roundtrip!(p64, 6, poly64x2x3_t, vst1q_p64_x3, vld1q_p64_x3);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vld1q_p64_x4() {
-        wide_store_load_roundtrip!(p64, 8, poly64x2x4_t, vst1q_p64_x4, vld1q_p64_x4);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_s8_x2() {
-        wide_store_load_roundtrip!(i8, 16, int8x8x2_t, vst1_s8_x2, vld1_s8_x2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_s8_x3() {
-        wide_store_load_roundtrip!(i8, 24, int8x8x3_t, vst1_s8_x3, vld1_s8_x3);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_s8_x4() {
-        wide_store_load_roundtrip!(i8, 32, int8x8x4_t, vst1_s8_x4, vld1_s8_x4);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_s8_x2() {
-        wide_store_load_roundtrip!(i8, 32, int8x16x2_t, vst1q_s8_x2, vld1q_s8_x2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_s8_x3() {
-        wide_store_load_roundtrip!(i8, 48, int8x16x3_t, vst1q_s8_x3, vld1q_s8_x3);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_s8_x4() {
-        wide_store_load_roundtrip!(i8, 64, int8x16x4_t, vst1q_s8_x4, vld1q_s8_x4);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_s16_x2() {
-        wide_store_load_roundtrip!(i16, 8, int16x4x2_t, vst1_s16_x2, vld1_s16_x2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_s16_x3() {
-        wide_store_load_roundtrip!(i16, 12, int16x4x3_t, vst1_s16_x3, vld1_s16_x3);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_s16_x4() {
-        wide_store_load_roundtrip!(i16, 16, int16x4x4_t, vst1_s16_x4, vld1_s16_x4);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_s16_x2() {
-        wide_store_load_roundtrip!(i16, 16, int16x8x2_t, vst1q_s16_x2, vld1q_s16_x2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_s16_x3() {
-        wide_store_load_roundtrip!(i16, 24, int16x8x3_t, vst1q_s16_x3, vld1q_s16_x3);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_s16_x4() {
-        wide_store_load_roundtrip!(i16, 32, int16x8x4_t, vst1q_s16_x4, vld1q_s16_x4);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_s32_x2() {
-        wide_store_load_roundtrip!(i32, 4, int32x2x2_t, vst1_s32_x2, vld1_s32_x2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_s32_x3() {
-        wide_store_load_roundtrip!(i32, 6, int32x2x3_t, vst1_s32_x3, vld1_s32_x3);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_s32_x4() {
-        wide_store_load_roundtrip!(i32, 8, int32x2x4_t, vst1_s32_x4, vld1_s32_x4);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_s32_x2() {
-        wide_store_load_roundtrip!(i32, 8, int32x4x2_t, vst1q_s32_x2, vld1q_s32_x2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_s32_x3() {
-        wide_store_load_roundtrip!(i32, 12, int32x4x3_t, vst1q_s32_x3, vld1q_s32_x3);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_s32_x4() {
-        wide_store_load_roundtrip!(i32, 16, int32x4x4_t, vst1q_s32_x4, vld1q_s32_x4);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_s64_x2() {
-        wide_store_load_roundtrip!(i64, 2, int64x1x2_t, vst1_s64_x2, vld1_s64_x2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_s64_x3() {
-        wide_store_load_roundtrip!(i64, 3, int64x1x3_t, vst1_s64_x3, vld1_s64_x3);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_s64_x4() {
-        wide_store_load_roundtrip!(i64, 4, int64x1x4_t, vst1_s64_x4, vld1_s64_x4);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_s64_x2() {
-        wide_store_load_roundtrip!(i64, 4, int64x2x2_t, vst1q_s64_x2, vld1q_s64_x2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_s64_x3() {
-        wide_store_load_roundtrip!(i64, 6, int64x2x3_t, vst1q_s64_x3, vld1q_s64_x3);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_s64_x4() {
-        wide_store_load_roundtrip!(i64, 8, int64x2x4_t, vst1q_s64_x4, vld1q_s64_x4);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_u8_x2() {
-        wide_store_load_roundtrip!(u8, 16, uint8x8x2_t, vst1_u8_x2, vld1_u8_x2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_u8_x3() {
-        wide_store_load_roundtrip!(u8, 24, uint8x8x3_t, vst1_u8_x3, vld1_u8_x3);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_u8_x4() {
-        wide_store_load_roundtrip!(u8, 32, uint8x8x4_t, vst1_u8_x4, vld1_u8_x4);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_u8_x2() {
-        wide_store_load_roundtrip!(u8, 32, uint8x16x2_t, vst1q_u8_x2, vld1q_u8_x2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_u8_x3() {
-        wide_store_load_roundtrip!(u8, 48, uint8x16x3_t, vst1q_u8_x3, vld1q_u8_x3);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_u8_x4() {
-        wide_store_load_roundtrip!(u8, 64, uint8x16x4_t, vst1q_u8_x4, vld1q_u8_x4);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_u16_x2() {
-        wide_store_load_roundtrip!(u16, 8, uint16x4x2_t, vst1_u16_x2, vld1_u16_x2);
+    macro_rules! wide_store_load_roundtrip_fp16 {
+        ($( $name:ident $args:tt);* $(;)?) => {
+            $(
+                #[simd_test(enable = "neon,fp16")]
+                #[cfg(not(target_arch = "arm64ec"))]
+                unsafe fn $name() {
+                    wide_store_load_roundtrip! $args;
+                }
+            )*
+        };
     }

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_u16_x3() {
-        wide_store_load_roundtrip!(u16, 12, uint16x4x3_t, vst1_u16_x3, vld1_u16_x3);
-    }
+    wide_store_load_roundtrip_fp16! {
+        test_vld1_f16_x2(f16, 8, float16x4x2_t, vst1_f16_x2, vld1_f16_x2);
+        test_vld1_f16_x3(f16, 12, float16x4x3_t, vst1_f16_x3, vld1_f16_x3);
+        test_vld1_f16_x4(f16, 16, float16x4x4_t, vst1_f16_x4, vld1_f16_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_u16_x4() {
-        wide_store_load_roundtrip!(u16, 16, uint16x4x4_t, vst1_u16_x4, vld1_u16_x4);
+        test_vld1q_f16_x2(f16, 16, float16x8x2_t, vst1q_f16_x2, vld1q_f16_x2);
+        test_vld1q_f16_x3(f16, 24, float16x8x3_t, vst1q_f16_x3, vld1q_f16_x3);
+        test_vld1q_f16_x4(f16, 32, float16x8x4_t, vst1q_f16_x4, vld1q_f16_x4);
     }

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_u16_x2() {
-        wide_store_load_roundtrip!(u16, 16, uint16x8x2_t, vst1q_u16_x2, vld1q_u16_x2);
+    macro_rules! wide_store_load_roundtrip_aes {
+        ($( $name:ident $args:tt);* $(;)?) => {
+            $(
+                #[simd_test(enable = "neon,aes")]
+                unsafe fn $name() {
+                    wide_store_load_roundtrip! $args;
+                }
+            )*
+        };
     }

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_u16_x3() {
-        wide_store_load_roundtrip!(u16, 24, uint16x8x3_t, vst1q_u16_x3, vld1q_u16_x3);
-    }
+    wide_store_load_roundtrip_aes! {
+        test_vld1_p64_x2(p64, 2, poly64x1x2_t, vst1_p64_x2, vld1_p64_x2);
+        test_vld1_p64_x3(p64, 3, poly64x1x3_t, vst1_p64_x3, vld1_p64_x3);
+        test_vld1_p64_x4(p64, 4, poly64x1x4_t, vst1_p64_x4, vld1_p64_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_u16_x4() {
-        wide_store_load_roundtrip!(u16, 32, uint16x8x4_t, vst1q_u16_x4, vld1q_u16_x4);
+        test_vld1q_p64_x2(p64, 4, poly64x2x2_t, vst1q_p64_x2, vld1q_p64_x2);
+        test_vld1q_p64_x3(p64, 6, poly64x2x3_t, vst1q_p64_x3, vld1q_p64_x3);
+        test_vld1q_p64_x4(p64, 8, poly64x2x4_t, vst1q_p64_x4, vld1q_p64_x4);
     }

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_u32_x2() {
-        wide_store_load_roundtrip!(u32, 4, uint32x2x2_t, vst1_u32_x2, vld1_u32_x2);
+    macro_rules! wide_store_load_roundtrip_neon {
+        ($( $name:ident $args:tt);* $(;)?) => {
+            $(
+                #[simd_test(enable = "neon")]
+                unsafe fn $name() {
+                    wide_store_load_roundtrip! $args;
+                }
+            )*
+        };
     }

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_u32_x3() {
-        wide_store_load_roundtrip!(u32, 6, uint32x2x3_t, vst1_u32_x3, vld1_u32_x3);
-    }
+    wide_store_load_roundtrip_neon! {
+        test_vld1_f32_x2(f32, 4, float32x2x2_t, vst1_f32_x2, vld1_f32_x2);
+        test_vld1_f32_x3(f32, 6, float32x2x3_t, vst1_f32_x3, vld1_f32_x3);
+        test_vld1_f32_x4(f32, 8, float32x2x4_t, vst1_f32_x4, vld1_f32_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_u32_x4() {
-        wide_store_load_roundtrip!(u32, 8, uint32x2x4_t, vst1_u32_x4, vld1_u32_x4);
-    }
+        test_vld1q_f32_x2(f32, 8, float32x4x2_t, vst1q_f32_x2, vld1q_f32_x2);
+        test_vld1q_f32_x3(f32, 12, float32x4x3_t, vst1q_f32_x3, vld1q_f32_x3);
+        test_vld1q_f32_x4(f32, 16, float32x4x4_t, vst1q_f32_x4, vld1q_f32_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_u32_x2() {
-        wide_store_load_roundtrip!(u32, 8, uint32x4x2_t, vst1q_u32_x2, vld1q_u32_x2);
-    }
+        test_vld1_s8_x2(i8, 16, int8x8x2_t, vst1_s8_x2, vld1_s8_x2);
+        test_vld1_s8_x3(i8, 24, int8x8x3_t, vst1_s8_x3, vld1_s8_x3);
+        test_vld1_s8_x4(i8, 32, int8x8x4_t, vst1_s8_x4, vld1_s8_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_u32_x3() {
-        wide_store_load_roundtrip!(u32, 12, uint32x4x3_t, vst1q_u32_x3, vld1q_u32_x3);
-    }
+        test_vld1q_s8_x2(i8, 32, int8x16x2_t, vst1q_s8_x2, vld1q_s8_x2);
+        test_vld1q_s8_x3(i8, 48, int8x16x3_t, vst1q_s8_x3, vld1q_s8_x3);
+        test_vld1q_s8_x4(i8, 64, int8x16x4_t, vst1q_s8_x4, vld1q_s8_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_u32_x4() {
-        wide_store_load_roundtrip!(u32, 16, uint32x4x4_t, vst1q_u32_x4, vld1q_u32_x4);
-    }
+        test_vld1_s16_x2(i16, 8, int16x4x2_t, vst1_s16_x2, vld1_s16_x2);
+        test_vld1_s16_x3(i16, 12, int16x4x3_t, vst1_s16_x3, vld1_s16_x3);
+        test_vld1_s16_x4(i16, 16, int16x4x4_t, vst1_s16_x4, vld1_s16_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_u64_x2() {
-        wide_store_load_roundtrip!(u64, 2, uint64x1x2_t, vst1_u64_x2, vld1_u64_x2);
-    }
+        test_vld1q_s16_x2(i16, 16, int16x8x2_t, vst1q_s16_x2, vld1q_s16_x2);
+        test_vld1q_s16_x3(i16, 24, int16x8x3_t, vst1q_s16_x3, vld1q_s16_x3);
+        test_vld1q_s16_x4(i16, 32, int16x8x4_t, vst1q_s16_x4, vld1q_s16_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_u64_x3() {
-        wide_store_load_roundtrip!(u64, 3, uint64x1x3_t, vst1_u64_x3, vld1_u64_x3);
-    }
+        test_vld1_s32_x2(i32, 4, int32x2x2_t, vst1_s32_x2, vld1_s32_x2);
+        test_vld1_s32_x3(i32, 6, int32x2x3_t, vst1_s32_x3, vld1_s32_x3);
+        test_vld1_s32_x4(i32, 8, int32x2x4_t, vst1_s32_x4, vld1_s32_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_u64_x4() {
-        wide_store_load_roundtrip!(u64, 4, uint64x1x4_t, vst1_u64_x4, vld1_u64_x4);
-    }
+        test_vld1q_s32_x2(i32, 8, int32x4x2_t, vst1q_s32_x2, vld1q_s32_x2);
+        test_vld1q_s32_x3(i32, 12, int32x4x3_t, vst1q_s32_x3, vld1q_s32_x3);
+        test_vld1q_s32_x4(i32, 16, int32x4x4_t, vst1q_s32_x4, vld1q_s32_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_u64_x2() {
-        wide_store_load_roundtrip!(u64, 4, uint64x2x2_t, vst1q_u64_x2, vld1q_u64_x2);
-    }
+        test_vld1_s64_x2(i64, 2, int64x1x2_t, vst1_s64_x2, vld1_s64_x2);
+        test_vld1_s64_x3(i64, 3, int64x1x3_t, vst1_s64_x3, vld1_s64_x3);
+        test_vld1_s64_x4(i64, 4, int64x1x4_t, vst1_s64_x4, vld1_s64_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_u64_x3() {
-        wide_store_load_roundtrip!(u64, 6, uint64x2x3_t, vst1q_u64_x3, vld1q_u64_x3);
-    }
+        test_vld1q_s64_x2(i64, 4, int64x2x2_t, vst1q_s64_x2, vld1q_s64_x2);
+        test_vld1q_s64_x3(i64, 6, int64x2x3_t, vst1q_s64_x3, vld1q_s64_x3);
+        test_vld1q_s64_x4(i64, 8, int64x2x4_t, vst1q_s64_x4, vld1q_s64_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_u64_x4() {
-        wide_store_load_roundtrip!(u64, 8, uint64x2x4_t, vst1q_u64_x4, vld1q_u64_x4);
-    }
+        test_vld1_u8_x2(u8, 16, uint8x8x2_t, vst1_u8_x2, vld1_u8_x2);
+        test_vld1_u8_x3(u8, 24, uint8x8x3_t, vst1_u8_x3, vld1_u8_x3);
+        test_vld1_u8_x4(u8, 32, uint8x8x4_t, vst1_u8_x4, vld1_u8_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_p8_x2() {
-        wide_store_load_roundtrip!(p8, 16, poly8x8x2_t, vst1_p8_x2, vld1_p8_x2);
-    }
+        test_vld1q_u8_x2(u8, 32, uint8x16x2_t, vst1q_u8_x2, vld1q_u8_x2);
+        test_vld1q_u8_x3(u8, 48, uint8x16x3_t, vst1q_u8_x3, vld1q_u8_x3);
+        test_vld1q_u8_x4(u8, 64, uint8x16x4_t, vst1q_u8_x4, vld1q_u8_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_p8_x3() {
-        wide_store_load_roundtrip!(p8, 24, poly8x8x3_t, vst1_p8_x3, vld1_p8_x3);
-    }
+        test_vld1_u16_x2(u16, 8, uint16x4x2_t, vst1_u16_x2, vld1_u16_x2);
+        test_vld1_u16_x3(u16, 12, uint16x4x3_t, vst1_u16_x3, vld1_u16_x3);
+        test_vld1_u16_x4(u16, 16, uint16x4x4_t, vst1_u16_x4, vld1_u16_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_p8_x4() {
-        wide_store_load_roundtrip!(p8, 32, poly8x8x4_t, vst1_p8_x4, vld1_p8_x4);
-    }
+        test_vld1q_u16_x2(u16, 16, uint16x8x2_t, vst1q_u16_x2, vld1q_u16_x2);
+        test_vld1q_u16_x3(u16, 24, uint16x8x3_t, vst1q_u16_x3, vld1q_u16_x3);
+        test_vld1q_u16_x4(u16, 32, uint16x8x4_t, vst1q_u16_x4, vld1q_u16_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_p8_x2() {
-        wide_store_load_roundtrip!(p8, 32, poly8x16x2_t, vst1q_p8_x2, vld1q_p8_x2);
-    }
+        test_vld1_u32_x2(u32, 4, uint32x2x2_t, vst1_u32_x2, vld1_u32_x2);
+        test_vld1_u32_x3(u32, 6, uint32x2x3_t, vst1_u32_x3, vld1_u32_x3);
+        test_vld1_u32_x4(u32, 8, uint32x2x4_t, vst1_u32_x4, vld1_u32_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_p8_x3() {
-        wide_store_load_roundtrip!(p8, 48, poly8x16x3_t, vst1q_p8_x3, vld1q_p8_x3);
-    }
+        test_vld1q_u32_x2(u32, 8, uint32x4x2_t, vst1q_u32_x2, vld1q_u32_x2);
+        test_vld1q_u32_x3(u32, 12, uint32x4x3_t, vst1q_u32_x3, vld1q_u32_x3);
+        test_vld1q_u32_x4(u32, 16, uint32x4x4_t, vst1q_u32_x4, vld1q_u32_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_p8_x4() {
-        wide_store_load_roundtrip!(p8, 64, poly8x16x4_t, vst1q_p8_x4, vld1q_p8_x4);
-    }
+        test_vld1_u64_x2(u64, 2, uint64x1x2_t, vst1_u64_x2, vld1_u64_x2);
+        test_vld1_u64_x3(u64, 3, uint64x1x3_t, vst1_u64_x3, vld1_u64_x3);
+        test_vld1_u64_x4(u64, 4, uint64x1x4_t, vst1_u64_x4, vld1_u64_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_p16_x2() {
-        wide_store_load_roundtrip!(p16, 8, poly16x4x2_t, vst1_p16_x2, vld1_p16_x2);
-    }
+        test_vld1q_u64_x2(u64, 4, uint64x2x2_t, vst1q_u64_x2, vld1q_u64_x2);
+        test_vld1q_u64_x3(u64, 6, uint64x2x3_t, vst1q_u64_x3, vld1q_u64_x3);
+        test_vld1q_u64_x4(u64, 8, uint64x2x4_t, vst1q_u64_x4, vld1q_u64_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_p16_x3() {
-        wide_store_load_roundtrip!(p16, 12, poly16x4x3_t, vst1_p16_x3, vld1_p16_x3);
-    }
+        test_vld1_p8_x2(p8, 16, poly8x8x2_t, vst1_p8_x2, vld1_p8_x2);
+        test_vld1_p8_x3(p8, 24, poly8x8x3_t, vst1_p8_x3, vld1_p8_x3);
+        test_vld1_p8_x4(p8, 32, poly8x8x4_t, vst1_p8_x4, vld1_p8_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_p16_x4() {
-        wide_store_load_roundtrip!(p16, 16, poly16x4x4_t, vst1_p16_x4, vld1_p16_x4);
-    }
+        test_vld1q_p8_x2(p8, 32, poly8x16x2_t, vst1q_p8_x2, vld1q_p8_x2);
+        test_vld1q_p8_x3(p8, 48, poly8x16x3_t, vst1q_p8_x3, vld1q_p8_x3);
+        test_vld1q_p8_x4(p8, 64, poly8x16x4_t, vst1q_p8_x4, vld1q_p8_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_p16_x2() {
-        wide_store_load_roundtrip!(p16, 16, poly16x8x2_t, vst1q_p16_x2, vld1q_p16_x2);
-    }
+        test_vld1_p16_x2(p16, 8, poly16x4x2_t, vst1_p16_x2, vld1_p16_x2);
+        test_vld1_p16_x3(p16, 12, poly16x4x3_t, vst1_p16_x3, vld1_p16_x3);
+        test_vld1_p16_x4(p16, 16, poly16x4x4_t, vst1_p16_x4, vld1_p16_x4);

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_p16_x3() {
-        wide_store_load_roundtrip!(p16, 24, poly16x8x3_t, vst1q_p16_x3, vld1q_p16_x3);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_p16_x4() {
-        wide_store_load_roundtrip!(p16, 32, poly16x8x4_t, vst1q_p16_x4, vld1q_p16_x4);
+        test_vld1q_p16_x2(p16, 16, poly16x8x2_t, vst1q_p16_x2, vld1q_p16_x2);
+        test_vld1q_p16_x3(p16, 24, poly16x8x3_t, vst1q_p16_x3, vld1q_p16_x3);
+        test_vld1q_p16_x4(p16, 32, poly16x8x4_t, vst1q_p16_x4, vld1q_p16_x4);
     }
 }
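
For reference: the base `wide_store_load_roundtrip!` macro that every test above expands to is
defined in an earlier hunk of the series and is not visible in this excerpt. Judging only from
its call sites, `wide_store_load_roundtrip!(element, len, tuple_type, vst1_fn, vld1_fn)`, a
minimal sketch of such a macro, assuming it simply inlines the store/load/compare bodies that
these patches fold away, could look like this (the definition actually used by the series may
differ in detail):

    macro_rules! wide_store_load_roundtrip {
        ($elem:ty, $len:expr, $vec:ty, $store:ident, $load:ident) => {{
            // Build ascending test data, reinterpret it as the wide NEON tuple type,
            // store it through the vst1* intrinsic, load it back with the matching
            // vld1* intrinsic, and check that the round trip is lossless.
            let vals: [$elem; $len] = crate::array::from_fn(|i| i as $elem);
            let a: $vec = transmute(vals);
            let mut tmp = [0 as $elem; $len];
            $store(tmp.as_mut_ptr().cast(), a);
            let r: $vec = $load(tmp.as_ptr().cast());
            let out: [$elem; $len] = transmute(r);
            assert_eq!(out, vals);
        }};
    }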