@@ -1,4 +1,4 @@
|
|
|
-/* Copyright (c) 2017-2018 Evan Nemerson <[email protected]>
|
|
|
+/* SPDX-License-Identifier: MIT
|
|
|
*
|
|
|
* Permission is hereby granted, free of charge, to any person
|
|
|
* obtaining a copy of this software and associated documentation
|
|
|
@@ -19,64 +19,71 @@
|
|
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
|
* SOFTWARE.
|
|
|
+ *
|
|
|
+ * Copyright:
|
|
|
+ * 2017-2020 Evan Nemerson <[email protected]>
|
|
|
*/
|
|
|
|
|
|
-#if !defined(SIMDE__MMX_H)
|
|
|
-#if !defined(SIMDE__MMX_H)
|
|
|
-#define SIMDE__MMX_H
|
|
|
-#endif
|
|
|
+#if !defined(SIMDE_X86_MMX_H)
|
|
|
+#define SIMDE_X86_MMX_H
|
|
|
+
|
|
|
#include "simde-common.h"
|
|
|
|
|
|
-#if defined(SIMDE_MMX_FORCE_NATIVE)
|
|
|
-#define SIMDE_MMX_NATIVE
|
|
|
-#elif defined(__MMX__) && !defined(SIMDE_MMX_NO_NATIVE) && \
|
|
|
- !defined(SIMDE_NO_NATIVE)
|
|
|
-#define SIMDE_MMX_NATIVE
|
|
|
-#elif defined(__ARM_NEON) && !defined(SIMDE_MMX_NO_NEON) && \
|
|
|
- !defined(SIMDE_NO_NEON)
|
|
|
-#define SIMDE_MMX_NEON
|
|
|
+#if !defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
|
|
|
+#define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES
|
|
|
+#endif
|
|
|
+
|
|
|
+HEDLEY_DIAGNOSTIC_PUSH
|
|
|
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
|
|
+
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+#define SIMDE_X86_MMX_USE_NATIVE_TYPE
|
|
|
+#elif defined(SIMDE_X86_SSE_NATIVE)
|
|
|
+#define SIMDE_X86_MMX_USE_NATIVE_TYPE
|
|
|
#endif
|
|
|
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
+#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
|
|
|
#include <mmintrin.h>
|
|
|
-#else
|
|
|
-#if defined(SIMDE_MMX_NEON)
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
#include <arm_neon.h>
|
|
|
#endif
|
|
|
-#endif
|
|
|
+
|
|
|
#include <stdint.h>
|
|
|
#include <limits.h>
|
|
|
-#include <stdlib.h>
|
|
|
-#include <string.h>
|
|
|
|
|
|
-SIMDE__BEGIN_DECLS
|
|
|
+SIMDE_BEGIN_DECLS_
|
|
|
|
|
|
typedef union {
|
|
|
-#if defined(SIMDE__ENABLE_GCC_VEC_EXT)
|
|
|
- int8_t i8 __attribute__((__vector_size__(8), __may_alias__));
|
|
|
- int16_t i16 __attribute__((__vector_size__(8), __may_alias__));
|
|
|
- int32_t i32 __attribute__((__vector_size__(8), __may_alias__));
|
|
|
- int64_t i64 __attribute__((__vector_size__(8), __may_alias__));
|
|
|
- uint8_t u8 __attribute__((__vector_size__(8), __may_alias__));
|
|
|
- uint16_t u16 __attribute__((__vector_size__(8), __may_alias__));
|
|
|
- uint32_t u32 __attribute__((__vector_size__(8), __may_alias__));
|
|
|
- uint64_t u64 __attribute__((__vector_size__(8), __may_alias__));
|
|
|
- simde_float32 f32 __attribute__((__vector_size__(8), __may_alias__));
|
|
|
-#else
|
|
|
- int8_t i8[8];
|
|
|
- int16_t i16[4];
|
|
|
- int32_t i32[2];
|
|
|
- int64_t i64[1];
|
|
|
- uint8_t u8[8];
|
|
|
- uint16_t u16[4];
|
|
|
- uint32_t u32[2];
|
|
|
- uint64_t u64[1];
|
|
|
- simde_float32 f32[2];
|
|
|
-#endif
|
|
|
-
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
+#if defined(SIMDE_VECTOR_SUBSCRIPT)
|
|
|
+ SIMDE_ALIGN(8) int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
|
+ SIMDE_ALIGN(8) int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
|
+ SIMDE_ALIGN(8) int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
|
+ SIMDE_ALIGN(8) int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
|
+ SIMDE_ALIGN(8) uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
|
+ SIMDE_ALIGN(8) uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
|
+ SIMDE_ALIGN(8) uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
|
+ SIMDE_ALIGN(8) uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
|
+ SIMDE_ALIGN(8) simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
|
+ SIMDE_ALIGN(8) int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
|
+ SIMDE_ALIGN(8) uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
|
+#else
|
|
|
+ SIMDE_ALIGN(8) int8_t i8[8];
|
|
|
+ SIMDE_ALIGN(8) int16_t i16[4];
|
|
|
+ SIMDE_ALIGN(8) int32_t i32[2];
|
|
|
+ SIMDE_ALIGN(8) int64_t i64[1];
|
|
|
+ SIMDE_ALIGN(8) uint8_t u8[8];
|
|
|
+ SIMDE_ALIGN(8) uint16_t u16[4];
|
|
|
+ SIMDE_ALIGN(8) uint32_t u32[2];
|
|
|
+ SIMDE_ALIGN(8) uint64_t u64[1];
|
|
|
+ SIMDE_ALIGN(8) simde_float32 f32[2];
|
|
|
+ SIMDE_ALIGN(8) int_fast32_t i32f[8 / sizeof(int_fast32_t)];
|
|
|
+ SIMDE_ALIGN(8) uint_fast32_t u32f[8 / sizeof(uint_fast32_t)];
|
|
|
+#endif
|
|
|
+
|
|
|
+#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
|
|
|
__m64 n;
|
|
|
-#elif defined(SIMDE_MMX_NEON)
|
|
|
+#endif
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
int8x8_t neon_i8;
|
|
|
int16x4_t neon_i16;
|
|
|
int32x2_t neon_i32;
|
|
|
@@ -87,1270 +94,2177 @@ typedef union {
|
|
|
uint64x1_t neon_u64;
|
|
|
float32x2_t neon_f32;
|
|
|
#endif
|
|
|
-} simde__m64;
|
|
|
+} simde__m64_private;
|
|
|
+
|
|
|
+#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
|
|
|
+typedef __m64 simde__m64;
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+typedef int32x2_t simde__m64;
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT)
|
|
|
+typedef int32_t simde__m64 SIMDE_ALIGN(8) SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
|
+#else
|
|
|
+typedef simde__m64_private simde__m64;
|
|
|
+#endif
|
|
|
+
|
|
|
+#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && \
|
|
|
+ defined(SIMDE_ENABLE_NATIVE_ALIASES)
|
|
|
+#define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES
|
|
|
+typedef simde__m64 __m64;
|
|
|
+#endif
|
|
|
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
-HEDLEY_STATIC_ASSERT(sizeof(__m64) == sizeof(simde__m64),
|
|
|
- "__m64 size doesn't match simde__m64 size");
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES simde__m64 SIMDE__M64_C(__m64 v)
|
|
|
+HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect");
|
|
|
+HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "simde__m64_private size incorrect");
|
|
|
+#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
|
|
|
+HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8,
|
|
|
+ "simde__m64 is not 8-byte aligned");
|
|
|
+HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8,
|
|
|
+ "simde__m64_private is not 8-byte aligned");
|
|
|
+#endif
|
|
|
+
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
+simde__m64 simde__m64_from_private(simde__m64_private v)
|
|
|
{
|
|
|
simde__m64 r;
|
|
|
- r.n = v;
|
|
|
+ simde_memcpy(&r, &v, sizeof(r));
|
|
|
return r;
|
|
|
}
|
|
|
-#elif defined(SIMDE_MMX_NEON)
|
|
|
-#define SIMDE__M64_NEON_C(T, expr) \
|
|
|
- (simde__m64) { .neon_##T = (expr) }
|
|
|
-#endif
|
|
|
-HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect");
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
+simde__m64_private simde__m64_to_private(simde__m64 v)
|
|
|
+{
|
|
|
+ simde__m64_private r;
|
|
|
+ simde_memcpy(&r, &v, sizeof(r));
|
|
|
+ return r;
|
|
|
+}
|
|
|
+
|
|
|
+#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, \
|
|
|
+ fragment) \
|
|
|
+ SIMDE_FUNCTION_ATTRIBUTES \
|
|
|
+ simde__##simde_type simde__##simde_type##_from_##isax##_##fragment( \
|
|
|
+ source_type value) \
|
|
|
+ { \
|
|
|
+ simde__##simde_type##_private r_; \
|
|
|
+ r_.isax##_##fragment = value; \
|
|
|
+ return simde__##simde_type##_from_private(r_); \
|
|
|
+ } \
|
|
|
+ \
|
|
|
+ SIMDE_FUNCTION_ATTRIBUTES \
|
|
|
+ source_type simde__##simde_type##_to_##isax##_##fragment( \
|
|
|
+ simde__##simde_type value) \
|
|
|
+ { \
|
|
|
+ simde__##simde_type##_private r_ = \
|
|
|
+ simde__##simde_type##_to_private(value); \
|
|
|
+ return r_.isax##_##fragment; \
|
|
|
+ }
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8)
|
|
|
+SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16)
|
|
|
+SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32)
|
|
|
+SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64)
|
|
|
+SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8)
|
|
|
+SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16)
|
|
|
+SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32)
|
|
|
+SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64)
|
|
|
+SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32)
|
|
|
+#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */
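Editorial note, not part of the patch: on a NEON build the macro above expands to accessor pairs such as simde__m64_from_neon_i8() and simde__m64_to_neon_i8(), which simply round-trip through simde__m64_private. A minimal usage sketch, assuming the header is included as "simde/x86/mmx.h" and SIMDE_ARM_NEON_A32V7_NATIVE is defined (the wrapper name negate_pi8 is illustrative only):

#include <arm_neon.h>
#include "simde/x86/mmx.h" /* include path is an assumption */

/* Negate every signed byte lane of a simde__m64 via a native NEON op. */
static simde__m64 negate_pi8(simde__m64 v)
{
	int8x8_t n = simde__m64_to_neon_i8(v); /* generated by the macro above */
	return simde__m64_from_neon_i8(vneg_s8(n));
}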
|
|
|
+
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_add_pi8(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_add_pi8(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_add_pi8(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < 8; i++) {
|
|
|
- r.i8[i] = a.i8[i] + b.i8[i];
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8);
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
|
+ r_.i8 = a_.i8 + b_.i8;
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
|
|
+ r_.i8[i] = a_.i8[i] + b_.i8[i];
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b)
|
|
|
+#define _m_paddb(a, b) simde_m_paddb(a, b)
|
|
|
+#endif
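Editorial sketch (not part of the patch): when SIMDE_ENABLE_NATIVE_ALIASES is defined before inclusion, the alias block above lets legacy MMX call sites keep using the original intrinsic names. The include path and the helper name legacy_add are assumptions:

#define SIMDE_ENABLE_NATIVE_ALIASES
#include "simde/x86/mmx.h" /* include path is an assumption */

/* Existing MMX code keeps calling _mm_add_pi8(); on targets without native
 * MMX it resolves to simde_mm_add_pi8() through the #defines above. */
static simde__m64 legacy_add(simde__m64 a, simde__m64 b)
{
	return _mm_add_pi8(a, b);
}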
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_add_pi16(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_add_pi16(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_add_pi16(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
|
|
|
- r.i16[i] = a.i16[i] + b.i16[i];
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16);
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
|
+ r_.i16 = a_.i16 + b_.i16;
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
|
+ r_.i16[i] = a_.i16[i] + b_.i16[i];
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b)
|
|
|
+#define _m_paddw(a, b) simde_mm_add_pi16(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_add_pi32(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_add_pi32(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_add_pi32(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(int32_t)); i++) {
|
|
|
- r.i32[i] = a.i32[i] + b.i32[i];
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32);
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
|
+ r_.i32 = a_.i32 + b_.i32;
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
|
|
+ r_.i32[i] = a_.i32[i] + b_.i32[i];
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b)
|
|
|
+#define _m_paddd(a, b) simde_mm_add_pi32(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_adds_pi8(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_adds_pi8(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_adds_pi8(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (int i = 0; i < 8; i++) {
|
|
|
- if ((((b.i8[i]) > 0) && ((a.i8[i]) > (INT8_MAX - (b.i8[i]))))) {
|
|
|
- r.i8[i] = INT8_MAX;
|
|
|
- } else if ((((b.i8[i]) < 0) &&
|
|
|
- ((a.i8[i]) < (INT8_MIN - (b.i8[i]))))) {
|
|
|
- r.i8[i] = INT8_MIN;
|
|
|
+ simde__m64_private r_, a_ = simde__m64_to_private(a),
|
|
|
+ b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8);
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
|
|
+ if ((((b_.i8[i]) > 0) &&
|
|
|
+ ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) {
|
|
|
+ r_.i8[i] = INT8_MAX;
|
|
|
+ } else if ((((b_.i8[i]) < 0) &&
|
|
|
+ ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) {
|
|
|
+ r_.i8[i] = INT8_MIN;
|
|
|
} else {
|
|
|
- r.i8[i] = (a.i8[i]) + (b.i8[i]);
|
|
|
+ r_.i8[i] = (a_.i8[i]) + (b_.i8[i]);
|
|
|
}
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b)
|
|
|
+#define _m_paddsb(a, b) simde_mm_adds_pi8(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_adds_pu8(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_adds_pu8(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_adds_pu8(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < 8; i++) {
|
|
|
- const int32_t x = a.u8[i] + b.u8[i];
|
|
|
- if (x < 0)
|
|
|
- r.u8[i] = 0;
|
|
|
- else if (x > UINT8_MAX)
|
|
|
- r.u8[i] = UINT8_MAX;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8);
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
|
|
|
+ const uint_fast16_t x =
|
|
|
+ HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) +
|
|
|
+ HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]);
|
|
|
+ if (x > UINT8_MAX)
|
|
|
+ r_.u8[i] = UINT8_MAX;
|
|
|
else
|
|
|
- r.u8[i] = (uint8_t)x;
|
|
|
+ r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b)
|
|
|
+#define _m_paddusb(a, b) simde_mm_adds_pu8(a, b)
|
|
|
+#endif
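To make the saturating behaviour above concrete, here is a hedged, editorial example (include path and demo name assumed): lane sums that exceed UINT8_MAX clamp to 255 instead of wrapping.

#include <assert.h>
#include "simde/x86/mmx.h" /* include path is an assumption */

static void adds_pu8_demo(void)
{
	simde__m64 a = simde_x_mm_set_pu8(200, 200, 200, 200, 200, 200, 200, 200);
	simde__m64 b = simde_x_mm_set_pu8(100, 100, 100, 100, 100, 100, 100, 100);
	simde__m64 r = simde_mm_adds_pu8(a, b);
	/* 200 + 100 = 300 saturates to 255 in every lane, so all 64 bits are set. */
	assert(simde_mm_cvtm64_si64(r) == -1);
}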
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_adds_pi16(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_adds_pi16(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_adds_pi16(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (int i = 0; i < 4; i++) {
|
|
|
- if ((((b.i16[i]) > 0) &&
|
|
|
- ((a.i16[i]) > (INT16_MAX - (b.i16[i]))))) {
|
|
|
- r.i16[i] = INT16_MAX;
|
|
|
- } else if ((((b.i16[i]) < 0) &&
|
|
|
- ((a.i16[i]) < (SHRT_MIN - (b.i16[i]))))) {
|
|
|
- r.i16[i] = SHRT_MIN;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16);
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
|
+ if ((((b_.i16[i]) > 0) &&
|
|
|
+ ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) {
|
|
|
+ r_.i16[i] = INT16_MAX;
|
|
|
+ } else if ((((b_.i16[i]) < 0) &&
|
|
|
+ ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) {
|
|
|
+ r_.i16[i] = SHRT_MIN;
|
|
|
} else {
|
|
|
- r.i16[i] = (a.i16[i]) + (b.i16[i]);
|
|
|
+ r_.i16[i] = (a_.i16[i]) + (b_.i16[i]);
|
|
|
}
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b)
|
|
|
+#define _m_paddsw(a, b) simde_mm_adds_pi16(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_adds_pu16(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_adds_pu16(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_adds_pu16(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
|
|
|
- const uint32_t x = a.u16[i] + b.u16[i];
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16);
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
|
+ const uint32_t x = a_.u16[i] + b_.u16[i];
|
|
|
if (x > UINT16_MAX)
|
|
|
- r.u16[i] = UINT16_MAX;
|
|
|
+ r_.u16[i] = UINT16_MAX;
|
|
|
else
|
|
|
- r.u16[i] = (uint16_t)x;
|
|
|
+ r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b)
|
|
|
+#define _m_paddusw(a, b) simde_mm_adds_pu16(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_and_si64(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_and_si64(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_and_si64(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.i64[0] = a.i64[0] & b.i64[0];
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32);
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
|
+ r_.i64 = a_.i64 & b_.i64;
|
|
|
+#else
|
|
|
+ r_.i64[0] = a_.i64[0] & b_.i64[0];
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_pand(a, b) simde_mm_and_si64(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_and_si64(a, b) simde_mm_and_si64(a, b)
|
|
|
+#define _m_pand(a, b) simde_mm_and_si64(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_andnot_si64(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_andnot_si64(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_andnot_si64(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.i64[0] = ~(a.i64[0]) & b.i64[0];
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32);
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
|
+ r_.i32f = ~a_.i32f & b_.i32f;
|
|
|
+#else
|
|
|
+ r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]);
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b)
|
|
|
+#define _m_pandn(a, b) simde_mm_andnot_si64(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_cmpeq_pi8(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_cmpeq_pi8(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_cmpeq_pi8(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (int i = 0; i < 8; i++) {
|
|
|
- r.i8[i] = (a.i8[i] == b.i8[i]) * 0xff;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i8 = vreinterpret_s8_u8(vceq_s8(a_.neon_i8, b_.neon_i8));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
|
|
+ r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b)
|
|
|
+#define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_cmpeq_pi16(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_cmpeq_pi16(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_cmpeq_pi16(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (int i = 0; i < 4; i++) {
|
|
|
- r.i16[i] = (a.i16[i] == b.i16[i]) * 0xffff;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i16 = vreinterpret_s16_u16(vceq_s16(a_.neon_i16, b_.neon_i16));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
|
+ r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b)
|
|
|
+#define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_cmpeq_pi32(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_cmpeq_pi32(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_cmpeq_pi32(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (int i = 0; i < 2; i++) {
|
|
|
- r.i32[i] = (a.i32[i] == b.i32[i]) * 0xffffffff;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i32 = vreinterpret_s32_u32(vceq_s32(a_.neon_i32, b_.neon_i32));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
|
|
+ r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b)
|
|
|
+#define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_cmpgt_pi8(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_cmpgt_pi8(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_cmpgt_pi8(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (int i = 0; i < 8; i++) {
|
|
|
- r.i8[i] = (a.i8[i] > b.i8[i]) * 0xff;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i8 = vreinterpret_s8_u8(vcgt_s8(a_.neon_i8, b_.neon_i8));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
|
|
+ r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b)
|
|
|
+#define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_cmpgt_pi16(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_cmpgt_pi16(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_cmpgt_pi16(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (int i = 0; i < 4; i++) {
|
|
|
- r.i16[i] = (a.i16[i] > b.i16[i]) * 0xffff;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i16 = vreinterpret_s16_u16(vcgt_s16(a_.neon_i16, b_.neon_i16));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
|
+ r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b)
|
|
|
+#define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_cmpgt_pi32(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_cmpgt_pi32(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_cmpgt_pi32(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (int i = 0; i < 2; i++) {
|
|
|
- r.i32[i] = (a.i32[i] > b.i32[i]) * 0xffffffff;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i32 = vreinterpret_s32_u32(vcgt_s32(a_.neon_i32, b_.neon_i32));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
|
|
+ r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b)
|
|
|
+#define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
int64_t simde_mm_cvtm64_si64(simde__m64 a)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI)
|
|
|
- return _mm_cvtm64_si64(a.n);
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \
|
|
|
+ !defined(__PGI)
|
|
|
+ return _mm_cvtm64_si64(a);
|
|
|
+#else
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ return vget_lane_s64(a_.neon_i64, 0);
|
|
|
#else
|
|
|
- return a.i64[0];
|
|
|
+ return a_.i64[0];
|
|
|
+#endif
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a)
|
|
|
+#define _m_to_int64(a) simde_mm_cvtm64_si64(a)
|
|
|
+#endif
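Editorial round-trip sketch (include path and demo name are assumptions): simde_mm_cvtsi64_m64() below and simde_mm_cvtm64_si64() above are exact inverses, which is a handy property for scalar fallbacks.

#include <assert.h>
#include <stdint.h>
#include "simde/x86/mmx.h" /* include path is an assumption */

static void cvt_roundtrip_demo(void)
{
	const int64_t x = INT64_C(0x0123456789abcdef);
	simde__m64 v = simde_mm_cvtsi64_m64(x); /* scalar -> 64-bit vector */
	assert(simde_mm_cvtm64_si64(v) == x);   /* 64-bit vector -> scalar */
}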
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_cvtsi32_si64(int32_t a)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_cvtsi32_si64(a));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_cvtsi32_si64(a);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.i32[0] = a;
|
|
|
- r.i32[1] = 0;
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ const int32_t av[sizeof(r_.neon_i32) / sizeof(r_.neon_i32[0])] = {a, 0};
|
|
|
+ r_.neon_i32 = vld1_s32(av);
|
|
|
+#else
|
|
|
+ r_.i32[0] = a;
|
|
|
+ r_.i32[1] = 0;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a)
|
|
|
+#define _m_from_int(a) simde_mm_cvtsi32_si64(a)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_cvtsi64_m64(int64_t a)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI)
|
|
|
- return SIMDE__M64_C(_mm_cvtsi64_m64(a));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \
|
|
|
+ !defined(__PGI)
|
|
|
+ return _mm_cvtsi64_m64(a);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.i64[0] = a;
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i64 = vld1_s64(&a);
|
|
|
+#else
|
|
|
+ r_.i64[0] = a;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a)
|
|
|
+#define _m_from_int64(a) simde_mm_cvtsi64_m64(a)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
int32_t simde_mm_cvtsi64_si32(simde__m64 a)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return _mm_cvtsi64_si32(a.n);
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_cvtsi64_si32(a);
|
|
|
+#else
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ return vget_lane_s32(a_.neon_i32, 0);
|
|
|
#else
|
|
|
- return a.i32[0];
|
|
|
+ return a_.i32[0];
|
|
|
+#endif
|
|
|
#endif
|
|
|
}
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
void simde_mm_empty(void)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
_mm_empty();
|
|
|
#else
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_empty() simde_mm_empty()
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_empty() simde_mm_empty()
|
|
|
+#define _m_empty() simde_mm_empty()
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_madd_pi16(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_madd_pi16(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_madd_pi16(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (int i = 0; i < 4; i += 2) {
|
|
|
- r.i32[i / 2] =
|
|
|
- (a.i16[i] * b.i16[i]) + (a.i16[i + 1] * b.i16[i + 1]);
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16);
|
|
|
+ r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i += 2) {
|
|
|
+ r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) +
|
|
|
+ (a_.i16[i + 1] * b_.i16[i + 1]);
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b)
|
|
|
+#define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b)
|
|
|
+#endif
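A worked example of the multiply-accumulate above (editorial; include path and demo name assumed): each 32-bit result lane is i16[2i]*i16[2i] + i16[2i+1]*i16[2i+1] of the two inputs.

#include <assert.h>
#include "simde/x86/mmx.h" /* include path is an assumption */

static void madd_demo(void)
{
	/* arguments are e3..e0, so i16[0] = 1 ... i16[3] = 4 */
	simde__m64 a = simde_mm_set_pi16(4, 3, 2, 1);
	simde__m64 b = simde_mm_set_pi16(40, 30, 20, 10);
	simde__m64 r = simde_mm_madd_pi16(a, b);
	/* i32[0] = 1*10 + 2*20 = 50, i32[1] = 3*30 + 4*40 = 250 */
	assert(simde_mm_cvtsi64_si32(r) == 50);
}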
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_mulhi_pi16(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_mulhi_pi16(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_mulhi_pi16(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (int i = 0; i < 4; i++) {
|
|
|
- r.i16[i] = (int16_t)((a.i16[i] * b.i16[i]) >> 16);
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
|
|
|
+ const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16);
|
|
|
+ const uint16x4_t t3 = vmovn_u32(t2);
|
|
|
+ r_.neon_i16 = vreinterpret_s16_u16(t3);
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
|
+ r_.i16[i] = HEDLEY_STATIC_CAST(int16_t,
|
|
|
+ ((a_.i16[i] * b_.i16[i]) >> 16));
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b)
|
|
|
+#define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_mullo_pi16(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_mullo_pi16(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_mullo_pi16(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (int i = 0; i < 4; i++) {
|
|
|
- r.i16[i] = (int16_t)((a.i16[i] * b.i16[i]) & 0xffff);
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
|
|
|
+ const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1));
|
|
|
+ r_.neon_i16 = vreinterpret_s16_u16(t2);
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
|
+ r_.i16[i] = HEDLEY_STATIC_CAST(
|
|
|
+ int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff));
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b)
|
|
|
+#define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_or_si64(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_or_si64(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_or_si64(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.i64[0] = a.i64[0] | b.i64[0];
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32);
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
|
+ r_.i64 = a_.i64 | b_.i64;
|
|
|
+#else
|
|
|
+ r_.i64[0] = a_.i64[0] | b_.i64[0];
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_por(a, b) simde_mm_or_si64(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_or_si64(a, b) simde_mm_or_si64(a, b)
|
|
|
+#define _m_por(a, b) simde_mm_or_si64(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_packs_pi16(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_packs_pi16(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_packs_pi16(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
|
|
|
- if (a.i16[i] < INT8_MIN) {
|
|
|
- r.i8[i] = INT8_MIN;
|
|
|
- } else if (a.i16[i] > INT8_MAX) {
|
|
|
- r.i8[i] = INT8_MAX;
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
|
+ if (a_.i16[i] < INT8_MIN) {
|
|
|
+ r_.i8[i] = INT8_MIN;
|
|
|
+ } else if (a_.i16[i] > INT8_MAX) {
|
|
|
+ r_.i8[i] = INT8_MAX;
|
|
|
} else {
|
|
|
- r.i8[i] = (int8_t)a.i16[i];
|
|
|
+ r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
|
|
|
- if (b.i16[i] < INT8_MIN) {
|
|
|
- r.i8[i + 4] = INT8_MIN;
|
|
|
- } else if (b.i16[i] > INT8_MAX) {
|
|
|
- r.i8[i + 4] = INT8_MAX;
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
|
+ if (b_.i16[i] < INT8_MIN) {
|
|
|
+ r_.i8[i + 4] = INT8_MIN;
|
|
|
+ } else if (b_.i16[i] > INT8_MAX) {
|
|
|
+ r_.i8[i + 4] = INT8_MAX;
|
|
|
} else {
|
|
|
- r.i8[i + 4] = (int8_t)b.i16[i];
|
|
|
+ r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]);
|
|
|
}
|
|
|
}
|
|
|
+#endif
|
|
|
|
|
|
- return r;
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b)
|
|
|
+#define _m_packsswb(a, b) simde_mm_packs_pi16(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_packs_pi32(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_packs_pi32(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_packs_pi32(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(a.i32[0])); i++) {
|
|
|
- if (a.i32[i] < SHRT_MIN) {
|
|
|
- r.i16[i] = SHRT_MIN;
|
|
|
- } else if (a.i32[i] > INT16_MAX) {
|
|
|
- r.i16[i] = INT16_MAX;
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (8 / sizeof(a_.i32[0])); i++) {
|
|
|
+ if (a_.i32[i] < SHRT_MIN) {
|
|
|
+ r_.i16[i] = SHRT_MIN;
|
|
|
+ } else if (a_.i32[i] > INT16_MAX) {
|
|
|
+ r_.i16[i] = INT16_MAX;
|
|
|
} else {
|
|
|
- r.i16[i] = (int16_t)a.i32[i];
|
|
|
+ r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(b.i32[0])); i++) {
|
|
|
- if (b.i32[i] < SHRT_MIN) {
|
|
|
- r.i16[i + 2] = SHRT_MIN;
|
|
|
- } else if (b.i32[i] > INT16_MAX) {
|
|
|
- r.i16[i + 2] = INT16_MAX;
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (8 / sizeof(b_.i32[0])); i++) {
|
|
|
+ if (b_.i32[i] < SHRT_MIN) {
|
|
|
+ r_.i16[i + 2] = SHRT_MIN;
|
|
|
+ } else if (b_.i32[i] > INT16_MAX) {
|
|
|
+ r_.i16[i + 2] = INT16_MAX;
|
|
|
} else {
|
|
|
- r.i16[i + 2] = (int16_t)b.i32[i];
|
|
|
+ r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]);
|
|
|
}
|
|
|
}
|
|
|
+#endif
|
|
|
|
|
|
- return r;
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b)
|
|
|
+#define _m_packssdw(a, b) simde_mm_packs_pi32(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_packs_pu16(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_packs_pu16(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_packs_pu16(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
|
|
|
- if (a.i16[i] > UINT8_MAX) {
|
|
|
- r.u8[i] = UINT8_MAX;
|
|
|
- } else if (a.i16[i] < 0) {
|
|
|
- r.u8[i] = 0;
|
|
|
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
|
|
+ const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16);
|
|
|
+
|
|
|
+ /* Set elements which are < 0 to 0 */
|
|
|
+ const int16x8_t t2 =
|
|
|
+ vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1)));
|
|
|
+
|
|
|
+ /* Vector with all s16 elements set to UINT8_MAX */
|
|
|
+ const int16x8_t vmax = vmovq_n_s16((int16_t)UINT8_MAX);
|
|
|
+
|
|
|
+ /* Elements which are within the acceptable range */
|
|
|
+ const int16x8_t le_max =
|
|
|
+ vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax)));
|
|
|
+ const int16x8_t gt_max =
|
|
|
+ vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax)));
|
|
|
+
|
|
|
+ /* Final values as 16-bit integers */
|
|
|
+ const int16x8_t values = vorrq_s16(le_max, gt_max);
|
|
|
+
|
|
|
+ r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
|
+ if (a_.i16[i] > UINT8_MAX) {
|
|
|
+ r_.u8[i] = UINT8_MAX;
|
|
|
+ } else if (a_.i16[i] < 0) {
|
|
|
+ r_.u8[i] = 0;
|
|
|
} else {
|
|
|
- r.u8[i] = (int8_t)a.i16[i];
|
|
|
+ r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
|
|
|
- if (b.i16[i] > UINT8_MAX) {
|
|
|
- r.u8[i + 4] = UINT8_MAX;
|
|
|
- } else if (b.i16[i] < 0) {
|
|
|
- r.u8[i + 4] = 0;
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
|
+ if (b_.i16[i] > UINT8_MAX) {
|
|
|
+ r_.u8[i + 4] = UINT8_MAX;
|
|
|
+ } else if (b_.i16[i] < 0) {
|
|
|
+ r_.u8[i + 4] = 0;
|
|
|
} else {
|
|
|
- r.u8[i + 4] = (int8_t)b.i16[i];
|
|
|
+ r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]);
|
|
|
}
|
|
|
}
|
|
|
+#endif
|
|
|
|
|
|
- return r;
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b)
|
|
|
+#define _m_packuswb(a, b) simde_mm_packs_pu16(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_set_pi8(int8_t e7, int8_t e6, int8_t e5, int8_t e4,
|
|
|
int8_t e3, int8_t e2, int8_t e1, int8_t e0)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.i8[0] = e0;
|
|
|
- r.i8[1] = e1;
|
|
|
- r.i8[2] = e2;
|
|
|
- r.i8[3] = e3;
|
|
|
- r.i8[4] = e4;
|
|
|
- r.i8[5] = e5;
|
|
|
- r.i8[6] = e6;
|
|
|
- r.i8[7] = e7;
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = {e0, e1, e2, e3,
|
|
|
+ e4, e5, e6, e7};
|
|
|
+ r_.neon_i8 = vld1_s8(v);
|
|
|
+#else
|
|
|
+ r_.i8[0] = e0;
|
|
|
+ r_.i8[1] = e1;
|
|
|
+ r_.i8[2] = e2;
|
|
|
+ r_.i8[3] = e3;
|
|
|
+ r_.i8[4] = e4;
|
|
|
+ r_.i8[5] = e5;
|
|
|
+ r_.i8[6] = e6;
|
|
|
+ r_.i8[7] = e7;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) \
|
|
|
+ simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_x_mm_set_pu8(uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4,
|
|
|
uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_set_pi8((int8_t)e7, (int8_t)e6, (int8_t)e5,
|
|
|
- (int8_t)e4, (int8_t)e3, (int8_t)e2,
|
|
|
- (int8_t)e1, (int8_t)e0));
|
|
|
+ simde__m64_private r_;
|
|
|
+
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ r_.n = _mm_set_pi8(
|
|
|
+ HEDLEY_STATIC_CAST(int8_t, e7), HEDLEY_STATIC_CAST(int8_t, e6),
|
|
|
+ HEDLEY_STATIC_CAST(int8_t, e5), HEDLEY_STATIC_CAST(int8_t, e4),
|
|
|
+ HEDLEY_STATIC_CAST(int8_t, e3), HEDLEY_STATIC_CAST(int8_t, e2),
|
|
|
+ HEDLEY_STATIC_CAST(int8_t, e1), HEDLEY_STATIC_CAST(int8_t, e0));
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = {e0, e1, e2, e3,
|
|
|
+ e4, e5, e6, e7};
|
|
|
+ r_.neon_u8 = vld1_u8(v);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.u8[0] = e0;
|
|
|
- r.u8[1] = e1;
|
|
|
- r.u8[2] = e2;
|
|
|
- r.u8[3] = e3;
|
|
|
- r.u8[4] = e4;
|
|
|
- r.u8[5] = e5;
|
|
|
- r.u8[6] = e6;
|
|
|
- r.u8[7] = e7;
|
|
|
- return r;
|
|
|
+ r_.u8[0] = e0;
|
|
|
+ r_.u8[1] = e1;
|
|
|
+ r_.u8[2] = e2;
|
|
|
+ r_.u8[3] = e3;
|
|
|
+ r_.u8[4] = e4;
|
|
|
+ r_.u8[5] = e5;
|
|
|
+ r_.u8[6] = e6;
|
|
|
+ r_.u8[7] = e7;
|
|
|
#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
}
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_set_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_set_pi16(e3, e2, e1, e0));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_set_pi16(e3, e2, e1, e0);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.i16[0] = e0;
|
|
|
- r.i16[1] = e1;
|
|
|
- r.i16[2] = e2;
|
|
|
- r.i16[3] = e3;
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = {e0, e1, e2, e3};
|
|
|
+ r_.neon_i16 = vld1_s16(v);
|
|
|
+#else
|
|
|
+ r_.i16[0] = e0;
|
|
|
+ r_.i16[1] = e1;
|
|
|
+ r_.i16[2] = e2;
|
|
|
+ r_.i16[3] = e3;
|
|
|
+#endif
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_x_mm_set_pu16(uint16_t e3, uint16_t e2, uint16_t e1,
|
|
|
uint16_t e0)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_set_pi16((int16_t)e3, (int16_t)e2, (int16_t)e1,
|
|
|
- (int16_t)e0));
|
|
|
+ simde__m64_private r_;
|
|
|
+
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ r_.n = _mm_set_pi16(HEDLEY_STATIC_CAST(int16_t, e3),
|
|
|
+ HEDLEY_STATIC_CAST(int16_t, e2),
|
|
|
+ HEDLEY_STATIC_CAST(int16_t, e1),
|
|
|
+ HEDLEY_STATIC_CAST(int16_t, e0));
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = {e0, e1, e2, e3};
|
|
|
+ r_.neon_u16 = vld1_u16(v);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.u16[0] = e0;
|
|
|
- r.u16[1] = e1;
|
|
|
- r.u16[2] = e2;
|
|
|
- r.u16[3] = e3;
|
|
|
- return r;
|
|
|
+ r_.u16[0] = e0;
|
|
|
+ r_.u16[1] = e1;
|
|
|
+ r_.u16[2] = e2;
|
|
|
+ r_.u16[3] = e3;
|
|
|
#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
}
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_x_mm_set_pu32(uint32_t e1, uint32_t e0)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_set_pi32((int32_t)e1, (int32_t)e0));
|
|
|
+ simde__m64_private r_;
|
|
|
+
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ r_.n = _mm_set_pi32(HEDLEY_STATIC_CAST(int32_t, e1),
|
|
|
+ HEDLEY_STATIC_CAST(int32_t, e0));
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = {e0, e1};
|
|
|
+ r_.neon_u32 = vld1_u32(v);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.u32[0] = e0;
|
|
|
- r.u32[1] = e1;
|
|
|
- return r;
|
|
|
+ r_.u32[0] = e0;
|
|
|
+ r_.u32[1] = e1;
|
|
|
#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
}
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_set_pi32(int32_t e1, int32_t e0)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_set_pi32(e1, e0));
|
|
|
+ simde__m64_private r_;
|
|
|
+
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ r_.n = _mm_set_pi32(e1, e0);
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = {e0, e1};
|
|
|
+ r_.neon_i32 = vld1_s32(v);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.i32[0] = e0;
|
|
|
- r.i32[1] = e1;
|
|
|
- return r;
|
|
|
+ r_.i32[0] = e0;
|
|
|
+ r_.i32[1] = e1;
|
|
|
#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
}
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
+simde__m64 simde_x_mm_set_pi64(int64_t e0)
|
|
|
+{
|
|
|
+ simde__m64_private r_;
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = {e0};
|
|
|
+ r_.neon_i64 = vld1_s64(v);
|
|
|
+#else
|
|
|
+ r_.i64[0] = e0;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
+}
|
|
|
+
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
+simde__m64 simde_x_mm_set_f32x2(simde_float32 e1, simde_float32 e0)
|
|
|
+{
|
|
|
+ simde__m64_private r_;
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = {e0, e1};
|
|
|
+ r_.neon_f32 = vld1_f32(v);
|
|
|
+#else
|
|
|
+ r_.f32[0] = e0;
|
|
|
+ r_.f32[1] = e1;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
+}
|
|
|
+
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_set1_pi8(int8_t a)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_set1_pi8(a));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_set1_pi8(a);
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ simde__m64_private r_;
|
|
|
+ r_.neon_i8 = vmov_n_s8(a);
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#else
|
|
|
return simde_mm_set_pi8(a, a, a, a, a, a, a, a);
|
|
|
#endif
|
|
|
}
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_set1_pi8(a) simde_mm_set1_pi8(a)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_set1_pi16(int16_t a)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_set1_pi16(a));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_set1_pi16(a);
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ simde__m64_private r_;
|
|
|
+ r_.neon_i16 = vmov_n_s16(a);
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#else
|
|
|
return simde_mm_set_pi16(a, a, a, a);
|
|
|
#endif
|
|
|
}
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_set1_pi16(a) simde_mm_set1_pi16(a)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_set1_pi32(int32_t a)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_set1_pi32(a));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_set1_pi32(a);
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ simde__m64_private r_;
|
|
|
+ r_.neon_i32 = vmov_n_s32(a);
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#else
|
|
|
return simde_mm_set_pi32(a, a);
|
|
|
#endif
|
|
|
}
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_set1_pi32(a) simde_mm_set1_pi32(a)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_setr_pi8(int8_t e7, int8_t e6, int8_t e5, int8_t e4,
|
|
|
int8_t e3, int8_t e2, int8_t e1, int8_t e0)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0);
|
|
|
#else
|
|
|
return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7);
|
|
|
#endif
|
|
|
}
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) \
|
|
|
+ simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_setr_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_setr_pi16(e3, e2, e1, e0));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_setr_pi16(e3, e2, e1, e0);
|
|
|
#else
|
|
|
return simde_mm_set_pi16(e0, e1, e2, e3);
|
|
|
#endif
|
|
|
}
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_setr_pi32(int32_t e1, int32_t e0)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_setr_pi32(e1, e0));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_setr_pi32(e1, e0);
|
|
|
#else
|
|
|
return simde_mm_set_pi32(e0, e1);
|
|
|
#endif
|
|
|
}
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_setzero_si64(void)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_setzero_si64());
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_setzero_si64();
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ simde__m64_private r_;
|
|
|
+ r_.neon_u32 = vmov_n_u32(0);
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#else
|
|
|
return simde_mm_set_pi32(0, 0);
|
|
|
#endif
|
|
|
}
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_setzero_si64() simde_mm_setzero_si64()
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
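+/* simde_x_mm_setone_si64 is a SIMDe-specific helper (the simde_x_ prefix marks
+ * functions with no native MMX counterpart), so no _mm_/_m_ alias is defined for it. */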
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
+simde__m64 simde_x_mm_setone_si64(void)
|
|
|
+{
|
|
|
+ return simde_mm_set1_pi32(~INT32_C(0));
|
|
|
+}
|
|
|
+
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_sll_pi16(simde__m64 a, simde__m64 count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_sll_pi16(a.n, count.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_sll_pi16(a, count);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private count_ = simde__m64_to_private(count);
|
|
|
|
|
|
- if (HEDLEY_UNLIKELY(count.u64[0] > 15)) {
|
|
|
- memset(&r, 0, sizeof(r));
|
|
|
- return r;
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t)vget_lane_u64(
|
|
|
+ count_.neon_u64, 0)));
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
|
+ r_.i16 = a_.i16 << count_.u64[0];
|
|
|
+#else
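+ /* Portable fallback: a shift count above 15 zeroes the result, matching PSLLW. */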
|
|
|
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) {
|
|
|
+ simde_memset(&r_, 0, sizeof(r_));
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
}
|
|
|
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (sizeof(r.u16) / sizeof(r.u16[0])); i++) {
|
|
|
- r.u16[i] = a.u16[i] << count.u64[0];
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
|
|
|
+ r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t,
|
|
|
+ a_.u16[i] << count_.u64[0]);
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count)
|
|
|
+#define _m_psllw(a, count) simde_mm_sll_pi16(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_sll_pi32(simde__m64 a, simde__m64 count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_sll_pi32(a.n, count.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_sll_pi32(a, count);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private count_ = simde__m64_to_private(count);
|
|
|
|
|
|
- if (HEDLEY_UNLIKELY(count.u64[0] > 31)) {
|
|
|
- memset(&r, 0, sizeof(r));
|
|
|
- return r;
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t)vget_lane_u64(
|
|
|
+ count_.neon_u64, 0)));
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
|
+ r_.i32 = a_.i32 << count_.u64[0];
|
|
|
+#else
|
|
|
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) {
|
|
|
+ simde_memset(&r_, 0, sizeof(r_));
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
}
|
|
|
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (sizeof(r.u32) / sizeof(r.u32[0])); i++) {
|
|
|
- r.u32[i] = a.u32[i] << count.u64[0];
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
|
|
|
+ r_.u32[i] = a_.u32[i] << count_.u64[0];
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count)
|
|
|
+#define _m_pslld(a, count) simde_mm_sll_pi32(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_slli_pi16(simde__m64 a, int count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
|
|
|
- return SIMDE__M64_C(_mm_slli_pi16(a.n, count));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
|
|
+ return _mm_slli_pi16(a, count);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (sizeof(r.u16) / sizeof(r.u16[0])); i++) {
|
|
|
- r.u16[i] = a.u16[i] << count;
|
|
|
+#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
|
+ r_.i16 = a_.i16 << count;
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t)count));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
|
|
|
+ r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count);
|
|
|
}
|
|
|
+#endif
|
|
|
|
|
|
- return r;
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count)
|
|
|
+#define _m_psllwi(a, count) simde_mm_slli_pi16(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_slli_pi32(simde__m64 a, int count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
|
|
|
- return SIMDE__M64_C(_mm_slli_pi32(a.n, count));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
|
|
+ return _mm_slli_pi32(a, count);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(int)); i++) {
|
|
|
- r.u32[i] = a.u32[i] << count;
|
|
|
+#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
|
+ r_.i32 = a_.i32 << count;
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t)count));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
|
|
|
+ r_.u32[i] = a_.u32[i] << count;
|
|
|
}
|
|
|
+#endif
|
|
|
|
|
|
- return r;
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count)
|
|
|
+#define _m_pslldi(a, count) simde_mm_slli_pi32(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_slli_si64(simde__m64 a, int count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_slli_si64(a.n, count));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_slli_si64(a, count);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.u64[0] = a.u64[0] << count;
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+
|
|
|
+#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
|
+ r_.i64 = a_.i64 << count;
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t)count));
|
|
|
+#else
|
|
|
+ r_.u64[0] = a_.u64[0] << count;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count)
|
|
|
+#define _m_psllqi(a, count) simde_mm_slli_si64(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_sll_si64(simde__m64 a, simde__m64 count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_sll_si64(a.n, count.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_sll_si64(a, count);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private count_ = simde__m64_to_private(count);
|
|
|
|
|
|
- if (HEDLEY_UNLIKELY(count.u64[0] > 63)) {
|
|
|
- memset(&r, 0, sizeof(r));
|
|
|
- return r;
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64);
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
|
+ r_.i64 = a_.i64 << count_.i64;
|
|
|
+#else
|
|
|
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {
|
|
|
+ simde_memset(&r_, 0, sizeof(r_));
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
}
|
|
|
|
|
|
- r.u64[0] = a.u64[0] << count.u64[0];
|
|
|
+ r_.u64[0] = a_.u64[0] << count_.u64[0];
|
|
|
+#endif
|
|
|
|
|
|
- return r;
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count)
|
|
|
+#define _m_psllq(a, count) simde_mm_sll_si64(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_srl_pi16(simde__m64 a, simde__m64 count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_srl_pi16(a.n, count.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_srl_pi16(a, count);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private count_ = simde__m64_to_private(count);
|
|
|
|
|
|
- if (HEDLEY_UNLIKELY(count.u64[0] > 15)) {
|
|
|
- memset(&r, 0, sizeof(r));
|
|
|
- return r;
|
|
|
+#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
|
+ r_.u16 = a_.u16 >> count_.u64[0];
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
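+ /* NEON has no variable right-shift; shifting left by the negated count shifts right. */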
|
|
|
+ r_.neon_u16 = vshl_u16(
|
|
|
+ a_.neon_u16,
|
|
|
+ vmov_n_s16(-((int16_t)vget_lane_u64(count_.neon_u64, 0))));
|
|
|
+#else
|
|
|
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) {
|
|
|
+ simde_memset(&r_, 0, sizeof(r_));
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
}
|
|
|
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < sizeof(r.u16) / sizeof(r.u16[0]); i++) {
|
|
|
- r.u16[i] = a.u16[i] >> count.u64[0];
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < sizeof(r_.u16) / sizeof(r_.u16[0]); i++) {
|
|
|
+ r_.u16[i] = a_.u16[i] >> count_.u64[0];
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count)
|
|
|
+#define _m_psrlw(a, count) simde_mm_srl_pi16(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_srl_pi32(simde__m64 a, simde__m64 count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_srl_pi32(a.n, count.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_srl_pi32(a, count);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private count_ = simde__m64_to_private(count);
|
|
|
|
|
|
- if (HEDLEY_UNLIKELY(count.u64[0] > 31)) {
|
|
|
- memset(&r, 0, sizeof(r));
|
|
|
- return r;
|
|
|
+#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
|
+ r_.u32 = a_.u32 >> count_.u64[0];
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_u32 = vshl_u32(
|
|
|
+ a_.neon_u32,
|
|
|
+ vmov_n_s32(-((int32_t)vget_lane_u64(count_.neon_u64, 0))));
|
|
|
+#else
|
|
|
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) {
|
|
|
+ simde_memset(&r_, 0, sizeof(r_));
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
}
|
|
|
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < sizeof(r.u32) / sizeof(r.u32[0]); i++) {
|
|
|
- r.u32[i] = a.u32[i] >> count.u64[0];
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < sizeof(r_.u32) / sizeof(r_.u32[0]); i++) {
|
|
|
+ r_.u32[i] = a_.u32[i] >> count_.u64[0];
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count)
|
|
|
+#define _m_psrld(a, count) simde_mm_srl_pi32(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_srli_pi16(simde__m64 a, int count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
|
|
|
- return SIMDE__M64_C(_mm_srli_pi16(a.n, count));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
|
|
+ return _mm_srli_pi16(a, count);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(uint16_t)); i++) {
|
|
|
- r.u16[i] = a.u16[i] >> count;
|
|
|
+#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
|
+ r_.u16 = a_.u16 >> count;
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t)count)));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
|
|
|
+ r_.u16[i] = a_.u16[i] >> count;
|
|
|
}
|
|
|
+#endif
|
|
|
|
|
|
- return r;
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count)
|
|
|
+#define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_srli_pi32(simde__m64 a, int count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
|
|
|
- return SIMDE__M64_C(_mm_srli_pi32(a.n, count));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
|
|
+ return _mm_srli_pi32(a, count);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(int)); i++) {
|
|
|
- r.u32[i] = a.u32[i] >> count;
|
|
|
+#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
|
+ r_.u32 = a_.u32 >> count;
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t)count)));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
|
|
|
+ r_.u32[i] = a_.u32[i] >> count;
|
|
|
}
|
|
|
+#endif
|
|
|
|
|
|
- return r;
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count)
|
|
|
+#define _m_psrldi(a, count) simde_mm_srli_pi32(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_srli_si64(simde__m64 a, int count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
|
|
|
- return SIMDE__M64_C(_mm_srli_si64(a.n, count));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
|
|
+ return _mm_srli_si64(a, count);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.u64[0] = a.u64[0] >> count;
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count));
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
|
+ r_.u64 = a_.u64 >> count;
|
|
|
+#else
|
|
|
+ r_.u64[0] = a_.u64[0] >> count;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count)
|
|
|
+#define _m_psrlqi(a, count) simde_mm_srli_si64(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_srl_si64(simde__m64 a, simde__m64 count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_srl_si64(a.n, count.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_srl_si64(a, count);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private count_ = simde__m64_to_private(count);
|
|
|
|
|
|
- if (HEDLEY_UNLIKELY(count.u64[0] > 63)) {
|
|
|
- memset(&r, 0, sizeof(r));
|
|
|
- return r;
|
|
|
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
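+ /* vneg_s64 is only available on AArch64, hence the A64V8 guard. */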
|
|
|
+ r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64));
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
|
+ r_.u64 = a_.u64 >> count_.u64;
|
|
|
+#else
|
|
|
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {
|
|
|
+ simde_memset(&r_, 0, sizeof(r_));
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
}
|
|
|
|
|
|
- r.u64[0] = a.u64[0] >> count.u64[0];
|
|
|
- return r;
|
|
|
+ r_.u64[0] = a_.u64[0] >> count_.u64[0];
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count)
|
|
|
+#define _m_psrlq(a, count) simde_mm_srl_si64(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_srai_pi16(simde__m64 a, int count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
|
|
|
- return SIMDE__M64_C(_mm_srai_pi16(a.n, count));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
|
|
+ return _mm_srai_pi16(a, count);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
-
|
|
|
- const uint16_t m =
|
|
|
- (uint16_t)((~0U) << ((sizeof(int16_t) * CHAR_BIT) - count));
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
|
|
|
- const uint16_t is_neg = ((uint16_t)(
|
|
|
- ((a.u16[i]) >> ((sizeof(int16_t) * CHAR_BIT) - 1))));
|
|
|
- r.u16[i] = (a.u16[i] >> count) | (m * is_neg);
|
|
|
+#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
|
+ r_.i16 = a_.i16 >> (count & 0xff);
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i16 = vshl_s16(a_.neon_i16,
+ vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count)));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
|
+ r_.i16[i] = a_.i16[i] >> (count & 0xff);
|
|
|
}
|
|
|
+#endif
|
|
|
|
|
|
- return r;
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count)
|
|
|
+#define _m_psrawi(a, count) simde_mm_srai_pi16(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_srai_pi32(simde__m64 a, int count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
|
|
|
- return SIMDE__M64_C(_mm_srai_pi32(a.n, count));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
|
|
+ return _mm_srai_pi32(a, count);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
|
|
|
- const uint32_t m =
|
|
|
- (uint32_t)((~0U) << ((sizeof(int) * CHAR_BIT) - count));
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(int)); i++) {
|
|
|
- const uint32_t is_neg = ((uint32_t)(
|
|
|
- ((a.u32[i]) >> ((sizeof(int) * CHAR_BIT) - 1))));
|
|
|
- r.u32[i] = (a.u32[i] >> count) | (m * is_neg);
|
|
|
+#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
|
+ r_.i32 = a_.i32 >> (count & 0xff);
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i32 = vshl_s32(a_.neon_i32,
|
|
|
+ vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count)));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
|
|
+ r_.i32[i] = a_.i32[i] >> (count & 0xff);
|
|
|
}
|
|
|
+#endif
|
|
|
|
|
|
- return r;
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
-#define simde_m_srai_pi32(a, count) simde_mm_srai_pi32(a, count)
|
|
|
+#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count)
|
|
|
+#define _m_psradi(a, count) simde_mm_srai_pi32(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_sra_pi16(simde__m64 a, simde__m64 count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_sra_pi16(a.n, count.n));
|
|
|
-#else
|
|
|
- simde__m64 r;
|
|
|
- int cnt = (int)count.i64[0];
|
|
|
-
|
|
|
- if (cnt > 15 || cnt < 0) {
|
|
|
- for (size_t i = 0; i < (sizeof(r.i16) / sizeof(r.i16[0]));
|
|
|
- i++) {
|
|
|
- r.u16[i] = (a.i16[i] < 0) ? 0xffff : 0x0000;
|
|
|
- }
|
|
|
- } else {
|
|
|
- const uint16_t m = (uint16_t)(
|
|
|
- (~0U) << ((sizeof(int16_t) * CHAR_BIT) - cnt));
|
|
|
- for (size_t i = 0; i < (sizeof(r.i16) / sizeof(r.i16[0]));
|
|
|
- i++) {
|
|
|
- const uint16_t is_neg = a.i16[i] < 0;
|
|
|
- r.u16[i] = (a.u16[i] >> cnt) | (m * is_neg);
|
|
|
- }
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_sra_pi16(a, count);
|
|
|
+#else
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private count_ = simde__m64_to_private(count);
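+ /* Clamp the count at 15: arithmetic shifts of 15 or more fill every lane with its sign bit, as PSRAW does. */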
|
|
|
+ const int cnt = HEDLEY_STATIC_CAST(
|
|
|
+ int, (count_.i64[0] > 15 ? 15 : count_.i64[0]));
|
|
|
+
|
|
|
+#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
|
+ r_.i16 = a_.i16 >> cnt;
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i16 =
|
|
|
+ vshl_s16(a_.neon_i16,
|
|
|
+ vmov_n_s16(-HEDLEY_STATIC_CAST(
|
|
|
+ int16_t, vget_lane_u64(count_.neon_u64, 0))));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
|
+ r_.i16[i] = a_.i16[i] >> cnt;
|
|
|
}
|
|
|
+#endif
|
|
|
|
|
|
- return r;
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count)
|
|
|
+#define _m_psraw(a, count) simde_mm_sra_pi16(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_sra_pi32(simde__m64 a, simde__m64 count)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_sra_pi32(a.n, count.n));
|
|
|
-#else
|
|
|
- simde__m64 r;
|
|
|
- const uint64_t cnt = count.u64[0];
|
|
|
-
|
|
|
- if (cnt > 31) {
|
|
|
- for (size_t i = 0; i < (sizeof(r.i32) / sizeof(r.i32[0]));
|
|
|
- i++) {
|
|
|
- r.u32[i] = (a.i32[i] < 0) ? UINT32_MAX : 0;
|
|
|
- }
|
|
|
- } else if (cnt == 0) {
|
|
|
- memcpy(&r, &a, sizeof(r));
|
|
|
- } else {
|
|
|
- const uint32_t m = (uint32_t)(
|
|
|
- (~0U) << ((sizeof(int32_t) * CHAR_BIT) - cnt));
|
|
|
- for (size_t i = 0; i < (sizeof(r.i32) / sizeof(r.i32[0]));
|
|
|
- i++) {
|
|
|
- const uint32_t is_neg = a.i32[i] < 0;
|
|
|
- r.u32[i] = (a.u32[i] >> cnt) | (m * is_neg);
|
|
|
- }
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_sra_pi32(a, count);
|
|
|
+#else
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private count_ = simde__m64_to_private(count);
|
|
|
+ const int32_t cnt =
|
|
|
+ (count_.u64[0] > 31)
|
|
|
+ ? 31
|
|
|
+ : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]);
|
|
|
+
|
|
|
+#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
|
+ r_.i32 = a_.i32 >> cnt;
|
|
|
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i32 =
|
|
|
+ vshl_s32(a_.neon_i32,
|
|
|
+ vmov_n_s32(-HEDLEY_STATIC_CAST(
|
|
|
+ int32_t, vget_lane_u64(count_.neon_u64, 0))));
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
|
|
+ r_.i32[i] = a_.i32[i] >> cnt;
|
|
|
}
|
|
|
+#endif
|
|
|
|
|
|
- return r;
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count)
|
|
|
+#define _m_psrad(a, count) simde_mm_sra_pi32(a, count)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_sub_pi8(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_sub_pi8(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_sub_pi8(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < 8; i++) {
|
|
|
- r.i8[i] = a.i8[i] - b.i8[i];
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8);
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
|
+ r_.i8 = a_.i8 - b_.i8;
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
|
|
+ r_.i8[i] = a_.i8[i] - b_.i8[i];
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b)
|
|
|
+#define _m_psubb(a, b) simde_mm_sub_pi8(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_sub_pi16(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_sub_pi16(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_sub_pi16(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
|
|
|
- r.i16[i] = a.i16[i] - b.i16[i];
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16);
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
|
+ r_.i16 = a_.i16 - b_.i16;
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
|
+ r_.i16[i] = a_.i16[i] - b_.i16[i];
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b)
|
|
|
+#define _m_psubw(a, b) simde_mm_sub_pi16(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_sub_pi32(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_sub_pi32(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_sub_pi32(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(int)); i++) {
|
|
|
- r.i32[i] = a.i32[i] - b.i32[i];
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32);
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
|
+ r_.i32 = a_.i32 - b_.i32;
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
|
|
+ r_.i32[i] = a_.i32[i] - b_.i32[i];
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b)
|
|
|
+#define _m_psubd(a, b) simde_mm_sub_pi32(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_subs_pi8(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_subs_pi8(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_subs_pi8(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8); i++) {
|
|
|
- if (((b.i8[i]) > 0 && (a.i8[i]) < INT8_MIN + (b.i8[i]))) {
|
|
|
- r.i8[i] = INT8_MIN;
|
|
|
- } else if ((b.i8[i]) < 0 && (a.i8[i]) > INT8_MAX + (b.i8[i])) {
|
|
|
- r.i8[i] = INT8_MAX;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
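+ /* vqsub_s8 is NEON's saturating signed subtract, so no manual clamping is needed. */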
|
|
|
+ r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8);
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
|
|
+ if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) {
|
|
|
+ r_.i8[i] = INT8_MIN;
|
|
|
+ } else if ((b_.i8[i]) < 0 &&
|
|
|
+ (a_.i8[i]) > INT8_MAX + (b_.i8[i])) {
|
|
|
+ r_.i8[i] = INT8_MAX;
|
|
|
} else {
|
|
|
- r.i8[i] = (a.i8[i]) - (b.i8[i]);
|
|
|
+ r_.i8[i] = (a_.i8[i]) - (b_.i8[i]);
|
|
|
}
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b)
|
|
|
+#define _m_psubsb(a, b) simde_mm_subs_pi8(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_subs_pu8(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_subs_pu8(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_subs_pu8(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8); i++) {
|
|
|
- const int32_t x = a.u8[i] - b.u8[i];
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8);
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
|
|
|
+ const int32_t x = a_.u8[i] - b_.u8[i];
|
|
|
if (x < 0) {
|
|
|
- r.u8[i] = 0;
|
|
|
+ r_.u8[i] = 0;
|
|
|
} else if (x > UINT8_MAX) {
|
|
|
- r.u8[i] = UINT8_MAX;
|
|
|
+ r_.u8[i] = UINT8_MAX;
|
|
|
} else {
|
|
|
- r.u8[i] = (uint8_t)x;
|
|
|
+ r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);
|
|
|
}
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b)
|
|
|
+#define _m_psubusb(a, b) simde_mm_subs_pu8(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_subs_pi16(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_subs_pi16(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_subs_pi16(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
|
|
|
- if (((b.i16[i]) > 0 && (a.i16[i]) < SHRT_MIN + (b.i16[i]))) {
|
|
|
- r.i16[i] = SHRT_MIN;
|
|
|
- } else if ((b.i16[i]) < 0 &&
|
|
|
- (a.i16[i]) > INT16_MAX + (b.i16[i])) {
|
|
|
- r.i16[i] = INT16_MAX;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16);
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
|
+ if (((b_.i16[i]) > 0 && (a_.i16[i]) < INT16_MIN + (b_.i16[i]))) {
|
|
|
+ r_.i16[i] = INT16_MIN;
|
|
|
+ } else if ((b_.i16[i]) < 0 &&
|
|
|
+ (a_.i16[i]) > INT16_MAX + (b_.i16[i])) {
|
|
|
+ r_.i16[i] = INT16_MAX;
|
|
|
} else {
|
|
|
- r.i16[i] = (a.i16[i]) - (b.i16[i]);
|
|
|
+ r_.i16[i] = (a_.i16[i]) - (b_.i16[i]);
|
|
|
}
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b)
|
|
|
+#define _m_psubsw(a, b) simde_mm_subs_pi16(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_subs_pu16(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_subs_pu16(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_subs_pu16(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- SIMDE__VECTORIZE
|
|
|
- for (size_t i = 0; i < (8 / sizeof(uint16_t)); i++) {
|
|
|
- const int x = a.u16[i] - b.u16[i];
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16);
|
|
|
+#else
|
|
|
+ SIMDE_VECTORIZE
|
|
|
+ for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
|
|
|
+ const int x = a_.u16[i] - b_.u16[i];
|
|
|
if (x < 0) {
|
|
|
- r.u16[i] = 0;
|
|
|
+ r_.u16[i] = 0;
|
|
|
} else if (x > UINT16_MAX) {
|
|
|
- r.u16[i] = UINT16_MAX;
|
|
|
+ r_.u16[i] = UINT16_MAX;
|
|
|
} else {
|
|
|
- r.u16[i] = (uint16_t)x;
|
|
|
+ r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);
|
|
|
}
|
|
|
}
|
|
|
- return r;
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b)
|
|
|
+#define _m_psubusw(a, b) simde_mm_subs_pu16(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_unpackhi_pi8(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_unpackhi_pi8(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_unpackhi_pi8(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.i8[0] = a.i8[4];
|
|
|
- r.i8[1] = b.i8[4];
|
|
|
- r.i8[2] = a.i8[5];
|
|
|
- r.i8[3] = b.i8[5];
|
|
|
- r.i8[4] = a.i8[6];
|
|
|
- r.i8[5] = b.i8[6];
|
|
|
- r.i8[6] = a.i8[7];
|
|
|
- r.i8[7] = b.i8[7];
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
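+ /* vzip2_s8 interleaves the upper halves of a and b (AArch64 only). */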
|
|
|
+ r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8);
|
|
|
+#elif defined(SIMDE_SHUFFLE_VECTOR_)
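+ /* SIMDE_SHUFFLE_VECTOR_(element bits, vector bytes, a, b, ...): indices 0-7 pick lanes from a, 8-15 from b. */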
|
|
|
+ r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14,
|
|
|
+ 7, 15);
|
|
|
+#else
|
|
|
+ r_.i8[0] = a_.i8[4];
|
|
|
+ r_.i8[1] = b_.i8[4];
|
|
|
+ r_.i8[2] = a_.i8[5];
|
|
|
+ r_.i8[3] = b_.i8[5];
|
|
|
+ r_.i8[4] = a_.i8[6];
|
|
|
+ r_.i8[5] = b_.i8[6];
|
|
|
+ r_.i8[6] = a_.i8[7];
|
|
|
+ r_.i8[7] = b_.i8[7];
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b)
|
|
|
+#define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_unpackhi_pi16(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_unpackhi_pi16(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_unpackhi_pi16(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.i16[0] = a.i16[2];
|
|
|
- r.i16[1] = b.i16[2];
|
|
|
- r.i16[2] = a.i16[3];
|
|
|
- r.i16[3] = b.i16[3];
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
|
|
+ r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16);
|
|
|
+#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
|
|
+ r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7);
|
|
|
+#else
|
|
|
+ r_.i16[0] = a_.i16[2];
|
|
|
+ r_.i16[1] = b_.i16[2];
|
|
|
+ r_.i16[2] = a_.i16[3];
|
|
|
+ r_.i16[3] = b_.i16[3];
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b)
|
|
|
+#define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_unpackhi_pi32(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_unpackhi_pi32(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_unpackhi_pi32(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.i32[0] = a.i32[1];
|
|
|
- r.i32[1] = b.i32[1];
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
|
|
+ r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32);
|
|
|
+#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
|
|
+ r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3);
|
|
|
+#else
|
|
|
+ r_.i32[0] = a_.i32[1];
|
|
|
+ r_.i32[1] = b_.i32[1];
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b)
|
|
|
+#define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_unpacklo_pi8(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_unpacklo_pi8(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_unpacklo_pi8(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.i8[0] = a.i8[0];
|
|
|
- r.i8[1] = b.i8[0];
|
|
|
- r.i8[2] = a.i8[1];
|
|
|
- r.i8[3] = b.i8[1];
|
|
|
- r.i8[4] = a.i8[2];
|
|
|
- r.i8[5] = b.i8[2];
|
|
|
- r.i8[6] = a.i8[3];
|
|
|
- r.i8[7] = b.i8[3];
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
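+ /* vzip1_s8 interleaves the lower halves of a and b (AArch64 only). */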
|
|
|
+ r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8);
|
|
|
+#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
|
|
+ r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3,
|
|
|
+ 11);
|
|
|
+#else
|
|
|
+ r_.i8[0] = a_.i8[0];
|
|
|
+ r_.i8[1] = b_.i8[0];
|
|
|
+ r_.i8[2] = a_.i8[1];
|
|
|
+ r_.i8[3] = b_.i8[1];
|
|
|
+ r_.i8[4] = a_.i8[2];
|
|
|
+ r_.i8[5] = b_.i8[2];
|
|
|
+ r_.i8[6] = a_.i8[3];
|
|
|
+ r_.i8[7] = b_.i8[3];
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b)
|
|
|
+#define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_unpacklo_pi16(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_unpacklo_pi16(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_unpacklo_pi16(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.i16[0] = a.i16[0];
|
|
|
- r.i16[1] = b.i16[0];
|
|
|
- r.i16[2] = a.i16[1];
|
|
|
- r.i16[3] = b.i16[1];
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
|
|
+ r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16);
|
|
|
+#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
|
|
+ r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5);
|
|
|
+#else
|
|
|
+ r_.i16[0] = a_.i16[0];
|
|
|
+ r_.i16[1] = b_.i16[0];
|
|
|
+ r_.i16[2] = a_.i16[1];
|
|
|
+ r_.i16[3] = b_.i16[1];
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b)
|
|
|
+#define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_unpacklo_pi32(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_unpacklo_pi32(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_unpacklo_pi32(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.i32[0] = a.i32[0];
|
|
|
- r.i32[1] = b.i32[0];
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
|
|
+ r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32);
|
|
|
+#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
|
|
+ r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2);
|
|
|
+#else
|
|
|
+ r_.i32[0] = a_.i32[0];
|
|
|
+ r_.i32[1] = b_.i32[0];
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b)
|
|
|
+#define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
simde__m64 simde_mm_xor_si64(simde__m64 a, simde__m64 b)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return SIMDE__M64_C(_mm_xor_si64(a.n, b.n));
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _mm_xor_si64(a, b);
|
|
|
#else
|
|
|
- simde__m64 r;
|
|
|
- r.i64[0] = a.i64[0] ^ b.i64[0];
|
|
|
- return r;
|
|
|
+ simde__m64_private r_;
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+ simde__m64_private b_ = simde__m64_to_private(b);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32);
|
|
|
+#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
|
+ r_.i32f = a_.i32f ^ b_.i32f;
|
|
|
+#else
|
|
|
+ r_.u64[0] = a_.u64[0] ^ b_.u64[0];
|
|
|
+#endif
|
|
|
+
|
|
|
+ return simde__m64_from_private(r_);
|
|
|
#endif
|
|
|
}
|
|
|
#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b)
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b)
|
|
|
+#define _m_pxor(a, b) simde_mm_xor_si64(a, b)
|
|
|
+#endif
|
|
|
|
|
|
-SIMDE__FUNCTION_ATTRIBUTES
|
|
|
+SIMDE_FUNCTION_ATTRIBUTES
|
|
|
int32_t simde_m_to_int(simde__m64 a)
|
|
|
{
|
|
|
-#if defined(SIMDE_MMX_NATIVE)
|
|
|
- return _m_to_int(a.n);
|
|
|
+#if defined(SIMDE_X86_MMX_NATIVE)
|
|
|
+ return _m_to_int(a);
|
|
|
+#else
|
|
|
+ simde__m64_private a_ = simde__m64_to_private(a);
|
|
|
+
|
|
|
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
|
+ return vget_lane_s32(a_.neon_i32, 0);
|
|
|
#else
|
|
|
- return a.i32[0];
|
|
|
+ return a_.i32[0];
|
|
|
+#endif
|
|
|
#endif
|
|
|
}
|
|
|
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
|
+#define _m_to_int(a) simde_m_to_int(a)
|
|
|
+#endif
|
|
|
+
|
|
|
+SIMDE_END_DECLS_
|
|
|
|
|
|
-SIMDE__END_DECLS
|
|
|
+HEDLEY_DIAGNOSTIC_POP
|
|
|
|
|
|
-#endif /* !defined(SIMDE__MMX_H) */
|
|
|
+#endif /* !defined(SIMDE_X86_MMX_H) */
|