/* Copyright (c) 2017-2019 Evan Nemerson <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
- #if !defined(SIMDE_COMMON_H)
- #define SIMDE_COMMON_H
- #include "hedley.h"
- #include "check.h"
- #include "simde-arch.h"
- #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
- #define SIMDE_ALIGN(alignment) _Alignas(alignment)
- #elif (defined(__cplusplus) && (__cplusplus >= 201103L))
- #define SIMDE_ALIGN(alignment) alignas(alignment)
- #elif HEDLEY_GCC_VERSION_CHECK(2, 95, 0) || \
- HEDLEY_CRAY_VERSION_CHECK(8, 4, 0) || \
- HEDLEY_IBM_VERSION_CHECK(11, 1, 0) || \
- HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
- HEDLEY_PGI_VERSION_CHECK(19, 4, 0) || \
- HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
- HEDLEY_TINYC_VERSION_CHECK(0, 9, 24) || \
- HEDLEY_TI_VERSION_CHECK(8, 1, 0)
- #define SIMDE_ALIGN(alignment) __attribute__((aligned(alignment)))
- #elif defined(_MSC_VER) && (!defined(_M_IX86) || defined(_M_AMD64))
- #define SIMDE_ALIGN(alignment) __declspec(align(alignment))
- #else
- #define SIMDE_ALIGN(alignment)
- #endif
- #define simde_assert_aligned(alignment, val) \
- simde_assert_int(((uintptr_t)(val)) % (alignment), ==, 0)
- #if HEDLEY_GCC_HAS_ATTRIBUTE(vector_size, 4, 6, 0)
- #define SIMDE__ENABLE_GCC_VEC_EXT
- #endif
- #if !defined(SIMDE_ENABLE_OPENMP) && \
- ((defined(_OPENMP) && (_OPENMP >= 201307L)) || \
- (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L)))
- #define SIMDE_ENABLE_OPENMP
- #endif
- #if !defined(SIMDE_ENABLE_CILKPLUS) && defined(__cilk)
- #define SIMDE_ENABLE_CILKPLUS
- #endif
- #if defined(SIMDE_ENABLE_OPENMP)
- #define SIMDE__VECTORIZE _Pragma("omp simd")
- #define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l))
- #define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r))
- #define SIMDE__VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a))
- #elif defined(SIMDE_ENABLE_CILKPLUS)
- #define SIMDE__VECTORIZE _Pragma("simd")
- #define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
- #define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
- #define SIMDE__VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a))
- #elif defined(__INTEL_COMPILER)
- #define SIMDE__VECTORIZE _Pragma("simd")
- #define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
- #define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
- #define SIMDE__VECTORIZE_ALIGNED(a)
- #elif defined(__clang__)
- #define SIMDE__VECTORIZE _Pragma("clang loop vectorize(enable)")
- #define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l))
- #define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
- #define SIMDE__VECTORIZE_ALIGNED(a)
- #elif HEDLEY_GCC_VERSION_CHECK(4, 9, 0)
- #define SIMDE__VECTORIZE _Pragma("GCC ivdep")
- #define SIMDE__VECTORIZE_SAFELEN(l) SIMDE__VECTORIZE
- #define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
- #define SIMDE__VECTORIZE_ALIGNED(a)
- #elif HEDLEY_CRAY_VERSION_CHECK(5, 0, 0)
- #define SIMDE__VECTORIZE _Pragma("_CRI ivdep")
- #define SIMDE__VECTORIZE_SAFELEN(l) SIMDE__VECTORIZE
- #define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
- #define SIMDE__VECTORIZE_ALIGNED(a)
- #else
- #define SIMDE__VECTORIZE
- #define SIMDE__VECTORIZE_SAFELEN(l)
- #define SIMDE__VECTORIZE_REDUCTION(r)
- #define SIMDE__VECTORIZE_ALIGNED(a)
- #endif
- #if HEDLEY_GCC_HAS_ATTRIBUTE(unused, 3, 1, 0)
- #define SIMDE__UNUSED __attribute__((__unused__))
- #else
- #define SIMDE__UNUSED
- #endif
- #if HEDLEY_GCC_HAS_ATTRIBUTE(artificial, 4, 3, 0)
- #define SIMDE__ARTIFICIAL __attribute__((__artificial__))
- #else
- #define SIMDE__ARTIFICIAL
- #endif
- /* Intended for checking coverage, you should never use this in
- production. */
- #if defined(SIMDE_NO_INLINE)
- #define SIMDE__FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE SIMDE__UNUSED static
- #else
- #define SIMDE__FUNCTION_ATTRIBUTES HEDLEY_INLINE SIMDE__ARTIFICIAL static
- #endif
/* Bracket C declarations; on MSVC additionally disable warnings 4996
 * and 4204 for the enclosed declarations. */
#if defined(_MSC_VER)
#define SIMDE__BEGIN_DECLS \
	HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable : 4996 4204)) \
	HEDLEY_BEGIN_C_DECLS
#define SIMDE__END_DECLS HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS
#else
#define SIMDE__BEGIN_DECLS HEDLEY_BEGIN_C_DECLS
#define SIMDE__END_DECLS HEDLEY_END_C_DECLS
#endif

/* 128-bit integer types, where the compiler provides them. */
#if defined(__SIZEOF_INT128__)
#define SIMDE__HAVE_INT128
typedef __int128 simde_int128;
typedef unsigned __int128 simde_uint128;
#endif
/* TODO: we should at least make an attempt to detect the correct
   types for simde_float32/float64 instead of just assuming float and
   double. */

/* 32-bit floating-point type, overridable by pre-defining
 * SIMDE_FLOAT32_TYPE.  SIMDE_FLOAT32_C(v) spells a constant of that
 * type (appends the `f` suffix in the default `float` case, casts
 * otherwise). */
#if !defined(SIMDE_FLOAT32_TYPE)
#define SIMDE_FLOAT32_TYPE float
#define SIMDE_FLOAT32_C(value) value##f
#else
#define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE)value)
#endif
- typedef SIMDE_FLOAT32_TYPE simde_float32;
- HEDLEY_STATIC_ASSERT(sizeof(simde_float32) == 4,
- "Unable to find 32-bit floating-point type.");
/* 64-bit floating-point type, overridable by pre-defining
 * SIMDE_FLOAT64_TYPE.  SIMDE_FLOAT64_C(v) spells a constant of that
 * type.
 * Fix: the override branch previously (re)defined SIMDE_FLOAT32_C
 * instead of SIMDE_FLOAT64_C, which left SIMDE_FLOAT64_C undefined and
 * clobbered the float32 macro whenever SIMDE_FLOAT64_TYPE was set. */
#if !defined(SIMDE_FLOAT64_TYPE)
#define SIMDE_FLOAT64_TYPE double
#define SIMDE_FLOAT64_C(value) value
#else
#define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE)value)
#endif
- typedef SIMDE_FLOAT64_TYPE simde_float64;
- HEDLEY_STATIC_ASSERT(sizeof(simde_float64) == 8,
- "Unable to find 64-bit floating-point type.");
/* Whether to assume that the compiler can auto-vectorize reasonably
   well.  This will cause SIMDe to attempt to compose vector
   operations using more simple vector operations instead of minimize
   serial work.

   As an example, consider the _mm_add_ss(a, b) function from SSE,
   which returns { a0 + b0, a1, a2, a3 }.  This pattern is repeated
   for other operations (sub, mul, etc.).

   The naive implementation would result in loading a0 and b0, adding
   them into a temporary variable, then splicing that value into a new
   vector with the remaining elements from a.

   On platforms which support vectorization, it's generally faster to
   simply perform the operation on the entire vector to avoid having
   to move data between SIMD registers and non-SIMD registers.
   Basically, instead of the temporary variable being (a0 + b0) it
   would be a vector of (a + b), which is then combined with a to form
   the result.

   By default, SIMDe will prefer the pure-vector versions if we detect
   a vector ISA extension, but this can be overridden by defining
   SIMDE_NO_ASSUME_VECTORIZATION.  You can also define
   SIMDE_ASSUME_VECTORIZATION if you want to force SIMDe to use the
   vectorized version. */
#if !defined(SIMDE_NO_ASSUME_VECTORIZATION) && \
	!defined(SIMDE_ASSUME_VECTORIZATION)
#if defined(__SSE__) || defined(__ARM_NEON) || defined(__mips_msa) || \
	defined(__ALTIVEC__)
#define SIMDE_ASSUME_VECTORIZATION
#endif
#endif
- /* GCC and clang have built-in functions to handle shuffling of
- vectors, but the implementations are slightly different. This
- macro is just an abstraction over them. Note that elem_size is in
- bits but vec_size is in bytes. */
- #if HEDLEY_CLANG_HAS_BUILTIN(__builtin_shufflevector)
- #define SIMDE__SHUFFLE_VECTOR(elem_size, vec_size, a, b, ...) \
- __builtin_shufflevector(a, b, __VA_ARGS__)
- #elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle, 4, 7, 0) && \
- !defined(__INTEL_COMPILER)
- #define SIMDE__SHUFFLE_VECTOR(elem_size, vec_size, a, b, ...) \
- __builtin_shuffle(a, b, \
- (int##elem_size##_t __attribute__( \
- (__vector_size__(vec_size)))){__VA_ARGS__})
- #endif
/* Some algorithms are iterative, and fewer iterations means less
   accuracy.  Lower values here will result in faster, but less
   accurate, calculations for some functions. */
#if !defined(SIMDE_ACCURACY_ITERS)
#define SIMDE_ACCURACY_ITERS 2
#endif
/* This will probably move into Hedley at some point, but I'd like to
   more thoroughly check for other compilers which define __GNUC__
   first. */
#if defined(SIMDE__REALLY_GCC)
#undef SIMDE__REALLY_GCC
#endif
/* 1 only for genuine GCC: clang and ICC also define __GNUC__, so they
 * are excluded explicitly. */
#if !defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
#define SIMDE__REALLY_GCC 0
#else
#define SIMDE__REALLY_GCC 1
#endif
- #if defined(SIMDE__ASSUME_ALIGNED)
- #undef SIMDE__ASSUME_ALIGNED
- #endif
- #if HEDLEY_INTEL_VERSION_CHECK(9, 0, 0)
- #define SIMDE__ASSUME_ALIGNED(ptr, align) __assume_aligned(ptr, align)
- #elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0)
- #define SIMDE__ASSUME_ALIGNED(ptr, align) \
- __assume((((char *)ptr) - ((char *)0)) % (align) == 0)
- #elif HEDLEY_GCC_HAS_BUILTIN(__builtin_assume_aligned, 4, 7, 0)
- #define SIMDE__ASSUME_ALIGNED(ptr, align) \
- (ptr = (__typeof__(ptr))__builtin_assume_aligned((ptr), align))
- #elif HEDLEY_CLANG_HAS_BUILTIN(__builtin_assume)
- #define SIMDE__ASSUME_ALIGNED(ptr, align) \
- __builtin_assume((((char *)ptr) - ((char *)0)) % (align) == 0)
- #elif HEDLEY_GCC_HAS_BUILTIN(__builtin_unreachable, 4, 5, 0)
- #define SIMDE__ASSUME_ALIGNED(ptr, align) \
- ((((char *)ptr) - ((char *)0)) % (align) == 0) \
- ? (1) \
- : (__builtin_unreachable(), 0)
- #else
- #define SIMDE__ASSUME_ALIGNED(ptr, align)
- #endif
- /* Sometimes we run into problems with specific versions of compilers
- which make the native versions unusable for us. Often this is due
- to missing functions, sometimes buggy implementations, etc. These
- macros are how we check for specific bugs. As they are fixed we'll
- start only defining them for problematic compiler versions. */
- #if !defined(SIMDE_IGNORE_COMPILER_BUGS)
- #if SIMDE__REALLY_GCC
- #if !HEDLEY_GCC_VERSION_CHECK(4, 9, 0)
- #define SIMDE_BUG_GCC_REV_208793
- #endif
- #if !HEDLEY_GCC_VERSION_CHECK(5, 0, 0)
- #define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */
- #endif
- #if !HEDLEY_GCC_VERSION_CHECK(4, 6, 0)
- #define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */
- #endif
- #endif
- #if defined(__EMSCRIPTEN__)
- #define SIMDE_BUG_EMSCRIPTEN_MISSING_IMPL /* Placeholder for (as yet) unfiled issues. */
- #define SIMDE_BUG_EMSCRIPTEN_5242
- #endif
- #endif
- #endif /* !defined(SIMDE_COMMON_H) */