1
0

simde-common.h 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918
  1. /* SPDX-License-Identifier: MIT
  2. *
  3. * Permission is hereby granted, free of charge, to any person
  4. * obtaining a copy of this software and associated documentation
  5. * files (the "Software"), to deal in the Software without
  6. * restriction, including without limitation the rights to use, copy,
  7. * modify, merge, publish, distribute, sublicense, and/or sell copies
  8. * of the Software, and to permit persons to whom the Software is
  9. * furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be
  12. * included in all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  15. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  17. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  18. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  19. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  20. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. * SOFTWARE.
  22. *
  23. * Copyright:
  24. * 2017-2020 Evan Nemerson <[email protected]>
  25. */
  26. #if !defined(SIMDE_COMMON_H)
  27. #define SIMDE_COMMON_H
  28. #include "hedley.h"
  29. #define SIMDE_VERSION_MAJOR 0
  30. #define SIMDE_VERSION_MINOR 7
  31. #define SIMDE_VERSION_MICRO 1
  32. #define SIMDE_VERSION \
  33. HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, \
  34. SIMDE_VERSION_MICRO)
  35. #include <stddef.h>
  36. #include <stdint.h>
  37. #include "simde-detect-clang.h"
  38. #include "simde-arch.h"
  39. #include "simde-features.h"
  40. #include "simde-diagnostic.h"
  41. #include "simde-math.h"
  42. #include "simde-constify.h"
  43. #include "simde-align.h"
  44. /* In some situations, SIMDe has to make large performance sacrifices
  45. * for small increases in how faithfully it reproduces an API, but
  46. * only a relatively small number of users will actually need the API
  47. * to be completely accurate. The SIMDE_FAST_* options can be used to
  48. * disable these trade-offs.
  49. *
  50. * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or
  51. * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to
  52. * enable some optimizations. Using -ffast-math and/or
  53. * -ffinite-math-only will also enable the relevant options. If you
  54. * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. */
  55. /* Most programs avoid NaNs by never passing values which can result in
  56. * a NaN; for example, if you only pass non-negative values to the sqrt
  57. * functions, it won't generate a NaN. On some platforms, similar
  58. * functions handle NaNs differently; for example, the _mm_min_ps SSE
  59. * function will return 0.0 if you pass it (0.0, NaN), but the NEON
  60. * vminq_f32 function will return NaN. Making them behave like one
  61. * another is expensive; it requires generating a mask of all lanes
  62. * with NaNs, then performing the operation (e.g., vminq_f32), then
  63. * blending together the result with another vector using the mask.
  64. *
  65. * If you don't want SIMDe to worry about the differences between how
  66. * NaNs are handled on the two platforms, define this (or pass
  67. * -ffinite-math-only) */
  68. #if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && \
  69. defined(__FAST_MATH__)
  70. #define SIMDE_FAST_MATH
  71. #endif
  72. #if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS)
  73. #if defined(SIMDE_FAST_MATH)
  74. #define SIMDE_FAST_NANS
  75. #elif defined(__FINITE_MATH_ONLY__)
  76. #if __FINITE_MATH_ONLY__
  77. #define SIMDE_FAST_NANS
  78. #endif
  79. #endif
  80. #endif
  81. /* Many functions are defined as using the current rounding mode
  82. * (i.e., the SIMD version of fegetround()) when converting to
  83. * an integer. For example, _mm_cvtpd_epi32. Unfortunately,
  84. * on some platforms (such as ARMv8+ where round-to-nearest is
  85. * always used, regardless of the FPSCR register) this means we
  86. * have to first query the current rounding mode, then choose
  87. * the proper function (round,
  88. * ceil, floor, etc.) */
  89. #if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && \
  90. defined(SIMDE_FAST_MATH)
  91. #define SIMDE_FAST_ROUND_MODE
  92. #endif
  93. /* This controls how ties are rounded. For example, does 10.5 round to
  94. * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for
  95. * example) doesn't support it and it must be emulated (which is rather
  96. * slow). If you're okay with just using the default for whatever arch
  97. * you're on, you should definitely define this.
  98. *
  99. * Note that we don't use this macro to avoid correct implementations
  100. * in functions which are explicitly about rounding (such as vrnd* on
  101. * NEON, _mm_round_* on x86, etc.); it is only used for code where
  102. * rounding is a component in another function, and even then it isn't
  103. * usually a problem since such functions will use the current rounding
  104. * mode. */
  105. #if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && \
  106. defined(SIMDE_FAST_MATH)
  107. #define SIMDE_FAST_ROUND_TIES
  108. #endif
  109. /* For functions which convert from one type to another (mostly from
  110. * floating point to integer types), sometimes we need to do a range
  111. * check and potentially return a different result if the value
  112. * falls outside that range. Skipping this check can provide a
  113. * performance boost, at the expense of faithfulness to the API we're
  114. * emulating. */
  115. #if !defined(SIMDE_FAST_CONVERSION_RANGE) && \
  116. !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH)
  117. #define SIMDE_FAST_CONVERSION_RANGE
  118. #endif
  119. #if HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \
  120. HEDLEY_GCC_VERSION_CHECK(3, 4, 0) || \
  121. HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
  122. HEDLEY_TINYC_VERSION_CHECK(0, 9, 19) || \
  123. HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
  124. HEDLEY_IBM_VERSION_CHECK(13, 1, 0) || \
  125. HEDLEY_TI_CL6X_VERSION_CHECK(6, 1, 0) || \
  126. (HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0) && !defined(__cplusplus)) || \
  127. HEDLEY_CRAY_VERSION_CHECK(8, 1, 0)
  128. #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr))
  129. #elif defined(__cplusplus) && (__cplusplus > 201703L)
  130. #include <type_traits>
  131. #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated())
  132. #endif
  133. #if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT)
  134. #if defined(SIMDE_CHECK_CONSTANT_) && \
  135. SIMDE_DETECT_CLANG_VERSION_CHECK(9, 0, 0) && \
  136. (!defined(__apple_build_version__) || \
  137. ((__apple_build_version__ < 11000000) || \
  138. (__apple_build_version__ >= 12000000)))
  139. #define SIMDE_REQUIRE_CONSTANT(arg) \
  140. HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), \
  141. "`" #arg "' must be constant")
  142. #else
  143. #define SIMDE_REQUIRE_CONSTANT(arg)
  144. #endif
  145. #else
  146. #define SIMDE_REQUIRE_CONSTANT(arg)
  147. #endif
  148. #define SIMDE_REQUIRE_RANGE(arg, min, max) \
  149. HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), \
  150. "'" #arg "' must be in [" #min ", " #max "]")
  151. #define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \
  152. SIMDE_REQUIRE_CONSTANT(arg) \
  153. SIMDE_REQUIRE_RANGE(arg, min, max)
  154. /* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty
  155. * fallback if we can't find an implementation; instead we have to
  156. * check if SIMDE_STATIC_ASSERT is defined before using it. */
  157. #if !defined(__cplusplus) && \
  158. ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \
  159. HEDLEY_HAS_FEATURE(c_static_assert) || \
  160. HEDLEY_GCC_VERSION_CHECK(6, 0, 0) || \
  161. HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || defined(_Static_assert))
  162. #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message)
  163. #elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \
  164. HEDLEY_MSVC_VERSION_CHECK(16, 0, 0)
  165. #define SIMDE_STATIC_ASSERT(expr, message) \
  166. HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
  167. static_assert(expr, message))
  168. #endif
  169. #if (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \
  170. HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \
  171. HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
  172. HEDLEY_IBM_VERSION_CHECK(13, 1, 0)
  173. #define SIMDE_MAY_ALIAS __attribute__((__may_alias__))
  174. #else
  175. #define SIMDE_MAY_ALIAS
  176. #endif
  177. /* Lots of compilers support GCC-style vector extensions, but many
  178. don't support all the features. Define different macros depending
  179. on support for
  180. * SIMDE_VECTOR - Declaring a vector.
  181. * SIMDE_VECTOR_OPS - basic operations (binary and unary).
  182. * SIMDE_VECTOR_NEGATE - negating a vector
  183. * SIMDE_VECTOR_SCALAR - For binary operators, the second argument
  184. can be a scalar, in which case the result is as if that scalar
  185. had been broadcast to all lanes of a vector.
  186. * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for
  187. extracting/inserting a single element.
  188. SIMDE_VECTOR can be assumed if any others are defined, the
  189. others are independent. */
  190. #if !defined(SIMDE_NO_VECTOR)
  191. #if HEDLEY_GCC_VERSION_CHECK(4, 8, 0)
  192. #define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
  193. #define SIMDE_VECTOR_OPS
  194. #define SIMDE_VECTOR_NEGATE
  195. #define SIMDE_VECTOR_SCALAR
  196. #define SIMDE_VECTOR_SUBSCRIPT
  197. #elif HEDLEY_INTEL_VERSION_CHECK(16, 0, 0)
  198. #define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
  199. #define SIMDE_VECTOR_OPS
  200. #define SIMDE_VECTOR_NEGATE
  201. /* ICC only supports SIMDE_VECTOR_SCALAR for constants */
  202. #define SIMDE_VECTOR_SUBSCRIPT
  203. #elif HEDLEY_GCC_VERSION_CHECK(4, 1, 0) || HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
  204. #define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
  205. #define SIMDE_VECTOR_OPS
  206. #elif HEDLEY_SUNPRO_VERSION_CHECK(5, 12, 0)
  207. #define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
  208. #elif HEDLEY_HAS_ATTRIBUTE(vector_size)
  209. #define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
  210. #define SIMDE_VECTOR_OPS
  211. #define SIMDE_VECTOR_NEGATE
  212. #define SIMDE_VECTOR_SUBSCRIPT
  213. #if SIMDE_DETECT_CLANG_VERSION_CHECK(5, 0, 0)
  214. #define SIMDE_VECTOR_SCALAR
  215. #endif
  216. #endif
  217. /* GCC and clang have built-in functions to handle shuffling and
  218. converting of vectors, but the implementations are slightly
  219. different. This macro is just an abstraction over them. Note that
  220. elem_size is in bits but vec_size is in bytes. */
  221. #if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT)
  222. HEDLEY_DIAGNOSTIC_PUSH
  223. /* We don't care about -Wvariadic-macros; all compilers that support
  224. * shufflevector/shuffle support them. */
  225. #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic")
  226. #pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
  227. #endif
  228. #if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4, 0, 0)
  229. #pragma GCC diagnostic ignored "-Wvariadic-macros"
  230. #endif
  231. #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
  232. #define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) \
  233. __builtin_shufflevector(a, b, __VA_ARGS__)
  234. #elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle, 4, 7, 0) && \
  235. !defined(__INTEL_COMPILER)
  236. #define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) \
  237. (__extension__({ \
  238. int##elem_size##_t SIMDE_VECTOR(vec_size) \
  239. simde_shuffle_ = {__VA_ARGS__}; \
  240. __builtin_shuffle(a, b, simde_shuffle_); \
  241. }))
  242. #endif
  243. HEDLEY_DIAGNOSTIC_POP
  244. #endif
  245. /* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT
  246. but the code needs to be refactored a bit to take advantage. */
  247. #if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT)
  248. #if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || \
  249. HEDLEY_GCC_VERSION_CHECK(9, 0, 0)
  250. #if HEDLEY_GCC_VERSION_CHECK(9, 0, 0) && !HEDLEY_GCC_VERSION_CHECK(9, 3, 0)
  251. /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */
  252. #define SIMDE_CONVERT_VECTOR_(to, from) \
  253. ((to) = (__extension__({ \
  254. __typeof__(from) from_ = (from); \
  255. ((void)from_); \
  256. __builtin_convertvector(from_, __typeof__(to)); \
  257. })))
  258. #else
  259. #define SIMDE_CONVERT_VECTOR_(to, from) \
  260. ((to) = __builtin_convertvector((from), __typeof__(to)))
  261. #endif
  262. #endif
  263. #endif
  264. #endif
  265. /* Since we currently require SUBSCRIPT before using a vector in a
  266. union, we define these as dependencies of SUBSCRIPT. They are
  267. likely to disappear in the future, once SIMDe learns how to make
  268. use of vectors without using the union members. Do not use them
  269. in your code unless you're okay with it breaking when SIMDe
  270. changes. */
  271. #if defined(SIMDE_VECTOR_SUBSCRIPT)
  272. #if defined(SIMDE_VECTOR_OPS)
  273. #define SIMDE_VECTOR_SUBSCRIPT_OPS
  274. #endif
  275. #if defined(SIMDE_VECTOR_SCALAR)
  276. #define SIMDE_VECTOR_SUBSCRIPT_SCALAR
  277. #endif
  278. #endif
  279. #if !defined(SIMDE_ENABLE_OPENMP) && \
  280. ((defined(_OPENMP) && (_OPENMP >= 201307L)) || \
  281. (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L)))
  282. #define SIMDE_ENABLE_OPENMP
  283. #endif
  284. #if !defined(SIMDE_ENABLE_CILKPLUS) && \
  285. (defined(__cilk) || defined(HEDLEY_INTEL_VERSION))
  286. #define SIMDE_ENABLE_CILKPLUS
  287. #endif
  288. #if defined(SIMDE_ENABLE_OPENMP)
  289. #define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd)
  290. #define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l))
  291. #if defined(__clang__)
  292. #define SIMDE_VECTORIZE_REDUCTION(r) \
  293. HEDLEY_DIAGNOSTIC_PUSH \
  294. _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \
  295. HEDLEY_PRAGMA(omp simd reduction(r)) HEDLEY_DIAGNOSTIC_POP
  296. #else
  297. #define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r))
  298. #endif
  299. #define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a))
  300. #elif defined(SIMDE_ENABLE_CILKPLUS)
  301. #define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd)
  302. #define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
  303. #define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
  304. #define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a))
  305. #elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION)
  306. #define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable))
  307. #define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l))
  308. #define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE
  309. #define SIMDE_VECTORIZE_ALIGNED(a)
  310. #elif HEDLEY_GCC_VERSION_CHECK(4, 9, 0)
  311. #define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep)
  312. #define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE
  313. #define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE
  314. #define SIMDE_VECTORIZE_ALIGNED(a)
  315. #elif HEDLEY_CRAY_VERSION_CHECK(5, 0, 0)
  316. #define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep)
  317. #define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE
  318. #define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE
  319. #define SIMDE_VECTORIZE_ALIGNED(a)
  320. #else
  321. #define SIMDE_VECTORIZE
  322. #define SIMDE_VECTORIZE_SAFELEN(l)
  323. #define SIMDE_VECTORIZE_REDUCTION(r)
  324. #define SIMDE_VECTORIZE_ALIGNED(a)
  325. #endif
  326. #define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask)))
  327. /* Intended for checking coverage, you should never use this in
  328. production. */
  329. #if defined(SIMDE_NO_INLINE)
  330. #define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static
  331. #else
  332. #define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static
  333. #endif
  334. #if HEDLEY_HAS_ATTRIBUTE(unused) || HEDLEY_GCC_VERSION_CHECK(2, 95, 0)
  335. #define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__))
  336. #else
  337. #define SIMDE_FUNCTION_POSSIBLY_UNUSED_
  338. #endif
  339. #if HEDLEY_HAS_WARNING("-Wused-but-marked-unused")
  340. #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED \
  341. _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"")
  342. #else
  343. #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED
  344. #endif
  345. #if defined(_MSC_VER)
  346. #define SIMDE_BEGIN_DECLS_ \
  347. HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable : 4996 4204)) \
  348. HEDLEY_BEGIN_C_DECLS
  349. #define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS
  350. #else
  351. #define SIMDE_BEGIN_DECLS_ \
  352. HEDLEY_DIAGNOSTIC_PUSH \
  353. SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED \
  354. HEDLEY_BEGIN_C_DECLS
  355. #define SIMDE_END_DECLS_ \
  356. HEDLEY_END_C_DECLS \
  357. HEDLEY_DIAGNOSTIC_POP
  358. #endif
  359. #if defined(__SIZEOF_INT128__)
  360. #define SIMDE_HAVE_INT128_
  361. HEDLEY_DIAGNOSTIC_PUSH
  362. SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_
  363. typedef __int128 simde_int128;
  364. typedef unsigned __int128 simde_uint128;
  365. HEDLEY_DIAGNOSTIC_POP
  366. #endif
  367. #if !defined(SIMDE_ENDIAN_LITTLE)
  368. #define SIMDE_ENDIAN_LITTLE 1234
  369. #endif
  370. #if !defined(SIMDE_ENDIAN_BIG)
  371. #define SIMDE_ENDIAN_BIG 4321
  372. #endif
  373. #if !defined(SIMDE_ENDIAN_ORDER)
  374. /* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */
  375. #if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
  376. (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
  377. #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
  378. #elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
  379. (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
  380. #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG
  381. /* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */
  382. #elif defined(_BIG_ENDIAN)
  383. #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG
  384. #elif defined(_LITTLE_ENDIAN)
  385. #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
  386. /* We know the endianness of some common architectures. Common
  387. * architectures not listed (ARM, POWER, MIPS, etc.) here are
  388. * bi-endian. */
  389. #elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
  390. #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
  391. #elif defined(__s390x__) || defined(__zarch__)
  392. #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG
  393. /* Looks like we'll have to rely on the platform. If we're missing a
  394. * platform, please let us know. */
  395. #elif defined(_WIN32)
  396. #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
  397. #elif defined(sun) || defined(__sun) /* Solaris */
  398. #include <sys/byteorder.h>
  399. #if defined(_LITTLE_ENDIAN)
  400. #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
  401. #elif defined(_BIG_ENDIAN)
  402. #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG
  403. #endif
  404. #elif defined(__APPLE__)
  405. #include <libkern/OSByteOrder.h>
  406. #if defined(__LITTLE_ENDIAN__)
  407. #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
  408. #elif defined(__BIG_ENDIAN__)
  409. #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG
  410. #endif
  411. #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
  412. defined(__bsdi__) || defined(__DragonFly__) || defined(BSD)
  413. #include <machine/endian.h>
  414. #if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN)
  415. #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
  416. #elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)
  417. #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG
  418. #endif
  419. #elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__)
  420. #include <endian.h>
  421. #if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
  422. (__BYTE_ORDER == __LITTLE_ENDIAN)
  423. #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
  424. #elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
  425. (__BYTE_ORDER == __BIG_ENDIAN)
  426. #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG
  427. #endif
  428. #endif
  429. #endif
  430. #if HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \
  431. HEDLEY_GCC_VERSION_CHECK(4, 3, 0) || \
  432. HEDLEY_IBM_VERSION_CHECK(13, 1, 0) || \
  433. HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
  434. #define simde_bswap64(v) __builtin_bswap64(v)
  435. #elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0)
  436. #define simde_bswap64(v) _byteswap_uint64(v)
  437. #else
  438. SIMDE_FUNCTION_ATTRIBUTES
  439. uint64_t simde_bswap64(uint64_t v)
  440. {
  441. return ((v & (((uint64_t)0xff) << 56)) >> 56) |
  442. ((v & (((uint64_t)0xff) << 48)) >> 40) |
  443. ((v & (((uint64_t)0xff) << 40)) >> 24) |
  444. ((v & (((uint64_t)0xff) << 32)) >> 8) |
  445. ((v & (((uint64_t)0xff) << 24)) << 8) |
  446. ((v & (((uint64_t)0xff) << 16)) << 24) |
  447. ((v & (((uint64_t)0xff) << 8)) << 40) |
  448. ((v & (((uint64_t)0xff))) << 56);
  449. }
  450. #endif
  451. #if !defined(SIMDE_ENDIAN_ORDER)
  452. #error Unknown byte order; please file a bug
  453. #else
  454. #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
  455. #define simde_endian_bswap64_be(value) simde_bswap64(value)
  456. #define simde_endian_bswap64_le(value) (value)
  457. #elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG
  458. #define simde_endian_bswap64_be(value) (value)
  459. #define simde_endian_bswap64_le(value) simde_bswap64(value)
  460. #endif
  461. #endif
  462. /* TODO: we should at least make an attempt to detect the correct
  463. types for simde_float32/float64 instead of just assuming float and
  464. double. */
  /* simde_float32: defaults to float. Define SIMDE_FLOAT32_TYPE before
   * this point to substitute another type; SIMDE_FLOAT32_C(value) then
   * casts the constant to that type instead of appending the f suffix. */
  #if !defined(SIMDE_FLOAT32_TYPE)
  #define SIMDE_FLOAT32_TYPE float
  #define SIMDE_FLOAT32_C(value) value##f
  #else
  #define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE)(value))
  #endif
  typedef SIMDE_FLOAT32_TYPE simde_float32;
  /* simde_float64: defaults to double. Define SIMDE_FLOAT64_TYPE before
   * this point to substitute another type; SIMDE_FLOAT64_C(value) then
   * casts the constant to that type.  (The override branch must define
   * SIMDE_FLOAT64_C, not SIMDE_FLOAT32_C, otherwise the 64-bit constant
   * macro is missing and the 32-bit one is clobbered.) */
  #if !defined(SIMDE_FLOAT64_TYPE)
  #define SIMDE_FLOAT64_TYPE double
  #define SIMDE_FLOAT64_C(value) value
  #else
  #define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE)(value))
  #endif
  typedef SIMDE_FLOAT64_TYPE simde_float64;
  479. #if HEDLEY_HAS_WARNING("-Wbad-function-cast")
  480. #define SIMDE_CONVERT_FTOI(T, v) \
  481. HEDLEY_DIAGNOSTIC_PUSH \
  482. _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \
  483. HEDLEY_STATIC_CAST(T, (v)) HEDLEY_DIAGNOSTIC_POP
  484. #else
  485. #define SIMDE_CONVERT_FTOI(T, v) ((T)(v))
  486. #endif
  487. /* TODO: detect compilers which support this outside of C11 mode */
  488. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
  489. #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) \
  490. _Generic((value), to \
  491. : (value), default \
  492. : (_Generic((value), from \
  493. : ((to)(value)))))
  494. #define SIMDE_CHECKED_STATIC_CAST(to, from, value) \
  495. _Generic((value), to \
  496. : (value), default \
  497. : (_Generic((value), from \
  498. : ((to)(value)))))
  499. #else
  500. #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) \
  501. HEDLEY_REINTERPRET_CAST(to, value)
  502. #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value)
  503. #endif
  504. #if HEDLEY_HAS_WARNING("-Wfloat-equal")
  505. #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL \
  506. _Pragma("clang diagnostic ignored \"-Wfloat-equal\"")
  507. #elif HEDLEY_GCC_VERSION_CHECK(3, 0, 0)
  508. #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL \
  509. _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
  510. #else
  511. #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL
  512. #endif
  513. /* Some functions can trade accuracy for speed. For those functions
  514. you can control the trade-off using this macro. Possible values:
  515. 0: prefer speed
  516. 1: reasonable trade-offs
  517. 2: prefer accuracy */
  518. #if !defined(SIMDE_ACCURACY_PREFERENCE)
  519. #define SIMDE_ACCURACY_PREFERENCE 1
  520. #endif
  521. #if defined(__STDC_HOSTED__)
  522. #define SIMDE_STDC_HOSTED __STDC_HOSTED__
  523. #else
  524. #if defined(HEDLEY_PGI_VERSION) || defined(HEDLEY_MSVC_VERSION)
  525. #define SIMDE_STDC_HOSTED 1
  526. #else
  527. #define SIMDE_STDC_HOSTED 0
  528. #endif
  529. #endif
  530. /* Try to deal with environments without a standard library. */
  531. #if !defined(simde_memcpy)
  532. #if HEDLEY_HAS_BUILTIN(__builtin_memcpy)
  533. #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n)
  534. #endif
  535. #endif
  536. #if !defined(simde_memset)
  537. #if HEDLEY_HAS_BUILTIN(__builtin_memset)
  538. #define simde_memset(s, c, n) __builtin_memset(s, c, n)
  539. #endif
  540. #endif
  541. #if !defined(simde_memcmp)
  542. #if HEDLEY_HAS_BUILTIN(__builtin_memcmp)
  543. #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n)
  544. #endif
  545. #endif
  546. #if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp)
  547. #if !defined(SIMDE_NO_STRING_H)
  548. #if defined(__has_include)
  549. #if !__has_include(<string.h>)
  550. #define SIMDE_NO_STRING_H
  551. #endif
  552. #elif (SIMDE_STDC_HOSTED == 0)
  553. #define SIMDE_NO_STRING_H
  554. #endif
  555. #endif
  556. #if !defined(SIMDE_NO_STRING_H)
  557. #include <string.h>
  558. #if !defined(simde_memcpy)
  559. #define simde_memcpy(dest, src, n) memcpy(dest, src, n)
  560. #endif
  561. #if !defined(simde_memset)
  562. #define simde_memset(s, c, n) memset(s, c, n)
  563. #endif
  564. #if !defined(simde_memcmp)
  565. #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n)
  566. #endif
  567. #else
  568. /* These are meant to be portable, not fast. If you're hitting them you
  569. * should think about providing your own (by defining the simde_memcpy
  570. * macro prior to including any SIMDe files) or submitting a patch to
  571. * SIMDe so we can detect your system-provided memcpy/memset, like by
  572. * adding your compiler to the checks for __builtin_memcpy and/or
  573. * __builtin_memset. */
  574. #if !defined(simde_memcpy)
  575. SIMDE_FUNCTION_ATTRIBUTES
  576. void simde_memcpy_(void *dest, const void *src, size_t len)
  577. {
  578. char *dest_ = HEDLEY_STATIC_CAST(char *, dest);
  579. char *src_ = HEDLEY_STATIC_CAST(const char *, src);
  580. for (size_t i = 0; i < len; i++) {
  581. dest_[i] = src_[i];
  582. }
  583. }
  584. #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n)
  585. #endif
  586. #if !defined(simde_memset)
  587. SIMDE_FUNCTION_ATTRIBUTES
  588. void simde_memset_(void *s, int c, size_t len)
  589. {
  590. char *s_ = HEDLEY_STATIC_CAST(char *, s);
  591. char c_ = HEDLEY_STATIC_CAST(char, c);
  592. for (size_t i = 0; i < len; i++) {
  593. s_[i] = c_[i];
  594. }
  595. }
  596. #define simde_memset(s, c, n) simde_memset_(s, c, n)
  597. #endif
  598. #if !defined(simde_memcmp)
  599. SIMDE_FUCTION_ATTRIBUTES
  600. int simde_memcmp_(const void *s1, const void *s2, size_t n)
  601. {
  602. unsigned char *s1_ = HEDLEY_STATIC_CAST(unsigned char *, s1);
  603. unsigned char *s2_ = HEDLEY_STATIC_CAST(unsigned char *, s2);
  604. for (size_t i = 0; i < len; i++) {
  605. if (s1_[i] != s2_[i]) {
  606. return (int)(s1_[i] - s2_[i]);
  607. }
  608. }
  609. return 0;
  610. }
  611. #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n)
  612. #endif
  613. #endif
  614. #endif
  615. #if defined(FE_ALL_EXCEPT)
  616. #define SIMDE_HAVE_FENV_H
  617. #elif defined(__has_include)
  618. #if __has_include(<fenv.h>)
  619. #include <fenv.h>
  620. #define SIMDE_HAVE_FENV_H
  621. #endif
  622. #elif SIMDE_STDC_HOSTED == 1
  623. #include <fenv.h>
  624. #define SIMDE_HAVE_FENV_H
  625. #endif
  626. #if defined(EXIT_FAILURE)
  627. #define SIMDE_HAVE_STDLIB_H
  628. #elif defined(__has_include)
  629. #if __has_include(<stdlib.h>)
  630. #include <stdlib.h>
  631. #define SIMDE_HAVE_STDLIB_H
  632. #endif
  633. #elif SIMDE_STDC_HOSTED == 1
  634. #include <stdlib.h>
  635. #define SIMDE_HAVE_STDLIB_H
  636. #endif
  637. #if defined(__has_include)
  638. #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include(<cfenv>)
  639. #include <cfenv>
  640. #elif __has_include(<fenv.h>)
  641. #include <fenv.h>
  642. #endif
  643. #if __has_include(<stdlib.h>)
  644. #include <stdlib.h>
  645. #endif
  646. #elif SIMDE_STDC_HOSTED == 1
  647. #include <stdlib.h>
  648. #include <fenv.h>
  649. #endif
  650. #include "check.h"
  651. /* GCC/clang have a bunch of functionality in builtins which we would
  652. * like to access, but the suffixes indicate whether they operate on
  653. * int, long, or long long, not fixed width types (e.g., int32_t).
  654. * We use these macros to attempt to map from fixed-width to the
  655. * names GCC uses. Note that you should still cast the input(s) and
  656. * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if
  657. * types are the same size they may not be compatible according to the
  658. * compiler. For example, on x86 long and long long are generally
  659. * both 64 bits, but platforms vary on whether an int64_t is mapped
  660. * to a long or long long. */
  661. #include <limits.h>
  662. HEDLEY_DIAGNOSTIC_PUSH
  663. SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_
  /* For each fixed width N (8, 16, 32, 64) pick the first of int, long,
   * long long whose limits equal those of intN_t, and record
   *
   *   SIMDE_BUILTIN_SUFFIX_N_  the builtin-name suffix: (empty), l or ll
   *   SIMDE_BUILTIN_TYPE_N_    the matching C type
   *
   * If none of the three types matches, both macros are left undefined
   * for that width.
   *
   * LLONG_MAX/LLONG_MIN are tested with defined() first so that a
   * <limits.h> which does not provide them is simply skipped instead of
   * being evaluated as 0 (which would trip -Wundef). */
  #if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN)
    #define SIMDE_BUILTIN_SUFFIX_8_
    #define SIMDE_BUILTIN_TYPE_8_ int
  #elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN)
    #define SIMDE_BUILTIN_SUFFIX_8_ l
    #define SIMDE_BUILTIN_TYPE_8_ long
  #elif defined(LLONG_MAX) && defined(LLONG_MIN) && \
        (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN)
    #define SIMDE_BUILTIN_SUFFIX_8_ ll
    #define SIMDE_BUILTIN_TYPE_8_ long long
  #endif

  #if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN)
    #define SIMDE_BUILTIN_SUFFIX_16_
    #define SIMDE_BUILTIN_TYPE_16_ int
  #elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN)
    #define SIMDE_BUILTIN_SUFFIX_16_ l
    #define SIMDE_BUILTIN_TYPE_16_ long
  #elif defined(LLONG_MAX) && defined(LLONG_MIN) && \
        (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN)
    #define SIMDE_BUILTIN_SUFFIX_16_ ll
    #define SIMDE_BUILTIN_TYPE_16_ long long
  #endif

  #if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN)
    #define SIMDE_BUILTIN_SUFFIX_32_
    #define SIMDE_BUILTIN_TYPE_32_ int
  #elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN)
    #define SIMDE_BUILTIN_SUFFIX_32_ l
    #define SIMDE_BUILTIN_TYPE_32_ long
  #elif defined(LLONG_MAX) && defined(LLONG_MIN) && \
        (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN)
    #define SIMDE_BUILTIN_SUFFIX_32_ ll
    #define SIMDE_BUILTIN_TYPE_32_ long long
  #endif

  #if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN)
    #define SIMDE_BUILTIN_SUFFIX_64_
    #define SIMDE_BUILTIN_TYPE_64_ int
  #elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN)
    #define SIMDE_BUILTIN_SUFFIX_64_ l
    #define SIMDE_BUILTIN_TYPE_64_ long
  #elif defined(LLONG_MAX) && defined(LLONG_MIN) && \
        (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN)
    #define SIMDE_BUILTIN_SUFFIX_64_ ll
    #define SIMDE_BUILTIN_TYPE_64_ long long
  #endif
  /* For each width N for which SIMDE_BUILTIN_SUFFIX_N_ is defined:
   *
   *   SIMDE_BUILTIN_N_(name)      expands to __builtin_<name><suffix>
   *   SIMDE_BUILTIN_HAS_N_(name)  expands to HEDLEY_HAS_BUILTIN applied
   *                               to that same identifier
   *
   * When no suffix is defined for a width, SIMDE_BUILTIN_N_ is not
   * provided and SIMDE_BUILTIN_HAS_N_(name) is the constant 0.
   *
   * The HAS variants reuse SIMDE_BUILTIN_N_ so the identifier is built
   * in exactly one place per width. */
  #if defined(SIMDE_BUILTIN_SUFFIX_8_)
    #define SIMDE_BUILTIN_8_(name) \
      HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)
    #define SIMDE_BUILTIN_HAS_8_(name) \
      HEDLEY_HAS_BUILTIN(SIMDE_BUILTIN_8_(name))
  #else
    #define SIMDE_BUILTIN_HAS_8_(name) 0
  #endif

  #if defined(SIMDE_BUILTIN_SUFFIX_16_)
    #define SIMDE_BUILTIN_16_(name) \
      HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)
    #define SIMDE_BUILTIN_HAS_16_(name) \
      HEDLEY_HAS_BUILTIN(SIMDE_BUILTIN_16_(name))
  #else
    #define SIMDE_BUILTIN_HAS_16_(name) 0
  #endif

  #if defined(SIMDE_BUILTIN_SUFFIX_32_)
    #define SIMDE_BUILTIN_32_(name) \
      HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)
    #define SIMDE_BUILTIN_HAS_32_(name) \
      HEDLEY_HAS_BUILTIN(SIMDE_BUILTIN_32_(name))
  #else
    #define SIMDE_BUILTIN_HAS_32_(name) 0
  #endif

  #if defined(SIMDE_BUILTIN_SUFFIX_64_)
    #define SIMDE_BUILTIN_64_(name) \
      HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)
    #define SIMDE_BUILTIN_HAS_64_(name) \
      HEDLEY_HAS_BUILTIN(SIMDE_BUILTIN_64_(name))
  #else
    #define SIMDE_BUILTIN_HAS_64_(name) 0
  #endif
  740. HEDLEY_DIAGNOSTIC_POP
  /* Sometimes we run into problems with specific versions of compilers
   * which make the native versions unusable for us.  Often this is due
   * to missing functions, sometimes buggy implementations, etc.  These
   * macros are how we check for specific bugs.  As they are fixed we'll
   * start only defining them for problematic compiler versions.
   *
   * Exactly one compiler family is considered, in this order: GCC
   * (HEDLEY_GCC_VERSION), clang (__clang__), MSVC (HEDLEY_MSVC_VERSION),
   * Intel (HEDLEY_INTEL_VERSION).  Defining SIMDE_IGNORE_COMPILER_BUGS
   * before this point suppresses every SIMDE_BUG_* macro below. */
  #if !defined(SIMDE_IGNORE_COMPILER_BUGS)
    #if defined(HEDLEY_GCC_VERSION)
      /* Bugs gated on the GCC version only. */
      #if !HEDLEY_GCC_VERSION_CHECK(4, 9, 0)
        #define SIMDE_BUG_GCC_REV_208793
      #endif
      #if !HEDLEY_GCC_VERSION_CHECK(5, 0, 0)
        #define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */
      #endif
      #if !HEDLEY_GCC_VERSION_CHECK(4, 6, 0)
        #define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */
      #endif
      #if !HEDLEY_GCC_VERSION_CHECK(8, 0, 0)
        #define SIMDE_BUG_GCC_REV_247851
      #endif
      #if !HEDLEY_GCC_VERSION_CHECK(10, 0, 0)
        #define SIMDE_BUG_GCC_REV_274313
        #define SIMDE_BUG_GCC_91341
      #endif

      /* Bugs gated on the target architecture (and sometimes version). */
      #if !HEDLEY_GCC_VERSION_CHECK(9, 0, 0) && defined(SIMDE_ARCH_AARCH64)
        #define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR
      #endif
      #if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)
        #define SIMDE_BUG_GCC_94482
      #endif
      #if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || \
          defined(SIMDE_ARCH_SYSTEMZ)
        #define SIMDE_BUG_GCC_53784
      #endif
      #if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)
        #if HEDLEY_GCC_VERSION_CHECK(4, 3, 0) /* -Wsign-conversion */
          #define SIMDE_BUG_GCC_95144
        #endif
      #endif
      #if !HEDLEY_GCC_VERSION_CHECK(9, 4, 0) && defined(SIMDE_ARCH_AARCH64)
        #define SIMDE_BUG_GCC_94488
      #endif
      #if defined(SIMDE_ARCH_ARM)
        #define SIMDE_BUG_GCC_95471
      #elif defined(SIMDE_ARCH_POWER)
        #define SIMDE_BUG_GCC_95227
        #define SIMDE_BUG_GCC_95782
      #elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)
        #if !HEDLEY_GCC_VERSION_CHECK(10, 2, 0) && !defined(__OPTIMIZE__)
          #define SIMDE_BUG_GCC_96174
        #endif
      #endif

      /* Defined for every GCC version and architecture (so it does not
       * need to be repeated in the ARM branch above). */
      #define SIMDE_BUG_GCC_95399
    #elif defined(__clang__)
      #if defined(SIMDE_ARCH_AARCH64)
        #define SIMDE_BUG_CLANG_45541
        #define SIMDE_BUG_CLANG_46844
        #define SIMDE_BUG_CLANG_48257
        #if SIMDE_DETECT_CLANG_VERSION_CHECK(10, 0, 0) && \
            SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)
          #define SIMDE_BUG_CLANG_BAD_VI64_OPS
        #endif
      #endif
      #if defined(SIMDE_ARCH_POWER)
        #define SIMDE_BUG_CLANG_46770
      #endif
      #if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0) && \
          !defined(__OPTIMIZE__)
        #define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT
      #endif
      #if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)
        #if HEDLEY_HAS_WARNING("-Wsign-conversion") && \
            SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)
          #define SIMDE_BUG_CLANG_45931
        #endif
        #if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
            SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)
          #define SIMDE_BUG_CLANG_44589
        #endif
      #endif

      /* Defined for every clang version and architecture. */
      #define SIMDE_BUG_CLANG_45959
    #elif defined(HEDLEY_MSVC_VERSION)
      #if defined(SIMDE_ARCH_X86)
        #define SIMDE_BUG_MSVC_ROUND_EXTRACT
      #endif
    #elif defined(HEDLEY_INTEL_VERSION)
      #define SIMDE_BUG_INTEL_857088
    #endif
  #endif
  830. /* GCC and Clang both have the same issue:
  831. * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144
  832. * https://bugs.llvm.org/show_bug.cgi?id=45931
  833. * This is just an easy way to work around it.
  834. */
  835. #if (HEDLEY_HAS_WARNING("-Wsign-conversion") && \
  836. SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)) || \
  837. HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
  838. #define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) \
  839. (__extension__({ \
  840. HEDLEY_DIAGNOSTIC_PUSH \
  841. HEDLEY_DIAGNOSTIC_POP \
  842. _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") __typeof__(expr) \
  843. simde_bug_ignore_sign_conversion_v_ = (expr); \
  844. HEDLEY_DIAGNOSTIC_PUSH \
  845. simde_bug_ignore_sign_conversion_v_; \
  846. }))
  847. #else
  848. #define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr)
  849. #endif
  850. #endif /* !defined(SIMDE_COMMON_H) */