ソースを参照

libobs,cmake: Replace vendored SIMD Everywhere by prefix/system install

The SIMD Everywhere finder needs to be installed alongside the libobs CMake
package since the libobs headers depend on it.

C++ cmath header is included on Windows on ARM to ensure that all math
functions are correctly defined when included in C++ code.
https://github.com/simd-everywhere/simde/issues/1304

The macOS intrinsics header is included to make sure that no redefinition
error happens.
tytan652 1 年間 前
コミット
2bd23cd50d

+ 1 - 0
.github/scripts/utils.zsh/setup_ubuntu

@@ -75,6 +75,7 @@ sudo apt-get install -y --no-install-recommends \
   libgl1-mesa-dev \
   libjansson-dev \
   uthash-dev \
+  libsimde-dev \
   libluajit-5.1-dev python3-dev \
   libx11-dev libxcb-randr0-dev libxcb-shm0-dev libxcb-xinerama0-dev \
   libxcb-composite0-dev libxinerama-dev libxcb1-dev libx11-xcb-dev libxcb-xfixes0-dev \

+ 1 - 0
build-aux/com.obsproject.Studio.json

@@ -64,6 +64,7 @@
         "modules/50-vpl-gpu-rt.json",
         "modules/90-asio.json",
         "modules/90-nlohmann-json.json",
+        "modules/90-simde.json",
         "modules/90-uthash.json",
         "modules/90-websocketpp.json",
         "modules/99-cef.json",

+ 16 - 0
build-aux/modules/90-simde.json

@@ -0,0 +1,16 @@
+{
+    "name": "simde",
+    "buildsystem": "meson",
+    "builddir": true,
+    "config-opts": [
+        "-Dtests=false"
+    ],
+    "sources": [
+        {
+            "type": "git",
+            "url": "https://github.com/simd-everywhere/simde.git",
+            "tag": "v0.8.2",
+            "commit": "71fd833d9666141edcd1d3c109a80e228303d8d7"
+        }
+    ]
+}

+ 9 - 0
cmake/common/helpers_common.cmake

@@ -404,6 +404,15 @@ function(target_export target)
     COMPONENT Development
     ${exclude_variant}
   )
+
+  if(target STREQUAL libobs)
+    install(
+      FILES "${CMAKE_SOURCE_DIR}/cmake/finders/FindSIMDe.cmake"
+      DESTINATION "${package_destination}/finders"
+      COMPONENT Development
+      ${exclude_variant}
+    )
+  endif()
 endfunction()
 
 # check_uuid: Helper function to check for valid UUID

+ 2 - 35
libobs/CMakeLists.txt

@@ -7,6 +7,7 @@ if(OS_WINDOWS AND NOT OBS_PARENT_ARCHITECTURE STREQUAL CMAKE_VS_PLATFORM_NAME)
   return()
 endif()
 
+find_package(SIMDe REQUIRED)
 find_package(Threads REQUIRED)
 
 find_package(FFmpeg 6.1 REQUIRED avformat avutil swscale swresample OPTIONAL_COMPONENTS avcodec)
@@ -135,25 +136,6 @@ target_sources(
     util/util_uint64.h
 )
 
-target_sources(
-  libobs
-  PRIVATE
-    util/simde/check.h
-    util/simde/debug-trap.h
-    util/simde/hedley.h
-    util/simde/simde-align.h
-    util/simde/simde-arch.h
-    util/simde/simde-common.h
-    util/simde/simde-constify.h
-    util/simde/simde-detect-clang.h
-    util/simde/simde-diagnostic.h
-    util/simde/simde-features.h
-    util/simde/simde-math.h
-    util/simde/x86/mmx.h
-    util/simde/x86/sse.h
-    util/simde/x86/sse2.h
-)
-
 target_sources(
   libobs
   PRIVATE
@@ -260,7 +242,7 @@ target_link_libraries(
     jansson::jansson
     Uthash::Uthash
     ZLIB::ZLIB
-  PUBLIC Threads::Threads
+  PUBLIC SIMDe::SIMDe Threads::Threads
 )
 
 if(OS_WINDOWS)
@@ -354,21 +336,6 @@ set(
   util/profiler.h
   util/profiler.hpp
   util/serializer.h
-  util/simde/check.h
-  util/simde/debug-trap.h
-  util/simde/hedley.h
-  util/simde/simde-align.h
-  util/simde/simde-arch.h
-  util/simde/simde-common.h
-  util/simde/simde-constify.h
-  util/simde/simde-detect-clang.h
-  util/simde/simde-diagnostic.h
-  util/simde/simde-features.h
-  util/simde/simde-math.h
-  util/simde/x86/mmx.h
-  util/simde/x86/sse.h
-  util/simde/x86/sse2.h
-  util/source-profiler.h
   util/sse-intrin.h
   util/task.h
   util/text-lookup.h

+ 3 - 0
libobs/cmake/libobsConfig.cmake.in

@@ -2,9 +2,12 @@
 
 include(CMakeFindDependencyMacro)
 
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/finders")
+
 if(MSVC)
   find_dependency(w32-pthreads REQUIRED)
 endif()
+find_dependency(SIMDe REQUIRED)
 find_dependency(Threads REQUIRED)
 
 include("${CMAKE_CURRENT_LIST_DIR}/@[email protected]")

+ 0 - 3
libobs/util/simde/.clang-format

@@ -1,3 +0,0 @@
-Language: Cpp
-SortIncludes: false
-DisableFormat: true

+ 0 - 40
libobs/util/simde/LICENSE.simde

@@ -1,40 +0,0 @@
-simde is licensed as a combination of MIT and CC0 code.
-
-License notices for both are reproduced below:
-
-/* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- *   2017-2020 Evan Nemerson <[email protected]>
- */
-
-/* Portable Snippets - https://gitub.com/nemequ/portable-snippets
- * Created by Evan Nemerson <[email protected]>
- *
- *   To the extent possible under law, the authors have waived all
- *   copyright and related or neighboring rights to this code.  For
- *   details, see the Creative Commons Zero 1.0 Universal license at
- *   https://creativecommons.org/publicdomain/zero/1.0/
- *
- * SPDX-License-Identifier: CC0-1.0
- */

+ 0 - 6
libobs/util/simde/README.libobs

@@ -1,6 +0,0 @@
-This is a slightly modified version of the simde directory in
-https://github.com/simd-everywhere/simde/commit/c3d7abfaba6729a8b11d09a314b34a4db628911d
-
-Modifications:
-1. Unused files have removed.
-2. The code was reformatted using the "formatcode.sh" script in the root of this repository.

+ 0 - 285
libobs/util/simde/check.h

@@ -1,285 +0,0 @@
-/* Check (assertions)
- * Portable Snippets - https://gitub.com/nemequ/portable-snippets
- * Created by Evan Nemerson <[email protected]>
- *
- *   To the extent possible under law, the authors have waived all
- *   copyright and related or neighboring rights to this code.  For
- *   details, see the Creative Commons Zero 1.0 Universal license at
- *   https://creativecommons.org/publicdomain/zero/1.0/
- *
- * SPDX-License-Identifier: CC0-1.0
- */
-
-#if !defined(SIMDE_CHECK_H)
-#define SIMDE_CHECK_H
-
-#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG)
-#define SIMDE_NDEBUG 1
-#endif
-
-#include "hedley.h"
-#include "simde-diagnostic.h"
-#include <stdint.h>
-
-#if !defined(_WIN32)
-#define SIMDE_SIZE_MODIFIER "z"
-#define SIMDE_CHAR_MODIFIER "hh"
-#define SIMDE_SHORT_MODIFIER "h"
-#else
-#if defined(_M_X64) || defined(__amd64__)
-#define SIMDE_SIZE_MODIFIER "I64"
-#else
-#define SIMDE_SIZE_MODIFIER ""
-#endif
-#define SIMDE_CHAR_MODIFIER ""
-#define SIMDE_SHORT_MODIFIER ""
-#endif
-
-#if defined(_MSC_VER) && (_MSC_VER >= 1500)
-#define SIMDE_PUSH_DISABLE_MSVC_C4127_ \
-	__pragma(warning(push)) __pragma(warning(disable : 4127))
-#define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop))
-#else
-#define SIMDE_PUSH_DISABLE_MSVC_C4127_
-#define SIMDE_POP_DISABLE_MSVC_C4127_
-#endif
-
-#if !defined(simde_errorf)
-#if defined(__has_include)
-#if __has_include(<stdio.h>)
-#include <stdio.h>
-#endif
-#elif defined(SIMDE_STDC_HOSTED)
-#if SIMDE_STDC_HOSTED == 1
-#include <stdio.h>
-#endif
-#elif defined(__STDC_HOSTED__)
-#if __STDC_HOSTETD__ == 1
-#include <stdio.h>
-#endif
-#endif
-
-#include "debug-trap.h"
-
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_
-#if defined(EOF)
-#define simde_errorf(format, ...) \
-	(fprintf(stderr, format, __VA_ARGS__), abort())
-#else
-#define simde_errorf(format, ...) (simde_trap())
-#endif
-HEDLEY_DIAGNOSTIC_POP
-#endif
-
-#define simde_error(msg) simde_errorf("%s", msg)
-
-#if defined(SIMDE_NDEBUG) ||                                 \
-	(defined(__cplusplus) && (__cplusplus < 201103L)) || \
-	(defined(__STDC__) && (__STDC__ < 199901L))
-#if defined(SIMDE_CHECK_FAIL_DEFINED)
-#define simde_assert(expr)
-#else
-#if defined(HEDLEY_ASSUME)
-#define simde_assert(expr) HEDLEY_ASSUME(expr)
-#elif HEDLEY_GCC_VERSION_CHECK(4, 5, 0)
-#define simde_assert(expr) ((void)(!!(expr) ? 1 : (__builtin_unreachable(), 1)))
-#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0)
-#define simde_assert(expr) __assume(expr)
-#else
-#define simde_assert(expr)
-#endif
-#endif
-#define simde_assert_true(expr) simde_assert(expr)
-#define simde_assert_false(expr) simde_assert(!(expr))
-#define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \
-	simde_assert(((a)op(b)))
-#define simde_assert_double_equal(a, b, precision)
-#define simde_assert_string_equal(a, b)
-#define simde_assert_string_not_equal(a, b)
-#define simde_assert_memory_equal(size, a, b)
-#define simde_assert_memory_not_equal(size, a, b)
-#else
-#define simde_assert(expr)                                            \
-	do {                                                          \
-		if (!HEDLEY_LIKELY(expr)) {                           \
-			simde_error("assertion failed: " #expr "\n"); \
-		}                                                     \
-		SIMDE_PUSH_DISABLE_MSVC_C4127_                        \
-	} while (0) SIMDE_POP_DISABLE_MSVC_C4127_
-
-#define simde_assert_true(expr)                                \
-	do {                                                   \
-		if (!HEDLEY_LIKELY(expr)) {                    \
-			simde_error("assertion failed: " #expr \
-				    " is not true\n");         \
-		}                                              \
-		SIMDE_PUSH_DISABLE_MSVC_C4127_                 \
-	} while (0) SIMDE_POP_DISABLE_MSVC_C4127_
-
-#define simde_assert_false(expr)                               \
-	do {                                                   \
-		if (!HEDLEY_LIKELY(!(expr))) {                 \
-			simde_error("assertion failed: " #expr \
-				    " is not false\n");        \
-		}                                              \
-		SIMDE_PUSH_DISABLE_MSVC_C4127_                 \
-	} while (0) SIMDE_POP_DISABLE_MSVC_C4127_
-
-#define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b)           \
-	do {                                                               \
-		T simde_tmp_a_ = (a);                                      \
-		T simde_tmp_b_ = (b);                                      \
-		if (!(simde_tmp_a_ op simde_tmp_b_)) {                     \
-			simde_errorf("assertion failed: %s %s %s (" prefix \
-				     "%" fmt suffix " %s " prefix          \
-				     "%" fmt suffix ")\n",                 \
-				     #a, #op, #b, simde_tmp_a_, #op,       \
-				     simde_tmp_b_);                        \
-		}                                                          \
-		SIMDE_PUSH_DISABLE_MSVC_C4127_                             \
-	} while (0) SIMDE_POP_DISABLE_MSVC_C4127_
-
-#define simde_assert_double_equal(a, b, precision)                           \
-	do {                                                                 \
-		const double simde_tmp_a_ = (a);                             \
-		const double simde_tmp_b_ = (b);                             \
-		const double simde_tmp_diff_ =                               \
-			((simde_tmp_a_ - simde_tmp_b_) < 0)                  \
-				? -(simde_tmp_a_ - simde_tmp_b_)             \
-				: (simde_tmp_a_ - simde_tmp_b_);             \
-		if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) {     \
-			simde_errorf(                                        \
-				"assertion failed: %s == %s (%0." #precision \
-				"g == %0." #precision "g)\n",                \
-				#a, #b, simde_tmp_a_, simde_tmp_b_);         \
-		}                                                            \
-		SIMDE_PUSH_DISABLE_MSVC_C4127_                               \
-	} while (0) SIMDE_POP_DISABLE_MSVC_C4127_
-
-#include <string.h>
-#define simde_assert_string_equal(a, b)                                                   \
-	do {                                                                              \
-		const char *simde_tmp_a_ = a;                                             \
-		const char *simde_tmp_b_ = b;                                             \
-		if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) !=                 \
-				    0)) {                                                 \
-			simde_errorf(                                                     \
-				"assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \
-				#a, #b, simde_tmp_a_, simde_tmp_b_);                      \
-		}                                                                         \
-		SIMDE_PUSH_DISABLE_MSVC_C4127_                                            \
-	} while (0) SIMDE_POP_DISABLE_MSVC_C4127_
-
-#define simde_assert_string_not_equal(a, b)                                               \
-	do {                                                                              \
-		const char *simde_tmp_a_ = a;                                             \
-		const char *simde_tmp_b_ = b;                                             \
-		if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) ==                 \
-				    0)) {                                                 \
-			simde_errorf(                                                     \
-				"assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \
-				#a, #b, simde_tmp_a_, simde_tmp_b_);                      \
-		}                                                                         \
-		SIMDE_PUSH_DISABLE_MSVC_C4127_                                            \
-	} while (0) SIMDE_POP_DISABLE_MSVC_C4127_
-
-#define simde_assert_memory_equal(size, a, b)                                                                        \
-	do {                                                                                                         \
-		const unsigned char *simde_tmp_a_ =                                                                  \
-			(const unsigned char *)(a);                                                                  \
-		const unsigned char *simde_tmp_b_ =                                                                  \
-			(const unsigned char *)(b);                                                                  \
-		const size_t simde_tmp_size_ = (size);                                                               \
-		if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_,                                               \
-					   simde_tmp_size_)) != 0) {                                                 \
-			size_t simde_tmp_pos_;                                                                       \
-			for (simde_tmp_pos_ = 0;                                                                     \
-			     simde_tmp_pos_ < simde_tmp_size_;                                                       \
-			     simde_tmp_pos_++) {                                                                     \
-				if (simde_tmp_a_[simde_tmp_pos_] !=                                                  \
-				    simde_tmp_b_[simde_tmp_pos_]) {                                                  \
-					simde_errorf(                                                                \
-						"assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER \
-						"u\n",                                                               \
-						#a, #b, simde_tmp_pos_);                                             \
-					break;                                                                       \
-				}                                                                                    \
-			}                                                                                            \
-		}                                                                                                    \
-		SIMDE_PUSH_DISABLE_MSVC_C4127_                                                                       \
-	} while (0) SIMDE_POP_DISABLE_MSVC_C4127_
-
-#define simde_assert_memory_not_equal(size, a, b)                                          \
-	do {                                                                               \
-		const unsigned char *simde_tmp_a_ =                                        \
-			(const unsigned char *)(a);                                        \
-		const unsigned char *simde_tmp_b_ =                                        \
-			(const unsigned char *)(b);                                        \
-		const size_t simde_tmp_size_ = (size);                                     \
-		if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_,                     \
-					   simde_tmp_size_)) == 0) {                       \
-			simde_errorf(                                                      \
-				"assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER \
-				"u bytes)\n",                                              \
-				#a, #b, simde_tmp_size_);                                  \
-		}                                                                          \
-		SIMDE_PUSH_DISABLE_MSVC_C4127_                                             \
-	} while (0) SIMDE_POP_DISABLE_MSVC_C4127_
-#endif
-
-#define simde_assert_type(T, fmt, a, op, b) \
-	simde_assert_type_full("", "", T, fmt, a, op, b)
-
-#define simde_assert_char(a, op, b)               \
-	simde_assert_type_full("'\\x", "'", char, \
-			       "02" SIMDE_CHAR_MODIFIER "x", a, op, b)
-#define simde_assert_uchar(a, op, b)                       \
-	simde_assert_type_full("'\\x", "'", unsigned char, \
-			       "02" SIMDE_CHAR_MODIFIER "x", a, op, b)
-#define simde_assert_short(a, op, b) \
-	simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b)
-#define simde_assert_ushort(a, op, b) \
-	simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b)
-#define simde_assert_int(a, op, b) simde_assert_type(int, "d", a, op, b)
-#define simde_assert_uint(a, op, b) \
-	simde_assert_type(unsigned int, "u", a, op, b)
-#define simde_assert_long(a, op, b) simde_assert_type(long int, "ld", a, op, b)
-#define simde_assert_ulong(a, op, b) \
-	simde_assert_type(unsigned long int, "lu", a, op, b)
-#define simde_assert_llong(a, op, b) \
-	simde_assert_type(long long int, "lld", a, op, b)
-#define simde_assert_ullong(a, op, b) \
-	simde_assert_type(unsigned long long int, "llu", a, op, b)
-
-#define simde_assert_size(a, op, b) \
-	simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b)
-
-#define simde_assert_float(a, op, b) simde_assert_type(float, "f", a, op, b)
-#define simde_assert_double(a, op, b) simde_assert_type(double, "g", a, op, b)
-#define simde_assert_ptr(a, op, b) \
-	simde_assert_type(const void *, "p", a, op, b)
-
-#define simde_assert_int8(a, op, b) simde_assert_type(int8_t, PRIi8, a, op, b)
-#define simde_assert_uint8(a, op, b) simde_assert_type(uint8_t, PRIu8, a, op, b)
-#define simde_assert_int16(a, op, b) \
-	simde_assert_type(int16_t, PRIi16, a, op, b)
-#define simde_assert_uint16(a, op, b) \
-	simde_assert_type(uint16_t, PRIu16, a, op, b)
-#define simde_assert_int32(a, op, b) \
-	simde_assert_type(int32_t, PRIi32, a, op, b)
-#define simde_assert_uint32(a, op, b) \
-	simde_assert_type(uint32_t, PRIu32, a, op, b)
-#define simde_assert_int64(a, op, b) \
-	simde_assert_type(int64_t, PRIi64, a, op, b)
-#define simde_assert_uint64(a, op, b) \
-	simde_assert_type(uint64_t, PRIu64, a, op, b)
-
-#define simde_assert_ptr_equal(a, b) simde_assert_ptr(a, ==, b)
-#define simde_assert_ptr_not_equal(a, b) simde_assert_ptr(a, !=, b)
-#define simde_assert_null(ptr) simde_assert_ptr(ptr, ==, NULL)
-#define simde_assert_not_null(ptr) simde_assert_ptr(ptr, !=, NULL)
-#define simde_assert_ptr_null(ptr) simde_assert_ptr(ptr, ==, NULL)
-#define simde_assert_ptr_not_null(ptr) simde_assert_ptr(ptr, !=, NULL)
-
-#endif /* !defined(SIMDE_CHECK_H) */

+ 0 - 117
libobs/util/simde/debug-trap.h

@@ -1,117 +0,0 @@
-/* Debugging assertions and traps
- * Portable Snippets - https://gitub.com/nemequ/portable-snippets
- * Created by Evan Nemerson <[email protected]>
- *
- *   To the extent possible under law, the authors have waived all
- *   copyright and related or neighboring rights to this code.  For
- *   details, see the Creative Commons Zero 1.0 Universal license at
- *   https://creativecommons.org/publicdomain/zero/1.0/
- *
- * SPDX-License-Identifier: CC0-1.0
- */
-
-#if !defined(SIMDE_DEBUG_TRAP_H)
-#define SIMDE_DEBUG_TRAP_H
-
-#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG)
-#define SIMDE_NDEBUG 1
-#endif
-
-#if defined(__has_builtin) && !defined(__ibmxl__)
-#if __has_builtin(__builtin_debugtrap)
-#define simde_trap() __builtin_debugtrap()
-#elif __has_builtin(__debugbreak)
-#define simde_trap() __debugbreak()
-#endif
-#endif
-#if !defined(simde_trap)
-#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
-#define simde_trap() __debugbreak()
-#elif defined(__ARMCC_VERSION)
-#define simde_trap() __breakpoint(42)
-#elif defined(__ibmxl__) || defined(__xlC__)
-#include <builtins.h>
-#define simde_trap() __trap(42)
-#elif defined(__DMC__) && defined(_M_IX86)
-static inline void simde_trap(void)
-{
-	__asm int 3h;
-}
-#elif defined(__i386__) || defined(__x86_64__)
-static inline void simde_trap(void)
-{
-	__asm__ __volatile__("int $03");
-}
-#elif defined(__thumb__)
-static inline void simde_trap(void)
-{
-	__asm__ __volatile__(".inst 0xde01");
-}
-#elif defined(__aarch64__)
-static inline void simde_trap(void)
-{
-	__asm__ __volatile__(".inst 0xd4200000");
-}
-#elif defined(__arm__)
-static inline void simde_trap(void)
-{
-	__asm__ __volatile__(".inst 0xe7f001f0");
-}
-#elif defined(__alpha__) && !defined(__osf__)
-static inline void simde_trap(void)
-{
-	__asm__ __volatile__("bpt");
-}
-#elif defined(_54_)
-static inline void simde_trap(void)
-{
-	__asm__ __volatile__("ESTOP");
-}
-#elif defined(_55_)
-static inline void simde_trap(void)
-{
-	__asm__ __volatile__(
-		";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP");
-}
-#elif defined(_64P_)
-static inline void simde_trap(void)
-{
-	__asm__ __volatile__("SWBP 0");
-}
-#elif defined(_6x_)
-static inline void simde_trap(void)
-{
-	__asm__ __volatile__("NOP\n .word 0x10000000");
-}
-#elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__)
-#define simde_trap() __builtin_trap()
-#else
-#include <signal.h>
-#if defined(SIGTRAP)
-#define simde_trap() raise(SIGTRAP)
-#else
-#define simde_trap() raise(SIGABRT)
-#endif
-#endif
-#endif
-
-#if defined(HEDLEY_LIKELY)
-#define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr)
-#elif defined(__GNUC__) && (__GNUC__ >= 3)
-#define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1)
-#else
-#define SIMDE_DBG_LIKELY(expr) (!!(expr))
-#endif
-
-#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0)
-#define simde_dbg_assert(expr)                 \
-	do {                                   \
-		if (!SIMDE_DBG_LIKELY(expr)) { \
-			simde_trap();          \
-		}                              \
-	} while (0)
-#else
-#define simde_dbg_assert(expr)
-#endif
-
-#endif /* !defined(SIMDE_DEBUG_TRAP_H) */

+ 0 - 2123
libobs/util/simde/hedley.h

@@ -1,2123 +0,0 @@
-/* Hedley - https://nemequ.github.io/hedley
- * Created by Evan Nemerson <[email protected]>
- *
- * To the extent possible under law, the author(s) have dedicated all
- * copyright and related and neighboring rights to this software to
- * the public domain worldwide. This software is distributed without
- * any warranty.
- *
- * For details, see <http://creativecommons.org/publicdomain/zero/1.0/>.
- * SPDX-License-Identifier: CC0-1.0
- */
-
-#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 14)
-#if defined(HEDLEY_VERSION)
-#undef HEDLEY_VERSION
-#endif
-#define HEDLEY_VERSION 14
-
-#if defined(HEDLEY_STRINGIFY_EX)
-#undef HEDLEY_STRINGIFY_EX
-#endif
-#define HEDLEY_STRINGIFY_EX(x) #x
-
-#if defined(HEDLEY_STRINGIFY)
-#undef HEDLEY_STRINGIFY
-#endif
-#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x)
-
-#if defined(HEDLEY_CONCAT_EX)
-#undef HEDLEY_CONCAT_EX
-#endif
-#define HEDLEY_CONCAT_EX(a, b) a##b
-
-#if defined(HEDLEY_CONCAT)
-#undef HEDLEY_CONCAT
-#endif
-#define HEDLEY_CONCAT(a, b) HEDLEY_CONCAT_EX(a, b)
-
-#if defined(HEDLEY_CONCAT3_EX)
-#undef HEDLEY_CONCAT3_EX
-#endif
-#define HEDLEY_CONCAT3_EX(a, b, c) a##b##c
-
-#if defined(HEDLEY_CONCAT3)
-#undef HEDLEY_CONCAT3
-#endif
-#define HEDLEY_CONCAT3(a, b, c) HEDLEY_CONCAT3_EX(a, b, c)
-
-#if defined(HEDLEY_VERSION_ENCODE)
-#undef HEDLEY_VERSION_ENCODE
-#endif
-#define HEDLEY_VERSION_ENCODE(major, minor, revision) \
-	(((major)*1000000) + ((minor)*1000) + (revision))
-
-#if defined(HEDLEY_VERSION_DECODE_MAJOR)
-#undef HEDLEY_VERSION_DECODE_MAJOR
-#endif
-#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000)
-
-#if defined(HEDLEY_VERSION_DECODE_MINOR)
-#undef HEDLEY_VERSION_DECODE_MINOR
-#endif
-#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000)
-
-#if defined(HEDLEY_VERSION_DECODE_REVISION)
-#undef HEDLEY_VERSION_DECODE_REVISION
-#endif
-#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000)
-
-#if defined(HEDLEY_GNUC_VERSION)
-#undef HEDLEY_GNUC_VERSION
-#endif
-#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__)
-#define HEDLEY_GNUC_VERSION \
-	HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)
-#elif defined(__GNUC__)
-#define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0)
-#endif
-
-#if defined(HEDLEY_GNUC_VERSION_CHECK)
-#undef HEDLEY_GNUC_VERSION_CHECK
-#endif
-#if defined(HEDLEY_GNUC_VERSION)
-#define HEDLEY_GNUC_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_GNUC_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_MSVC_VERSION)
-#undef HEDLEY_MSVC_VERSION
-#endif
-#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL)
-#define HEDLEY_MSVC_VERSION                                        \
-	HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000,            \
-			      (_MSC_FULL_VER % 10000000) / 100000, \
-			      (_MSC_FULL_VER % 100000) / 100)
-#elif defined(_MSC_FULL_VER) && !defined(__ICL)
-#define HEDLEY_MSVC_VERSION                                      \
-	HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000,           \
-			      (_MSC_FULL_VER % 1000000) / 10000, \
-			      (_MSC_FULL_VER % 10000) / 10)
-#elif defined(_MSC_VER) && !defined(__ICL)
-#define HEDLEY_MSVC_VERSION \
-	HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0)
-#endif
-
-#if defined(HEDLEY_MSVC_VERSION_CHECK)
-#undef HEDLEY_MSVC_VERSION_CHECK
-#endif
-#if !defined(HEDLEY_MSVC_VERSION)
-#define HEDLEY_MSVC_VERSION_CHECK(major, minor, patch) (0)
-#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
-#define HEDLEY_MSVC_VERSION_CHECK(major, minor, patch) \
-	(_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch)))
-#elif defined(_MSC_VER) && (_MSC_VER >= 1200)
-#define HEDLEY_MSVC_VERSION_CHECK(major, minor, patch) \
-	(_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch)))
-#else
-#define HEDLEY_MSVC_VERSION_CHECK(major, minor, patch) \
-	(_MSC_VER >= ((major * 100) + (minor)))
-#endif
-
-#if defined(HEDLEY_INTEL_VERSION)
-#undef HEDLEY_INTEL_VERSION
-#endif
-#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && \
-	!defined(__ICL)
-#define HEDLEY_INTEL_VERSION                                                  \
-	HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, \
-			      __INTEL_COMPILER_UPDATE)
-#elif defined(__INTEL_COMPILER) && !defined(__ICL)
-#define HEDLEY_INTEL_VERSION \
-	HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0)
-#endif
-
-#if defined(HEDLEY_INTEL_VERSION_CHECK)
-#undef HEDLEY_INTEL_VERSION_CHECK
-#endif
-#if defined(HEDLEY_INTEL_VERSION)
-#define HEDLEY_INTEL_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_INTEL_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_INTEL_CL_VERSION)
-#undef HEDLEY_INTEL_CL_VERSION
-#endif
-#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && \
-	defined(__ICL)
-#define HEDLEY_INTEL_CL_VERSION \
-	HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0)
-#endif
-
-#if defined(HEDLEY_INTEL_CL_VERSION_CHECK)
-#undef HEDLEY_INTEL_CL_VERSION_CHECK
-#endif
-#if defined(HEDLEY_INTEL_CL_VERSION)
-#define HEDLEY_INTEL_CL_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_INTEL_CL_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_PGI_VERSION)
-#undef HEDLEY_PGI_VERSION
-#endif
-#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && \
-	defined(__PGIC_PATCHLEVEL__)
-#define HEDLEY_PGI_VERSION \
-	HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__)
-#endif
-
-#if defined(HEDLEY_PGI_VERSION_CHECK)
-#undef HEDLEY_PGI_VERSION_CHECK
-#endif
-#if defined(HEDLEY_PGI_VERSION)
-#define HEDLEY_PGI_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_PGI_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_SUNPRO_VERSION)
-#undef HEDLEY_SUNPRO_VERSION
-#endif
-#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000)
-#define HEDLEY_SUNPRO_VERSION                                     \
-	HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + \
-				      ((__SUNPRO_C >> 12) & 0xf), \
-			      (((__SUNPRO_C >> 8) & 0xf) * 10) +  \
-				      ((__SUNPRO_C >> 4) & 0xf),  \
-			      (__SUNPRO_C & 0xf) * 10)
-#elif defined(__SUNPRO_C)
-#define HEDLEY_SUNPRO_VERSION                          \
-	HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, \
-			      (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C)&0xf)
-#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000)
-#define HEDLEY_SUNPRO_VERSION                                      \
-	HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + \
-				      ((__SUNPRO_CC >> 12) & 0xf), \
-			      (((__SUNPRO_CC >> 8) & 0xf) * 10) +  \
-				      ((__SUNPRO_CC >> 4) & 0xf),  \
-			      (__SUNPRO_CC & 0xf) * 10)
-#elif defined(__SUNPRO_CC)
-#define HEDLEY_SUNPRO_VERSION                           \
-	HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, \
-			      (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC)&0xf)
-#endif
-
-#if defined(HEDLEY_SUNPRO_VERSION_CHECK)
-#undef HEDLEY_SUNPRO_VERSION_CHECK
-#endif
-#if defined(HEDLEY_SUNPRO_VERSION)
-#define HEDLEY_SUNPRO_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_SUNPRO_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_EMSCRIPTEN_VERSION)
-#undef HEDLEY_EMSCRIPTEN_VERSION
-#endif
-#if defined(__EMSCRIPTEN__)
-#define HEDLEY_EMSCRIPTEN_VERSION                                         \
-	HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, \
-			      __EMSCRIPTEN_tiny__)
-#endif
-
-#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK)
-#undef HEDLEY_EMSCRIPTEN_VERSION_CHECK
-#endif
-#if defined(HEDLEY_EMSCRIPTEN_VERSION)
-#define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_EMSCRIPTEN_VERSION >=                        \
-	 HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_ARM_VERSION)
-#undef HEDLEY_ARM_VERSION
-#endif
-#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION)
-#define HEDLEY_ARM_VERSION                                               \
-	HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000,           \
-			      (__ARMCOMPILER_VERSION % 1000000) / 10000, \
-			      (__ARMCOMPILER_VERSION % 10000) / 100)
-#elif defined(__CC_ARM) && defined(__ARMCC_VERSION)
-#define HEDLEY_ARM_VERSION                                         \
-	HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000,           \
-			      (__ARMCC_VERSION % 1000000) / 10000, \
-			      (__ARMCC_VERSION % 10000) / 100)
-#endif
-
-#if defined(HEDLEY_ARM_VERSION_CHECK)
-#undef HEDLEY_ARM_VERSION_CHECK
-#endif
-#if defined(HEDLEY_ARM_VERSION)
-#define HEDLEY_ARM_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_ARM_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_IBM_VERSION)
-#undef HEDLEY_IBM_VERSION
-#endif
-#if defined(__ibmxl__)
-#define HEDLEY_IBM_VERSION                                          \
-	HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, \
-			      __ibmxl_modification__)
-#elif defined(__xlC__) && defined(__xlC_ver__)
-#define HEDLEY_IBM_VERSION                                  \
-	HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, \
-			      (__xlC_ver__ >> 8) & 0xff)
-#elif defined(__xlC__)
-#define HEDLEY_IBM_VERSION \
-	HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0)
-#endif
-
-#if defined(HEDLEY_IBM_VERSION_CHECK)
-#undef HEDLEY_IBM_VERSION_CHECK
-#endif
-#if defined(HEDLEY_IBM_VERSION)
-#define HEDLEY_IBM_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_IBM_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_TI_VERSION)
-#undef HEDLEY_TI_VERSION
-#endif
-#if defined(__TI_COMPILER_VERSION__) &&                                       \
-	(defined(__TMS470__) || defined(__TI_ARM__) || defined(__MSP430__) || \
-	 defined(__TMS320C2000__))
-#if (__TI_COMPILER_VERSION__ >= 16000000)
-#define HEDLEY_TI_VERSION                                                 \
-	HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000,          \
-			      (__TI_COMPILER_VERSION__ % 1000000) / 1000, \
-			      (__TI_COMPILER_VERSION__ % 1000))
-#endif
-#endif
-
-#if defined(HEDLEY_TI_VERSION_CHECK)
-#undef HEDLEY_TI_VERSION_CHECK
-#endif
-#if defined(HEDLEY_TI_VERSION)
-#define HEDLEY_TI_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_TI_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_TI_CL2000_VERSION)
-#undef HEDLEY_TI_CL2000_VERSION
-#endif
-#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__)
-#define HEDLEY_TI_CL2000_VERSION                                          \
-	HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000,          \
-			      (__TI_COMPILER_VERSION__ % 1000000) / 1000, \
-			      (__TI_COMPILER_VERSION__ % 1000))
-#endif
-
-#if defined(HEDLEY_TI_CL2000_VERSION_CHECK)
-#undef HEDLEY_TI_CL2000_VERSION_CHECK
-#endif
-#if defined(HEDLEY_TI_CL2000_VERSION)
-#define HEDLEY_TI_CL2000_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_TI_CL2000_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_TI_CL430_VERSION)
-#undef HEDLEY_TI_CL430_VERSION
-#endif
-#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__)
-#define HEDLEY_TI_CL430_VERSION                                           \
-	HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000,          \
-			      (__TI_COMPILER_VERSION__ % 1000000) / 1000, \
-			      (__TI_COMPILER_VERSION__ % 1000))
-#endif
-
-#if defined(HEDLEY_TI_CL430_VERSION_CHECK)
-#undef HEDLEY_TI_CL430_VERSION_CHECK
-#endif
-#if defined(HEDLEY_TI_CL430_VERSION)
-#define HEDLEY_TI_CL430_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_TI_CL430_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_TI_ARMCL_VERSION)
-#undef HEDLEY_TI_ARMCL_VERSION
-#endif
-#if defined(__TI_COMPILER_VERSION__) && \
-	(defined(__TMS470__) || defined(__TI_ARM__))
-#define HEDLEY_TI_ARMCL_VERSION                                           \
-	HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000,          \
-			      (__TI_COMPILER_VERSION__ % 1000000) / 1000, \
-			      (__TI_COMPILER_VERSION__ % 1000))
-#endif
-
-#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK)
-#undef HEDLEY_TI_ARMCL_VERSION_CHECK
-#endif
-#if defined(HEDLEY_TI_ARMCL_VERSION)
-#define HEDLEY_TI_ARMCL_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_TI_ARMCL_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_TI_CL6X_VERSION)
-#undef HEDLEY_TI_CL6X_VERSION
-#endif
-#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__)
-#define HEDLEY_TI_CL6X_VERSION                                            \
-	HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000,          \
-			      (__TI_COMPILER_VERSION__ % 1000000) / 1000, \
-			      (__TI_COMPILER_VERSION__ % 1000))
-#endif
-
-#if defined(HEDLEY_TI_CL6X_VERSION_CHECK)
-#undef HEDLEY_TI_CL6X_VERSION_CHECK
-#endif
-#if defined(HEDLEY_TI_CL6X_VERSION)
-#define HEDLEY_TI_CL6X_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_TI_CL6X_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_TI_CL7X_VERSION)
-#undef HEDLEY_TI_CL7X_VERSION
-#endif
-#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__)
-#define HEDLEY_TI_CL7X_VERSION                                            \
-	HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000,          \
-			      (__TI_COMPILER_VERSION__ % 1000000) / 1000, \
-			      (__TI_COMPILER_VERSION__ % 1000))
-#endif
-
-#if defined(HEDLEY_TI_CL7X_VERSION_CHECK)
-#undef HEDLEY_TI_CL7X_VERSION_CHECK
-#endif
-#if defined(HEDLEY_TI_CL7X_VERSION)
-#define HEDLEY_TI_CL7X_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_TI_CL7X_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_TI_CLPRU_VERSION)
-#undef HEDLEY_TI_CLPRU_VERSION
-#endif
-#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__)
-#define HEDLEY_TI_CLPRU_VERSION                                           \
-	HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000,          \
-			      (__TI_COMPILER_VERSION__ % 1000000) / 1000, \
-			      (__TI_COMPILER_VERSION__ % 1000))
-#endif
-
-#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK)
-#undef HEDLEY_TI_CLPRU_VERSION_CHECK
-#endif
-#if defined(HEDLEY_TI_CLPRU_VERSION)
-#define HEDLEY_TI_CLPRU_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_TI_CLPRU_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_CRAY_VERSION)
-#undef HEDLEY_CRAY_VERSION
-#endif
-#if defined(_CRAYC)
-#if defined(_RELEASE_PATCHLEVEL)
-#define HEDLEY_CRAY_VERSION                                   \
-	HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, \
-			      _RELEASE_PATCHLEVEL)
-#else
-#define HEDLEY_CRAY_VERSION \
-	HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0)
-#endif
-#endif
-
-#if defined(HEDLEY_CRAY_VERSION_CHECK)
-#undef HEDLEY_CRAY_VERSION_CHECK
-#endif
-#if defined(HEDLEY_CRAY_VERSION)
-#define HEDLEY_CRAY_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_CRAY_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_IAR_VERSION)
-#undef HEDLEY_IAR_VERSION
-#endif
-#if defined(__IAR_SYSTEMS_ICC__)
-#if __VER__ > 1000
-#define HEDLEY_IAR_VERSION                                                    \
-	HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), \
-			      (__VER__ % 1000))
-#else
-#define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(VER / 100, __VER__ % 100, 0)
-#endif
-#endif
-
-#if defined(HEDLEY_IAR_VERSION_CHECK)
-#undef HEDLEY_IAR_VERSION_CHECK
-#endif
-#if defined(HEDLEY_IAR_VERSION)
-#define HEDLEY_IAR_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_IAR_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_TINYC_VERSION)
-#undef HEDLEY_TINYC_VERSION
-#endif
-#if defined(__TINYC__)
-#define HEDLEY_TINYC_VERSION                                            \
-	HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, \
-			      __TINYC__ % 100)
-#endif
-
-#if defined(HEDLEY_TINYC_VERSION_CHECK)
-#undef HEDLEY_TINYC_VERSION_CHECK
-#endif
-#if defined(HEDLEY_TINYC_VERSION)
-#define HEDLEY_TINYC_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_TINYC_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_DMC_VERSION)
-#undef HEDLEY_DMC_VERSION
-#endif
-#if defined(__DMC__)
-#define HEDLEY_DMC_VERSION \
-	HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf)
-#endif
-
-#if defined(HEDLEY_DMC_VERSION_CHECK)
-#undef HEDLEY_DMC_VERSION_CHECK
-#endif
-#if defined(HEDLEY_DMC_VERSION)
-#define HEDLEY_DMC_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_DMC_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_COMPCERT_VERSION)
-#undef HEDLEY_COMPCERT_VERSION
-#endif
-#if defined(__COMPCERT_VERSION__)
-#define HEDLEY_COMPCERT_VERSION                                   \
-	HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000,       \
-			      (__COMPCERT_VERSION__ / 100) % 100, \
-			      __COMPCERT_VERSION__ % 100)
-#endif
-
-#if defined(HEDLEY_COMPCERT_VERSION_CHECK)
-#undef HEDLEY_COMPCERT_VERSION_CHECK
-#endif
-#if defined(HEDLEY_COMPCERT_VERSION)
-#define HEDLEY_COMPCERT_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_COMPCERT_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_PELLES_VERSION)
-#undef HEDLEY_PELLES_VERSION
-#endif
-#if defined(__POCC__)
-#define HEDLEY_PELLES_VERSION \
-	HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0)
-#endif
-
-#if defined(HEDLEY_PELLES_VERSION_CHECK)
-#undef HEDLEY_PELLES_VERSION_CHECK
-#endif
-#if defined(HEDLEY_PELLES_VERSION)
-#define HEDLEY_PELLES_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_PELLES_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_GCC_VERSION)
-#undef HEDLEY_GCC_VERSION
-#endif
-#if defined(HEDLEY_GNUC_VERSION) && !defined(__clang__) &&                \
-	!defined(HEDLEY_INTEL_VERSION) && !defined(HEDLEY_PGI_VERSION) && \
-	!defined(HEDLEY_ARM_VERSION) && !defined(HEDLEY_TI_VERSION) &&    \
-	!defined(HEDLEY_TI_ARMCL_VERSION) &&                              \
-	!defined(HEDLEY_TI_CL430_VERSION) &&                              \
-	!defined(HEDLEY_TI_CL2000_VERSION) &&                             \
-	!defined(HEDLEY_TI_CL6X_VERSION) &&                               \
-	!defined(HEDLEY_TI_CL7X_VERSION) &&                               \
-	!defined(HEDLEY_TI_CLPRU_VERSION) && !defined(__COMPCERT__)
-#define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION
-#endif
-
-#if defined(HEDLEY_GCC_VERSION_CHECK)
-#undef HEDLEY_GCC_VERSION_CHECK
-#endif
-#if defined(HEDLEY_GCC_VERSION)
-#define HEDLEY_GCC_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define HEDLEY_GCC_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(HEDLEY_HAS_ATTRIBUTE)
-#undef HEDLEY_HAS_ATTRIBUTE
-#endif
-#if defined(__has_attribute)
-#define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute)
-#else
-#define HEDLEY_HAS_ATTRIBUTE(attribute) (0)
-#endif
-
-#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE)
-#undef HEDLEY_GNUC_HAS_ATTRIBUTE
-#endif
-#if defined(__has_attribute)
-#define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute, major, minor, patch) \
-	__has_attribute(attribute)
-#else
-#define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute, major, minor, patch) \
-	HEDLEY_GNUC_VERSION_CHECK(major, minor, patch)
-#endif
-
-#if defined(HEDLEY_GCC_HAS_ATTRIBUTE)
-#undef HEDLEY_GCC_HAS_ATTRIBUTE
-#endif
-#if defined(__has_attribute)
-#define HEDLEY_GCC_HAS_ATTRIBUTE(attribute, major, minor, patch) \
-	__has_attribute(attribute)
-#else
-#define HEDLEY_GCC_HAS_ATTRIBUTE(attribute, major, minor, patch) \
-	HEDLEY_GCC_VERSION_CHECK(major, minor, patch)
-#endif
-
-#if defined(HEDLEY_HAS_CPP_ATTRIBUTE)
-#undef HEDLEY_HAS_CPP_ATTRIBUTE
-#endif
-#if defined(__has_cpp_attribute) && defined(__cplusplus) && \
-	(!defined(HEDLEY_SUNPRO_VERSION) ||                 \
-	 HEDLEY_SUNPRO_VERSION_CHECK(5, 15, 0))
-#define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute)
-#else
-#define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0)
-#endif
-
-#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS)
-#undef HEDLEY_HAS_CPP_ATTRIBUTE_NS
-#endif
-#if !defined(__cplusplus) || !defined(__has_cpp_attribute)
-#define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns, attribute) (0)
-#elif !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_IAR_VERSION) && \
-	(!defined(HEDLEY_SUNPRO_VERSION) ||                           \
-	 HEDLEY_SUNPRO_VERSION_CHECK(5, 15, 0)) &&                    \
-	(!defined(HEDLEY_MSVC_VERSION) ||                             \
-	 HEDLEY_MSVC_VERSION_CHECK(19, 20, 0))
-#define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns, attribute) \
-	HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute)
-#else
-#define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns, attribute) (0)
-#endif
-
-#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE)
-#undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE
-#endif
-#if defined(__has_cpp_attribute) && defined(__cplusplus)
-#define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute, major, minor, patch) \
-	__has_cpp_attribute(attribute)
-#else
-#define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute, major, minor, patch) \
-	HEDLEY_GNUC_VERSION_CHECK(major, minor, patch)
-#endif
-
-#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE)
-#undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE
-#endif
-#if defined(__has_cpp_attribute) && defined(__cplusplus)
-#define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute, major, minor, patch) \
-	__has_cpp_attribute(attribute)
-#else
-#define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute, major, minor, patch) \
-	HEDLEY_GCC_VERSION_CHECK(major, minor, patch)
-#endif
-
-#if defined(HEDLEY_HAS_BUILTIN)
-#undef HEDLEY_HAS_BUILTIN
-#endif
-#if defined(__has_builtin)
-#define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin)
-#else
-#define HEDLEY_HAS_BUILTIN(builtin) (0)
-#endif
-
-#if defined(HEDLEY_GNUC_HAS_BUILTIN)
-#undef HEDLEY_GNUC_HAS_BUILTIN
-#endif
-#if defined(__has_builtin)
-#define HEDLEY_GNUC_HAS_BUILTIN(builtin, major, minor, patch) \
-	__has_builtin(builtin)
-#else
-#define HEDLEY_GNUC_HAS_BUILTIN(builtin, major, minor, patch) \
-	HEDLEY_GNUC_VERSION_CHECK(major, minor, patch)
-#endif
-
-#if defined(HEDLEY_GCC_HAS_BUILTIN)
-#undef HEDLEY_GCC_HAS_BUILTIN
-#endif
-#if defined(__has_builtin)
-#define HEDLEY_GCC_HAS_BUILTIN(builtin, major, minor, patch) \
-	__has_builtin(builtin)
-#else
-#define HEDLEY_GCC_HAS_BUILTIN(builtin, major, minor, patch) \
-	HEDLEY_GCC_VERSION_CHECK(major, minor, patch)
-#endif
-
-#if defined(HEDLEY_HAS_FEATURE)
-#undef HEDLEY_HAS_FEATURE
-#endif
-#if defined(__has_feature)
-#define HEDLEY_HAS_FEATURE(feature) __has_feature(feature)
-#else
-#define HEDLEY_HAS_FEATURE(feature) (0)
-#endif
-
-#if defined(HEDLEY_GNUC_HAS_FEATURE)
-#undef HEDLEY_GNUC_HAS_FEATURE
-#endif
-#if defined(__has_feature)
-#define HEDLEY_GNUC_HAS_FEATURE(feature, major, minor, patch) \
-	__has_feature(feature)
-#else
-#define HEDLEY_GNUC_HAS_FEATURE(feature, major, minor, patch) \
-	HEDLEY_GNUC_VERSION_CHECK(major, minor, patch)
-#endif
-
-#if defined(HEDLEY_GCC_HAS_FEATURE)
-#undef HEDLEY_GCC_HAS_FEATURE
-#endif
-#if defined(__has_feature)
-#define HEDLEY_GCC_HAS_FEATURE(feature, major, minor, patch) \
-	__has_feature(feature)
-#else
-#define HEDLEY_GCC_HAS_FEATURE(feature, major, minor, patch) \
-	HEDLEY_GCC_VERSION_CHECK(major, minor, patch)
-#endif
-
-#if defined(HEDLEY_HAS_EXTENSION)
-#undef HEDLEY_HAS_EXTENSION
-#endif
-#if defined(__has_extension)
-#define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension)
-#else
-#define HEDLEY_HAS_EXTENSION(extension) (0)
-#endif
-
-#if defined(HEDLEY_GNUC_HAS_EXTENSION)
-#undef HEDLEY_GNUC_HAS_EXTENSION
-#endif
-#if defined(__has_extension)
-#define HEDLEY_GNUC_HAS_EXTENSION(extension, major, minor, patch) \
-	__has_extension(extension)
-#else
-#define HEDLEY_GNUC_HAS_EXTENSION(extension, major, minor, patch) \
-	HEDLEY_GNUC_VERSION_CHECK(major, minor, patch)
-#endif
-
-#if defined(HEDLEY_GCC_HAS_EXTENSION)
-#undef HEDLEY_GCC_HAS_EXTENSION
-#endif
-#if defined(__has_extension)
-#define HEDLEY_GCC_HAS_EXTENSION(extension, major, minor, patch) \
-	__has_extension(extension)
-#else
-#define HEDLEY_GCC_HAS_EXTENSION(extension, major, minor, patch) \
-	HEDLEY_GCC_VERSION_CHECK(major, minor, patch)
-#endif
-
-#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE)
-#undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE
-#endif
-#if defined(__has_declspec_attribute)
-#define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) \
-	__has_declspec_attribute(attribute)
-#else
-#define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0)
-#endif
-
-#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE)
-#undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE
-#endif
-#if defined(__has_declspec_attribute)
-#define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute, major, minor, patch) \
-	__has_declspec_attribute(attribute)
-#else
-#define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute, major, minor, patch) \
-	HEDLEY_GNUC_VERSION_CHECK(major, minor, patch)
-#endif
-
-#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE)
-#undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE
-#endif
-#if defined(__has_declspec_attribute)
-#define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute, major, minor, patch) \
-	__has_declspec_attribute(attribute)
-#else
-#define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute, major, minor, patch) \
-	HEDLEY_GCC_VERSION_CHECK(major, minor, patch)
-#endif
-
-#if defined(HEDLEY_HAS_WARNING)
-#undef HEDLEY_HAS_WARNING
-#endif
-#if defined(__has_warning)
-#define HEDLEY_HAS_WARNING(warning) __has_warning(warning)
-#else
-#define HEDLEY_HAS_WARNING(warning) (0)
-#endif
-
-#if defined(HEDLEY_GNUC_HAS_WARNING)
-#undef HEDLEY_GNUC_HAS_WARNING
-#endif
-#if defined(__has_warning)
-#define HEDLEY_GNUC_HAS_WARNING(warning, major, minor, patch) \
-	__has_warning(warning)
-#else
-#define HEDLEY_GNUC_HAS_WARNING(warning, major, minor, patch) \
-	HEDLEY_GNUC_VERSION_CHECK(major, minor, patch)
-#endif
-
-#if defined(HEDLEY_GCC_HAS_WARNING)
-#undef HEDLEY_GCC_HAS_WARNING
-#endif
-#if defined(__has_warning)
-#define HEDLEY_GCC_HAS_WARNING(warning, major, minor, patch) \
-	__has_warning(warning)
-#else
-#define HEDLEY_GCC_HAS_WARNING(warning, major, minor, patch) \
-	HEDLEY_GCC_VERSION_CHECK(major, minor, patch)
-#endif
-
-#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \
-	defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3, 0, 0) ||  \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                     \
-	HEDLEY_IAR_VERSION_CHECK(8, 0, 0) ||                        \
-	HEDLEY_PGI_VERSION_CHECK(18, 4, 0) ||                       \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                        \
-	HEDLEY_TI_VERSION_CHECK(15, 12, 0) ||                       \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(4, 7, 0) ||                   \
-	HEDLEY_TI_CL430_VERSION_CHECK(2, 0, 1) ||                   \
-	HEDLEY_TI_CL2000_VERSION_CHECK(6, 1, 0) ||                  \
-	HEDLEY_TI_CL6X_VERSION_CHECK(7, 0, 0) ||                    \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||                    \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) ||                   \
-	HEDLEY_CRAY_VERSION_CHECK(5, 0, 0) ||                       \
-	HEDLEY_TINYC_VERSION_CHECK(0, 9, 17) ||                     \
-	HEDLEY_SUNPRO_VERSION_CHECK(8, 0, 0) ||                     \
-	(HEDLEY_IBM_VERSION_CHECK(10, 1, 0) && defined(__C99_PRAGMA_OPERATOR))
-#define HEDLEY_PRAGMA(value) _Pragma(#value)
-#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0)
-#define HEDLEY_PRAGMA(value) __pragma(value)
-#else
-#define HEDLEY_PRAGMA(value)
-#endif
-
-#if defined(HEDLEY_DIAGNOSTIC_PUSH)
-#undef HEDLEY_DIAGNOSTIC_PUSH
-#endif
-#if defined(HEDLEY_DIAGNOSTIC_POP)
-#undef HEDLEY_DIAGNOSTIC_POP
-#endif
-#if defined(__clang__)
-#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push")
-#define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop")
-#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)")
-#define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)")
-#elif HEDLEY_GCC_VERSION_CHECK(4, 6, 0)
-#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push")
-#define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop")
-#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0) || \
-	HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
-#define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push))
-#define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop))
-#elif HEDLEY_ARM_VERSION_CHECK(5, 6, 0)
-#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push")
-#define HEDLEY_DIAGNOSTIC_POP _Pragma("pop")
-#elif HEDLEY_TI_VERSION_CHECK(15, 12, 0) ||       \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \
-	HEDLEY_TI_CL430_VERSION_CHECK(4, 4, 0) || \
-	HEDLEY_TI_CL6X_VERSION_CHECK(8, 1, 0) ||  \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||  \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
-#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push")
-#define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop")
-#elif HEDLEY_PELLES_VERSION_CHECK(2, 90, 0)
-#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)")
-#define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)")
-#else
-#define HEDLEY_DIAGNOSTIC_PUSH
-#define HEDLEY_DIAGNOSTIC_POP
-#endif
-
-/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for
-   HEDLEY INTERNAL USE ONLY.  API subject to change without notice. */
-#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_)
-#undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_
-#endif
-#if defined(__cplusplus)
-#if HEDLEY_HAS_WARNING("-Wc++98-compat")
-#if HEDLEY_HAS_WARNING("-Wc++17-extensions")
-#if HEDLEY_HAS_WARNING("-Wc++1z-extensions")
-#define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr)                  \
-	HEDLEY_DIAGNOSTIC_PUSH                                             \
-	_Pragma("clang diagnostic ignored \"-Wc++98-compat\"") _Pragma(    \
-		"clang diagnostic ignored \"-Wc++17-extensions\"")         \
-		_Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \
-			xpr HEDLEY_DIAGNOSTIC_POP
-#else
-#define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr)                  \
-	HEDLEY_DIAGNOSTIC_PUSH                                             \
-	_Pragma("clang diagnostic ignored \"-Wc++98-compat\"")             \
-		_Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \
-			xpr HEDLEY_DIAGNOSTIC_POP
-#endif
-#else
-#define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr)      \
-	HEDLEY_DIAGNOSTIC_PUSH                                 \
-	_Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \
-		xpr HEDLEY_DIAGNOSTIC_POP
-#endif
-#endif
-#endif
-#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_)
-#define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x
-#endif
-
-#if defined(HEDLEY_CONST_CAST)
-#undef HEDLEY_CONST_CAST
-#endif
-#if defined(__cplusplus)
-#define HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr))
-#elif HEDLEY_HAS_WARNING("-Wcast-qual") ||   \
-	HEDLEY_GCC_VERSION_CHECK(4, 6, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define HEDLEY_CONST_CAST(T, expr)                              \
-	(__extension__({                                        \
-		HEDLEY_DIAGNOSTIC_PUSH                          \
-		HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL((T)(expr)); \
-		HEDLEY_DIAGNOSTIC_POP                           \
-	}))
-#else
-#define HEDLEY_CONST_CAST(T, expr) ((T)(expr))
-#endif
-
-#if defined(HEDLEY_REINTERPRET_CAST)
-#undef HEDLEY_REINTERPRET_CAST
-#endif
-#if defined(__cplusplus)
-#define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr))
-#else
-#define HEDLEY_REINTERPRET_CAST(T, expr) ((T)(expr))
-#endif
-
-#if defined(HEDLEY_STATIC_CAST)
-#undef HEDLEY_STATIC_CAST
-#endif
-#if defined(__cplusplus)
-#define HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr))
-#else
-#define HEDLEY_STATIC_CAST(T, expr) ((T)(expr))
-#endif
-
-#if defined(HEDLEY_CPP_CAST)
-#undef HEDLEY_CPP_CAST
-#endif
-#if defined(__cplusplus)
-#if HEDLEY_HAS_WARNING("-Wold-style-cast")
-#define HEDLEY_CPP_CAST(T, expr)                                            \
-	HEDLEY_DIAGNOSTIC_PUSH                                              \
-	_Pragma("clang diagnostic ignored \"-Wold-style-cast\"")((T)(expr)) \
-		HEDLEY_DIAGNOSTIC_POP
-#elif HEDLEY_IAR_VERSION_CHECK(8, 3, 0)
-#define HEDLEY_CPP_CAST(T, expr) \
-	HEDLEY_DIAGNOSTIC_PUSH   \
-	_Pragma("diag_suppress=Pe137") HEDLEY_DIAGNOSTIC_POP
-#else
-#define HEDLEY_CPP_CAST(T, expr) ((T)(expr))
-#endif
-#else
-#define HEDLEY_CPP_CAST(T, expr) (expr)
-#endif
-
-#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED)
-#undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED
-#endif
-#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations")
-#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \
-	_Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"")
-#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \
-	_Pragma("warning(disable:1478 1786)")
-#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \
-	__pragma(warning(disable : 1478 1786))
-#elif HEDLEY_PGI_VERSION_CHECK(20, 7, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \
-	_Pragma("diag_suppress 1215,1216,1444,1445")
-#elif HEDLEY_PGI_VERSION_CHECK(17, 10, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444")
-#elif HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \
-	_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
-#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable : 4996))
-#elif HEDLEY_TI_VERSION_CHECK(15, 12, 0) ||         \
-	(HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) &&  \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||  \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) ||   \
-	(HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) && \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||  \
-	HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) ||  \
-	(HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) &&  \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||  \
-	HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) ||   \
-	(HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) &&   \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||  \
-	HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) ||    \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||    \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718")
-#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 13, 0) && !defined(__cplusplus)
-#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \
-	_Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)")
-#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 13, 0) && defined(__cplusplus)
-#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \
-	_Pragma("error_messages(off,symdeprecated,symdeprecated2)")
-#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \
-	_Pragma("diag_suppress=Pe1444,Pe1215")
-#elif HEDLEY_PELLES_VERSION_CHECK(2, 90, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)")
-#else
-#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED
-#endif
-
-#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS)
-#undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS
-#endif
-#if HEDLEY_HAS_WARNING("-Wunknown-pragmas")
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \
-	_Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"")
-#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \
-	_Pragma("warning(disable:161)")
-#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \
-	__pragma(warning(disable : 161))
-#elif HEDLEY_PGI_VERSION_CHECK(17, 10, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675")
-#elif HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \
-	_Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"")
-#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \
-	__pragma(warning(disable : 4068))
-#elif HEDLEY_TI_VERSION_CHECK(16, 9, 0) ||       \
-	HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) || \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 3, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163")
-#elif HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163")
-#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161")
-#else
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS
-#endif
-
-#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES)
-#undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES
-#endif
-#if HEDLEY_HAS_WARNING("-Wunknown-attributes")
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
-	_Pragma("clang diagnostic ignored \"-Wunknown-attributes\"")
-#elif HEDLEY_GCC_VERSION_CHECK(4, 6, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
-	_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
-#elif HEDLEY_INTEL_VERSION_CHECK(17, 0, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
-	_Pragma("warning(disable:1292)")
-#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
-	__pragma(warning(disable : 1292))
-#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
-	__pragma(warning(disable : 5030))
-#elif HEDLEY_PGI_VERSION_CHECK(20, 7, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
-	_Pragma("diag_suppress 1097,1098")
-#elif HEDLEY_PGI_VERSION_CHECK(17, 10, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
-	_Pragma("diag_suppress 1097")
-#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 14, 0) && defined(__cplusplus)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
-	_Pragma("error_messages(off,attrskipunsup)")
-#elif HEDLEY_TI_VERSION_CHECK(18, 1, 0) ||       \
-	HEDLEY_TI_CL6X_VERSION_CHECK(8, 3, 0) || \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
-	_Pragma("diag_suppress 1173")
-#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
-	_Pragma("diag_suppress=Pe1097")
-#else
-#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES
-#endif
-
-#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL)
-#undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL
-#endif
-#if HEDLEY_HAS_WARNING("-Wcast-qual")
-#define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \
-	_Pragma("clang diagnostic ignored \"-Wcast-qual\"")
-#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \
-	_Pragma("warning(disable:2203 2331)")
-#elif HEDLEY_GCC_VERSION_CHECK(3, 0, 0)
-#define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \
-	_Pragma("GCC diagnostic ignored \"-Wcast-qual\"")
-#else
-#define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL
-#endif
-
-#if defined(HEDLEY_DEPRECATED)
-#undef HEDLEY_DEPRECATED
-#endif
-#if defined(HEDLEY_DEPRECATED_FOR)
-#undef HEDLEY_DEPRECATED_FOR
-#endif
-#if HEDLEY_MSVC_VERSION_CHECK(14, 0, 0) || \
-	HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
-#define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " #since))
-#define HEDLEY_DEPRECATED_FOR(since, replacement) \
-	__declspec(deprecated("Since " #since "; use " #replacement))
-#elif HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) || \
-	HEDLEY_GCC_VERSION_CHECK(4, 5, 0) ||                     \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                  \
-	HEDLEY_ARM_VERSION_CHECK(5, 6, 0) ||                     \
-	HEDLEY_SUNPRO_VERSION_CHECK(5, 13, 0) ||                 \
-	HEDLEY_PGI_VERSION_CHECK(17, 10, 0) ||                   \
-	HEDLEY_TI_VERSION_CHECK(18, 1, 0) ||                     \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(18, 1, 0) ||               \
-	HEDLEY_TI_CL6X_VERSION_CHECK(8, 3, 0) ||                 \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||                 \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 3, 0)
-#define HEDLEY_DEPRECATED(since) \
-	__attribute__((__deprecated__("Since " #since)))
-#define HEDLEY_DEPRECATED_FOR(since, replacement) \
-	__attribute__((__deprecated__("Since " #since "; use " #replacement)))
-#elif defined(__cplusplus) && (__cplusplus >= 201402L)
-#define HEDLEY_DEPRECATED(since)                      \
-	HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
-		[[deprecated("Since " #since)]])
-#define HEDLEY_DEPRECATED_FOR(since, replacement)     \
-	HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
-		[[deprecated("Since " #since "; use " #replacement)]])
-#elif HEDLEY_HAS_ATTRIBUTE(deprecated) || HEDLEY_GCC_VERSION_CHECK(3, 1, 0) || \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                                   \
-	HEDLEY_TI_VERSION_CHECK(15, 12, 0) ||                                  \
-	(HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) &&                             \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                             \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) ||                              \
-	(HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) &&                            \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                             \
-	HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) ||                             \
-	(HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) &&                             \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                             \
-	HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) ||                              \
-	(HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) &&                              \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                             \
-	HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) ||                               \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||                               \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
-#define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__))
-#define HEDLEY_DEPRECATED_FOR(since, replacement) \
-	__attribute__((__deprecated__))
-#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) ||    \
-	HEDLEY_PELLES_VERSION_CHECK(6, 50, 0) || \
-	HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
-#define HEDLEY_DEPRECATED(since) __declspec(deprecated)
-#define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated)
-#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0)
-#define HEDLEY_DEPRECATED(since) _Pragma("deprecated")
-#define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated")
-#else
-#define HEDLEY_DEPRECATED(since)
-#define HEDLEY_DEPRECATED_FOR(since, replacement)
-#endif
-
-#if defined(HEDLEY_UNAVAILABLE)
-#undef HEDLEY_UNAVAILABLE
-#endif
-#if HEDLEY_HAS_ATTRIBUTE(warning) || HEDLEY_GCC_VERSION_CHECK(4, 3, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define HEDLEY_UNAVAILABLE(available_since) \
-	__attribute__((__warning__("Not available until " #available_since)))
-#else
-#define HEDLEY_UNAVAILABLE(available_since)
-#endif
-
-#if defined(HEDLEY_WARN_UNUSED_RESULT)
-#undef HEDLEY_WARN_UNUSED_RESULT
-#endif
-#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG)
-#undef HEDLEY_WARN_UNUSED_RESULT_MSG
-#endif
-#if HEDLEY_HAS_ATTRIBUTE(warn_unused_result) ||                            \
-	HEDLEY_GCC_VERSION_CHECK(3, 4, 0) ||                               \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                            \
-	HEDLEY_TI_VERSION_CHECK(15, 12, 0) ||                              \
-	(HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) &&                         \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                         \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) ||                          \
-	(HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) &&                        \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                         \
-	HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) ||                         \
-	(HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) &&                         \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                         \
-	HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) ||                          \
-	(HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) &&                          \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                         \
-	HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) ||                           \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||                           \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) ||                          \
-	(HEDLEY_SUNPRO_VERSION_CHECK(5, 15, 0) && defined(__cplusplus)) || \
-	HEDLEY_PGI_VERSION_CHECK(17, 10, 0)
-#define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
-#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \
-	__attribute__((__warn_unused_result__))
-#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L)
-#define HEDLEY_WARN_UNUSED_RESULT \
-	HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
-#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \
-	HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]])
-#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard)
-#define HEDLEY_WARN_UNUSED_RESULT \
-	HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
-#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \
-	HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
-#elif defined(_Check_return_) /* SAL */
-#define HEDLEY_WARN_UNUSED_RESULT _Check_return_
-#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_
-#else
-#define HEDLEY_WARN_UNUSED_RESULT
-#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg)
-#endif
-
-#if defined(HEDLEY_SENTINEL)
-#undef HEDLEY_SENTINEL
-#endif
-#if HEDLEY_HAS_ATTRIBUTE(sentinel) || HEDLEY_GCC_VERSION_CHECK(4, 0, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                            \
-	HEDLEY_ARM_VERSION_CHECK(5, 4, 0)
-#define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position)))
-#else
-#define HEDLEY_SENTINEL(position)
-#endif
-
-#if defined(HEDLEY_NO_RETURN)
-#undef HEDLEY_NO_RETURN
-#endif
-#if HEDLEY_IAR_VERSION_CHECK(8, 0, 0)
-#define HEDLEY_NO_RETURN __noreturn
-#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define HEDLEY_NO_RETURN __attribute__((__noreturn__))
-#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
-#define HEDLEY_NO_RETURN _Noreturn
-#elif defined(__cplusplus) && (__cplusplus >= 201103L)
-#define HEDLEY_NO_RETURN \
-	HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]])
-#elif HEDLEY_HAS_ATTRIBUTE(noreturn) || HEDLEY_GCC_VERSION_CHECK(3, 2, 0) || \
-	HEDLEY_SUNPRO_VERSION_CHECK(5, 11, 0) ||                             \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                                 \
-	HEDLEY_IBM_VERSION_CHECK(10, 1, 0) ||                                \
-	HEDLEY_TI_VERSION_CHECK(15, 12, 0) ||                                \
-	(HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) &&                           \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                           \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) ||                            \
-	(HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) &&                          \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                           \
-	HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) ||                           \
-	(HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) &&                           \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                           \
-	HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) ||                            \
-	(HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) &&                            \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                           \
-	HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) ||                             \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||                             \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
-#define HEDLEY_NO_RETURN __attribute__((__noreturn__))
-#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0)
-#define HEDLEY_NO_RETURN _Pragma("does_not_return")
-#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \
-	HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
-#define HEDLEY_NO_RETURN __declspec(noreturn)
-#elif HEDLEY_TI_CL6X_VERSION_CHECK(6, 0, 0) && defined(__cplusplus)
-#define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;")
-#elif HEDLEY_COMPCERT_VERSION_CHECK(3, 2, 0)
-#define HEDLEY_NO_RETURN __attribute((noreturn))
-#elif HEDLEY_PELLES_VERSION_CHECK(9, 0, 0)
-#define HEDLEY_NO_RETURN __declspec(noreturn)
-#else
-#define HEDLEY_NO_RETURN
-#endif
-
-#if defined(HEDLEY_NO_ESCAPE)
-#undef HEDLEY_NO_ESCAPE
-#endif
-#if HEDLEY_HAS_ATTRIBUTE(noescape)
-#define HEDLEY_NO_ESCAPE __attribute__((__noescape__))
-#else
-#define HEDLEY_NO_ESCAPE
-#endif
-
-#if defined(HEDLEY_UNREACHABLE)
-#undef HEDLEY_UNREACHABLE
-#endif
-#if defined(HEDLEY_UNREACHABLE_RETURN)
-#undef HEDLEY_UNREACHABLE_RETURN
-#endif
-#if defined(HEDLEY_ASSUME)
-#undef HEDLEY_ASSUME
-#endif
-#if HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) ||     \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
-	HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
-#define HEDLEY_ASSUME(expr) __assume(expr)
-#elif HEDLEY_HAS_BUILTIN(__builtin_assume)
-#define HEDLEY_ASSUME(expr) __builtin_assume(expr)
-#elif HEDLEY_TI_CL2000_VERSION_CHECK(6, 2, 0) || \
-	HEDLEY_TI_CL6X_VERSION_CHECK(4, 0, 0)
-#if defined(__cplusplus)
-#define HEDLEY_ASSUME(expr) std::_nassert(expr)
-#else
-#define HEDLEY_ASSUME(expr) _nassert(expr)
-#endif
-#endif
-#if (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && \
-     (!defined(HEDLEY_ARM_VERSION))) ||           \
-	HEDLEY_GCC_VERSION_CHECK(4, 5, 0) ||      \
-	HEDLEY_PGI_VERSION_CHECK(18, 10, 0) ||    \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||   \
-	HEDLEY_IBM_VERSION_CHECK(13, 1, 5)
-#define HEDLEY_UNREACHABLE() __builtin_unreachable()
-#elif defined(HEDLEY_ASSUME)
-#define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0)
-#endif
-#if !defined(HEDLEY_ASSUME)
-#if defined(HEDLEY_UNREACHABLE)
-#define HEDLEY_ASSUME(expr) \
-	HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1)))
-#else
-#define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr)
-#endif
-#endif
-#if defined(HEDLEY_UNREACHABLE)
-#if HEDLEY_TI_CL2000_VERSION_CHECK(6, 2, 0) || \
-	HEDLEY_TI_CL6X_VERSION_CHECK(4, 0, 0)
-#define HEDLEY_UNREACHABLE_RETURN(value) \
-	return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value))
-#else
-#define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE()
-#endif
-#else
-#define HEDLEY_UNREACHABLE_RETURN(value) return (value)
-#endif
-#if !defined(HEDLEY_UNREACHABLE)
-#define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0)
-#endif
-
-HEDLEY_DIAGNOSTIC_PUSH
-#if HEDLEY_HAS_WARNING("-Wpedantic")
-#pragma clang diagnostic ignored "-Wpedantic"
-#endif
-#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus)
-#pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
-#endif
-#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros", 4, 0, 0)
-#if defined(__clang__)
-#pragma clang diagnostic ignored "-Wvariadic-macros"
-#elif defined(HEDLEY_GCC_VERSION)
-#pragma GCC diagnostic ignored "-Wvariadic-macros"
-#endif
-#endif
-#if defined(HEDLEY_NON_NULL)
-#undef HEDLEY_NON_NULL
-#endif
-#if HEDLEY_HAS_ATTRIBUTE(nonnull) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                           \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0)
-#define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__)))
-#else
-#define HEDLEY_NON_NULL(...)
-#endif
-HEDLEY_DIAGNOSTIC_POP
-
-#if defined(HEDLEY_PRINTF_FORMAT)
-#undef HEDLEY_PRINTF_FORMAT
-#endif
-#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format, 4, 4, 0) && \
-	!defined(__USE_MINGW_ANSI_STDIO)
-#define HEDLEY_PRINTF_FORMAT(string_idx, first_to_check) \
-	__attribute__((__format__(ms_printf, string_idx, first_to_check)))
-#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format, 4, 4, 0) && \
-	defined(__USE_MINGW_ANSI_STDIO)
-#define HEDLEY_PRINTF_FORMAT(string_idx, first_to_check) \
-	__attribute__((__format__(gnu_printf, string_idx, first_to_check)))
-#elif HEDLEY_HAS_ATTRIBUTE(format) || HEDLEY_GCC_VERSION_CHECK(3, 1, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                            \
-	HEDLEY_ARM_VERSION_CHECK(5, 6, 0) ||                               \
-	HEDLEY_IBM_VERSION_CHECK(10, 1, 0) ||                              \
-	HEDLEY_TI_VERSION_CHECK(15, 12, 0) ||                              \
-	(HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) &&                         \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                         \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) ||                          \
-	(HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) &&                        \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                         \
-	HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) ||                         \
-	(HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) &&                         \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                         \
-	HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) ||                          \
-	(HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) &&                          \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                         \
-	HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) ||                           \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||                           \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
-#define HEDLEY_PRINTF_FORMAT(string_idx, first_to_check) \
-	__attribute__((__format__(__printf__, string_idx, first_to_check)))
-#elif HEDLEY_PELLES_VERSION_CHECK(6, 0, 0)
-#define HEDLEY_PRINTF_FORMAT(string_idx, first_to_check) \
-	__declspec(vaformat(printf, string_idx, first_to_check))
-#else
-#define HEDLEY_PRINTF_FORMAT(string_idx, first_to_check)
-#endif
-
-#if defined(HEDLEY_CONSTEXPR)
-#undef HEDLEY_CONSTEXPR
-#endif
-#if defined(__cplusplus)
-#if __cplusplus >= 201103L
-#define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr)
-#endif
-#endif
-#if !defined(HEDLEY_CONSTEXPR)
-#define HEDLEY_CONSTEXPR
-#endif
-
-#if defined(HEDLEY_PREDICT)
-#undef HEDLEY_PREDICT
-#endif
-#if defined(HEDLEY_LIKELY)
-#undef HEDLEY_LIKELY
-#endif
-#if defined(HEDLEY_UNLIKELY)
-#undef HEDLEY_UNLIKELY
-#endif
-#if defined(HEDLEY_UNPREDICTABLE)
-#undef HEDLEY_UNPREDICTABLE
-#endif
-#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable)
-#define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr))
-#endif
-#if (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && \
-     !defined(HEDLEY_PGI_VERSION)) ||                         \
-	HEDLEY_GCC_VERSION_CHECK(9, 0, 0)
-#define HEDLEY_PREDICT(expr, value, probability) \
-	__builtin_expect_with_probability((expr), (value), (probability))
-#define HEDLEY_PREDICT_TRUE(expr, probability) \
-	__builtin_expect_with_probability(!!(expr), 1, (probability))
-#define HEDLEY_PREDICT_FALSE(expr, probability) \
-	__builtin_expect_with_probability(!!(expr), 0, (probability))
-#define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1)
-#define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
-#elif (HEDLEY_HAS_BUILTIN(__builtin_expect) &&                             \
-       !defined(HEDLEY_INTEL_CL_VERSION)) ||                               \
-	HEDLEY_GCC_VERSION_CHECK(3, 0, 0) ||                               \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                            \
-	(HEDLEY_SUNPRO_VERSION_CHECK(5, 15, 0) && defined(__cplusplus)) || \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                               \
-	HEDLEY_IBM_VERSION_CHECK(10, 1, 0) ||                              \
-	HEDLEY_TI_VERSION_CHECK(15, 12, 0) ||                              \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(4, 7, 0) ||                          \
-	HEDLEY_TI_CL430_VERSION_CHECK(3, 1, 0) ||                          \
-	HEDLEY_TI_CL2000_VERSION_CHECK(6, 1, 0) ||                         \
-	HEDLEY_TI_CL6X_VERSION_CHECK(6, 1, 0) ||                           \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||                           \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) ||                          \
-	HEDLEY_TINYC_VERSION_CHECK(0, 9, 27) ||                            \
-	HEDLEY_CRAY_VERSION_CHECK(8, 1, 0)
-#define HEDLEY_PREDICT(expr, expected, probability)     \
-	(((probability) >= 0.9)                         \
-		 ? __builtin_expect((expr), (expected)) \
-		 : (HEDLEY_STATIC_CAST(void, expected), (expr)))
-#define HEDLEY_PREDICT_TRUE(expr, probability)                      \
-	(__extension__({                                            \
-		double hedley_probability_ = (probability);         \
-		((hedley_probability_ >= 0.9)                       \
-			 ? __builtin_expect(!!(expr), 1)            \
-			 : ((hedley_probability_ <= 0.1)            \
-				    ? __builtin_expect(!!(expr), 0) \
-				    : !!(expr)));                   \
-	}))
-#define HEDLEY_PREDICT_FALSE(expr, probability)                     \
-	(__extension__({                                            \
-		double hedley_probability_ = (probability);         \
-		((hedley_probability_ >= 0.9)                       \
-			 ? __builtin_expect(!!(expr), 0)            \
-			 : ((hedley_probability_ <= 0.1)            \
-				    ? __builtin_expect(!!(expr), 1) \
-				    : !!(expr)));                   \
-	}))
-#define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1)
-#define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
-#else
-#define HEDLEY_PREDICT(expr, expected, probability) \
-	(HEDLEY_STATIC_CAST(void, expected), (expr))
-#define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr))
-#define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr))
-#define HEDLEY_LIKELY(expr) (!!(expr))
-#define HEDLEY_UNLIKELY(expr) (!!(expr))
-#endif
-#if !defined(HEDLEY_UNPREDICTABLE)
-#define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5)
-#endif
-
-#if defined(HEDLEY_MALLOC)
-#undef HEDLEY_MALLOC
-#endif
-#if HEDLEY_HAS_ATTRIBUTE(malloc) || HEDLEY_GCC_VERSION_CHECK(3, 1, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                          \
-	HEDLEY_SUNPRO_VERSION_CHECK(5, 11, 0) ||                         \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                             \
-	HEDLEY_IBM_VERSION_CHECK(12, 1, 0) ||                            \
-	HEDLEY_TI_VERSION_CHECK(15, 12, 0) ||                            \
-	(HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) &&                       \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                       \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) ||                        \
-	(HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) &&                      \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                       \
-	HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) ||                       \
-	(HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) &&                       \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                       \
-	HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) ||                        \
-	(HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) &&                        \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                       \
-	HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) ||                         \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||                         \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
-#define HEDLEY_MALLOC __attribute__((__malloc__))
-#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0)
-#define HEDLEY_MALLOC _Pragma("returns_new_memory")
-#elif HEDLEY_MSVC_VERSION_CHECK(14, 0, 0) || \
-	HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
-#define HEDLEY_MALLOC __declspec(restrict)
-#else
-#define HEDLEY_MALLOC
-#endif
-
-#if defined(HEDLEY_PURE)
-#undef HEDLEY_PURE
-#endif
-#if HEDLEY_HAS_ATTRIBUTE(pure) || HEDLEY_GCC_VERSION_CHECK(2, 96, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                         \
-	HEDLEY_SUNPRO_VERSION_CHECK(5, 11, 0) ||                        \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                            \
-	HEDLEY_IBM_VERSION_CHECK(10, 1, 0) ||                           \
-	HEDLEY_TI_VERSION_CHECK(15, 12, 0) ||                           \
-	(HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) &&                      \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                      \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) ||                       \
-	(HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) &&                     \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                      \
-	HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) ||                      \
-	(HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) &&                      \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                      \
-	HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) ||                       \
-	(HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) &&                       \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                      \
-	HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) ||                        \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||                        \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) ||                       \
-	HEDLEY_PGI_VERSION_CHECK(17, 10, 0)
-#define HEDLEY_PURE __attribute__((__pure__))
-#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0)
-#define HEDLEY_PURE _Pragma("does_not_write_global_data")
-#elif defined(__cplusplus) && (HEDLEY_TI_CL430_VERSION_CHECK(2, 0, 1) || \
-			       HEDLEY_TI_CL6X_VERSION_CHECK(4, 0, 0) ||  \
-			       HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0))
-#define HEDLEY_PURE _Pragma("FUNC_IS_PURE;")
-#else
-#define HEDLEY_PURE
-#endif
-
-#if defined(HEDLEY_CONST)
-#undef HEDLEY_CONST
-#endif
-#if HEDLEY_HAS_ATTRIBUTE(const) || HEDLEY_GCC_VERSION_CHECK(2, 5, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                         \
-	HEDLEY_SUNPRO_VERSION_CHECK(5, 11, 0) ||                        \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                            \
-	HEDLEY_IBM_VERSION_CHECK(10, 1, 0) ||                           \
-	HEDLEY_TI_VERSION_CHECK(15, 12, 0) ||                           \
-	(HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) &&                      \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                      \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) ||                       \
-	(HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) &&                     \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                      \
-	HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) ||                      \
-	(HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) &&                      \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                      \
-	HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) ||                       \
-	(HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) &&                       \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                      \
-	HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) ||                        \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||                        \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) ||                       \
-	HEDLEY_PGI_VERSION_CHECK(17, 10, 0)
-#define HEDLEY_CONST __attribute__((__const__))
-#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0)
-#define HEDLEY_CONST _Pragma("no_side_effect")
-#else
-#define HEDLEY_CONST HEDLEY_PURE
-#endif
-
-#if defined(HEDLEY_RESTRICT)
-#undef HEDLEY_RESTRICT
-#endif
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
-	!defined(__cplusplus)
-#define HEDLEY_RESTRICT restrict
-#elif HEDLEY_GCC_VERSION_CHECK(3, 1, 0) ||                                 \
-	HEDLEY_MSVC_VERSION_CHECK(14, 0, 0) ||                             \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                            \
-	HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) ||                       \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                               \
-	HEDLEY_IBM_VERSION_CHECK(10, 1, 0) ||                              \
-	HEDLEY_PGI_VERSION_CHECK(17, 10, 0) ||                             \
-	HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) ||                          \
-	HEDLEY_TI_CL2000_VERSION_CHECK(6, 2, 4) ||                         \
-	HEDLEY_TI_CL6X_VERSION_CHECK(8, 1, 0) ||                           \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||                           \
-	(HEDLEY_SUNPRO_VERSION_CHECK(5, 14, 0) && defined(__cplusplus)) || \
-	HEDLEY_IAR_VERSION_CHECK(8, 0, 0) || defined(__clang__)
-#define HEDLEY_RESTRICT __restrict
-#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 3, 0) && !defined(__cplusplus)
-#define HEDLEY_RESTRICT _Restrict
-#else
-#define HEDLEY_RESTRICT
-#endif
-
-#if defined(HEDLEY_INLINE)
-#undef HEDLEY_INLINE
-#endif
-#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \
-	(defined(__cplusplus) && (__cplusplus >= 199711L))
-#define HEDLEY_INLINE inline
-#elif defined(HEDLEY_GCC_VERSION) || HEDLEY_ARM_VERSION_CHECK(6, 2, 0)
-#define HEDLEY_INLINE __inline__
-#elif HEDLEY_MSVC_VERSION_CHECK(12, 0, 0) ||         \
-	HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) || \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||         \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(5, 1, 0) ||    \
-	HEDLEY_TI_CL430_VERSION_CHECK(3, 1, 0) ||    \
-	HEDLEY_TI_CL2000_VERSION_CHECK(6, 2, 0) ||   \
-	HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) ||     \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||     \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
-#define HEDLEY_INLINE __inline
-#else
-#define HEDLEY_INLINE
-#endif
-
-#if defined(HEDLEY_ALWAYS_INLINE)
-#undef HEDLEY_ALWAYS_INLINE
-#endif
-#if HEDLEY_HAS_ATTRIBUTE(always_inline) ||          \
-	HEDLEY_GCC_VERSION_CHECK(4, 0, 0) ||        \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||     \
-	HEDLEY_SUNPRO_VERSION_CHECK(5, 11, 0) ||    \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||        \
-	HEDLEY_IBM_VERSION_CHECK(10, 1, 0) ||       \
-	HEDLEY_TI_VERSION_CHECK(15, 12, 0) ||       \
-	(HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) &&  \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||  \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) ||   \
-	(HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) && \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||  \
-	HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) ||  \
-	(HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) &&  \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||  \
-	HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) ||   \
-	(HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) &&   \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||  \
-	HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) ||    \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||    \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
-#define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE
-#elif HEDLEY_MSVC_VERSION_CHECK(12, 0, 0) || \
-	HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
-#define HEDLEY_ALWAYS_INLINE __forceinline
-#elif defined(__cplusplus) && (HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) ||  \
-			       HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) ||  \
-			       HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) || \
-			       HEDLEY_TI_CL6X_VERSION_CHECK(6, 1, 0) ||   \
-			       HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||   \
-			       HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0))
-#define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;")
-#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0)
-#define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced")
-#else
-#define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE
-#endif
-
-#if defined(HEDLEY_NEVER_INLINE)
-#undef HEDLEY_NEVER_INLINE
-#endif
-#if HEDLEY_HAS_ATTRIBUTE(noinline) || HEDLEY_GCC_VERSION_CHECK(4, 0, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                            \
-	HEDLEY_SUNPRO_VERSION_CHECK(5, 11, 0) ||                           \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                               \
-	HEDLEY_IBM_VERSION_CHECK(10, 1, 0) ||                              \
-	HEDLEY_TI_VERSION_CHECK(15, 12, 0) ||                              \
-	(HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) &&                         \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                         \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) ||                          \
-	(HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) &&                        \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                         \
-	HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) ||                         \
-	(HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) &&                         \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                         \
-	HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) ||                          \
-	(HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) &&                          \
-	 defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) ||                         \
-	HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) ||                           \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||                           \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
-#define HEDLEY_NEVER_INLINE __attribute__((__noinline__))
-#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \
-	HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
-#define HEDLEY_NEVER_INLINE __declspec(noinline)
-#elif HEDLEY_PGI_VERSION_CHECK(10, 2, 0)
-#define HEDLEY_NEVER_INLINE _Pragma("noinline")
-#elif HEDLEY_TI_CL6X_VERSION_CHECK(6, 0, 0) && defined(__cplusplus)
-#define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;")
-#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0)
-#define HEDLEY_NEVER_INLINE _Pragma("inline=never")
-#elif HEDLEY_COMPCERT_VERSION_CHECK(3, 2, 0)
-#define HEDLEY_NEVER_INLINE __attribute((noinline))
-#elif HEDLEY_PELLES_VERSION_CHECK(9, 0, 0)
-#define HEDLEY_NEVER_INLINE __declspec(noinline)
-#else
-#define HEDLEY_NEVER_INLINE
-#endif
-
-#if defined(HEDLEY_PRIVATE)
-#undef HEDLEY_PRIVATE
-#endif
-#if defined(HEDLEY_PUBLIC)
-#undef HEDLEY_PUBLIC
-#endif
-#if defined(HEDLEY_IMPORT)
-#undef HEDLEY_IMPORT
-#endif
-#if defined(_WIN32) || defined(__CYGWIN__)
-#define HEDLEY_PRIVATE
-#define HEDLEY_PUBLIC __declspec(dllexport)
-#define HEDLEY_IMPORT __declspec(dllimport)
-#else
-#if HEDLEY_HAS_ATTRIBUTE(visibility) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \
-	HEDLEY_SUNPRO_VERSION_CHECK(5, 11, 0) ||                             \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                              \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                                 \
-	HEDLEY_IBM_VERSION_CHECK(13, 1, 0) ||                                \
-	(defined(__TI_EABI__) && ((HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) &&  \
-				   defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \
-				  HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0)))
-#define HEDLEY_PRIVATE __attribute__((__visibility__("hidden")))
-#define HEDLEY_PUBLIC __attribute__((__visibility__("default")))
-#else
-#define HEDLEY_PRIVATE
-#define HEDLEY_PUBLIC
-#endif
-#define HEDLEY_IMPORT extern
-#endif
-
-#if defined(HEDLEY_NO_THROW)
-#undef HEDLEY_NO_THROW
-#endif
-#if HEDLEY_HAS_ATTRIBUTE(nothrow) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define HEDLEY_NO_THROW __attribute__((__nothrow__))
-#elif HEDLEY_MSVC_VERSION_CHECK(13, 1, 0) ||         \
-	HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) || \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0)
-#define HEDLEY_NO_THROW __declspec(nothrow)
-#else
-#define HEDLEY_NO_THROW
-#endif
-
-#if defined(HEDLEY_FALL_THROUGH)
-#undef HEDLEY_FALL_THROUGH
-#endif
-#if HEDLEY_HAS_ATTRIBUTE(fallthrough) || HEDLEY_GCC_VERSION_CHECK(7, 0, 0)
-#define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__))
-#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang, fallthrough)
-#define HEDLEY_FALL_THROUGH \
-	HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]])
-#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough)
-#define HEDLEY_FALL_THROUGH \
-	HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]])
-#elif defined(__fallthrough) /* SAL */
-#define HEDLEY_FALL_THROUGH __fallthrough
-#else
-#define HEDLEY_FALL_THROUGH
-#endif
-
-#if defined(HEDLEY_RETURNS_NON_NULL)
-#undef HEDLEY_RETURNS_NON_NULL
-#endif
-#if HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || HEDLEY_GCC_VERSION_CHECK(4, 9, 0)
-#define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__))
-#elif defined(_Ret_notnull_) /* SAL */
-#define HEDLEY_RETURNS_NON_NULL _Ret_notnull_
-#else
-#define HEDLEY_RETURNS_NON_NULL
-#endif
-
-#if defined(HEDLEY_ARRAY_PARAM)
-#undef HEDLEY_ARRAY_PARAM
-#endif
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
-	!defined(__STDC_NO_VLA__) && !defined(__cplusplus) &&     \
-	!defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_TINYC_VERSION)
-#define HEDLEY_ARRAY_PARAM(name) (name)
-#else
-#define HEDLEY_ARRAY_PARAM(name)
-#endif
-
-#if defined(HEDLEY_IS_CONSTANT)
-#undef HEDLEY_IS_CONSTANT
-#endif
-#if defined(HEDLEY_REQUIRE_CONSTEXPR)
-#undef HEDLEY_REQUIRE_CONSTEXPR
-#endif
-/* HEDLEY_IS_CONSTEXPR_ is for
-   HEDLEY INTERNAL USE ONLY.  API subject to change without notice. */
-#if defined(HEDLEY_IS_CONSTEXPR_)
-#undef HEDLEY_IS_CONSTEXPR_
-#endif
-#if HEDLEY_HAS_BUILTIN(__builtin_constant_p) ||                             \
-	HEDLEY_GCC_VERSION_CHECK(3, 4, 0) ||                                \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                             \
-	HEDLEY_TINYC_VERSION_CHECK(0, 9, 19) ||                             \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                                \
-	HEDLEY_IBM_VERSION_CHECK(13, 1, 0) ||                               \
-	HEDLEY_TI_CL6X_VERSION_CHECK(6, 1, 0) ||                            \
-	(HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0) && !defined(__cplusplus)) || \
-	HEDLEY_CRAY_VERSION_CHECK(8, 1, 0)
-#define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr)
-#endif
-#if !defined(__cplusplus)
-#if HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \
-	HEDLEY_GCC_VERSION_CHECK(3, 4, 0) ||            \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||         \
-	HEDLEY_IBM_VERSION_CHECK(13, 1, 0) ||           \
-	HEDLEY_CRAY_VERSION_CHECK(8, 1, 0) ||           \
-	HEDLEY_ARM_VERSION_CHECK(5, 4, 0) ||            \
-	HEDLEY_TINYC_VERSION_CHECK(0, 9, 24)
-#if defined(__INTPTR_TYPE__)
-#define HEDLEY_IS_CONSTEXPR_(expr)                                    \
-	__builtin_types_compatible_p(                                 \
-		__typeof__((1 ? (void *)((__INTPTR_TYPE__)((expr)*0)) \
-			      : (int *)0)),                           \
-		int *)
-#else
-#include <stdint.h>
-#define HEDLEY_IS_CONSTEXPR_(expr)                                           \
-	__builtin_types_compatible_p(                                        \
-		__typeof__((1 ? (void *)((intptr_t)((expr)*0)) : (int *)0)), \
-		int *)
-#endif
-#elif (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) &&      \
-       !defined(HEDLEY_SUNPRO_VERSION) && !defined(HEDLEY_PGI_VERSION) && \
-       !defined(HEDLEY_IAR_VERSION)) ||                                   \
-	HEDLEY_HAS_EXTENSION(c_generic_selections) ||                     \
-	HEDLEY_GCC_VERSION_CHECK(4, 9, 0) ||                              \
-	HEDLEY_INTEL_VERSION_CHECK(17, 0, 0) ||                           \
-	HEDLEY_IBM_VERSION_CHECK(12, 1, 0) ||                             \
-	HEDLEY_ARM_VERSION_CHECK(5, 3, 0)
-#if defined(__INTPTR_TYPE__)
-#define HEDLEY_IS_CONSTEXPR_(expr)                                       \
-	_Generic((1 ? (void *)((__INTPTR_TYPE__)((expr)*0)) : (int *)0), \
-		 int * : 1, void * : 0)
-#else
-#include <stdint.h>
-#define HEDLEY_IS_CONSTEXPR_(expr) \
-	_Generic((1 ? (void *)((intptr_t)*0) : (int *)0), int * : 1, void * : 0)
-#endif
-#elif defined(HEDLEY_GCC_VERSION) || defined(HEDLEY_INTEL_VERSION) ||         \
-	defined(HEDLEY_TINYC_VERSION) || defined(HEDLEY_TI_ARMCL_VERSION) ||  \
-	HEDLEY_TI_CL430_VERSION_CHECK(18, 12, 0) ||                           \
-	defined(HEDLEY_TI_CL2000_VERSION) ||                                  \
-	defined(HEDLEY_TI_CL6X_VERSION) || defined(HEDLEY_TI_CL7X_VERSION) || \
-	defined(HEDLEY_TI_CLPRU_VERSION) || defined(__clang__)
-#define HEDLEY_IS_CONSTEXPR_(expr)                                       \
-	(sizeof(void) != sizeof(*(1 ? ((void *)((expr)*0L)) : ((struct { \
-		 char v[sizeof(void) * 2];                               \
-	 } *)1))))
-#endif
-#endif
-#if defined(HEDLEY_IS_CONSTEXPR_)
-#if !defined(HEDLEY_IS_CONSTANT)
-#define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr)
-#endif
-#define HEDLEY_REQUIRE_CONSTEXPR(expr) \
-	(HEDLEY_IS_CONSTEXPR_(expr) ? (expr) : (-1))
-#else
-#if !defined(HEDLEY_IS_CONSTANT)
-#define HEDLEY_IS_CONSTANT(expr) (0)
-#endif
-#define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr)
-#endif
-
-#if defined(HEDLEY_BEGIN_C_DECLS)
-#undef HEDLEY_BEGIN_C_DECLS
-#endif
-#if defined(HEDLEY_END_C_DECLS)
-#undef HEDLEY_END_C_DECLS
-#endif
-#if defined(HEDLEY_C_DECL)
-#undef HEDLEY_C_DECL
-#endif
-#if defined(__cplusplus)
-#define HEDLEY_BEGIN_C_DECLS extern "C" {
-#define HEDLEY_END_C_DECLS }
-#define HEDLEY_C_DECL extern "C"
-#else
-#define HEDLEY_BEGIN_C_DECLS
-#define HEDLEY_END_C_DECLS
-#define HEDLEY_C_DECL
-#endif
-
-#if defined(HEDLEY_STATIC_ASSERT)
-#undef HEDLEY_STATIC_ASSERT
-#endif
-#if !defined(__cplusplus) &&                                             \
-	((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \
-	 (HEDLEY_HAS_FEATURE(c_static_assert) &&                         \
-	  !defined(HEDLEY_INTEL_CL_VERSION)) ||                          \
-	 HEDLEY_GCC_VERSION_CHECK(6, 0, 0) ||                            \
-	 HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || defined(_Static_assert))
-#define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message)
-#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \
-	HEDLEY_MSVC_VERSION_CHECK(16, 0, 0) ||              \
-	HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
-#define HEDLEY_STATIC_ASSERT(expr, message)           \
-	HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
-		static_assert(expr, message))
-#else
-#define HEDLEY_STATIC_ASSERT(expr, message)
-#endif
-
-#if defined(HEDLEY_NULL)
-#undef HEDLEY_NULL
-#endif
-#if defined(__cplusplus)
-#if __cplusplus >= 201103L
-#define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr)
-#elif defined(NULL)
-#define HEDLEY_NULL NULL
-#else
-#define HEDLEY_NULL HEDLEY_STATIC_CAST(void *, 0)
-#endif
-#elif defined(NULL)
-#define HEDLEY_NULL NULL
-#else
-#define HEDLEY_NULL ((void *)0)
-#endif
-
-#if defined(HEDLEY_MESSAGE)
-#undef HEDLEY_MESSAGE
-#endif
-#if HEDLEY_HAS_WARNING("-Wunknown-pragmas")
-#define HEDLEY_MESSAGE(msg)                       \
-	HEDLEY_DIAGNOSTIC_PUSH                    \
-	HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \
-	HEDLEY_PRAGMA(message msg)                \
-	HEDLEY_DIAGNOSTIC_POP
-#elif HEDLEY_GCC_VERSION_CHECK(4, 4, 0) || HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg)
-#elif HEDLEY_CRAY_VERSION_CHECK(5, 0, 0)
-#define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg)
-#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0)
-#define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg))
-#elif HEDLEY_PELLES_VERSION_CHECK(2, 0, 0)
-#define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg))
-#else
-#define HEDLEY_MESSAGE(msg)
-#endif
-
-#if defined(HEDLEY_WARNING)
-#undef HEDLEY_WARNING
-#endif
-#if HEDLEY_HAS_WARNING("-Wunknown-pragmas")
-#define HEDLEY_WARNING(msg)                       \
-	HEDLEY_DIAGNOSTIC_PUSH                    \
-	HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \
-	HEDLEY_PRAGMA(clang warning msg)          \
-	HEDLEY_DIAGNOSTIC_POP
-#elif HEDLEY_GCC_VERSION_CHECK(4, 8, 0) ||    \
-	HEDLEY_PGI_VERSION_CHECK(18, 4, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg)
-#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0) || \
-	HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
-#define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg))
-#else
-#define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg)
-#endif
-
-#if defined(HEDLEY_REQUIRE)
-#undef HEDLEY_REQUIRE
-#endif
-#if defined(HEDLEY_REQUIRE_MSG)
-#undef HEDLEY_REQUIRE_MSG
-#endif
-#if HEDLEY_HAS_ATTRIBUTE(diagnose_if)
-#if HEDLEY_HAS_WARNING("-Wgcc-compat")
-#define HEDLEY_REQUIRE(expr)                                          \
-	HEDLEY_DIAGNOSTIC_PUSH                                        \
-	_Pragma("clang diagnostic ignored \"-Wgcc-compat\"")          \
-		__attribute__((diagnose_if(!(expr), #expr, "error"))) \
-		HEDLEY_DIAGNOSTIC_POP
-#define HEDLEY_REQUIRE_MSG(expr, msg)                               \
-	HEDLEY_DIAGNOSTIC_PUSH                                      \
-	_Pragma("clang diagnostic ignored \"-Wgcc-compat\"")        \
-		__attribute__((diagnose_if(!(expr), msg, "error"))) \
-		HEDLEY_DIAGNOSTIC_POP
-#else
-#define HEDLEY_REQUIRE(expr) \
-	__attribute__((diagnose_if(!(expr), #expr, "error")))
-#define HEDLEY_REQUIRE_MSG(expr, msg) \
-	__attribute__((diagnose_if(!(expr), msg, "error")))
-#endif
-#else
-#define HEDLEY_REQUIRE(expr)
-#define HEDLEY_REQUIRE_MSG(expr, msg)
-#endif
-
-#if defined(HEDLEY_FLAGS)
-#undef HEDLEY_FLAGS
-#endif
-#if HEDLEY_HAS_ATTRIBUTE(flag_enum)
-#define HEDLEY_FLAGS __attribute__((__flag_enum__))
-#else
-#define HEDLEY_FLAGS
-#endif
-
-#if defined(HEDLEY_FLAGS_CAST)
-#undef HEDLEY_FLAGS_CAST
-#endif
-#if HEDLEY_INTEL_VERSION_CHECK(19, 0, 0)
-#define HEDLEY_FLAGS_CAST(T, expr)                          \
-	(__extension__({                                    \
-		HEDLEY_DIAGNOSTIC_PUSH                      \
-		_Pragma("warning(disable:188)")((T)(expr)); \
-		HEDLEY_DIAGNOSTIC_POP                       \
-	}))
-#else
-#define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr)
-#endif
-
-#if defined(HEDLEY_EMPTY_BASES)
-#undef HEDLEY_EMPTY_BASES
-#endif
-#if (HEDLEY_MSVC_VERSION_CHECK(19, 0, 23918) && \
-     !HEDLEY_MSVC_VERSION_CHECK(20, 0, 0)) ||   \
-	HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
-#define HEDLEY_EMPTY_BASES __declspec(empty_bases)
-#else
-#define HEDLEY_EMPTY_BASES
-#endif
-
-/* Remaining macros are deprecated. */
-
-#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK)
-#undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK
-#endif
-#if defined(__clang__)
-#define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major, minor, patch) (0)
-#else
-#define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major, minor, patch) \
-	HEDLEY_GCC_VERSION_CHECK(major, minor, patch)
-#endif
-
-#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE)
-#undef HEDLEY_CLANG_HAS_ATTRIBUTE
-#endif
-#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute)
-
-#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE)
-#undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE
-#endif
-#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) \
-	HEDLEY_HAS_CPP_ATTRIBUTE(attribute)
-
-#if defined(HEDLEY_CLANG_HAS_BUILTIN)
-#undef HEDLEY_CLANG_HAS_BUILTIN
-#endif
-#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin)
-
-#if defined(HEDLEY_CLANG_HAS_FEATURE)
-#undef HEDLEY_CLANG_HAS_FEATURE
-#endif
-#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature)
-
-#if defined(HEDLEY_CLANG_HAS_EXTENSION)
-#undef HEDLEY_CLANG_HAS_EXTENSION
-#endif
-#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension)
-
-#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE)
-#undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE
-#endif
-#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) \
-	HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute)
-
-#if defined(HEDLEY_CLANG_HAS_WARNING)
-#undef HEDLEY_CLANG_HAS_WARNING
-#endif
-#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning)
-
-#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */

+ 0 - 481
libobs/util/simde/simde-align.h

@@ -1,481 +0,0 @@
-/* Alignment
- * Created by Evan Nemerson <[email protected]>
- *
- *   To the extent possible under law, the authors have waived all
- *   copyright and related or neighboring rights to this code.  For
- *   details, see the Creative Commons Zero 1.0 Universal license at
- *   <https://creativecommons.org/publicdomain/zero/1.0/>
- *
- * SPDX-License-Identifier: CC0-1.0
- *
- **********************************************************************
- *
- * This is portability layer which should help iron out some
- * differences across various compilers, as well as various verisons of
- * C and C++.
- *
- * It was originally developed for SIMD Everywhere
- * (<https://github.com/simd-everywhere/simde>), but since its only
- * dependency is Hedley (<https://nemequ.github.io/hedley>, also CC0)
- * it can easily be used in other projects, so please feel free to do
- * so.
- *
- * If you do use this in your project, please keep a link to SIMDe in
- * your code to remind you where to report any bugs and/or check for
- * updated versions.
- *
- * # API Overview
- *
- * The API has several parts, and most macros have a few variations.
- * There are APIs for declaring aligned fields/variables, optimization
- * hints, and run-time alignment checks.
- *
- * Briefly, macros ending with "_TO" take numeric values and are great
- * when you know the value you would like to use.  Macros ending with
- * "_LIKE", on the other hand, accept a type and are used when you want
- * to use the alignment of a type instead of hardcoding a value.
- *
- * Documentation for each section of the API is inline.
- *
- * True to form, MSVC is the main problem and imposes several
- * limitations on the effectiveness of the APIs.  Detailed descriptions
- * of the limitations of each macro are inline, but in general:
- *
- *  * On C11+ or C++11+ code written using this API will work.  The
- *    ASSUME macros may or may not generate a hint to the compiler, but
- *    that is only an optimization issue and will not actually cause
- *    failures.
- *  * If you're using pretty much any compiler other than MSVC,
- *    everything should basically work as well as in C11/C++11.
- */
-
-#if !defined(SIMDE_ALIGN_H)
-#define SIMDE_ALIGN_H
-
-#include "hedley.h"
-
-/* I know this seems a little silly, but some non-hosted compilers
- * don't have stddef.h, so we try to accomodate them. */
-#if !defined(SIMDE_ALIGN_SIZE_T_)
-#if defined(__SIZE_TYPE__)
-#define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__
-#elif defined(__SIZE_T_TYPE__)
-#define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__
-#elif defined(__cplusplus)
-#include <cstddef>
-#define SIMDE_ALIGN_SIZE_T_ size_t
-#else
-#include <stddef.h>
-#define SIMDE_ALIGN_SIZE_T_ size_t
-#endif
-#endif
-
-#if !defined(SIMDE_ALIGN_INTPTR_T_)
-#if defined(__INTPTR_TYPE__)
-#define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__
-#elif defined(__PTRDIFF_TYPE__)
-#define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__
-#elif defined(__PTRDIFF_T_TYPE__)
-#define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__
-#elif defined(__cplusplus)
-#include <cstddef>
-#define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t
-#else
-#include <stddef.h>
-#define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t
-#endif
-#endif
-
-#if defined(SIMDE_ALIGN_DEBUG)
-#if defined(__cplusplus)
-#include <cstdio>
-#else
-#include <stdio.h>
-#endif
-#endif
-
-/* SIMDE_ALIGN_OF(Type)
- *
- * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or
- * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler.
- * It isn't defined everywhere (only when the compiler has some alignof-
- * like feature we can use to implement it), but it should work in most
- * modern compilers, as well as C11 and C++11.
- *
- * If we can't find an implementation for SIMDE_ALIGN_OF then the macro
- * will not be defined, so if you can handle that situation sensibly
- * you may need to sprinkle some ifdefs into your code.
- */
-#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \
-	(0 && HEDLEY_HAS_FEATURE(c_alignof))
-#define SIMDE_ALIGN_OF(Type) _Alignof(Type)
-#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \
-	(0 && HEDLEY_HAS_FEATURE(cxx_alignof))
-#define SIMDE_ALIGN_OF(Type) alignof(Type)
-#elif HEDLEY_GCC_VERSION_CHECK(2, 95, 0) ||                                    \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                                   \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                                \
-	HEDLEY_SUNPRO_VERSION_CHECK(5, 13, 0) ||                               \
-	HEDLEY_TINYC_VERSION_CHECK(0, 9, 24) ||                                \
-	HEDLEY_PGI_VERSION_CHECK(19, 10, 0) ||                                 \
-	HEDLEY_CRAY_VERSION_CHECK(10, 0, 0) ||                                 \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(16, 9, 0) ||                             \
-	HEDLEY_TI_CL2000_VERSION_CHECK(16, 9, 0) ||                            \
-	HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) ||                               \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||                               \
-	HEDLEY_TI_CL430_VERSION_CHECK(16, 9, 0) ||                             \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 3, 2) || defined(__IBM__ALIGNOF__) || \
-	defined(__clang__)
-#define SIMDE_ALIGN_OF(Type) __alignof__(Type)
-#elif HEDLEY_IAR_VERSION_CHECK(8, 40, 0)
-#define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type)
-#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0)
-/* Probably goes back much further, but MS takes down their old docs.
-   * If you can verify that this works in earlier versions please let
-   * me know! */
-#define SIMDE_ALIGN_OF(Type) __alignof(Type)
-#endif
-
-/* SIMDE_ALIGN_MAXIMUM:
- *
- * This is the maximum alignment that the compiler supports.  You can
- * define the value prior to including SIMDe if necessary, but in that
- * case *please* submit an issue so we can add the platform to the
- * detection code.
- *
- * Most compilers are okay with types which are aligned beyond what
- * they think is the maximum, as long as the alignment is a power
- * of two.  MSVC is the exception (of course), so we need to cap the
- * alignment requests at values that the implementation supports.
- *
- * XL C/C++ will accept values larger than 16 (which is the alignment
- * of an AltiVec vector), but will not reliably align to the larger
- * value, so so we cap the value at 16 there.
- *
- * If the compiler accepts any power-of-two value within reason then
- * this macro should be left undefined, and the SIMDE_ALIGN_CAP
- * macro will just return the value passed to it. */
-#if !defined(SIMDE_ALIGN_MAXIMUM)
-#if defined(HEDLEY_MSVC_VERSION)
-#if defined(_M_IX86) || defined(_M_AMD64)
-#if HEDLEY_MSVC_VERSION_CHECK(19, 14, 0)
-#define SIMDE_ALIGN_PLATFORM_MAXIMUM 64
-#elif HEDLEY_MSVC_VERSION_CHECK(16, 0, 0)
-/* VS 2010 is really a guess based on Wikipedia; if anyone can
-         * test with old VS versions I'd really appreciate it. */
-#define SIMDE_ALIGN_PLATFORM_MAXIMUM 32
-#else
-#define SIMDE_ALIGN_PLATFORM_MAXIMUM 16
-#endif
-#elif defined(_M_ARM) || defined(_M_ARM64)
-#define SIMDE_ALIGN_PLATFORM_MAXIMUM 8
-#endif
-#elif defined(HEDLEY_IBM_VERSION)
-#define SIMDE_ALIGN_PLATFORM_MAXIMUM 16
-#endif
-#endif
-
-/* You can mostly ignore these; they're intended for internal use.
- * If you do need to use them please let me know; if they fulfill
- * a common use case I'll probably drop the trailing underscore
- * and make them part of the public API. */
-#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM)
-#if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64
-#define SIMDE_ALIGN_64_ 64
-#define SIMDE_ALIGN_32_ 32
-#define SIMDE_ALIGN_16_ 16
-#define SIMDE_ALIGN_8_ 8
-#elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32
-#define SIMDE_ALIGN_64_ 32
-#define SIMDE_ALIGN_32_ 32
-#define SIMDE_ALIGN_16_ 16
-#define SIMDE_ALIGN_8_ 8
-#elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16
-#define SIMDE_ALIGN_64_ 16
-#define SIMDE_ALIGN_32_ 16
-#define SIMDE_ALIGN_16_ 16
-#define SIMDE_ALIGN_8_ 8
-#elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8
-#define SIMDE_ALIGN_64_ 8
-#define SIMDE_ALIGN_32_ 8
-#define SIMDE_ALIGN_16_ 8
-#define SIMDE_ALIGN_8_ 8
-#else
-#error Max alignment expected to be >= 8
-#endif
-#else
-#define SIMDE_ALIGN_64_ 64
-#define SIMDE_ALIGN_32_ 32
-#define SIMDE_ALIGN_16_ 16
-#define SIMDE_ALIGN_8_ 8
-#endif
-
-/**
- * SIMDE_ALIGN_CAP(Alignment)
- *
- * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM.
- */
-#if defined(SIMDE_ALIGN_MAXIMUM)
-#define SIMDE_ALIGN_CAP(Alignment)                      \
-	(((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) \
-		 ? (Alignment)                          \
-		 : (SIMDE_ALIGN_PLATFORM_MAXIMUM))
-#else
-#define SIMDE_ALIGN_CAP(Alignment) (Alignment)
-#endif
-
-/* SIMDE_ALIGN_TO(Alignment)
- *
- * SIMDE_ALIGN_TO is used to declare types or variables.  It basically
- * maps to the align attribute in most compilers, the align declspec
- * in MSVC, or _Alignas/alignas in C11/C++11.
- *
- * Example:
- *
- *   struct i32x4 {
- *     SIMDE_ALIGN_TO(16) int32_t values[4];
- *   }
- *
- * Limitations:
- *
- * MSVC requires that the Alignment parameter be numeric; you can't do
- * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`.  This is
- * unfortunate because that's really how the LIKE macros are
- * implemented, and I am not aware of a way to get anything like this
- * to work without using the C11/C++11 keywords.
- *
- * It also means that we can't use SIMDE_ALIGN_CAP to limit the
- * alignment to the value specified, which MSVC also requires, so on
- * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead.
- * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would,
- * but should be safe to use on MSVC.
- *
- * All this is to say that, if you want your code to work on MSVC, you
- * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of
- * SIMDE_ALIGN_TO(8/16/32/64).
- */
-#if HEDLEY_HAS_ATTRIBUTE(aligned) || HEDLEY_GCC_VERSION_CHECK(2, 95, 0) || \
-	HEDLEY_CRAY_VERSION_CHECK(8, 4, 0) ||                              \
-	HEDLEY_IBM_VERSION_CHECK(11, 1, 0) ||                              \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                            \
-	HEDLEY_PGI_VERSION_CHECK(19, 4, 0) ||                              \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                               \
-	HEDLEY_TINYC_VERSION_CHECK(0, 9, 24) ||                            \
-	HEDLEY_TI_ARMCL_VERSION_CHECK(16, 9, 0) ||                         \
-	HEDLEY_TI_CL2000_VERSION_CHECK(16, 9, 0) ||                        \
-	HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) ||                           \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) ||                           \
-	HEDLEY_TI_CL430_VERSION_CHECK(16, 9, 0) ||                         \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 3, 2)
-#define SIMDE_ALIGN_TO(Alignment) \
-	__attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment))))
-#elif (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))
-#define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment))
-#elif (defined(__cplusplus) && (__cplusplus >= 201103L))
-#define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment))
-#elif defined(HEDLEY_MSVC_VERSION)
-#define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment))
-/* Unfortunately MSVC can't handle __declspec(align(__alignof(Type)));
-   * the alignment passed to the declspec has to be an integer. */
-#define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE
-#endif
-#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_)
-#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_)
-#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_)
-#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_)
-
-/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment)
- *
- * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's
- * std::assume_aligned, or __builtin_assume_aligned.  It tells the
- * compiler to assume that the provided pointer is aligned to an
- * `Alignment`-byte boundary.
- *
- * If you define SIMDE_ALIGN_DEBUG prior to including this header then
- * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check.   We don't
- * integrate with NDEBUG in this header, but it may be a good idea to
- * put something like this in your code:
- *
- *   #if !defined(NDEBUG)
- *     #define SIMDE_ALIGN_DEBUG
- *   #endif
- *   #include <.../simde-align.h>
- */
-#if HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \
-	HEDLEY_GCC_VERSION_CHECK(4, 7, 0)
-#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment)                   \
-	HEDLEY_REINTERPRET_CAST(                                              \
-		__typeof__(Pointer),                                          \
-		__builtin_assume_aligned(                                     \
-			HEDLEY_CONST_CAST(                                    \
-				void *, HEDLEY_REINTERPRET_CAST(const void *, \
-								Pointer)),    \
-			Alignment))
-#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment)           \
-	(__extension__({                                              \
-		__typeof__(v) simde_assume_aligned_t_ = (Pointer);    \
-		__assume_aligned(simde_assume_aligned_t_, Alignment); \
-		simde_assume_aligned_t_;                              \
-	}))
-#elif defined(__cplusplus) && (__cplusplus > 201703L)
-#include <memory>
-#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \
-	std::assume_aligned<Alignment>(Pointer)
-#else
-#if defined(__cplusplus)
-template<typename T>
-HEDLEY_ALWAYS_INLINE static T *
-simde_align_assume_to_unchecked(T *ptr, const size_t alignment)
-#else
-HEDLEY_ALWAYS_INLINE static void *
-simde_align_assume_to_unchecked(void *ptr, const size_t alignment)
-#endif
-{
-	HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) %
-		       SIMDE_ALIGN_CAP(alignment)) == 0);
-	return ptr;
-}
-#if defined(__cplusplus)
-#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \
-	simde_align_assume_to_unchecked((Pointer), (Alignment))
-#else
-#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment)                \
-	simde_align_assume_to_unchecked(                                   \
-		HEDLEY_CONST_CAST(void *, HEDLEY_REINTERPRET_CAST(         \
-						  const void *, Pointer)), \
-		(Alignment))
-#endif
-#endif
-
-#if !defined(SIMDE_ALIGN_DEBUG)
-#define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) \
-	SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment)
-#else
-#include <stdio.h>
-#if defined(__cplusplus)
-template<typename T>
-static HEDLEY_ALWAYS_INLINE T *
-simde_align_assume_to_checked_uncapped(T *ptr, const size_t alignment,
-				       const char *file, int line,
-				       const char *ptrname)
-#else
-static HEDLEY_ALWAYS_INLINE void *
-simde_align_assume_to_checked_uncapped(void *ptr, const size_t alignment,
-				       const char *file, int line,
-				       const char *ptrname)
-#endif
-{
-	if (HEDLEY_UNLIKELY(
-		    (HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) %
-		     HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_,
-					SIMDE_ALIGN_CAP(alignment))) != 0)) {
-		fprintf(stderr,
-			"%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n",
-			file, line, ptrname,
-			HEDLEY_REINTERPRET_CAST(const void *, ptr),
-			HEDLEY_STATIC_CAST(unsigned int,
-					   SIMDE_ALIGN_CAP(alignment)),
-			HEDLEY_STATIC_CAST(
-				unsigned int,
-				HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_,
-							(ptr)) %
-					HEDLEY_STATIC_CAST(
-						SIMDE_ALIGN_INTPTR_T_,
-						SIMDE_ALIGN_CAP(alignment))));
-	}
-
-	return ptr;
-}
-
-#if defined(__cplusplus)
-#define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment)                      \
-	simde_align_assume_to_checked_uncapped((Pointer), (Alignment), \
-					       __FILE__, __LINE__, #Pointer)
-#else
-#define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment)                          \
-	simde_align_assume_to_checked_uncapped(                            \
-		HEDLEY_CONST_CAST(void *, HEDLEY_REINTERPRET_CAST(         \
-						  const void *, Pointer)), \
-		(Alignment), __FILE__, __LINE__, #Pointer)
-#endif
-#endif
-
-/* SIMDE_ALIGN_LIKE(Type)
- * SIMDE_ALIGN_LIKE_#(Type)
- *
- * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros
- * except instead of an integer they take a type; basically, it's just
- * a more convenient way to do something like:
- *
- *   SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type))
- *
- * The versions with a numeric suffix will fall back on using a numeric
- * value in the event we can't use SIMDE_ALIGN_OF(Type).  This is
- * mainly for MSVC, where __declspec(align()) can't handle anything
- * other than hard-coded numeric values.
- */
-#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && \
-	!defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE)
-#define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type))
-#define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type)
-#define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type)
-#define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type)
-#define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type)
-#else
-#define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64
-#define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32
-#define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16
-#define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8
-#endif
-
-/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type)
- *
- * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a
- * type instead of a numeric value. */
-#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO)
-#define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) \
-	SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type))
-#endif
-
-/* SIMDE_ALIGN_CAST(Type, Pointer)
- *
- * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try
- * to silence warnings that some compilers may produce if you try
- * to assign to a type with increased alignment requirements.
- *
- * Note that it does *not* actually attempt to tell the compiler that
- * the pointer is aligned like the destination should be; that's the
- * job of the next macro.  This macro is necessary for stupid APIs
- * like _mm_loadu_si128 where the input is a __m128i* but the function
- * is specifically for data which isn't necessarily aligned to
- * _Alignof(__m128i).
- */
-#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || \
-	HEDLEY_GCC_VERSION_CHECK(3, 4, 0)
-#define SIMDE_ALIGN_CAST(Type, Pointer)                                 \
-	(__extension__({                                                \
-		HEDLEY_DIAGNOSTIC_PUSH                                  \
-		_Pragma("GCC diagnostic ignored \"-Wcast-align\"")      \
-			Type simde_r_ =                                 \
-				HEDLEY_REINTERPRET_CAST(Type, Pointer); \
-		HEDLEY_DIAGNOSTIC_POP                                   \
-		simde_r_;                                               \
-	}))
-#else
-#define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer)
-#endif
-
-/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer)
- *
- * This is sort of like a combination of a reinterpret_cast and a
- * SIMDE_ALIGN_ASSUME_LIKE.  It uses SIMDE_ALIGN_ASSUME_LIKE to tell
- * the compiler that the pointer is aligned like the specified type
- * and casts the pointer to the specified type while suppressing any
- * warnings from the compiler about casting to a type with greater
- * alignment requirements.
- */
-#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) \
-	SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type)
-
-#endif /* !defined(SIMDE_ALIGN_H) */

+ 0 - 537
libobs/util/simde/simde-arch.h

@@ -1,537 +0,0 @@
-/* Architecture detection
- * Created by Evan Nemerson <[email protected]>
- *
- *   To the extent possible under law, the authors have waived all
- *   copyright and related or neighboring rights to this code.  For
- *   details, see the Creative Commons Zero 1.0 Universal license at
- *   <https://creativecommons.org/publicdomain/zero/1.0/>
- *
- * SPDX-License-Identifier: CC0-1.0
- *
- * Different compilers define different preprocessor macros for the
- * same architecture.  This is an attempt to provide a single
- * interface which is usable on any compiler.
- *
- * In general, a macro named SIMDE_ARCH_* is defined for each
- * architecture the CPU supports.  When there are multiple possible
- * versions, we try to define the macro to the target version.  For
- * example, if you want to check for i586+, you could do something
- * like:
- *
- *   #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5)
- *   ...
- *   #endif
- *
- * You could also just check that SIMDE_ARCH_X86 >= 5 without checking
- * if it's defined first, but some compilers may emit a warning about
- * an undefined macro being used (e.g., GCC with -Wundef).
- *
- * This was originally created for SIMDe
- * <https://github.com/simd-everywhere/simde> (hence the prefix), but this
- * header has no dependencies and may be used anywhere.  It is
- * originally based on information from
- * <https://sourceforge.net/p/predef/wiki/Architectures/>, though it
- * has been enhanced with additional information.
- *
- * If you improve this file, or find a bug, please file the issue at
- * <https://github.com/simd-everywhere/simde/issues>.  If you copy this into
- * your project, even if you change the prefix, please keep the links
- * to SIMDe intact so others know where to report issues, submit
- * enhancements, and find the latest version. */
-
-#if !defined(SIMDE_ARCH_H)
-#define SIMDE_ARCH_H
-
-/* Alpha
-   <https://en.wikipedia.org/wiki/DEC_Alpha> */
-#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA)
-#if defined(__alpha_ev6__)
-#define SIMDE_ARCH_ALPHA 6
-#elif defined(__alpha_ev5__)
-#define SIMDE_ARCH_ALPHA 5
-#elif defined(__alpha_ev4__)
-#define SIMDE_ARCH_ALPHA 4
-#else
-#define SIMDE_ARCH_ALPHA 1
-#endif
-#endif
-#if defined(SIMDE_ARCH_ALPHA)
-#define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA)
-#else
-#define SIMDE_ARCH_ALPHA_CHECK(version) (0)
-#endif
-
-/* Atmel AVR
-   <https://en.wikipedia.org/wiki/Atmel_AVR> */
-#if defined(__AVR_ARCH__)
-#define SIMDE_ARCH_AVR __AVR_ARCH__
-#endif
-
-/* AMD64 / x86_64
-   <https://en.wikipedia.org/wiki/X86-64> */
-#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || \
-	defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
-#define SIMDE_ARCH_AMD64 1000
-#endif
-
-/* ARM
-   <https://en.wikipedia.org/wiki/ARM_architecture> */
-#if defined(__ARM_ARCH_8A__)
-#define SIMDE_ARCH_ARM 82
-#elif defined(__ARM_ARCH_8R__)
-#define SIMDE_ARCH_ARM 81
-#elif defined(__ARM_ARCH_8__)
-#define SIMDE_ARCH_ARM 80
-#elif defined(__ARM_ARCH_7S__)
-#define SIMDE_ARCH_ARM 74
-#elif defined(__ARM_ARCH_7M__)
-#define SIMDE_ARCH_ARM 73
-#elif defined(__ARM_ARCH_7R__)
-#define SIMDE_ARCH_ARM 72
-#elif defined(__ARM_ARCH_7A__)
-#define SIMDE_ARCH_ARM 71
-#elif defined(__ARM_ARCH_7__)
-#define SIMDE_ARCH_ARM 70
-#elif defined(__ARM_ARCH)
-#define SIMDE_ARCH_ARM (__ARM_ARCH * 10)
-#elif defined(_M_ARM)
-#define SIMDE_ARCH_ARM (_M_ARM * 10)
-#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || \
-	defined(_ARM) || defined(_M_ARM) || defined(_M_ARM)
-#define SIMDE_ARCH_ARM 1
-#endif
-#if defined(SIMDE_ARCH_ARM)
-#define SIMDE_ARCH_ARM_CHECK(version) ((version) <= SIMDE_ARCH_ARM)
-#else
-#define SIMDE_ARCH_ARM_CHECK(version) (0)
-#endif
-
-/* AArch64
-   <https://en.wikipedia.org/wiki/ARM_architecture> */
-#if defined(__aarch64__) || defined(_M_ARM64)
-#define SIMDE_ARCH_AARCH64 1000
-#endif
-#if defined(SIMDE_ARCH_AARCH64)
-#define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64)
-#else
-#define SIMDE_ARCH_AARCH64_CHECK(version) (0)
-#endif
-
-/* ARM SIMD ISA extensions */
-#if defined(__ARM_NEON)
-#if defined(SIMDE_ARCH_AARCH64)
-#define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64
-#elif defined(SIMDE_ARCH_ARM)
-#define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM
-#endif
-#endif
-#if defined(__ARM_FEATURE_SVE)
-#define SIMDE_ARCH_ARM_SVE
-#endif
-
-/* Blackfin
-   <https://en.wikipedia.org/wiki/Blackfin> */
-#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__)
-#define SIMDE_ARCH_BLACKFIN 1
-#endif
-
-/* CRIS
-   <https://en.wikipedia.org/wiki/ETRAX_CRIS> */
-#if defined(__CRIS_arch_version)
-#define SIMDE_ARCH_CRIS __CRIS_arch_version
-#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || \
-	defined(__CRIS__)
-#define SIMDE_ARCH_CRIS 1
-#endif
-
-/* Convex
-   <https://en.wikipedia.org/wiki/Convex_Computer> */
-#if defined(__convex_c38__)
-#define SIMDE_ARCH_CONVEX 38
-#elif defined(__convex_c34__)
-#define SIMDE_ARCH_CONVEX 34
-#elif defined(__convex_c32__)
-#define SIMDE_ARCH_CONVEX 32
-#elif defined(__convex_c2__)
-#define SIMDE_ARCH_CONVEX 2
-#elif defined(__convex__)
-#define SIMDE_ARCH_CONVEX 1
-#endif
-#if defined(SIMDE_ARCH_CONVEX)
-#define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX)
-#else
-#define SIMDE_ARCH_CONVEX_CHECK(version) (0)
-#endif
-
-/* Adapteva Epiphany
-   <https://en.wikipedia.org/wiki/Adapteva_Epiphany> */
-#if defined(__epiphany__)
-#define SIMDE_ARCH_EPIPHANY 1
-#endif
-
-/* Fujitsu FR-V
-   <https://en.wikipedia.org/wiki/FR-V_(microprocessor)> */
-#if defined(__frv__)
-#define SIMDE_ARCH_FRV 1
-#endif
-
-/* H8/300
-   <https://en.wikipedia.org/wiki/H8_Family> */
-#if defined(__H8300__)
-#define SIMDE_ARCH_H8300
-#endif
-
-/* HP/PA / PA-RISC
-   <https://en.wikipedia.org/wiki/PA-RISC> */
-#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || \
-	defined(_PA_RISC2_0)
-#define SIMDE_ARCH_HPPA 20
-#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1)
-#define SIMDE_ARCH_HPPA 11
-#elif defined(_PA_RISC1_0)
-#define SIMDE_ARCH_HPPA 10
-#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa)
-#define SIMDE_ARCH_HPPA 1
-#endif
-#if defined(SIMDE_ARCH_HPPA)
-#define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA)
-#else
-#define SIMDE_ARCH_HPPA_CHECK(version) (0)
-#endif
-
-/* x86
-   <https://en.wikipedia.org/wiki/X86> */
-#if defined(_M_IX86)
-#define SIMDE_ARCH_X86 (_M_IX86 / 100)
-#elif defined(__I86__)
-#define SIMDE_ARCH_X86 __I86__
-#elif defined(i686) || defined(__i686) || defined(__i686__)
-#define SIMDE_ARCH_X86 6
-#elif defined(i586) || defined(__i586) || defined(__i586__)
-#define SIMDE_ARCH_X86 5
-#elif defined(i486) || defined(__i486) || defined(__i486__)
-#define SIMDE_ARCH_X86 4
-#elif defined(i386) || defined(__i386) || defined(__i386__)
-#define SIMDE_ARCH_X86 3
-#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__)
-#define SIMDE_ARCH_X86 3
-#endif
-#if defined(SIMDE_ARCH_X86)
-#define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86)
-#else
-#define SIMDE_ARCH_X86_CHECK(version) (0)
-#endif
-
-/* SIMD ISA extensions for x86/x86_64 */
-#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)
-#if defined(_M_IX86_FP)
-#define SIMDE_ARCH_X86_MMX
-#if (_M_IX86_FP >= 1)
-#define SIMDE_ARCH_X86_SSE 1
-#endif
-#if (_M_IX86_FP >= 2)
-#define SIMDE_ARCH_X86_SSE2 1
-#endif
-#elif defined(_M_X64)
-#define SIMDE_ARCH_X86_SSE 1
-#define SIMDE_ARCH_X86_SSE2 1
-#else
-#if defined(__MMX__)
-#define SIMDE_ARCH_X86_MMX 1
-#endif
-#if defined(__SSE__)
-#define SIMDE_ARCH_X86_SSE 1
-#endif
-#if defined(__SSE2__)
-#define SIMDE_ARCH_X86_SSE2 1
-#endif
-#endif
-#if defined(__SSE3__)
-#define SIMDE_ARCH_X86_SSE3 1
-#endif
-#if defined(__SSSE3__)
-#define SIMDE_ARCH_X86_SSSE3 1
-#endif
-#if defined(__SSE4_1__)
-#define SIMDE_ARCH_X86_SSE4_1 1
-#endif
-#if defined(__SSE4_2__)
-#define SIMDE_ARCH_X86_SSE4_2 1
-#endif
-#if defined(__AVX__)
-#define SIMDE_ARCH_X86_AVX 1
-#if !defined(SIMDE_ARCH_X86_SSE3)
-#define SIMDE_ARCH_X86_SSE3 1
-#endif
-#if !defined(SIMDE_ARCH_X86_SSE4_1)
-#define SIMDE_ARCH_X86_SSE4_1 1
-#endif
-#if !defined(SIMDE_ARCH_X86_SSE4_1)
-#define SIMDE_ARCH_X86_SSE4_2 1
-#endif
-#endif
-#if defined(__AVX2__)
-#define SIMDE_ARCH_X86_AVX2 1
-#endif
-#if defined(__FMA__)
-#define SIMDE_ARCH_X86_FMA 1
-#if !defined(SIMDE_ARCH_X86_AVX)
-#define SIMDE_ARCH_X86_AVX 1
-#endif
-#endif
-#if defined(__AVX512VP2INTERSECT__)
-#define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1
-#endif
-#if defined(__AVX512VBMI__)
-#define SIMDE_ARCH_X86_AVX512VBMI 1
-#endif
-#if defined(__AVX512BW__)
-#define SIMDE_ARCH_X86_AVX512BW 1
-#endif
-#if defined(__AVX512CD__)
-#define SIMDE_ARCH_X86_AVX512CD 1
-#endif
-#if defined(__AVX512DQ__)
-#define SIMDE_ARCH_X86_AVX512DQ 1
-#endif
-#if defined(__AVX512F__)
-#define SIMDE_ARCH_X86_AVX512F 1
-#endif
-#if defined(__AVX512VL__)
-#define SIMDE_ARCH_X86_AVX512VL 1
-#endif
-#if defined(__GFNI__)
-#define SIMDE_ARCH_X86_GFNI 1
-#endif
-#if defined(__PCLMUL__)
-#define SIMDE_ARCH_X86_PCLMUL 1
-#endif
-#if defined(__VPCLMULQDQ__)
-#define SIMDE_ARCH_X86_VPCLMULQDQ 1
-#endif
-#endif
-
-/* Itanium
-   <https://en.wikipedia.org/wiki/Itanium> */
-#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || \
-	defined(__ia64) || defined(_M_IA64) || defined(__itanium__)
-#define SIMDE_ARCH_IA64 1
-#endif
-
-/* Renesas M32R
-   <https://en.wikipedia.org/wiki/M32R> */
-#if defined(__m32r__) || defined(__M32R__)
-#define SIMDE_ARCH_M32R
-#endif
-
-/* Motorola 68000
-   <https://en.wikipedia.org/wiki/Motorola_68000> */
-#if defined(__mc68060__) || defined(__MC68060__)
-#define SIMDE_ARCH_M68K 68060
-#elif defined(__mc68040__) || defined(__MC68040__)
-#define SIMDE_ARCH_M68K 68040
-#elif defined(__mc68030__) || defined(__MC68030__)
-#define SIMDE_ARCH_M68K 68030
-#elif defined(__mc68020__) || defined(__MC68020__)
-#define SIMDE_ARCH_M68K 68020
-#elif defined(__mc68010__) || defined(__MC68010__)
-#define SIMDE_ARCH_M68K 68010
-#elif defined(__mc68000__) || defined(__MC68000__)
-#define SIMDE_ARCH_M68K 68000
-#endif
-#if defined(SIMDE_ARCH_M68K)
-#define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K)
-#else
-#define SIMDE_ARCH_M68K_CHECK(version) (0)
-#endif
-
-/* Xilinx MicroBlaze
-   <https://en.wikipedia.org/wiki/MicroBlaze> */
-#if defined(__MICROBLAZE__) || defined(__microblaze__)
-#define SIMDE_ARCH_MICROBLAZE
-#endif
-
-/* MIPS
-   <https://en.wikipedia.org/wiki/MIPS_architecture> */
-#if defined(_MIPS_ISA_MIPS64R2)
-#define SIMDE_ARCH_MIPS 642
-#elif defined(_MIPS_ISA_MIPS64)
-#define SIMDE_ARCH_MIPS 640
-#elif defined(_MIPS_ISA_MIPS32R2)
-#define SIMDE_ARCH_MIPS 322
-#elif defined(_MIPS_ISA_MIPS32)
-#define SIMDE_ARCH_MIPS 320
-#elif defined(_MIPS_ISA_MIPS4)
-#define SIMDE_ARCH_MIPS 4
-#elif defined(_MIPS_ISA_MIPS3)
-#define SIMDE_ARCH_MIPS 3
-#elif defined(_MIPS_ISA_MIPS2)
-#define SIMDE_ARCH_MIPS 2
-#elif defined(_MIPS_ISA_MIPS1)
-#define SIMDE_ARCH_MIPS 1
-#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__)
-#define SIMDE_ARCH_MIPS 1
-#endif
-#if defined(SIMDE_ARCH_MIPS)
-#define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS)
-#else
-#define SIMDE_ARCH_MIPS_CHECK(version) (0)
-#endif
-
-#if defined(__mips_loongson_mmi)
-#define SIMDE_ARCH_MIPS_LOONGSON_MMI 1
-#endif
-
-/* Matsushita MN10300
-   <https://en.wikipedia.org/wiki/MN103> */
-#if defined(__MN10300__) || defined(__mn10300__)
-#define SIMDE_ARCH_MN10300 1
-#endif
-
-/* POWER
-   <https://en.wikipedia.org/wiki/IBM_POWER_Instruction_Set_Architecture> */
-#if defined(_M_PPC)
-#define SIMDE_ARCH_POWER _M_PPC
-#elif defined(_ARCH_PWR9)
-#define SIMDE_ARCH_POWER 900
-#elif defined(_ARCH_PWR8)
-#define SIMDE_ARCH_POWER 800
-#elif defined(_ARCH_PWR7)
-#define SIMDE_ARCH_POWER 700
-#elif defined(_ARCH_PWR6)
-#define SIMDE_ARCH_POWER 600
-#elif defined(_ARCH_PWR5)
-#define SIMDE_ARCH_POWER 500
-#elif defined(_ARCH_PWR4)
-#define SIMDE_ARCH_POWER 400
-#elif defined(_ARCH_440) || defined(__ppc440__)
-#define SIMDE_ARCH_POWER 440
-#elif defined(_ARCH_450) || defined(__ppc450__)
-#define SIMDE_ARCH_POWER 450
-#elif defined(_ARCH_601) || defined(__ppc601__)
-#define SIMDE_ARCH_POWER 601
-#elif defined(_ARCH_603) || defined(__ppc603__)
-#define SIMDE_ARCH_POWER 603
-#elif defined(_ARCH_604) || defined(__ppc604__)
-#define SIMDE_ARCH_POWER 604
-#elif defined(_ARCH_605) || defined(__ppc605__)
-#define SIMDE_ARCH_POWER 605
-#elif defined(_ARCH_620) || defined(__ppc620__)
-#define SIMDE_ARCH_POWER 620
-#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || \
-	defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) ||       \
-	defined(__ppc)
-#define SIMDE_ARCH_POWER 1
-#endif
-#if defined(SIMDE_ARCH_POWER)
-#define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER)
-#else
-#define SIMDE_ARCH_POWER_CHECK(version) (0)
-#endif
-
-#if defined(__ALTIVEC__)
-#define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER
-#endif
-#if defined(SIMDE_ARCH_POWER)
-#define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER)
-#else
-#define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0)
-#endif
-
-/* SPARC
-   <https://en.wikipedia.org/wiki/SPARC> */
-#if defined(__sparc_v9__) || defined(__sparcv9)
-#define SIMDE_ARCH_SPARC 9
-#elif defined(__sparc_v8__) || defined(__sparcv8)
-#define SIMDE_ARCH_SPARC 8
-#elif defined(__sparc_v7__) || defined(__sparcv7)
-#define SIMDE_ARCH_SPARC 7
-#elif defined(__sparc_v6__) || defined(__sparcv6)
-#define SIMDE_ARCH_SPARC 6
-#elif defined(__sparc_v5__) || defined(__sparcv5)
-#define SIMDE_ARCH_SPARC 5
-#elif defined(__sparc_v4__) || defined(__sparcv4)
-#define SIMDE_ARCH_SPARC 4
-#elif defined(__sparc_v3__) || defined(__sparcv3)
-#define SIMDE_ARCH_SPARC 3
-#elif defined(__sparc_v2__) || defined(__sparcv2)
-#define SIMDE_ARCH_SPARC 2
-#elif defined(__sparc_v1__) || defined(__sparcv1)
-#define SIMDE_ARCH_SPARC 1
-#elif defined(__sparc__) || defined(__sparc)
-#define SIMDE_ARCH_SPARC 1
-#endif
-#if defined(SIMDE_ARCH_SPARC)
-#define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC)
-#else
-#define SIMDE_ARCH_SPARC_CHECK(version) (0)
-#endif
-
-/* SuperH
-   <https://en.wikipedia.org/wiki/SuperH> */
-#if defined(__sh5__) || defined(__SH5__)
-#define SIMDE_ARCH_SUPERH 5
-#elif defined(__sh4__) || defined(__SH4__)
-#define SIMDE_ARCH_SUPERH 4
-#elif defined(__sh3__) || defined(__SH3__)
-#define SIMDE_ARCH_SUPERH 3
-#elif defined(__sh2__) || defined(__SH2__)
-#define SIMDE_ARCH_SUPERH 2
-#elif defined(__sh1__) || defined(__SH1__)
-#define SIMDE_ARCH_SUPERH 1
-#elif defined(__sh__) || defined(__SH__)
-#define SIMDE_ARCH_SUPERH 1
-#endif
-
-/* IBM System z
-   <https://en.wikipedia.org/wiki/IBM_System_z> */
-#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || \
-	defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__)
-#define SIMDE_ARCH_SYSTEMZ
-#endif
-
-/* TMS320 DSP
-   <https://en.wikipedia.org/wiki/Texas_Instruments_TMS320> */
-#if defined(_TMS320C6740) || defined(__TMS320C6740__)
-#define SIMDE_ARCH_TMS320 6740
-#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__)
-#define SIMDE_ARCH_TMS320 6701
-#elif defined(_TMS320C6700) || defined(__TMS320C6700__)
-#define SIMDE_ARCH_TMS320 6700
-#elif defined(_TMS320C6600) || defined(__TMS320C6600__)
-#define SIMDE_ARCH_TMS320 6600
-#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__)
-#define SIMDE_ARCH_TMS320 6401
-#elif defined(_TMS320C6400) || defined(__TMS320C6400__)
-#define SIMDE_ARCH_TMS320 6400
-#elif defined(_TMS320C6200) || defined(__TMS320C6200__)
-#define SIMDE_ARCH_TMS320 6200
-#elif defined(_TMS320C55X) || defined(__TMS320C55X__)
-#define SIMDE_ARCH_TMS320 550
-#elif defined(_TMS320C54X) || defined(__TMS320C54X__)
-#define SIMDE_ARCH_TMS320 540
-#elif defined(_TMS320C28X) || defined(__TMS320C28X__)
-#define SIMDE_ARCH_TMS320 280
-#endif
-#if defined(SIMDE_ARCH_TMS320)
-#define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320)
-#else
-#define SIMDE_ARCH_TMS320_CHECK(version) (0)
-#endif
-
-/* WebAssembly */
-#if defined(__wasm__)
-#define SIMDE_ARCH_WASM 1
-#endif
-
-#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__)
-#define SIMDE_ARCH_WASM_SIMD128
-#endif
-
-/* Xtensa
-   <https://en.wikipedia.org/wiki/> */
-#if defined(__xtensa__) || defined(__XTENSA__)
-#define SIMDE_ARCH_XTENSA 1
-#endif
-
-#endif /* !defined(SIMDE_ARCH_H) */

+ 0 - 918
libobs/util/simde/simde-common.h

@@ -1,918 +0,0 @@
-/* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- *   2017-2020 Evan Nemerson <[email protected]>
- */
-
-#if !defined(SIMDE_COMMON_H)
-#define SIMDE_COMMON_H
-
-#include "hedley.h"
-
-#define SIMDE_VERSION_MAJOR 0
-#define SIMDE_VERSION_MINOR 7
-#define SIMDE_VERSION_MICRO 1
-#define SIMDE_VERSION                                                   \
-	HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, \
-			      SIMDE_VERSION_MICRO)
-
-#include <stddef.h>
-#include <stdint.h>
-
-#include "simde-detect-clang.h"
-#include "simde-arch.h"
-#include "simde-features.h"
-#include "simde-diagnostic.h"
-#include "simde-math.h"
-#include "simde-constify.h"
-#include "simde-align.h"
-
-/* In some situations, SIMDe has to make large performance sacrifices
- * for small increases in how faithfully it reproduces an API, but
- * only a relatively small number of users will actually need the API
- * to be completely accurate.  The SIMDE_FAST_* options can be used to
- * disable these trade-offs.
- *
- * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or
- * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to
- * enable some optimizations.  Using -ffast-math and/or
- * -ffinite-math-only will also enable the relevant options.  If you
- * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. */
-
-/* Most programs avoid NaNs by never passing values which can result in
- * a NaN; for example, if you only pass non-negative values to the sqrt
- * functions, it won't generate a NaN.  On some platforms, similar
- * functions handle NaNs differently; for example, the _mm_min_ps SSE
- * function will return 0.0 if you pass it (0.0, NaN), but the NEON
- * vminq_f32 function will return NaN.  Making them behave like one
- * another is expensive; it requires generating a mask of all lanes
- * with NaNs, then performing the operation (e.g., vminq_f32), then
- * blending together the result with another vector using the mask.
- *
- * If you don't want SIMDe to worry about the differences between how
- * NaNs are handled on the two platforms, define this (or pass
- * -ffinite-math-only) */
-#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && \
-	defined(__FAST_MATH__)
-#define SIMDE_FAST_MATH
-#endif
-
-#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS)
-#if defined(SIMDE_FAST_MATH)
-#define SIMDE_FAST_NANS
-#elif defined(__FINITE_MATH_ONLY__)
-#if __FINITE_MATH_ONLY__
-#define SIMDE_FAST_NANS
-#endif
-#endif
-#endif
-
-/* Many functions are defined as using the current rounding mode
- * (i.e., the SIMD version of fegetround()) when converting to
- * an integer.  For example, _mm_cvtpd_epi32.  Unfortunately,
- * on some platforms (such as ARMv8+ where round-to-nearest is
- * always used, regardless of the FPSCR register) this means we
- * have to first query the current rounding mode, then choose
- * the proper function (rounnd
- , ceil, floor, etc.) */
-#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && \
-	defined(SIMDE_FAST_MATH)
-#define SIMDE_FAST_ROUND_MODE
-#endif
-
-/* This controls how ties are rounded.  For example, does 10.5 round to
- * 10 or 11?  IEEE 754 specifies round-towards-even, but ARMv7 (for
- * example) doesn't support it and it must be emulated (which is rather
- * slow).  If you're okay with just using the default for whatever arch
- * you're on, you should definitely define this.
- *
- * Note that we don't use this macro to avoid correct implementations
- * in functions which are explicitly about rounding (such as vrnd* on
- * NEON, _mm_round_* on x86, etc.); it is only used for code where
- * rounding is a component in another function, and even then it isn't
- * usually a problem since such functions will use the current rounding
- * mode. */
-#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && \
-	defined(SIMDE_FAST_MATH)
-#define SIMDE_FAST_ROUND_TIES
-#endif
-
-/* For functions which convert from one type to another (mostly from
- * floating point to integer types), sometimes we need to do a range
- * check and potentially return a different result if the value
- * falls outside that range.  Skipping this check can provide a
- * performance boost, at the expense of faithfulness to the API we're
- * emulating. */
-#if !defined(SIMDE_FAST_CONVERSION_RANGE) && \
-	!defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH)
-#define SIMDE_FAST_CONVERSION_RANGE
-#endif
-
-#if HEDLEY_HAS_BUILTIN(__builtin_constant_p) ||                             \
-	HEDLEY_GCC_VERSION_CHECK(3, 4, 0) ||                                \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                             \
-	HEDLEY_TINYC_VERSION_CHECK(0, 9, 19) ||                             \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                                \
-	HEDLEY_IBM_VERSION_CHECK(13, 1, 0) ||                               \
-	HEDLEY_TI_CL6X_VERSION_CHECK(6, 1, 0) ||                            \
-	(HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0) && !defined(__cplusplus)) || \
-	HEDLEY_CRAY_VERSION_CHECK(8, 1, 0)
-#define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr))
-#elif defined(__cplusplus) && (__cplusplus > 201703L)
-#include <type_traits>
-#define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated())
-#endif
-
-#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT)
-#if defined(SIMDE_CHECK_CONSTANT_) &&                \
-	SIMDE_DETECT_CLANG_VERSION_CHECK(9, 0, 0) && \
-	(!defined(__apple_build_version__) ||        \
-	 ((__apple_build_version__ < 11000000) ||    \
-	  (__apple_build_version__ >= 12000000)))
-#define SIMDE_REQUIRE_CONSTANT(arg)                    \
-	HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), \
-			   "`" #arg "' must be constant")
-#else
-#define SIMDE_REQUIRE_CONSTANT(arg)
-#endif
-#else
-#define SIMDE_REQUIRE_CONSTANT(arg)
-#endif
-
-#define SIMDE_REQUIRE_RANGE(arg, min, max)                         \
-	HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), \
-			   "'" #arg "' must be in [" #min ", " #max "]")
-
-#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \
-	SIMDE_REQUIRE_CONSTANT(arg)                 \
-	SIMDE_REQUIRE_RANGE(arg, min, max)
-
-/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty
- * fallback if we can't find an implementation; instead we have to
- * check if SIMDE_STATIC_ASSERT is defined before using it. */
-#if !defined(__cplusplus) &&                                             \
-	((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \
-	 HEDLEY_HAS_FEATURE(c_static_assert) ||                          \
-	 HEDLEY_GCC_VERSION_CHECK(6, 0, 0) ||                            \
-	 HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || defined(_Static_assert))
-#define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message)
-#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \
-	HEDLEY_MSVC_VERSION_CHECK(16, 0, 0)
-#define SIMDE_STATIC_ASSERT(expr, message)            \
-	HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
-		static_assert(expr, message))
-#endif
-
-#if (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \
-	HEDLEY_GCC_VERSION_CHECK(3, 3, 0) ||                                \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                             \
-	HEDLEY_IBM_VERSION_CHECK(13, 1, 0)
-#define SIMDE_MAY_ALIAS __attribute__((__may_alias__))
-#else
-#define SIMDE_MAY_ALIAS
-#endif
-
-/*  Lots of compilers support GCC-style vector extensions, but many
-    don't support all the features.  Define different macros depending
-    on support for
-
-    * SIMDE_VECTOR - Declaring a vector.
-    * SIMDE_VECTOR_OPS - basic operations (binary and unary).
-    * SIMDE_VECTOR_NEGATE - negating a vector
-    * SIMDE_VECTOR_SCALAR - For binary operators, the second argument
-        can be a scalar, in which case the result is as if that scalar
-        had been broadcast to all lanes of a vector.
-    * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for
-        extracting/inserting a single element.=
-
-    SIMDE_VECTOR can be assumed if any others are defined, the
-    others are independent. */
-#if !defined(SIMDE_NO_VECTOR)
-#if HEDLEY_GCC_VERSION_CHECK(4, 8, 0)
-#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
-#define SIMDE_VECTOR_OPS
-#define SIMDE_VECTOR_NEGATE
-#define SIMDE_VECTOR_SCALAR
-#define SIMDE_VECTOR_SUBSCRIPT
-#elif HEDLEY_INTEL_VERSION_CHECK(16, 0, 0)
-#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
-#define SIMDE_VECTOR_OPS
-#define SIMDE_VECTOR_NEGATE
-/* ICC only supports SIMDE_VECTOR_SCALAR for constants */
-#define SIMDE_VECTOR_SUBSCRIPT
-#elif HEDLEY_GCC_VERSION_CHECK(4, 1, 0) || HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
-#define SIMDE_VECTOR_OPS
-#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 12, 0)
-#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
-#elif HEDLEY_HAS_ATTRIBUTE(vector_size)
-#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
-#define SIMDE_VECTOR_OPS
-#define SIMDE_VECTOR_NEGATE
-#define SIMDE_VECTOR_SUBSCRIPT
-#if SIMDE_DETECT_CLANG_VERSION_CHECK(5, 0, 0)
-#define SIMDE_VECTOR_SCALAR
-#endif
-#endif
-
-/* GCC and clang have built-in functions to handle shuffling and
-   converting of vectors, but the implementations are slightly
-   different.  This macro is just an abstraction over them.  Note that
-   elem_size is in bits but vec_size is in bytes. */
-#if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT)
-HEDLEY_DIAGNOSTIC_PUSH
-/* We don't care about -Wvariadic-macros; all compilers that support
-      * shufflevector/shuffle support them. */
-#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic")
-#pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
-#endif
-#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4, 0, 0)
-#pragma GCC diagnostic ignored "-Wvariadic-macros"
-#endif
-
-#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
-#define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) \
-	__builtin_shufflevector(a, b, __VA_ARGS__)
-#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle, 4, 7, 0) && \
-	!defined(__INTEL_COMPILER)
-#define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) \
-	(__extension__({                                      \
-		int##elem_size##_t SIMDE_VECTOR(vec_size)     \
-			simde_shuffle_ = {__VA_ARGS__};       \
-		__builtin_shuffle(a, b, simde_shuffle_);      \
-	}))
-#endif
-HEDLEY_DIAGNOSTIC_POP
-#endif
-
-/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT
-   but the code needs to be refactored a bit to take advantage. */
-#if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT)
-#if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || \
-	HEDLEY_GCC_VERSION_CHECK(9, 0, 0)
-#if HEDLEY_GCC_VERSION_CHECK(9, 0, 0) && !HEDLEY_GCC_VERSION_CHECK(9, 3, 0)
-/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */
-#define SIMDE_CONVERT_VECTOR_(to, from)                          \
-	((to) = (__extension__({                                 \
-		 __typeof__(from) from_ = (from);                \
-		 ((void)from_);                                  \
-		 __builtin_convertvector(from_, __typeof__(to)); \
-	 })))
-#else
-#define SIMDE_CONVERT_VECTOR_(to, from) \
-	((to) = __builtin_convertvector((from), __typeof__(to)))
-#endif
-#endif
-#endif
-#endif
-
-/* Since we currently require SUBSCRIPT before using a vector in a
-   union, we define these as dependencies of SUBSCRIPT.  They are
-   likely to disappear in the future, once SIMDe learns how to make
-   use of vectors without using the union members.  Do not use them
-   in your code unless you're okay with it breaking when SIMDe
-   changes. */
-#if defined(SIMDE_VECTOR_SUBSCRIPT)
-#if defined(SIMDE_VECTOR_OPS)
-#define SIMDE_VECTOR_SUBSCRIPT_OPS
-#endif
-#if defined(SIMDE_VECTOR_SCALAR)
-#define SIMDE_VECTOR_SUBSCRIPT_SCALAR
-#endif
-#endif
-
-#if !defined(SIMDE_ENABLE_OPENMP) &&                   \
-	((defined(_OPENMP) && (_OPENMP >= 201307L)) || \
-	 (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L)))
-#define SIMDE_ENABLE_OPENMP
-#endif
-
-#if !defined(SIMDE_ENABLE_CILKPLUS) && \
-	(defined(__cilk) || defined(HEDLEY_INTEL_VERSION))
-#define SIMDE_ENABLE_CILKPLUS
-#endif
-
-#if defined(SIMDE_ENABLE_OPENMP)
-#define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd)
-#define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l))
-#if defined(__clang__)
-#define SIMDE_VECTORIZE_REDUCTION(r)                              \
-	HEDLEY_DIAGNOSTIC_PUSH                                    \
-	_Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \
-		HEDLEY_PRAGMA(omp simd reduction(r)) HEDLEY_DIAGNOSTIC_POP
-#else
-#define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r))
-#endif
-#define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a))
-#elif defined(SIMDE_ENABLE_CILKPLUS)
-#define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd)
-#define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
-#define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
-#define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a))
-#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION)
-#define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable))
-#define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l))
-#define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE
-#define SIMDE_VECTORIZE_ALIGNED(a)
-#elif HEDLEY_GCC_VERSION_CHECK(4, 9, 0)
-#define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep)
-#define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE
-#define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE
-#define SIMDE_VECTORIZE_ALIGNED(a)
-#elif HEDLEY_CRAY_VERSION_CHECK(5, 0, 0)
-#define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep)
-#define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE
-#define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE
-#define SIMDE_VECTORIZE_ALIGNED(a)
-#else
-#define SIMDE_VECTORIZE
-#define SIMDE_VECTORIZE_SAFELEN(l)
-#define SIMDE_VECTORIZE_REDUCTION(r)
-#define SIMDE_VECTORIZE_ALIGNED(a)
-#endif
-
-#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask)))
-
-/* Intended for checking coverage, you should never use this in
-   production. */
-#if defined(SIMDE_NO_INLINE)
-#define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static
-#else
-#define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static
-#endif
-
-#if HEDLEY_HAS_ATTRIBUTE(unused) || HEDLEY_GCC_VERSION_CHECK(2, 95, 0)
-#define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__))
-#else
-#define SIMDE_FUNCTION_POSSIBLY_UNUSED_
-#endif
-
-#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused")
-#define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED \
-	_Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED
-#endif
-
-#if defined(_MSC_VER)
-#define SIMDE_BEGIN_DECLS_                                            \
-	HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable : 4996 4204)) \
-		HEDLEY_BEGIN_C_DECLS
-#define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS
-#else
-#define SIMDE_BEGIN_DECLS_                              \
-	HEDLEY_DIAGNOSTIC_PUSH                          \
-	SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED \
-	HEDLEY_BEGIN_C_DECLS
-#define SIMDE_END_DECLS_   \
-	HEDLEY_END_C_DECLS \
-	HEDLEY_DIAGNOSTIC_POP
-#endif
-
-#if defined(__SIZEOF_INT128__)
-#define SIMDE_HAVE_INT128_
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_
-typedef __int128 simde_int128;
-typedef unsigned __int128 simde_uint128;
-HEDLEY_DIAGNOSTIC_POP
-#endif
-
-#if !defined(SIMDE_ENDIAN_LITTLE)
-#define SIMDE_ENDIAN_LITTLE 1234
-#endif
-#if !defined(SIMDE_ENDIAN_BIG)
-#define SIMDE_ENDIAN_BIG 4321
-#endif
-
-#if !defined(SIMDE_ENDIAN_ORDER)
-/* GCC (and compilers masquerading as GCC) define  __BYTE_ORDER__. */
-#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
-	(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
-#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
-#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
-	(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
-#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG
-/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */
-#elif defined(_BIG_ENDIAN)
-#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG
-#elif defined(_LITTLE_ENDIAN)
-#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
-/* We know the endianness of some common architectures.  Common
- * architectures not listed (ARM, POWER, MIPS, etc.) here are
- * bi-endian. */
-#elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
-#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
-#elif defined(__s390x__) || defined(__zarch__)
-#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG
-/* Looks like we'll have to rely on the platform.  If we're missing a
- * platform, please let us know. */
-#elif defined(_WIN32)
-#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
-#elif defined(sun) || defined(__sun) /* Solaris */
-#include <sys/byteorder.h>
-#if defined(_LITTLE_ENDIAN)
-#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
-#elif defined(_BIG_ENDIAN)
-#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG
-#endif
-#elif defined(__APPLE__)
-#include <libkern/OSByteOrder.h>
-#if defined(__LITTLE_ENDIAN__)
-#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
-#elif defined(__BIG_ENDIAN__)
-#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG
-#endif
-#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
-	defined(__bsdi__) || defined(__DragonFly__) || defined(BSD)
-#include <machine/endian.h>
-#if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN)
-#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
-#elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)
-#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG
-#endif
-#elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__)
-#include <endian.h>
-#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
-	(__BYTE_ORDER == __LITTLE_ENDIAN)
-#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
-#elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
-	(__BYTE_ORDER == __BIG_ENDIAN)
-#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG
-#endif
-#endif
-#endif
-
-#if HEDLEY_HAS_BUILTIN(__builtin_bswap64) ||  \
-	HEDLEY_GCC_VERSION_CHECK(4, 3, 0) ||  \
-	HEDLEY_IBM_VERSION_CHECK(13, 1, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define simde_bswap64(v) __builtin_bswap64(v)
-#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0)
-#define simde_bswap64(v) _byteswap_uint64(v)
-#else
-SIMDE_FUNCTION_ATTRIBUTES
-uint64_t simde_bswap64(uint64_t v)
-{
-	return ((v & (((uint64_t)0xff) << 56)) >> 56) |
-	       ((v & (((uint64_t)0xff) << 48)) >> 40) |
-	       ((v & (((uint64_t)0xff) << 40)) >> 24) |
-	       ((v & (((uint64_t)0xff) << 32)) >> 8) |
-	       ((v & (((uint64_t)0xff) << 24)) << 8) |
-	       ((v & (((uint64_t)0xff) << 16)) << 24) |
-	       ((v & (((uint64_t)0xff) << 8)) << 40) |
-	       ((v & (((uint64_t)0xff))) << 56);
-}
-#endif
-
-#if !defined(SIMDE_ENDIAN_ORDER)
-#error Unknown byte order; please file a bug
-#else
-#if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
-#define simde_endian_bswap64_be(value) simde_bswap64(value)
-#define simde_endian_bswap64_le(value) (value)
-#elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG
-#define simde_endian_bswap64_be(value) (value)
-#define simde_endian_bswap64_le(value) simde_bswap64(value)
-#endif
-#endif
-
-/* TODO: we should at least make an attempt to detect the correct
-   types for simde_float32/float64 instead of just assuming float and
-   double. */
-
-#if !defined(SIMDE_FLOAT32_TYPE)
-#define SIMDE_FLOAT32_TYPE float
-#define SIMDE_FLOAT32_C(value) value##f
-#else
-#define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE)value)
-#endif
-typedef SIMDE_FLOAT32_TYPE simde_float32;
-
-#if !defined(SIMDE_FLOAT64_TYPE)
-#define SIMDE_FLOAT64_TYPE double
-#define SIMDE_FLOAT64_C(value) value
-#else
-#define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT64_TYPE)value)
-#endif
-typedef SIMDE_FLOAT64_TYPE simde_float64;
-
-#if HEDLEY_HAS_WARNING("-Wbad-function-cast")
-#define SIMDE_CONVERT_FTOI(T, v)                                    \
-	HEDLEY_DIAGNOSTIC_PUSH                                      \
-	_Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \
-		HEDLEY_STATIC_CAST(T, (v)) HEDLEY_DIAGNOSTIC_POP
-#else
-#define SIMDE_CONVERT_FTOI(T, v) ((T)(v))
-#endif
-
-/* TODO: detect compilers which support this outside of C11 mode */
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
-#define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) \
-	_Generic((value), to                            \
-		 : (value), default                     \
-		 : (_Generic((value), from              \
-			     : ((to)(value)))))
-#define SIMDE_CHECKED_STATIC_CAST(to, from, value) \
-	_Generic((value), to                       \
-		 : (value), default                \
-		 : (_Generic((value), from         \
-			     : ((to)(value)))))
-#else
-#define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) \
-	HEDLEY_REINTERPRET_CAST(to, value)
-#define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value)
-#endif
-
-#if HEDLEY_HAS_WARNING("-Wfloat-equal")
-#define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL \
-	_Pragma("clang diagnostic ignored \"-Wfloat-equal\"")
-#elif HEDLEY_GCC_VERSION_CHECK(3, 0, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL \
-	_Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL
-#endif
-
-/* Some functions can trade accuracy for speed.  For those functions
-   you can control the trade-off using this macro.  Possible values:
-
-   0: prefer speed
-   1: reasonable trade-offs
-   2: prefer accuracy */
-#if !defined(SIMDE_ACCURACY_PREFERENCE)
-#define SIMDE_ACCURACY_PREFERENCE 1
-#endif
-
-#if defined(__STDC_HOSTED__)
-#define SIMDE_STDC_HOSTED __STDC_HOSTED__
-#else
-#if defined(HEDLEY_PGI_VERSION) || defined(HEDLEY_MSVC_VERSION)
-#define SIMDE_STDC_HOSTED 1
-#else
-#define SIMDE_STDC_HOSTED 0
-#endif
-#endif
-
-/* Try to deal with environments without a standard library. */
-#if !defined(simde_memcpy)
-#if HEDLEY_HAS_BUILTIN(__builtin_memcpy)
-#define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n)
-#endif
-#endif
-#if !defined(simde_memset)
-#if HEDLEY_HAS_BUILTIN(__builtin_memset)
-#define simde_memset(s, c, n) __builtin_memset(s, c, n)
-#endif
-#endif
-#if !defined(simde_memcmp)
-#if HEDLEY_HAS_BUILTIN(__builtin_memcmp)
-#define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n)
-#endif
-#endif
-
-#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp)
-#if !defined(SIMDE_NO_STRING_H)
-#if defined(__has_include)
-#if !__has_include(<string.h>)
-#define SIMDE_NO_STRING_H
-#endif
-#elif (SIMDE_STDC_HOSTED == 0)
-#define SIMDE_NO_STRING_H
-#endif
-#endif
-
-#if !defined(SIMDE_NO_STRING_H)
-#include <string.h>
-#if !defined(simde_memcpy)
-#define simde_memcpy(dest, src, n) memcpy(dest, src, n)
-#endif
-#if !defined(simde_memset)
-#define simde_memset(s, c, n) memset(s, c, n)
-#endif
-#if !defined(simde_memcmp)
-#define simde_memcmp(s1, s2, n) memcmp(s1, s2, n)
-#endif
-#else
-/* These are meant to be portable, not fast.  If you're hitting them you
-     * should think about providing your own (by defining the simde_memcpy
-     * macro prior to including any SIMDe files) or submitting a patch to
-     * SIMDe so we can detect your system-provided memcpy/memset, like by
-     * adding your compiler to the checks for __builtin_memcpy and/or
-     * __builtin_memset. */
-#if !defined(simde_memcpy)
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_memcpy_(void *dest, const void *src, size_t len)
-{
-	char *dest_ = HEDLEY_STATIC_CAST(char *, dest);
-	char *src_ = HEDLEY_STATIC_CAST(const char *, src);
-	for (size_t i = 0; i < len; i++) {
-		dest_[i] = src_[i];
-	}
-}
-#define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n)
-#endif
-
-#if !defined(simde_memset)
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_memset_(void *s, int c, size_t len)
-{
-	char *s_ = HEDLEY_STATIC_CAST(char *, s);
-	char c_ = HEDLEY_STATIC_CAST(char, c);
-	for (size_t i = 0; i < len; i++) {
-		s_[i] = c_[i];
-	}
-}
-#define simde_memset(s, c, n) simde_memset_(s, c, n)
-#endif
-
-#if !defined(simde_memcmp)
-SIMDE_FUCTION_ATTRIBUTES
-int simde_memcmp_(const void *s1, const void *s2, size_t n)
-{
-	unsigned char *s1_ = HEDLEY_STATIC_CAST(unsigned char *, s1);
-	unsigned char *s2_ = HEDLEY_STATIC_CAST(unsigned char *, s2);
-	for (size_t i = 0; i < len; i++) {
-		if (s1_[i] != s2_[i]) {
-			return (int)(s1_[i] - s2_[i]);
-		}
-	}
-	return 0;
-}
-#define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n)
-#endif
-#endif
-#endif
-
-#if defined(FE_ALL_EXCEPT)
-#define SIMDE_HAVE_FENV_H
-#elif defined(__has_include)
-#if __has_include(<fenv.h>)
-#include <fenv.h>
-#define SIMDE_HAVE_FENV_H
-#endif
-#elif SIMDE_STDC_HOSTED == 1
-#include <fenv.h>
-#define SIMDE_HAVE_FENV_H
-#endif
-
-#if defined(EXIT_FAILURE)
-#define SIMDE_HAVE_STDLIB_H
-#elif defined(__has_include)
-#if __has_include(<stdlib.h>)
-#include <stdlib.h>
-#define SIMDE_HAVE_STDLIB_H
-#endif
-#elif SIMDE_STDC_HOSTED == 1
-#include <stdlib.h>
-#define SIMDE_HAVE_STDLIB_H
-#endif
-
-#if defined(__has_include)
-#if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include(<cfenv>)
-#include <cfenv>
-#elif __has_include(<fenv.h>)
-#include <fenv.h>
-#endif
-#if __has_include(<stdlib.h>)
-#include <stdlib.h>
-#endif
-#elif SIMDE_STDC_HOSTED == 1
-#include <stdlib.h>
-#include <fenv.h>
-#endif
-
-#include "check.h"
-
-/* GCC/clang have a bunch of functionality in builtins which we would
- * like to access, but the suffixes indicate whether the operate on
- * int, long, or long long, not fixed width types (e.g., int32_t).
- * we use these macros to attempt to map from fixed-width to the
- * names GCC uses.  Note that you should still cast the input(s) and
- * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if
- * types are the same size they may not be compatible according to the
- * compiler.  For example, on x86 long and long lonsg are generally
- * both 64 bits, but platforms vary on whether an int64_t is mapped
- * to a long or long long. */
-
-#include <limits.h>
-
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_
-
-#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN)
-#define SIMDE_BUILTIN_SUFFIX_8_
-#define SIMDE_BUILTIN_TYPE_8_ int
-#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN)
-#define SIMDE_BUILTIN_SUFFIX_8_ l
-#define SIMDE_BUILTIN_TYPE_8_ long
-#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN)
-#define SIMDE_BUILTIN_SUFFIX_8_ ll
-#define SIMDE_BUILTIN_TYPE_8_ long long
-#endif
-
-#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN)
-#define SIMDE_BUILTIN_SUFFIX_16_
-#define SIMDE_BUILTIN_TYPE_16_ int
-#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN)
-#define SIMDE_BUILTIN_SUFFIX_16_ l
-#define SIMDE_BUILTIN_TYPE_16_ long
-#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN)
-#define SIMDE_BUILTIN_SUFFIX_16_ ll
-#define SIMDE_BUILTIN_TYPE_16_ long long
-#endif
-
-#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN)
-#define SIMDE_BUILTIN_SUFFIX_32_
-#define SIMDE_BUILTIN_TYPE_32_ int
-#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN)
-#define SIMDE_BUILTIN_SUFFIX_32_ l
-#define SIMDE_BUILTIN_TYPE_32_ long
-#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN)
-#define SIMDE_BUILTIN_SUFFIX_32_ ll
-#define SIMDE_BUILTIN_TYPE_32_ long long
-#endif
-
-#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN)
-#define SIMDE_BUILTIN_SUFFIX_64_
-#define SIMDE_BUILTIN_TYPE_64_ int
-#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN)
-#define SIMDE_BUILTIN_SUFFIX_64_ l
-#define SIMDE_BUILTIN_TYPE_64_ long
-#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN)
-#define SIMDE_BUILTIN_SUFFIX_64_ ll
-#define SIMDE_BUILTIN_TYPE_64_ long long
-#endif
-
-#if defined(SIMDE_BUILTIN_SUFFIX_8_)
-#define SIMDE_BUILTIN_8_(name) \
-	HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)
-#define SIMDE_BUILTIN_HAS_8_(name) \
-	HEDLEY_HAS_BUILTIN(        \
-		HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_))
-#else
-#define SIMDE_BUILTIN_HAS_8_(name) 0
-#endif
-#if defined(SIMDE_BUILTIN_SUFFIX_16_)
-#define SIMDE_BUILTIN_16_(name) \
-	HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)
-#define SIMDE_BUILTIN_HAS_16_(name) \
-	HEDLEY_HAS_BUILTIN(         \
-		HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_))
-#else
-#define SIMDE_BUILTIN_HAS_16_(name) 0
-#endif
-#if defined(SIMDE_BUILTIN_SUFFIX_32_)
-#define SIMDE_BUILTIN_32_(name) \
-	HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)
-#define SIMDE_BUILTIN_HAS_32_(name) \
-	HEDLEY_HAS_BUILTIN(         \
-		HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_))
-#else
-#define SIMDE_BUILTIN_HAS_32_(name) 0
-#endif
-#if defined(SIMDE_BUILTIN_SUFFIX_64_)
-#define SIMDE_BUILTIN_64_(name) \
-	HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)
-#define SIMDE_BUILTIN_HAS_64_(name) \
-	HEDLEY_HAS_BUILTIN(         \
-		HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_))
-#else
-#define SIMDE_BUILTIN_HAS_64_(name) 0
-#endif
-
-HEDLEY_DIAGNOSTIC_POP
-
-/* Sometimes we run into problems with specific versions of compilers
-   which make the native versions unusable for us.  Often this is due
-   to missing functions, sometimes buggy implementations, etc.  These
-   macros are how we check for specific bugs.  As they are fixed we'll
-   start only defining them for problematic compiler versions. */
-
-#if !defined(SIMDE_IGNORE_COMPILER_BUGS)
-#if defined(HEDLEY_GCC_VERSION)
-#if !HEDLEY_GCC_VERSION_CHECK(4, 9, 0)
-#define SIMDE_BUG_GCC_REV_208793
-#endif
-#if !HEDLEY_GCC_VERSION_CHECK(5, 0, 0)
-#define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */
-#endif
-#if !HEDLEY_GCC_VERSION_CHECK(4, 6, 0)
-#define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */
-#endif
-#if !HEDLEY_GCC_VERSION_CHECK(8, 0, 0)
-#define SIMDE_BUG_GCC_REV_247851
-#endif
-#if !HEDLEY_GCC_VERSION_CHECK(10, 0, 0)
-#define SIMDE_BUG_GCC_REV_274313
-#define SIMDE_BUG_GCC_91341
-#endif
-#if !HEDLEY_GCC_VERSION_CHECK(9, 0, 0) && defined(SIMDE_ARCH_AARCH64)
-#define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR
-#endif
-#if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)
-#define SIMDE_BUG_GCC_94482
-#endif
-#if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || \
-	defined(SIMDE_ARCH_SYSTEMZ)
-#define SIMDE_BUG_GCC_53784
-#endif
-#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)
-#if HEDLEY_GCC_VERSION_CHECK(4, 3, 0) /* -Wsign-conversion */
-#define SIMDE_BUG_GCC_95144
-#endif
-#endif
-#if !HEDLEY_GCC_VERSION_CHECK(9, 4, 0) && defined(SIMDE_ARCH_AARCH64)
-#define SIMDE_BUG_GCC_94488
-#endif
-#if defined(SIMDE_ARCH_ARM)
-#define SIMDE_BUG_GCC_95399
-#define SIMDE_BUG_GCC_95471
-#elif defined(SIMDE_ARCH_POWER)
-#define SIMDE_BUG_GCC_95227
-#define SIMDE_BUG_GCC_95782
-#elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)
-#if !HEDLEY_GCC_VERSION_CHECK(10, 2, 0) && !defined(__OPTIMIZE__)
-#define SIMDE_BUG_GCC_96174
-#endif
-#endif
-#define SIMDE_BUG_GCC_95399
-#elif defined(__clang__)
-#if defined(SIMDE_ARCH_AARCH64)
-#define SIMDE_BUG_CLANG_45541
-#define SIMDE_BUG_CLANG_46844
-#define SIMDE_BUG_CLANG_48257
-#if SIMDE_DETECT_CLANG_VERSION_CHECK(10, 0, 0) && \
-	SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)
-#define SIMDE_BUG_CLANG_BAD_VI64_OPS
-#endif
-#endif
-#if defined(SIMDE_ARCH_POWER)
-#define SIMDE_BUG_CLANG_46770
-#endif
-#if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0) && \
-	!defined(__OPTIMIZE__)
-#define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT
-#endif
-#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)
-#if HEDLEY_HAS_WARNING("-Wsign-conversion") && \
-	SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)
-#define SIMDE_BUG_CLANG_45931
-#endif
-#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
-	SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)
-#define SIMDE_BUG_CLANG_44589
-#endif
-#endif
-#define SIMDE_BUG_CLANG_45959
-#elif defined(HEDLEY_MSVC_VERSION)
-#if defined(SIMDE_ARCH_X86)
-#define SIMDE_BUG_MSVC_ROUND_EXTRACT
-#endif
-#elif defined(HEDLEY_INTEL_VERSION)
-#define SIMDE_BUG_INTEL_857088
-#endif
-#endif
-
-/* GCC and Clang both have the same issue:
- * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144
- * https://bugs.llvm.org/show_bug.cgi?id=45931
- * This is just an easy way to work around it.
- */
-#if (HEDLEY_HAS_WARNING("-Wsign-conversion") &&   \
-     SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)) || \
-	HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
-#define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr)                                           \
-	(__extension__({                                                                 \
-		HEDLEY_DIAGNOSTIC_PUSH                                                   \
-		HEDLEY_DIAGNOSTIC_POP                                                    \
-		_Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") __typeof__(expr) \
-			simde_bug_ignore_sign_conversion_v_ = (expr);                    \
-		HEDLEY_DIAGNOSTIC_PUSH                                                   \
-		simde_bug_ignore_sign_conversion_v_;                                     \
-	}))
-#else
-#define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr)
-#endif
-
-#endif /* !defined(SIMDE_COMMON_H) */

+ 0 - 925
libobs/util/simde/simde-constify.h

@@ -1,925 +0,0 @@
-/* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- *   2020      Evan Nemerson <[email protected]>
- */
-
-/* Constify macros.  For internal use only.
- *
- * These are used to make it possible to call a function which takes
- * an Integer Constant Expression (ICE) using a compile time constant.
- * Technically it would also be possible to use a value not trivially
- * known by the compiler, but there would be a siginficant performance
- * hit (a switch switch is used).
- *
- * The basic idea is pretty simple; we just emit a do while loop which
- * contains a switch with a case for every possible value of the
- * constant.
- *
- * As long as the value you pass to the function in constant, pretty
- * much any copmiler shouldn't have a problem generating exactly the
- * same code as if you had used an ICE.
- *
- * This is intended to be used in the SIMDe implementations of
- * functions the compilers require to be an ICE, but the other benefit
- * is that if we also disable the warnings from
- * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests
- * to use non-ICE parameters
- */
-
-#if !defined(SIMDE_CONSTIFY_H)
-#define SIMDE_CONSTIFY_H
-
-#include "simde-diagnostic.h"
-
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_
-SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_
-
-#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \
-	do {                                                         \
-		switch (imm) {                                       \
-		case 0:                                              \
-			result = func_name(__VA_ARGS__, 0);          \
-			break;                                       \
-		case 1:                                              \
-			result = func_name(__VA_ARGS__, 1);          \
-			break;                                       \
-		default:                                             \
-			result = default_case;                       \
-			break;                                       \
-		}                                                    \
-	} while (0)
-
-#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \
-	do {                                                         \
-		switch (imm) {                                       \
-		case 0:                                              \
-			result = func_name(__VA_ARGS__, 0);          \
-			break;                                       \
-		case 1:                                              \
-			result = func_name(__VA_ARGS__, 1);          \
-			break;                                       \
-		case 2:                                              \
-			result = func_name(__VA_ARGS__, 2);          \
-			break;                                       \
-		case 3:                                              \
-			result = func_name(__VA_ARGS__, 3);          \
-			break;                                       \
-		default:                                             \
-			result = default_case;                       \
-			break;                                       \
-		}                                                    \
-	} while (0)
-
-#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \
-	do {                                                         \
-		switch (imm) {                                       \
-		case 0:                                              \
-			result = func_name(__VA_ARGS__, 0);          \
-			break;                                       \
-		case 1:                                              \
-			result = func_name(__VA_ARGS__, 1);          \
-			break;                                       \
-		case 2:                                              \
-			result = func_name(__VA_ARGS__, 2);          \
-			break;                                       \
-		case 3:                                              \
-			result = func_name(__VA_ARGS__, 3);          \
-			break;                                       \
-		case 4:                                              \
-			result = func_name(__VA_ARGS__, 4);          \
-			break;                                       \
-		case 5:                                              \
-			result = func_name(__VA_ARGS__, 5);          \
-			break;                                       \
-		case 6:                                              \
-			result = func_name(__VA_ARGS__, 6);          \
-			break;                                       \
-		case 7:                                              \
-			result = func_name(__VA_ARGS__, 7);          \
-			break;                                       \
-		default:                                             \
-			result = default_case;                       \
-			break;                                       \
-		}                                                    \
-	} while (0)
-
-#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) \
-	do {                                                          \
-		switch (imm) {                                        \
-		case 0:                                               \
-			result = func_name(__VA_ARGS__, 0);           \
-			break;                                        \
-		case 1:                                               \
-			result = func_name(__VA_ARGS__, 1);           \
-			break;                                        \
-		case 2:                                               \
-			result = func_name(__VA_ARGS__, 2);           \
-			break;                                        \
-		case 3:                                               \
-			result = func_name(__VA_ARGS__, 3);           \
-			break;                                        \
-		case 4:                                               \
-			result = func_name(__VA_ARGS__, 4);           \
-			break;                                        \
-		case 5:                                               \
-			result = func_name(__VA_ARGS__, 5);           \
-			break;                                        \
-		case 6:                                               \
-			result = func_name(__VA_ARGS__, 6);           \
-			break;                                        \
-		case 7:                                               \
-			result = func_name(__VA_ARGS__, 7);           \
-			break;                                        \
-		case 8:                                               \
-			result = func_name(__VA_ARGS__, 8);           \
-			break;                                        \
-		case 9:                                               \
-			result = func_name(__VA_ARGS__, 9);           \
-			break;                                        \
-		case 10:                                              \
-			result = func_name(__VA_ARGS__, 10);          \
-			break;                                        \
-		case 11:                                              \
-			result = func_name(__VA_ARGS__, 11);          \
-			break;                                        \
-		case 12:                                              \
-			result = func_name(__VA_ARGS__, 12);          \
-			break;                                        \
-		case 13:                                              \
-			result = func_name(__VA_ARGS__, 13);          \
-			break;                                        \
-		case 14:                                              \
-			result = func_name(__VA_ARGS__, 14);          \
-			break;                                        \
-		case 15:                                              \
-			result = func_name(__VA_ARGS__, 15);          \
-			break;                                        \
-		default:                                              \
-			result = default_case;                        \
-			break;                                        \
-		}                                                     \
-	} while (0)
-
-#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \
-	do {                                                          \
-		switch (imm) {                                        \
-		case 0:                                               \
-			result = func_name(__VA_ARGS__, 0);           \
-			break;                                        \
-		case 1:                                               \
-			result = func_name(__VA_ARGS__, 1);           \
-			break;                                        \
-		case 2:                                               \
-			result = func_name(__VA_ARGS__, 2);           \
-			break;                                        \
-		case 3:                                               \
-			result = func_name(__VA_ARGS__, 3);           \
-			break;                                        \
-		case 4:                                               \
-			result = func_name(__VA_ARGS__, 4);           \
-			break;                                        \
-		case 5:                                               \
-			result = func_name(__VA_ARGS__, 5);           \
-			break;                                        \
-		case 6:                                               \
-			result = func_name(__VA_ARGS__, 6);           \
-			break;                                        \
-		case 7:                                               \
-			result = func_name(__VA_ARGS__, 7);           \
-			break;                                        \
-		case 8:                                               \
-			result = func_name(__VA_ARGS__, 8);           \
-			break;                                        \
-		case 9:                                               \
-			result = func_name(__VA_ARGS__, 9);           \
-			break;                                        \
-		case 10:                                              \
-			result = func_name(__VA_ARGS__, 10);          \
-			break;                                        \
-		case 11:                                              \
-			result = func_name(__VA_ARGS__, 11);          \
-			break;                                        \
-		case 12:                                              \
-			result = func_name(__VA_ARGS__, 12);          \
-			break;                                        \
-		case 13:                                              \
-			result = func_name(__VA_ARGS__, 13);          \
-			break;                                        \
-		case 14:                                              \
-			result = func_name(__VA_ARGS__, 14);          \
-			break;                                        \
-		case 15:                                              \
-			result = func_name(__VA_ARGS__, 15);          \
-			break;                                        \
-		case 16:                                              \
-			result = func_name(__VA_ARGS__, 16);          \
-			break;                                        \
-		case 17:                                              \
-			result = func_name(__VA_ARGS__, 17);          \
-			break;                                        \
-		case 18:                                              \
-			result = func_name(__VA_ARGS__, 18);          \
-			break;                                        \
-		case 19:                                              \
-			result = func_name(__VA_ARGS__, 19);          \
-			break;                                        \
-		case 20:                                              \
-			result = func_name(__VA_ARGS__, 20);          \
-			break;                                        \
-		case 21:                                              \
-			result = func_name(__VA_ARGS__, 21);          \
-			break;                                        \
-		case 22:                                              \
-			result = func_name(__VA_ARGS__, 22);          \
-			break;                                        \
-		case 23:                                              \
-			result = func_name(__VA_ARGS__, 23);          \
-			break;                                        \
-		case 24:                                              \
-			result = func_name(__VA_ARGS__, 24);          \
-			break;                                        \
-		case 25:                                              \
-			result = func_name(__VA_ARGS__, 25);          \
-			break;                                        \
-		case 26:                                              \
-			result = func_name(__VA_ARGS__, 26);          \
-			break;                                        \
-		case 27:                                              \
-			result = func_name(__VA_ARGS__, 27);          \
-			break;                                        \
-		case 28:                                              \
-			result = func_name(__VA_ARGS__, 28);          \
-			break;                                        \
-		case 29:                                              \
-			result = func_name(__VA_ARGS__, 29);          \
-			break;                                        \
-		case 30:                                              \
-			result = func_name(__VA_ARGS__, 30);          \
-			break;                                        \
-		case 31:                                              \
-			result = func_name(__VA_ARGS__, 31);          \
-			break;                                        \
-		default:                                              \
-			result = default_case;                        \
-			break;                                        \
-		}                                                     \
-	} while (0)
-
-#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) \
-	do {                                                          \
-		switch (imm) {                                        \
-		case 0:                                               \
-			result = func_name(__VA_ARGS__, 0);           \
-			break;                                        \
-		case 1:                                               \
-			result = func_name(__VA_ARGS__, 1);           \
-			break;                                        \
-		case 2:                                               \
-			result = func_name(__VA_ARGS__, 2);           \
-			break;                                        \
-		case 3:                                               \
-			result = func_name(__VA_ARGS__, 3);           \
-			break;                                        \
-		case 4:                                               \
-			result = func_name(__VA_ARGS__, 4);           \
-			break;                                        \
-		case 5:                                               \
-			result = func_name(__VA_ARGS__, 5);           \
-			break;                                        \
-		case 6:                                               \
-			result = func_name(__VA_ARGS__, 6);           \
-			break;                                        \
-		case 7:                                               \
-			result = func_name(__VA_ARGS__, 7);           \
-			break;                                        \
-		case 8:                                               \
-			result = func_name(__VA_ARGS__, 8);           \
-			break;                                        \
-		case 9:                                               \
-			result = func_name(__VA_ARGS__, 9);           \
-			break;                                        \
-		case 10:                                              \
-			result = func_name(__VA_ARGS__, 10);          \
-			break;                                        \
-		case 11:                                              \
-			result = func_name(__VA_ARGS__, 11);          \
-			break;                                        \
-		case 12:                                              \
-			result = func_name(__VA_ARGS__, 12);          \
-			break;                                        \
-		case 13:                                              \
-			result = func_name(__VA_ARGS__, 13);          \
-			break;                                        \
-		case 14:                                              \
-			result = func_name(__VA_ARGS__, 14);          \
-			break;                                        \
-		case 15:                                              \
-			result = func_name(__VA_ARGS__, 15);          \
-			break;                                        \
-		case 16:                                              \
-			result = func_name(__VA_ARGS__, 16);          \
-			break;                                        \
-		case 17:                                              \
-			result = func_name(__VA_ARGS__, 17);          \
-			break;                                        \
-		case 18:                                              \
-			result = func_name(__VA_ARGS__, 18);          \
-			break;                                        \
-		case 19:                                              \
-			result = func_name(__VA_ARGS__, 19);          \
-			break;                                        \
-		case 20:                                              \
-			result = func_name(__VA_ARGS__, 20);          \
-			break;                                        \
-		case 21:                                              \
-			result = func_name(__VA_ARGS__, 21);          \
-			break;                                        \
-		case 22:                                              \
-			result = func_name(__VA_ARGS__, 22);          \
-			break;                                        \
-		case 23:                                              \
-			result = func_name(__VA_ARGS__, 23);          \
-			break;                                        \
-		case 24:                                              \
-			result = func_name(__VA_ARGS__, 24);          \
-			break;                                        \
-		case 25:                                              \
-			result = func_name(__VA_ARGS__, 25);          \
-			break;                                        \
-		case 26:                                              \
-			result = func_name(__VA_ARGS__, 26);          \
-			break;                                        \
-		case 27:                                              \
-			result = func_name(__VA_ARGS__, 27);          \
-			break;                                        \
-		case 28:                                              \
-			result = func_name(__VA_ARGS__, 28);          \
-			break;                                        \
-		case 29:                                              \
-			result = func_name(__VA_ARGS__, 29);          \
-			break;                                        \
-		case 30:                                              \
-			result = func_name(__VA_ARGS__, 30);          \
-			break;                                        \
-		case 31:                                              \
-			result = func_name(__VA_ARGS__, 31);          \
-			break;                                        \
-		case 32:                                              \
-			result = func_name(__VA_ARGS__, 32);          \
-			break;                                        \
-		case 33:                                              \
-			result = func_name(__VA_ARGS__, 33);          \
-			break;                                        \
-		case 34:                                              \
-			result = func_name(__VA_ARGS__, 34);          \
-			break;                                        \
-		case 35:                                              \
-			result = func_name(__VA_ARGS__, 35);          \
-			break;                                        \
-		case 36:                                              \
-			result = func_name(__VA_ARGS__, 36);          \
-			break;                                        \
-		case 37:                                              \
-			result = func_name(__VA_ARGS__, 37);          \
-			break;                                        \
-		case 38:                                              \
-			result = func_name(__VA_ARGS__, 38);          \
-			break;                                        \
-		case 39:                                              \
-			result = func_name(__VA_ARGS__, 39);          \
-			break;                                        \
-		case 40:                                              \
-			result = func_name(__VA_ARGS__, 40);          \
-			break;                                        \
-		case 41:                                              \
-			result = func_name(__VA_ARGS__, 41);          \
-			break;                                        \
-		case 42:                                              \
-			result = func_name(__VA_ARGS__, 42);          \
-			break;                                        \
-		case 43:                                              \
-			result = func_name(__VA_ARGS__, 43);          \
-			break;                                        \
-		case 44:                                              \
-			result = func_name(__VA_ARGS__, 44);          \
-			break;                                        \
-		case 45:                                              \
-			result = func_name(__VA_ARGS__, 45);          \
-			break;                                        \
-		case 46:                                              \
-			result = func_name(__VA_ARGS__, 46);          \
-			break;                                        \
-		case 47:                                              \
-			result = func_name(__VA_ARGS__, 47);          \
-			break;                                        \
-		case 48:                                              \
-			result = func_name(__VA_ARGS__, 48);          \
-			break;                                        \
-		case 49:                                              \
-			result = func_name(__VA_ARGS__, 49);          \
-			break;                                        \
-		case 50:                                              \
-			result = func_name(__VA_ARGS__, 50);          \
-			break;                                        \
-		case 51:                                              \
-			result = func_name(__VA_ARGS__, 51);          \
-			break;                                        \
-		case 52:                                              \
-			result = func_name(__VA_ARGS__, 52);          \
-			break;                                        \
-		case 53:                                              \
-			result = func_name(__VA_ARGS__, 53);          \
-			break;                                        \
-		case 54:                                              \
-			result = func_name(__VA_ARGS__, 54);          \
-			break;                                        \
-		case 55:                                              \
-			result = func_name(__VA_ARGS__, 55);          \
-			break;                                        \
-		case 56:                                              \
-			result = func_name(__VA_ARGS__, 56);          \
-			break;                                        \
-		case 57:                                              \
-			result = func_name(__VA_ARGS__, 57);          \
-			break;                                        \
-		case 58:                                              \
-			result = func_name(__VA_ARGS__, 58);          \
-			break;                                        \
-		case 59:                                              \
-			result = func_name(__VA_ARGS__, 59);          \
-			break;                                        \
-		case 60:                                              \
-			result = func_name(__VA_ARGS__, 60);          \
-			break;                                        \
-		case 61:                                              \
-			result = func_name(__VA_ARGS__, 61);          \
-			break;                                        \
-		case 62:                                              \
-			result = func_name(__VA_ARGS__, 62);          \
-			break;                                        \
-		case 63:                                              \
-			result = func_name(__VA_ARGS__, 63);          \
-			break;                                        \
-		default:                                              \
-			result = default_case;                        \
-			break;                                        \
-		}                                                     \
-	} while (0)
-
-#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \
-	do {                                                           \
-		switch (imm) {                                         \
-		case 0:                                                \
-			func_name(__VA_ARGS__, 0);                     \
-			break;                                         \
-		case 1:                                                \
-			func_name(__VA_ARGS__, 1);                     \
-			break;                                         \
-		default:                                               \
-			default_case;                                  \
-			break;                                         \
-		}                                                      \
-	} while (0)
-
-#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \
-	do {                                                           \
-		switch (imm) {                                         \
-		case 0:                                                \
-			func_name(__VA_ARGS__, 0);                     \
-			break;                                         \
-		case 1:                                                \
-			func_name(__VA_ARGS__, 1);                     \
-			break;                                         \
-		case 2:                                                \
-			func_name(__VA_ARGS__, 2);                     \
-			break;                                         \
-		case 3:                                                \
-			func_name(__VA_ARGS__, 3);                     \
-			break;                                         \
-		default:                                               \
-			default_case;                                  \
-			break;                                         \
-		}                                                      \
-	} while (0)
-
-#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \
-	do {                                                           \
-		switch (imm) {                                         \
-		case 0:                                                \
-			func_name(__VA_ARGS__, 0);                     \
-			break;                                         \
-		case 1:                                                \
-			func_name(__VA_ARGS__, 1);                     \
-			break;                                         \
-		case 2:                                                \
-			func_name(__VA_ARGS__, 2);                     \
-			break;                                         \
-		case 3:                                                \
-			func_name(__VA_ARGS__, 3);                     \
-			break;                                         \
-		case 4:                                                \
-			func_name(__VA_ARGS__, 4);                     \
-			break;                                         \
-		case 5:                                                \
-			func_name(__VA_ARGS__, 5);                     \
-			break;                                         \
-		case 6:                                                \
-			func_name(__VA_ARGS__, 6);                     \
-			break;                                         \
-		case 7:                                                \
-			func_name(__VA_ARGS__, 7);                     \
-			break;                                         \
-		default:                                               \
-			default_case;                                  \
-			break;                                         \
-		}                                                      \
-	} while (0)
-
-#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \
-	do {                                                            \
-		switch (imm) {                                          \
-		case 0:                                                 \
-			func_name(__VA_ARGS__, 0);                      \
-			break;                                          \
-		case 1:                                                 \
-			func_name(__VA_ARGS__, 1);                      \
-			break;                                          \
-		case 2:                                                 \
-			func_name(__VA_ARGS__, 2);                      \
-			break;                                          \
-		case 3:                                                 \
-			func_name(__VA_ARGS__, 3);                      \
-			break;                                          \
-		case 4:                                                 \
-			func_name(__VA_ARGS__, 4);                      \
-			break;                                          \
-		case 5:                                                 \
-			func_name(__VA_ARGS__, 5);                      \
-			break;                                          \
-		case 6:                                                 \
-			func_name(__VA_ARGS__, 6);                      \
-			break;                                          \
-		case 7:                                                 \
-			func_name(__VA_ARGS__, 7);                      \
-			break;                                          \
-		case 8:                                                 \
-			func_name(__VA_ARGS__, 8);                      \
-			break;                                          \
-		case 9:                                                 \
-			func_name(__VA_ARGS__, 9);                      \
-			break;                                          \
-		case 10:                                                \
-			func_name(__VA_ARGS__, 10);                     \
-			break;                                          \
-		case 11:                                                \
-			func_name(__VA_ARGS__, 11);                     \
-			break;                                          \
-		case 12:                                                \
-			func_name(__VA_ARGS__, 12);                     \
-			break;                                          \
-		case 13:                                                \
-			func_name(__VA_ARGS__, 13);                     \
-			break;                                          \
-		case 14:                                                \
-			func_name(__VA_ARGS__, 14);                     \
-			break;                                          \
-		case 15:                                                \
-			func_name(__VA_ARGS__, 15);                     \
-			break;                                          \
-		default:                                                \
-			default_case;                                   \
-			break;                                          \
-		}                                                       \
-	} while (0)
-
-#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) \
-	do {                                                            \
-		switch (imm) {                                          \
-		case 0:                                                 \
-			func_name(__VA_ARGS__, 0);                      \
-			break;                                          \
-		case 1:                                                 \
-			func_name(__VA_ARGS__, 1);                      \
-			break;                                          \
-		case 2:                                                 \
-			func_name(__VA_ARGS__, 2);                      \
-			break;                                          \
-		case 3:                                                 \
-			func_name(__VA_ARGS__, 3);                      \
-			break;                                          \
-		case 4:                                                 \
-			func_name(__VA_ARGS__, 4);                      \
-			break;                                          \
-		case 5:                                                 \
-			func_name(__VA_ARGS__, 5);                      \
-			break;                                          \
-		case 6:                                                 \
-			func_name(__VA_ARGS__, 6);                      \
-			break;                                          \
-		case 7:                                                 \
-			func_name(__VA_ARGS__, 7);                      \
-			break;                                          \
-		case 8:                                                 \
-			func_name(__VA_ARGS__, 8);                      \
-			break;                                          \
-		case 9:                                                 \
-			func_name(__VA_ARGS__, 9);                      \
-			break;                                          \
-		case 10:                                                \
-			func_name(__VA_ARGS__, 10);                     \
-			break;                                          \
-		case 11:                                                \
-			func_name(__VA_ARGS__, 11);                     \
-			break;                                          \
-		case 12:                                                \
-			func_name(__VA_ARGS__, 12);                     \
-			break;                                          \
-		case 13:                                                \
-			func_name(__VA_ARGS__, 13);                     \
-			break;                                          \
-		case 14:                                                \
-			func_name(__VA_ARGS__, 14);                     \
-			break;                                          \
-		case 15:                                                \
-			func_name(__VA_ARGS__, 15);                     \
-			break;                                          \
-		case 16:                                                \
-			func_name(__VA_ARGS__, 16);                     \
-			break;                                          \
-		case 17:                                                \
-			func_name(__VA_ARGS__, 17);                     \
-			break;                                          \
-		case 18:                                                \
-			func_name(__VA_ARGS__, 18);                     \
-			break;                                          \
-		case 19:                                                \
-			func_name(__VA_ARGS__, 19);                     \
-			break;                                          \
-		case 20:                                                \
-			func_name(__VA_ARGS__, 20);                     \
-			break;                                          \
-		case 21:                                                \
-			func_name(__VA_ARGS__, 21);                     \
-			break;                                          \
-		case 22:                                                \
-			func_name(__VA_ARGS__, 22);                     \
-			break;                                          \
-		case 23:                                                \
-			func_name(__VA_ARGS__, 23);                     \
-			break;                                          \
-		case 24:                                                \
-			func_name(__VA_ARGS__, 24);                     \
-			break;                                          \
-		case 25:                                                \
-			func_name(__VA_ARGS__, 25);                     \
-			break;                                          \
-		case 26:                                                \
-			func_name(__VA_ARGS__, 26);                     \
-			break;                                          \
-		case 27:                                                \
-			func_name(__VA_ARGS__, 27);                     \
-			break;                                          \
-		case 28:                                                \
-			func_name(__VA_ARGS__, 28);                     \
-			break;                                          \
-		case 29:                                                \
-			func_name(__VA_ARGS__, 29);                     \
-			break;                                          \
-		case 30:                                                \
-			func_name(__VA_ARGS__, 30);                     \
-			break;                                          \
-		case 31:                                                \
-			func_name(__VA_ARGS__, 31);                     \
-			break;                                          \
-		default:                                                \
-			default_case;                                   \
-			break;                                          \
-		}                                                       \
-	} while (0)
-
-#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \
-	do {                                                            \
-		switch (imm) {                                          \
-		case 0:                                                 \
-			func_name(__VA_ARGS__, 0);                      \
-			break;                                          \
-		case 1:                                                 \
-			func_name(__VA_ARGS__, 1);                      \
-			break;                                          \
-		case 2:                                                 \
-			func_name(__VA_ARGS__, 2);                      \
-			break;                                          \
-		case 3:                                                 \
-			func_name(__VA_ARGS__, 3);                      \
-			break;                                          \
-		case 4:                                                 \
-			func_name(__VA_ARGS__, 4);                      \
-			break;                                          \
-		case 5:                                                 \
-			func_name(__VA_ARGS__, 5);                      \
-			break;                                          \
-		case 6:                                                 \
-			func_name(__VA_ARGS__, 6);                      \
-			break;                                          \
-		case 7:                                                 \
-			func_name(__VA_ARGS__, 7);                      \
-			break;                                          \
-		case 8:                                                 \
-			func_name(__VA_ARGS__, 8);                      \
-			break;                                          \
-		case 9:                                                 \
-			func_name(__VA_ARGS__, 9);                      \
-			break;                                          \
-		case 10:                                                \
-			func_name(__VA_ARGS__, 10);                     \
-			break;                                          \
-		case 11:                                                \
-			func_name(__VA_ARGS__, 11);                     \
-			break;                                          \
-		case 12:                                                \
-			func_name(__VA_ARGS__, 12);                     \
-			break;                                          \
-		case 13:                                                \
-			func_name(__VA_ARGS__, 13);                     \
-			break;                                          \
-		case 14:                                                \
-			func_name(__VA_ARGS__, 14);                     \
-			break;                                          \
-		case 15:                                                \
-			func_name(__VA_ARGS__, 15);                     \
-			break;                                          \
-		case 16:                                                \
-			func_name(__VA_ARGS__, 16);                     \
-			break;                                          \
-		case 17:                                                \
-			func_name(__VA_ARGS__, 17);                     \
-			break;                                          \
-		case 18:                                                \
-			func_name(__VA_ARGS__, 18);                     \
-			break;                                          \
-		case 19:                                                \
-			func_name(__VA_ARGS__, 19);                     \
-			break;                                          \
-		case 20:                                                \
-			func_name(__VA_ARGS__, 20);                     \
-			break;                                          \
-		case 21:                                                \
-			func_name(__VA_ARGS__, 21);                     \
-			break;                                          \
-		case 22:                                                \
-			func_name(__VA_ARGS__, 22);                     \
-			break;                                          \
-		case 23:                                                \
-			func_name(__VA_ARGS__, 23);                     \
-			break;                                          \
-		case 24:                                                \
-			func_name(__VA_ARGS__, 24);                     \
-			break;                                          \
-		case 25:                                                \
-			func_name(__VA_ARGS__, 25);                     \
-			break;                                          \
-		case 26:                                                \
-			func_name(__VA_ARGS__, 26);                     \
-			break;                                          \
-		case 27:                                                \
-			func_name(__VA_ARGS__, 27);                     \
-			break;                                          \
-		case 28:                                                \
-			func_name(__VA_ARGS__, 28);                     \
-			break;                                          \
-		case 29:                                                \
-			func_name(__VA_ARGS__, 29);                     \
-			break;                                          \
-		case 30:                                                \
-			func_name(__VA_ARGS__, 30);                     \
-			break;                                          \
-		case 31:                                                \
-			func_name(__VA_ARGS__, 31);                     \
-			break;                                          \
-		case 32:                                                \
-			func_name(__VA_ARGS__, 32);                     \
-			break;                                          \
-		case 33:                                                \
-			func_name(__VA_ARGS__, 33);                     \
-			break;                                          \
-		case 34:                                                \
-			func_name(__VA_ARGS__, 34);                     \
-			break;                                          \
-		case 35:                                                \
-			func_name(__VA_ARGS__, 35);                     \
-			break;                                          \
-		case 36:                                                \
-			func_name(__VA_ARGS__, 36);                     \
-			break;                                          \
-		case 37:                                                \
-			func_name(__VA_ARGS__, 37);                     \
-			break;                                          \
-		case 38:                                                \
-			func_name(__VA_ARGS__, 38);                     \
-			break;                                          \
-		case 39:                                                \
-			func_name(__VA_ARGS__, 39);                     \
-			break;                                          \
-		case 40:                                                \
-			func_name(__VA_ARGS__, 40);                     \
-			break;                                          \
-		case 41:                                                \
-			func_name(__VA_ARGS__, 41);                     \
-			break;                                          \
-		case 42:                                                \
-			func_name(__VA_ARGS__, 42);                     \
-			break;                                          \
-		case 43:                                                \
-			func_name(__VA_ARGS__, 43);                     \
-			break;                                          \
-		case 44:                                                \
-			func_name(__VA_ARGS__, 44);                     \
-			break;                                          \
-		case 45:                                                \
-			func_name(__VA_ARGS__, 45);                     \
-			break;                                          \
-		case 46:                                                \
-			func_name(__VA_ARGS__, 46);                     \
-			break;                                          \
-		case 47:                                                \
-			func_name(__VA_ARGS__, 47);                     \
-			break;                                          \
-		case 48:                                                \
-			func_name(__VA_ARGS__, 48);                     \
-			break;                                          \
-		case 49:                                                \
-			func_name(__VA_ARGS__, 49);                     \
-			break;                                          \
-		case 50:                                                \
-			func_name(__VA_ARGS__, 50);                     \
-			break;                                          \
-		case 51:                                                \
-			func_name(__VA_ARGS__, 51);                     \
-			break;                                          \
-		case 52:                                                \
-			func_name(__VA_ARGS__, 52);                     \
-			break;                                          \
-		case 53:                                                \
-			func_name(__VA_ARGS__, 53);                     \
-			break;                                          \
-		case 54:                                                \
-			func_name(__VA_ARGS__, 54);                     \
-			break;                                          \
-		case 55:                                                \
-			func_name(__VA_ARGS__, 55);                     \
-			break;                                          \
-		case 56:                                                \
-			func_name(__VA_ARGS__, 56);                     \
-			break;                                          \
-		case 57:                                                \
-			func_name(__VA_ARGS__, 57);                     \
-			break;                                          \
-		case 58:                                                \
-			func_name(__VA_ARGS__, 58);                     \
-			break;                                          \
-		case 59:                                                \
-			func_name(__VA_ARGS__, 59);                     \
-			break;                                          \
-		case 60:                                                \
-			func_name(__VA_ARGS__, 60);                     \
-			break;                                          \
-		case 61:                                                \
-			func_name(__VA_ARGS__, 61);                     \
-			break;                                          \
-		case 62:                                                \
-			func_name(__VA_ARGS__, 62);                     \
-			break;                                          \
-		case 63:                                                \
-			func_name(__VA_ARGS__, 63);                     \
-			break;                                          \
-		default:                                                \
-			default_case;                                   \
-			break;                                          \
-		}                                                       \
-	} while (0)
-
-HEDLEY_DIAGNOSTIC_POP
-
-#endif

+ 0 - 114
libobs/util/simde/simde-detect-clang.h

@@ -1,114 +0,0 @@
-/* Detect Clang Version
- * Created by Evan Nemerson <[email protected]>
- *
- * To the extent possible under law, the author(s) have dedicated all
- * copyright and related and neighboring rights to this software to
- * the public domain worldwide. This software is distributed without
- * any warranty.
- *
- * For details, see <http://creativecommons.org/publicdomain/zero/1.0/>.
- * SPDX-License-Identifier: CC0-1.0
- */
-
-/* This file was originally part of SIMDe
- * (<https://github.com/simd-everywhere/simde>).  You're free to do with it as
- * you please, but I do have a few small requests:
- *
- *  * If you make improvements, please submit them back to SIMDe
- *    (at <https://github.com/simd-everywhere/simde/issues>) so others can
- *    benefit from them.
- *  * Please keep a link to SIMDe intact so people know where to submit
- *    improvements.
- *  * If you expose it publicly, please change the SIMDE_ prefix to
- *    something specific to your project.
- *
- * The version numbers clang exposes (in the ___clang_major__,
- * __clang_minor__, and __clang_patchlevel__ macros) are unreliable.
- * Vendors such as Apple will define these values to their version
- * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but
- * __clang_major__ and __clang_minor__ are defined to 4 and 0
- * respectively, instead of 3 and 1.
- *
- * The solution is *usually* to use clang's feature detection macros
- * (<https://clang.llvm.org/docs/LanguageExtensions.html#feature-checking-macros>)
- * to determine if the feature you're interested in is available.  This
- * generally works well, and it should probably be the first thing you
- * try.  Unfortunately, it's not possible to check for everything.  In
- * particular, compiler bugs.
- *
- * This file just uses the feature checking macros to detect features
- * added in specific versions of clang to identify which version of
- * clang the compiler is based on.
- *
- * Right now it only goes back to 3.6, but I'm happy to accept patches
- * to go back further.  And, of course, newer versions are welcome if
- * they're not already present, and if you find a way to detect a point
- * release that would be great, too!
- */
-
-#if !defined(SIMDE_DETECT_CLANG_H)
-#define SIMDE_DETECT_CLANG_H 1
-
-/* Attempt to detect the upstream clang version number.  I usually only
- * worry about major version numbers (at least for 4.0+), but if you
- * need more resolution I'm happy to accept patches that are able to
- * detect minor versions as well.  That said, you'll probably have a
- * hard time with detection since AFAIK most minor releases don't add
- * anything we can detect. */
-
-#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION)
-#if __has_warning("-Wformat-insufficient-args")
-#define SIMDE_DETECT_CLANG_VERSION 120000
-#elif __has_warning("-Wimplicit-const-int-float-conversion")
-#define SIMDE_DETECT_CLANG_VERSION 110000
-#elif __has_warning("-Wmisleading-indentation")
-#define SIMDE_DETECT_CLANG_VERSION 100000
-#elif defined(__FILE_NAME__)
-#define SIMDE_DETECT_CLANG_VERSION 90000
-#elif __has_warning("-Wextra-semi-stmt") || \
-	__has_builtin(__builtin_rotateleft32)
-#define SIMDE_DETECT_CLANG_VERSION 80000
-#elif __has_warning("-Wc++98-compat-extra-semi")
-#define SIMDE_DETECT_CLANG_VERSION 70000
-#elif __has_warning("-Wpragma-pack")
-#define SIMDE_DETECT_CLANG_VERSION 60000
-#elif __has_warning("-Wbitfield-enum-conversion")
-#define SIMDE_DETECT_CLANG_VERSION 50000
-#elif __has_attribute(diagnose_if)
-#define SIMDE_DETECT_CLANG_VERSION 40000
-#elif __has_warning("-Wcast-calling-convention")
-#define SIMDE_DETECT_CLANG_VERSION 30900
-#elif __has_warning("-WCL4")
-#define SIMDE_DETECT_CLANG_VERSION 30800
-#elif __has_warning("-WIndependentClass-attribute")
-#define SIMDE_DETECT_CLANG_VERSION 30700
-#elif __has_warning("-Wambiguous-ellipsis")
-#define SIMDE_DETECT_CLANG_VERSION 30600
-#else
-#define SIMDE_DETECT_CLANG_VERSION 1
-#endif
-#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */
-
-/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty
- * straightforward; it returns true if the compiler is a derivative
- * of clang >= the specified version.
- *
- * Since this file is often (primarily?) useful for working around bugs
- * it is also helpful to have a macro which returns true if only if the
- * compiler is a version of clang *older* than the specified version to
- * make it a bit easier to ifdef regions to add code for older versions,
- * such as pragmas to disable a specific warning. */
-
-#if defined(SIMDE_DETECT_CLANG_VERSION)
-#define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) \
-	(SIMDE_DETECT_CLANG_VERSION >=                           \
-	 ((major * 10000) + (minor * 1000) + (revision)))
-#define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) \
-	(SIMDE_DETECT_CLANG_VERSION <                          \
-	 ((major * 10000) + (minor * 1000) + (revision)))
-#else
-#define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0)
-#define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (1)
-#endif
-
-#endif /* !defined(SIMDE_DETECT_CLANG_H) */

+ 0 - 447
libobs/util/simde/simde-diagnostic.h

@@ -1,447 +0,0 @@
-/* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- *   2017-2020 Evan Nemerson <[email protected]>
- */
-
-/* SIMDe targets a very wide range of standards and compilers, and our
- * goal is to compile cleanly even with extremely aggressive warnings
- * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.)
- * treated as errors.
- *
- * While our preference is to resolve the underlying issue a given
- * diagnostic is warning us about, sometimes that's not possible.
- * Fixing a warning in one compiler may cause problems in another.
- * Sometimes a warning doesn't really apply to us (false positives),
- * and sometimes adhering to a warning would mean dropping a feature
- * we *know* the compiler supports since we have tested specifically
- * for the compiler or feature.
- *
- * When practical, warnings are only disabled for specific code.  For
- * a list of warnings which are enabled by default in all SIMDe code,
- * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS.  Note that we restore the
- * warning stack when SIMDe is done parsing, so code which includes
- * SIMDe is not deprived of these warnings.
- */
-
-#if !defined(SIMDE_DIAGNOSTIC_H)
-#define SIMDE_DIAGNOSTIC_H
-
-#include "hedley.h"
-#include "simde-detect-clang.h"
-
-/* This is only to help us implement functions like _mm_undefined_ps. */
-#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
-#undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
-#endif
-#if HEDLEY_HAS_WARNING("-Wuninitialized")
-#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ \
-	_Pragma("clang diagnostic ignored \"-Wuninitialized\"")
-#elif HEDLEY_GCC_VERSION_CHECK(4, 2, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ \
-	_Pragma("GCC diagnostic ignored \"-Wuninitialized\"")
-#elif HEDLEY_PGI_VERSION_CHECK(19, 10, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549")
-#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 14, 0) && defined(__cplusplus)
-#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ \
-	_Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)")
-#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 14, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ \
-	_Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)")
-#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 12, 0) && defined(__cplusplus)
-#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ \
-	_Pragma("error_messages(off,unassigned)")
-#elif HEDLEY_TI_VERSION_CHECK(16, 9, 9) ||       \
-	HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) || \
-	HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
-	HEDLEY_TI_CLPRU_VERSION_CHECK(2, 3, 2)
-#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551")
-#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)")
-#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0) && !defined(__MSVC_RUNTIME_CHECKS)
-#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ \
-	__pragma(warning(disable : 4700))
-#endif
-
-/* GCC emits a lot of "notes" about the ABI being different for things
- * in newer versions of GCC.  We don't really care because all our
- * functions are inlined and don't generate ABI. */
-#if HEDLEY_GCC_VERSION_CHECK(7, 0, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \
-	_Pragma("GCC diagnostic ignored \"-Wpsabi\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_PSABI_
-#endif
-
-/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty()
- * after each MMX function before any floating point instructions.
- * Some compilers warn about functions which use MMX functions but
- * don't call _mm_empty().  However, since SIMDe is implementyng the
- * MMX API we shouldn't be calling _mm_empty(); we leave it to the
- * caller to invoke simde_mm_empty(). */
-#if HEDLEY_INTEL_VERSION_CHECK(19, 0, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \
-	_Pragma("warning(disable:13200 13203)")
-#elif defined(HEDLEY_MSVC_VERSION)
-#define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \
-	__pragma(warning(disable : 4799))
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_
-#endif
-
-/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they
- * emit a diagnostic if you use #pragma simd instead of
- * #pragma omp simd.  SIMDe supports OpenMP SIMD, you just need to
- * compile with -qopenmp or -qopenmp-simd and define
- * SIMDE_ENABLE_OPENMP.  Cilk+ is just a fallback. */
-#if HEDLEY_INTEL_VERSION_CHECK(18, 0, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \
-	_Pragma("warning(disable:3948)")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_
-#endif
-
-/* MSVC emits a diagnostic when we call a function (like
- * simde_mm_set_epi32) while initializing a struct.  We currently do
- * this a *lot* in the tests. */
-#if defined(HEDLEY_MSVC_VERSION)
-#define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \
-	__pragma(warning(disable : 4204))
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_
-#endif
-
-/* This warning needs a lot of work.  It is triggered if all you do is
- * pass the value to memcpy/__builtin_memcpy, or if you initialize a
- * member of the union, even if that member takes up the entire union.
- * Last tested with clang-10, hopefully things will improve in the
- * future; if clang fixes this I'd love to enable it. */
-#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized")
-#define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \
-	_Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_
-#endif
-
-/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which
- * will is false.  However, SIMDe uses these operations exclusively
- * for things like _mm_cmpeq_ps, for which we really do want to check
- * for equality (or inequality).
- *
- * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro
- * which just wraps a check in some code do disable this diagnostic I'd
- * be happy to accept it. */
-#if HEDLEY_HAS_WARNING("-Wfloat-equal") || HEDLEY_GCC_VERSION_CHECK(3, 0, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \
-	_Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_
-#endif
-
-/* This is because we use HEDLEY_STATIC_ASSERT for static assertions.
- * If Hedley can't find an implementation it will preprocess to
- * nothing, which means there will be a trailing semi-colon. */
-#if HEDLEY_HAS_WARNING("-Wextra-semi")
-#define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \
-	_Pragma("clang diagnostic ignored \"-Wextra-semi\"")
-#elif HEDLEY_GCC_VERSION_CHECK(8, 1, 0) && defined(__cplusplus)
-#define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \
-	_Pragma("GCC diagnostic ignored \"-Wextra-semi\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_
-#endif
-
-/* We do use a few variadic macros, which technically aren't available
- * until C99 and C++11, but every compiler I'm aware of has supported
- * them for much longer.  That said, usage is isolated to the test
- * suite and compilers known to support them. */
-#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4, 0, 0)
-#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic")
-#define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_                          \
-	_Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") _Pragma( \
-		"clang diagnostic ignored \"-Wc++98-compat-pedantic\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \
-	_Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"")
-#endif
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_
-#endif
-
-/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro
- * before we can access certain SIMD intrinsics, but this diagnostic
- * warns about it being a reserved name.  It is a reserved name, but
- * it's reserved for the compiler and we are using it to convey
- * information to the compiler.
- *
- * This is also used when enabling native aliases since we don't get to
- * choose the macro names. */
-#if HEDLEY_HAS_WARNING("-Wdouble-promotion")
-#define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ \
-	_Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_
-#endif
-
-/* clang 3.8 warns about the packed attribute being unnecessary when
- * used in the _mm_loadu_* functions.  That *may* be true for version
- * 3.8, but for later versions it is crucial in order to make unaligned
- * access safe. */
-#if HEDLEY_HAS_WARNING("-Wpacked")
-#define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ \
-	_Pragma("clang diagnostic ignored \"-Wpacked\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_PACKED_
-#endif
-
-/* Triggered when assigning a float to a double implicitly.  We use
- * explicit casts in SIMDe, this is only used in the test suite. */
-#if HEDLEY_HAS_WARNING("-Wdouble-promotion")
-#define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ \
-	_Pragma("clang diagnostic ignored \"-Wdouble-promotion\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_
-#endif
-
-/* Several compilers treat conformant array parameters as VLAs.  We
- * test to make sure we're in C mode (C++ doesn't support CAPs), and
- * that the version of the standard supports CAPs.  We also reject
- * some buggy compilers like MSVC (the logic is in Hedley if you want
- * to take a look), but with certain warnings enabled some compilers
- * still like to emit a diagnostic. */
-#if HEDLEY_HAS_WARNING("-Wvla")
-#define SIMDE_DIAGNOSTIC_DISABLE_VLA_ \
-	_Pragma("clang diagnostic ignored \"-Wvla\"")
-#elif HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_VLA_ \
-	_Pragma("GCC diagnostic ignored \"-Wvla\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_VLA_
-#endif
-
-#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused")
-#define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \
-	_Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_
-#endif
-
-#if HEDLEY_HAS_WARNING("-Wunused-function")
-#define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ \
-	_Pragma("clang diagnostic ignored \"-Wunused-function\"")
-#elif HEDLEY_GCC_VERSION_CHECK(3, 4, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ \
-	_Pragma("GCC diagnostic ignored \"-Wunused-function\"")
-#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0) /* Likely goes back further */
-#define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ \
-	__pragma(warning(disable : 4505))
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_
-#endif
-
-#if HEDLEY_HAS_WARNING("-Wpass-failed")
-#define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \
-	_Pragma("clang diagnostic ignored \"-Wpass-failed\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_
-#endif
-
-#if HEDLEY_HAS_WARNING("-Wpadded")
-#define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ \
-	_Pragma("clang diagnostic ignored \"-Wpadded\"")
-#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0) /* Likely goes back further */
-#define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable : 4324))
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_PADDED_
-#endif
-
-#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant")
-#define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ \
-	_Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_
-#endif
-
-#if HEDLEY_HAS_WARNING("-Wold-style-cast")
-#define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ \
-	_Pragma("clang diagnostic ignored \"-Wold-style-cast\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_
-#endif
-
-#if HEDLEY_HAS_WARNING("-Wcast-function-type") || \
-	HEDLEY_GCC_VERSION_CHECK(8, 0, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ \
-	_Pragma("GCC diagnostic ignored \"-Wcast-function-type\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_
-#endif
-
-/* clang will emit this warning when we use C99 extensions whan not in
- * C99 mode, even though it does support this.  In such cases we check
- * the compiler and version first, so we know it's not a problem. */
-#if HEDLEY_HAS_WARNING("-Wc99-extensions")
-#define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ \
-	_Pragma("clang diagnostic ignored \"-Wc99-extensions\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_
-#endif
-
-/* https://github.com/simd-everywhere/simde/issues/277 */
-#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4, 6, 0) && \
-	!HEDLEY_GCC_VERSION_CHECK(6, 4, 0) && defined(__cplusplus)
-#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \
-	_Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_
-#endif
-
-/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS
- * to silence, but you have to do that before including anything and
- * that would require reordering includes. */
-#if defined(_MSC_VER)
-#define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable : 4996))
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_
-#endif
-
-/* Some compilers, such as clang, may use `long long` for 64-bit
- * integers, but `long long` triggers a diagnostic with
- * -Wc++98-compat-pedantic which says 'long long' is incompatible with
- * C++98. */
-#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic")
-#define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \
-	_Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_
-#endif
-
-/* Some problem as above */
-#if HEDLEY_HAS_WARNING("-Wc++11-long-long")
-#define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \
-	_Pragma("clang diagnostic ignored \"-Wc++11-long-long\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_
-#endif
-
-/* emscripten emits this whenever stdin/stdout/stderr is used in a
- * macro. */
-#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion")
-#define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ \
-	_Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_
-#endif
-
-/* Clang uses C11 generic selections to implement some AltiVec
- * functions, which triggers this diagnostic when not compiling
- * in C11 mode */
-#if HEDLEY_HAS_WARNING("-Wc11-extensions")
-#define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ \
-	_Pragma("clang diagnostic ignored \"-Wc11-extensions\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_
-#endif
-
-/* Clang sometimes triggers this warning in macros in the AltiVec and
- * NEON headers, or due to missing functions. */
-#if HEDLEY_HAS_WARNING("-Wvector-conversion")
-#define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ \
-	_Pragma("clang diagnostic ignored \"-Wvector-conversion\"")
-/* For NEON, the situation with -Wvector-conversion in clang < 10 is
-   * bad enough that we just disable the warning altogether. */
-#if defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \
-	SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_
-#endif
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_
-#endif
-#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_)
-#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_
-#endif
-
-/* SLEEF triggers this a *lot* in their headers */
-#if HEDLEY_HAS_WARNING("-Wignored-qualifiers")
-#define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ \
-	_Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"")
-#elif HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ \
-	_Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_
-#endif
-
-/* GCC emits this under some circumstances when using __int128 */
-#if HEDLEY_GCC_VERSION_CHECK(4, 8, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ \
-	_Pragma("GCC diagnostic ignored \"-Wpedantic\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_
-#endif
-
-/* MSVC doesn't like (__assume(0), code) and will warn about code being
- * unreachable, but we want it there because not all compilers
- * understand the unreachable macro and will complain if it is missing.
- * I'm planning on adding a new macro to Hedley to handle this a bit
- * more elegantly, but until then... */
-#if defined(HEDLEY_MSVC_VERSION)
-#define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable : 4702))
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_
-#endif
-
-/* This is a false positive from GCC in a few places. */
-#if HEDLEY_GCC_VERSION_CHECK(4, 7, 0)
-#define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ \
-	_Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
-#else
-#define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_
-#endif
-
-#if defined(SIMDE_ENABLE_NATIVE_ALIASES)
-#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \
-	SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_
-#else
-#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_
-#endif
-
-#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS                           \
-	SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_           \
-	SIMDE_DIAGNOSTIC_DISABLE_PSABI_                              \
-	SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_                \
-	SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_             \
-	SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_          \
-	SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_                        \
-	SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \
-	SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_                         \
-	SIMDE_DIAGNOSTIC_DISABLE_VLA_                                \
-	SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_             \
-	SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_                    \
-	SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_                        \
-	SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_              \
-	SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_                    \
-	SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_      \
-	SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_
-
-#endif /* !defined(SIMDE_DIAGNOSTIC_H) */

+ 0 - 550
libobs/util/simde/simde-features.h

@@ -1,550 +0,0 @@
-/* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- *   2020      Evan Nemerson <[email protected]>
- */
-
-/* simde-arch.h is used to determine which features are available according
-   to the compiler.  However, we want to make it possible to forcibly enable
-   or disable APIs */
-
-#if !defined(SIMDE_FEATURES_H)
-#define SIMDE_FEATURES_H
-
-#include "simde-arch.h"
-#include "simde-diagnostic.h"
-
-#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_SVML)
-#define SIMDE_X86_SVML_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)
-#define SIMDE_X86_AVX512F_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) &&        \
-	!defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT)
-#define SIMDE_X86_AVX512VP2INTERSECT_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && \
-	!defined(SIMDE_X86_AVX512F_NATIVE)
-#define SIMDE_X86_AVX512F_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && \
-	!defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_AVX512VBMI)
-#define SIMDE_X86_AVX512VBMI_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)
-#define SIMDE_X86_AVX512F_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_AVX512CD_NATIVE) && \
-	!defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_AVX512CD)
-#define SIMDE_X86_AVX512CD_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)
-#define SIMDE_X86_AVX512F_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && \
-	!defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_AVX512DQ)
-#define SIMDE_X86_AVX512DQ_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)
-#define SIMDE_X86_AVX512F_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_AVX512VL_NATIVE) && \
-	!defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_AVX512VL)
-#define SIMDE_X86_AVX512VL_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)
-#define SIMDE_X86_AVX512F_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_AVX512BW_NATIVE) && \
-	!defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_AVX512BW)
-#define SIMDE_X86_AVX512BW_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)
-#define SIMDE_X86_AVX512F_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_AVX512F_NATIVE) && \
-	!defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_AVX512F)
-#define SIMDE_X86_AVX512F_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE)
-#define SIMDE_X86_AVX2_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_FMA)
-#define SIMDE_X86_FMA_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE)
-#define SIMDE_X86_AVX_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_AVX2)
-#define SIMDE_X86_AVX2_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE)
-#define SIMDE_X86_AVX_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_AVX)
-#define SIMDE_X86_AVX_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE)
-#define SIMDE_X86_SSE4_2_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_SSE4_2_NATIVE) && \
-	!defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_SSE4_2)
-#define SIMDE_X86_SSE4_2_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE)
-#define SIMDE_X86_SSE4_1_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_SSE4_1_NATIVE) && \
-	!defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_SSE4_1)
-#define SIMDE_X86_SSE4_1_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE)
-#define SIMDE_X86_SSSE3_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_SSSE3)
-#define SIMDE_X86_SSSE3_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE)
-#define SIMDE_X86_SSE3_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_SSE3)
-#define SIMDE_X86_SSE3_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE)
-#define SIMDE_X86_SSE2_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_SSE2)
-#define SIMDE_X86_SSE2_NATIVE
-#endif
-#endif
-#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE)
-#define SIMDE_X86_SSE_NATIVE
-#endif
-
-#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_SSE)
-#define SIMDE_X86_SSE_NATIVE
-#endif
-#endif
-
-#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_MMX)
-#define SIMDE_X86_MMX_NATIVE
-#endif
-#endif
-
-#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_GFNI)
-#define SIMDE_X86_GFNI_NATIVE
-#endif
-#endif
-
-#if !defined(SIMDE_X86_PCLMUL_NATIVE) && \
-	!defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_PCLMUL)
-#define SIMDE_X86_PCLMUL_NATIVE
-#endif
-#endif
-
-#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && \
-	!defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_X86_VPCLMULQDQ)
-#define SIMDE_X86_VPCLMULQDQ_NATIVE
-#endif
-#endif
-
-#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if defined(__INTEL_COMPILER)
-#define SIMDE_X86_SVML_NATIVE
-#endif
-#endif
-
-#if defined(HEDLEY_MSVC_VERSION)
-#pragma warning(push)
-#pragma warning(disable : 4799)
-#endif
-
-#if defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE)
-#include <immintrin.h>
-#elif defined(SIMDE_X86_SSE4_2_NATIVE)
-#include <nmmintrin.h>
-#elif defined(SIMDE_X86_SSE4_1_NATIVE)
-#include <smmintrin.h>
-#elif defined(SIMDE_X86_SSSE3_NATIVE)
-#include <tmmintrin.h>
-#elif defined(SIMDE_X86_SSE3_NATIVE)
-#include <pmmintrin.h>
-#elif defined(SIMDE_X86_SSE2_NATIVE)
-#include <emmintrin.h>
-#elif defined(SIMDE_X86_SSE_NATIVE)
-#include <xmmintrin.h>
-#elif defined(SIMDE_X86_MMX_NATIVE)
-#include <mmintrin.h>
-#endif
-
-#if defined(HEDLEY_MSVC_VERSION)
-#pragma warning(pop)
-#endif
-
-#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && \
-	!defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && \
-	SIMDE_ARCH_ARM_CHECK(80)
-#define SIMDE_ARM_NEON_A64V8_NATIVE
-#endif
-#endif
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && \
-	!defined(SIMDE_ARM_NEON_A32V8_NATIVE)
-#define SIMDE_ARM_NEON_A32V8_NATIVE
-#endif
-
-#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \
-	!defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(80) && \
-	(__ARM_NEON_FP & 0x02)
-#define SIMDE_ARM_NEON_A32V8_NATIVE
-#endif
-#endif
-#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \
-	!defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define SIMDE_ARM_NEON_A32V7_NATIVE
-#endif
-
-#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \
-	!defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(70)
-#define SIMDE_ARM_NEON_A32V7_NATIVE
-#endif
-#endif
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#include <arm_neon.h>
-#endif
-
-#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_ARM_SVE)
-#define SIMDE_ARM_SVE_NATIVE
-#include <arm_sve.h>
-#endif
-#endif
-
-#if !defined(SIMDE_WASM_SIMD128_NATIVE) && \
-	!defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_WASM_SIMD128)
-#define SIMDE_WASM_SIMD128_NATIVE
-#endif
-#endif
-#if defined(SIMDE_WASM_SIMD128_NATIVE)
-#if !defined(__wasm_unimplemented_simd128__)
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_
-#define __wasm_unimplemented_simd128__
-HEDLEY_DIAGNOSTIC_POP
-#endif
-#include <wasm_simd128.h>
-#endif
-
-#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) &&        \
-	!defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900)
-#define SIMDE_POWER_ALTIVEC_P9_NATIVE
-#endif
-#endif
-#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8)
-#define SIMDE_POWER_ALTIVEC_P8_NATIVE
-#endif
-
-#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) &&        \
-	!defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800)
-#define SIMDE_POWER_ALTIVEC_P8_NATIVE
-#endif
-#endif
-#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7)
-#define SIMDE_POWER_ALTIVEC_P7_NATIVE
-#endif
-
-#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) &&        \
-	!defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700)
-#define SIMDE_POWER_ALTIVEC_P7_NATIVE
-#endif
-#endif
-#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6)
-#define SIMDE_POWER_ALTIVEC_P6_NATIVE
-#endif
-
-#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) &&        \
-	!defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600)
-#define SIMDE_POWER_ALTIVEC_P6_NATIVE
-#endif
-#endif
-#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5)
-#define SIMDE_POWER_ALTIVEC_P5_NATIVE
-#endif
-
-#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) &&        \
-	!defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500)
-#define SIMDE_POWER_ALTIVEC_P5_NATIVE
-#endif
-#endif
-
-#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-/* AltiVec conflicts with lots of stuff.  The bool keyword conflicts
-   * with the bool keyword in C++ and the bool macro in C99+ (defined
-   * in stdbool.h).  The vector keyword conflicts with std::vector in
-   * C++ if you are `using std;`.
-   *
-   * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel`
-   * instead, but altivec.h will unconditionally define
-   * `vector`/`bool`/`pixel` so we need to work around that.
-   *
-   * Unfortunately this means that if your code uses AltiVec directly
-   * it may break.  If this is the case you'll want to define
-   * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe.  Or, even
-   * better, port your code to use the double-underscore versions. */
-#if defined(bool)
-#undef bool
-#endif
-
-#include <altivec.h>
-
-#if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF)
-#if defined(vector)
-#undef vector
-#endif
-#if defined(pixel)
-#undef pixel
-#endif
-#if defined(bool)
-#undef bool
-#endif
-#endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */
-
-/* Use these intsead of vector/pixel/bool in SIMDe. */
-#define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T
-#define SIMDE_POWER_ALTIVEC_PIXEL __pixel
-#define SIMDE_POWER_ALTIVEC_BOOL __bool
-
-/* Re-define bool if we're using stdbool.h */
-#if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && \
-	!defined(SIMDE_POWER_ALTIVEC_NO_UNDEF)
-#define bool _Bool
-#endif
-#endif
-
-#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) &&        \
-	!defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && \
-	!defined(SIMDE_NO_NATIVE)
-#if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI)
-#define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1
-#endif
-#endif
-#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-#include <loongson-mmiintrin.h>
-#endif
-
-/* This is used to determine whether or not to fall back on a vector
- * function in an earlier ISA extensions, as well as whether
- * we expected any attempts at vectorization to be fruitful or if we
- * expect to always be running serial code. */
-
-#if !defined(SIMDE_NATURAL_VECTOR_SIZE)
-#if defined(SIMDE_X86_AVX512F_NATIVE)
-#define SIMDE_NATURAL_VECTOR_SIZE (512)
-#elif defined(SIMDE_X86_AVX_NATIVE)
-#define SIMDE_NATURAL_VECTOR_SIZE (256)
-#elif defined(SIMDE_X86_SSE_NATIVE) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \
-	defined(SIMDE_WASM_SIMD128_NATIVE) ||                                  \
-	defined(SIMDE_POWER_ALTIVEC_P5_NATIVE)
-#define SIMDE_NATURAL_VECTOR_SIZE (128)
-#endif
-
-#if !defined(SIMDE_NATURAL_VECTOR_SIZE)
-#define SIMDE_NATURAL_VECTOR_SIZE (0)
-#endif
-#endif
-
-#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) \
-	((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x)))
-#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) \
-	((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x)))
-
-/* Native aliases */
-#if defined(SIMDE_ENABLE_NATIVE_ALIASES)
-#if !defined(SIMDE_X86_MMX_NATIVE)
-#define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_SSE_NATIVE)
-#define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_SSE2_NATIVE)
-#define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_SSE3_NATIVE)
-#define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_SSSE3_NATIVE)
-#define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_SSE4_1_NATIVE)
-#define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_SSE4_2_NATIVE)
-#define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_AVX_NATIVE)
-#define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_AVX2_NATIVE)
-#define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_FMA_NATIVE)
-#define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_AVX512F_NATIVE)
-#define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_AVX512VL_NATIVE)
-#define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_AVX512BW_NATIVE)
-#define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_AVX512DQ_NATIVE)
-#define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_AVX512CD_NATIVE)
-#define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_GFNI_NATIVE)
-#define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_PCLMUL_NATIVE)
-#define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE)
-#define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES
-#endif
-
-#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE)
-#define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES
-#endif
-#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-#define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES
-#endif
-#endif
-
-/* Are floating point values stored using IEEE 754?  Knowing
- * this at during preprocessing is a bit tricky, mostly because what
- * we're curious about is how values are stored and not whether the
- * implementation is fully conformant in terms of rounding, NaN
- * handling, etc.
- *
- * For example, if you use -ffast-math or -Ofast on
- * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754
- * support is not advertised (by defining __STDC_IEC_559__).
- *
- * However, what we care about is whether it is safe to assume that
- * floating point values are stored in IEEE 754 format, in which case
- * we can provide faster implementations of some functions.
- *
- * Luckily every vaugely modern architecture I'm aware of uses IEEE 754-
- * so we just assume IEEE 754 for now.  There is a test which verifies
- * this, if that test fails sowewhere please let us know and we'll add
- * an exception for that platform.  Meanwhile, you can define
- * SIMDE_NO_IEEE754_STORAGE. */
-#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE)
-#define SIMDE_IEEE754_STORAGE
-#endif
-
-#endif /* !defined(SIMDE_FEATURES_H) */

+ 0 - 1858
libobs/util/simde/simde-math.h

@@ -1,1858 +0,0 @@
-/* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- *   2017-2020 Evan Nemerson <[email protected]>
- */
-
-/* Attempt to find math functions.  Functions may be in <cmath>,
- * <math.h>, compiler built-ins/intrinsics, or platform/architecture
- * specific headers.  In some cases, especially those not built in to
- * libm, we may need to define our own implementations. */
-
-#if !defined(SIMDE_MATH_H)
-
-#include "hedley.h"
-#include "simde-features.h"
-
-#include <stdint.h>
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-#include <arm_neon.h>
-#endif
-
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
-
-/* SLEEF support
- * https://sleef.org/
- *
- * If you include <sleef.h> prior to including SIMDe, SIMDe will use
- * SLEEF.  You can also define SIMDE_MATH_SLEEF_ENABLE prior to
- * including SIMDe to force the issue.
- *
- * Note that SLEEF does requires linking to libsleef.
- *
- * By default, SIMDe will use the 1 ULP functions, but if you use
- * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP.  This is
- * only the case for the simde_math_* functions; for code in other
- * SIMDe headers which calls SLEEF directly we may use functions with
- * greater error if the API we're implementing is less precise (for
- * example, SVML guarantees 4 ULP, so we will generally use the 3.5
- * ULP functions from SLEEF). */
-#if !defined(SIMDE_MATH_SLEEF_DISABLE)
-#if defined(__SLEEF_H__)
-#define SIMDE_MATH_SLEEF_ENABLE
-#endif
-#endif
-
-#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__)
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_
-#include <sleef.h>
-HEDLEY_DIAGNOSTIC_POP
-#endif
-
-#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__)
-#if defined(SLEEF_VERSION_MAJOR)
-#define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch)              \
-	(HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, \
-			       SLEEF_VERSION_PATCHLEVEL) >=              \
-	 HEDLEY_VERSION_ENCODE(major, minor, patch))
-#else
-#define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) \
-	(HEDLEY_VERSION_ENCODE(3, 0, 0) >=                  \
-	 HEDLEY_VERSION_ENCODE(major, minor, patch))
-#endif
-#else
-#define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0)
-#endif
-
-#if defined(__has_builtin)
-#define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func)
-#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || HEDLEY_GCC_VERSION_CHECK(4, 4, 0)
-#define SIMDE_MATH_BUILTIN_LIBM(func) (1)
-#else
-#define SIMDE_MATH_BUILTIN_LIBM(func) (0)
-#endif
-
-#if defined(HUGE_VAL)
-/* Looks like <math.h> or <cmath> has already been included. */
-
-/* The math.h from libc++ (yes, the C header from the C++ standard
-   * library) will define an isnan function, but not an isnan macro
-   * like the C standard requires.  So we detect the header guards
-   * macro libc++ uses. */
-#if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH))
-#define SIMDE_MATH_HAVE_MATH_H
-#elif defined(__cplusplus)
-#define SIMDE_MATH_HAVE_CMATH
-#endif
-#elif defined(__has_include)
-#if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include(<cmath>)
-#define SIMDE_MATH_HAVE_CMATH
-#include <cmath>
-#elif __has_include(<math.h>)
-#define SIMDE_MATH_HAVE_MATH_H
-#include <math.h>
-#elif !defined(SIMDE_MATH_NO_LIBM)
-#define SIMDE_MATH_NO_LIBM
-#endif
-#elif !defined(SIMDE_MATH_NO_LIBM)
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define SIMDE_MATH_HAVE_CMATH
-HEDLEY_DIAGNOSTIC_PUSH
-#if defined(HEDLEY_MSVC_VERSION)
-/* VS 14 emits this diagnostic about noexcept being used on a
-       * <cmath> function, which we can't do anything about. */
-#pragma warning(disable : 4996)
-#endif
-#include <cmath>
-HEDLEY_DIAGNOSTIC_POP
-#else
-#define SIMDE_MATH_HAVE_MATH_H
-#include <math.h>
-#endif
-#endif
-
-/* Try to avoid including <complex> since it pulls in a *lot* of code. */
-#if HEDLEY_HAS_BUILTIN(__builtin_creal) ||   \
-	HEDLEY_GCC_VERSION_CHECK(4, 7, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_
-typedef __complex__ float simde_cfloat32;
-typedef __complex__ double simde_cfloat64;
-HEDLEY_DIAGNOSTIC_POP
-#define SIMDE_MATH_CMPLX(x, y)           \
-	(HEDLEY_STATIC_CAST(double, x) + \
-	 HEDLEY_STATIC_CAST(double, y) * (__extension__ 1.0j))
-#define SIMDE_MATH_CMPLXF(x, y)         \
-	(HEDLEY_STATIC_CAST(float, x) + \
-	 HEDLEY_STATIC_CAST(float, y) * (__extension__ 1.0fj))
-
-#if !defined(simde_math_creal)
-#define simde_math_crealf(z) __builtin_crealf(z)
-#endif
-#if !defined(simde_math_crealf)
-#define simde_math_creal(z) __builtin_creal(z)
-#endif
-#if !defined(simde_math_cimag)
-#define simde_math_cimagf(z) __builtin_cimagf(z)
-#endif
-#if !defined(simde_math_cimagf)
-#define simde_math_cimag(z) __builtin_cimag(z)
-#endif
-#elif !defined(__cplusplus)
-#include <complex.h>
-
-#if !defined(HEDLEY_MSVC_VERSION)
-typedef float _Complex simde_cfloat32;
-typedef double _Complex simde_cfloat64;
-#else
-typedef _Fcomplex simde_cfloat32;
-typedef _Dcomplex simde_cfloat64;
-#endif
-
-#if defined(HEDLEY_MSVC_VERSION)
-#define SIMDE_MATH_CMPLX(x, y) ((simde_cfloat64){(x), (y)})
-#define SIMDE_MATH_CMPLXF(x, y) ((simde_cfloat32){(x), (y)})
-#elif defined(CMPLX) && defined(CMPLXF)
-#define SIMDE_MATH_CMPLX(x, y) CMPLX(x, y)
-#define SIMDE_MATH_CMPLXF(x, y) CMPLXF(x, y)
-#else
-#define SIMDE_MATH_CMPLX(x, y) \
-	(HEDLEY_STATIC_CAST(double, x) + HEDLEY_STATIC_CAST(double, y) * I)
-#define SIMDE_MATH_CMPLXF(x, y) \
-	(HEDLEY_STATIC_CAST(float, x) + HEDLEY_STATIC_CAST(float, y) * I)
-#endif
-
-#if !defined(simde_math_creal)
-#define simde_math_creal(z) creal(z)
-#endif
-#if !defined(simde_math_crealf)
-#define simde_math_crealf(z) crealf(z)
-#endif
-#if !defined(simde_math_cimag)
-#define simde_math_cimag(z) cimag(z)
-#endif
-#if !defined(simde_math_cimagf)
-#define simde_math_cimagf(z) cimagf(z)
-#endif
-#else
-HEDLEY_DIAGNOSTIC_PUSH
-#if defined(HEDLEY_MSVC_VERSION)
-#pragma warning(disable : 4530)
-#endif
-#include <complex>
-HEDLEY_DIAGNOSTIC_POP
-
-typedef std::complex<float> simde_cfloat32;
-typedef std::complex<double> simde_cfloat64;
-#define SIMDE_MATH_CMPLX(x, y) (std::complex<double>(x, y))
-#define SIMDE_MATH_CMPLXF(x, y) (std::complex<float>(x, y))
-
-#if !defined(simde_math_creal)
-#define simde_math_creal(z) ((z).real())
-#endif
-#if !defined(simde_math_crealf)
-#define simde_math_crealf(z) ((z).real())
-#endif
-#if !defined(simde_math_cimag)
-#define simde_math_cimag(z) ((z).imag())
-#endif
-#if !defined(simde_math_cimagf)
-#define simde_math_cimagf(z) ((z).imag())
-#endif
-#endif
-
-#if !defined(SIMDE_MATH_INFINITY)
-#if HEDLEY_HAS_BUILTIN(__builtin_inf) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                               \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                                  \
-	HEDLEY_CRAY_VERSION_CHECK(8, 1, 0)
-#define SIMDE_MATH_INFINITY (__builtin_inf())
-#elif defined(INFINITY)
-#define SIMDE_MATH_INFINITY INFINITY
-#endif
-#endif
-
-#if !defined(SIMDE_INFINITYF)
-#if HEDLEY_HAS_BUILTIN(__builtin_inff) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                                \
-	HEDLEY_CRAY_VERSION_CHECK(8, 1, 0) ||                                  \
-	HEDLEY_IBM_VERSION_CHECK(13, 1, 0)
-#define SIMDE_MATH_INFINITYF (__builtin_inff())
-#elif defined(INFINITYF)
-#define SIMDE_MATH_INFINITYF INFINITYF
-#elif defined(SIMDE_MATH_INFINITY)
-#define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY)
-#endif
-#endif
-
-#if !defined(SIMDE_MATH_NAN)
-#if HEDLEY_HAS_BUILTIN(__builtin_nan) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                               \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                                  \
-	HEDLEY_CRAY_VERSION_CHECK(8, 1, 0) ||                                 \
-	HEDLEY_IBM_VERSION_CHECK(13, 1, 0)
-#define SIMDE_MATH_NAN (__builtin_nan(""))
-#elif defined(NAN)
-#define SIMDE_MATH_NAN NAN
-#endif
-#endif
-
-#if !defined(SIMDE_NANF)
-#if HEDLEY_HAS_BUILTIN(__builtin_nanf) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) ||                                \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||                                   \
-	HEDLEY_CRAY_VERSION_CHECK(8, 1, 0)
-#define SIMDE_MATH_NANF (__builtin_nanf(""))
-#elif defined(NANF)
-#define SIMDE_MATH_NANF NANF
-#elif defined(SIMDE_MATH_NAN)
-#define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN)
-#endif
-#endif
-
-#if !defined(SIMDE_MATH_PI)
-#if defined(M_PI)
-#define SIMDE_MATH_PI M_PI
-#else
-#define SIMDE_MATH_PI 3.14159265358979323846
-#endif
-#endif
-
-#if !defined(SIMDE_MATH_PIF)
-#if defined(M_PI)
-#define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI)
-#else
-#define SIMDE_MATH_PIF 3.14159265358979323846f
-#endif
-#endif
-
-#if !defined(SIMDE_MATH_PI_OVER_180)
-#define SIMDE_MATH_PI_OVER_180 \
-	0.0174532925199432957692369076848861271344287188854172545609719144
-#endif
-
-#if !defined(SIMDE_MATH_PI_OVER_180F)
-#define SIMDE_MATH_PI_OVER_180F \
-	0.0174532925199432957692369076848861271344287188854172545609719144f
-#endif
-
-#if !defined(SIMDE_MATH_180_OVER_PI)
-#define SIMDE_MATH_180_OVER_PI \
-	57.295779513082320876798154814105170332405472466564321549160243861
-#endif
-
-#if !defined(SIMDE_MATH_180_OVER_PIF)
-#define SIMDE_MATH_180_OVER_PIF \
-	57.295779513082320876798154814105170332405472466564321549160243861f
-#endif
-
-#if !defined(SIMDE_MATH_FLT_MIN)
-#if defined(FLT_MIN)
-#define SIMDE_MATH_FLT_MIN FLT_MIN
-#elif defined(__FLT_MIN__)
-#define SIMDE_MATH_FLT_MIN __FLT_MIN__
-#elif defined(__cplusplus)
-#include <cfloat>
-#define SIMDE_MATH_FLT_MIN FLT_MIN
-#else
-#include <float.h>
-#define SIMDE_MATH_FLT_MIN FLT_MIN
-#endif
-#endif
-
-#if !defined(SIMDE_MATH_DBL_MIN)
-#if defined(DBL_MIN)
-#define SIMDE_MATH_DBL_MIN DBL_MIN
-#elif defined(__DBL_MIN__)
-#define SIMDE_MATH_DBL_MIN __DBL_MIN__
-#elif defined(__cplusplus)
-#include <cfloat>
-#define SIMDE_MATH_DBL_MIN DBL_MIN
-#else
-#include <float.h>
-#define SIMDE_MATH_DBL_MIN DBL_MIN
-#endif
-#endif
-
-/*** Classification macros from C99 ***/
-
-#if !defined(simde_math_isinf)
-#if SIMDE_MATH_BUILTIN_LIBM(isinf)
-#define simde_math_isinf(v) __builtin_isinf(v)
-#elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_isinf(v) isinf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_isinf(v) std::isinf(v)
-#endif
-#endif
-
-#if !defined(simde_math_isinff)
-#if HEDLEY_HAS_BUILTIN(__builtin_isinff) ||     \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0)
-#define simde_math_isinff(v) __builtin_isinff(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_isinff(v) std::isinf(v)
-#elif defined(simde_math_isinf)
-#define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v))
-#endif
-#endif
-
-#if !defined(simde_math_isnan)
-#if SIMDE_MATH_BUILTIN_LIBM(isnan)
-#define simde_math_isnan(v) __builtin_isnan(v)
-#elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_isnan(v) isnan(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_isnan(v) std::isnan(v)
-#endif
-#endif
-
-#if !defined(simde_math_isnanf)
-#if HEDLEY_HAS_BUILTIN(__builtin_isnanf) ||     \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0)
-/* XL C/C++ has __builtin_isnan but not __builtin_isnanf */
-#define simde_math_isnanf(v) __builtin_isnanf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_isnanf(v) std::isnan(v)
-#elif defined(simde_math_isnan)
-#define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v))
-#endif
-#endif
-
-#if !defined(simde_math_isnormal)
-#if SIMDE_MATH_BUILTIN_LIBM(isnormal)
-#define simde_math_isnormal(v) __builtin_isnormal(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_isnormal(v) isnormal(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_isnormal(v) std::isnormal(v)
-#endif
-#endif
-
-#if !defined(simde_math_isnormalf)
-#if HEDLEY_HAS_BUILTIN(__builtin_isnormalf)
-#define simde_math_isnormalf(v) __builtin_isnormalf(v)
-#elif SIMDE_MATH_BUILTIN_LIBM(isnormal)
-#define simde_math_isnormalf(v) __builtin_isnormal(v)
-#elif defined(isnormalf)
-#define simde_math_isnormalf(v) isnormalf(v)
-#elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_isnormalf(v) isnormal(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_isnormalf(v) std::isnormal(v)
-#elif defined(simde_math_isnormal)
-#define simde_math_isnormalf(v) simde_math_isnormal(v)
-#endif
-#endif
-
-/*** Manipulation functions ***/
-
-#if !defined(simde_math_nextafter)
-#if (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && \
-     !defined(HEDLEY_IBM_VERSION)) ||           \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||    \
-	HEDLEY_GCC_VERSION_CHECK(3, 4, 0) ||    \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define simde_math_nextafter(x, y) __builtin_nextafter(x, y)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_nextafter(x, y) std::nextafter(x, y)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_nextafter(x, y) nextafter(x, y)
-#endif
-#endif
-
-#if !defined(simde_math_nextafterf)
-#if (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && \
-     !defined(HEDLEY_IBM_VERSION)) ||            \
-	HEDLEY_ARM_VERSION_CHECK(4, 1, 0) ||     \
-	HEDLEY_GCC_VERSION_CHECK(3, 4, 0) ||     \
-	HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
-#define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_nextafterf(x, y) std::nextafter(x, y)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_nextafterf(x, y) nextafterf(x, y)
-#endif
-#endif
-
-/*** Functions from C99 ***/
-
-#if !defined(simde_math_abs)
-#if SIMDE_MATH_BUILTIN_LIBM(abs)
-#define simde_math_abs(v) __builtin_abs(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_abs(v) std::abs(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_abs(v) abs(v)
-#endif
-#endif
-
-#if !defined(simde_math_fabsf)
-#if SIMDE_MATH_BUILTIN_LIBM(fabsf)
-#define simde_math_fabsf(v) __builtin_fabsf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_fabsf(v) std::abs(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_fabsf(v) fabsf(v)
-#endif
-#endif
-
-#if !defined(simde_math_acos)
-#if SIMDE_MATH_BUILTIN_LIBM(acos)
-#define simde_math_acos(v) __builtin_acos(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_acos(v) std::acos(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_acos(v) acos(v)
-#endif
-#endif
-
-#if !defined(simde_math_acosf)
-#if SIMDE_MATH_BUILTIN_LIBM(acosf)
-#define simde_math_acosf(v) __builtin_acosf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_acosf(v) std::acos(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_acosf(v) acosf(v)
-#endif
-#endif
-
-#if !defined(simde_math_acosh)
-#if SIMDE_MATH_BUILTIN_LIBM(acosh)
-#define simde_math_acosh(v) __builtin_acosh(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_acosh(v) std::acosh(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_acosh(v) acosh(v)
-#endif
-#endif
-
-#if !defined(simde_math_acoshf)
-#if SIMDE_MATH_BUILTIN_LIBM(acoshf)
-#define simde_math_acoshf(v) __builtin_acoshf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_acoshf(v) std::acosh(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_acoshf(v) acoshf(v)
-#endif
-#endif
-
-#if !defined(simde_math_asin)
-#if SIMDE_MATH_BUILTIN_LIBM(asin)
-#define simde_math_asin(v) __builtin_asin(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_asin(v) std::asin(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_asin(v) asin(v)
-#endif
-#endif
-
-#if !defined(simde_math_asinf)
-#if SIMDE_MATH_BUILTIN_LIBM(asinf)
-#define simde_math_asinf(v) __builtin_asinf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_asinf(v) std::asin(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_asinf(v) asinf(v)
-#endif
-#endif
-
-#if !defined(simde_math_asinh)
-#if SIMDE_MATH_BUILTIN_LIBM(asinh)
-#define simde_math_asinh(v) __builtin_asinh(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_asinh(v) std::asinh(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_asinh(v) asinh(v)
-#endif
-#endif
-
-#if !defined(simde_math_asinhf)
-#if SIMDE_MATH_BUILTIN_LIBM(asinhf)
-#define simde_math_asinhf(v) __builtin_asinhf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_asinhf(v) std::asinh(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_asinhf(v) asinhf(v)
-#endif
-#endif
-
-#if !defined(simde_math_atan)
-#if SIMDE_MATH_BUILTIN_LIBM(atan)
-#define simde_math_atan(v) __builtin_atan(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_atan(v) std::atan(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_atan(v) atan(v)
-#endif
-#endif
-
-#if !defined(simde_math_atan2)
-#if SIMDE_MATH_BUILTIN_LIBM(atan2)
-#define simde_math_atan2(y, x) __builtin_atan2(y, x)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_atan2(y, x) std::atan2(y, x)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_atan2(y, x) atan2(y, x)
-#endif
-#endif
-
-#if !defined(simde_math_atan2f)
-#if SIMDE_MATH_BUILTIN_LIBM(atan2f)
-#define simde_math_atan2f(y, x) __builtin_atan2f(y, x)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_atan2f(y, x) std::atan2(y, x)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_atan2f(y, x) atan2f(y, x)
-#endif
-#endif
-
-#if !defined(simde_math_atanf)
-#if SIMDE_MATH_BUILTIN_LIBM(atanf)
-#define simde_math_atanf(v) __builtin_atanf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_atanf(v) std::atan(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_atanf(v) atanf(v)
-#endif
-#endif
-
-#if !defined(simde_math_atanh)
-#if SIMDE_MATH_BUILTIN_LIBM(atanh)
-#define simde_math_atanh(v) __builtin_atanh(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_atanh(v) std::atanh(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_atanh(v) atanh(v)
-#endif
-#endif
-
-#if !defined(simde_math_atanhf)
-#if SIMDE_MATH_BUILTIN_LIBM(atanhf)
-#define simde_math_atanhf(v) __builtin_atanhf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_atanhf(v) std::atanh(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_atanhf(v) atanhf(v)
-#endif
-#endif
-
-#if !defined(simde_math_cbrt)
-#if SIMDE_MATH_BUILTIN_LIBM(cbrt)
-#define simde_math_cbrt(v) __builtin_cbrt(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_cbrt(v) std::cbrt(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_cbrt(v) cbrt(v)
-#endif
-#endif
-
-#if !defined(simde_math_cbrtf)
-#if SIMDE_MATH_BUILTIN_LIBM(cbrtf)
-#define simde_math_cbrtf(v) __builtin_cbrtf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_cbrtf(v) std::cbrt(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_cbrtf(v) cbrtf(v)
-#endif
-#endif
-
-#if !defined(simde_math_ceil)
-#if SIMDE_MATH_BUILTIN_LIBM(ceil)
-#define simde_math_ceil(v) __builtin_ceil(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_ceil(v) std::ceil(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_ceil(v) ceil(v)
-#endif
-#endif
-
-#if !defined(simde_math_ceilf)
-#if SIMDE_MATH_BUILTIN_LIBM(ceilf)
-#define simde_math_ceilf(v) __builtin_ceilf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_ceilf(v) std::ceil(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_ceilf(v) ceilf(v)
-#endif
-#endif
-
-#if !defined(simde_math_copysign)
-#if SIMDE_MATH_BUILTIN_LIBM(copysign)
-#define simde_math_copysign(x, y) __builtin_copysign(x, y)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_copysign(x, y) std::copysign(x, y)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_copysign(x, y) copysign(x, y)
-#endif
-#endif
-
-#if !defined(simde_math_copysignf)
-#if SIMDE_MATH_BUILTIN_LIBM(copysignf)
-#define simde_math_copysignf(x, y) __builtin_copysignf(x, y)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_copysignf(x, y) std::copysignf(x, y)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_copysignf(x, y) copysignf(x, y)
-#endif
-#endif
-
-#if !defined(simde_math_cos)
-#if SIMDE_MATH_BUILTIN_LIBM(cos)
-#define simde_math_cos(v) __builtin_cos(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_cos(v) std::cos(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_cos(v) cos(v)
-#endif
-#endif
-
-#if !defined(simde_math_cosf)
-#if defined(SIMDE_MATH_SLEEF_ENABLE)
-#if SIMDE_ACCURACY_PREFERENCE < 1
-#define simde_math_cosf(v) Sleef_cosf_u35(v)
-#else
-#define simde_math_cosf(v) Sleef_cosf_u10(v)
-#endif
-#elif SIMDE_MATH_BUILTIN_LIBM(cosf)
-#define simde_math_cosf(v) __builtin_cosf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_cosf(v) std::cos(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_cosf(v) cosf(v)
-#endif
-#endif
-
-#if !defined(simde_math_cosh)
-#if SIMDE_MATH_BUILTIN_LIBM(cosh)
-#define simde_math_cosh(v) __builtin_cosh(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_cosh(v) std::cosh(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_cosh(v) cosh(v)
-#endif
-#endif
-
-#if !defined(simde_math_coshf)
-#if SIMDE_MATH_BUILTIN_LIBM(coshf)
-#define simde_math_coshf(v) __builtin_coshf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_coshf(v) std::cosh(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_coshf(v) coshf(v)
-#endif
-#endif
-
-#if !defined(simde_math_erf)
-#if SIMDE_MATH_BUILTIN_LIBM(erf)
-#define simde_math_erf(v) __builtin_erf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_erf(v) std::erf(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_erf(v) erf(v)
-#endif
-#endif
-
-#if !defined(simde_math_erff)
-#if SIMDE_MATH_BUILTIN_LIBM(erff)
-#define simde_math_erff(v) __builtin_erff(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_erff(v) std::erf(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_erff(v) erff(v)
-#endif
-#endif
-
-#if !defined(simde_math_erfc)
-#if SIMDE_MATH_BUILTIN_LIBM(erfc)
-#define simde_math_erfc(v) __builtin_erfc(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_erfc(v) std::erfc(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_erfc(v) erfc(v)
-#endif
-#endif
-
-#if !defined(simde_math_erfcf)
-#if SIMDE_MATH_BUILTIN_LIBM(erfcf)
-#define simde_math_erfcf(v) __builtin_erfcf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_erfcf(v) std::erfc(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_erfcf(v) erfcf(v)
-#endif
-#endif
-
-#if !defined(simde_math_exp)
-#if SIMDE_MATH_BUILTIN_LIBM(exp)
-#define simde_math_exp(v) __builtin_exp(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_exp(v) std::exp(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_exp(v) exp(v)
-#endif
-#endif
-
-#if !defined(simde_math_expf)
-#if SIMDE_MATH_BUILTIN_LIBM(expf)
-#define simde_math_expf(v) __builtin_expf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_expf(v) std::exp(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_expf(v) expf(v)
-#endif
-#endif
-
-#if !defined(simde_math_expm1)
-#if SIMDE_MATH_BUILTIN_LIBM(expm1)
-#define simde_math_expm1(v) __builtin_expm1(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_expm1(v) std::expm1(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_expm1(v) expm1(v)
-#endif
-#endif
-
-#if !defined(simde_math_expm1f)
-#if SIMDE_MATH_BUILTIN_LIBM(expm1f)
-#define simde_math_expm1f(v) __builtin_expm1f(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_expm1f(v) std::expm1(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_expm1f(v) expm1f(v)
-#endif
-#endif
-
-#if !defined(simde_math_exp2)
-#if SIMDE_MATH_BUILTIN_LIBM(exp2)
-#define simde_math_exp2(v) __builtin_exp2(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_exp2(v) std::exp2(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_exp2(v) exp2(v)
-#endif
-#endif
-
-#if !defined(simde_math_exp2f)
-#if SIMDE_MATH_BUILTIN_LIBM(exp2f)
-#define simde_math_exp2f(v) __builtin_exp2f(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_exp2f(v) std::exp2(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_exp2f(v) exp2f(v)
-#endif
-#endif
-
-#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3, 4, 0)
-#define simde_math_exp10(v) __builtin_exp10(v)
-#else
-#define simde_math_exp10(v) pow(10.0, (v))
-#endif
-
-#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3, 4, 0)
-#define simde_math_exp10f(v) __builtin_exp10f(v)
-#else
-#define simde_math_exp10f(v) powf(10.0f, (v))
-#endif
-
-#if !defined(simde_math_fabs)
-#if SIMDE_MATH_BUILTIN_LIBM(fabs)
-#define simde_math_fabs(v) __builtin_fabs(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_fabs(v) std::fabs(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_fabs(v) fabs(v)
-#endif
-#endif
-
-#if !defined(simde_math_fabsf)
-#if SIMDE_MATH_BUILTIN_LIBM(fabsf)
-#define simde_math_fabsf(v) __builtin_fabsf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_fabsf(v) std::fabs(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_fabsf(v) fabsf(v)
-#endif
-#endif
-
-#if !defined(simde_math_floor)
-#if SIMDE_MATH_BUILTIN_LIBM(floor)
-#define simde_math_floor(v) __builtin_floor(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_floor(v) std::floor(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_floor(v) floor(v)
-#endif
-#endif
-
-#if !defined(simde_math_floorf)
-#if SIMDE_MATH_BUILTIN_LIBM(floorf)
-#define simde_math_floorf(v) __builtin_floorf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_floorf(v) std::floor(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_floorf(v) floorf(v)
-#endif
-#endif
-
-#if !defined(simde_math_fma)
-#if SIMDE_MATH_BUILTIN_LIBM(fma)
-#define simde_math_fma(x, y, z) __builtin_fma(x, y, z)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_fma(x, y, z) std::fma(x, y, z)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_fma(x, y, z) fma(x, y, z)
-#endif
-#endif
-
-#if !defined(simde_math_fmaf)
-#if SIMDE_MATH_BUILTIN_LIBM(fmaf)
-#define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_fmaf(x, y, z) std::fma(x, y, z)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_fmaf(x, y, z) fmaf(x, y, z)
-#endif
-#endif
-
-#if !defined(simde_math_fmax)
-#if SIMDE_MATH_BUILTIN_LIBM(fmax)
-#define simde_math_fmax(x, y, z) __builtin_fmax(x, y, z)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_fmax(x, y, z) std::fmax(x, y, z)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_fmax(x, y, z) fmax(x, y, z)
-#endif
-#endif
-
-#if !defined(simde_math_fmaxf)
-#if SIMDE_MATH_BUILTIN_LIBM(fmaxf)
-#define simde_math_fmaxf(x, y, z) __builtin_fmaxf(x, y, z)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_fmaxf(x, y, z) std::fmax(x, y, z)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_fmaxf(x, y, z) fmaxf(x, y, z)
-#endif
-#endif
-
-#if !defined(simde_math_hypot)
-#if SIMDE_MATH_BUILTIN_LIBM(hypot)
-#define simde_math_hypot(y, x) __builtin_hypot(y, x)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_hypot(y, x) std::hypot(y, x)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_hypot(y, x) hypot(y, x)
-#endif
-#endif
-
-#if !defined(simde_math_hypotf)
-#if SIMDE_MATH_BUILTIN_LIBM(hypotf)
-#define simde_math_hypotf(y, x) __builtin_hypotf(y, x)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_hypotf(y, x) std::hypot(y, x)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_hypotf(y, x) hypotf(y, x)
-#endif
-#endif
-
-#if !defined(simde_math_log)
-#if SIMDE_MATH_BUILTIN_LIBM(log)
-#define simde_math_log(v) __builtin_log(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_log(v) std::log(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_log(v) log(v)
-#endif
-#endif
-
-#if !defined(simde_math_logf)
-#if SIMDE_MATH_BUILTIN_LIBM(logf)
-#define simde_math_logf(v) __builtin_logf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_logf(v) std::log(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_logf(v) logf(v)
-#endif
-#endif
-
-#if !defined(simde_math_logb)
-#if SIMDE_MATH_BUILTIN_LIBM(logb)
-#define simde_math_logb(v) __builtin_logb(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_logb(v) std::logb(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_logb(v) logb(v)
-#endif
-#endif
-
-#if !defined(simde_math_logbf)
-#if SIMDE_MATH_BUILTIN_LIBM(logbf)
-#define simde_math_logbf(v) __builtin_logbf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_logbf(v) std::logb(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_logbf(v) logbf(v)
-#endif
-#endif
-
-#if !defined(simde_math_log1p)
-#if SIMDE_MATH_BUILTIN_LIBM(log1p)
-#define simde_math_log1p(v) __builtin_log1p(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_log1p(v) std::log1p(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_log1p(v) log1p(v)
-#endif
-#endif
-
-#if !defined(simde_math_log1pf)
-#if SIMDE_MATH_BUILTIN_LIBM(log1pf)
-#define simde_math_log1pf(v) __builtin_log1pf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_log1pf(v) std::log1p(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_log1pf(v) log1pf(v)
-#endif
-#endif
-
-#if !defined(simde_math_log2)
-#if SIMDE_MATH_BUILTIN_LIBM(log2)
-#define simde_math_log2(v) __builtin_log2(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_log2(v) std::log2(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_log2(v) log2(v)
-#endif
-#endif
-
-#if !defined(simde_math_log2f)
-#if SIMDE_MATH_BUILTIN_LIBM(log2f)
-#define simde_math_log2f(v) __builtin_log2f(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_log2f(v) std::log2(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_log2f(v) log2f(v)
-#endif
-#endif
-
-#if !defined(simde_math_log10)
-#if SIMDE_MATH_BUILTIN_LIBM(log10)
-#define simde_math_log10(v) __builtin_log10(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_log10(v) std::log10(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_log10(v) log10(v)
-#endif
-#endif
-
-#if !defined(simde_math_log10f)
-#if SIMDE_MATH_BUILTIN_LIBM(log10f)
-#define simde_math_log10f(v) __builtin_log10f(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_log10f(v) std::log10(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_log10f(v) log10f(v)
-#endif
-#endif
-
-#if !defined(simde_math_modf)
-#if SIMDE_MATH_BUILTIN_LIBM(modf)
-#define simde_math_modf(x, iptr) __builtin_modf(x, iptr)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_modf(x, iptr) std::modf(x, iptr)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_modf(x, iptr) modf(x, iptr)
-#endif
-#endif
-
-#if !defined(simde_math_modff)
-#if SIMDE_MATH_BUILTIN_LIBM(modff)
-#define simde_math_modff(x, iptr) __builtin_modff(x, iptr)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_modff(x, iptr) std::modf(x, iptr)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_modff(x, iptr) modff(x, iptr)
-#endif
-#endif
-
-#if !defined(simde_math_nearbyint)
-#if SIMDE_MATH_BUILTIN_LIBM(nearbyint)
-#define simde_math_nearbyint(v) __builtin_nearbyint(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_nearbyint(v) std::nearbyint(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_nearbyint(v) nearbyint(v)
-#endif
-#endif
-
-#if !defined(simde_math_nearbyintf)
-#if SIMDE_MATH_BUILTIN_LIBM(nearbyintf)
-#define simde_math_nearbyintf(v) __builtin_nearbyintf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_nearbyintf(v) std::nearbyint(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_nearbyintf(v) nearbyintf(v)
-#endif
-#endif
-
-#if !defined(simde_math_pow)
-#if SIMDE_MATH_BUILTIN_LIBM(pow)
-#define simde_math_pow(y, x) __builtin_pow(y, x)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_pow(y, x) std::pow(y, x)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_pow(y, x) pow(y, x)
-#endif
-#endif
-
-#if !defined(simde_math_powf)
-#if SIMDE_MATH_BUILTIN_LIBM(powf)
-#define simde_math_powf(y, x) __builtin_powf(y, x)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_powf(y, x) std::pow(y, x)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_powf(y, x) powf(y, x)
-#endif
-#endif
-
-#if !defined(simde_math_rint)
-#if SIMDE_MATH_BUILTIN_LIBM(rint)
-#define simde_math_rint(v) __builtin_rint(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_rint(v) std::rint(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_rint(v) rint(v)
-#endif
-#endif
-
-#if !defined(simde_math_rintf)
-#if SIMDE_MATH_BUILTIN_LIBM(rintf)
-#define simde_math_rintf(v) __builtin_rintf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_rintf(v) std::rint(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_rintf(v) rintf(v)
-#endif
-#endif
-
-#if !defined(simde_math_round)
-#if SIMDE_MATH_BUILTIN_LIBM(round)
-#define simde_math_round(v) __builtin_round(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_round(v) std::round(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_round(v) round(v)
-#endif
-#endif
-
-#if !defined(simde_math_roundf)
-#if SIMDE_MATH_BUILTIN_LIBM(roundf)
-#define simde_math_roundf(v) __builtin_roundf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_roundf(v) std::round(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_roundf(v) roundf(v)
-#endif
-#endif
-
-#if !defined(simde_math_roundeven)
-#if HEDLEY_HAS_BUILTIN(__builtin_roundeven) || \
-	HEDLEY_GCC_VERSION_CHECK(10, 0, 0)
-#define simde_math_roundeven(v) __builtin_roundeven(v)
-#elif defined(simde_math_round) && defined(simde_math_fabs)
-static HEDLEY_INLINE double simde_math_roundeven(double v)
-{
-	double rounded = simde_math_round(v);
-	double diff = rounded - v;
-	if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) &&
-	    (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) {
-		rounded = v - diff;
-	}
-	return rounded;
-}
-#define simde_math_roundeven simde_math_roundeven
-#endif
-#endif
-
-#if !defined(simde_math_roundevenf)
-#if HEDLEY_HAS_BUILTIN(__builtin_roundevenf) || \
-	HEDLEY_GCC_VERSION_CHECK(10, 0, 0)
-#define simde_math_roundevenf(v) __builtin_roundevenf(v)
-#elif defined(simde_math_roundf) && defined(simde_math_fabsf)
-static HEDLEY_INLINE float simde_math_roundevenf(float v)
-{
-	float rounded = simde_math_roundf(v);
-	float diff = rounded - v;
-	if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) &&
-	    (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) {
-		rounded = v - diff;
-	}
-	return rounded;
-}
-#define simde_math_roundevenf simde_math_roundevenf
-#endif
-#endif
-
-#if !defined(simde_math_sin)
-#if SIMDE_MATH_BUILTIN_LIBM(sin)
-#define simde_math_sin(v) __builtin_sin(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_sin(v) std::sin(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_sin(v) sin(v)
-#endif
-#endif
-
-#if !defined(simde_math_sinf)
-#if SIMDE_MATH_BUILTIN_LIBM(sinf)
-#define simde_math_sinf(v) __builtin_sinf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_sinf(v) std::sin(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_sinf(v) sinf(v)
-#endif
-#endif
-
-#if !defined(simde_math_sinh)
-#if SIMDE_MATH_BUILTIN_LIBM(sinh)
-#define simde_math_sinh(v) __builtin_sinh(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_sinh(v) std::sinh(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_sinh(v) sinh(v)
-#endif
-#endif
-
-#if !defined(simde_math_sinhf)
-#if SIMDE_MATH_BUILTIN_LIBM(sinhf)
-#define simde_math_sinhf(v) __builtin_sinhf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_sinhf(v) std::sinh(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_sinhf(v) sinhf(v)
-#endif
-#endif
-
-#if !defined(simde_math_sqrt)
-#if SIMDE_MATH_BUILTIN_LIBM(sqrt)
-#define simde_math_sqrt(v) __builtin_sqrt(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_sqrt(v) std::sqrt(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_sqrt(v) sqrt(v)
-#endif
-#endif
-
-#if !defined(simde_math_sqrtf)
-#if SIMDE_MATH_BUILTIN_LIBM(sqrtf)
-#define simde_math_sqrtf(v) __builtin_sqrtf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_sqrtf(v) std::sqrt(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_sqrtf(v) sqrtf(v)
-#endif
-#endif
-
-#if !defined(simde_math_tan)
-#if SIMDE_MATH_BUILTIN_LIBM(tan)
-#define simde_math_tan(v) __builtin_tan(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_tan(v) std::tan(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_tan(v) tan(v)
-#endif
-#endif
-
-#if !defined(simde_math_tanf)
-#if SIMDE_MATH_BUILTIN_LIBM(tanf)
-#define simde_math_tanf(v) __builtin_tanf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_tanf(v) std::tan(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_tanf(v) tanf(v)
-#endif
-#endif
-
-#if !defined(simde_math_tanh)
-#if SIMDE_MATH_BUILTIN_LIBM(tanh)
-#define simde_math_tanh(v) __builtin_tanh(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_tanh(v) std::tanh(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_tanh(v) tanh(v)
-#endif
-#endif
-
-#if !defined(simde_math_tanhf)
-#if SIMDE_MATH_BUILTIN_LIBM(tanhf)
-#define simde_math_tanhf(v) __builtin_tanhf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_tanhf(v) std::tanh(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_tanhf(v) tanhf(v)
-#endif
-#endif
-
-#if !defined(simde_math_trunc)
-#if SIMDE_MATH_BUILTIN_LIBM(trunc)
-#define simde_math_trunc(v) __builtin_trunc(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_trunc(v) std::trunc(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_trunc(v) trunc(v)
-#endif
-#endif
-
-#if !defined(simde_math_truncf)
-#if SIMDE_MATH_BUILTIN_LIBM(truncf)
-#define simde_math_truncf(v) __builtin_truncf(v)
-#elif defined(SIMDE_MATH_HAVE_CMATH)
-#define simde_math_truncf(v) std::trunc(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_truncf(v) truncf(v)
-#endif
-#endif
-
-/***  Complex functions ***/
-
-#if !defined(simde_math_cexp)
-#if SIMDE_MATH_BUILTIN_LIBM(cexp)
-#define simde_math_cexp(v) __builtin_cexp(v)
-#elif defined(__cplusplus)
-#define simde_math_cexp(v) std::cexp(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_cexp(v) cexp(v)
-#endif
-#endif
-
-#if !defined(simde_math_cexpf)
-#if SIMDE_MATH_BUILTIN_LIBM(cexpf)
-#define simde_math_cexpf(v) __builtin_cexpf(v)
-#elif defined(__cplusplus)
-#define simde_math_cexpf(v) std::exp(v)
-#elif defined(SIMDE_MATH_HAVE_MATH_H)
-#define simde_math_cexpf(v) cexpf(v)
-#endif
-#endif
-
-/*** Additional functions not in libm ***/
-
-#if defined(simde_math_fabs) && defined(simde_math_sqrt) && \
-	defined(simde_math_exp)
-static HEDLEY_INLINE double simde_math_cdfnorm(double x)
-{
-	/* https://www.johndcook.com/blog/cpp_phi/
-    * Public Domain */
-	static const double a1 = 0.254829592;
-	static const double a2 = -0.284496736;
-	static const double a3 = 1.421413741;
-	static const double a4 = -1.453152027;
-	static const double a5 = 1.061405429;
-	static const double p = 0.3275911;
-
-	const int sign = x < 0;
-	x = simde_math_fabs(x) / simde_math_sqrt(2.0);
-
-	/* A&S formula 7.1.26 */
-	double t = 1.0 / (1.0 + p * x);
-	double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t *
-				 simde_math_exp(-x * x);
-
-	return 0.5 * (1.0 + (sign ? -y : y));
-}
-#define simde_math_cdfnorm simde_math_cdfnorm
-#endif
-
-#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && \
-	defined(simde_math_expf)
-static HEDLEY_INLINE float simde_math_cdfnormf(float x)
-{
-	/* https://www.johndcook.com/blog/cpp_phi/
-    * Public Domain */
-	static const float a1 = 0.254829592f;
-	static const float a2 = -0.284496736f;
-	static const float a3 = 1.421413741f;
-	static const float a4 = -1.453152027f;
-	static const float a5 = 1.061405429f;
-	static const float p = 0.3275911f;
-
-	const int sign = x < 0;
-	x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f);
-
-	/* A&S formula 7.1.26 */
-	float t = 1.0f / (1.0f + p * x);
-	float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t *
-				 simde_math_expf(-x * x);
-
-	return 0.5f * (1.0f + (sign ? -y : y));
-}
-#define simde_math_cdfnormf simde_math_cdfnormf
-#endif
-
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_
-
-#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && \
-	defined(simde_math_sqrt)
-/*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/
-static HEDLEY_INLINE double simde_math_cdfnorminv(double p)
-{
-	static const double a[] = {
-		-3.969683028665376e+01, 2.209460984245205e+02,
-		-2.759285104469687e+02, 1.383577518672690e+02,
-		-3.066479806614716e+01, 2.506628277459239e+00};
-
-	static const double b[] = {-5.447609879822406e+01,
-				   1.615858368580409e+02,
-				   -1.556989798598866e+02,
-				   6.680131188771972e+01,
-				   -1.328068155288572e+01};
-
-	static const double c[] = {
-		-7.784894002430293e-03, -3.223964580411365e-01,
-		-2.400758277161838e+00, -2.549732539343734e+00,
-		4.374664141464968e+00,  2.938163982698783e+00};
-
-	static const double d[] = {7.784695709041462e-03, 3.224671290700398e-01,
-				   2.445134137142996e+00,
-				   3.754408661907416e+00};
-
-	static const double low = 0.02425;
-	static const double high = 0.97575;
-	double q, r;
-
-	if (p < 0 || p > 1) {
-		return 0.0;
-	} else if (p == 0) {
-		return -SIMDE_MATH_INFINITY;
-	} else if (p == 1) {
-		return SIMDE_MATH_INFINITY;
-	} else if (p < low) {
-		q = simde_math_sqrt(-2.0 * simde_math_log(p));
-		return (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q +
-			 c[4]) * q +
-			c[5]) /
-		       (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1));
-	} else if (p > high) {
-		q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p));
-		return -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q +
-			  c[4]) * q +
-			 c[5]) /
-		       (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1));
-	} else {
-		q = p - 0.5;
-		r = q * q;
-		return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r +
-			 a[4]) * r +
-			a[5]) *
-		       q /
-		       (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r +
-			 b[4]) * r +
-			1);
-	}
-}
-#define simde_math_cdfnorminv simde_math_cdfnorminv
-#endif
-
-#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && \
-	defined(simde_math_sqrtf)
-static HEDLEY_INLINE float simde_math_cdfnorminvf(float p)
-{
-	static const float a[] = {
-		-3.969683028665376e+01f, 2.209460984245205e+02f,
-		-2.759285104469687e+02f, 1.383577518672690e+02f,
-		-3.066479806614716e+01f, 2.506628277459239e+00f};
-	static const float b[] = {-5.447609879822406e+01f,
-				  1.615858368580409e+02f,
-				  -1.556989798598866e+02f,
-				  6.680131188771972e+01f,
-				  -1.328068155288572e+01f};
-	static const float c[] = {
-		-7.784894002430293e-03f, -3.223964580411365e-01f,
-		-2.400758277161838e+00f, -2.549732539343734e+00f,
-		4.374664141464968e+00f,  2.938163982698783e+00f};
-	static const float d[] = {7.784695709041462e-03f,
-				  3.224671290700398e-01f,
-				  2.445134137142996e+00f,
-				  3.754408661907416e+00f};
-	static const float low = 0.02425f;
-	static const float high = 0.97575f;
-	float q, r;
-
-	if (p < 0 || p > 1) {
-		return 0.0f;
-	} else if (p == 0) {
-		return -SIMDE_MATH_INFINITYF;
-	} else if (p == 1) {
-		return SIMDE_MATH_INFINITYF;
-	} else if (p < low) {
-		q = simde_math_sqrtf(-2.0f * simde_math_logf(p));
-		return (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q +
-			 c[4]) * q +
-			c[5]) /
-		       (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1));
-	} else if (p > high) {
-		q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p));
-		return -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q +
-			  c[4]) * q +
-			 c[5]) /
-		       (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1));
-	} else {
-		q = p - 0.5f;
-		r = q * q;
-		return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r +
-			 a[4]) * r +
-			a[5]) *
-		       q /
-		       (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r +
-			 b[4]) * r +
-			1);
-	}
-}
-#define simde_math_cdfnorminvf simde_math_cdfnorminvf
-#endif
-
-#if !defined(simde_math_erfinv) && defined(simde_math_log) && \
-	defined(simde_math_copysign) && defined(simde_math_sqrt)
-static HEDLEY_INLINE double simde_math_erfinv(double x)
-{
-	/* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c
-     *
-     * The original answer on SO uses a constant of 0.147, but in my
-     * testing 0.14829094707965850830078125 gives a lower average absolute error
-     * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803).
-     * That said, if your goal is to minimize the *maximum* absolute
-     * error, 0.15449436008930206298828125 provides significantly better
-     * results; 0.0009250640869140625000000000 vs ~ 0.005. */
-	double tt1, tt2, lnx;
-	double sgn = simde_math_copysign(1.0, x);
-
-	x = (1.0 - x) * (1.0 + x);
-	lnx = simde_math_log(x);
-
-	tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx;
-	tt2 = (1.0 / 0.14829094707965850830078125) * lnx;
-
-	return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2));
-}
-#define simde_math_erfinv simde_math_erfinv
-#endif
-
-#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && \
-	defined(simde_math_copysignf) && defined(simde_math_sqrtf)
-static HEDLEY_INLINE float simde_math_erfinvf(float x)
-{
-	float tt1, tt2, lnx;
-	float sgn = simde_math_copysignf(1.0f, x);
-
-	x = (1.0f - x) * (1.0f + x);
-	lnx = simde_math_logf(x);
-
-	tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) +
-	      0.5f * lnx;
-	tt2 = (1.0f / 0.14829094707965850830078125f) * lnx;
-
-	return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2));
-}
-#define simde_math_erfinvf simde_math_erfinvf
-#endif
-
-#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && \
-	defined(simde_math_log) && defined(simde_math_sqrt)
-static HEDLEY_INLINE double simde_math_erfcinv(double x)
-{
-	if (x >= 0.0625 && x < 2.0) {
-		return simde_math_erfinv(1.0 - x);
-	} else if (x < 0.0625 && x >= 1.0e-100) {
-		double p[6] = {0.1550470003116, 1.382719649631, 0.690969348887,
-			       -1.128081391617, 0.680544246825, -0.16444156791};
-		double q[3] = {0.155024849822, 1.385228141995, 1.000000000000};
-
-		const double t = 1.0 / simde_math_sqrt(-simde_math_log(x));
-		return (p[0] / t + p[1] +
-			t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) /
-		       (q[0] + t * (q[1] + t * (q[2])));
-	} else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) {
-		double p[4] = {0.00980456202915, 0.363667889171, 0.97302949837,
-			       -0.5374947401};
-		double q[3] = {0.00980451277802, 0.363699971544,
-			       1.000000000000};
-
-		const double t = 1.0 / simde_math_sqrt(-simde_math_log(x));
-		return (p[0] / t + p[1] + t * (p[2] + t * p[3])) /
-		       (q[0] + t * (q[1] + t * (q[2])));
-	} else if (!simde_math_isnormal(x)) {
-		return SIMDE_MATH_INFINITY;
-	} else {
-		return -SIMDE_MATH_INFINITY;
-	}
-}
-
-#define simde_math_erfcinv simde_math_erfcinv
-#endif
-
-#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && \
-	defined(simde_math_logf) && defined(simde_math_sqrtf)
-static HEDLEY_INLINE float simde_math_erfcinvf(float x)
-{
-	if (x >= 0.0625f && x < 2.0f) {
-		return simde_math_erfinvf(1.0f - x);
-	} else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) {
-		static const float p[6] = {0.1550470003116f, 1.382719649631f,
-					   0.690969348887f, -1.128081391617f,
-					   0.680544246825f - 0.164441567910f};
-		static const float q[3] = {0.155024849822f, 1.385228141995f,
-					   1.000000000000f};
-
-		const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x));
-		return (p[0] / t + p[1] +
-			t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) /
-		       (q[0] + t * (q[1] + t * (q[2])));
-	} else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) {
-		static const float p[4] = {0.00980456202915f, 0.36366788917100f,
-					   0.97302949837000f,
-					   -0.5374947401000f};
-		static const float q[3] = {0.00980451277802f, 0.36369997154400f,
-					   1.00000000000000f};
-
-		const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x));
-		return (p[0] / t + p[1] + t * (p[2] + t * p[3])) /
-		       (q[0] + t * (q[1] + t * (q[2])));
-	} else {
-		return simde_math_isnormalf(x) ? -SIMDE_MATH_INFINITYF
-					       : SIMDE_MATH_INFINITYF;
-	}
-}
-
-#define simde_math_erfcinvf simde_math_erfcinvf
-#endif
-
-HEDLEY_DIAGNOSTIC_POP
-
-static HEDLEY_INLINE double simde_math_rad2deg(double radians)
-{
-	return radians * SIMDE_MATH_180_OVER_PI;
-}
-
-static HEDLEY_INLINE float simde_math_rad2degf(float radians)
-{
-	return radians * SIMDE_MATH_180_OVER_PIF;
-}
-
-static HEDLEY_INLINE double simde_math_deg2rad(double degrees)
-{
-	return degrees * SIMDE_MATH_PI_OVER_180;
-}
-
-static HEDLEY_INLINE float simde_math_deg2radf(float degrees)
-{
-	return degrees * (SIMDE_MATH_PI_OVER_180F);
-}
-
-/***  Saturated arithmetic ***/
-
-static HEDLEY_INLINE int8_t simde_math_adds_i8(int8_t a, int8_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqaddb_s8(a, b);
-#else
-	uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a);
-	uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b);
-	uint8_t r_ = a_ + b_;
-
-	a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX;
-	if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) {
-		r_ = a_;
-	}
-
-	return HEDLEY_STATIC_CAST(int8_t, r_);
-#endif
-}
-
-static HEDLEY_INLINE int16_t simde_math_adds_i16(int16_t a, int16_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqaddh_s16(a, b);
-#else
-	uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a);
-	uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b);
-	uint16_t r_ = a_ + b_;
-
-	a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX;
-	if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) {
-		r_ = a_;
-	}
-
-	return HEDLEY_STATIC_CAST(int16_t, r_);
-#endif
-}
-
-static HEDLEY_INLINE int32_t simde_math_adds_i32(int32_t a, int32_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqadds_s32(a, b);
-#else
-	uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a);
-	uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b);
-	uint32_t r_ = a_ + b_;
-
-	a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX;
-	if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) {
-		r_ = a_;
-	}
-
-	return HEDLEY_STATIC_CAST(int32_t, r_);
-#endif
-}
-
-static HEDLEY_INLINE int64_t simde_math_adds_i64(int64_t a, int64_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqaddd_s64(a, b);
-#else
-	uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a);
-	uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b);
-	uint64_t r_ = a_ + b_;
-
-	a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX;
-	if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) {
-		r_ = a_;
-	}
-
-	return HEDLEY_STATIC_CAST(int64_t, r_);
-#endif
-}
-
-static HEDLEY_INLINE uint8_t simde_math_adds_u8(uint8_t a, uint8_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqaddb_u8(a, b);
-#else
-	uint8_t r = a + b;
-	r |= -(r < a);
-	return r;
-#endif
-}
-
-static HEDLEY_INLINE uint16_t simde_math_adds_u16(uint16_t a, uint16_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqaddh_u16(a, b);
-#else
-	uint16_t r = a + b;
-	r |= -(r < a);
-	return r;
-#endif
-}
-
-static HEDLEY_INLINE uint32_t simde_math_adds_u32(uint32_t a, uint32_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqadds_u32(a, b);
-#else
-	uint32_t r = a + b;
-	r |= -(r < a);
-	return r;
-#endif
-}
-
-static HEDLEY_INLINE uint64_t simde_math_adds_u64(uint64_t a, uint64_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqaddd_u64(a, b);
-#else
-	uint64_t r = a + b;
-	r |= -(r < a);
-	return r;
-#endif
-}
-
-static HEDLEY_INLINE int8_t simde_math_subs_i8(int8_t a, int8_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqsubb_s8(a, b);
-#else
-	uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a);
-	uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b);
-	uint8_t r_ = a_ - b_;
-
-	a_ = (a_ >> 7) + INT8_MAX;
-
-	if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) {
-		r_ = a_;
-	}
-
-	return HEDLEY_STATIC_CAST(int8_t, r_);
-#endif
-}
-
-static HEDLEY_INLINE int16_t simde_math_subs_i16(int16_t a, int16_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqsubh_s16(a, b);
-#else
-	uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a);
-	uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b);
-	uint16_t r_ = a_ - b_;
-
-	a_ = (a_ >> 15) + INT16_MAX;
-
-	if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) {
-		r_ = a_;
-	}
-
-	return HEDLEY_STATIC_CAST(int16_t, r_);
-#endif
-}
-
-static HEDLEY_INLINE int32_t simde_math_subs_i32(int32_t a, int32_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqsubs_s32(a, b);
-#else
-	uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a);
-	uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b);
-	uint32_t r_ = a_ - b_;
-
-	a_ = (a_ >> 31) + INT32_MAX;
-
-	if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) {
-		r_ = a_;
-	}
-
-	return HEDLEY_STATIC_CAST(int32_t, r_);
-#endif
-}
-
-static HEDLEY_INLINE int64_t simde_math_subs_i64(int64_t a, int64_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqsubd_s64(a, b);
-#else
-	uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a);
-	uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b);
-	uint64_t r_ = a_ - b_;
-
-	a_ = (a_ >> 63) + INT64_MAX;
-
-	if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) {
-		r_ = a_;
-	}
-
-	return HEDLEY_STATIC_CAST(int64_t, r_);
-#endif
-}
-
-static HEDLEY_INLINE uint8_t simde_math_subs_u8(uint8_t a, uint8_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqsubb_u8(a, b);
-#else
-	uint8_t res = a - b;
-	res &= -(res <= a);
-	return res;
-#endif
-}
-
-static HEDLEY_INLINE uint16_t simde_math_subs_u16(uint16_t a, uint16_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqsubh_u16(a, b);
-#else
-	uint16_t res = a - b;
-	res &= -(res <= a);
-	return res;
-#endif
-}
-
-static HEDLEY_INLINE uint32_t simde_math_subs_u32(uint32_t a, uint32_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqsubs_u32(a, b);
-#else
-	uint32_t res = a - b;
-	res &= -(res <= a);
-	return res;
-#endif
-}
-
-static HEDLEY_INLINE uint64_t simde_math_subs_u64(uint64_t a, uint64_t b)
-{
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vqsubd_u64(a, b);
-#else
-	uint64_t res = a - b;
-	res &= -(res <= a);
-	return res;
-#endif
-}
-
-HEDLEY_DIAGNOSTIC_POP
-
-#endif /* !defined(SIMDE_MATH_H) */

+ 0 - 2456
libobs/util/simde/x86/mmx.h

@@ -1,2456 +0,0 @@
-/* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- *   2017-2020 Evan Nemerson <[email protected]>
- */
-
-#if !defined(SIMDE_X86_MMX_H)
-#define SIMDE_X86_MMX_H
-
-#include "../simde-common.h"
-
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
-
-#if defined(SIMDE_X86_MMX_NATIVE)
-#define SIMDE_X86_MMX_USE_NATIVE_TYPE
-#elif defined(SIMDE_X86_SSE_NATIVE)
-#define SIMDE_X86_MMX_USE_NATIVE_TYPE
-#endif
-
-#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
-#include <mmintrin.h>
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#include <arm_neon.h>
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-#include <loongson-mmiintrin.h>
-#endif
-
-#include <stdint.h>
-#include <limits.h>
-
-SIMDE_BEGIN_DECLS_
-
-typedef union {
-#if defined(SIMDE_VECTOR_SUBSCRIPT)
-	SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
-#else
-	SIMDE_ALIGN_TO_8 int8_t i8[8];
-	SIMDE_ALIGN_TO_8 int16_t i16[4];
-	SIMDE_ALIGN_TO_8 int32_t i32[2];
-	SIMDE_ALIGN_TO_8 int64_t i64[1];
-	SIMDE_ALIGN_TO_8 uint8_t u8[8];
-	SIMDE_ALIGN_TO_8 uint16_t u16[4];
-	SIMDE_ALIGN_TO_8 uint32_t u32[2];
-	SIMDE_ALIGN_TO_8 uint64_t u64[1];
-	SIMDE_ALIGN_TO_8 simde_float32 f32[2];
-	SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)];
-	SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)];
-#endif
-
-#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
-	__m64 n;
-#endif
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	int8x8_t neon_i8;
-	int16x4_t neon_i16;
-	int32x2_t neon_i32;
-	int64x1_t neon_i64;
-	uint8x8_t neon_u8;
-	uint16x4_t neon_u16;
-	uint32x2_t neon_u32;
-	uint64x1_t neon_u64;
-	float32x2_t neon_f32;
-#endif
-#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	int8x8_t mmi_i8;
-	int16x4_t mmi_i16;
-	int32x2_t mmi_i32;
-	int64_t mmi_i64;
-	uint8x8_t mmi_u8;
-	uint16x4_t mmi_u16;
-	uint32x2_t mmi_u32;
-	uint64_t mmi_u64;
-#endif
-} simde__m64_private;
-
-#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
-typedef __m64 simde__m64;
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-typedef int32x2_t simde__m64;
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-typedef int32x2_t simde__m64;
-#elif defined(SIMDE_VECTOR_SUBSCRIPT)
-typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
-#else
-typedef simde__m64_private simde__m64;
-#endif
-
-#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && \
-	defined(SIMDE_ENABLE_NATIVE_ALIASES)
-#define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES
-typedef simde__m64 __m64;
-#endif
-
-HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect");
-HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect");
-#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8,
-		     "simde__m64 is not 8-byte aligned");
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8,
-		     "simde__m64_private is not 8-byte aligned");
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde__m64_from_private(simde__m64_private v)
-{
-	simde__m64 r;
-	simde_memcpy(&r, &v, sizeof(r));
-	return r;
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64_private simde__m64_to_private(simde__m64 v)
-{
-	simde__m64_private r;
-	simde_memcpy(&r, &v, sizeof(r));
-	return r;
-}
-
-#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, \
-					       fragment)                      \
-	SIMDE_FUNCTION_ATTRIBUTES                                             \
-	simde__##simde_type simde__##simde_type##_from_##isax##_##fragment(   \
-		source_type value)                                            \
-	{                                                                     \
-		simde__##simde_type##_private r_;                             \
-		r_.isax##_##fragment = value;                                 \
-		return simde__##simde_type##_from_private(r_);                \
-	}                                                                     \
-                                                                              \
-	SIMDE_FUNCTION_ATTRIBUTES                                             \
-	source_type simde__##simde_type##_to_##isax##_##fragment(             \
-		simde__##simde_type value)                                    \
-	{                                                                     \
-		simde__##simde_type##_private r_ =                            \
-			simde__##simde_type##_to_private(value);              \
-		return r_.isax##_##fragment;                                  \
-	}
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32)
-#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */
-
-#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64)
-#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_add_pi8(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_add_pi8(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i8 = a_.i8 + b_.i8;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
-		r_.i8[i] = a_.i8[i] + b_.i8[i];
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b)
-#define _m_paddb(a, b) simde_m_paddb(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_add_pi16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_add_pi16(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i16 = a_.i16 + b_.i16;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = a_.i16[i] + b_.i16[i];
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b)
-#define _m_paddw(a, b) simde_mm_add_pi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_add_pi32(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_add_pi32(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = a_.i32 + b_.i32;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = a_.i32[i] + b_.i32[i];
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b)
-#define _m_paddd(a, b) simde_mm_add_pi32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_adds_pi8(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_adds_pi8(a, b);
-#else
-	simde__m64_private r_, a_ = simde__m64_to_private(a),
-			       b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
-		if ((((b_.i8[i]) > 0) &&
-		     ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) {
-			r_.i8[i] = INT8_MAX;
-		} else if ((((b_.i8[i]) < 0) &&
-			    ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) {
-			r_.i8[i] = INT8_MIN;
-		} else {
-			r_.i8[i] = (a_.i8[i]) + (b_.i8[i]);
-		}
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b)
-#define _m_paddsb(a, b) simde_mm_adds_pi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_adds_pu8(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_adds_pu8(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
-		const uint_fast16_t x =
-			HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) +
-			HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]);
-		if (x > UINT8_MAX)
-			r_.u8[i] = UINT8_MAX;
-		else
-			r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b)
-#define _m_paddusb(a, b) simde_mm_adds_pu8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_adds_pi16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_adds_pi16(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		if ((((b_.i16[i]) > 0) &&
-		     ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) {
-			r_.i16[i] = INT16_MAX;
-		} else if ((((b_.i16[i]) < 0) &&
-			    ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) {
-			r_.i16[i] = SHRT_MIN;
-		} else {
-			r_.i16[i] = (a_.i16[i]) + (b_.i16[i]);
-		}
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b)
-#define _m_paddsw(a, b) simde_mm_adds_pi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_adds_pu16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_adds_pu16(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		const uint32_t x = a_.u16[i] + b_.u16[i];
-		if (x > UINT16_MAX)
-			r_.u16[i] = UINT16_MAX;
-		else
-			r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b)
-#define _m_paddusw(a, b) simde_mm_adds_pu16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_and_si64(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_and_si64(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i64 = a_.i64 & b_.i64;
-#else
-	r_.i64[0] = a_.i64[0] & b_.i64[0];
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pand(a, b) simde_mm_and_si64(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_and_si64(a, b) simde_mm_and_si64(a, b)
-#define _m_pand(a, b) simde_mm_and_si64(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_andnot_si64(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_andnot_si64(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32f = ~a_.i32f & b_.i32f;
-#else
-	r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]);
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b)
-#define _m_pandn(a, b) simde_mm_andnot_si64(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_cmpeq_pi8(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cmpeq_pi8(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
-		r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b)
-#define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_cmpeq_pi16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cmpeq_pi16(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b)
-#define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_cmpeq_pi32(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cmpeq_pi32(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b)
-#define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_cmpgt_pi8(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cmpgt_pi8(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
-		r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b)
-#define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_cmpgt_pi16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cmpgt_pi16(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b)
-#define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_cmpgt_pi32(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cmpgt_pi32(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b)
-#define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int64_t simde_mm_cvtm64_si64(simde__m64 a)
-{
-#if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \
-	!defined(__PGI)
-	return _mm_cvtm64_si64(a);
-#else
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	HEDLEY_DIAGNOSTIC_PUSH
-#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
-	SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
-#pragma clang diagnostic ignored "-Wvector-conversion"
-#endif
-	return vget_lane_s64(a_.neon_i64, 0);
-	HEDLEY_DIAGNOSTIC_POP
-#else
-	return a_.i64[0];
-#endif
-#endif
-}
-#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a)
-#define _m_to_int64(a) simde_mm_cvtm64_si64(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_cvtsi32_si64(int32_t a)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvtsi32_si64(a);
-#else
-	simde__m64_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	const int32_t av[sizeof(r_.neon_i32) / sizeof(r_.neon_i32[0])] = {a, 0};
-	r_.neon_i32 = vld1_s32(av);
-#else
-	r_.i32[0] = a;
-	r_.i32[1] = 0;
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a)
-#define _m_from_int(a) simde_mm_cvtsi32_si64(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_cvtsi64_m64(int64_t a)
-{
-#if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \
-	!defined(__PGI)
-	return _mm_cvtsi64_m64(a);
-#else
-	simde__m64_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = vld1_s64(&a);
-#else
-	r_.i64[0] = a;
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a)
-#define _m_from_int64(a) simde_mm_cvtsi64_m64(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int32_t simde_mm_cvtsi64_si32(simde__m64 a)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvtsi64_si32(a);
-#else
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	HEDLEY_DIAGNOSTIC_PUSH
-#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
-	SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
-#pragma clang diagnostic ignored "-Wvector-conversion"
-#endif
-	return vget_lane_s32(a_.neon_i32, 0);
-	HEDLEY_DIAGNOSTIC_POP
-#else
-	return a_.i32[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_empty(void)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	_mm_empty();
-#else
-	/* noop */
-#endif
-}
-#define simde_m_empty() simde_mm_empty()
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_empty() simde_mm_empty()
-#define _m_empty() simde_mm_empty()
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_madd_pi16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_madd_pi16(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16);
-	r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1));
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i += 2) {
-		r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) +
-				(a_.i16[i + 1] * b_.i16[i + 1]);
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b)
-#define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_mulhi_pi16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_mulhi_pi16(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
-	const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16);
-	const uint16x4_t t3 = vmovn_u32(t2);
-	r_.neon_u16 = t3;
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = HEDLEY_STATIC_CAST(int16_t,
-					       ((a_.i16[i] * b_.i16[i]) >> 16));
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b)
-#define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_mullo_pi16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_mullo_pi16(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
-	const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1));
-	r_.neon_u16 = t2;
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = HEDLEY_STATIC_CAST(
-			int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff));
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b)
-#define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_or_si64(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_or_si64(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i64 = a_.i64 | b_.i64;
-#else
-	r_.i64[0] = a_.i64[0] | b_.i64[0];
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_por(a, b) simde_mm_or_si64(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_or_si64(a, b) simde_mm_or_si64(a, b)
-#define _m_por(a, b) simde_mm_or_si64(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_packs_pi16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_packs_pi16(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16));
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		if (a_.i16[i] < INT8_MIN) {
-			r_.i8[i] = INT8_MIN;
-		} else if (a_.i16[i] > INT8_MAX) {
-			r_.i8[i] = INT8_MAX;
-		} else {
-			r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]);
-		}
-	}
-
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		if (b_.i16[i] < INT8_MIN) {
-			r_.i8[i + 4] = INT8_MIN;
-		} else if (b_.i16[i] > INT8_MAX) {
-			r_.i8[i + 4] = INT8_MAX;
-		} else {
-			r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]);
-		}
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b)
-#define _m_packsswb(a, b) simde_mm_packs_pi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_packs_pi32(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_packs_pi32(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (8 / sizeof(a_.i32[0])); i++) {
-		if (a_.i32[i] < SHRT_MIN) {
-			r_.i16[i] = SHRT_MIN;
-		} else if (a_.i32[i] > INT16_MAX) {
-			r_.i16[i] = INT16_MAX;
-		} else {
-			r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]);
-		}
-	}
-
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (8 / sizeof(b_.i32[0])); i++) {
-		if (b_.i32[i] < SHRT_MIN) {
-			r_.i16[i + 2] = SHRT_MIN;
-		} else if (b_.i32[i] > INT16_MAX) {
-			r_.i16[i + 2] = INT16_MAX;
-		} else {
-			r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]);
-		}
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b)
-#define _m_packssdw(a, b) simde_mm_packs_pi32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_packs_pu16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_packs_pu16(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16);
-
-	/* Set elements which are < 0 to 0 */
-	const int16x8_t t2 =
-		vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1)));
-
-	/* Vector with all s16 elements set to UINT8_MAX */
-	const int16x8_t vmax =
-		vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX));
-
-	/* Elements which are within the acceptable range */
-	const int16x8_t le_max =
-		vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax)));
-	const int16x8_t gt_max =
-		vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax)));
-
-	/* Final values as 16-bit integers */
-	const int16x8_t values = vorrq_s16(le_max, gt_max);
-
-	r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values));
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		if (a_.i16[i] > UINT8_MAX) {
-			r_.u8[i] = UINT8_MAX;
-		} else if (a_.i16[i] < 0) {
-			r_.u8[i] = 0;
-		} else {
-			r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]);
-		}
-	}
-
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		if (b_.i16[i] > UINT8_MAX) {
-			r_.u8[i + 4] = UINT8_MAX;
-		} else if (b_.i16[i] < 0) {
-			r_.u8[i + 4] = 0;
-		} else {
-			r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]);
-		}
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b)
-#define _m_packuswb(a, b) simde_mm_packs_pu16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_set_pi8(int8_t e7, int8_t e6, int8_t e5, int8_t e4,
-			    int8_t e3, int8_t e2, int8_t e1, int8_t e0)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0);
-#else
-	simde__m64_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = {e0, e1, e2, e3,
-							    e4, e5, e6, e7};
-	r_.neon_i8 = vld1_s8(v);
-#else
-	r_.i8[0] = e0;
-	r_.i8[1] = e1;
-	r_.i8[2] = e2;
-	r_.i8[3] = e3;
-	r_.i8[4] = e4;
-	r_.i8[5] = e5;
-	r_.i8[6] = e6;
-	r_.i8[7] = e7;
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) \
-	simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_x_mm_set_pu8(uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4,
-			      uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0)
-{
-	simde__m64_private r_;
-
-#if defined(SIMDE_X86_MMX_NATIVE)
-	r_.n = _mm_set_pi8(
-		HEDLEY_STATIC_CAST(int8_t, e7), HEDLEY_STATIC_CAST(int8_t, e6),
-		HEDLEY_STATIC_CAST(int8_t, e5), HEDLEY_STATIC_CAST(int8_t, e4),
-		HEDLEY_STATIC_CAST(int8_t, e3), HEDLEY_STATIC_CAST(int8_t, e2),
-		HEDLEY_STATIC_CAST(int8_t, e1), HEDLEY_STATIC_CAST(int8_t, e0));
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = {e0, e1, e2, e3,
-							     e4, e5, e6, e7};
-	r_.neon_u8 = vld1_u8(v);
-#else
-	r_.u8[0] = e0;
-	r_.u8[1] = e1;
-	r_.u8[2] = e2;
-	r_.u8[3] = e3;
-	r_.u8[4] = e4;
-	r_.u8[5] = e5;
-	r_.u8[6] = e6;
-	r_.u8[7] = e7;
-#endif
-
-	return simde__m64_from_private(r_);
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_set_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_set_pi16(e3, e2, e1, e0);
-#else
-	simde__m64_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = {e0, e1, e2, e3};
-	r_.neon_i16 = vld1_s16(v);
-#else
-	r_.i16[0] = e0;
-	r_.i16[1] = e1;
-	r_.i16[2] = e2;
-	r_.i16[3] = e3;
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_x_mm_set_pu16(uint16_t e3, uint16_t e2, uint16_t e1,
-			       uint16_t e0)
-{
-	simde__m64_private r_;
-
-#if defined(SIMDE_X86_MMX_NATIVE)
-	r_.n = _mm_set_pi16(HEDLEY_STATIC_CAST(int16_t, e3),
-			    HEDLEY_STATIC_CAST(int16_t, e2),
-			    HEDLEY_STATIC_CAST(int16_t, e1),
-			    HEDLEY_STATIC_CAST(int16_t, e0));
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = {e0, e1, e2, e3};
-	r_.neon_u16 = vld1_u16(v);
-#else
-	r_.u16[0] = e0;
-	r_.u16[1] = e1;
-	r_.u16[2] = e2;
-	r_.u16[3] = e3;
-#endif
-
-	return simde__m64_from_private(r_);
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_x_mm_set_pu32(uint32_t e1, uint32_t e0)
-{
-	simde__m64_private r_;
-
-#if defined(SIMDE_X86_MMX_NATIVE)
-	r_.n = _mm_set_pi32(HEDLEY_STATIC_CAST(int32_t, e1),
-			    HEDLEY_STATIC_CAST(int32_t, e0));
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = {e0, e1};
-	r_.neon_u32 = vld1_u32(v);
-#else
-	r_.u32[0] = e0;
-	r_.u32[1] = e1;
-#endif
-
-	return simde__m64_from_private(r_);
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_set_pi32(int32_t e1, int32_t e0)
-{
-	simde__m64_private r_;
-
-#if defined(SIMDE_X86_MMX_NATIVE)
-	r_.n = _mm_set_pi32(e1, e0);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = {e0, e1};
-	r_.neon_i32 = vld1_s32(v);
-#else
-	r_.i32[0] = e0;
-	r_.i32[1] = e1;
-#endif
-
-	return simde__m64_from_private(r_);
-}
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_x_mm_set_pi64(int64_t e0)
-{
-	simde__m64_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = {e0};
-	r_.neon_i64 = vld1_s64(v);
-#else
-	r_.i64[0] = e0;
-#endif
-
-	return simde__m64_from_private(r_);
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_x_mm_set_f32x2(simde_float32 e1, simde_float32 e0)
-{
-	simde__m64_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = {e0, e1};
-	r_.neon_f32 = vld1_f32(v);
-#else
-	r_.f32[0] = e0;
-	r_.f32[1] = e1;
-#endif
-
-	return simde__m64_from_private(r_);
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_set1_pi8(int8_t a)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_set1_pi8(a);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	simde__m64_private r_;
-	r_.neon_i8 = vmov_n_s8(a);
-	return simde__m64_from_private(r_);
-#else
-	return simde_mm_set_pi8(a, a, a, a, a, a, a, a);
-#endif
-}
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_set1_pi8(a) simde_mm_set1_pi8(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_set1_pi16(int16_t a)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_set1_pi16(a);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	simde__m64_private r_;
-	r_.neon_i16 = vmov_n_s16(a);
-	return simde__m64_from_private(r_);
-#else
-	return simde_mm_set_pi16(a, a, a, a);
-#endif
-}
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_set1_pi16(a) simde_mm_set1_pi16(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_set1_pi32(int32_t a)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_set1_pi32(a);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	simde__m64_private r_;
-	r_.neon_i32 = vmov_n_s32(a);
-	return simde__m64_from_private(r_);
-#else
-	return simde_mm_set_pi32(a, a);
-#endif
-}
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_set1_pi32(a) simde_mm_set1_pi32(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_setr_pi8(int8_t e7, int8_t e6, int8_t e5, int8_t e4,
-			     int8_t e3, int8_t e2, int8_t e1, int8_t e0)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0);
-#else
-	return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7);
-#endif
-}
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) \
-	simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_setr_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_setr_pi16(e3, e2, e1, e0);
-#else
-	return simde_mm_set_pi16(e0, e1, e2, e3);
-#endif
-}
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_setr_pi32(int32_t e1, int32_t e0)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_setr_pi32(e1, e0);
-#else
-	return simde_mm_set_pi32(e0, e1);
-#endif
-}
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_setzero_si64(void)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_setzero_si64();
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	simde__m64_private r_;
-	r_.neon_u32 = vmov_n_u32(0);
-	return simde__m64_from_private(r_);
-#else
-	return simde_mm_set_pi32(0, 0);
-#endif
-}
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_setzero_si64() simde_mm_setzero_si64()
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_x_mm_load_si64(const void *mem_addr)
-{
-	simde__m64 r;
-	simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64),
-		     sizeof(r));
-	return r;
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_x_mm_loadu_si64(const void *mem_addr)
-{
-	simde__m64 r;
-	simde_memcpy(&r, mem_addr, sizeof(r));
-	return r;
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_x_mm_store_si64(void *mem_addr, simde__m64 value)
-{
-	simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value,
-		     sizeof(value));
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_x_mm_storeu_si64(void *mem_addr, simde__m64 value)
-{
-	simde_memcpy(mem_addr, &value, sizeof(value));
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_x_mm_setone_si64(void)
-{
-	return simde_mm_set1_pi32(~INT32_C(0));
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_sll_pi16(simde__m64 a, simde__m64 count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_sll_pi16(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private count_ = simde__m64_to_private(count);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	HEDLEY_DIAGNOSTIC_PUSH
-#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
-	SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
-#pragma clang diagnostic ignored "-Wvector-conversion"
-#endif
-	r_.neon_i16 =
-		vshl_s16(a_.neon_i16,
-			 vmov_n_s16(HEDLEY_STATIC_CAST(
-				 int16_t, vget_lane_u64(count_.neon_u64, 0))));
-	HEDLEY_DIAGNOSTIC_POP
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
-	defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
-	if (HEDLEY_UNLIKELY(count_.u64[0] > 15))
-		return simde_mm_setzero_si64();
-
-	r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.i16 = a_.i16 << count_.u64[0];
-#else
-	if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) {
-		simde_memset(&r_, 0, sizeof(r_));
-		return simde__m64_from_private(r_);
-	}
-
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
-		r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t,
-					       a_.u16[i] << count_.u64[0]);
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count)
-#define _m_psllw(a, count) simde_mm_sll_pi16(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_sll_pi32(simde__m64 a, simde__m64 count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_sll_pi32(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private count_ = simde__m64_to_private(count);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	HEDLEY_DIAGNOSTIC_PUSH
-#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
-	SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
-#pragma clang diagnostic ignored "-Wvector-conversion"
-#endif
-	r_.neon_i32 =
-		vshl_s32(a_.neon_i32,
-			 vmov_n_s32(HEDLEY_STATIC_CAST(
-				 int32_t, vget_lane_u64(count_.neon_u64, 0))));
-	HEDLEY_DIAGNOSTIC_POP
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.i32 = a_.i32 << count_.u64[0];
-#else
-	if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) {
-		simde_memset(&r_, 0, sizeof(r_));
-		return simde__m64_from_private(r_);
-	}
-
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
-		r_.u32[i] = a_.u32[i] << count_.u64[0];
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count)
-#define _m_pslld(a, count) simde_mm_sll_pi32(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_slli_pi16(simde__m64 a, int count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
-	return _mm_slli_pi16(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
-	defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
-	if (HEDLEY_UNLIKELY(count > 15))
-		return simde_mm_setzero_si64();
-
-	r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.i16 = a_.i16 << count;
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t)count));
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i16 = psllh_s(a_.mmi_i16, b_.mmi_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
-		r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count);
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count)
-#define _m_psllwi(a, count) simde_mm_slli_pi16(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_slli_pi32(simde__m64 a, int count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
-	return _mm_slli_pi32(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.i32 = a_.i32 << count;
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t)count));
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
-		r_.u32[i] = a_.u32[i] << count;
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count)
-#define _m_pslldi(a, count) simde_mm_slli_pi32(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_slli_si64(simde__m64 a, int count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_slli_si64(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.i64 = a_.i64 << count;
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t)count));
-#else
-	r_.u64[0] = a_.u64[0] << count;
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count)
-#define _m_psllqi(a, count) simde_mm_slli_si64(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_sll_si64(simde__m64 a, simde__m64 count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_sll_si64(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private count_ = simde__m64_to_private(count);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i64 = a_.i64 << count_.i64;
-#else
-	if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {
-		simde_memset(&r_, 0, sizeof(r_));
-		return simde__m64_from_private(r_);
-	}
-
-	r_.u64[0] = a_.u64[0] << count_.u64[0];
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count)
-#define _m_psllq(a, count) simde_mm_sll_si64(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_srl_pi16(simde__m64 a, simde__m64 count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_srl_pi16(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private count_ = simde__m64_to_private(count);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
-	defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
-	if (HEDLEY_UNLIKELY(count_.u64[0] > 15))
-		return simde_mm_setzero_si64();
-
-	r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, count_.u64[0]);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.u16 = a_.u16 >> count_.u64[0];
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u16 = vshl_u16(
-		a_.neon_u16,
-		vmov_n_s16(-((int16_t)vget_lane_u64(count_.neon_u64, 0))));
-#else
-	if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) {
-		simde_memset(&r_, 0, sizeof(r_));
-		return simde__m64_from_private(r_);
-	}
-
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < sizeof(r_.u16) / sizeof(r_.u16[0]); i++) {
-		r_.u16[i] = a_.u16[i] >> count_.u64[0];
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count)
-#define _m_psrlw(a, count) simde_mm_srl_pi16(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_srl_pi32(simde__m64 a, simde__m64 count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_srl_pi32(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private count_ = simde__m64_to_private(count);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.u32 = a_.u32 >> count_.u64[0];
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vshl_u32(
-		a_.neon_u32,
-		vmov_n_s32(-((int32_t)vget_lane_u64(count_.neon_u64, 0))));
-#else
-	if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) {
-		simde_memset(&r_, 0, sizeof(r_));
-		return simde__m64_from_private(r_);
-	}
-
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < sizeof(r_.u32) / sizeof(r_.u32[0]); i++) {
-		r_.u32[i] = a_.u32[i] >> count_.u64[0];
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count)
-#define _m_psrld(a, count) simde_mm_srl_pi32(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_srli_pi16(simde__m64 a, int count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
-	return _mm_srli_pi16(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.u16 = a_.u16 >> count;
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t)count)));
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
-		r_.u16[i] = a_.u16[i] >> count;
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count)
-#define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_srli_pi32(simde__m64 a, int count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
-	return _mm_srli_pi32(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.u32 = a_.u32 >> count;
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t)count)));
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
-		r_.u32[i] = a_.u32[i] >> count;
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count)
-#define _m_psrldi(a, count) simde_mm_srli_pi32(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_srli_si64(simde__m64 a, int count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
-	return _mm_srli_si64(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.u64 = a_.u64 >> count;
-#else
-	r_.u64[0] = a_.u64[0] >> count;
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count)
-#define _m_psrlqi(a, count) simde_mm_srli_si64(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_srl_si64(simde__m64 a, simde__m64 count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_srl_si64(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private count_ = simde__m64_to_private(count);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.u64 = a_.u64 >> count_.u64;
-#else
-	if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {
-		simde_memset(&r_, 0, sizeof(r_));
-		return simde__m64_from_private(r_);
-	}
-
-	r_.u64[0] = a_.u64[0] >> count_.u64[0];
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count)
-#define _m_psrlq(a, count) simde_mm_srl_si64(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_srai_pi16(simde__m64 a, int count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
-	return _mm_srai_pi16(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.i16 = a_.i16 >> (count & 0xff);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vshl_s16(a_.neon_i16,
-			       vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count)));
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i16 = psrah_s(a_.mmi_i16, count);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = a_.i16[i] >> (count & 0xff);
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count)
-#define _m_psrawi(a, count) simde_mm_srai_pi16(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_srai_pi32(simde__m64 a, int count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
-	return _mm_srai_pi32(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.i32 = a_.i32 >> (count & 0xff);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vshl_s32(a_.neon_i32,
-			       vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count)));
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i32 = psraw_s(a_.mmi_i32, count);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = a_.i32[i] >> (count & 0xff);
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count)
-#define _m_psradi(a, count) simde_mm_srai_pi32(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_sra_pi16(simde__m64 a, simde__m64 count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_sra_pi16(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private count_ = simde__m64_to_private(count);
-	const int cnt = HEDLEY_STATIC_CAST(
-		int, (count_.i64[0] > 15 ? 15 : count_.i64[0]));
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.i16 = a_.i16 >> cnt;
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 =
-		vshl_s16(a_.neon_i16,
-			 vmov_n_s16(-HEDLEY_STATIC_CAST(
-				 int16_t, vget_lane_u64(count_.neon_u64, 0))));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = a_.i16[i] >> cnt;
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count)
-#define _m_psraw(a, count) simde_mm_sra_pi16(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_sra_pi32(simde__m64 a, simde__m64 count)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_sra_pi32(a, count);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private count_ = simde__m64_to_private(count);
-	const int32_t cnt =
-		(count_.u64[0] > 31)
-			? 31
-			: HEDLEY_STATIC_CAST(int32_t, count_.u64[0]);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.i32 = a_.i32 >> cnt;
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 =
-		vshl_s32(a_.neon_i32,
-			 vmov_n_s32(-HEDLEY_STATIC_CAST(
-				 int32_t, vget_lane_u64(count_.neon_u64, 0))));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = a_.i32[i] >> cnt;
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count)
-#define _m_psrad(a, count) simde_mm_sra_pi32(a, count)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_sub_pi8(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_sub_pi8(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i8 = a_.i8 - b_.i8;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
-		r_.i8[i] = a_.i8[i] - b_.i8[i];
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b)
-#define _m_psubb(a, b) simde_mm_sub_pi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_sub_pi16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_sub_pi16(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i16 = a_.i16 - b_.i16;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = a_.i16[i] - b_.i16[i];
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b)
-#define _m_psubw(a, b) simde_mm_sub_pi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_sub_pi32(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_sub_pi32(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = a_.i32 - b_.i32;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = a_.i32[i] - b_.i32[i];
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b)
-#define _m_psubd(a, b) simde_mm_sub_pi32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_subs_pi8(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_subs_pi8(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
-		if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) {
-			r_.i8[i] = INT8_MIN;
-		} else if ((b_.i8[i]) < 0 &&
-			   (a_.i8[i]) > INT8_MAX + (b_.i8[i])) {
-			r_.i8[i] = INT8_MAX;
-		} else {
-			r_.i8[i] = (a_.i8[i]) - (b_.i8[i]);
-		}
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b)
-#define _m_psubsb(a, b) simde_mm_subs_pi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_subs_pu8(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_subs_pu8(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
-		const int32_t x = a_.u8[i] - b_.u8[i];
-		if (x < 0) {
-			r_.u8[i] = 0;
-		} else if (x > UINT8_MAX) {
-			r_.u8[i] = UINT8_MAX;
-		} else {
-			r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);
-		}
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b)
-#define _m_psubusb(a, b) simde_mm_subs_pu8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_subs_pi16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_subs_pi16(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) {
-			r_.i16[i] = SHRT_MIN;
-		} else if ((b_.i16[i]) < 0 &&
-			   (a_.i16[i]) > INT16_MAX + (b_.i16[i])) {
-			r_.i16[i] = INT16_MAX;
-		} else {
-			r_.i16[i] = (a_.i16[i]) - (b_.i16[i]);
-		}
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b)
-#define _m_psubsw(a, b) simde_mm_subs_pi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_subs_pu16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_subs_pu16(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
-		const int x = a_.u16[i] - b_.u16[i];
-		if (x < 0) {
-			r_.u16[i] = 0;
-		} else if (x > UINT16_MAX) {
-			r_.u16[i] = UINT16_MAX;
-		} else {
-			r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);
-		}
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b)
-#define _m_psubusw(a, b) simde_mm_subs_pu16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_unpackhi_pi8(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_unpackhi_pi8(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14,
-				      7, 15);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8);
-#else
-	r_.i8[0] = a_.i8[4];
-	r_.i8[1] = b_.i8[4];
-	r_.i8[2] = a_.i8[5];
-	r_.i8[3] = b_.i8[5];
-	r_.i8[4] = a_.i8[6];
-	r_.i8[5] = b_.i8[6];
-	r_.i8[6] = a_.i8[7];
-	r_.i8[7] = b_.i8[7];
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b)
-#define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_unpackhi_pi16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_unpackhi_pi16(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7);
-#else
-	r_.i16[0] = a_.i16[2];
-	r_.i16[1] = b_.i16[2];
-	r_.i16[2] = a_.i16[3];
-	r_.i16[3] = b_.i16[3];
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b)
-#define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_unpackhi_pi32(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_unpackhi_pi32(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3);
-#else
-	r_.i32[0] = a_.i32[1];
-	r_.i32[1] = b_.i32[1];
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b)
-#define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_unpacklo_pi8(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_unpacklo_pi8(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3,
-				      11);
-#else
-	r_.i8[0] = a_.i8[0];
-	r_.i8[1] = b_.i8[0];
-	r_.i8[2] = a_.i8[1];
-	r_.i8[3] = b_.i8[1];
-	r_.i8[4] = a_.i8[2];
-	r_.i8[5] = b_.i8[2];
-	r_.i8[6] = a_.i8[3];
-	r_.i8[7] = b_.i8[3];
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b)
-#define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_unpacklo_pi16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_unpacklo_pi16(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5);
-#else
-	r_.i16[0] = a_.i16[0];
-	r_.i16[1] = b_.i16[0];
-	r_.i16[2] = a_.i16[1];
-	r_.i16[3] = b_.i16[1];
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b)
-#define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_unpacklo_pi32(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_unpacklo_pi32(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
-	r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2);
-#else
-	r_.i32[0] = a_.i32[0];
-	r_.i32[1] = b_.i32[0];
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b)
-#define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_xor_si64(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_xor_si64(a, b);
-#else
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32f = a_.i32f ^ b_.i32f;
-#else
-	r_.u64[0] = a_.u64[0] ^ b_.u64[0];
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b)
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b)
-#define _m_pxor(a, b) simde_mm_xor_si64(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int32_t simde_m_to_int(simde__m64 a)
-{
-#if defined(SIMDE_X86_MMX_NATIVE)
-	return _m_to_int(a);
-#else
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	HEDLEY_DIAGNOSTIC_PUSH
-#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
-	SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
-#pragma clang diagnostic ignored "-Wvector-conversion"
-#endif
-	return vget_lane_s32(a_.neon_i32, 0);
-	HEDLEY_DIAGNOSTIC_POP
-#else
-	return a_.i32[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
-#define _m_to_int(a) simde_m_to_int(a)
-#endif
-
-SIMDE_END_DECLS_
-
-HEDLEY_DIAGNOSTIC_POP
-
-#endif /* !defined(SIMDE_X86_MMX_H) */

+ 0 - 4479
libobs/util/simde/x86/sse.h

@@ -1,4479 +0,0 @@
-/* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- *   2017-2020 Evan Nemerson <[email protected]>
- *   2015-2017 John W. Ratcliff <[email protected]>
- *   2015      Brandon Rowlett <[email protected]>
- *   2015      Ken Fast <[email protected]>
- */
-
-#if !defined(SIMDE_X86_SSE_H)
-#define SIMDE_X86_SSE_H
-
-#include "mmx.h"
-
-#if defined(_WIN32)
-#include <windows.h>
-#endif
-
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
-SIMDE_BEGIN_DECLS_
-
-typedef union {
-#if defined(SIMDE_VECTOR_SUBSCRIPT)
-	SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-#if defined(SIMDE_HAVE_INT128_)
-	SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-#endif
-	SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-#else
-	SIMDE_ALIGN_TO_16 int8_t i8[16];
-	SIMDE_ALIGN_TO_16 int16_t i16[8];
-	SIMDE_ALIGN_TO_16 int32_t i32[4];
-	SIMDE_ALIGN_TO_16 int64_t i64[2];
-	SIMDE_ALIGN_TO_16 uint8_t u8[16];
-	SIMDE_ALIGN_TO_16 uint16_t u16[8];
-	SIMDE_ALIGN_TO_16 uint32_t u32[4];
-	SIMDE_ALIGN_TO_16 uint64_t u64[2];
-#if defined(SIMDE_HAVE_INT128_)
-	SIMDE_ALIGN_TO_16 simde_int128 i128[1];
-	SIMDE_ALIGN_TO_16 simde_uint128 u128[1];
-#endif
-	SIMDE_ALIGN_TO_16 simde_float32 f32[4];
-	SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)];
-	SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
-#endif
-
-	SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2];
-	SIMDE_ALIGN_TO_16 simde__m64 m64[2];
-
-#if defined(SIMDE_X86_SSE_NATIVE)
-	SIMDE_ALIGN_TO_16 __m128 n;
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	SIMDE_ALIGN_TO_16 int8x16_t neon_i8;
-	SIMDE_ALIGN_TO_16 int16x8_t neon_i16;
-	SIMDE_ALIGN_TO_16 int32x4_t neon_i32;
-	SIMDE_ALIGN_TO_16 int64x2_t neon_i64;
-	SIMDE_ALIGN_TO_16 uint8x16_t neon_u8;
-	SIMDE_ALIGN_TO_16 uint16x8_t neon_u16;
-	SIMDE_ALIGN_TO_16 uint32x4_t neon_u32;
-	SIMDE_ALIGN_TO_16 uint64x2_t neon_u64;
-	SIMDE_ALIGN_TO_16 float32x4_t neon_f32;
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	SIMDE_ALIGN_TO_16 float64x2_t neon_f64;
-#endif
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	SIMDE_ALIGN_TO_16 v128_t wasm_v128;
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8;
-	SIMDE_ALIGN_TO_16
-	SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16;
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32;
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8;
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16;
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32;
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32;
-#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	SIMDE_ALIGN_TO_16
-	SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;
-	SIMDE_ALIGN_TO_16
-	SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64;
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64;
-#endif
-#endif
-} simde__m128_private;
-
-#if defined(SIMDE_X86_SSE_NATIVE)
-typedef __m128 simde__m128;
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-typedef float32x4_t simde__m128;
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-typedef v128_t simde__m128;
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128;
-#elif defined(SIMDE_VECTOR_SUBSCRIPT)
-typedef simde_float32
-	simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-#else
-typedef simde__m128_private simde__m128;
-#endif
-
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-typedef simde__m128 __m128;
-#endif
-
-HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect");
-HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private),
-		     "simde__m128_private size incorrect");
-#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16,
-		     "simde__m128 is not 16-byte aligned");
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16,
-		     "simde__m128_private is not 16-byte aligned");
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde__m128_from_private(simde__m128_private v)
-{
-	simde__m128 r;
-	simde_memcpy(&r, &v, sizeof(r));
-	return r;
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128_private simde__m128_to_private(simde__m128 v)
-{
-	simde__m128_private r;
-	simde_memcpy(&r, &v, sizeof(r));
-	return r;
-}
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32)
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64)
-#endif
-#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */
-
-#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128,
-				       SIMDE_POWER_ALTIVEC_VECTOR(signed char),
-				       altivec, i8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128,
-				       SIMDE_POWER_ALTIVEC_VECTOR(signed short),
-				       altivec, i16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128,
-				       SIMDE_POWER_ALTIVEC_VECTOR(signed int),
-				       altivec, i32)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
-	m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
-	m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128,
-				       SIMDE_POWER_ALTIVEC_VECTOR(unsigned int),
-				       altivec, u32)
-
-#if defined(SIMDE_BUG_GCC_95782)
-SIMDE_FUNCTION_ATTRIBUTES
-SIMDE_POWER_ALTIVEC_VECTOR(float)
-simde__m128_to_altivec_f32(simde__m128 value)
-{
-	simde__m128_private r_ = simde__m128_to_private(value);
-	return r_.altivec_f32;
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float)
-						 value)
-{
-	simde__m128_private r_;
-	r_.altivec_f32 = value;
-	return simde__m128_from_private(r_);
-}
-#else
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float),
-				       altivec, f32)
-#endif
-
-#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
-	m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
-	m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64)
-#endif
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128);
-#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */
-
-enum {
-#if defined(SIMDE_X86_SSE_NATIVE)
-	SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST,
-	SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN,
-	SIMDE_MM_ROUND_UP = _MM_ROUND_UP,
-	SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO
-#else
-	SIMDE_MM_ROUND_NEAREST = 0x0000,
-	SIMDE_MM_ROUND_DOWN = 0x2000,
-	SIMDE_MM_ROUND_UP = 0x4000,
-	SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000
-#endif
-};
-
-#if defined(_MM_FROUND_TO_NEAREST_INT)
-#define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT
-#define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF
-#define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF
-#define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO
-#define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION
-
-#define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC
-#define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC
-#else
-#define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00
-#define SIMDE_MM_FROUND_TO_NEG_INF 0x01
-#define SIMDE_MM_FROUND_TO_POS_INF 0x02
-#define SIMDE_MM_FROUND_TO_ZERO 0x03
-#define SIMDE_MM_FROUND_CUR_DIRECTION 0x04
-
-#define SIMDE_MM_FROUND_RAISE_EXC 0x00
-#define SIMDE_MM_FROUND_NO_EXC 0x08
-#endif
-
-#define SIMDE_MM_FROUND_NINT \
-	(SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC)
-#define SIMDE_MM_FROUND_FLOOR \
-	(SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC)
-#define SIMDE_MM_FROUND_CEIL \
-	(SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC)
-#define SIMDE_MM_FROUND_TRUNC \
-	(SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC)
-#define SIMDE_MM_FROUND_RINT \
-	(SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC)
-#define SIMDE_MM_FROUND_NEARBYINT \
-	(SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC)
-
-#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && \
-	!defined(_MM_FROUND_TO_NEAREST_INT)
-#define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT
-#define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF
-#define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF
-#define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO
-#define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION
-#define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC
-#define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT
-#define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR
-#define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL
-#define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC
-#define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT
-#define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-unsigned int SIMDE_MM_GET_ROUNDING_MODE(void)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _MM_GET_ROUNDING_MODE();
-#elif defined(SIMDE_HAVE_FENV_H)
-	unsigned int vfe_mode;
-
-	switch (fegetround()) {
-#if defined(FE_TONEAREST)
-	case FE_TONEAREST:
-		vfe_mode = SIMDE_MM_ROUND_NEAREST;
-		break;
-#endif
-
-#if defined(FE_TOWARDZERO)
-	case FE_TOWARDZERO:
-		vfe_mode = SIMDE_MM_ROUND_DOWN;
-		break;
-#endif
-
-#if defined(FE_UPWARD)
-	case FE_UPWARD:
-		vfe_mode = SIMDE_MM_ROUND_UP;
-		break;
-#endif
-
-#if defined(FE_DOWNWARD)
-	case FE_DOWNWARD:
-		vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO;
-		break;
-#endif
-
-	default:
-		vfe_mode = SIMDE_MM_ROUND_NEAREST;
-		break;
-	}
-
-	return vfe_mode;
-#else
-	return SIMDE_MM_ROUND_NEAREST;
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE()
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void SIMDE_MM_SET_ROUNDING_MODE(unsigned int a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	_MM_SET_ROUNDING_MODE(a);
-#elif defined(SIMDE_HAVE_FENV_H)
-	int fe_mode = FE_TONEAREST;
-
-	switch (a) {
-#if defined(FE_TONEAREST)
-	case SIMDE_MM_ROUND_NEAREST:
-		fe_mode = FE_TONEAREST;
-		break;
-#endif
-
-#if defined(FE_TOWARDZERO)
-	case SIMDE_MM_ROUND_TOWARD_ZERO:
-		fe_mode = FE_TOWARDZERO;
-		break;
-#endif
-
-#if defined(FE_DOWNWARD)
-	case SIMDE_MM_ROUND_DOWN:
-		fe_mode = FE_DOWNWARD;
-		break;
-#endif
-
-#if defined(FE_UPWARD)
-	case SIMDE_MM_ROUND_UP:
-		fe_mode = FE_UPWARD;
-		break;
-#endif
-
-	default:
-		return;
-	}
-
-	fesetround(fe_mode);
-#else
-	(void)a;
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-uint32_t simde_mm_getcsr(void)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_getcsr();
-#else
-	return SIMDE_MM_GET_ROUNDING_MODE();
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_getcsr() simde_mm_getcsr()
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_setcsr(uint32_t a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	_mm_setcsr(a);
-#else
-	SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a));
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_setcsr(a) simde_mm_setcsr(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_x_mm_round_ps(simde__m128 a, int rounding, int lax_rounding)
-	SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15)
-		SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1)
-{
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-
-	(void)lax_rounding;
-
-/* For architectures which lack a current direction SIMD instruction.
-   *
-   * Note that NEON actually has a current rounding mode instruction,
-   * but in ARMv8+ the rounding mode is ignored and nearest is always
-   * used, so we treat ARMv7 as having a rounding mode but ARMv8 as
-   * not. */
-#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ARM_NEON_A32V8)
-	if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION)
-		rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE())
-			   << 13;
-#endif
-
-	switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
-	case SIMDE_MM_FROUND_CUR_DIRECTION:
-#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-		r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-			SIMDE_POWER_ALTIVEC_VECTOR(float),
-			vec_round(a_.altivec_f32));
-#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399)
-		r_.neon_f32 = vrndiq_f32(a_.neon_f32);
-#elif defined(simde_math_nearbyintf)
-		SIMDE_VECTORIZE
-		for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0]));
-		     i++) {
-			r_.f32[i] = simde_math_nearbyintf(a_.f32[i]);
-		}
-#else
-		HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
-#endif
-		break;
-
-	case SIMDE_MM_FROUND_TO_NEAREST_INT:
-#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-		r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-			SIMDE_POWER_ALTIVEC_VECTOR(float),
-			vec_rint(a_.altivec_f32));
-#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)
-		r_.neon_f32 = vrndnq_f32(a_.neon_f32);
-#elif defined(simde_math_roundevenf)
-		SIMDE_VECTORIZE
-		for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0]));
-		     i++) {
-			r_.f32[i] = simde_math_roundevenf(a_.f32[i]);
-		}
-#else
-		HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
-#endif
-		break;
-
-	case SIMDE_MM_FROUND_TO_NEG_INF:
-#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-		r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-			SIMDE_POWER_ALTIVEC_VECTOR(float),
-			vec_floor(a_.altivec_f32));
-#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)
-		r_.neon_f32 = vrndmq_f32(a_.neon_f32);
-#elif defined(simde_math_floorf)
-		SIMDE_VECTORIZE
-		for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0]));
-		     i++) {
-			r_.f32[i] = simde_math_floorf(a_.f32[i]);
-		}
-#else
-		HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
-#endif
-		break;
-
-	case SIMDE_MM_FROUND_TO_POS_INF:
-#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-		r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-			SIMDE_POWER_ALTIVEC_VECTOR(float),
-			vec_ceil(a_.altivec_f32));
-#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)
-		r_.neon_f32 = vrndpq_f32(a_.neon_f32);
-#elif defined(simde_math_ceilf)
-		SIMDE_VECTORIZE
-		for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0]));
-		     i++) {
-			r_.f32[i] = simde_math_ceilf(a_.f32[i]);
-		}
-#else
-		HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
-#endif
-		break;
-
-	case SIMDE_MM_FROUND_TO_ZERO:
-#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-		r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-			SIMDE_POWER_ALTIVEC_VECTOR(float),
-			vec_trunc(a_.altivec_f32));
-#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)
-		r_.neon_f32 = vrndq_f32(a_.neon_f32);
-#elif defined(simde_math_truncf)
-		SIMDE_VECTORIZE
-		for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0]));
-		     i++) {
-			r_.f32[i] = simde_math_truncf(a_.f32[i]);
-		}
-#else
-		HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
-#endif
-		break;
-
-	default:
-		HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
-	}
-
-	return simde__m128_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE4_1_NATIVE)
-#define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding))
-#else
-#define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0)
-#endif
-#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
-#define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_set_ps(simde_float32 e3, simde_float32 e2,
-			    simde_float32 e1, simde_float32 e0)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_set_ps(e3, e2, e1, e0);
-#else
-	simde__m128_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	SIMDE_ALIGN_TO_16 simde_float32 data[4] = {e0, e1, e2, e3};
-	r_.neon_f32 = vld1q_f32(data);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3);
-#else
-	r_.f32[0] = e0;
-	r_.f32[1] = e1;
-	r_.f32[2] = e2;
-	r_.f32[3] = e3;
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_set_ps1(simde_float32 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_set_ps1(a);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	return vdupq_n_f32(a);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	(void)a;
-	return vec_splats(a);
-#else
-	return simde_mm_set_ps(a, a, a, a);
-#endif
-}
-#define simde_mm_set1_ps(a) simde_mm_set_ps1(a)
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_set_ps1(a) simde_mm_set_ps1(a)
-#define _mm_set1_ps(a) simde_mm_set1_ps(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_move_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_move_ss(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 =
-		vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	SIMDE_POWER_ALTIVEC_VECTOR(unsigned char)
-	m = {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-	r_.altivec_f32 = vec_perm(a_.altivec_f32, b_.altivec_f32, m);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2,
-					  3, 20, 21, 22, 23, 24, 25, 26, 27, 28,
-					  29, 30, 31);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3);
-#else
-	r_.f32[0] = b_.f32[0];
-	r_.f32[1] = a_.f32[1];
-	r_.f32[2] = a_.f32[2];
-	r_.f32[3] = a_.f32[3];
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_move_ss(a, b) simde_mm_move_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_add_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_add_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.f32 = a_.f32 + b_.f32;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.f32[i] = a_.f32[i] + b_.f32[i];
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_add_ps(a, b) simde_mm_add_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_add_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_add_ss(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_add_ps(a, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0);
-	float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0);
-	// the upper values in the result must be the remnants of <a>.
-	r_.neon_f32 = vaddq_f32(a_.neon_f32, value);
-#else
-	r_.f32[0] = a_.f32[0] + b_.f32[0];
-	r_.f32[1] = a_.f32[1];
-	r_.f32[2] = a_.f32[2];
-	r_.f32[3] = a_.f32[3];
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_add_ss(a, b) simde_mm_add_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_and_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_and_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = a_.i32 & b_.i32;
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = a_.i32[i] & b_.i32[i];
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_and_ps(a, b) simde_mm_and_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_andnot_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_andnot_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = ~a_.i32 & b_.i32;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = ~(a_.i32[i]) & b_.i32[i];
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_xor_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_xor_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32f = a_.i32f ^ b_.i32f;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
-		r_.u32[i] = a_.u32[i] ^ b_.u32[i];
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_or_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_or_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32f = a_.i32f | b_.i32f;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
-		r_.u32[i] = a_.u32[i] | b_.u32[i];
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_or_ps(a, b) simde_mm_or_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_x_mm_not_ps(simde__m128 a)
-{
-#if defined(SIMDE_X86_AVX512VL_NATIVE)
-	__m128i ai = _mm_castps_si128(a);
-	return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55));
-#elif defined(SIMDE_X86_SSE2_NATIVE)
-	/* Note: we use ints instead of floats because we don't want cmpeq
-     * to return false for (NaN, NaN) */
-	__m128i ai = _mm_castps_si128(a);
-	return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai)));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vmvnq_s32(a_.neon_i32);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v128_not(a_.wasm_v128);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = ~a_.i32;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = ~(a_.i32[i]);
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask)
-{
-/* This function is for when you want to blend two elements together
-   * according to a mask.  It is similar to _mm_blendv_ps, except that
-   * it is undefined whether the blend is based on the highest bit in
-   * each lane (like blendv) or just bitwise operations.  This allows
-   * us to implement the function efficiently everywhere.
-   *
-   * Basically, you promise that all the lanes in mask are either 0 or
-   * ~0. */
-#if defined(SIMDE_X86_SSE4_1_NATIVE)
-	return _mm_blendv_ps(a, b, mask);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b),
-				mask_ = simde__m128_to_private(mask);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128,
-					   mask_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32 =
-		vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = a_.i32[i] ^
-			    ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]);
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_avg_pu16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_avg_pu16(a, b);
-#else
-	simde__m64_private r_, a_ = simde__m64_to_private(a),
-			       b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) &&      \
-	defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
-	defined(SIMDE_CONVERT_VECTOR_)
-	uint32_t wa SIMDE_VECTOR(16);
-	uint32_t wb SIMDE_VECTOR(16);
-	uint32_t wr SIMDE_VECTOR(16);
-	SIMDE_CONVERT_VECTOR_(wa, a_.u16);
-	SIMDE_CONVERT_VECTOR_(wb, b_.u16);
-	wr = (wa + wb + 1) >> 1;
-	SIMDE_CONVERT_VECTOR_(r_.u16, wr);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
-		r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b)
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b)
-#define _m_pavgw(a, b) simde_mm_avg_pu16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_avg_pu8(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_avg_pu8(a, b);
-#else
-	simde__m64_private r_, a_ = simde__m64_to_private(a),
-			       b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) &&      \
-	defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
-	defined(SIMDE_CONVERT_VECTOR_)
-	uint16_t wa SIMDE_VECTOR(16);
-	uint16_t wb SIMDE_VECTOR(16);
-	uint16_t wr SIMDE_VECTOR(16);
-	SIMDE_CONVERT_VECTOR_(wa, a_.u8);
-	SIMDE_CONVERT_VECTOR_(wb, b_.u8);
-	wr = (wa + wb + 1) >> 1;
-	SIMDE_CONVERT_VECTOR_(r_.u8, wr);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
-		r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b)
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b)
-#define _m_pavgb(a, b) simde_mm_avg_pu8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_x_mm_abs_ps(simde__m128 a)
-{
-#if defined(SIMDE_X86_AVX512F_NATIVE) && \
-	(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7, 1, 0))
-	return _mm512_castps512_ps128(_mm512_abs_ps(_mm512_castps128_ps512(a)));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vabsq_f32(a_.neon_f32);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = vec_abs(a_.altivec_f32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.f32[i] = simde_math_fabsf(a_.f32[i]);
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpeq_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cmpeq_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(float),
-		vec_cmpeq(a_.altivec_f32, b_.altivec_f32));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), a_.f32 == b_.f32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0)
-						     : UINT32_C(0);
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpeq_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cmpeq_ss(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-	r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
-	SIMDE_VECTORIZE
-	for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.u32[i] = a_.u32[i];
-	}
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpge_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cmpge_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(float),
-		vec_cmpge(a_.altivec_f32, b_.altivec_f32));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0)
-						     : UINT32_C(0);
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpge_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)
-	return _mm_cmpge_ss(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-	r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
-	SIMDE_VECTORIZE
-	for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.u32[i] = a_.u32[i];
-	}
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpgt_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cmpgt_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(float),
-		vec_cmpgt(a_.altivec_f32, b_.altivec_f32));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0)
-						    : UINT32_C(0);
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpgt_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)
-	return _mm_cmpgt_ss(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-	r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
-	SIMDE_VECTORIZE
-	for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.u32[i] = a_.u32[i];
-	}
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmple_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cmple_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(float),
-		vec_cmple(a_.altivec_f32, b_.altivec_f32));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0)
-						     : UINT32_C(0);
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmple_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cmple_ss(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-	r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
-	SIMDE_VECTORIZE
-	for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.u32[i] = a_.u32[i];
-	}
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmplt_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cmplt_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(float),
-		vec_cmplt(a_.altivec_f32, b_.altivec_f32));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0)
-						    : UINT32_C(0);
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmplt_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cmplt_ss(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-	r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
-	SIMDE_VECTORIZE
-	for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.u32[i] = a_.u32[i];
-	}
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpneq_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cmpneq_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && SIMDE_ARCH_POWER_CHECK(900) && \
-	!defined(HEDLEY_IBM_VERSION)
-	/* vec_cmpne(SIMDE_POWER_ALTIVEC_VECTOR(float), SIMDE_POWER_ALTIVEC_VECTOR(float))
-        is missing from XL C/C++ v16.1.1,
-        though the documentation (table 89 on page 432 of the IBM XL C/C++ for
-        Linux Compiler Reference, Version 16.1.1) shows that it should be
-        present.  Both GCC and clang support it. */
-	r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(float),
-		vec_cmpne(a_.altivec_f32, b_.altivec_f32));
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(float),
-		vec_cmpeq(a_.altivec_f32, b_.altivec_f32));
-	r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(float),
-		vec_nor(r_.altivec_f32, r_.altivec_f32));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0)
-						     : UINT32_C(0);
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpneq_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cmpneq_ss(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-	r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
-	SIMDE_VECTORIZE
-	for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.u32[i] = a_.u32[i];
-	}
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpnge_ps(simde__m128 a, simde__m128 b)
-{
-	return simde_mm_cmplt_ps(a, b);
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpnge_ss(simde__m128 a, simde__m128 b)
-{
-	return simde_mm_cmplt_ss(a, b);
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpngt_ps(simde__m128 a, simde__m128 b)
-{
-	return simde_mm_cmple_ps(a, b);
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpngt_ss(simde__m128 a, simde__m128 b)
-{
-	return simde_mm_cmple_ss(a, b);
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpnle_ps(simde__m128 a, simde__m128 b)
-{
-	return simde_mm_cmpgt_ps(a, b);
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpnle_ss(simde__m128 a, simde__m128 b)
-{
-	return simde_mm_cmpgt_ss(a, b);
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpnlt_ps(simde__m128 a, simde__m128 b)
-{
-	return simde_mm_cmpge_ps(a, b);
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpnlt_ss(simde__m128 a, simde__m128 b)
-{
-	return simde_mm_cmpge_ss(a, b);
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpord_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cmpord_ps(a, b);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	/* Note: NEON does not have ordered compare builtin
-        Need to compare a eq a and b eq b to check for NaN
-        Do AND of results to get final */
-	uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32);
-	uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32);
-	r_.neon_u32 = vandq_u32(ceqaa, ceqbb);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128),
-				     wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128));
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(float),
-		vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32),
-			vec_cmpeq(b_.altivec_f32, b_.altivec_f32)));
-#elif defined(simde_math_isnanf)
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.u32[i] = (simde_math_isnanf(a_.f32[i]) ||
-			     simde_math_isnanf(b_.f32[i]))
-				    ? UINT32_C(0)
-				    : ~UINT32_C(0);
-	}
-#else
-	HEDLEY_UNREACHABLE();
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpunord_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cmpunord_ps(a, b);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32);
-	uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32);
-	r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128),
-				    wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128));
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(float),
-		vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32),
-			 vec_cmpeq(b_.altivec_f32, b_.altivec_f32)));
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(float),
-		vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32),
-			vec_cmpeq(b_.altivec_f32, b_.altivec_f32)));
-	r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32);
-#elif defined(simde_math_isnanf)
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.u32[i] = (simde_math_isnanf(a_.f32[i]) ||
-			     simde_math_isnanf(b_.f32[i]))
-				    ? ~UINT32_C(0)
-				    : UINT32_C(0);
-	}
-#else
-	HEDLEY_UNREACHABLE();
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpunord_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)
-	return _mm_cmpunord_ss(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(simde_math_isnanf)
-	r_.u32[0] =
-		(simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0]))
-			? ~UINT32_C(0)
-			: UINT32_C(0);
-	SIMDE_VECTORIZE
-	for (size_t i = 1; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
-		r_.u32[i] = a_.u32[i];
-	}
-#else
-	HEDLEY_UNREACHABLE();
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_comieq_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_comieq_ss(a, b);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a),
-			    b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
-	uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
-	uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
-	uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32);
-	return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0);
-#else
-	return a_.f32[0] == b_.f32[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_comige_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_comige_ss(a, b);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a),
-			    b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
-	uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
-	uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
-	uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32);
-	return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0);
-#else
-	return a_.f32[0] >= b_.f32[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_comigt_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_comigt_ss(a, b);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a),
-			    b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
-	uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
-	uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
-	uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32);
-	return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0);
-#else
-	return a_.f32[0] > b_.f32[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_comile_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_comile_ss(a, b);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a),
-			    b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
-	uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
-	uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
-	uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32);
-	return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0);
-#else
-	return a_.f32[0] <= b_.f32[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_comilt_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_comilt_ss(a, b);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a),
-			    b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
-	uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
-	uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
-	uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32);
-	return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0);
-#else
-	return a_.f32[0] < b_.f32[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_comineq_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_comineq_ss(a, b);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a),
-			    b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
-	uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
-	uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
-	uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));
-	return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0);
-#else
-	return a_.f32[0] != b_.f32[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src)
-{
-	simde__m128_private r_, dest_ = simde__m128_to_private(dest),
-				src_ = simde__m128_to_private(src);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	const uint32x4_t sign_pos =
-		vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0)));
-	r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	const v128_t sign_pos = wasm_f32x4_splat(-0.0f);
-	r_.wasm_v128 =
-		wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos);
-#elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE)
-#if !defined(HEDLEY_IBM_VERSION)
-	r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32);
-#else
-	r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32);
-#endif
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int)
-		sign_pos = HEDLEY_REINTERPRET_CAST(
-			SIMDE_POWER_ALTIVEC_VECTOR(unsigned int),
-			vec_splats(-0.0f));
-	r_.altivec_f32 = vec_sel(dest_.altivec_f32, src_.altivec_f32, sign_pos);
-#elif defined(SIMDE_IEEE754_STORAGE)
-	(void)src_;
-	(void)dest_;
-	simde__m128 sign_pos = simde_mm_set1_ps(-0.0f);
-	r_ = simde__m128_to_private(simde_mm_xor_ps(
-		dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos)));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]);
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src)
-{
-	return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src),
-			       dest);
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cvt_pi2ps(simde__m128 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvt_pi2ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32),
-				   vget_high_f32(a_.neon_f32));
-#elif defined(SIMDE_CONVERT_VECTOR_)
-	SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32);
-	r_.m64_private[1] = a_.m64_private[1];
-#else
-	r_.f32[0] = (simde_float32)b_.i32[0];
-	r_.f32[1] = (simde_float32)b_.i32[1];
-	r_.i32[2] = a_.i32[2];
-	r_.i32[3] = a_.i32[3];
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_cvt_ps2pi(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvt_ps2pi(a);
-#else
-	simde__m64_private r_;
-	simde__m128_private a_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	a_ = simde__m128_to_private(
-		simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION));
-	r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32));
-#elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128)
-	a_ = simde__m128_to_private(
-		simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION));
-	SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32);
-#else
-	a_ = simde__m128_to_private(a);
-
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = HEDLEY_STATIC_CAST(
-			int32_t, simde_math_nearbyintf(a_.f32[i]));
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cvt_si2ss(simde__m128 a, int32_t b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cvt_si2ss(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 =
-		vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0);
-#else
-	r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b);
-	r_.i32[1] = a_.i32[1];
-	r_.i32[2] = a_.i32[2];
-	r_.i32[3] = a_.i32[3];
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int32_t simde_mm_cvt_ss2si(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cvt_ss2si(a);
-#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399)
-	return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0);
-#else
-	simde__m128_private a_ = simde__m128_to_private(
-		simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION));
-	return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cvtpi16_ps(simde__m64 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvtpi16_ps(a);
-#else
-	simde__m128_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16));
-#elif defined(SIMDE_CONVERT_VECTOR_)
-	SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		simde_float32 v = a_.i16[i];
-		r_.f32[i] = v;
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cvtpi32_ps(simde__m128 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvtpi32_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-	simde__m64_private b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32),
-				   vget_high_f32(a_.neon_f32));
-#elif defined(SIMDE_CONVERT_VECTOR_)
-	SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32);
-	r_.m64_private[1] = a_.m64_private[1];
-#else
-	r_.f32[0] = (simde_float32)b_.i32[0];
-	r_.f32[1] = (simde_float32)b_.i32[1];
-	r_.i32[2] = a_.i32[2];
-	r_.i32[3] = a_.i32[3];
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cvtpi32x2_ps(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvtpi32x2_ps(a, b);
-#else
-	simde__m128_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a),
-			   b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));
-#elif defined(SIMDE_CONVERT_VECTOR_)
-	SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32);
-	SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32);
-#else
-	r_.f32[0] = (simde_float32)a_.i32[0];
-	r_.f32[1] = (simde_float32)a_.i32[1];
-	r_.f32[2] = (simde_float32)b_.i32[0];
-	r_.f32[3] = (simde_float32)b_.i32[1];
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cvtpi8_ps(simde__m64 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvtpi8_ps(a);
-#else
-	simde__m128_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 =
-		vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8))));
-#else
-	r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]);
-	r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]);
-	r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]);
-	r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]);
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_cvtps_pi16(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvtps_pi16(a);
-#else
-	simde__m64_private r_;
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399)
-	r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32)));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t,
-					       simde_math_roundf(a_.f32[i]));
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_cvtps_pi32(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvtps_pi32(a);
-#else
-	simde__m64_private r_;
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \
-	defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399)
-	r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32)));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		simde_float32 v = simde_math_roundf(a_.f32[i]);
-#if !defined(SIMDE_FAST_CONVERSION_RANGE)
-		r_.i32[i] =
-			((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) &&
-			 (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX)))
-				? SIMDE_CONVERT_FTOI(int32_t, v)
-				: INT32_MIN;
-#else
-		r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
-#endif
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_cvtps_pi8(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvtps_pi8(a);
-#else
-	simde__m64_private r_;
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471)
-	/* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to
-      * i16, combine with an all-zero vector of i16 (which will become the upper
-      * half), narrow to i8. */
-	float32x4_t max =
-		vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX));
-	float32x4_t min =
-		vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN));
-	float32x4_t values =
-		vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min));
-	r_.neon_i8 = vmovn_s16(
-		vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0)));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(a_.f32) / sizeof(a_.f32[0])); i++) {
-		if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX))
-			r_.i8[i] = INT8_MAX;
-		else if (a_.f32[i] <
-			 HEDLEY_STATIC_CAST(simde_float32, INT8_MIN))
-			r_.i8[i] = INT8_MIN;
-		else
-			r_.i8[i] = SIMDE_CONVERT_FTOI(
-				int8_t, simde_math_roundf(a_.f32[i]));
-	}
-	/* Note: the upper half is undefined */
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cvtpu16_ps(simde__m64 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvtpu16_ps(a);
-#else
-	simde__m128_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16));
-#elif defined(SIMDE_CONVERT_VECTOR_)
-	SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.f32[i] = (simde_float32)a_.u16[i];
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cvtpu8_ps(simde__m64 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvtpu8_ps(a);
-#else
-	simde__m128_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 =
-		vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8))));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]);
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cvtsi32_ss(simde__m128 a, int32_t b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cvtsi32_ss(a, b);
-#else
-	simde__m128_private r_;
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b),
-				     a_.neon_f32, 0);
-#else
-	r_ = a_;
-	r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b);
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cvtsi64_ss(simde__m128 a, int64_t b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64)
-#if !defined(__PGI)
-	return _mm_cvtsi64_ss(a, b);
-#else
-	return _mm_cvtsi64x_ss(a, b);
-#endif
-#else
-	simde__m128_private r_;
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b),
-				     a_.neon_f32, 0);
-#else
-	r_ = a_;
-	r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b);
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde_float32 simde_mm_cvtss_f32(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cvtss_f32(a);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a);
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	return vgetq_lane_f32(a_.neon_f32, 0);
-#else
-	return a_.f32[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int32_t simde_mm_cvtss_si32(simde__m128 a)
-{
-	return simde_mm_cvt_ss2si(a);
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int64_t simde_mm_cvtss_si64(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64)
-#if !defined(__PGI)
-	return _mm_cvtss_si64(a);
-#else
-	return _mm_cvtss_si64x(a);
-#endif
-#else
-	simde__m128_private a_ = simde__m128_to_private(a);
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	return SIMDE_CONVERT_FTOI(
-		int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0)));
-#else
-	return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0]));
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_cvtt_ps2pi(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvtt_ps2pi(a);
-#else
-	simde__m64_private r_;
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)
-	r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		simde_float32 v = a_.f32[i];
-#if !defined(SIMDE_FAST_CONVERSION_RANGE)
-		r_.i32[i] =
-			((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) &&
-			 (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX)))
-				? SIMDE_CONVERT_FTOI(int32_t, v)
-				: INT32_MIN;
-#else
-		r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
-#endif
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a)
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a))
-#define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int32_t simde_mm_cvtt_ss2si(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cvtt_ss2si(a);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)
-	return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0));
-#else
-	simde_float32 v = a_.f32[0];
-#if !defined(SIMDE_FAST_CONVERSION_RANGE)
-	return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) &&
-		(v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX)))
-		       ? SIMDE_CONVERT_FTOI(int32_t, v)
-		       : INT32_MIN;
-#else
-	return SIMDE_CONVERT_FTOI(int32_t, v);
-#endif
-#endif
-#endif
-}
-#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a))
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a))
-#define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int64_t simde_mm_cvttss_si64(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && \
-	!defined(_MSC_VER)
-#if defined(__PGI)
-	return _mm_cvttss_si64x(a);
-#else
-	return _mm_cvttss_si64(a);
-#endif
-#else
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0));
-#else
-	return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]);
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cmpord_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_cmpord_ss(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-
-#if defined(simde_math_isnanf)
-	r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) ||
-		     simde_math_isnanf(simde_mm_cvtss_f32(b)))
-			    ? UINT32_C(0)
-			    : ~UINT32_C(0);
-	SIMDE_VECTORIZE
-	for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.u32[i] = a_.u32[i];
-	}
-#else
-	HEDLEY_UNREACHABLE();
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_div_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_div_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	float32x4_t recip0 = vrecpeq_f32(b_.neon_f32);
-	float32x4_t recip1 =
-		vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32));
-	r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.f32 = a_.f32 / b_.f32;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.f32[i] = a_.f32[i] / b_.f32[i];
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_div_ps(a, b) simde_mm_div_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_div_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_div_ss(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_div_ps(a, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	float32_t value = vgetq_lane_f32(
-		simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0);
-	r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);
-#else
-	r_.f32[0] = a_.f32[0] / b_.f32[0];
-	SIMDE_VECTORIZE
-	for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.f32[i] = a_.f32[i];
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_div_ss(a, b) simde_mm_div_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int16_t simde_mm_extract_pi16(simde__m64 a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3)
-{
-	simde__m64_private a_ = simde__m64_to_private(a);
-	return a_.i16[imm8];
-}
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && \
-	!defined(HEDLEY_PGI_VERSION)
-#if defined(SIMDE_BUG_CLANG_44589)
-#define simde_mm_extract_pi16(a, imm8)                                      \
-	(HEDLEY_DIAGNOSTIC_PUSH _Pragma(                                    \
-		"clang diagnostic ignored \"-Wvector-conversion\"")         \
-		 HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16((a), (imm8))) \
-			 HEDLEY_DIAGNOSTIC_POP)
-#else
-#define simde_mm_extract_pi16(a, imm8) \
-	HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8))
-#endif
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define simde_mm_extract_pi16(a, imm8) \
-	vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8)
-#endif
-#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8)
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8))
-#define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_insert_pi16(simde__m64 a, int16_t i, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3)
-{
-	simde__m64_private r_, a_ = simde__m64_to_private(a);
-
-	r_.i64[0] = a_.i64[0];
-	r_.i16[imm8] = i;
-
-	return simde__m64_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && \
-	!defined(__PGI)
-#if defined(SIMDE_BUG_CLANG_44589)
-#define ssimde_mm_insert_pi16(a, i, imm8)                            \
-	(HEDLEY_DIAGNOSTIC_PUSH _Pragma(                             \
-		"clang diagnostic ignored \"-Wvector-conversion\"")( \
-		_mm_insert_pi16((a), (i), (imm8))) HEDLEY_DIAGNOSTIC_POP)
-#else
-#define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8)
-#endif
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define simde_mm_insert_pi16(a, i, imm8) \
-	simde__m64_from_neon_i16(        \
-		vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8)))
-#endif
-#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8))
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8)
-#define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128
-simde_mm_load_ps(simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)])
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_load_ps(mem_addr);
-#else
-	simde__m128_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vld1q_f32(mem_addr);
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	r_.altivec_f32 = vec_vsx_ld(0, mem_addr);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = vec_ld(0, mem_addr);
-#else
-	simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128),
-		     sizeof(r_));
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_load1_ps(simde_float32 const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_load_ps1(mem_addr);
-#else
-	simde__m128_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vld1q_dup_f32(mem_addr);
-#else
-	r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr));
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr)
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr)
-#define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_load_ss(simde_float32 const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_load_ss(mem_addr);
-#else
-	simde__m128_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0);
-#else
-	r_.f32[0] = *mem_addr;
-	r_.i32[1] = 0;
-	r_.i32[2] = 0;
-	r_.i32[3] = 0;
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_loadh_pi(simde__m128 a, simde__m64 const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_loadh_pi(a,
-			    HEDLEY_REINTERPRET_CAST(__m64 const *, mem_addr));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vcombine_f32(
-		vget_low_f32(a_.neon_f32),
-		vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t *, mem_addr)));
-#else
-	simde__m64_private b_ =
-		*HEDLEY_REINTERPRET_CAST(simde__m64_private const *, mem_addr);
-	r_.f32[0] = a_.f32[0];
-	r_.f32[1] = a_.f32[1];
-	r_.f32[2] = b_.f32[0];
-	r_.f32[3] = b_.f32[1];
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#if HEDLEY_HAS_WARNING("-Wold-style-cast")
-#define _mm_loadh_pi(a, mem_addr)                                          \
-	simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const *, \
-						       (mem_addr)))
-#else
-#define _mm_loadh_pi(a, mem_addr) \
-	simde_mm_loadh_pi((a), (simde__m64 const *)(mem_addr))
-#endif
-#endif
-
-/* The SSE documentation says that there are no alignment requirements
-   for mem_addr.  Unfortunately they used the __m64 type for the argument
-   which is supposed to be 8-byte aligned, so some compilers (like clang
-   with -Wcast-align) will generate a warning if you try to cast, say,
-   a simde_float32* to a simde__m64* for this function.
-
-   I think the choice of argument type is unfortunate, but I do think we
-   need to stick to it here.  If there is demand I can always add something
-   like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_loadl_pi(simde__m128 a, simde__m64 const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_loadl_pi(a,
-			    HEDLEY_REINTERPRET_CAST(__m64 const *, mem_addr));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vcombine_f32(
-		vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t *, mem_addr)),
-		vget_high_f32(a_.neon_f32));
-#else
-	simde__m64_private b_;
-	simde_memcpy(&b_, mem_addr, sizeof(b_));
-	r_.i32[0] = b_.i32[0];
-	r_.i32[1] = b_.i32[1];
-	r_.i32[2] = a_.i32[2];
-	r_.i32[3] = a_.i32[3];
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#if HEDLEY_HAS_WARNING("-Wold-style-cast")
-#define _mm_loadl_pi(a, mem_addr)                                          \
-	simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const *, \
-						       (mem_addr)))
-#else
-#define _mm_loadl_pi(a, mem_addr) \
-	simde_mm_loadl_pi((a), (simde__m64 const *)(mem_addr))
-#endif
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128
-simde_mm_loadr_ps(simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)])
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_loadr_ps(mem_addr);
-#else
-	simde__m128_private r_,
-		v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr));
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vrev64q_f32(v_.neon_f32);
-	r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2);
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__)
-	r_.altivec_f32 = vec_reve(v_.altivec_f32);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0);
-#else
-	r_.f32[0] = v_.f32[3];
-	r_.f32[1] = v_.f32[2];
-	r_.f32[2] = v_.f32[1];
-	r_.f32[3] = v_.f32[0];
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128
-simde_mm_loadu_ps(simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)])
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_loadu_ps(mem_addr);
-#else
-	simde__m128_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 =
-		vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t *, mem_addr));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v128_load(mem_addr);
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__)
-	r_.altivec_f32 = vec_vsx_ld(0, mem_addr);
-#else
-	simde_memcpy(&r_, mem_addr, sizeof(r_));
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_maskmove_si64(simde__m64 a, simde__m64 mask, int8_t *mem_addr)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	_mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char *, mem_addr));
-#else
-	simde__m64_private a_ = simde__m64_to_private(a),
-			   mask_ = simde__m64_to_private(mask);
-
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(a_.i8) / sizeof(a_.i8[0])); i++)
-		if (mask_.i8[i] < 0)
-			mem_addr[i] = a_.i8[i];
-#endif
-}
-#define simde_m_maskmovq(a, mask, mem_addr) \
-	simde_mm_maskmove_si64(a, mask, mem_addr)
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_maskmove_si64(a, mask, mem_addr) \
-	simde_mm_maskmove_si64(              \
-		(a), (mask),                 \
-		SIMDE_CHECKED_REINTERPRET_CAST(int8_t *, char *, (mem_addr)))
-#define _m_maskmovq(a, mask, mem_addr) \
-	simde_mm_maskmove_si64(        \
-		(a), (mask),           \
-		SIMDE_CHECKED_REINTERPRET_CAST(int8_t *, char *, (mem_addr)))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_max_pi16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_max_pi16(a, b);
-#else
-	simde__m64_private r_, a_ = simde__m64_to_private(a),
-			       b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i];
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b)
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b)
-#define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_max_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_max_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS)
-	r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32),
-				a_.neon_f32, b_.neon_f32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS)
-	r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 =
-		wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128,
-				    wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128));
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_NANS)
-	r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32,
-				 vec_cmpgt(a_.altivec_f32, b_.altivec_f32));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i];
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_max_ps(a, b) simde_mm_max_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_max_pu8(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_max_pu8(a, b);
-#else
-	simde__m64_private r_, a_ = simde__m64_to_private(a),
-			       b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
-		r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i];
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b)
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b)
-#define _m_pmaxub(a, b) simde_mm_max_pu8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_max_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_max_ss(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_max_ps(a, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0);
-	r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);
-#else
-	r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0];
-	r_.f32[1] = a_.f32[1];
-	r_.f32[2] = a_.f32[2];
-	r_.f32[3] = a_.f32[3];
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_max_ss(a, b) simde_mm_max_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_min_pi16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_min_pi16(a, b);
-#else
-	simde__m64_private r_, a_ = simde__m64_to_private(a),
-			       b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i];
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b)
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b)
-#define _m_pminsw(a, b) simde_mm_min_pi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_min_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_min_ps(a, b);
-#elif defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	return simde__m128_from_neon_f32(vminq_f32(simde__m128_to_neon_f32(a),
-						   simde__m128_to_neon_f32(b)));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-#if defined(SIMDE_FAST_NANS)
-	r_.wasm_v128 = wasm_f32x4_min(a_.wasm_v128, b_.wasm_v128);
-#else
-	r_.wasm_v128 =
-		wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128,
-				    wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128));
-#endif
-	return simde__m128_from_private(r_);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_FAST_NANS)
-	r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32);
-#else
-	r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32,
-				 vec_cmpgt(b_.altivec_f32, a_.altivec_f32));
-#endif
-
-	return simde__m128_from_private(r_);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	simde__m128 mask = simde_mm_cmplt_ps(a, b);
-	return simde_mm_or_ps(simde_mm_and_ps(mask, a),
-			      simde_mm_andnot_ps(mask, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i];
-	}
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_min_ps(a, b) simde_mm_min_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_min_pu8(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_min_pu8(a, b);
-#else
-	simde__m64_private r_, a_ = simde__m64_to_private(a),
-			       b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
-		r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i];
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b)
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b)
-#define _m_pminub(a, b) simde_mm_min_pu8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_min_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_min_ss(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_min_ps(a, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	float32_t value =
-		vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0);
-	r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);
-#else
-	r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0];
-	r_.f32[1] = a_.f32[1];
-	r_.f32[2] = a_.f32[2];
-	r_.f32[3] = a_.f32[3];
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_min_ss(a, b) simde_mm_min_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_movehl_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_movehl_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	float32x2_t a32 = vget_high_f32(a_.neon_f32);
-	float32x2_t b32 = vget_high_f32(b_.neon_f32);
-	r_.neon_f32 = vcombine_f32(b32, a32);
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(float),
-		vec_mergel(b_.altivec_i64, a_.altivec_i64));
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3);
-#else
-	r_.f32[0] = b_.f32[2];
-	r_.f32[1] = b_.f32[3];
-	r_.f32[2] = a_.f32[2];
-	r_.f32[3] = a_.f32[3];
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_movelh_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_movelh_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	float32x2_t a10 = vget_low_f32(a_.neon_f32);
-	float32x2_t b10 = vget_low_f32(b_.neon_f32);
-	r_.neon_f32 = vcombine_f32(a10, b10);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5);
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(float),
-		vec_mergeh(a_.altivec_i64, b_.altivec_i64));
-#else
-	r_.f32[0] = a_.f32[0];
-	r_.f32[1] = a_.f32[1];
-	r_.f32[2] = b_.f32[0];
-	r_.f32[3] = b_.f32[1];
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_movemask_pi8(simde__m64 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_movemask_pi8(a);
-#else
-	simde__m64_private a_ = simde__m64_to_private(a);
-	int r = 0;
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	uint8x8_t input = a_.neon_u8;
-	const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0};
-	const uint8x8_t mask_and = vdup_n_u8(0x80);
-	const int8x8_t mask_shift = vld1_s8(xr);
-	const uint8x8_t mask_result =
-		vshl_u8(vand_u8(input, mask_and), mask_shift);
-	uint8x8_t lo = mask_result;
-	r = vaddv_u8(lo);
-#else
-	const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]);
-	SIMDE_VECTORIZE_REDUCTION(| : r)
-	for (size_t i = 0; i < nmemb; i++) {
-		r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i);
-	}
-#endif
-
-	return r;
-#endif
-}
-#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a)
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a)
-#define _m_pmovmskb(a) simde_mm_movemask_pi8(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_movemask_ps(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_movemask_ps(a);
-#else
-	int r = 0;
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	static const int32_t shift_amount[] = {0, 1, 2, 3};
-	const int32x4_t shift = vld1q_s32(shift_amount);
-	uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31);
-	return HEDLEY_STATIC_CAST(int, vaddvq_u32(vshlq_u32(tmp, shift)));
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	// Shift out everything but the sign bits with a 32-bit unsigned shift right.
-	uint64x2_t high_bits =
-		vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31));
-	// Merge the two pairs together with a 64-bit unsigned shift right + add.
-	uint8x16_t paired =
-		vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31));
-	// Extract the result.
-	return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2);
-#else
-	SIMDE_VECTORIZE_REDUCTION(| : r)
-	for (size_t i = 0; i < sizeof(a_.u32) / sizeof(a_.u32[0]); i++) {
-		r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i;
-	}
-#endif
-
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_movemask_ps(a) simde_mm_movemask_ps((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_mul_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_mul_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.f32 = a_.f32 * b_.f32;
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.f32[i] = a_.f32[i] * b_.f32[i];
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_mul_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_mul_ss(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_mul_ps(a, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-	r_.f32[0] = a_.f32[0] * b_.f32[0];
-	r_.f32[1] = a_.f32[1];
-	r_.f32[2] = a_.f32[2];
-	r_.f32[3] = a_.f32[3];
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_mulhi_pu16(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_mulhi_pu16(a, b);
-#else
-	simde__m64_private r_, a_ = simde__m64_to_private(a),
-			       b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16);
-	const uint32x4_t t2 = vshrq_n_u32(t1, 16);
-	const uint16x4_t t3 = vmovn_u32(t2);
-	r_.neon_u16 = t3;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
-		r_.u16[i] = HEDLEY_STATIC_CAST(
-			uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) *
-				    HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >>
-				   UINT32_C(16)));
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b)
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b)
-#define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b)
-#endif
-
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION)
-#define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0)
-#define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1)
-#define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2)
-#define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3)
-#define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4)
-#define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5)
-#define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6)
-#define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7)
-#else
-#define SIMDE_MM_HINT_NTA 0
-#define SIMDE_MM_HINT_T0 1
-#define SIMDE_MM_HINT_T1 2
-#define SIMDE_MM_HINT_T2 3
-#define SIMDE_MM_HINT_ENTA 4
-#define SIMDE_MM_HINT_ET0 5
-#define SIMDE_MM_HINT_ET1 6
-#define SIMDE_MM_HINT_ET2 7
-#endif
-
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-HEDLEY_DIAGNOSTIC_PUSH
-#if HEDLEY_HAS_WARNING("-Wreserved-id-macro")
-_Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"")
-#endif
-#undef _MM_HINT_NTA
-#define _MM_HINT_NTA SIMDE_MM_HINT_NTA
-#undef _MM_HINT_T0
-#define _MM_HINT_T0 SIMDE_MM_HINT_T0
-#undef _MM_HINT_T1
-#define _MM_HINT_T1 SIMDE_MM_HINT_T1
-#undef _MM_HINT_T2
-#define _MM_HINT_T2 SIMDE_MM_HINT_T2
-#undef _MM_HINT_ETNA
-#define _MM_HINT_ETNA SIMDE_MM_HINT_ETNA
-#undef _MM_HINT_ET0
-#define _MM_HINT_ET0 SIMDE_MM_HINT_ET0
-#undef _MM_HINT_ET1
-#define _MM_HINT_ET1 SIMDE_MM_HINT_ET1
-#undef _MM_HINT_ET1
-#define _MM_HINT_ET2 SIMDE_MM_HINT_ET2
-	HEDLEY_DIAGNOSTIC_POP
-#endif
-
-	SIMDE_FUNCTION_ATTRIBUTES void simde_mm_prefetch(char const *p, int i)
-{
-#if defined(HEDLEY_GCC_VERSION)
-	__builtin_prefetch(p);
-#else
-	(void)p;
-#endif
-
-	(void)i;
-}
-#if defined(SIMDE_X86_SSE_NATIVE)
-#if defined(__clang__) &&                  \
-	!SIMDE_DETECT_CLANG_VERSION_CHECK( \
-		10, 0, 0) /* https://reviews.llvm.org/D71718 */
-#define simde_mm_prefetch(p, i)                     \
-	(__extension__({                            \
-		HEDLEY_DIAGNOSTIC_PUSH              \
-		HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \
-		_mm_prefetch((p), (i));             \
-		HEDLEY_DIAGNOSTIC_POP               \
-	}))
-#else
-#define simde_mm_prefetch(p, i) _mm_prefetch(p, i)
-#endif
-#endif
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_prefetch(p, i) simde_mm_prefetch(p, i)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_x_mm_negate_ps(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0)));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \
-	(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8, 1, 0))
-	r_.altivec_f32 = vec_neg(a_.altivec_f32);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vnegq_f32(a_.neon_f32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	r_.altivec_f32 = vec_neg(a_.altivec_f32);
-#elif defined(SIMDE_VECTOR_NEGATE)
-	r_.f32 = -a_.f32;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.f32[i] = -a_.f32[i];
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_rcp_ps(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_rcp_ps(a);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	float32x4_t recip = vrecpeq_f32(a_.neon_f32);
-
-#if SIMDE_ACCURACY_PREFERENCE > 0
-	for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE; ++i) {
-		recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32));
-	}
-#endif
-
-	r_.neon_f32 = recip;
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = vec_re(a_.altivec_f32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.f32 = 1.0f / a_.f32;
-#elif defined(SIMDE_IEEE754_STORAGE)
-	/* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		int32_t ix;
-		simde_float32 fx = a_.f32[i];
-		simde_memcpy(&ix, &fx, sizeof(ix));
-		int32_t x = INT32_C(0x7EF311C3) - ix;
-		simde_float32 temp;
-		simde_memcpy(&temp, &x, sizeof(temp));
-		r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx);
-	}
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.f32[i] = 1.0f / a_.f32[i];
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_rcp_ps(a) simde_mm_rcp_ps((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_rcp_ss(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_rcp_ss(a);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_rcp_ps(a));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-
-	r_.f32[0] = 1.0f / a_.f32[0];
-	r_.f32[1] = a_.f32[1];
-	r_.f32[2] = a_.f32[2];
-	r_.f32[3] = a_.f32[3];
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_rcp_ss(a) simde_mm_rcp_ss((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_rsqrt_ps(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_rsqrt_ps(a);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vrsqrteq_f32(a_.neon_f32);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = vec_rsqrte(a_.altivec_f32);
-#elif defined(SIMDE_IEEE754_STORAGE)
-	/* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf
-        Pages 100 - 103 */
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-#if SIMDE_ACCURACY_PREFERENCE <= 0
-		r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1);
-#else
-		simde_float32 x = a_.f32[i];
-		simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x;
-		int32_t ix;
-
-		simde_memcpy(&ix, &x, sizeof(ix));
-
-#if SIMDE_ACCURACY_PREFERENCE == 1
-		ix = INT32_C(0x5F375A82) - (ix >> 1);
-#else
-		ix = INT32_C(0x5F37599E) - (ix >> 1);
-#endif
-
-		simde_memcpy(&x, &ix, sizeof(x));
-
-#if SIMDE_ACCURACY_PREFERENCE >= 2
-		x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);
-#endif
-		x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);
-
-		r_.f32[i] = x;
-#endif
-	}
-#elif defined(simde_math_sqrtf)
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]);
-	}
-#else
-	HEDLEY_UNREACHABLE();
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_rsqrt_ss(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_rsqrt_ss(a);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 =
-		vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0),
-			       a_.neon_f32, 0);
-#elif defined(SIMDE_IEEE754_STORAGE)
-	{
-#if SIMDE_ACCURACY_PREFERENCE <= 0
-		r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1);
-#else
-		simde_float32 x = a_.f32[0];
-		simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x;
-		int32_t ix;
-
-		simde_memcpy(&ix, &x, sizeof(ix));
-
-#if SIMDE_ACCURACY_PREFERENCE == 1
-		ix = INT32_C(0x5F375A82) - (ix >> 1);
-#else
-		ix = INT32_C(0x5F37599E) - (ix >> 1);
-#endif
-
-		simde_memcpy(&x, &ix, sizeof(x));
-
-#if SIMDE_ACCURACY_PREFERENCE >= 2
-		x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);
-#endif
-		x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);
-
-		r_.f32[0] = x;
-#endif
-	}
-	r_.f32[1] = a_.f32[1];
-	r_.f32[2] = a_.f32[2];
-	r_.f32[3] = a_.f32[3];
-#elif defined(simde_math_sqrtf)
-	r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]);
-	r_.f32[1] = a_.f32[1];
-	r_.f32[2] = a_.f32[2];
-	r_.f32[3] = a_.f32[3];
-#else
-	HEDLEY_UNREACHABLE();
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_sad_pu8(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_sad_pu8(a, b);
-#else
-	simde__m64_private r_, a_ = simde__m64_to_private(a),
-			       b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint16x4_t t = vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8));
-	uint16_t r0 = t[0] + t[1] + t[2] + t[3];
-	r_.neon_u16 = vset_lane_u16(r0, vdup_n_u16(0), 0);
-#else
-	uint16_t sum = 0;
-
-#if defined(SIMDE_HAVE_STDLIB_H)
-	SIMDE_VECTORIZE_REDUCTION(+ : sum)
-	for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
-		sum += HEDLEY_STATIC_CAST(uint8_t, abs(a_.u8[i] - b_.u8[i]));
-	}
-
-	r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum);
-	r_.i16[1] = 0;
-	r_.i16[2] = 0;
-	r_.i16[3] = 0;
-#else
-	HEDLEY_UNREACHABLE();
-#endif
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b)
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b)
-#define _m_psadbw(a, b) simde_mm_sad_pu8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_set_ss(simde_float32 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_set_ss(a);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0);
-#else
-	return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0),
-			       SIMDE_FLOAT32_C(0.0), a);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_set_ss(a) simde_mm_set_ss(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_setr_ps(simde_float32 e3, simde_float32 e2,
-			     simde_float32 e1, simde_float32 e0)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_setr_ps(e3, e2, e1, e0);
-#else
-	return simde_mm_set_ps(e0, e1, e2, e3);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_setzero_ps(void)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_setzero_ps();
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	return vdupq_n_f32(SIMDE_FLOAT32_C(0.0));
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	return vec_splats(SIMDE_FLOAT32_C(0.0));
-#else
-	simde__m128 r;
-	simde_memset(&r, 0, sizeof(r));
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_setzero_ps() simde_mm_setzero_ps()
-#endif
-
-#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_undefined_ps(void)
-{
-	simde__m128_private r_;
-
-#if defined(SIMDE_HAVE_UNDEFINED128)
-	r_.n = _mm_undefined_ps();
-#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
-	r_ = simde__m128_to_private(simde_mm_setzero_ps());
-#endif
-
-	return simde__m128_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_undefined_ps() simde_mm_undefined_ps()
-#endif
-
-#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
-HEDLEY_DIAGNOSTIC_POP
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_x_mm_setone_ps(void)
-{
-	simde__m128 t = simde_mm_setzero_ps();
-	return simde_mm_cmpeq_ps(t, t);
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_sfence(void)
-{
-	/* TODO: Use Hedley. */
-#if defined(SIMDE_X86_SSE_NATIVE)
-	_mm_sfence();
-#elif defined(__GNUC__) && \
-	((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
-	__atomic_thread_fence(__ATOMIC_SEQ_CST);
-#elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && \
-	(__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
-#if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9)
-	__atomic_thread_fence(__ATOMIC_SEQ_CST);
-#else
-	atomic_thread_fence(memory_order_seq_cst);
-#endif
-#elif defined(_MSC_VER)
-	MemoryBarrier();
-#elif HEDLEY_HAS_EXTENSION(c_atomic)
-	__c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
-#elif defined(__GNUC__) && \
-	((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
-	__sync_synchronize();
-#elif defined(_OPENMP)
-#pragma omp critical(simde_mm_sfence_)
-	{
-	}
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_sfence() simde_mm_sfence()
-#endif
-
-#define SIMDE_MM_SHUFFLE(z, y, x, w) \
-	(((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w)
-#endif
-
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && \
-	!defined(__PGI)
-#define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8)
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-#define simde_mm_shuffle_pi16(a, imm8)                                    \
-	(__extension__({                                                  \
-		const simde__m64_private simde__tmp_a_ =                  \
-			simde__m64_to_private(a);                         \
-		simde__m64_from_private((simde__m64_private){             \
-			.i16 = SIMDE_SHUFFLE_VECTOR_(                     \
-				16, 8, (simde__tmp_a_).i16,               \
-				(simde__tmp_a_).i16, (((imm8)) & 3),      \
-				(((imm8) >> 2) & 3), (((imm8) >> 4) & 3), \
-				(((imm8) >> 6) & 3))});                   \
-	}))
-#else
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_shuffle_pi16(simde__m64 a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
-{
-	simde__m64_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-	for (size_t i = 0; i < sizeof(r_.i16) / sizeof(r_.i16[0]); i++) {
-		r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3];
-	}
-
-	HEDLEY_DIAGNOSTIC_PUSH
-#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized")
-#pragma clang diagnostic ignored "-Wconditional-uninitialized"
-#endif
-	return simde__m64_from_private(r_);
-	HEDLEY_DIAGNOSTIC_POP
-}
-#endif
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && \
-	!defined(__PGI)
-#define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8)
-#else
-#define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8)
-#endif
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8)
-#define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8)
-#endif
-
-#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)
-#define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8)
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define simde_mm_shuffle_ps(a, b, imm8)                                      \
-	__extension__({                                                      \
-		float32x4_t ret;                                             \
-		ret = vmovq_n_f32(vgetq_lane_f32(a, (imm8) & (0x3)));        \
-		ret = vsetq_lane_f32(vgetq_lane_f32(a, ((imm8) >> 2) & 0x3), \
-				     ret, 1);                                \
-		ret = vsetq_lane_f32(vgetq_lane_f32(b, ((imm8) >> 4) & 0x3), \
-				     ret, 2);                                \
-		ret = vsetq_lane_f32(vgetq_lane_f32(b, ((imm8) >> 6) & 0x3), \
-				     ret, 3);                                \
-	})
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-#define simde_mm_shuffle_ps(a, b, imm8)                                        \
-	(__extension__({                                                       \
-		simde__m128_from_private((simde__m128_private){                \
-			.f32 = SIMDE_SHUFFLE_VECTOR_(                          \
-				32, 16, simde__m128_to_private(a).f32,         \
-				simde__m128_to_private(b).f32, (((imm8)) & 3), \
-				(((imm8) >> 2) & 3), (((imm8) >> 4) & 3) + 4,  \
-				(((imm8) >> 6) & 3) + 4)});                    \
-	}))
-#else
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_shuffle_ps(simde__m128 a, simde__m128 b, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
-{
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-	r_.f32[0] = a_.f32[(imm8 >> 0) & 3];
-	r_.f32[1] = a_.f32[(imm8 >> 2) & 3];
-	r_.f32[2] = b_.f32[(imm8 >> 4) & 3];
-	r_.f32[3] = b_.f32[(imm8 >> 6) & 3];
-
-	return simde__m128_from_private(r_);
-}
-#endif
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_sqrt_ps(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_sqrt_ps(a);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f32 = vsqrtq_f32(a_.neon_f32);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	float32x4_t est = vrsqrteq_f32(a_.neon_f32);
-	for (int i = 0; i <= SIMDE_ACCURACY_PREFERENCE; i++) {
-		est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est),
-				est);
-	}
-	r_.neon_f32 = vmulq_f32(a_.neon_f32, est);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	r_.altivec_f32 = vec_sqrt(a_.altivec_f32);
-#elif defined(simde_math_sqrt)
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < sizeof(r_.f32) / sizeof(r_.f32[0]); i++) {
-		r_.f32[i] = simde_math_sqrtf(a_.f32[i]);
-	}
-#else
-	HEDLEY_UNREACHABLE();
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_sqrt_ss(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_sqrt_ss(a);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_sqrt_ps(a));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	float32_t value = vgetq_lane_f32(
-		simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0);
-	r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);
-#elif defined(simde_math_sqrtf)
-	r_.f32[0] = simde_math_sqrtf(a_.f32[0]);
-	r_.f32[1] = a_.f32[1];
-	r_.f32[2] = a_.f32[2];
-	r_.f32[3] = a_.f32[3];
-#else
-	HEDLEY_UNREACHABLE();
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_store_ps(simde_float32 mem_addr[4], simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	_mm_store_ps(mem_addr, a);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	vst1q_f32(mem_addr, a_.neon_f32);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	vec_st(a_.altivec_f32, 0, mem_addr);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	wasm_v128_store(mem_addr, a_.wasm_v128);
-#else
-	simde_memcpy(mem_addr, &a_, sizeof(a));
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_store_ps(mem_addr, a)                                      \
-	simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(              \
-				  float *, simde_float32 *, mem_addr), \
-			  (a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_store1_ps(simde_float32 mem_addr[4], simde__m128 a)
-{
-	simde_float32 *mem_addr_ =
-		SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128);
-
-#if defined(SIMDE_X86_SSE_NATIVE)
-	_mm_store_ps1(mem_addr_, a);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	wasm_v128_store(mem_addr_,
-			wasm_v32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0,
-					   0));
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	simde__m128_private tmp_;
-	tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0);
-	simde_mm_store_ps(mem_addr_, tmp_.f32);
-#else
-	SIMDE_VECTORIZE_ALIGNED(mem_addr_ : 16)
-	for (size_t i = 0; i < sizeof(a_.f32) / sizeof(a_.f32[0]); i++) {
-		mem_addr_[i] = a_.f32[0];
-	}
-#endif
-#endif
-}
-#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a)
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_store_ps1(mem_addr, a)                                      \
-	simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(              \
-				   float *, simde_float32 *, mem_addr), \
-			   (a))
-#define _mm_store1_ps(mem_addr, a)                                      \
-	simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(              \
-				   float *, simde_float32 *, mem_addr), \
-			   (a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_store_ss(simde_float32 *mem_addr, simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	_mm_store_ss(mem_addr, a);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	vst1q_lane_f32(mem_addr, a_.neon_f32, 0);
-#else
-	*mem_addr = a_.f32[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_store_ss(mem_addr, a)                                      \
-	simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(              \
-				  float *, simde_float32 *, mem_addr), \
-			  (a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_storeh_pi(simde__m64 *mem_addr, simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	_mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64 *, mem_addr), a);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t *, mem_addr),
-		 vget_high_f32(a_.neon_f32));
-#else
-	simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1]));
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_storel_pi(simde__m64 *mem_addr, simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	_mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64 *, mem_addr), a);
-#else
-	simde__m64_private *dest_ =
-		HEDLEY_REINTERPRET_CAST(simde__m64_private *, mem_addr);
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	dest_->neon_f32 = vget_low_f32(a_.neon_f32);
-#else
-	dest_->f32[0] = a_.f32[0];
-	dest_->f32[1] = a_.f32[1];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_storer_ps(simde_float32 mem_addr[4], simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	_mm_storer_ps(mem_addr, a);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	vec_st(vec_reve(a_.altivec_f32), 0, mem_addr);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	float32x4_t tmp = vrev64q_f32(a_.neon_f32);
-	vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2));
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0);
-	simde_mm_store_ps(mem_addr, simde__m128_from_private(a_));
-#else
-	SIMDE_VECTORIZE_ALIGNED(mem_addr : 16)
-	for (size_t i = 0; i < sizeof(a_.f32) / sizeof(a_.f32[0]); i++) {
-		mem_addr[i] =
-			a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i];
-	}
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_storer_ps(mem_addr, a)                                      \
-	simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(              \
-				   float *, simde_float32 *, mem_addr), \
-			   (a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_storeu_ps(simde_float32 mem_addr[4], simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	_mm_storeu_ps(mem_addr, a);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	vst1q_f32(mem_addr, a_.neon_f32);
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	vec_vsx_st(a_.altivec_f32, 0, mem_addr);
-#else
-	simde_memcpy(mem_addr, &a_, sizeof(a_));
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_storeu_ps(mem_addr, a)                                      \
-	simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(              \
-				   float *, simde_float32 *, mem_addr), \
-			   (a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_sub_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_sub_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.f32 = a_.f32 - b_.f32;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.f32[i] = a_.f32[i] - b_.f32[i];
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_sub_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_sub_ss(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_ss(a, simde_mm_sub_ps(a, b));
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-	r_.f32[0] = a_.f32[0] - b_.f32[0];
-	r_.f32[1] = a_.f32[1];
-	r_.f32[2] = a_.f32[2];
-	r_.f32[3] = a_.f32[3];
-
-	return simde__m128_from_private(r_);
-#endif
-}
-
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_ucomieq_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_ucomieq_ss(a, b);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a),
-			    b_ = simde__m128_to_private(b);
-	int r;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
-	uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
-	uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
-	uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32);
-	r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0);
-#elif defined(SIMDE_HAVE_FENV_H)
-	fenv_t envp;
-	int x = feholdexcept(&envp);
-	r = a_.f32[0] == b_.f32[0];
-	if (HEDLEY_LIKELY(x == 0))
-		fesetenv(&envp);
-#else
-	r = a_.f32[0] == b_.f32[0];
-#endif
-
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_ucomige_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_ucomige_ss(a, b);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a),
-			    b_ = simde__m128_to_private(b);
-	int r;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
-	uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
-	uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
-	uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32);
-	r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0);
-#elif defined(SIMDE_HAVE_FENV_H)
-	fenv_t envp;
-	int x = feholdexcept(&envp);
-	r = a_.f32[0] >= b_.f32[0];
-	if (HEDLEY_LIKELY(x == 0))
-		fesetenv(&envp);
-#else
-	r = a_.f32[0] >= b_.f32[0];
-#endif
-
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_ucomigt_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_ucomigt_ss(a, b);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a),
-			    b_ = simde__m128_to_private(b);
-	int r;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
-	uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
-	uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
-	uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32);
-	r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0);
-#elif defined(SIMDE_HAVE_FENV_H)
-	fenv_t envp;
-	int x = feholdexcept(&envp);
-	r = a_.f32[0] > b_.f32[0];
-	if (HEDLEY_LIKELY(x == 0))
-		fesetenv(&envp);
-#else
-	r = a_.f32[0] > b_.f32[0];
-#endif
-
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_ucomile_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_ucomile_ss(a, b);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a),
-			    b_ = simde__m128_to_private(b);
-	int r;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
-	uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
-	uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
-	uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32);
-	r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0);
-#elif defined(SIMDE_HAVE_FENV_H)
-	fenv_t envp;
-	int x = feholdexcept(&envp);
-	r = a_.f32[0] <= b_.f32[0];
-	if (HEDLEY_LIKELY(x == 0))
-		fesetenv(&envp);
-#else
-	r = a_.f32[0] <= b_.f32[0];
-#endif
-
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_ucomilt_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_ucomilt_ss(a, b);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a),
-			    b_ = simde__m128_to_private(b);
-	int r;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
-	uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
-	uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
-	uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32);
-	r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0);
-#elif defined(SIMDE_HAVE_FENV_H)
-	fenv_t envp;
-	int x = feholdexcept(&envp);
-	r = a_.f32[0] < b_.f32[0];
-	if (HEDLEY_LIKELY(x == 0))
-		fesetenv(&envp);
-#else
-	r = a_.f32[0] < b_.f32[0];
-#endif
-
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_ucomineq_ss(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_ucomineq_ss(a, b);
-#else
-	simde__m128_private a_ = simde__m128_to_private(a),
-			    b_ = simde__m128_to_private(b);
-	int r;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
-	uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
-	uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
-	uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));
-	r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0);
-#elif defined(SIMDE_HAVE_FENV_H)
-	fenv_t envp;
-	int x = feholdexcept(&envp);
-	r = a_.f32[0] != b_.f32[0];
-	if (HEDLEY_LIKELY(x == 0))
-		fesetenv(&envp);
-#else
-	r = a_.f32[0] != b_.f32[0];
-#endif
-
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b))
-#endif
-
-#if defined(SIMDE_X86_SSE_NATIVE)
-#if defined(__has_builtin)
-#if __has_builtin(__builtin_ia32_undef128)
-#define SIMDE_HAVE_UNDEFINED128
-#endif
-#elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && \
-	!defined(_MSC_VER)
-#define SIMDE_HAVE_UNDEFINED128
-#endif
-#endif
-
-#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_unpackhi_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_unpackhi_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	float32x2_t a1 = vget_high_f32(a_.neon_f32);
-	float32x2_t b1 = vget_high_f32(b_.neon_f32);
-	float32x2x2_t result = vzip_f32(a1, b1);
-	r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7);
-#else
-	r_.f32[0] = a_.f32[2];
-	r_.f32[1] = b_.f32[2];
-	r_.f32[2] = a_.f32[3];
-	r_.f32[3] = b_.f32[3];
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_unpacklo_ps(simde__m128 a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return _mm_unpacklo_ps(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a),
-				b_ = simde__m128_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	float32x2_t a1 = vget_low_f32(a_.neon_f32);
-	float32x2_t b1 = vget_low_f32(b_.neon_f32);
-	float32x2x2_t result = vzip_f32(a1, b1);
-	r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]);
-#else
-	r_.f32[0] = a_.f32[0];
-	r_.f32[1] = b_.f32[0];
-	r_.f32[2] = a_.f32[1];
-	r_.f32[3] = b_.f32[1];
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_stream_pi(simde__m64 *mem_addr, simde__m64 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	_mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64 *, mem_addr), a);
-#else
-	simde__m64_private *dest = HEDLEY_REINTERPRET_CAST(simde__m64_private *,
-							   mem_addr),
-			   a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	dest->i64[0] = vget_lane_s64(a_.neon_i64, 0);
-#else
-	dest->i64[0] = a_.i64[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_stream_ps(simde_float32 mem_addr[4], simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	_mm_stream_ps(mem_addr, a);
-#elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && \
-	defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	simde__m128_private a_ = simde__m128_to_private(a);
-	__builtin_nontemporal_store(
-		a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32) *, mem_addr));
-#else
-	simde_mm_store_ps(mem_addr, a);
-#endif
-}
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _mm_stream_ps(mem_addr, a)                                      \
-	simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(              \
-				   float *, simde_float32 *, mem_addr), \
-			   (a))
-#endif
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3)            \
-	do {                                                      \
-		float32x4x2_t ROW01 = vtrnq_f32(row0, row1);      \
-		float32x4x2_t ROW23 = vtrnq_f32(row2, row3);      \
-		row0 = vcombine_f32(vget_low_f32(ROW01.val[0]),   \
-				    vget_low_f32(ROW23.val[0]));  \
-		row1 = vcombine_f32(vget_low_f32(ROW01.val[1]),   \
-				    vget_low_f32(ROW23.val[1]));  \
-		row2 = vcombine_f32(vget_high_f32(ROW01.val[0]),  \
-				    vget_high_f32(ROW23.val[0])); \
-		row3 = vcombine_f32(vget_high_f32(ROW01.val[1]),  \
-				    vget_high_f32(ROW23.val[1])); \
-	} while (0)
-#else
-#define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3)       \
-	do {                                                 \
-		simde__m128 tmp3, tmp2, tmp1, tmp0;          \
-		tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \
-		tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \
-		tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \
-		tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \
-		row0 = simde_mm_movelh_ps(tmp0, tmp2);       \
-		row1 = simde_mm_movehl_ps(tmp2, tmp0);       \
-		row2 = simde_mm_movelh_ps(tmp1, tmp3);       \
-		row3 = simde_mm_movehl_ps(tmp3, tmp1);       \
-	} while (0)
-#endif
-#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
-#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
-	SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3)
-#endif
-
-#if defined(_MM_EXCEPT_INVALID)
-#define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID
-#else
-#define SIMDE_MM_EXCEPT_INVALID (0x0001)
-#endif
-#if defined(_MM_EXCEPT_DENORM)
-#define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM
-#else
-#define SIMDE_MM_EXCEPT_DENORM (0x0002)
-#endif
-#if defined(_MM_EXCEPT_DIV_ZERO)
-#define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO
-#else
-#define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004)
-#endif
-#if defined(_MM_EXCEPT_OVERFLOW)
-#define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW
-#else
-#define SIMDE_MM_EXCEPT_OVERFLOW (0x0008)
-#endif
-#if defined(_MM_EXCEPT_UNDERFLOW)
-#define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW
-#else
-#define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010)
-#endif
-#if defined(_MM_EXCEPT_INEXACT)
-#define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT
-#else
-#define SIMDE_MM_EXCEPT_INEXACT (0x0020)
-#endif
-#if defined(_MM_EXCEPT_MASK)
-#define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK
-#else
-#define SIMDE_MM_EXCEPT_MASK                                   \
-	(SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM |    \
-	 SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \
-	 SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT)
-#endif
-
-#if defined(_MM_MASK_INVALID)
-#define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID
-#else
-#define SIMDE_MM_MASK_INVALID (0x0080)
-#endif
-#if defined(_MM_MASK_DENORM)
-#define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM
-#else
-#define SIMDE_MM_MASK_DENORM (0x0100)
-#endif
-#if defined(_MM_MASK_DIV_ZERO)
-#define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO
-#else
-#define SIMDE_MM_MASK_DIV_ZERO (0x0200)
-#endif
-#if defined(_MM_MASK_OVERFLOW)
-#define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW
-#else
-#define SIMDE_MM_MASK_OVERFLOW (0x0400)
-#endif
-#if defined(_MM_MASK_UNDERFLOW)
-#define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW
-#else
-#define SIMDE_MM_MASK_UNDERFLOW (0x0800)
-#endif
-#if defined(_MM_MASK_INEXACT)
-#define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT
-#else
-#define SIMDE_MM_MASK_INEXACT (0x1000)
-#endif
-#if defined(_MM_MASK_MASK)
-#define SIMDE_MM_MASK_MASK _MM_MASK_MASK
-#else
-#define SIMDE_MM_MASK_MASK                                 \
-	(SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM |    \
-	 SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \
-	 SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT)
-#endif
-
-#if defined(_MM_FLUSH_ZERO_MASK)
-#define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK
-#else
-#define SIMDE_MM_FLUSH_ZERO_MASK (0x8000)
-#endif
-#if defined(_MM_FLUSH_ZERO_ON)
-#define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON
-#else
-#define SIMDE_MM_FLUSH_ZERO_ON (0x8000)
-#endif
-#if defined(_MM_FLUSH_ZERO_OFF)
-#define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF
-#else
-#define SIMDE_MM_FLUSH_ZERO_OFF (0x0000)
-#endif
-
-SIMDE_END_DECLS_
-
-HEDLEY_DIAGNOSTIC_POP
-
-#endif /* !defined(SIMDE_X86_SSE_H) */

+ 0 - 7549
libobs/util/simde/x86/sse2.h

@@ -1,7549 +0,0 @@
-/* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- *   2017-2020 Evan Nemerson <[email protected]>
- *   2015-2017 John W. Ratcliff <[email protected]>
- *   2015      Brandon Rowlett <[email protected]>
- *   2015      Ken Fast <[email protected]>
- *   2017      Hasindu Gamaarachchi <[email protected]>
- *   2018      Jeff Daily <[email protected]>
- */
-
-#if !defined(SIMDE_X86_SSE2_H)
-#define SIMDE_X86_SSE2_H
-
-#include "sse.h"
-
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
-SIMDE_BEGIN_DECLS_
-
-typedef union {
-#if defined(SIMDE_VECTOR_SUBSCRIPT)
-	SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-#if defined(SIMDE_HAVE_INT128_)
-	SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-#endif
-	SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-
-	SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-#else
-	SIMDE_ALIGN_TO_16 int8_t i8[16];
-	SIMDE_ALIGN_TO_16 int16_t i16[8];
-	SIMDE_ALIGN_TO_16 int32_t i32[4];
-	SIMDE_ALIGN_TO_16 int64_t i64[2];
-	SIMDE_ALIGN_TO_16 uint8_t u8[16];
-	SIMDE_ALIGN_TO_16 uint16_t u16[8];
-	SIMDE_ALIGN_TO_16 uint32_t u32[4];
-	SIMDE_ALIGN_TO_16 uint64_t u64[2];
-#if defined(SIMDE_HAVE_INT128_)
-	SIMDE_ALIGN_TO_16 simde_int128 i128[1];
-	SIMDE_ALIGN_TO_16 simde_uint128 u128[1];
-#endif
-	SIMDE_ALIGN_TO_16 simde_float32 f32[4];
-	SIMDE_ALIGN_TO_16 simde_float64 f64[2];
-
-	SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)];
-	SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
-#endif
-
-	SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2];
-	SIMDE_ALIGN_TO_16 simde__m64 m64[2];
-
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	SIMDE_ALIGN_TO_16 __m128i n;
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	SIMDE_ALIGN_TO_16 int8x16_t neon_i8;
-	SIMDE_ALIGN_TO_16 int16x8_t neon_i16;
-	SIMDE_ALIGN_TO_16 int32x4_t neon_i32;
-	SIMDE_ALIGN_TO_16 int64x2_t neon_i64;
-	SIMDE_ALIGN_TO_16 uint8x16_t neon_u8;
-	SIMDE_ALIGN_TO_16 uint16x8_t neon_u16;
-	SIMDE_ALIGN_TO_16 uint32x4_t neon_u32;
-	SIMDE_ALIGN_TO_16 uint64x2_t neon_u64;
-	SIMDE_ALIGN_TO_16 float32x4_t neon_f32;
-#if defined(SIMDE_ARCH_AARCH64)
-	SIMDE_ALIGN_TO_16 float64x2_t neon_f64;
-#endif
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	SIMDE_ALIGN_TO_16 v128_t wasm_v128;
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8;
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16;
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32;
-#if defined(__UINT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	SIMDE_ALIGN_TO_16
-	SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f;
-#else
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f;
-#endif
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8;
-	SIMDE_ALIGN_TO_16
-	SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16;
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32;
-#if defined(__UINT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	SIMDE_ALIGN_TO_16
-	SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f;
-#else
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f;
-#endif
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32;
-#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	SIMDE_ALIGN_TO_16
-	SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64;
-	SIMDE_ALIGN_TO_16
-	SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64;
-#endif
-#endif
-} simde__m128i_private;
-
-typedef union {
-#if defined(SIMDE_VECTOR_SUBSCRIPT)
-	SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-	SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-#else
-	SIMDE_ALIGN_TO_16 int8_t i8[16];
-	SIMDE_ALIGN_TO_16 int16_t i16[8];
-	SIMDE_ALIGN_TO_16 int32_t i32[4];
-	SIMDE_ALIGN_TO_16 int64_t i64[2];
-	SIMDE_ALIGN_TO_16 uint8_t u8[16];
-	SIMDE_ALIGN_TO_16 uint16_t u16[8];
-	SIMDE_ALIGN_TO_16 uint32_t u32[4];
-	SIMDE_ALIGN_TO_16 uint64_t u64[2];
-	SIMDE_ALIGN_TO_16 simde_float32 f32[4];
-	SIMDE_ALIGN_TO_16 simde_float64 f64[2];
-	SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)];
-	SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
-#endif
-
-	SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2];
-	SIMDE_ALIGN_TO_16 simde__m64 m64[2];
-
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	SIMDE_ALIGN_TO_16 __m128d n;
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	SIMDE_ALIGN_TO_16 int8x16_t neon_i8;
-	SIMDE_ALIGN_TO_16 int16x8_t neon_i16;
-	SIMDE_ALIGN_TO_16 int32x4_t neon_i32;
-	SIMDE_ALIGN_TO_16 int64x2_t neon_i64;
-	SIMDE_ALIGN_TO_16 uint8x16_t neon_u8;
-	SIMDE_ALIGN_TO_16 uint16x8_t neon_u16;
-	SIMDE_ALIGN_TO_16 uint32x4_t neon_u32;
-	SIMDE_ALIGN_TO_16 uint64x2_t neon_u64;
-	SIMDE_ALIGN_TO_16 float32x4_t neon_f32;
-#if defined(SIMDE_ARCH_AARCH64)
-	SIMDE_ALIGN_TO_16 float64x2_t neon_f64;
-#endif
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	SIMDE_ALIGN_TO_16 v128_t wasm_v128;
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8;
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16;
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32;
-#if defined(__INT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	SIMDE_ALIGN_TO_16
-	SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f;
-#else
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f;
-#endif
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8;
-	SIMDE_ALIGN_TO_16
-	SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16;
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32;
-#if defined(__UINT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	SIMDE_ALIGN_TO_16
-	SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f;
-#else
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f;
-#endif
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32;
-#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	SIMDE_ALIGN_TO_16
-	SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64;
-	SIMDE_ALIGN_TO_16
-	SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;
-	SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64;
-#endif
-#endif
-} simde__m128d_private;
-
-#if defined(SIMDE_X86_SSE2_NATIVE)
-typedef __m128i simde__m128i;
-typedef __m128d simde__m128d;
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-typedef int64x2_t simde__m128i;
-#if defined(SIMDE_ARCH_AARCH64)
-typedef float64x2_t simde__m128d;
-#elif defined(SIMDE_VECTOR_SUBSCRIPT)
-typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-#else
-typedef simde__m128d_private simde__m128d;
-#endif
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-typedef v128_t simde__m128i;
-typedef v128_t simde__m128d;
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i;
-#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d;
-#else
-typedef simde__m128d_private simde__m128d;
-#endif
-#elif defined(SIMDE_VECTOR_SUBSCRIPT)
-typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-typedef simde_float64
-	simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
-#else
-typedef simde__m128i_private simde__m128i;
-typedef simde__m128d_private simde__m128d;
-#endif
-
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-typedef simde__m128i __m128i;
-typedef simde__m128d __m128d;
-#endif
-
-HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect");
-HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private),
-		     "simde__m128i_private size incorrect");
-HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect");
-HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private),
-		     "simde__m128d_private size incorrect");
-#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16,
-		     "simde__m128i is not 16-byte aligned");
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16,
-		     "simde__m128i_private is not 16-byte aligned");
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16,
-		     "simde__m128d is not 16-byte aligned");
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16,
-		     "simde__m128d_private is not 16-byte aligned");
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde__m128i_from_private(simde__m128i_private v)
-{
-	simde__m128i r;
-	simde_memcpy(&r, &v, sizeof(r));
-	return r;
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i_private simde__m128i_to_private(simde__m128i v)
-{
-	simde__m128i_private r;
-	simde_memcpy(&r, &v, sizeof(r));
-	return r;
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde__m128d_from_private(simde__m128d_private v)
-{
-	simde__m128d r;
-	simde_memcpy(&r, &v, sizeof(r));
-	return r;
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d_private simde__m128d_to_private(simde__m128d v)
-{
-	simde__m128d_private r;
-	simde_memcpy(&r, &v, sizeof(r));
-	return r;
-}
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32)
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64)
-#endif
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i,
-				       SIMDE_POWER_ALTIVEC_VECTOR(signed char),
-				       altivec, i8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i,
-				       SIMDE_POWER_ALTIVEC_VECTOR(signed short),
-				       altivec, i16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i,
-				       SIMDE_POWER_ALTIVEC_VECTOR(signed int),
-				       altivec, i32)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
-	m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
-	m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i,
-				       SIMDE_POWER_ALTIVEC_VECTOR(unsigned int),
-				       altivec, u32)
-#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
-	m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
-	m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64)
-#endif
-#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32)
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64)
-#endif
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d,
-				       SIMDE_POWER_ALTIVEC_VECTOR(signed char),
-				       altivec, i8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d,
-				       SIMDE_POWER_ALTIVEC_VECTOR(signed short),
-				       altivec, i16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d,
-				       SIMDE_POWER_ALTIVEC_VECTOR(signed int),
-				       altivec, i32)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
-	m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
-	m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d,
-				       SIMDE_POWER_ALTIVEC_VECTOR(unsigned int),
-				       altivec, u32)
-#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
-	m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
-	m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64)
-#if defined(SIMDE_BUG_GCC_95782)
-SIMDE_FUNCTION_ATTRIBUTES
-SIMDE_POWER_ALTIVEC_VECTOR(double)
-simde__m128d_to_altivec_f64(simde__m128d value)
-{
-	simde__m128d_private r_ = simde__m128d_to_private(value);
-	return r_.altivec_f64;
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double)
-						   value)
-{
-	simde__m128d_private r_;
-	r_.altivec_f64 = value;
-	return simde__m128d_from_private(r_);
-}
-#else
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d,
-				       SIMDE_POWER_ALTIVEC_VECTOR(double),
-				       altivec, f64)
-#endif
-#endif
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128);
-SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128);
-#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_set_pd(simde_float64 e1, simde_float64 e0)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_set_pd(e1, e0);
-#else
-	simde__m128d_private r_;
-
-#if defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_make(e0, e1);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	SIMDE_ALIGN_TO_16 simde_float64 data[2] = {e0, e1};
-	r_.neon_f64 = vld1q_f64(data);
-#else
-	r_.f64[0] = e0;
-	r_.f64[1] = e1;
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_set1_pd(simde_float64 a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_set1_pd(a);
-#else
-	simde__m128d_private r_;
-
-#if defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_splat(a);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vdupq_n_f64(a);
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
-		r_.f64[i] = a;
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#define simde_mm_set_pd1(a) simde_mm_set1_pd(a)
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_set1_pd(a) simde_mm_set1_pd(a)
-#define _mm_set_pd1(a) simde_mm_set1_pd(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_x_mm_abs_pd(simde__m128d a)
-{
-#if defined(SIMDE_X86_AVX512F_NATIVE) && \
-	(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7, 4, 0))
-	return _mm512_castpd512_pd128(_mm512_abs_pd(_mm512_castpd128_pd512(a)));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
-	r_.neon_f32 = vabsq_f32(a_.neon_f32);
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	r_.altivec_f32 = vec_abs(a_.altivec_f32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.f64[i] = simde_math_fabs(a_.f64[i]);
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_x_mm_not_pd(simde__m128d a)
-{
-#if defined(SIMDE_X86_AVX512VL_NATIVE)
-	__m128i ai = _mm_castpd_si128(a);
-	return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vmvnq_s32(a_.neon_i32);
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v128_not(a_.wasm_v128);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32f = ~a_.i32f;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
-		r_.i32f[i] = ~(a_.i32f[i]);
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_x_mm_select_pd(simde__m128d a, simde__m128d b,
-				  simde__m128d mask)
-{
-/* This function is for when you want to blend two elements together
-   * according to a mask.  It is similar to _mm_blendv_pd, except that
-   * it is undefined whether the blend is based on the highest bit in
-   * each lane (like blendv) or just bitwise operations.  This allows
-   * us to implement the function efficiently everywhere.
-   *
-   * Basically, you promise that all the lanes in mask are either 0 or
-   * ~0. */
-#if defined(SIMDE_X86_SSE4_1_NATIVE)
-	return _mm_blendv_pd(a, b, mask);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b),
-				 mask_ = simde__m128d_to_private(mask);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
-		r_.i64[i] = a_.i64[i] ^
-			    ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]);
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_add_epi8(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_add_epi8(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i8 = a_.i8 + b_.i8;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
-		r_.i8[i] = a_.i8[i] + b_.i8[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_add_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_add_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i16 = a_.i16 + b_.i16;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = a_.i16[i] + b_.i16[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_add_epi32(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_add_epi32(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = a_.i32 + b_.i32;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = a_.i32[i] + b_.i32[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_add_epi64(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_add_epi64(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64);
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i64 = a_.i64 + b_.i64;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
-		r_.i64[i] = a_.i64[i] + b_.i64[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_add_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_add_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.f64 = a_.f64 + b_.f64;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.f64[i] = a_.f64[i] + b_.f64[i];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_add_pd(a, b) simde_mm_add_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_move_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_move_sd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 =
-		vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0);
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-#if defined(HEDLEY_IBM_VERSION)
-	r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1);
-#else
-	r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1);
-#endif
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1);
-#else
-	r_.f64[0] = b_.f64[0];
-	r_.f64[1] = a_.f64[1];
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_move_sd(a, b) simde_mm_move_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_add_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_add_sd(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_sd(a, simde_mm_add_pd(a, b));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-	r_.f64[0] = a_.f64[0] + b_.f64[0];
-	r_.f64[1] = a_.f64[1];
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_add_sd(a, b) simde_mm_add_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_add_si64(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_add_si64(a, b);
-#else
-	simde__m64_private r_, a_ = simde__m64_to_private(a),
-			       b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64);
-#else
-	r_.i64[0] = a_.i64[0] + b_.i64[0];
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_add_si64(a, b) simde_mm_add_si64(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_adds_epi8(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_adds_epi8(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i8x16_add_saturate(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
-		const int_fast16_t tmp =
-			HEDLEY_STATIC_CAST(int_fast16_t, a_.i8[i]) +
-			HEDLEY_STATIC_CAST(int_fast16_t, b_.i8[i]);
-		r_.i8[i] = HEDLEY_STATIC_CAST(
-			int8_t,
-			((tmp < INT8_MAX) ? ((tmp > INT8_MIN) ? tmp : INT8_MIN)
-					  : INT8_MAX));
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_adds_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_adds_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i16x8_add_saturate(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		const int_fast32_t tmp =
-			HEDLEY_STATIC_CAST(int_fast32_t, a_.i16[i]) +
-			HEDLEY_STATIC_CAST(int_fast32_t, b_.i16[i]);
-		r_.i16[i] = HEDLEY_STATIC_CAST(
-			int16_t,
-			((tmp < INT16_MAX)
-				 ? ((tmp > INT16_MIN) ? tmp : INT16_MIN)
-				 : INT16_MAX));
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_adds_epu8(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_adds_epu8(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_u8x16_add_saturate(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
-		r_.u8[i] = ((UINT8_MAX - a_.u8[i]) > b_.u8[i])
-				   ? (a_.u8[i] + b_.u8[i])
-				   : UINT8_MAX;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_adds_epu16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_adds_epu16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_u16x8_add_saturate(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
-		r_.u16[i] = ((UINT16_MAX - a_.u16[i]) > b_.u16[i])
-				    ? (a_.u16[i] + b_.u16[i])
-				    : UINT16_MAX;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_and_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_and_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32f = a_.i32f & b_.i32f;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
-		r_.i32f[i] = a_.i32f[i] & b_.i32f[i];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_and_pd(a, b) simde_mm_and_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_and_si128(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_and_si128(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32f = a_.i32f & b_.i32f;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
-		r_.i32f[i] = a_.i32f[i] & b_.i32f[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_and_si128(a, b) simde_mm_and_si128(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_andnot_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_andnot_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32f = ~a_.i32f & b_.i32f;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u64) / sizeof(r_.u64[0])); i++) {
-		r_.u64[i] = ~a_.u64[i] & b_.u64[i];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_andnot_si128(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_andnot_si128(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32f = ~a_.i32f & b_.i32f;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
-		r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_xor_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_xor_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32f = a_.i32f ^ b_.i32f;
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
-		r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_avg_epu8(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_avg_epu8(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) &&      \
-	defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
-	defined(SIMDE_CONVERT_VECTOR_)
-	uint16_t wa SIMDE_VECTOR(32);
-	uint16_t wb SIMDE_VECTOR(32);
-	uint16_t wr SIMDE_VECTOR(32);
-	SIMDE_CONVERT_VECTOR_(wa, a_.u8);
-	SIMDE_CONVERT_VECTOR_(wb, b_.u8);
-	wr = (wa + wb + 1) >> 1;
-	SIMDE_CONVERT_VECTOR_(r_.u8, wr);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
-		r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_avg_epu16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_avg_epu16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) &&      \
-	defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
-	defined(SIMDE_CONVERT_VECTOR_)
-	uint32_t wa SIMDE_VECTOR(32);
-	uint32_t wb SIMDE_VECTOR(32);
-	uint32_t wr SIMDE_VECTOR(32);
-	SIMDE_CONVERT_VECTOR_(wa, a_.u16);
-	SIMDE_CONVERT_VECTOR_(wb, b_.u16);
-	wr = (wa + wb + 1) >> 1;
-	SIMDE_CONVERT_VECTOR_(r_.u16, wr);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
-		r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_setzero_si128(void)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_setzero_si128();
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vdupq_n_s32(0);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT)
-	r_.i32 = __extension__(__typeof__(r_.i32)){0, 0, 0, 0};
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
-		r_.i32f[i] = 0;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_setzero_si128() (simde_mm_setzero_si128())
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_bslli_si128(simde__m128i a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
-{
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a);
-
-	if (HEDLEY_UNLIKELY((imm8 & ~15))) {
-		return simde_mm_setzero_si128();
-	}
-
-#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER)
-	r_.altivec_i8 =
-#if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)
-		vec_slo
-#else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */
-		vec_sro
-#endif
-		(a_.altivec_i8,
-		 vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8)));
-#elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)
-	r_.u128[0] = a_.u128[0] << (imm8 * 8);
-#else
-	r_ = simde__m128i_to_private(simde_mm_setzero_si128());
-	for (int i = imm8;
-	     i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0]));
-	     i++) {
-		r_.i8[i] = a_.i8[i - imm8];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
-#define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8)
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__)
-#define simde_mm_bslli_si128(a, imm8)                                      \
-	simde__m128i_from_neon_i8(                                         \
-		((imm8) <= 0)                                              \
-			? simde__m128i_to_neon_i8(a)                       \
-			: (((imm8) > 15)                                   \
-				   ? (vdupq_n_s8(0))                       \
-				   : (vextq_s8(vdupq_n_s8(0),              \
-					       simde__m128i_to_neon_i8(a), \
-					       16 - (imm8)))))
-#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-#define simde_mm_bslli_si128(a, imm8)                                          \
-	(__extension__({                                                       \
-		const simde__m128i_private simde__tmp_a_ =                     \
-			simde__m128i_to_private(a);                            \
-		const simde__m128i_private simde__tmp_z_ =                     \
-			simde__m128i_to_private(simde_mm_setzero_si128());     \
-		simde__m128i_private simde__tmp_r_;                            \
-		if (HEDLEY_UNLIKELY(imm8 > 15)) {                              \
-			simde__tmp_r_ = simde__m128i_to_private(               \
-				simde_mm_setzero_si128());                     \
-		} else {                                                       \
-			simde__tmp_r_.i8 = SIMDE_SHUFFLE_VECTOR_(              \
-				8, 16, simde__tmp_z_.i8, (simde__tmp_a_).i8,   \
-				HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \
-		}                                                              \
-		simde__m128i_from_private(simde__tmp_r_);                      \
-	}))
-#endif
-#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8)
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8)
-#define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_bsrli_si128(simde__m128i a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
-{
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a);
-
-	if (HEDLEY_UNLIKELY((imm8 & ~15))) {
-		return simde_mm_setzero_si128();
-	}
-
-#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER)
-	r_.altivec_i8 =
-#if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)
-		vec_sro
-#else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */
-		vec_slo
-#endif
-		(a_.altivec_i8,
-		 vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8)));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
-		const int e = HEDLEY_STATIC_CAST(int, i) + imm8;
-		r_.i8[i] = (e < 16) ? a_.i8[e] : 0;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
-#define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8)
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__)
-#define simde_mm_bsrli_si128(a, imm8)                                   \
-	simde__m128i_from_neon_i8(                                      \
-		((imm8 < 0) || (imm8 > 15))                             \
-			? vdupq_n_s8(0)                                 \
-			: (vextq_s8(simde__m128i_to_private(a).neon_i8, \
-				    vdupq_n_s8(0),                      \
-				    ((imm8 & 15) != 0) ? imm8 : (imm8 & 15))))
-#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-#define simde_mm_bsrli_si128(a, imm8)                                          \
-	(__extension__({                                                       \
-		const simde__m128i_private simde__tmp_a_ =                     \
-			simde__m128i_to_private(a);                            \
-		const simde__m128i_private simde__tmp_z_ =                     \
-			simde__m128i_to_private(simde_mm_setzero_si128());     \
-		simde__m128i_private simde__tmp_r_ =                           \
-			simde__m128i_to_private(a);                            \
-		if (HEDLEY_UNLIKELY(imm8 > 15)) {                              \
-			simde__tmp_r_ = simde__m128i_to_private(               \
-				simde_mm_setzero_si128());                     \
-		} else {                                                       \
-			simde__tmp_r_.i8 = SIMDE_SHUFFLE_VECTOR_(              \
-				8, 16, simde__tmp_z_.i8, (simde__tmp_a_).i8,   \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31),  \
-				HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \
-		}                                                              \
-		simde__m128i_from_private(simde__tmp_r_);                      \
-	}))
-#endif
-#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8))
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8))
-#define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_clflush(void const *p)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_clflush(p);
-#else
-	(void)p;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_clflush(a, b) simde_mm_clflush()
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_comieq_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_comieq_sd(a, b);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a),
-			     b_ = simde__m128d_to_private(b);
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return wasm_f64x2_extract_lane(a_.wasm_v128, 0) ==
-	       wasm_f64x2_extract_lane(b_.wasm_v128, 0);
-#else
-	return a_.f64[0] == b_.f64[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_comige_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_comige_sd(a, b);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a),
-			     b_ = simde__m128d_to_private(b);
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >=
-	       wasm_f64x2_extract_lane(b_.wasm_v128, 0);
-#else
-	return a_.f64[0] >= b_.f64[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_comigt_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_comigt_sd(a, b);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a),
-			     b_ = simde__m128d_to_private(b);
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >
-	       wasm_f64x2_extract_lane(b_.wasm_v128, 0);
-#else
-	return a_.f64[0] > b_.f64[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_comile_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_comile_sd(a, b);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a),
-			     b_ = simde__m128d_to_private(b);
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <=
-	       wasm_f64x2_extract_lane(b_.wasm_v128, 0);
-#else
-	return a_.f64[0] <= b_.f64[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_comilt_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_comilt_sd(a, b);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a),
-			     b_ = simde__m128d_to_private(b);
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <
-	       wasm_f64x2_extract_lane(b_.wasm_v128, 0);
-#else
-	return a_.f64[0] < b_.f64[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_comineq_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_comineq_sd(a, b);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a),
-			     b_ = simde__m128d_to_private(b);
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return wasm_f64x2_extract_lane(a_.wasm_v128, 0) !=
-	       wasm_f64x2_extract_lane(b_.wasm_v128, 0);
-#else
-	return a_.f64[0] != b_.f64[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src)
-{
-	simde__m128d_private r_, dest_ = simde__m128d_to_private(dest),
-				 src_ = simde__m128d_to_private(src);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	uint64x2_t sign_pos =
-		vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0)));
-#else
-	simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0);
-	uint64_t u64_nz;
-	simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz));
-	uint64x2_t sign_pos = vdupq_n_u64(u64_nz);
-#endif
-	r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64);
-#elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE)
-#if !defined(HEDLEY_IBM_VERSION)
-	r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64);
-#else
-	r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64);
-#endif
-#elif defined(simde_math_copysign)
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]);
-	}
-#else
-	simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0));
-	return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src),
-			       simde_mm_andnot_pd(sgnbit, dest));
-#endif
-
-	return simde__m128d_from_private(r_);
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src)
-{
-	return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src),
-			       dest);
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_castpd_ps(simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_castpd_ps(a);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vreinterpretq_f32_f64(a);
-#else
-	simde__m128 r;
-	simde_memcpy(&r, &a, sizeof(a));
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_castpd_ps(a) simde_mm_castpd_ps(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_castpd_si128(simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_castpd_si128(a);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vreinterpretq_s64_f64(a);
-#else
-	simde__m128i r;
-	simde_memcpy(&r, &a, sizeof(a));
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_castpd_si128(a) simde_mm_castpd_si128(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_castps_pd(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_castps_pd(a);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vreinterpretq_f64_f32(a);
-#else
-	simde__m128d r;
-	simde_memcpy(&r, &a, sizeof(a));
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_castps_pd(a) simde_mm_castps_pd(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_castps_si128(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_castps_si128(a);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32);
-#else
-	simde__m128i r;
-	simde_memcpy(&r, &a, sizeof(a));
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_castps_si128(a) simde_mm_castps_si128(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_castsi128_pd(simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_castsi128_pd(a);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vreinterpretq_f64_s64(a);
-#else
-	simde__m128d r;
-	simde_memcpy(&r, &a, sizeof(a));
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_castsi128_ps(simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_castsi128_ps(a);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32);
-#else
-	simde__m128 r;
-	simde_memcpy(&r, &a, sizeof(a));
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_cmpeq_epi8(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpeq_epi8(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(signed char),
-		vec_cmpeq(a_.altivec_i8, b_.altivec_i8));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i8 = HEDLEY_STATIC_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
-		r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_cmpeq_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpeq_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(signed short),
-		vec_cmpeq(a_.altivec_i16, b_.altivec_i16));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i16 = (a_.i16 == b_.i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_cmpeq_epi32(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpeq_epi32(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(signed int),
-		vec_cmpeq(a_.altivec_i32, b_.altivec_i32));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), a_.i32 == b_.i32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpeq_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpeq_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_u64 = vceqq_s64(b_.neon_i64, a_.neon_i64);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(double),
-		vec_cmpeq(a_.altivec_f64, b_.altivec_f64));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0)
-						     : UINT64_C(0);
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpeq_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpeq_sd(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-	r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? ~UINT64_C(0) : 0;
-	r_.u64[1] = a_.u64[1];
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpneq_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpneq_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_u32 = vmvnq_u32(
-		vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64)));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0)
-						     : UINT64_C(0);
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpneq_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpneq_sd(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-	r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);
-	r_.u64[1] = a_.u64[1];
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_cmplt_epi8(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmplt_epi8(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(signed char),
-		vec_cmplt(a_.altivec_i8, b_.altivec_i8));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i8 = HEDLEY_STATIC_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
-		r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_cmplt_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmplt_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(signed short),
-		vec_cmplt(a_.altivec_i16, b_.altivec_i16));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i16 = HEDLEY_STATIC_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_cmplt_epi32(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmplt_epi32(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(signed int),
-		vec_cmplt(a_.altivec_i32, b_.altivec_i32));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmplt_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmplt_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64));
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0)
-						    : UINT64_C(0);
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmplt_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmplt_sd(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-	r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);
-	r_.u64[1] = a_.u64[1];
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmple_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmple_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64));
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(double),
-		vec_cmple(a_.altivec_f64, b_.altivec_f64));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0)
-						     : UINT64_C(0);
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmple_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmple_sd(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-	r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);
-	r_.u64[1] = a_.u64[1];
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_cmpgt_epi8(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpgt_epi8(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(signed char),
-		vec_cmpgt(a_.altivec_i8, b_.altivec_i8));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i8 = HEDLEY_STATIC_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
-		r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_cmpgt_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpgt_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(signed short),
-		vec_cmpgt(a_.altivec_i16, b_.altivec_i16));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i16 = HEDLEY_STATIC_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_cmpgt_epi32(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpgt_epi32(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(
-		SIMDE_POWER_ALTIVEC_VECTOR(signed int),
-		vec_cmpgt(a_.altivec_i32, b_.altivec_i32));
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpgt_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpgt_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64));
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f64 =
-		HEDLEY_STATIC_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double),
-				   vec_cmpgt(a_.altivec_f64, b_.altivec_f64));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0)
-						    : UINT64_C(0);
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpgt_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
-	return _mm_cmpgt_sd(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-	r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);
-	r_.u64[1] = a_.u64[1];
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpge_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpge_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64));
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_f64 =
-		HEDLEY_STATIC_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double),
-				   vec_cmpge(a_.altivec_f64, b_.altivec_f64));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0)
-						     : UINT64_C(0);
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpge_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
-	return _mm_cmpge_sd(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-	r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);
-	r_.u64[1] = a_.u64[1];
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpngt_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpngt_pd(a, b);
-#else
-	return simde_mm_cmple_pd(a, b);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpngt_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
-	return _mm_cmpngt_sd(a, b);
-#else
-	return simde_mm_cmple_sd(a, b);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpnge_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpnge_pd(a, b);
-#else
-	return simde_mm_cmplt_pd(a, b);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpnge_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
-	return _mm_cmpnge_sd(a, b);
-#else
-	return simde_mm_cmplt_sd(a, b);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpnlt_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpnlt_pd(a, b);
-#else
-	return simde_mm_cmpge_pd(a, b);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpnlt_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpnlt_sd(a, b);
-#else
-	return simde_mm_cmpge_sd(a, b);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpnle_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpnle_pd(a, b);
-#else
-	return simde_mm_cmpgt_pd(a, b);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpnle_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpnle_sd(a, b);
-#else
-	return simde_mm_cmpgt_sd(a, b);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpord_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpord_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	/* Note: NEON does not have ordered compare builtin
-        Need to compare a eq a and b eq b to check for NaN
-        Do AND of results to get final */
-	uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64);
-	uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64);
-	r_.neon_u64 = vandq_u64(ceqaa, ceqbb);
-#elif defined(simde_math_isnan)
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.u64[i] = (!simde_math_isnan(a_.f64[i]) &&
-			     !simde_math_isnan(b_.f64[i]))
-				    ? ~UINT64_C(0)
-				    : UINT64_C(0);
-	}
-#else
-	HEDLEY_UNREACHABLE();
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde_float64 simde_mm_cvtsd_f64(simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
-	return _mm_cvtsd_f64(a);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return HEDLEY_STATIC_CAST(simde_float64,
-				  vgetq_lane_f64(a_.neon_f64, 0));
-#else
-	return a_.f64[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpord_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpord_sd(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(simde_math_isnan)
-	r_.u64[0] =
-		(!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0]))
-			? ~UINT64_C(0)
-			: UINT64_C(0);
-	r_.u64[1] = a_.u64[1];
-#else
-	HEDLEY_UNREACHABLE();
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpunord_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpunord_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64);
-	uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64);
-	r_.neon_u64 = vreinterpretq_u64_u32(
-		vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb))));
-#elif defined(simde_math_isnan)
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.u64[i] = (simde_math_isnan(a_.f64[i]) ||
-			     simde_math_isnan(b_.f64[i]))
-				    ? ~UINT64_C(0)
-				    : UINT64_C(0);
-	}
-#else
-	HEDLEY_UNREACHABLE();
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cmpunord_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cmpunord_sd(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(simde_math_isnan)
-	r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0]))
-			    ? ~UINT64_C(0)
-			    : UINT64_C(0);
-	r_.u64[1] = a_.u64[1];
-#else
-	HEDLEY_UNREACHABLE();
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cvtepi32_pd(simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cvtepi32_pd(a);
-#else
-	simde__m128d_private r_;
-	simde__m128i_private a_ = simde__m128i_to_private(a);
-
-#if defined(SIMDE_CONVERT_VECTOR_)
-	SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.f64[i] = (simde_float64)a_.i32[i];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cvtepi32_ps(simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cvtepi32_ps(a);
-#else
-	simde__m128_private r_;
-	simde__m128i_private a_ = simde__m128i_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	HEDLEY_DIAGNOSTIC_PUSH
-#if HEDLEY_HAS_WARNING("-Wc11-extensions")
-#pragma clang diagnostic ignored "-Wc11-extensions"
-#endif
-	r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0);
-	HEDLEY_DIAGNOSTIC_POP
-#elif defined(SIMDE_CONVERT_VECTOR_)
-	SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
-		r_.f32[i] = (simde_float32)a_.i32[i];
-	}
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_cvtpd_pi32(simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvtpd_pi32(a);
-#else
-	simde__m64_private r_;
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		simde_float64 v = simde_math_round(a_.f64[i]);
-#if defined(SIMDE_FAST_CONVERSION_RANGE)
-		r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
-#else
-		r_.i32[i] =
-			((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) &&
-			 (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX)))
-				? SIMDE_CONVERT_FTOI(int32_t, v)
-				: INT32_MIN;
-#endif
-	}
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_cvtpd_epi32(simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cvtpd_epi32(a);
-#else
-	simde__m128i_private r_;
-
-	r_.m64[0] = simde_mm_cvtpd_pi32(a);
-	r_.m64[1] = simde_mm_setzero_si64();
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cvtpd_ps(simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cvtpd_ps(a);
-#else
-	simde__m128_private r_;
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-
-#if defined(SIMDE_CONVERT_VECTOR_)
-	SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.f64);
-	r_.m64_private[1] = simde__m64_to_private(simde_mm_setzero_si64());
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f32 = vreinterpretq_f32_f64(
-		vcombine_f64(vreinterpret_f64_f32(vcvtx_f32_f64(a_.neon_f64)),
-			     vdup_n_f64(0)));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(a_.f64) / sizeof(a_.f64[0])); i++) {
-		r_.f32[i] = (simde_float32)a_.f64[i];
-	}
-	simde_memset(&(r_.m64_private[1]), 0, sizeof(r_.m64_private[1]));
-#endif
-
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cvtpi32_pd(simde__m64 a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvtpi32_pd(a);
-#else
-	simde__m128d_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_CONVERT_VECTOR_)
-	SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.f64[i] = (simde_float64)a_.i32[i];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_cvtps_epi32(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cvtps_epi32(a);
-#else
-	simde__m128i_private r_;
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)
-	r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \
-	defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES)
-	r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && \
-	defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES)
-	HEDLEY_DIAGNOSTIC_PUSH
-	SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_
-	SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_
-	r_.altivec_i32 = vec_cts(a_.altivec_f32, 1);
-	HEDLEY_DIAGNOSTIC_POP
-#else
-	a_ = simde__m128_to_private(
-		simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1));
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		simde_float32 v = simde_math_roundf(a_.f32[i]);
-#if defined(SIMDE_FAST_CONVERSION_RANGE)
-		r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
-#else
-		r_.i32[i] =
-			((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) &&
-			 (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX)))
-				? SIMDE_CONVERT_FTOI(int32_t, v)
-				: INT32_MIN;
-#endif
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cvtps_pd(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cvtps_pd(a);
-#else
-	simde__m128d_private r_;
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_CONVERT_VECTOR_)
-	SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.f64[i] = a_.f32[i];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int32_t simde_mm_cvtsd_si32(simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cvtsd_si32(a);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-
-	simde_float64 v = simde_math_round(a_.f64[0]);
-#if defined(SIMDE_FAST_CONVERSION_RANGE)
-	return SIMDE_CONVERT_FTOI(int32_t, v);
-#else
-	return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) &&
-		(v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX)))
-		       ? SIMDE_CONVERT_FTOI(int32_t, v)
-		       : INT32_MIN;
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int64_t simde_mm_cvtsd_si64(simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)
-#if defined(__PGI)
-	return _mm_cvtsd_si64x(a);
-#else
-	return _mm_cvtsd_si64(a);
-#endif
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-	return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0]));
-#endif
-}
-#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a)
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a)
-#define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128 simde_mm_cvtsd_ss(simde__m128 a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cvtsd_ss(a, b);
-#else
-	simde__m128_private r_, a_ = simde__m128_to_private(a);
-	simde__m128d_private b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f32 = vsetq_lane_f32(
-		vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0);
-#else
-	r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]);
-
-	SIMDE_VECTORIZE
-	for (size_t i = 1; i < (sizeof(r_) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = a_.i32[i];
-	}
-#endif
-	return simde__m128_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int16_t simde_x_mm_cvtsi128_si16(simde__m128i a)
-{
-	simde__m128i_private a_ = simde__m128i_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	return vgetq_lane_s16(a_.neon_i16, 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return HEDLEY_STATIC_CAST(int16_t,
-				  wasm_i16x8_extract_lane(a_.wasm_v128, 0));
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-#if defined(SIMDE_BUG_GCC_95227)
-	(void)a_;
-#endif
-	return vec_extract(a_.altivec_i16, 0);
-#else
-	return a_.i16[0];
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-int32_t simde_mm_cvtsi128_si32(simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cvtsi128_si32(a);
-#else
-	simde__m128i_private a_ = simde__m128i_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	return vgetq_lane_s32(a_.neon_i32, 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return HEDLEY_STATIC_CAST(int32_t,
-				  wasm_i32x4_extract_lane(a_.wasm_v128, 0));
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-#if defined(SIMDE_BUG_GCC_95227)
-	(void)a_;
-#endif
-	return vec_extract(a_.altivec_i32, 0);
-#else
-	return a_.i32[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int64_t simde_mm_cvtsi128_si64(simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)
-#if defined(__PGI)
-	return _mm_cvtsi128_si64x(a);
-#else
-	return _mm_cvtsi128_si64(a);
-#endif
-#else
-	simde__m128i_private a_ = simde__m128i_to_private(a);
-#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION)
-	return vec_extract(HEDLEY_REINTERPRET_CAST(
-				   SIMDE_POWER_ALTIVEC_VECTOR(signed long long),
-				   a_.i64),
-			   0);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	return vgetq_lane_s64(a_.neon_i64, 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return HEDLEY_STATIC_CAST(int64_t,
-				  wasm_i64x2_extract_lane(a_.wasm_v128, 0));
-#endif
-	return a_.i64[0];
-#endif
-}
-#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a)
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a)
-#define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cvtsi32_sd(simde__m128d a, int32_t b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cvtsi32_sd(a, b);
-#else
-	simde__m128d_private r_;
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_AMD64)
-	r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b),
-				     a_.neon_f64, 0);
-#else
-	r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b);
-	r_.i64[1] = a_.i64[1];
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_cvtsi16_si128(int16_t a)
-{
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0);
-#else
-	r_.i16[0] = a;
-	r_.i16[1] = 0;
-	r_.i16[2] = 0;
-	r_.i16[3] = 0;
-	r_.i16[4] = 0;
-	r_.i16[5] = 0;
-	r_.i16[6] = 0;
-	r_.i16[7] = 0;
-#endif
-
-	return simde__m128i_from_private(r_);
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_cvtsi32_si128(int32_t a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cvtsi32_si128(a);
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0);
-#else
-	r_.i32[0] = a;
-	r_.i32[1] = 0;
-	r_.i32[2] = 0;
-	r_.i32[3] = 0;
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cvtsi64_sd(simde__m128d a, int64_t b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)
-#if !defined(__PGI)
-	return _mm_cvtsi64_sd(a, b);
-#else
-	return _mm_cvtsi64x_sd(a, b);
-#endif
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b),
-				     a_.neon_f64, 0);
-#else
-	r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b);
-	r_.f64[1] = a_.f64[1];
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b)
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b)
-#define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_cvtsi64_si128(int64_t a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)
-#if !defined(__PGI)
-	return _mm_cvtsi64_si128(a);
-#else
-	return _mm_cvtsi64x_si128(a);
-#endif
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i64x2_make(a, 0);
-#else
-	r_.i64[0] = a;
-	r_.i64[1] = 0;
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a)
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a)
-#define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_cvtss_sd(simde__m128d a, simde__m128 b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cvtss_sd(a, b);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	float64x2_t temp = vcvt_f64_f32(vset_lane_f32(
-		vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0),
-		vdup_n_f32(0), 0));
-	return vsetq_lane_f64(
-		vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp,
-		1);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-	simde__m128_private b_ = simde__m128_to_private(b);
-
-	a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]);
-
-	return simde__m128d_from_private(a_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_cvttpd_pi32(simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_cvttpd_pi32(a);
-#else
-	simde__m64_private r_;
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-
-#if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE)
-	SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64);
-#else
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		simde_float64 v = a_.f64[i];
-#if defined(SIMDE_FAST_CONVERSION_RANGE)
-		r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
-#else
-		r_.i32[i] =
-			((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) &&
-			 (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX)))
-				? SIMDE_CONVERT_FTOI(int32_t, v)
-				: INT32_MIN;
-#endif
-	}
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_cvttpd_epi32(simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cvttpd_epi32(a);
-#else
-	simde__m128i_private r_;
-
-	r_.m64[0] = simde_mm_cvttpd_pi32(a);
-	r_.m64[1] = simde_mm_setzero_si64();
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_cvttps_epi32(simde__m128 a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cvttps_epi32(a);
-#else
-	simde__m128i_private r_;
-	simde__m128_private a_ = simde__m128_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)
-	r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32);
-#elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE)
-	SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32);
-#else
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		simde_float32 v = a_.f32[i];
-#if defined(SIMDE_FAST_CONVERSION_RANGE)
-		r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
-#else
-		r_.i32[i] =
-			((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) &&
-			 (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX)))
-				? SIMDE_CONVERT_FTOI(int32_t, v)
-				: INT32_MIN;
-#endif
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int32_t simde_mm_cvttsd_si32(simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_cvttsd_si32(a);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-	simde_float64 v = a_.f64[0];
-#if defined(SIMDE_FAST_CONVERSION_RANGE)
-	return SIMDE_CONVERT_FTOI(int32_t, v);
-#else
-	return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) &&
-		(v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX)))
-		       ? SIMDE_CONVERT_FTOI(int32_t, v)
-		       : INT32_MIN;
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int64_t simde_mm_cvttsd_si64(simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)
-#if !defined(__PGI)
-	return _mm_cvttsd_si64(a);
-#else
-	return _mm_cvttsd_si64x(a);
-#endif
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-	return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]);
-#endif
-}
-#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a)
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a)
-#define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_div_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_div_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.f64 = a_.f64 / b_.f64;
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.f64[i] = a_.f64[i] / b_.f64[i];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_div_pd(a, b) simde_mm_div_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_div_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_div_sd(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_sd(a, simde_mm_div_pd(a, b));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64);
-	r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1);
-#else
-	r_.f64[0] = a_.f64[0] / b_.f64[0];
-	r_.f64[1] = a_.f64[1];
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_div_sd(a, b) simde_mm_div_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int32_t simde_mm_extract_epi16(simde__m128i a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7)
-{
-	uint16_t r;
-	simde__m128i_private a_ = simde__m128i_to_private(a);
-
-#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-#if defined(SIMDE_BUG_GCC_95227)
-	(void)a_;
-	(void)imm8;
-#endif
-	r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8));
-#else
-	r = a_.u16[imm8 & 7];
-#endif
-
-	return HEDLEY_STATIC_CAST(int32_t, r);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE) && \
-	(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4, 6, 0))
-#define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8)
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define simde_mm_extract_epi16(a, imm8)                                       \
-	(HEDLEY_STATIC_CAST(                                                  \
-		 int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, \
-					 (imm8))) &                           \
-	 (INT32_C(0x0000ffff)))
-#endif
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_insert_epi16(simde__m128i a, int16_t i, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7)
-{
-	simde__m128i_private a_ = simde__m128i_to_private(a);
-	a_.i16[imm8 & 7] = i;
-	return simde__m128i_from_private(a_);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
-#define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8))
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define simde_mm_insert_epi16(a, i, imm8) \
-	simde__m128i_from_neon_i16(       \
-		vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8)))
-#endif
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d
-simde_mm_load_pd(simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)])
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_load_pd(mem_addr);
-#else
-	simde__m128d_private r_;
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vld1q_f64(mem_addr);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 =
-		vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const *, mem_addr));
-#else
-	simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d),
-		     sizeof(r_));
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_load1_pd(simde_float64 const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_load1_pd(mem_addr);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return simde__m128d_from_wasm_v128(wasm_v64x2_load_splat(mem_addr));
-#else
-	return simde_mm_set1_pd(*mem_addr);
-#endif
-}
-#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr)
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr)
-#define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_load_sd(simde_float64 const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_load_sd(mem_addr);
-#else
-	simde__m128d_private r_;
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0);
-#else
-	r_.f64[0] = *mem_addr;
-	r_.u64[1] = UINT64_C(0);
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_load_si128(simde__m128i const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_load_si128(
-		HEDLEY_REINTERPRET_CAST(__m128i const *, mem_addr));
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32 = vec_ld(
-		0, HEDLEY_REINTERPRET_CAST(
-			   SIMDE_POWER_ALTIVEC_VECTOR(int) const *, mem_addr));
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 =
-		vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const *, mem_addr));
-#else
-	simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i),
-		     sizeof(simde__m128i));
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_loadh_pd(simde__m128d a, simde_float64 const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_loadh_pd(a, mem_addr);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vcombine_f64(
-		vget_low_f64(a_.neon_f64),
-		vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t *, mem_addr)));
-#else
-	simde_float64 t;
-
-	simde_memcpy(&t, mem_addr, sizeof(t));
-	r_.f64[0] = a_.f64[0];
-	r_.f64[1] = t;
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_loadl_epi64(simde__m128i const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_loadl_epi64(mem_addr);
-#else
-	simde__m128i_private r_;
-
-	int64_t value;
-	simde_memcpy(&value, mem_addr, sizeof(value));
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = vcombine_s64(
-		vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)),
-		vdup_n_s64(0));
-#else
-	r_.i64[0] = value;
-	r_.i64[1] = 0;
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_loadl_pd(simde__m128d a, simde_float64 const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_loadl_pd(a, mem_addr);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vcombine_f64(
-		vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t *, mem_addr)),
-		vget_high_f64(a_.neon_f64));
-#else
-	r_.f64[0] = *mem_addr;
-	r_.u64[1] = a_.u64[1];
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d
-simde_mm_loadr_pd(simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)])
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_loadr_pd(mem_addr);
-#else
-	simde__m128d_private r_;
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vld1q_f64(mem_addr);
-	r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 =
-		vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr));
-	r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	v128_t tmp = wasm_v128_load(mem_addr);
-	r_.wasm_v128 = wasm_v64x2_shuffle(tmp, tmp, 1, 0);
-#else
-	r_.f64[0] = mem_addr[1];
-	r_.f64[1] = mem_addr[0];
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d
-simde_mm_loadu_pd(simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)])
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_loadu_pd(mem_addr);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vld1q_f64(mem_addr);
-#else
-	simde__m128d_private r_;
-
-	simde_memcpy(&r_, mem_addr, sizeof(r_));
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_loadu_epi8(int8_t const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_loadu_si128(
-		SIMDE_ALIGN_CAST(simde__m128i const *, mem_addr));
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i8 =
-		vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const *, mem_addr));
-#else
-	simde_memcpy(&r_, mem_addr, sizeof(r_));
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_loadu_epi16(int16_t const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_loadu_si128(
-		SIMDE_ALIGN_CAST(simde__m128i const *, mem_addr));
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 =
-		vld1q_s16(HEDLEY_REINTERPRET_CAST(int16_t const *, mem_addr));
-#else
-	simde_memcpy(&r_, mem_addr, sizeof(r_));
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_loadu_epi32(int32_t const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_loadu_si128(
-		SIMDE_ALIGN_CAST(simde__m128i const *, mem_addr));
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 =
-		vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const *, mem_addr));
-#else
-	simde_memcpy(&r_, mem_addr, sizeof(r_));
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_loadu_epi64(int64_t const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_loadu_si128(
-		SIMDE_ALIGN_CAST(simde__m128i const *, mem_addr));
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 =
-		vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr));
-#else
-	simde_memcpy(&r_, mem_addr, sizeof(r_));
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_loadu_si128(void const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const *, mem_addr));
-#else
-	simde__m128i_private r_;
-
-#if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias, 3, 3, 0)
-	HEDLEY_DIAGNOSTIC_PUSH
-	SIMDE_DIAGNOSTIC_DISABLE_PACKED_
-	struct simde_mm_loadu_si128_s {
-		__typeof__(r_) v;
-	} __attribute__((__packed__, __may_alias__));
-	r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *,
-				     mem_addr)
-		     ->v;
-	HEDLEY_DIAGNOSTIC_POP
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	/* Note that this is a lower priority than the struct above since
-       * clang assumes mem_addr is aligned (since it is a __m128i*). */
-	r_.neon_i32 =
-		vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const *, mem_addr));
-#else
-	simde_memcpy(&r_, mem_addr, sizeof(r_));
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_madd_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_madd_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	int32x4_t pl =
-		vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16));
-	int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16);
-	r_.neon_i32 = vpaddq_s32(pl, ph);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	int32x4_t pl =
-		vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16));
-	int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16),
-				 vget_high_s16(b_.neon_i16));
-	int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl));
-	int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph));
-	r_.neon_i32 = vcombine_s32(rl, rh);
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	static const SIMDE_POWER_ALTIVEC_VECTOR(int) tz = {0, 0, 0, 0};
-	r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, tz);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i16[0])); i += 2) {
-		r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) +
-				(a_.i16[i + 1] * b_.i16[i + 1]);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_maskmoveu_si128(simde__m128i a, simde__m128i mask,
-			      int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)])
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char *, mem_addr));
-#else
-	simde__m128i_private a_ = simde__m128i_to_private(a),
-			     mask_ = simde__m128i_to_private(mask);
-
-	for (size_t i = 0; i < (sizeof(a_.i8) / sizeof(a_.i8[0])); i++) {
-		if (mask_.u8[i] & 0x80) {
-			mem_addr[i] = a_.i8[i];
-		}
-	}
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_maskmoveu_si128(a, mask, mem_addr) \
-	simde_mm_maskmoveu_si128(              \
-		(a), (mask),                   \
-		SIMDE_CHECKED_REINTERPRET_CAST(int8_t *, char *, (mem_addr)))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int32_t simde_mm_movemask_epi8(simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER)
-	/* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */
-	return _mm_movemask_epi8(a);
-#else
-	int32_t r = 0;
-	simde__m128i_private a_ = simde__m128i_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	uint8x16_t input = a_.neon_u8;
-	const int8_t xr[16] = {-7, -6, -5, -4, -3, -2, -1, 0,
-			       -7, -6, -5, -4, -3, -2, -1, 0};
-	const uint8x16_t mask_and = vdupq_n_u8(0x80);
-	const int8x16_t mask_shift = vld1q_s8(xr);
-	const uint8x16_t mask_result =
-		vshlq_u8(vandq_u8(input, mask_and), mask_shift);
-	uint8x8_t lo = vget_low_u8(mask_result);
-	uint8x8_t hi = vget_high_u8(mask_result);
-	r = vaddv_u8(lo) + (vaddv_u8(hi) << 8);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	// Use increasingly wide shifts+adds to collect the sign bits
-	// together.
-	// Since the widening shifts would be rather confusing to follow in little endian, everything
-	// will be illustrated in big endian order instead. This has a different result - the bits
-	// would actually be reversed on a big endian machine.
-
-	// Starting input (only half the elements are shown):
-	// 89 ff 1d c0 00 10 99 33
-	uint8x16_t input = a_.neon_u8;
-
-	// Shift out everything but the sign bits with an unsigned shift right.
-	//
-	// Bytes of the vector::
-	// 89 ff 1d c0 00 10 99 33
-	// \  \  \  \  \  \  \  \    high_bits = (uint16x4_t)(input >> 7)
-	//  |  |  |  |  |  |  |  |
-	// 01 01 00 01 00 00 01 00
-	//
-	// Bits of first important lane(s):
-	// 10001001 (89)
-	// \______
-	//        |
-	// 00000001 (01)
-	uint16x8_t high_bits = vreinterpretq_u16_u8(vshrq_n_u8(input, 7));
-
-	// Merge the even lanes together with a 16-bit unsigned shift right + add.
-	// 'xx' represents garbage data which will be ignored in the final result.
-	// In the important bytes, the add functions like a binary OR.
-	//
-	// 01 01 00 01 00 00 01 00
-	//  \_ |  \_ |  \_ |  \_ |   paired16 = (uint32x4_t)(input + (input >> 7))
-	//    \|    \|    \|    \|
-	// xx 03 xx 01 xx 00 xx 02
-	//
-	// 00000001 00000001 (01 01)
-	//        \_______ |
-	//                \|
-	// xxxxxxxx xxxxxx11 (xx 03)
-	uint32x4_t paired16 =
-		vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 7));
-
-	// Repeat with a wider 32-bit shift + add.
-	// xx 03 xx 01 xx 00 xx 02
-	//     \____ |     \____ |  paired32 = (uint64x1_t)(paired16 + (paired16 >> 14))
-	//          \|          \|
-	// xx xx xx 0d xx xx xx 02
-	//
-	// 00000011 00000001 (03 01)
-	//        \\_____ ||
-	//         '----.\||
-	// xxxxxxxx xxxx1101 (xx 0d)
-	uint64x2_t paired32 =
-		vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14));
-
-	// Last, an even wider 64-bit shift + add to get our result in the low 8 bit lanes.
-	// xx xx xx 0d xx xx xx 02
-	//            \_________ |   paired64 = (uint8x8_t)(paired32 + (paired32 >> 28))
-	//                      \|
-	// xx xx xx xx xx xx xx d2
-	//
-	// 00001101 00000010 (0d 02)
-	//     \   \___ |  |
-	//      '---.  \|  |
-	// xxxxxxxx 11010010 (xx d2)
-	uint8x16_t paired64 =
-		vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28));
-
-	// Extract the low 8 bits from each 64-bit lane with 2 8-bit extracts.
-	// xx xx xx xx xx xx xx d2
-	//                      ||  return paired64[0]
-	//                      d2
-	// Note: Little endian would return the correct value 4b (01001011) instead.
-	r = vgetq_lane_u8(paired64, 0) |
-	    (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u8(paired64, 8)) << 8);
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \
-	!defined(HEDLEY_IBM_VERSION) &&         \
-	(SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)
-	static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char)
-		perm = {120, 112, 104, 96, 88, 80, 72, 64,
-			56,  48,  40,  32, 24, 16, 8,  0};
-	r = HEDLEY_STATIC_CAST(
-		int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1));
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \
-	!defined(HEDLEY_IBM_VERSION) &&         \
-	(SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG)
-	static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char)
-		perm = {120, 112, 104, 96, 88, 80, 72, 64,
-			56,  48,  40,  32, 24, 16, 8,  0};
-	r = HEDLEY_STATIC_CAST(
-		int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14));
-#else
-	SIMDE_VECTORIZE_REDUCTION(| : r)
-	for (size_t i = 0; i < (sizeof(a_.u8) / sizeof(a_.u8[0])); i++) {
-		r |= (a_.u8[15 - i] >> 7) << (15 - i);
-	}
-#endif
-
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int32_t simde_mm_movemask_pd(simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_movemask_pd(a);
-#else
-	int32_t r = 0;
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	static const int64_t shift_amount[] = {0, 1};
-	const int64x2_t shift = vld1q_s64(shift_amount);
-	uint64x2_t tmp = vshrq_n_u64(a_.neon_u64, 63);
-	return HEDLEY_STATIC_CAST(int32_t, vaddvq_u64(vshlq_u64(tmp, shift)));
-#else
-	SIMDE_VECTORIZE_REDUCTION(| : r)
-	for (size_t i = 0; i < (sizeof(a_.u64) / sizeof(a_.u64[0])); i++) {
-		r |= (a_.u64[i] >> 63) << i;
-	}
-#endif
-
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_movemask_pd(a) simde_mm_movemask_pd(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_movepi64_pi64(simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_movepi64_pi64(a);
-#else
-	simde__m64_private r_;
-	simde__m128i_private a_ = simde__m128i_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_i64 = vget_low_s64(a_.neon_i64);
-#else
-	r_.i64[0] = a_.i64[0];
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_movpi64_epi64(simde__m64 a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_movpi64_epi64(a);
-#else
-	simde__m128i_private r_;
-	simde__m64_private a_ = simde__m64_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0));
-#else
-	r_.i64[0] = a_.i64[0];
-	r_.i64[1] = 0;
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_min_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_min_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_min_epu8(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_min_epu8(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
-		r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_min_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_min_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_min_pd(a, b) simde_mm_min_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_min_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_min_sd(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_sd(a, simde_mm_min_pd(a, b));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64);
-	r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1);
-#else
-	r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0];
-	r_.f64[1] = a_.f64[1];
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_min_sd(a, b) simde_mm_min_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_max_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_max_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_max_epu8(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_max_epu8(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
-		r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_max_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_max_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-	r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_max_pd(a, b) simde_mm_max_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_max_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_max_sd(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_sd(a, simde_mm_max_pd(a, b));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64);
-	r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1);
-#else
-	r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? a_.f64[0] : b_.f64[0];
-	r_.f64[1] = a_.f64[1];
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_max_sd(a, b) simde_mm_max_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_move_epi64(simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_move_epi64(a);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1);
-#else
-	r_.i64[0] = a_.i64[0];
-	r_.i64[1] = 0;
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_move_epi64(a) simde_mm_move_epi64(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_mul_epu32(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_mul_epu32(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint32x2_t a_lo = vmovn_u64(a_.neon_u64);
-	uint32x2_t b_lo = vmovn_u64(b_.neon_u64);
-	r_.neon_u64 = vmull_u32(a_lo, b_lo);
-#elif defined(SIMDE_SHUFFLE_VECTOR_) && \
-	(SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)
-	__typeof__(a_.u32) z = {
-		0,
-	};
-	a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6);
-	b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6);
-	r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) *
-		 HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u64) / sizeof(r_.u64[0])); i++) {
-		r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) *
-			    HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_mul_epi64(simde__m128i a, simde__m128i b)
-{
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i64 = a_.i64 * b_.i64;
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vmulq_s64(a_.neon_f64, b_.neon_f64);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
-		r_.i64[i] = a_.i64[i] * b_.i64[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_mod_epi64(simde__m128i a, simde__m128i b)
-{
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i64 = a_.i64 % b_.i64;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
-		r_.i64[i] = a_.i64[i] % b_.i64[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_mul_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_mul_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.f64 = a_.f64 * b_.f64;
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.f64[i] = a_.f64[i] * b_.f64[i];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_mul_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_mul_sd(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_sd(a, simde_mm_mul_pd(a, b));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64);
-	r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1);
-#else
-	r_.f64[0] = a_.f64[0] * b_.f64[0];
-	r_.f64[1] = a_.f64[1];
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_mul_su32(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && \
-	!defined(__PGI)
-	return _mm_mul_su32(a, b);
-#else
-	simde__m64_private r_, a_ = simde__m64_to_private(a),
-			       b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.u64[0] = vget_lane_u64(
-		vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64),
-				       vreinterpret_u32_s64(b_.neon_i64))),
-		0);
-#else
-	r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) *
-		    HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]);
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_mulhi_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_mulhi_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	int16x4_t a3210 = vget_low_s16(a_.neon_i16);
-	int16x4_t b3210 = vget_low_s16(b_.neon_i16);
-	int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16);
-	r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210),
-				 vreinterpretq_s16_s32(ab7654));
-#else
-	int16x4_t a7654 = vget_high_s16(a_.neon_i16);
-	int16x4_t b7654 = vget_high_s16(b_.neon_i16);
-	int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */
-	uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210),
-				    vreinterpretq_u16_s32(ab7654));
-	r_.neon_u16 = rv.val[1];
-#endif
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.u16[i] = HEDLEY_STATIC_CAST(
-			uint16_t,
-			(HEDLEY_STATIC_CAST(
-				 uint32_t,
-				 HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) *
-					 HEDLEY_STATIC_CAST(int32_t,
-							    b_.i16[i])) >>
-			 16));
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_mulhi_epu16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
-	return _mm_mulhi_epu16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	uint16x4_t a3210 = vget_low_u16(a_.neon_u16);
-	uint16x4_t b3210 = vget_low_u16(b_.neon_u16);
-	uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16);
-	r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210),
-				 vreinterpretq_u16_u32(ab7654));
-#else
-	uint16x4_t a7654 = vget_high_u16(a_.neon_u16);
-	uint16x4_t b7654 = vget_high_u16(b_.neon_u16);
-	uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */
-	uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210),
-					vreinterpretq_u16_u32(ab7654));
-	r_.neon_u16 = neon_r.val[1];
-#endif
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
-		r_.u16[i] = HEDLEY_STATIC_CAST(
-			uint16_t,
-			HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) *
-					HEDLEY_STATIC_CAST(uint32_t,
-							   b_.u16[i]) >>
-				16);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_mullo_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_mullo_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	(void)a_;
-	(void)b_;
-	r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.u16[i] = HEDLEY_STATIC_CAST(
-			uint16_t,
-			HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) *
-				HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]));
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_or_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_or_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32f = a_.i32f | b_.i32f;
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
-		r_.i32f[i] = a_.i32f[i] | b_.i32f[i];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_or_pd(a, b) simde_mm_or_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_or_si128(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_or_si128(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32f = a_.i32f | b_.i32f;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
-		r_.i32f[i] = a_.i32f[i] | b_.i32f[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_or_si128(a, b) simde_mm_or_si128(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_packs_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_packs_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i8 =
-		vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i8[i] = (a_.i16[i] > INT8_MAX)
-				   ? INT8_MAX
-				   : ((a_.i16[i] < INT8_MIN)
-					      ? INT8_MIN
-					      : HEDLEY_STATIC_CAST(int8_t,
-								   a_.i16[i]));
-		r_.i8[i + 8] = (b_.i16[i] > INT8_MAX)
-				       ? INT8_MAX
-				       : ((b_.i16[i] < INT8_MIN)
-						  ? INT8_MIN
-						  : HEDLEY_STATIC_CAST(
-							    int8_t, b_.i16[i]));
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_packs_epi32(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_packs_epi32(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 =
-		vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32));
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i16[i] = (a_.i32[i] > INT16_MAX)
-				    ? INT16_MAX
-				    : ((a_.i32[i] < INT16_MIN)
-					       ? INT16_MIN
-					       : HEDLEY_STATIC_CAST(int16_t,
-								    a_.i32[i]));
-		r_.i16[i + 4] =
-			(b_.i32[i] > INT16_MAX)
-				? INT16_MAX
-				: ((b_.i32[i] < INT16_MIN)
-					   ? INT16_MIN
-					   : HEDLEY_STATIC_CAST(int16_t,
-								b_.i32[i]));
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_packus_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_packus_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 =
-		vcombine_u8(vqmovun_s16(a_.neon_i16), vqmovun_s16(b_.neon_i16));
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.u8[i] = (a_.i16[i] > UINT8_MAX)
-				   ? UINT8_MAX
-				   : ((a_.i16[i] < 0)
-					      ? UINT8_C(0)
-					      : HEDLEY_STATIC_CAST(uint8_t,
-								   a_.i16[i]));
-		r_.u8[i + 8] =
-			(b_.i16[i] > UINT8_MAX)
-				? UINT8_MAX
-				: ((b_.i16[i] < 0)
-					   ? UINT8_C(0)
-					   : HEDLEY_STATIC_CAST(uint8_t,
-								b_.i16[i]));
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_pause(void)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_pause();
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_pause() (simde_mm_pause())
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_sad_epu8(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_sad_epu8(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8));
-	r_.neon_u64 = vcombine_u64(vpaddl_u32(vpaddl_u16(vget_low_u16(t))),
-				   vpaddl_u32(vpaddl_u16(vget_high_u16(t))));
-#else
-	for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
-		uint16_t tmp = 0;
-		SIMDE_VECTORIZE_REDUCTION(+ : tmp)
-		for (size_t j = 0; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2);
-		     j++) {
-			const size_t e = j + (i * 8);
-			tmp += (a_.u8[e] > b_.u8[e]) ? (a_.u8[e] - b_.u8[e])
-						     : (b_.u8[e] - a_.u8[e]);
-		}
-		r_.i64[i] = tmp;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_set_epi8(int8_t e15, int8_t e14, int8_t e13, int8_t e12,
-			       int8_t e11, int8_t e10, int8_t e9, int8_t e8,
-			       int8_t e7, int8_t e6, int8_t e5, int8_t e4,
-			       int8_t e3, int8_t e2, int8_t e1, int8_t e0)
-{
-
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5,
-			    e4, e3, e2, e1, e0);
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9,
-				       e10, e11, e12, e13, e14, e15);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	SIMDE_ALIGN_LIKE_16(int8x16_t)
-	int8_t data[16] = {e0, e1, e2,  e3,  e4,  e5,  e6,  e7,
-			   e8, e9, e10, e11, e12, e13, e14, e15};
-	r_.neon_i8 = vld1q_s8(data);
-#else
-	r_.i8[0] = e0;
-	r_.i8[1] = e1;
-	r_.i8[2] = e2;
-	r_.i8[3] = e3;
-	r_.i8[4] = e4;
-	r_.i8[5] = e5;
-	r_.i8[6] = e6;
-	r_.i8[7] = e7;
-	r_.i8[8] = e8;
-	r_.i8[9] = e9;
-	r_.i8[10] = e10;
-	r_.i8[11] = e11;
-	r_.i8[12] = e12;
-	r_.i8[13] = e13;
-	r_.i8[14] = e14;
-	r_.i8[15] = e15;
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, \
-		     e2, e1, e0)                                               \
-	simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5,    \
-			  e4, e3, e2, e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_set_epi16(int16_t e7, int16_t e6, int16_t e5, int16_t e4,
-				int16_t e3, int16_t e2, int16_t e1, int16_t e0)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0);
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	SIMDE_ALIGN_LIKE_16(int16x8_t)
-	int16_t data[8] = {e0, e1, e2, e3, e4, e5, e6, e7};
-	r_.neon_i16 = vld1q_s16(data);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7);
-#else
-	r_.i16[0] = e0;
-	r_.i16[1] = e1;
-	r_.i16[2] = e2;
-	r_.i16[3] = e3;
-	r_.i16[4] = e4;
-	r_.i16[5] = e5;
-	r_.i16[6] = e6;
-	r_.i16[7] = e7;
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) \
-	simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_loadu_si16(void const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) &&                 \
-	(SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \
-	 HEDLEY_GCC_VERSION_CHECK(11, 0, 0) ||        \
-	 HEDLEY_INTEL_VERSION_CHECK(20, 21, 1))
-	return _mm_loadu_si16(mem_addr);
-#else
-	int16_t val;
-	simde_memcpy(&val, mem_addr, sizeof(val));
-	return simde_x_mm_cvtsi16_si128(val);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_set_epi32(int32_t e3, int32_t e2, int32_t e1, int32_t e0)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_set_epi32(e3, e2, e1, e0);
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = {e0, e1, e2, e3};
-	r_.neon_i32 = vld1q_s32(data);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3);
-#else
-	r_.i32[0] = e0;
-	r_.i32[1] = e1;
-	r_.i32[2] = e2;
-	r_.i32[3] = e3;
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_loadu_si32(void const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) &&                 \
-	(SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \
-	 HEDLEY_GCC_VERSION_CHECK(11, 0, 0) ||        \
-	 HEDLEY_INTEL_VERSION_CHECK(20, 21, 1))
-	return _mm_loadu_si32(mem_addr);
-#else
-	int32_t val;
-	simde_memcpy(&val, mem_addr, sizeof(val));
-	return simde_mm_cvtsi32_si128(val);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_set_epi64(simde__m64 e1, simde__m64 e0)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_set_epi64(e1, e0);
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0),
-				   simde__m64_to_neon_i64(e1));
-#else
-	r_.m64[0] = e0;
-	r_.m64[1] = e1;
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0)))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_set_epi64x(int64_t e1, int64_t e0)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && \
-	(!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19, 0, 0))
-	return _mm_set_epi64x(e1, e0);
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1};
-	r_.neon_i64 = vld1q_s64(data);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i64x2_make(e0, e1);
-#else
-	r_.i64[0] = e0;
-	r_.i64[1] = e1;
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_loadu_si64(void const *mem_addr)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) &&                 \
-	(SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \
-	 HEDLEY_GCC_VERSION_CHECK(11, 0, 0) ||        \
-	 HEDLEY_INTEL_VERSION_CHECK(20, 21, 1))
-	return _mm_loadu_si64(mem_addr);
-#else
-	int64_t val;
-	simde_memcpy(&val, mem_addr, sizeof(val));
-	return simde_mm_cvtsi64_si128(val);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_set_epu8(uint8_t e15, uint8_t e14, uint8_t e13,
-				 uint8_t e12, uint8_t e11, uint8_t e10,
-				 uint8_t e9, uint8_t e8, uint8_t e7, uint8_t e6,
-				 uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2,
-				 uint8_t e1, uint8_t e0)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_set_epi8(
-		HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14),
-		HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12),
-		HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10),
-		HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8),
-		HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6),
-		HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4),
-		HEDLEY_STATIC_CAST(char, e3), HEDLEY_STATIC_CAST(char, e2),
-		HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0));
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	SIMDE_ALIGN_LIKE_16(uint8x16_t)
-	uint8_t data[16] = {e0, e1, e2,  e3,  e4,  e5,  e6,  e7,
-			    e8, e9, e10, e11, e12, e13, e14, e15};
-	r_.neon_u8 = vld1q_u8(data);
-#else
-	r_.u8[0] = e0;
-	r_.u8[1] = e1;
-	r_.u8[2] = e2;
-	r_.u8[3] = e3;
-	r_.u8[4] = e4;
-	r_.u8[5] = e5;
-	r_.u8[6] = e6;
-	r_.u8[7] = e7;
-	r_.u8[8] = e8;
-	r_.u8[9] = e9;
-	r_.u8[10] = e10;
-	r_.u8[11] = e11;
-	r_.u8[12] = e12;
-	r_.u8[13] = e13;
-	r_.u8[14] = e14;
-	r_.u8[15] = e15;
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_set_epu16(uint16_t e7, uint16_t e6, uint16_t e5,
-				  uint16_t e4, uint16_t e3, uint16_t e2,
-				  uint16_t e1, uint16_t e0)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_set_epi16(
-		HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6),
-		HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4),
-		HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2),
-		HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0));
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	SIMDE_ALIGN_LIKE_16(uint16x8_t)
-	uint16_t data[8] = {e0, e1, e2, e3, e4, e5, e6, e7};
-	r_.neon_u16 = vld1q_u16(data);
-#else
-	r_.u16[0] = e0;
-	r_.u16[1] = e1;
-	r_.u16[2] = e2;
-	r_.u16[3] = e3;
-	r_.u16[4] = e4;
-	r_.u16[5] = e5;
-	r_.u16[6] = e6;
-	r_.u16[7] = e7;
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_set_epu32(uint32_t e3, uint32_t e2, uint32_t e1,
-				  uint32_t e0)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_set_epi32(HEDLEY_STATIC_CAST(int, e3),
-			     HEDLEY_STATIC_CAST(int, e2),
-			     HEDLEY_STATIC_CAST(int, e1),
-			     HEDLEY_STATIC_CAST(int, e0));
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = {e0, e1, e2, e3};
-	r_.neon_u32 = vld1q_u32(data);
-#else
-	r_.u32[0] = e0;
-	r_.u32[1] = e1;
-	r_.u32[2] = e2;
-	r_.u32[3] = e3;
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_set_epu64x(uint64_t e1, uint64_t e0)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && \
-	(!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19, 0, 0))
-	return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1),
-			      HEDLEY_STATIC_CAST(int64_t, e0));
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1};
-	r_.neon_u64 = vld1q_u64(data);
-#else
-	r_.u64[0] = e0;
-	r_.u64[1] = e1;
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_set_sd(simde_float64 a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_set_sd(a);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0);
-#else
-	return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_set_sd(a) simde_mm_set_sd(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_set1_epi8(int8_t a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_set1_epi8(a);
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i8 = vdupq_n_s8(a);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i8x16_splat(a);
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
-		r_.i8[i] = a;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_set1_epi8(a) simde_mm_set1_epi8(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_set1_epi16(int16_t a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_set1_epi16(a);
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vdupq_n_s16(a);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i16x8_splat(a);
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = a;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_set1_epi16(a) simde_mm_set1_epi16(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_set1_epi32(int32_t a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_set1_epi32(a);
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vdupq_n_s32(a);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i32x4_splat(a);
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = a;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_set1_epi32(a) simde_mm_set1_epi32(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_set1_epi64x(int64_t a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && \
-	(!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19, 0, 0))
-	return _mm_set1_epi64x(a);
-#else
-	simde__m128i_private r_;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = vdupq_n_s64(a);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i64x2_splat(a);
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
-		r_.i64[i] = a;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_set1_epi64(simde__m64 a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_set1_epi64(a);
-#else
-	simde__m64_private a_ = simde__m64_to_private(a);
-	return simde_mm_set1_epi64x(a_.i64[0]);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_set1_epi64(a) simde_mm_set1_epi64(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_set1_epu8(uint8_t value)
-{
-#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	return simde__m128i_from_altivec_u8(
-		vec_splats(HEDLEY_STATIC_CAST(unsigned char, value)));
-#else
-	return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value));
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_set1_epu16(uint16_t value)
-{
-#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	return simde__m128i_from_altivec_u16(
-		vec_splats(HEDLEY_STATIC_CAST(unsigned short, value)));
-#else
-	return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value));
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_set1_epu32(uint32_t value)
-{
-#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	return simde__m128i_from_altivec_u32(
-		vec_splats(HEDLEY_STATIC_CAST(unsigned int, value)));
-#else
-	return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value));
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_set1_epu64(uint64_t value)
-{
-#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-	return simde__m128i_from_altivec_u64(
-		vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value)));
-#else
-	return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value));
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_setr_epi8(int8_t e15, int8_t e14, int8_t e13, int8_t e12,
-				int8_t e11, int8_t e10, int8_t e9, int8_t e8,
-				int8_t e7, int8_t e6, int8_t e5, int8_t e4,
-				int8_t e3, int8_t e2, int8_t e1, int8_t e0)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5,
-			     e4, e3, e2, e1, e0);
-#else
-	return simde_mm_set_epi8(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10,
-				 e11, e12, e13, e14, e15);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4,  \
-		      e3, e2, e1, e0)                                        \
-	simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, \
-			   e4, e3, e2, e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_setr_epi16(int16_t e7, int16_t e6, int16_t e5, int16_t e4,
-				 int16_t e3, int16_t e2, int16_t e1, int16_t e0)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0);
-#else
-	return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) \
-	simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_setr_epi32(int32_t e3, int32_t e2, int32_t e1, int32_t e0)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_setr_epi32(e3, e2, e1, e0);
-#else
-	return simde_mm_set_epi32(e0, e1, e2, e3);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_setr_epi64(simde__m64 e1, simde__m64 e0)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_setr_epi64(e1, e0);
-#else
-	return simde_mm_set_epi64(e0, e1);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0)))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_setr_pd(simde_float64 e1, simde_float64 e0)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_setr_pd(e1, e0);
-#else
-	return simde_mm_set_pd(e0, e1);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_setzero_pd(void)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_setzero_pd();
-#else
-	return simde_mm_castsi128_pd(simde_mm_setzero_si128());
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_setzero_pd() simde_mm_setzero_pd()
-#endif
-
-#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_undefined_pd(void)
-{
-	simde__m128d_private r_;
-
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128)
-	r_.n = _mm_undefined_pd();
-#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
-	r_ = simde__m128d_to_private(simde_mm_setzero_pd());
-#endif
-
-	return simde__m128d_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_undefined_pd() simde_mm_undefined_pd()
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_undefined_si128(void)
-{
-	simde__m128i_private r_;
-
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128)
-	r_.n = _mm_undefined_si128();
-#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
-	r_ = simde__m128i_to_private(simde_mm_setzero_si128());
-#endif
-
-	return simde__m128i_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_undefined_si128() (simde_mm_undefined_si128())
-#endif
-
-#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
-HEDLEY_DIAGNOSTIC_POP
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_x_mm_setone_pd(void)
-{
-	return simde_mm_castps_pd(simde_x_mm_setone_ps());
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_setone_si128(void)
-{
-	return simde_mm_castps_si128(simde_x_mm_setone_ps());
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_shuffle_epi32(simde__m128i a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
-{
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a);
-
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3];
-	}
-
-	return simde__m128i_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE)
-#define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8))
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define simde_mm_shuffle_epi32(a, imm8)                                       \
-	__extension__({                                                       \
-		int32x4_t ret;                                                \
-		ret = vmovq_n_s32(vgetq_lane_s32(vreinterpretq_s32_s64(a),    \
-						 (imm8) & (0x3)));            \
-		ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_s64(a), \
-						    ((imm8) >> 2) & 0x3),     \
-				     ret, 1);                                 \
-		ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_s64(a), \
-						    ((imm8) >> 4) & 0x3),     \
-				     ret, 2);                                 \
-		ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_s64(a), \
-						    ((imm8) >> 6) & 0x3),     \
-				     ret, 3);                                 \
-		vreinterpretq_s64_s32(ret);                                   \
-	})
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-#define simde_mm_shuffle_epi32(a, imm8)                               \
-	(__extension__({                                              \
-		const simde__m128i_private simde__tmp_a_ =            \
-			simde__m128i_to_private(a);                   \
-		simde__m128i_from_private((simde__m128i_private){     \
-			.i32 = SIMDE_SHUFFLE_VECTOR_(                 \
-				32, 16, (simde__tmp_a_).i32,          \
-				(simde__tmp_a_).i32, ((imm8)) & 3,    \
-				((imm8) >> 2) & 3, ((imm8) >> 4) & 3, \
-				((imm8) >> 6) & 3)});                 \
-	}))
-#endif
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_shuffle_pd(simde__m128d a, simde__m128d b, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3)
-{
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-	r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1];
-	r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1];
-
-	return simde__m128d_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
-#define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8))
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-#define simde_mm_shuffle_pd(a, b, imm8)                                     \
-	(__extension__({                                                    \
-		simde__m128d_from_private((simde__m128d_private){           \
-			.f64 = SIMDE_SHUFFLE_VECTOR_(                       \
-				64, 16, simde__m128d_to_private(a).f64,     \
-				simde__m128d_to_private(b).f64,             \
-				(((imm8)) & 1), (((imm8) >> 1) & 1) + 2)}); \
-	}))
-#endif
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_shufflehi_epi16(simde__m128i a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
-{
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a);
-
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2);
-	     i++) {
-		r_.i16[i] = a_.i16[i];
-	}
-	for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2);
-	     i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4];
-	}
-
-	return simde__m128i_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE)
-#define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8))
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define simde_mm_shufflehi_epi16(a, imm8)                                      \
-	__extension__({                                                        \
-		int16x8_t ret = vreinterpretq_s16_s64(a);                      \
-		int16x4_t highBits = vget_high_s16(ret);                       \
-		ret = vsetq_lane_s16(vget_lane_s16(highBits, (imm8) & (0x3)),  \
-				     ret, 4);                                  \
-		ret = vsetq_lane_s16(                                          \
-			vget_lane_s16(highBits, ((imm8) >> 2) & 0x3), ret, 5); \
-		ret = vsetq_lane_s16(                                          \
-			vget_lane_s16(highBits, ((imm8) >> 4) & 0x3), ret, 6); \
-		ret = vsetq_lane_s16(                                          \
-			vget_lane_s16(highBits, ((imm8) >> 6) & 0x3), ret, 7); \
-		vreinterpretq_s64_s16(ret);                                    \
-	})
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-#define simde_mm_shufflehi_epi16(a, imm8)                                    \
-	(__extension__({                                                     \
-		const simde__m128i_private simde__tmp_a_ =                   \
-			simde__m128i_to_private(a);                          \
-		simde__m128i_from_private((simde__m128i_private){            \
-			.i16 = SIMDE_SHUFFLE_VECTOR_(                        \
-				16, 16, (simde__tmp_a_).i16,                 \
-				(simde__tmp_a_).i16, 0, 1, 2, 3,             \
-				(((imm8)) & 3) + 4, (((imm8) >> 2) & 3) + 4, \
-				(((imm8) >> 4) & 3) + 4,                     \
-				(((imm8) >> 6) & 3) + 4)});                  \
-	}))
-#endif
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_shufflelo_epi16(simde__m128i a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
-{
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a);
-
-	for (size_t i = 0; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2);
-	     i++) {
-		r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)];
-	}
-	SIMDE_VECTORIZE
-	for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2);
-	     i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = a_.i16[i];
-	}
-
-	return simde__m128i_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE)
-#define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8))
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define simde_mm_shufflelo_epi16(a, imm8)                                     \
-	__extension__({                                                       \
-		int16x8_t ret = vreinterpretq_s16_s64(a);                     \
-		int16x4_t lowBits = vget_low_s16(ret);                        \
-		ret = vsetq_lane_s16(vget_lane_s16(lowBits, (imm8) & (0x3)),  \
-				     ret, 0);                                 \
-		ret = vsetq_lane_s16(                                         \
-			vget_lane_s16(lowBits, ((imm8) >> 2) & 0x3), ret, 1); \
-		ret = vsetq_lane_s16(                                         \
-			vget_lane_s16(lowBits, ((imm8) >> 4) & 0x3), ret, 2); \
-		ret = vsetq_lane_s16(                                         \
-			vget_lane_s16(lowBits, ((imm8) >> 6) & 0x3), ret, 3); \
-		vreinterpretq_s64_s16(ret);                                   \
-	})
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-#define simde_mm_shufflelo_epi16(a, imm8)                                 \
-	(__extension__({                                                  \
-		const simde__m128i_private simde__tmp_a_ =                \
-			simde__m128i_to_private(a);                       \
-		simde__m128i_from_private((simde__m128i_private){         \
-			.i16 = SIMDE_SHUFFLE_VECTOR_(                     \
-				16, 16, (simde__tmp_a_).i16,              \
-				(simde__tmp_a_).i16, (((imm8)) & 3),      \
-				(((imm8) >> 2) & 3), (((imm8) >> 4) & 3), \
-				(((imm8) >> 6) & 3), 4, 5, 6, 7)});       \
-	}))
-#endif
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_sll_epi16(simde__m128i a, simde__m128i count)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_sll_epi16(a, count);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 count_ = simde__m128i_to_private(count);
-
-	if (count_.u64[0] > 15)
-		return simde_mm_setzero_si128();
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.u16 = (a_.u16 << count_.u64[0]);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(
-						     int16_t, count_.u64[0])));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 =
-		((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16)
-			 ? wasm_i16x8_shl(a_.wasm_v128,
-					  HEDLEY_STATIC_CAST(
-						  int32_t,
-						  wasm_i64x2_extract_lane(
-							  count_.wasm_v128, 0)))
-			 : wasm_i16x8_const(0, 0, 0, 0, 0, 0, 0, 0));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
-		r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t,
-					       (a_.u16[i] << count_.u64[0]));
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_sll_epi32(simde__m128i a, simde__m128i count)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_sll_epi32(a, count);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 count_ = simde__m128i_to_private(count);
-
-	if (count_.u64[0] > 31)
-		return simde_mm_setzero_si128();
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.u32 = (a_.u32 << count_.u64[0]);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(
-						     int32_t, count_.u64[0])));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 =
-		((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32)
-			 ? wasm_i32x4_shl(a_.wasm_v128,
-					  HEDLEY_STATIC_CAST(
-						  int32_t,
-						  wasm_i64x2_extract_lane(
-							  count_.wasm_v128, 0)))
-			 : wasm_i32x4_const(0, 0, 0, 0));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
-		r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t,
-					       (a_.u32[i] << count_.u64[0]));
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count)))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_sll_epi64(simde__m128i a, simde__m128i count)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_sll_epi64(a, count);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 count_ = simde__m128i_to_private(count);
-
-	if (count_.u64[0] > 63)
-		return simde_mm_setzero_si128();
-
-	const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]);
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u64 = vshlq_u64(a_.neon_u64,
-				vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s)));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = (s < 64) ? wasm_i64x2_shl(a_.wasm_v128, s)
-				: wasm_i64x2_const(0, 0);
-#else
-#if !defined(SIMDE_BUG_GCC_94488)
-	SIMDE_VECTORIZE
-#endif
-	for (size_t i = 0; i < (sizeof(r_.u64) / sizeof(r_.u64[0])); i++) {
-		r_.u64[i] = a_.u64[i] << s;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count)))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_sqrt_pd(simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_sqrt_pd(a);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vsqrtq_f64(a_.neon_f64);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128);
-#elif defined(simde_math_sqrt)
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.f64[i] = simde_math_sqrt(a_.f64[i]);
-	}
-#else
-	HEDLEY_UNREACHABLE();
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_sqrt_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_sqrt_sd(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_sd(a, simde_mm_sqrt_pd(b));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(simde_math_sqrt)
-	r_.f64[0] = simde_math_sqrt(b_.f64[0]);
-	r_.f64[1] = a_.f64[1];
-#else
-	HEDLEY_UNREACHABLE();
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_srl_epi16(simde__m128i a, simde__m128i count)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_srl_epi16(a, count);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 count_ = simde__m128i_to_private(count);
-
-	const int cnt = HEDLEY_STATIC_CAST(
-		int, (count_.i64[0] > 16 ? 16 : count_.i64[0]));
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u16 = vshlq_u16(a_.neon_u16,
-				vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt)));
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
-		r_.u16[i] = a_.u16[i] >> cnt;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count)))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_srl_epi32(simde__m128i a, simde__m128i count)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_srl_epi32(a, count);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 count_ = simde__m128i_to_private(count);
-
-	const int cnt = HEDLEY_STATIC_CAST(
-		int, (count_.i64[0] > 32 ? 32 : count_.i64[0]));
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vshlq_u32(a_.neon_u32,
-				vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt)));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, cnt);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
-		r_.u32[i] = a_.u32[i] >> cnt;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count)))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_srl_epi64(simde__m128i a, simde__m128i count)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_srl_epi64(a, count);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 count_ = simde__m128i_to_private(count);
-
-	const int cnt = HEDLEY_STATIC_CAST(
-		int, (count_.i64[0] > 64 ? 64 : count_.i64[0]));
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u64 = vshlq_u64(a_.neon_u64,
-				vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt)));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, cnt);
-#else
-#if !defined(SIMDE_BUG_GCC_94488)
-	SIMDE_VECTORIZE
-#endif
-	for (size_t i = 0; i < (sizeof(r_.u64) / sizeof(r_.u64[0])); i++) {
-		r_.u64[i] = a_.u64[i] >> cnt;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count)))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_srai_epi16(simde__m128i a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
-{
-	/* MSVC requires a range of (0, 255). */
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a);
-
-	const int cnt = (imm8 & ~15) ? 15 : imm8;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vshlq_s16(a_.neon_i16,
-				vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt)));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, cnt);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = a_.i16[i] >> cnt;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE)
-#define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8))
-#endif
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_srai_epi32(simde__m128i a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
-{
-	/* MSVC requires a range of (0, 255). */
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a);
-
-	const int cnt = (imm8 & ~31) ? 31 : imm8;
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, cnt);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = a_.i32[i] >> cnt;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE)
-#define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8))
-#endif
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_sra_epi16(simde__m128i a, simde__m128i count)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_sra_epi16(a, count);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 count_ = simde__m128i_to_private(count);
-
-	const int cnt = HEDLEY_STATIC_CAST(
-		int, (count_.i64[0] > 15 ? 15 : count_.i64[0]));
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vshlq_s16(a_.neon_i16,
-				vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt)));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, cnt);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = a_.i16[i] >> cnt;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_sra_epi32(simde__m128i a, simde__m128i count)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32)
-	return _mm_sra_epi32(a, count);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 count_ = simde__m128i_to_private(count);
-
-	const int cnt = count_.u64[0] > 31
-				? 31
-				: HEDLEY_STATIC_CAST(int, count_.u64[0]);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vshlq_s32(a_.neon_i32,
-				vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt)));
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, cnt);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = a_.i32[i] >> cnt;
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count)))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_slli_epi16(simde__m128i a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
-{
-	if (HEDLEY_UNLIKELY((imm8 > 15))) {
-		return simde_mm_setzero_si128();
-	}
-
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.i16 = a_.i16 << (imm8 & 0xff);
-#else
-	const int s =
-		(imm8 >
-		 HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1)
-			? 0
-			: imm8;
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE)
-#define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8)
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define simde_mm_slli_epi16(a, imm8)                                        \
-	(__extension__({                                                    \
-		simde__m128i ret;                                           \
-		if ((imm8) <= 0) {                                          \
-			ret = a;                                            \
-		} else if ((imm8) > 15) {                                   \
-			ret = simde_mm_setzero_si128();                     \
-		} else {                                                    \
-			ret = simde__m128i_from_neon_i16(vshlq_n_s16(       \
-				simde__m128i_to_neon_i16(a), ((imm8)&15))); \
-		}                                                           \
-		ret;                                                        \
-	}))
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-#define simde_mm_slli_epi16(a, imm8)                                          \
-	((imm8 < 16)                                                          \
-		 ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) \
-		 : wasm_i16x8_const(0, 0, 0, 0, 0, 0, 0, 0))
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-#define simde_mm_slli_epi16(a, imm8)                                     \
-	((imm8 & ~15) ? simde_mm_setzero_si128()                         \
-		      : simde__m128i_from_altivec_i16(                   \
-				vec_sl(simde__m128i_to_altivec_i16(a),   \
-				       vec_splat_u16(HEDLEY_STATIC_CAST( \
-					       unsigned short, imm8)))))
-#endif
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_slli_epi32(simde__m128i a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
-{
-	if (HEDLEY_UNLIKELY((imm8 > 31))) {
-		return simde_mm_setzero_si128();
-	}
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.i32 = a_.i32 << imm8;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = a_.i32[i] << (imm8 & 0xff);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE)
-#define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8)
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define simde_mm_slli_epi32(a, imm8)                                        \
-	(__extension__({                                                    \
-		simde__m128i ret;                                           \
-		if ((imm8) <= 0) {                                          \
-			ret = a;                                            \
-		} else if ((imm8) > 31) {                                   \
-			ret = simde_mm_setzero_si128();                     \
-		} else {                                                    \
-			ret = simde__m128i_from_neon_i32(vshlq_n_s32(       \
-				simde__m128i_to_neon_i32(a), ((imm8)&31))); \
-		}                                                           \
-		ret;                                                        \
-	}))
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-#define simde_mm_slli_epi32(a, imm8)                                          \
-	((imm8 < 32)                                                          \
-		 ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) \
-		 : wasm_i32x4_const(0, 0, 0, 0))
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-#define simde_mm_slli_epi32(a, imm8)                                        \
-	(__extension__({                                                    \
-		simde__m128i ret;                                           \
-		if ((imm8) <= 0) {                                          \
-			ret = a;                                            \
-		} else if ((imm8) > 31) {                                   \
-			ret = simde_mm_setzero_si128();                     \
-		} else {                                                    \
-			ret = simde__m128i_from_altivec_i32(                \
-				vec_sl(simde__m128i_to_altivec_i32(a),      \
-				       vec_splats(HEDLEY_STATIC_CAST(       \
-					       unsigned int, (imm8)&31)))); \
-		}                                                           \
-		ret;                                                        \
-	}))
-#endif
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_slli_epi64(simde__m128i a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
-{
-	if (HEDLEY_UNLIKELY((imm8 > 63))) {
-		return simde_mm_setzero_si128();
-	}
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.i64 = a_.i64 << imm8;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
-		r_.i64[i] = a_.i64[i] << (imm8 & 0xff);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE)
-#define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8)
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define simde_mm_slli_epi64(a, imm8)                                        \
-	(__extension__({                                                    \
-		simde__m128i ret;                                           \
-		if ((imm8) <= 0) {                                          \
-			ret = a;                                            \
-		} else if ((imm8) > 63) {                                   \
-			ret = simde_mm_setzero_si128();                     \
-		} else {                                                    \
-			ret = simde__m128i_from_neon_i64(vshlq_n_s64(       \
-				simde__m128i_to_neon_i64(a), ((imm8)&63))); \
-		}                                                           \
-		ret;                                                        \
-	}))
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-#define simde_mm_slli_epi64(a, imm8)                                          \
-	((imm8 < 64)                                                          \
-		 ? wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) \
-		 : wasm_i64x2_const(0, 0))
-#endif
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_srli_epi16(simde__m128i a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
-{
-	if (HEDLEY_UNLIKELY((imm8 > 15))) {
-		return simde_mm_setzero_si128();
-	}
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.u16 = a_.u16 >> imm8;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.u16[i] = a_.u16[i] >> (imm8 & 0xff);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE)
-#define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8)
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define simde_mm_srli_epi16(a, imm8)                                  \
-	(__extension__({                                              \
-		simde__m128i ret;                                     \
-		if ((imm8) <= 0) {                                    \
-			ret = a;                                      \
-		} else if ((imm8) > 15) {                             \
-			ret = simde_mm_setzero_si128();               \
-		} else {                                              \
-			ret = simde__m128i_from_neon_u16(vshrq_n_u16( \
-				simde__m128i_to_neon_u16(a),          \
-				(((imm8)&15) | (((imm8)&15) == 0)))); \
-		}                                                     \
-		ret;                                                  \
-	}))
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-#define simde_mm_srli_epi16(a, imm8)                                          \
-	((imm8 < 16)                                                          \
-		 ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) \
-		 : wasm_i16x8_const(0, 0, 0, 0, 0, 0, 0, 0))
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-#define simde_mm_srli_epi16(a, imm8)                                     \
-	((imm8 & ~15) ? simde_mm_setzero_si128()                         \
-		      : simde__m128i_from_altivec_i16(                   \
-				vec_sr(simde__m128i_to_altivec_i16(a),   \
-				       vec_splat_u16(HEDLEY_STATIC_CAST( \
-					       unsigned short, imm8)))))
-#endif
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_srli_epi32(simde__m128i a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
-{
-	if (HEDLEY_UNLIKELY((imm8 > 31))) {
-		return simde_mm_setzero_si128();
-	}
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-	r_.u32 = a_.u32 >> (imm8 & 0xff);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.u32[i] = a_.u32[i] >> (imm8 & 0xff);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE)
-#define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8)
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define simde_mm_srli_epi32(a, imm8)                                  \
-	(__extension__({                                              \
-		simde__m128i ret;                                     \
-		if ((imm8) <= 0) {                                    \
-			ret = a;                                      \
-		} else if ((imm8) > 31) {                             \
-			ret = simde_mm_setzero_si128();               \
-		} else {                                              \
-			ret = simde__m128i_from_neon_u32(vshrq_n_u32( \
-				simde__m128i_to_neon_u32(a),          \
-				(((imm8)&31) | (((imm8)&31) == 0)))); \
-		}                                                     \
-		ret;                                                  \
-	}))
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-#define simde_mm_srli_epi32(a, imm8)                                          \
-	((imm8 < 32)                                                          \
-		 ? wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) \
-		 : wasm_i32x4_const(0, 0, 0, 0))
-#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
-#define simde_mm_srli_epi32(a, imm8)                                        \
-	(__extension__({                                                    \
-		simde__m128i ret;                                           \
-		if ((imm8) <= 0) {                                          \
-			ret = a;                                            \
-		} else if ((imm8) > 31) {                                   \
-			ret = simde_mm_setzero_si128();                     \
-		} else {                                                    \
-			ret = simde__m128i_from_altivec_i32(                \
-				vec_sr(simde__m128i_to_altivec_i32(a),      \
-				       vec_splats(HEDLEY_STATIC_CAST(       \
-					       unsigned int, (imm8)&31)))); \
-		}                                                           \
-		ret;                                                        \
-	}))
-#endif
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_srli_epi64(simde__m128i a, const int imm8)
-	SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
-{
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a);
-
-	if (HEDLEY_UNLIKELY((imm8 & 63) != imm8))
-		return simde_mm_setzero_si128();
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8));
-#else
-#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488)
-	r_.u64 = a_.u64 >> imm8;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
-		r_.u64[i] = a_.u64[i] >> imm8;
-	}
-#endif
-#endif
-
-	return simde__m128i_from_private(r_);
-}
-#if defined(SIMDE_X86_SSE2_NATIVE)
-#define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8)
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-#define simde_mm_srli_epi64(a, imm8)                                  \
-	(__extension__({                                              \
-		simde__m128i ret;                                     \
-		if ((imm8) <= 0) {                                    \
-			ret = a;                                      \
-		} else if ((imm8) > 63) {                             \
-			ret = simde_mm_setzero_si128();               \
-		} else {                                              \
-			ret = simde__m128i_from_neon_u64(vshrq_n_u64( \
-				simde__m128i_to_neon_u64(a),          \
-				(((imm8)&63) | (((imm8)&63) == 0)))); \
-		}                                                     \
-		ret;                                                  \
-	}))
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-#define simde_mm_srli_epi64(a, imm8)                                          \
-	((imm8 < 64)                                                          \
-		 ? wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) \
-		 : wasm_i64x2_const(0, 0))
-#endif
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_store_pd(simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)],
-		       simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_store_pd(mem_addr, a);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr),
-		  simde__m128d_to_private(a).neon_i64);
-#else
-	simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a,
-		     sizeof(a));
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_store_pd(mem_addr, a) \
-	simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_store1_pd(simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)],
-			simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_store1_pd(mem_addr, a);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0));
-#else
-	mem_addr[0] = a_.f64[0];
-	mem_addr[1] = a_.f64[0];
-#endif
-#endif
-}
-#define simde_mm_store_pd1(mem_addr, a) \
-	simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_store1_pd(mem_addr, a) \
-	simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
-#define _mm_store_pd1(mem_addr, a) \
-	simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_store_sd(simde_float64 *mem_addr, simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_store_sd(mem_addr, a);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0);
-	simde_memcpy(mem_addr, &v, sizeof(v));
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	const int64_t v = vgetq_lane_s64(a_.neon_i64, 0);
-	simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), &v,
-		     sizeof(v));
-#else
-	simde_float64 v = a_.f64[0];
-	simde_memcpy(mem_addr, &v, sizeof(simde_float64));
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_store_sd(mem_addr, a) \
-	simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_store_si128(simde__m128i *mem_addr, simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_store_si128(HEDLEY_STATIC_CAST(__m128i *, mem_addr), a);
-#else
-	simde__m128i_private a_ = simde__m128i_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), a_.neon_i32);
-#else
-	simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_,
-		     sizeof(a_));
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_storeh_pd(simde_float64 *mem_addr, simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_storeh_pd(mem_addr, a);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	*mem_addr = vgetq_lane_f64(a_.neon_f64, 1);
-#else
-	*mem_addr = a_.f64[1];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_storeh_pd(mem_addr, a) \
-	simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_storel_epi64(simde__m128i *mem_addr, simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i *, mem_addr), a);
-#else
-	simde__m128i_private a_ = simde__m128i_to_private(a);
-	int64_t tmp;
-
-	/* memcpy to prevent aliasing, tmp because we can't take the
-     * address of a vector element. */
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	tmp = vgetq_lane_s64(a_.neon_i64, 0);
-#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
-#if defined(SIMDE_BUG_GCC_95227)
-	(void)a_;
-#endif
-	tmp = vec_extract(a_.altivec_i64, 0);
-#else
-	tmp = a_.i64[0];
-#endif
-
-	simde_memcpy(mem_addr, &tmp, sizeof(tmp));
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_storel_pd(simde_float64 *mem_addr, simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_storel_pd(mem_addr, a);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-
-	simde_float64 tmp;
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	tmp = vgetq_lane_f64(a_.neon_f64, 0);
-#else
-	tmp = a_.f64[0];
-#endif
-	simde_memcpy(mem_addr, &tmp, sizeof(tmp));
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_storel_pd(mem_addr, a) \
-	simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_storer_pd(simde_float64 mem_addr[2], simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_storer_pd(mem_addr, a);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr),
-		  vextq_s64(a_.neon_i64, a_.neon_i64, 1));
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0);
-	simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_));
-#else
-	mem_addr[0] = a_.f64[1];
-	mem_addr[1] = a_.f64[0];
-#endif
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_storer_pd(mem_addr, a) \
-	simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_storeu_pd(simde_float64 *mem_addr, simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_storeu_pd(mem_addr, a);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64);
-#else
-	simde_memcpy(mem_addr, &a, sizeof(a));
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_storeu_pd(mem_addr, a) \
-	simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_storeu_si128(simde__m128i *mem_addr, simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i *, mem_addr), a);
-#else
-	simde_memcpy(mem_addr, &a, sizeof(a));
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_storeu_si16(void *mem_addr, simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) &&                 \
-	(SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \
-	 HEDLEY_GCC_VERSION_CHECK(11, 0, 0) ||        \
-	 HEDLEY_INTEL_VERSION_CHECK(20, 21, 1))
-	_mm_storeu_si16(mem_addr, a);
-#else
-	int16_t val = simde_x_mm_cvtsi128_si16(a);
-	simde_memcpy(mem_addr, &val, sizeof(val));
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_storeu_si32(void *mem_addr, simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) &&                 \
-	(SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \
-	 HEDLEY_GCC_VERSION_CHECK(11, 0, 0) ||        \
-	 HEDLEY_INTEL_VERSION_CHECK(20, 21, 1))
-	_mm_storeu_si32(mem_addr, a);
-#else
-	int32_t val = simde_mm_cvtsi128_si32(a);
-	simde_memcpy(mem_addr, &val, sizeof(val));
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_storeu_si64(void *mem_addr, simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) &&                 \
-	(SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \
-	 HEDLEY_GCC_VERSION_CHECK(11, 0, 0) ||        \
-	 HEDLEY_INTEL_VERSION_CHECK(20, 21, 1))
-	_mm_storeu_si64(mem_addr, a);
-#else
-	int64_t val = simde_mm_cvtsi128_si64(a);
-	simde_memcpy(mem_addr, &val, sizeof(val));
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_stream_pd(simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)],
-			simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_stream_pd(mem_addr, a);
-#else
-	simde_memcpy(mem_addr, &a, sizeof(a));
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_stream_pd(mem_addr, a) \
-	simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_stream_si128(simde__m128i *mem_addr, simde__m128i a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)
-	_mm_stream_si128(HEDLEY_STATIC_CAST(__m128i *, mem_addr), a);
-#else
-	simde_memcpy(mem_addr, &a, sizeof(a));
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_stream_si32(int32_t *mem_addr, int32_t a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_stream_si32(mem_addr, a);
-#else
-	*mem_addr = a;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_stream_si64(int64_t *mem_addr, int64_t a)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && \
-	!defined(HEDLEY_MSVC_VERSION)
-	_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int *,
-						       int64_t *, mem_addr),
-			a);
-#else
-	*mem_addr = a;
-#endif
-}
-#define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a)
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_stream_si64(mem_addr, a)                                  \
-	simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(          \
-				     int64_t *, __int64 *, mem_addr), \
-			     a)
-#define _mm_stream_si64x(mem_addr, a)                                 \
-	simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(          \
-				     int64_t *, __int64 *, mem_addr), \
-			     a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_sub_epi8(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_sub_epi8(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i8 = a_.i8 - b_.i8;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
-		r_.i8[i] = a_.i8[i] - b_.i8[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_sub_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_sub_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i16 = a_.i16 - b_.i16;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
-		r_.i16[i] = a_.i16[i] - b_.i16[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_sub_epi32(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_sub_epi32(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32 = a_.i32 - b_.i32;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
-		r_.i32[i] = a_.i32[i] - b_.i32[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_sub_epi64(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_sub_epi64(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i64 = a_.i64 - b_.i64;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
-		r_.i64[i] = a_.i64[i] - b_.i64[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_sub_epu32(simde__m128i a, simde__m128i b)
-{
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.u32 = a_.u32 - b_.u32;
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
-		r_.u32[i] = a_.u32[i] - b_.u32[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_sub_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_sub_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.f64 = a_.f64 - b_.f64;
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.f64[i] = a_.f64[i] - b_.f64[i];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_sub_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_sub_sd(a, b);
-#elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
-	return simde_mm_move_sd(a, simde_mm_sub_pd(a, b));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-	r_.f64[0] = a_.f64[0] - b_.f64[0];
-	r_.f64[1] = a_.f64[1];
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m64 simde_mm_sub_si64(simde__m64 a, simde__m64 b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
-	return _mm_sub_si64(a, b);
-#else
-	simde__m64_private r_, a_ = simde__m64_to_private(a),
-			       b_ = simde__m64_to_private(b);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i64 = a_.i64 - b_.i64;
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64);
-#else
-	r_.i64[0] = a_.i64[0] - b_.i64[0];
-#endif
-
-	return simde__m64_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_subs_epi8(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_subs_epi8(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i8x16_sub_saturate(a_.wasm_v128, b_.wasm_v128);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i8[0])); i++) {
-		if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) {
-			r_.i8[i] = INT8_MIN;
-		} else if ((b_.i8[i]) < 0 &&
-			   (a_.i8[i]) > INT8_MAX + (b_.i8[i])) {
-			r_.i8[i] = INT8_MAX;
-		} else {
-			r_.i8[i] = (a_.i8[i]) - (b_.i8[i]);
-		}
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_subs_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_subs_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_i16x8_sub_saturate(a_.wasm_v128, b_.wasm_v128);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i16[0])); i++) {
-		if (((b_.i16[i]) > 0 &&
-		     (a_.i16[i]) < INT16_MIN + (b_.i16[i]))) {
-			r_.i16[i] = INT16_MIN;
-		} else if ((b_.i16[i]) < 0 &&
-			   (a_.i16[i]) > INT16_MAX + (b_.i16[i])) {
-			r_.i16[i] = INT16_MAX;
-		} else {
-			r_.i16[i] = (a_.i16[i]) - (b_.i16[i]);
-		}
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_subs_epu8(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_subs_epu8(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_u8x16_sub_saturate(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i8[0])); i++) {
-		const int32_t x = a_.u8[i] - b_.u8[i];
-		if (x < 0) {
-			r_.u8[i] = 0;
-		} else if (x > UINT8_MAX) {
-			r_.u8[i] = UINT8_MAX;
-		} else {
-			r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);
-		}
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_subs_epu16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_subs_epu16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_u16x8_sub_saturate(a_.wasm_v128, b_.wasm_v128);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i16[0])); i++) {
-		const int32_t x = a_.u16[i] - b_.u16[i];
-		if (x < 0) {
-			r_.u16[i] = 0;
-		} else if (x > UINT16_MAX) {
-			r_.u16[i] = UINT16_MAX;
-		} else {
-			r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);
-		}
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_ucomieq_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_ucomieq_sd(a, b);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a),
-			     b_ = simde__m128d_to_private(b);
-	int r;
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);
-	uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);
-	uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(
-		vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan))));
-	uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64);
-	r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return wasm_f64x2_extract_lane(a_.wasm_v128, 0) ==
-	       wasm_f64x2_extract_lane(b_.wasm_v128, 0);
-#elif defined(SIMDE_HAVE_FENV_H)
-	fenv_t envp;
-	int x = feholdexcept(&envp);
-	r = a_.f64[0] == b_.f64[0];
-	if (HEDLEY_LIKELY(x == 0))
-		fesetenv(&envp);
-#else
-	r = a_.f64[0] == b_.f64[0];
-#endif
-
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_ucomige_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_ucomige_sd(a, b);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a),
-			     b_ = simde__m128d_to_private(b);
-	int r;
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);
-	uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);
-	uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan);
-	uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64);
-	r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >=
-	       wasm_f64x2_extract_lane(b_.wasm_v128, 0);
-#elif defined(SIMDE_HAVE_FENV_H)
-	fenv_t envp;
-	int x = feholdexcept(&envp);
-	r = a_.f64[0] >= b_.f64[0];
-	if (HEDLEY_LIKELY(x == 0))
-		fesetenv(&envp);
-#else
-	r = a_.f64[0] >= b_.f64[0];
-#endif
-
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_ucomigt_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_ucomigt_sd(a, b);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a),
-			     b_ = simde__m128d_to_private(b);
-	int r;
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);
-	uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);
-	uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan);
-	uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64);
-	r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >
-	       wasm_f64x2_extract_lane(b_.wasm_v128, 0);
-#elif defined(SIMDE_HAVE_FENV_H)
-	fenv_t envp;
-	int x = feholdexcept(&envp);
-	r = a_.f64[0] > b_.f64[0];
-	if (HEDLEY_LIKELY(x == 0))
-		fesetenv(&envp);
-#else
-	r = a_.f64[0] > b_.f64[0];
-#endif
-
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_ucomile_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_ucomile_sd(a, b);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a),
-			     b_ = simde__m128d_to_private(b);
-	int r;
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);
-	uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);
-	uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(
-		vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan))));
-	uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64);
-	r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <=
-	       wasm_f64x2_extract_lane(b_.wasm_v128, 0);
-#elif defined(SIMDE_HAVE_FENV_H)
-	fenv_t envp;
-	int x = feholdexcept(&envp);
-	r = a_.f64[0] <= b_.f64[0];
-	if (HEDLEY_LIKELY(x == 0))
-		fesetenv(&envp);
-#else
-	r = a_.f64[0] <= b_.f64[0];
-#endif
-
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_ucomilt_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_ucomilt_sd(a, b);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a),
-			     b_ = simde__m128d_to_private(b);
-	int r;
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);
-	uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);
-	uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(
-		vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan))));
-	uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64);
-	r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <
-	       wasm_f64x2_extract_lane(b_.wasm_v128, 0);
-#elif defined(SIMDE_HAVE_FENV_H)
-	fenv_t envp;
-	int x = feholdexcept(&envp);
-	r = a_.f64[0] < b_.f64[0];
-	if (HEDLEY_LIKELY(x == 0))
-		fesetenv(&envp);
-#else
-	r = a_.f64[0] < b_.f64[0];
-#endif
-
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-int simde_mm_ucomineq_sd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_ucomineq_sd(a, b);
-#else
-	simde__m128d_private a_ = simde__m128d_to_private(a),
-			     b_ = simde__m128d_to_private(b);
-	int r;
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);
-	uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);
-	uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan);
-	uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(
-		vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64))));
-	r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	return wasm_f64x2_extract_lane(a_.wasm_v128, 0) !=
-	       wasm_f64x2_extract_lane(b_.wasm_v128, 0);
-#elif defined(SIMDE_HAVE_FENV_H)
-	fenv_t envp;
-	int x = feholdexcept(&envp);
-	r = a_.f64[0] != b_.f64[0];
-	if (HEDLEY_LIKELY(x == 0))
-		fesetenv(&envp);
-#else
-	r = a_.f64[0] != b_.f64[0];
-#endif
-
-	return r;
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b)
-#endif
-
-#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
-HEDLEY_DIAGNOSTIC_PUSH
-SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
-#endif
-
-#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
-HEDLEY_DIAGNOSTIC_POP
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_lfence(void)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_lfence();
-#else
-	simde_mm_sfence();
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_lfence() simde_mm_lfence()
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-void simde_mm_mfence(void)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	_mm_mfence();
-#else
-	simde_mm_sfence();
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_mfence() simde_mm_mfence()
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_unpackhi_epi8(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_unpackhi_epi8(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16));
-	int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16));
-	int8x8x2_t result = vzip_s8(a1, b1);
-	r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26,
-				      11, 27, 12, 28, 13, 29, 14, 30, 15, 31);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2); i++) {
-		r_.i8[(i * 2)] =
-			a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)];
-		r_.i8[(i * 2) + 1] =
-			b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_unpackhi_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_unpackhi_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	int16x4_t a1 = vget_high_s16(a_.neon_i16);
-	int16x4_t b1 = vget_high_s16(b_.neon_i16);
-	int16x4x2_t result = vzip_s16(a1, b1);
-	r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6,
-				       14, 7, 15);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2); i++) {
-		r_.i16[(i * 2)] =
-			a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)];
-		r_.i16[(i * 2) + 1] =
-			b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_unpackhi_epi32(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_unpackhi_epi32(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	int32x2_t a1 = vget_high_s32(a_.neon_i32);
-	int32x2_t b1 = vget_high_s32(b_.neon_i32);
-	int32x2x2_t result = vzip_s32(a1, b1);
-	r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2); i++) {
-		r_.i32[(i * 2)] =
-			a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)];
-		r_.i32[(i * 2) + 1] =
-			b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_unpackhi_epi64(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_unpackhi_epi64(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	int64x1_t a_h = vget_high_s64(a_.neon_i64);
-	int64x1_t b_h = vget_high_s64(b_.neon_i64);
-	r_.neon_i64 = vcombine_s64(a_h, b_h);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2); i++) {
-		r_.i64[(i * 2)] =
-			a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)];
-		r_.i64[(i * 2) + 1] =
-			b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_unpackhi_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_unpackhi_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	float64x1_t a_l = vget_high_f64(a_.f64);
-	float64x1_t b_l = vget_high_f64(b_.f64);
-	r_.neon_f64 = vcombine_f64(a_l, b_l);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2); i++) {
-		r_.f64[(i * 2)] =
-			a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)];
-		r_.f64[(i * 2) + 1] =
-			b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_unpacklo_epi8(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_unpacklo_epi8(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16));
-	int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16));
-	int8x8x2_t result = vzip_s8(a1, b1);
-	r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18,
-				      3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2); i++) {
-		r_.i8[(i * 2)] = a_.i8[i];
-		r_.i8[(i * 2) + 1] = b_.i8[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_unpacklo_epi16(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_unpacklo_epi16(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	int16x4_t a1 = vget_low_s16(a_.neon_i16);
-	int16x4_t b1 = vget_low_s16(b_.neon_i16);
-	int16x4x2_t result = vzip_s16(a1, b1);
-	r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2,
-				       10, 3, 11);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2); i++) {
-		r_.i16[(i * 2)] = a_.i16[i];
-		r_.i16[(i * 2) + 1] = b_.i16[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_unpacklo_epi32(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_unpacklo_epi32(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	int32x2_t a1 = vget_low_s32(a_.neon_i32);
-	int32x2_t b1 = vget_low_s32(b_.neon_i32);
-	int32x2x2_t result = vzip_s32(a1, b1);
-	r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2); i++) {
-		r_.i32[(i * 2)] = a_.i32[i];
-		r_.i32[(i * 2) + 1] = b_.i32[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_unpacklo_epi64(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_unpacklo_epi64(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	int64x1_t a_l = vget_low_s64(a_.i64);
-	int64x1_t b_l = vget_low_s64(b_.i64);
-	r_.neon_i64 = vcombine_s64(a_l, b_l);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2); i++) {
-		r_.i64[(i * 2)] = a_.i64[i];
-		r_.i64[(i * 2) + 1] = b_.i64[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_mm_unpacklo_pd(simde__m128d a, simde__m128d b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_unpacklo_pd(a, b);
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a),
-				 b_ = simde__m128d_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	float64x1_t a_l = vget_low_f64(a_.f64);
-	float64x1_t b_l = vget_low_f64(b_.f64);
-	r_.neon_f64 = vcombine_f64(a_l, b_l);
-#elif defined(SIMDE_SHUFFLE_VECTOR_)
-	r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2);
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2); i++) {
-		r_.f64[(i * 2)] = a_.f64[i];
-		r_.f64[(i * 2) + 1] = b_.f64[i];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128d simde_x_mm_negate_pd(simde__m128d a)
-{
-#if defined(SIMDE_X86_SSE_NATIVE)
-	return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0)));
-#else
-	simde__m128d_private r_, a_ = simde__m128d_to_private(a);
-
-#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \
-	(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8, 1, 0))
-	r_.altivec_f64 = vec_neg(a_.altivec_f64);
-#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-	r_.neon_f64 = vnegq_f64(a_.neon_f64);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128);
-#elif defined(SIMDE_VECTOR_NEGATE)
-	r_.f64 = -a_.f64;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
-		r_.f64[i] = -a_.f64[i];
-	}
-#endif
-
-	return simde__m128d_from_private(r_);
-#endif
-}
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_mm_xor_si128(simde__m128i a, simde__m128i b)
-{
-#if defined(SIMDE_X86_SSE2_NATIVE)
-	return _mm_xor_si128(a, b);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a),
-				 b_ = simde__m128i_to_private(b);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32f = a_.i32f ^ b_.i32f;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
-		r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i];
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i simde_x_mm_not_si128(simde__m128i a)
-{
-#if defined(SIMDE_X86_AVX512VL_NATIVE)
-	return _mm_ternarylogic_epi32(a, a, a, 0x55);
-#else
-	simde__m128i_private r_, a_ = simde__m128i_to_private(a);
-
-#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-	r_.neon_i32 = vmvnq_s32(a_.neon_i32);
-#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
-	r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32);
-#elif defined(SIMDE_WASM_SIMD128_NATIVE)
-	r_.wasm_v128 = wasm_v128_not(a_.wasm_v128);
-#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-	r_.i32f = ~a_.i32f;
-#else
-	SIMDE_VECTORIZE
-	for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
-		r_.i32f[i] = ~(a_.i32f[i]);
-	}
-#endif
-
-	return simde__m128i_from_private(r_);
-#endif
-}
-
-#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y))
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
-#define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y)
-#endif
-
-SIMDE_END_DECLS_
-
-HEDLEY_DIAGNOSTIC_POP
-
-#endif /* !defined(SIMDE_X86_SSE2_H) */

+ 10 - 1
libobs/util/sse-intrin.h

@@ -25,8 +25,17 @@
 #ifndef WIN32_LEAN_AND_MEAN
 #define WIN32_LEAN_AND_MEAN
 #endif
+
+#if defined(_MSC_VER) && defined(__cplusplus)
+#include <cmath>
+#endif
+
+#if defined(__APPLE__)
+#include <simd/base.h>
+#endif
+
 #define SIMDE_ENABLE_NATIVE_ALIASES
 PRAGMA_WARN_PUSH
-#include "simde/x86/sse2.h"
+#include <simde/x86/sse2.h>
 PRAGMA_WARN_POP
 #endif