| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
2771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356 |
- /* Copyright (c) 2017-2018 Evan Nemerson <[email protected]>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
- #if !defined(SIMDE__MMX_H)
- #if !defined(SIMDE__MMX_H)
- #define SIMDE__MMX_H
- #endif
- #include "simde-common.h"
/* Backend selection.
 *
 * SIMDE_MMX_NATIVE is defined when native use is forced, or when the
 * compiler targets MMX and native code has not been disabled.  Failing
 * that, SIMDE_MMX_NEON is defined for NEON targets unless NEON has been
 * disabled.  When neither macro ends up defined, only the portable C
 * fallbacks in this header are compiled. */
#if defined(SIMDE_MMX_FORCE_NATIVE) ||                         \
	(defined(__MMX__) && !defined(SIMDE_MMX_NO_NATIVE) && \
	 !defined(SIMDE_NO_NATIVE))
#define SIMDE_MMX_NATIVE
#elif defined(__ARM_NEON) && !defined(SIMDE_MMX_NO_NEON) && \
	!defined(SIMDE_NO_NEON)
#define SIMDE_MMX_NEON
#endif

/* Pull in the intrinsic header that matches the selected backend. */
#if defined(SIMDE_MMX_NATIVE)
#include <mmintrin.h>
#elif defined(SIMDE_MMX_NEON)
#include <arm_neon.h>
#endif
- #include <stdint.h>
- #include <limits.h>
- #include <stdlib.h>
- #include <string.h>
- SIMDE__BEGIN_DECLS
- typedef union {
- #if defined(SIMDE__ENABLE_GCC_VEC_EXT)
- int8_t i8 __attribute__((__vector_size__(8), __may_alias__));
- int16_t i16 __attribute__((__vector_size__(8), __may_alias__));
- int32_t i32 __attribute__((__vector_size__(8), __may_alias__));
- int64_t i64 __attribute__((__vector_size__(8), __may_alias__));
- uint8_t u8 __attribute__((__vector_size__(8), __may_alias__));
- uint16_t u16 __attribute__((__vector_size__(8), __may_alias__));
- uint32_t u32 __attribute__((__vector_size__(8), __may_alias__));
- uint64_t u64 __attribute__((__vector_size__(8), __may_alias__));
- simde_float32 f32 __attribute__((__vector_size__(8), __may_alias__));
- #else
- int8_t i8[8];
- int16_t i16[4];
- int32_t i32[2];
- int64_t i64[1];
- uint8_t u8[8];
- uint16_t u16[4];
- uint32_t u32[2];
- uint64_t u64[1];
- simde_float32 f32[2];
- #endif
- #if defined(SIMDE_MMX_NATIVE)
- __m64 n;
- #elif defined(SIMDE_MMX_NEON)
- int8x8_t neon_i8;
- int16x4_t neon_i16;
- int32x2_t neon_i32;
- int64x1_t neon_i64;
- uint8x8_t neon_u8;
- uint16x4_t neon_u16;
- uint32x2_t neon_u32;
- uint64x1_t neon_u64;
- float32x2_t neon_f32;
- #endif
- } simde__m64;
- #if defined(SIMDE_MMX_NATIVE)
- HEDLEY_STATIC_ASSERT(sizeof(__m64) == sizeof(simde__m64),
- "__m64 size doesn't match simde__m64 size");
- SIMDE__FUNCTION_ATTRIBUTES simde__m64 SIMDE__M64_C(__m64 v)
- {
- simde__m64 r;
- r.n = v;
- return r;
- }
- #elif defined(SIMDE_MMX_NEON)
- #define SIMDE__M64_NEON_C(T, expr) \
- (simde__m64) { .neon_##T = (expr) }
- #endif
- HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect");
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_add_pi8(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_add_pi8(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < 8; i++) {
- r.i8[i] = a.i8[i] + b.i8[i];
- }
- return r;
- #endif
- }
- #define simde_m_paddb(a, b) simde_mm_add_pi8(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_add_pi16(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_add_pi16(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
- r.i16[i] = a.i16[i] + b.i16[i];
- }
- return r;
- #endif
- }
- #define simde_m_paddw(a, b) simde_mm_add_pi16(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_add_pi32(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_add_pi32(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(int32_t)); i++) {
- r.i32[i] = a.i32[i] + b.i32[i];
- }
- return r;
- #endif
- }
- #define simde_m_paddd(a, b) simde_mm_add_pi32(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_adds_pi8(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_adds_pi8(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (int i = 0; i < 8; i++) {
- if ((((b.i8[i]) > 0) && ((a.i8[i]) > (INT8_MAX - (b.i8[i]))))) {
- r.i8[i] = INT8_MAX;
- } else if ((((b.i8[i]) < 0) &&
- ((a.i8[i]) < (INT8_MIN - (b.i8[i]))))) {
- r.i8[i] = INT8_MIN;
- } else {
- r.i8[i] = (a.i8[i]) + (b.i8[i]);
- }
- }
- return r;
- #endif
- }
- #define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_adds_pu8(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_adds_pu8(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < 8; i++) {
- const int32_t x = a.u8[i] + b.u8[i];
- if (x < 0)
- r.u8[i] = 0;
- else if (x > UINT8_MAX)
- r.u8[i] = UINT8_MAX;
- else
- r.u8[i] = (uint8_t)x;
- }
- return r;
- #endif
- }
- #define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_adds_pi16(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_adds_pi16(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (int i = 0; i < 4; i++) {
- if ((((b.i16[i]) > 0) &&
- ((a.i16[i]) > (INT16_MAX - (b.i16[i]))))) {
- r.i16[i] = INT16_MAX;
- } else if ((((b.i16[i]) < 0) &&
- ((a.i16[i]) < (SHRT_MIN - (b.i16[i]))))) {
- r.i16[i] = SHRT_MIN;
- } else {
- r.i16[i] = (a.i16[i]) + (b.i16[i]);
- }
- }
- return r;
- #endif
- }
- #define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_adds_pu16(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_adds_pu16(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
- const uint32_t x = a.u16[i] + b.u16[i];
- if (x > UINT16_MAX)
- r.u16[i] = UINT16_MAX;
- else
- r.u16[i] = (uint16_t)x;
- }
- return r;
- #endif
- }
- #define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_and_si64(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_and_si64(a.n, b.n));
- #else
- simde__m64 r;
- r.i64[0] = a.i64[0] & b.i64[0];
- return r;
- #endif
- }
- #define simde_m_pand(a, b) simde_mm_and_si64(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_andnot_si64(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_andnot_si64(a.n, b.n));
- #else
- simde__m64 r;
- r.i64[0] = ~(a.i64[0]) & b.i64[0];
- return r;
- #endif
- }
- #define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_cmpeq_pi8(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_cmpeq_pi8(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (int i = 0; i < 8; i++) {
- r.i8[i] = (a.i8[i] == b.i8[i]) * 0xff;
- }
- return r;
- #endif
- }
- #define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_cmpeq_pi16(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_cmpeq_pi16(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (int i = 0; i < 4; i++) {
- r.i16[i] = (a.i16[i] == b.i16[i]) * 0xffff;
- }
- return r;
- #endif
- }
- #define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_cmpeq_pi32(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_cmpeq_pi32(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (int i = 0; i < 2; i++) {
- r.i32[i] = (a.i32[i] == b.i32[i]) * 0xffffffff;
- }
- return r;
- #endif
- }
- #define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_cmpgt_pi8(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_cmpgt_pi8(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (int i = 0; i < 8; i++) {
- r.i8[i] = (a.i8[i] > b.i8[i]) * 0xff;
- }
- return r;
- #endif
- }
- #define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_cmpgt_pi16(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_cmpgt_pi16(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (int i = 0; i < 4; i++) {
- r.i16[i] = (a.i16[i] > b.i16[i]) * 0xffff;
- }
- return r;
- #endif
- }
- #define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_cmpgt_pi32(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_cmpgt_pi32(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (int i = 0; i < 2; i++) {
- r.i32[i] = (a.i32[i] > b.i32[i]) * 0xffffffff;
- }
- return r;
- #endif
- }
- #define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- int64_t simde_mm_cvtm64_si64(simde__m64 a)
- {
- #if defined(SIMDE_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI)
- return _mm_cvtm64_si64(a.n);
- #else
- return a.i64[0];
- #endif
- }
- #define simde_m_to_int64(a) simde_mm_cvtm64_si64(a)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_cvtsi32_si64(int32_t a)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_cvtsi32_si64(a));
- #else
- simde__m64 r;
- r.i32[0] = a;
- r.i32[1] = 0;
- return r;
- #endif
- }
- #define simde_m_from_int(a) simde_mm_cvtsi32_si64(a)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_cvtsi64_m64(int64_t a)
- {
- #if defined(SIMDE_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI)
- return SIMDE__M64_C(_mm_cvtsi64_m64(a));
- #else
- simde__m64 r;
- r.i64[0] = a;
- return r;
- #endif
- }
- #define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a)
- SIMDE__FUNCTION_ATTRIBUTES
- int32_t simde_mm_cvtsi64_si32(simde__m64 a)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return _mm_cvtsi64_si32(a.n);
- #else
- return a.i32[0];
- #endif
- }
- SIMDE__FUNCTION_ATTRIBUTES
- void simde_mm_empty(void)
- {
- #if defined(SIMDE_MMX_NATIVE)
- _mm_empty();
- #else
- #endif
- }
- #define simde_m_empty() simde_mm_empty()
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_madd_pi16(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_madd_pi16(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (int i = 0; i < 4; i += 2) {
- r.i32[i / 2] =
- (a.i16[i] * b.i16[i]) + (a.i16[i + 1] * b.i16[i + 1]);
- }
- return r;
- #endif
- }
- #define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_mulhi_pi16(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_mulhi_pi16(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (int i = 0; i < 4; i++) {
- r.i16[i] = (int16_t)((a.i16[i] * b.i16[i]) >> 16);
- }
- return r;
- #endif
- }
- #define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_mullo_pi16(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_mullo_pi16(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (int i = 0; i < 4; i++) {
- r.i16[i] = (int16_t)((a.i16[i] * b.i16[i]) & 0xffff);
- }
- return r;
- #endif
- }
- #define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_or_si64(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_or_si64(a.n, b.n));
- #else
- simde__m64 r;
- r.i64[0] = a.i64[0] | b.i64[0];
- return r;
- #endif
- }
- #define simde_m_por(a, b) simde_mm_or_si64(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_packs_pi16(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_packs_pi16(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
- if (a.i16[i] < INT8_MIN) {
- r.i8[i] = INT8_MIN;
- } else if (a.i16[i] > INT8_MAX) {
- r.i8[i] = INT8_MAX;
- } else {
- r.i8[i] = (int8_t)a.i16[i];
- }
- }
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
- if (b.i16[i] < INT8_MIN) {
- r.i8[i + 4] = INT8_MIN;
- } else if (b.i16[i] > INT8_MAX) {
- r.i8[i + 4] = INT8_MAX;
- } else {
- r.i8[i + 4] = (int8_t)b.i16[i];
- }
- }
- return r;
- #endif
- }
- #define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_packs_pi32(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_packs_pi32(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(a.i32[0])); i++) {
- if (a.i32[i] < SHRT_MIN) {
- r.i16[i] = SHRT_MIN;
- } else if (a.i32[i] > INT16_MAX) {
- r.i16[i] = INT16_MAX;
- } else {
- r.i16[i] = (int16_t)a.i32[i];
- }
- }
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(b.i32[0])); i++) {
- if (b.i32[i] < SHRT_MIN) {
- r.i16[i + 2] = SHRT_MIN;
- } else if (b.i32[i] > INT16_MAX) {
- r.i16[i + 2] = INT16_MAX;
- } else {
- r.i16[i + 2] = (int16_t)b.i32[i];
- }
- }
- return r;
- #endif
- }
- #define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_packs_pu16(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_packs_pu16(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
- if (a.i16[i] > UINT8_MAX) {
- r.u8[i] = UINT8_MAX;
- } else if (a.i16[i] < 0) {
- r.u8[i] = 0;
- } else {
- r.u8[i] = (int8_t)a.i16[i];
- }
- }
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
- if (b.i16[i] > UINT8_MAX) {
- r.u8[i + 4] = UINT8_MAX;
- } else if (b.i16[i] < 0) {
- r.u8[i + 4] = 0;
- } else {
- r.u8[i + 4] = (int8_t)b.i16[i];
- }
- }
- return r;
- #endif
- }
- #define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_set_pi8(int8_t e7, int8_t e6, int8_t e5, int8_t e4,
- int8_t e3, int8_t e2, int8_t e1, int8_t e0)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0));
- #else
- simde__m64 r;
- r.i8[0] = e0;
- r.i8[1] = e1;
- r.i8[2] = e2;
- r.i8[3] = e3;
- r.i8[4] = e4;
- r.i8[5] = e5;
- r.i8[6] = e6;
- r.i8[7] = e7;
- return r;
- #endif
- }
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_x_mm_set_pu8(uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4,
- uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_set_pi8((int8_t)e7, (int8_t)e6, (int8_t)e5,
- (int8_t)e4, (int8_t)e3, (int8_t)e2,
- (int8_t)e1, (int8_t)e0));
- #else
- simde__m64 r;
- r.u8[0] = e0;
- r.u8[1] = e1;
- r.u8[2] = e2;
- r.u8[3] = e3;
- r.u8[4] = e4;
- r.u8[5] = e5;
- r.u8[6] = e6;
- r.u8[7] = e7;
- return r;
- #endif
- }
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_set_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_set_pi16(e3, e2, e1, e0));
- #else
- simde__m64 r;
- r.i16[0] = e0;
- r.i16[1] = e1;
- r.i16[2] = e2;
- r.i16[3] = e3;
- return r;
- #endif
- }
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_x_mm_set_pu16(uint16_t e3, uint16_t e2, uint16_t e1,
- uint16_t e0)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_set_pi16((int16_t)e3, (int16_t)e2, (int16_t)e1,
- (int16_t)e0));
- #else
- simde__m64 r;
- r.u16[0] = e0;
- r.u16[1] = e1;
- r.u16[2] = e2;
- r.u16[3] = e3;
- return r;
- #endif
- }
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_x_mm_set_pu32(uint32_t e1, uint32_t e0)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_set_pi32((int32_t)e1, (int32_t)e0));
- #else
- simde__m64 r;
- r.u32[0] = e0;
- r.u32[1] = e1;
- return r;
- #endif
- }
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_set_pi32(int32_t e1, int32_t e0)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_set_pi32(e1, e0));
- #else
- simde__m64 r;
- r.i32[0] = e0;
- r.i32[1] = e1;
- return r;
- #endif
- }
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_set1_pi8(int8_t a)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_set1_pi8(a));
- #else
- return simde_mm_set_pi8(a, a, a, a, a, a, a, a);
- #endif
- }
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_set1_pi16(int16_t a)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_set1_pi16(a));
- #else
- return simde_mm_set_pi16(a, a, a, a);
- #endif
- }
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_set1_pi32(int32_t a)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_set1_pi32(a));
- #else
- return simde_mm_set_pi32(a, a);
- #endif
- }
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_setr_pi8(int8_t e7, int8_t e6, int8_t e5, int8_t e4,
- int8_t e3, int8_t e2, int8_t e1, int8_t e0)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0));
- #else
- return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7);
- #endif
- }
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_setr_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_setr_pi16(e3, e2, e1, e0));
- #else
- return simde_mm_set_pi16(e0, e1, e2, e3);
- #endif
- }
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_setr_pi32(int32_t e1, int32_t e0)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_setr_pi32(e1, e0));
- #else
- return simde_mm_set_pi32(e0, e1);
- #endif
- }
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_setzero_si64(void)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_setzero_si64());
- #else
- return simde_mm_set_pi32(0, 0);
- #endif
- }
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_sll_pi16(simde__m64 a, simde__m64 count)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_sll_pi16(a.n, count.n));
- #else
- simde__m64 r;
- if (HEDLEY_UNLIKELY(count.u64[0] > 15)) {
- memset(&r, 0, sizeof(r));
- return r;
- }
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (sizeof(r.u16) / sizeof(r.u16[0])); i++) {
- r.u16[i] = a.u16[i] << count.u64[0];
- }
- return r;
- #endif
- }
- #define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_sll_pi32(simde__m64 a, simde__m64 count)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_sll_pi32(a.n, count.n));
- #else
- simde__m64 r;
- if (HEDLEY_UNLIKELY(count.u64[0] > 31)) {
- memset(&r, 0, sizeof(r));
- return r;
- }
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (sizeof(r.u32) / sizeof(r.u32[0])); i++) {
- r.u32[i] = a.u32[i] << count.u64[0];
- }
- return r;
- #endif
- }
- #define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_slli_pi16(simde__m64 a, int count)
- {
- #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
- return SIMDE__M64_C(_mm_slli_pi16(a.n, count));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (sizeof(r.u16) / sizeof(r.u16[0])); i++) {
- r.u16[i] = a.u16[i] << count;
- }
- return r;
- #endif
- }
- #define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_slli_pi32(simde__m64 a, int count)
- {
- #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
- return SIMDE__M64_C(_mm_slli_pi32(a.n, count));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(int)); i++) {
- r.u32[i] = a.u32[i] << count;
- }
- return r;
- #endif
- }
- #define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_slli_si64(simde__m64 a, int count)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_slli_si64(a.n, count));
- #else
- simde__m64 r;
- r.u64[0] = a.u64[0] << count;
- return r;
- #endif
- }
- #define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_sll_si64(simde__m64 a, simde__m64 count)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_sll_si64(a.n, count.n));
- #else
- simde__m64 r;
- if (HEDLEY_UNLIKELY(count.u64[0] > 63)) {
- memset(&r, 0, sizeof(r));
- return r;
- }
- r.u64[0] = a.u64[0] << count.u64[0];
- return r;
- #endif
- }
- #define simde_m_psllq(a, count) simde_mm_sll_si64(a, count)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_srl_pi16(simde__m64 a, simde__m64 count)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_srl_pi16(a.n, count.n));
- #else
- simde__m64 r;
- if (HEDLEY_UNLIKELY(count.u64[0] > 15)) {
- memset(&r, 0, sizeof(r));
- return r;
- }
- SIMDE__VECTORIZE
- for (size_t i = 0; i < sizeof(r.u16) / sizeof(r.u16[0]); i++) {
- r.u16[i] = a.u16[i] >> count.u64[0];
- }
- return r;
- #endif
- }
- #define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_srl_pi32(simde__m64 a, simde__m64 count)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_srl_pi32(a.n, count.n));
- #else
- simde__m64 r;
- if (HEDLEY_UNLIKELY(count.u64[0] > 31)) {
- memset(&r, 0, sizeof(r));
- return r;
- }
- SIMDE__VECTORIZE
- for (size_t i = 0; i < sizeof(r.u32) / sizeof(r.u32[0]); i++) {
- r.u32[i] = a.u32[i] >> count.u64[0];
- }
- return r;
- #endif
- }
- #define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_srli_pi16(simde__m64 a, int count)
- {
- #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
- return SIMDE__M64_C(_mm_srli_pi16(a.n, count));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(uint16_t)); i++) {
- r.u16[i] = a.u16[i] >> count;
- }
- return r;
- #endif
- }
- #define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_srli_pi32(simde__m64 a, int count)
- {
- #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
- return SIMDE__M64_C(_mm_srli_pi32(a.n, count));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(int)); i++) {
- r.u32[i] = a.u32[i] >> count;
- }
- return r;
- #endif
- }
- #define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_srli_si64(simde__m64 a, int count)
- {
- #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
- return SIMDE__M64_C(_mm_srli_si64(a.n, count));
- #else
- simde__m64 r;
- r.u64[0] = a.u64[0] >> count;
- return r;
- #endif
- }
- #define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_srl_si64(simde__m64 a, simde__m64 count)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_srl_si64(a.n, count.n));
- #else
- simde__m64 r;
- if (HEDLEY_UNLIKELY(count.u64[0] > 63)) {
- memset(&r, 0, sizeof(r));
- return r;
- }
- r.u64[0] = a.u64[0] >> count.u64[0];
- return r;
- #endif
- }
- #define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_srai_pi16(simde__m64 a, int count)
- {
- #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
- return SIMDE__M64_C(_mm_srai_pi16(a.n, count));
- #else
- simde__m64 r;
- const uint16_t m =
- (uint16_t)((~0U) << ((sizeof(int16_t) * CHAR_BIT) - count));
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
- const uint16_t is_neg = ((uint16_t)(
- ((a.u16[i]) >> ((sizeof(int16_t) * CHAR_BIT) - 1))));
- r.u16[i] = (a.u16[i] >> count) | (m * is_neg);
- }
- return r;
- #endif
- }
- #define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_srai_pi32(simde__m64 a, int count)
- {
- #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
- return SIMDE__M64_C(_mm_srai_pi32(a.n, count));
- #else
- simde__m64 r;
- const uint32_t m =
- (uint32_t)((~0U) << ((sizeof(int) * CHAR_BIT) - count));
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(int)); i++) {
- const uint32_t is_neg = ((uint32_t)(
- ((a.u32[i]) >> ((sizeof(int) * CHAR_BIT) - 1))));
- r.u32[i] = (a.u32[i] >> count) | (m * is_neg);
- }
- return r;
- #endif
- }
- #define simde_m_srai_pi32(a, count) simde_mm_srai_pi32(a, count)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_sra_pi16(simde__m64 a, simde__m64 count)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_sra_pi16(a.n, count.n));
- #else
- simde__m64 r;
- int cnt = (int)count.i64[0];
- if (cnt > 15 || cnt < 0) {
- for (size_t i = 0; i < (sizeof(r.i16) / sizeof(r.i16[0]));
- i++) {
- r.u16[i] = (a.i16[i] < 0) ? 0xffff : 0x0000;
- }
- } else {
- const uint16_t m = (uint16_t)(
- (~0U) << ((sizeof(int16_t) * CHAR_BIT) - cnt));
- for (size_t i = 0; i < (sizeof(r.i16) / sizeof(r.i16[0]));
- i++) {
- const uint16_t is_neg = a.i16[i] < 0;
- r.u16[i] = (a.u16[i] >> cnt) | (m * is_neg);
- }
- }
- return r;
- #endif
- }
- #define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_sra_pi32(simde__m64 a, simde__m64 count)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_sra_pi32(a.n, count.n));
- #else
- simde__m64 r;
- const uint64_t cnt = count.u64[0];
- if (cnt > 31) {
- for (size_t i = 0; i < (sizeof(r.i32) / sizeof(r.i32[0]));
- i++) {
- r.u32[i] = (a.i32[i] < 0) ? UINT32_MAX : 0;
- }
- } else if (cnt == 0) {
- memcpy(&r, &a, sizeof(r));
- } else {
- const uint32_t m = (uint32_t)(
- (~0U) << ((sizeof(int32_t) * CHAR_BIT) - cnt));
- for (size_t i = 0; i < (sizeof(r.i32) / sizeof(r.i32[0]));
- i++) {
- const uint32_t is_neg = a.i32[i] < 0;
- r.u32[i] = (a.u32[i] >> cnt) | (m * is_neg);
- }
- }
- return r;
- #endif
- }
- #define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_sub_pi8(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_sub_pi8(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < 8; i++) {
- r.i8[i] = a.i8[i] - b.i8[i];
- }
- return r;
- #endif
- }
- #define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_sub_pi16(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_sub_pi16(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
- r.i16[i] = a.i16[i] - b.i16[i];
- }
- return r;
- #endif
- }
- #define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_sub_pi32(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_sub_pi32(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(int)); i++) {
- r.i32[i] = a.i32[i] - b.i32[i];
- }
- return r;
- #endif
- }
- #define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_subs_pi8(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_subs_pi8(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8); i++) {
- if (((b.i8[i]) > 0 && (a.i8[i]) < INT8_MIN + (b.i8[i]))) {
- r.i8[i] = INT8_MIN;
- } else if ((b.i8[i]) < 0 && (a.i8[i]) > INT8_MAX + (b.i8[i])) {
- r.i8[i] = INT8_MAX;
- } else {
- r.i8[i] = (a.i8[i]) - (b.i8[i]);
- }
- }
- return r;
- #endif
- }
- #define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_subs_pu8(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_subs_pu8(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8); i++) {
- const int32_t x = a.u8[i] - b.u8[i];
- if (x < 0) {
- r.u8[i] = 0;
- } else if (x > UINT8_MAX) {
- r.u8[i] = UINT8_MAX;
- } else {
- r.u8[i] = (uint8_t)x;
- }
- }
- return r;
- #endif
- }
- #define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_subs_pi16(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_subs_pi16(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(int16_t)); i++) {
- if (((b.i16[i]) > 0 && (a.i16[i]) < SHRT_MIN + (b.i16[i]))) {
- r.i16[i] = SHRT_MIN;
- } else if ((b.i16[i]) < 0 &&
- (a.i16[i]) > INT16_MAX + (b.i16[i])) {
- r.i16[i] = INT16_MAX;
- } else {
- r.i16[i] = (a.i16[i]) - (b.i16[i]);
- }
- }
- return r;
- #endif
- }
- #define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_subs_pu16(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_subs_pu16(a.n, b.n));
- #else
- simde__m64 r;
- SIMDE__VECTORIZE
- for (size_t i = 0; i < (8 / sizeof(uint16_t)); i++) {
- const int x = a.u16[i] - b.u16[i];
- if (x < 0) {
- r.u16[i] = 0;
- } else if (x > UINT16_MAX) {
- r.u16[i] = UINT16_MAX;
- } else {
- r.u16[i] = (uint16_t)x;
- }
- }
- return r;
- #endif
- }
- #define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_unpackhi_pi8(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_unpackhi_pi8(a.n, b.n));
- #else
- simde__m64 r;
- r.i8[0] = a.i8[4];
- r.i8[1] = b.i8[4];
- r.i8[2] = a.i8[5];
- r.i8[3] = b.i8[5];
- r.i8[4] = a.i8[6];
- r.i8[5] = b.i8[6];
- r.i8[6] = a.i8[7];
- r.i8[7] = b.i8[7];
- return r;
- #endif
- }
- #define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_unpackhi_pi16(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_unpackhi_pi16(a.n, b.n));
- #else
- simde__m64 r;
- r.i16[0] = a.i16[2];
- r.i16[1] = b.i16[2];
- r.i16[2] = a.i16[3];
- r.i16[3] = b.i16[3];
- return r;
- #endif
- }
- #define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_unpackhi_pi32(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_unpackhi_pi32(a.n, b.n));
- #else
- simde__m64 r;
- r.i32[0] = a.i32[1];
- r.i32[1] = b.i32[1];
- return r;
- #endif
- }
- #define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_unpacklo_pi8(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_unpacklo_pi8(a.n, b.n));
- #else
- simde__m64 r;
- r.i8[0] = a.i8[0];
- r.i8[1] = b.i8[0];
- r.i8[2] = a.i8[1];
- r.i8[3] = b.i8[1];
- r.i8[4] = a.i8[2];
- r.i8[5] = b.i8[2];
- r.i8[6] = a.i8[3];
- r.i8[7] = b.i8[3];
- return r;
- #endif
- }
- #define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_unpacklo_pi16(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_unpacklo_pi16(a.n, b.n));
- #else
- simde__m64 r;
- r.i16[0] = a.i16[0];
- r.i16[1] = b.i16[0];
- r.i16[2] = a.i16[1];
- r.i16[3] = b.i16[1];
- return r;
- #endif
- }
- #define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_unpacklo_pi32(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_unpacklo_pi32(a.n, b.n));
- #else
- simde__m64 r;
- r.i32[0] = a.i32[0];
- r.i32[1] = b.i32[0];
- return r;
- #endif
- }
- #define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_xor_si64(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return SIMDE__M64_C(_mm_xor_si64(a.n, b.n));
- #else
- simde__m64 r;
- r.i64[0] = a.i64[0] ^ b.i64[0];
- return r;
- #endif
- }
- #define simde_m_pxor(a, b) simde_mm_xor_si64(a, b)
- SIMDE__FUNCTION_ATTRIBUTES
- int32_t simde_m_to_int(simde__m64 a)
- {
- #if defined(SIMDE_MMX_NATIVE)
- return _m_to_int(a.n);
- #else
- return a.i32[0];
- #endif
- }
- SIMDE__END_DECLS
- #endif /* !defined(SIMDE__MMX_H) */
|