- /* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- * 2017-2020 Evan Nemerson <[email protected]>
- * 2015-2017 John W. Ratcliff <[email protected]>
- * 2015 Brandon Rowlett <[email protected]>
- * 2015 Ken Fast <[email protected]>
- * 2017 Hasindu Gamaarachchi <[email protected]>
- * 2018 Jeff Daily <[email protected]>
- */
- #if !defined(SIMDE_X86_SSE2_H)
- #define SIMDE_X86_SSE2_H
- #include "sse.h"
- HEDLEY_DIAGNOSTIC_PUSH
- SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
- SIMDE_BEGIN_DECLS_
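- /* The *_private unions below give every backend a common, 16-byte view of a
-  * 128-bit register: plain scalar arrays, GCC/Clang vector extensions, and the
-  * native x86, NEON, WASM, and AltiVec types all alias the same storage. */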
- typedef union {
- #if defined(SIMDE_VECTOR_SUBSCRIPT)
- SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- #if defined(SIMDE_HAVE_INT128_)
- SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- #endif
- SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- #else
- SIMDE_ALIGN_TO_16 int8_t i8[16];
- SIMDE_ALIGN_TO_16 int16_t i16[8];
- SIMDE_ALIGN_TO_16 int32_t i32[4];
- SIMDE_ALIGN_TO_16 int64_t i64[2];
- SIMDE_ALIGN_TO_16 uint8_t u8[16];
- SIMDE_ALIGN_TO_16 uint16_t u16[8];
- SIMDE_ALIGN_TO_16 uint32_t u32[4];
- SIMDE_ALIGN_TO_16 uint64_t u64[2];
- #if defined(SIMDE_HAVE_INT128_)
- SIMDE_ALIGN_TO_16 simde_int128 i128[1];
- SIMDE_ALIGN_TO_16 simde_uint128 u128[1];
- #endif
- SIMDE_ALIGN_TO_16 simde_float32 f32[4];
- SIMDE_ALIGN_TO_16 simde_float64 f64[2];
- SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)];
- SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
- #endif
- SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2];
- SIMDE_ALIGN_TO_16 simde__m64 m64[2];
- #if defined(SIMDE_X86_SSE2_NATIVE)
- SIMDE_ALIGN_TO_16 __m128i n;
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- SIMDE_ALIGN_TO_16 int8x16_t neon_i8;
- SIMDE_ALIGN_TO_16 int16x8_t neon_i16;
- SIMDE_ALIGN_TO_16 int32x4_t neon_i32;
- SIMDE_ALIGN_TO_16 int64x2_t neon_i64;
- SIMDE_ALIGN_TO_16 uint8x16_t neon_u8;
- SIMDE_ALIGN_TO_16 uint16x8_t neon_u16;
- SIMDE_ALIGN_TO_16 uint32x4_t neon_u32;
- SIMDE_ALIGN_TO_16 uint64x2_t neon_u64;
- SIMDE_ALIGN_TO_16 float32x4_t neon_f32;
- #if defined(SIMDE_ARCH_AARCH64)
- SIMDE_ALIGN_TO_16 float64x2_t neon_f64;
- #endif
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- SIMDE_ALIGN_TO_16 v128_t wasm_v128;
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8;
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16;
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32;
- #if defined(__INT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- SIMDE_ALIGN_TO_16
- SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f;
- #else
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f;
- #endif
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8;
- SIMDE_ALIGN_TO_16
- SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16;
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32;
- #if defined(__UINT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- SIMDE_ALIGN_TO_16
- SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f;
- #else
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f;
- #endif
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32;
- #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- SIMDE_ALIGN_TO_16
- SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64;
- SIMDE_ALIGN_TO_16
- SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64;
- #endif
- #endif
- } simde__m128i_private;
- typedef union {
- #if defined(SIMDE_VECTOR_SUBSCRIPT)
- SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- #else
- SIMDE_ALIGN_TO_16 int8_t i8[16];
- SIMDE_ALIGN_TO_16 int16_t i16[8];
- SIMDE_ALIGN_TO_16 int32_t i32[4];
- SIMDE_ALIGN_TO_16 int64_t i64[2];
- SIMDE_ALIGN_TO_16 uint8_t u8[16];
- SIMDE_ALIGN_TO_16 uint16_t u16[8];
- SIMDE_ALIGN_TO_16 uint32_t u32[4];
- SIMDE_ALIGN_TO_16 uint64_t u64[2];
- SIMDE_ALIGN_TO_16 simde_float32 f32[4];
- SIMDE_ALIGN_TO_16 simde_float64 f64[2];
- SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)];
- SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
- #endif
- SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2];
- SIMDE_ALIGN_TO_16 simde__m64 m64[2];
- #if defined(SIMDE_X86_SSE2_NATIVE)
- SIMDE_ALIGN_TO_16 __m128d n;
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- SIMDE_ALIGN_TO_16 int8x16_t neon_i8;
- SIMDE_ALIGN_TO_16 int16x8_t neon_i16;
- SIMDE_ALIGN_TO_16 int32x4_t neon_i32;
- SIMDE_ALIGN_TO_16 int64x2_t neon_i64;
- SIMDE_ALIGN_TO_16 uint8x16_t neon_u8;
- SIMDE_ALIGN_TO_16 uint16x8_t neon_u16;
- SIMDE_ALIGN_TO_16 uint32x4_t neon_u32;
- SIMDE_ALIGN_TO_16 uint64x2_t neon_u64;
- SIMDE_ALIGN_TO_16 float32x4_t neon_f32;
- #if defined(SIMDE_ARCH_AARCH64)
- SIMDE_ALIGN_TO_16 float64x2_t neon_f64;
- #endif
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- SIMDE_ALIGN_TO_16 v128_t wasm_v128;
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8;
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16;
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32;
- #if defined(__INT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- SIMDE_ALIGN_TO_16
- SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f;
- #else
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f;
- #endif
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8;
- SIMDE_ALIGN_TO_16
- SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16;
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32;
- #if defined(__UINT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- SIMDE_ALIGN_TO_16
- SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f;
- #else
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f;
- #endif
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32;
- #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- SIMDE_ALIGN_TO_16
- SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64;
- SIMDE_ALIGN_TO_16
- SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;
- SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64;
- #endif
- #endif
- } simde__m128d_private;
- #if defined(SIMDE_X86_SSE2_NATIVE)
- typedef __m128i simde__m128i;
- typedef __m128d simde__m128d;
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- typedef int64x2_t simde__m128i;
- #if defined(SIMDE_ARCH_AARCH64)
- typedef float64x2_t simde__m128d;
- #elif defined(SIMDE_VECTOR_SUBSCRIPT)
- typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- #else
- typedef simde__m128d_private simde__m128d;
- #endif
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- typedef v128_t simde__m128i;
- typedef v128_t simde__m128d;
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i;
- #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d;
- #else
- typedef simde__m128d_private simde__m128d;
- #endif
- #elif defined(SIMDE_VECTOR_SUBSCRIPT)
- typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- typedef simde_float64
- simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- #else
- typedef simde__m128i_private simde__m128i;
- typedef simde__m128d_private simde__m128d;
- #endif
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- typedef simde__m128i __m128i;
- typedef simde__m128d __m128d;
- #endif
- HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect");
- HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private),
- "simde__m128i_private size incorrect");
- HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect");
- HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private),
- "simde__m128d_private size incorrect");
- #if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
- HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16,
- "simde__m128i is not 16-byte aligned");
- HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16,
- "simde__m128i_private is not 16-byte aligned");
- HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16,
- "simde__m128d is not 16-byte aligned");
- HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16,
- "simde__m128d_private is not 16-byte aligned");
- #endif
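- /* Round-trip helpers between the public types and the private unions. They
-  * copy through simde_memcpy so the conversion stays well-defined under strict
-  * aliasing; compilers generally optimize the fixed-size copy away. */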
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde__m128i_from_private(simde__m128i_private v)
- {
- simde__m128i r;
- simde_memcpy(&r, &v, sizeof(r));
- return r;
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i_private simde__m128i_to_private(simde__m128i v)
- {
- simde__m128i_private r;
- simde_memcpy(&r, &v, sizeof(r));
- return r;
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde__m128d_from_private(simde__m128d_private v)
- {
- simde__m128d r;
- simde_memcpy(&r, &v, sizeof(r));
- return r;
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d_private simde__m128d_to_private(simde__m128d v)
- {
- simde__m128d_private r;
- simde_memcpy(&r, &v, sizeof(r));
- return r;
- }
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32)
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64)
- #endif
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i,
- SIMDE_POWER_ALTIVEC_VECTOR(signed char),
- altivec, i8)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i,
- SIMDE_POWER_ALTIVEC_VECTOR(signed short),
- altivec, i16)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i,
- SIMDE_POWER_ALTIVEC_VECTOR(signed int),
- altivec, i32)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
- m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
- m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i,
- SIMDE_POWER_ALTIVEC_VECTOR(unsigned int),
- altivec, u32)
- #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
- m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
- m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64)
- #endif
- #endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32)
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64)
- #endif
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d,
- SIMDE_POWER_ALTIVEC_VECTOR(signed char),
- altivec, i8)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d,
- SIMDE_POWER_ALTIVEC_VECTOR(signed short),
- altivec, i16)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d,
- SIMDE_POWER_ALTIVEC_VECTOR(signed int),
- altivec, i32)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
- m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
- m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d,
- SIMDE_POWER_ALTIVEC_VECTOR(unsigned int),
- altivec, u32)
- #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
- m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(
- m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64)
- #if defined(SIMDE_BUG_GCC_95782)
- SIMDE_FUNCTION_ATTRIBUTES
- SIMDE_POWER_ALTIVEC_VECTOR(double)
- simde__m128d_to_altivec_f64(simde__m128d value)
- {
- simde__m128d_private r_ = simde__m128d_to_private(value);
- return r_.altivec_f64;
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double)
- value)
- {
- simde__m128d_private r_;
- r_.altivec_f64 = value;
- return simde__m128d_from_private(r_);
- }
- #else
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d,
- SIMDE_POWER_ALTIVEC_VECTOR(double),
- altivec, f64)
- #endif
- #endif
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128);
- SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128);
- #endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_set_pd(simde_float64 e1, simde_float64 e0)
- {
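- /* Note the argument order: following _mm_set_pd, e1 is the high element and
-  * e0 the low one, so e0 lands in lane 0 of the result. */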
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_set_pd(e1, e0);
- #else
- simde__m128d_private r_;
- #if defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_make(e0, e1);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- SIMDE_ALIGN_TO_16 simde_float64 data[2] = {e0, e1};
- r_.neon_f64 = vld1q_f64(data);
- #else
- r_.f64[0] = e0;
- r_.f64[1] = e1;
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_set1_pd(simde_float64 a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_set1_pd(a);
- #else
- simde__m128d_private r_;
- #if defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_splat(a);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vdupq_n_f64(a);
- #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
- r_.f64[i] = a;
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #define simde_mm_set_pd1(a) simde_mm_set1_pd(a)
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_set1_pd(a) simde_mm_set1_pd(a)
- #define _mm_set_pd1(a) simde_mm_set1_pd(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_x_mm_abs_pd(simde__m128d a)
- {
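- /* SSE2 has no packed-double absolute value; when AVX-512F is available we
-  * widen to 512 bits (the cast is free), use _mm512_abs_pd, and narrow back. */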
- #if defined(SIMDE_X86_AVX512F_NATIVE) && \
- (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7, 4, 0))
- return _mm512_castpd512_pd128(_mm512_abs_pd(_mm512_castpd128_pd512(a)));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vabsq_f64(a_.neon_f64);
- #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- r_.altivec_f64 = vec_abs(a_.altivec_f64);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.f64[i] = simde_math_fabs(a_.f64[i]);
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_x_mm_not_pd(simde__m128d a)
- {
- #if defined(SIMDE_X86_AVX512VL_NATIVE)
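- /* Truth table 0x55 selects the complement of one operand; with ai passed
-  * three times this yields ~ai. */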
- __m128i ai = _mm_castpd_si128(a);
- return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 = vmvnq_s32(a_.neon_i32);
- #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_v128_not(a_.wasm_v128);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32f = ~a_.i32f;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
- r_.i32f[i] = ~(a_.i32f[i]);
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_x_mm_select_pd(simde__m128d a, simde__m128d b,
- simde__m128d mask)
- {
- /* This function blends two vectors together according to a mask. It
- * is similar to _mm_blendv_pd, except that it is unspecified whether
- * the blend is based on the highest bit in each lane (like blendv)
- * or on plain bitwise operations. That freedom lets us implement the
- * function efficiently everywhere.
- *
- * In other words, the caller promises that every lane of mask is
- * either all zeros (0) or all ones (~0). See the usage sketch after
- * this function. */
- #if defined(SIMDE_X86_SSE4_1_NATIVE)
- return _mm_blendv_pd(a, b, mask);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b),
- mask_ = simde__m128d_to_private(mask);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
- r_.i64[i] = a_.i64[i] ^
- ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]);
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
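- /* A minimal usage sketch (illustrative values only): build a lane mask with a
-  * comparison, then select per lane. simde_mm_cmpgt_pd is defined later in
-  * this header and returns ~0 in lanes where a > b, 0 elsewhere:
-  *
-  *   simde__m128d a    = simde_mm_set_pd(1.0, -2.0);
-  *   simde__m128d b    = simde_mm_set_pd(3.0, -4.0);
-  *   simde__m128d mask = simde_mm_cmpgt_pd(a, b);
-  *   simde__m128d r    = simde_x_mm_select_pd(a, b, mask);
-  *
-  * r takes b in lanes where the mask is ~0 and a elsewhere, i.e. the
-  * element-wise minimum in this example. */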
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_add_epi8(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_add_epi8(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i8 = a_.i8 + b_.i8;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
- r_.i8[i] = a_.i8[i] + b_.i8[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_add_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_add_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i16 = a_.i16 + b_.i16;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.i16[i] = a_.i16[i] + b_.i16[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_add_epi32(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_add_epi32(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32 = a_.i32 + b_.i32;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- r_.i32[i] = a_.i32[i] + b_.i32[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_add_epi64(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_add_epi64(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64);
- #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i64 = a_.i64 + b_.i64;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
- r_.i64[i] = a_.i64[i] + b_.i64[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_add_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_add_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.f64 = a_.f64 + b_.f64;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.f64[i] = a_.f64[i] + b_.f64[i];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_add_pd(a, b) simde_mm_add_pd(a, b)
- #endif
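- /* _mm_move_sd returns a vector whose lower lane comes from b and whose upper
-  * lane comes from a; the scalar (*_sd) operations below are built on it. */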
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_move_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_move_sd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 =
- vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0);
- #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- #if defined(HEDLEY_IBM_VERSION)
- r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1);
- #else
- r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1);
- #endif
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_v64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1);
- #else
- r_.f64[0] = b_.f64[0];
- r_.f64[1] = a_.f64[1];
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_move_sd(a, b) simde_mm_move_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_add_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_add_sd(a, b);
- #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
- return simde_mm_move_sd(a, simde_mm_add_pd(a, b));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- r_.f64[0] = a_.f64[0] + b_.f64[0];
- r_.f64[1] = a_.f64[1];
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_add_sd(a, b) simde_mm_add_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_add_si64(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
- return _mm_add_si64(a, b);
- #else
- simde__m64_private r_, a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64);
- #else
- r_.i64[0] = a_.i64[0] + b_.i64[0];
- #endif
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_add_si64(a, b) simde_mm_add_si64(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_adds_epi8(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_adds_epi8(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i8x16_add_saturate(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8);
- #else
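- /* Portable saturating add: widen to int_fast16_t, add, then clamp to [INT8_MIN, INT8_MAX]. */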
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
- const int_fast16_t tmp =
- HEDLEY_STATIC_CAST(int_fast16_t, a_.i8[i]) +
- HEDLEY_STATIC_CAST(int_fast16_t, b_.i8[i]);
- r_.i8[i] = HEDLEY_STATIC_CAST(
- int8_t,
- ((tmp < INT8_MAX) ? ((tmp > INT8_MIN) ? tmp : INT8_MIN)
- : INT8_MAX));
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_adds_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_adds_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i16x8_add_saturate(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- const int_fast32_t tmp =
- HEDLEY_STATIC_CAST(int_fast32_t, a_.i16[i]) +
- HEDLEY_STATIC_CAST(int_fast32_t, b_.i16[i]);
- r_.i16[i] = HEDLEY_STATIC_CAST(
- int16_t,
- ((tmp < INT16_MAX)
- ? ((tmp > INT16_MIN) ? tmp : INT16_MIN)
- : INT16_MAX));
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_adds_epu8(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_adds_epu8(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_u8x16_add_saturate(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8);
- #else
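- /* Unsigned saturating add: add only when it cannot overflow, otherwise return UINT8_MAX. */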
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
- r_.u8[i] = ((UINT8_MAX - a_.u8[i]) > b_.u8[i])
- ? (a_.u8[i] + b_.u8[i])
- : UINT8_MAX;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_adds_epu16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_adds_epu16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_u16x8_add_saturate(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
- r_.u16[i] = ((UINT16_MAX - a_.u16[i]) > b_.u16[i])
- ? (a_.u16[i] + b_.u16[i])
- : UINT16_MAX;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_and_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_and_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32f = a_.i32f & b_.i32f;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
- r_.i32f[i] = a_.i32f[i] & b_.i32f[i];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_and_pd(a, b) simde_mm_and_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_and_si128(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_and_si128(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32f = a_.i32f & b_.i32f;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
- r_.i32f[i] = a_.i32f[i] & b_.i32f[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_and_si128(a, b) simde_mm_and_si128(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_andnot_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_andnot_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
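- /* vbicq_s32(b, a) computes b & ~a, which matches _mm_andnot_pd's (~a) & b. */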
- r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32f = ~a_.i32f & b_.i32f;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.u64) / sizeof(r_.u64[0])); i++) {
- r_.u64[i] = ~a_.u64[i] & b_.u64[i];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_andnot_si128(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_andnot_si128(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32f = ~a_.i32f & b_.i32f;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
- r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_xor_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_xor_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32f = a_.i32f ^ b_.i32f;
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
- r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_avg_epu8(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_avg_epu8(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && \
- defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
- defined(SIMDE_CONVERT_VECTOR_)
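- /* Widen to 16 bits so the +1 rounding bias cannot overflow, average, then narrow back down. */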
- uint16_t wa SIMDE_VECTOR(32);
- uint16_t wb SIMDE_VECTOR(32);
- uint16_t wr SIMDE_VECTOR(32);
- SIMDE_CONVERT_VECTOR_(wa, a_.u8);
- SIMDE_CONVERT_VECTOR_(wb, b_.u8);
- wr = (wa + wb + 1) >> 1;
- SIMDE_CONVERT_VECTOR_(r_.u8, wr);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
- r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_avg_epu16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_avg_epu16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && \
- defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
- defined(SIMDE_CONVERT_VECTOR_)
- uint32_t wa SIMDE_VECTOR(32);
- uint32_t wb SIMDE_VECTOR(32);
- uint32_t wr SIMDE_VECTOR(32);
- SIMDE_CONVERT_VECTOR_(wa, a_.u16);
- SIMDE_CONVERT_VECTOR_(wb, b_.u16);
- wr = (wa + wb + 1) >> 1;
- SIMDE_CONVERT_VECTOR_(r_.u16, wr);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
- r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_setzero_si128(void)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_setzero_si128();
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 = vdupq_n_s32(0);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0));
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0));
- #elif defined(SIMDE_VECTOR_SUBSCRIPT)
- r_.i32 = __extension__(__typeof__(r_.i32)){0, 0, 0, 0};
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
- r_.i32f[i] = 0;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_setzero_si128() (simde_mm_setzero_si128())
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_bslli_si128(simde__m128i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
- {
- simde__m128i_private r_, a_ = simde__m128i_to_private(a);
- if (HEDLEY_UNLIKELY((imm8 & ~15))) {
- return simde_mm_setzero_si128();
- }
- #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER)
- r_.altivec_i8 =
- #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)
- vec_slo
- #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */
- vec_sro
- #endif
- (a_.altivec_i8,
- vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8)));
- #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)
- r_.u128[0] = a_.u128[0] << (imm8 * 8);
- #else
- r_ = simde__m128i_to_private(simde_mm_setzero_si128());
- for (int i = imm8;
- i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0]));
- i++) {
- r_.i8[i] = a_.i8[i - imm8];
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
- #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8)
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__)
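- /* vextq_s8(zero, a, 16 - imm8) reads 16 bytes of {zero, a} starting at offset 16 - imm8: imm8 zero bytes followed by the low bytes of a, i.e. a byte-shifted left by imm8. */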
- #define simde_mm_bslli_si128(a, imm8) \
- simde__m128i_from_neon_i8( \
- ((imm8) <= 0) \
- ? simde__m128i_to_neon_i8(a) \
- : (((imm8) > 15) \
- ? (vdupq_n_s8(0)) \
- : (vextq_s8(vdupq_n_s8(0), \
- simde__m128i_to_neon_i8(a), \
- 16 - (imm8)))))
- #elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
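- /* Shuffle across the 32-byte concatenation {zero, a}: index (16 - imm8 + i) & 31 picks a[i - imm8] for i >= imm8 and a zero byte otherwise. */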
- #define simde_mm_bslli_si128(a, imm8) \
- (__extension__({ \
- const simde__m128i_private simde__tmp_a_ = \
- simde__m128i_to_private(a); \
- const simde__m128i_private simde__tmp_z_ = \
- simde__m128i_to_private(simde_mm_setzero_si128()); \
- simde__m128i_private simde__tmp_r_; \
- if (HEDLEY_UNLIKELY(imm8 > 15)) { \
- simde__tmp_r_ = simde__m128i_to_private( \
- simde_mm_setzero_si128()); \
- } else { \
- simde__tmp_r_.i8 = SIMDE_SHUFFLE_VECTOR_( \
- 8, 16, simde__tmp_z_.i8, (simde__tmp_a_).i8, \
- HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \
- } \
- simde__m128i_from_private(simde__tmp_r_); \
- }))
- #endif
- #define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8)
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8)
- #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_bsrli_si128(simde__m128i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
- {
- simde__m128i_private r_, a_ = simde__m128i_to_private(a);
- if (HEDLEY_UNLIKELY((imm8 & ~15))) {
- return simde_mm_setzero_si128();
- }
- #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER)
- r_.altivec_i8 =
- #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)
- vec_sro
- #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */
- vec_slo
- #endif
- (a_.altivec_i8,
- vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8)));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
- const int e = HEDLEY_STATIC_CAST(int, i) + imm8;
- r_.i8[i] = (e < 16) ? a_.i8[e] : 0;
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
- #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8)
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__)
- #define simde_mm_bsrli_si128(a, imm8) \
- simde__m128i_from_neon_i8( \
- ((imm8 < 0) || (imm8 > 15)) \
- ? vdupq_n_s8(0) \
- : (vextq_s8(simde__m128i_to_private(a).neon_i8, \
- vdupq_n_s8(0), \
- ((imm8 & 15) != 0) ? imm8 : (imm8 & 15))))
- #elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
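- /* Shuffle across {zero, a}: index (imm8 + 16 + i) & 31 picks a[imm8 + i]; indices that wrap past 31 pick zero bytes. */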
- #define simde_mm_bsrli_si128(a, imm8) \
- (__extension__({ \
- const simde__m128i_private simde__tmp_a_ = \
- simde__m128i_to_private(a); \
- const simde__m128i_private simde__tmp_z_ = \
- simde__m128i_to_private(simde_mm_setzero_si128()); \
- simde__m128i_private simde__tmp_r_ = \
- simde__m128i_to_private(a); \
- if (HEDLEY_UNLIKELY(imm8 > 15)) { \
- simde__tmp_r_ = simde__m128i_to_private( \
- simde_mm_setzero_si128()); \
- } else { \
- simde__tmp_r_.i8 = SIMDE_SHUFFLE_VECTOR_( \
- 8, 16, simde__tmp_z_.i8, (simde__tmp_a_).i8, \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \
- HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \
- } \
- simde__m128i_from_private(simde__tmp_r_); \
- }))
- #endif
- #define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8))
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8))
- #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8))
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_clflush(void const *p)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_clflush(p);
- #else
- (void)p;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_clflush(p) simde_mm_clflush(p)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int simde_mm_comieq_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_comieq_sd(a, b);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return wasm_f64x2_extract_lane(a_.wasm_v128, 0) ==
- wasm_f64x2_extract_lane(b_.wasm_v128, 0);
- #else
- return a_.f64[0] == b_.f64[0];
- #endif
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int simde_mm_comige_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_comige_sd(a, b);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >=
- wasm_f64x2_extract_lane(b_.wasm_v128, 0);
- #else
- return a_.f64[0] >= b_.f64[0];
- #endif
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int simde_mm_comigt_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_comigt_sd(a, b);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >
- wasm_f64x2_extract_lane(b_.wasm_v128, 0);
- #else
- return a_.f64[0] > b_.f64[0];
- #endif
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int simde_mm_comile_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_comile_sd(a, b);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <=
- wasm_f64x2_extract_lane(b_.wasm_v128, 0);
- #else
- return a_.f64[0] <= b_.f64[0];
- #endif
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int simde_mm_comilt_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_comilt_sd(a, b);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <
- wasm_f64x2_extract_lane(b_.wasm_v128, 0);
- #else
- return a_.f64[0] < b_.f64[0];
- #endif
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int simde_mm_comineq_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_comineq_sd(a, b);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return wasm_f64x2_extract_lane(a_.wasm_v128, 0) !=
- wasm_f64x2_extract_lane(b_.wasm_v128, 0);
- #else
- return a_.f64[0] != b_.f64[0];
- #endif
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src)
- {
- simde__m128d_private r_, dest_ = simde__m128d_to_private(dest),
- src_ = simde__m128d_to_private(src);
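- /* Copy src's sign bit onto dest's magnitude; the NEON path bit-selects through a -0.0 mask. */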
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- uint64x2_t sign_pos =
- vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0)));
- #else
- simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0);
- uint64_t u64_nz;
- simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz));
- uint64x2_t sign_pos = vdupq_n_u64(u64_nz);
- #endif
- r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64);
- #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE)
- #if !defined(HEDLEY_IBM_VERSION)
- r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64);
- #else
- r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64);
- #endif
- #elif defined(simde_math_copysign)
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]);
- }
- #else
- simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0));
- return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src),
- simde_mm_andnot_pd(sgnbit, dest));
- #endif
- return simde__m128d_from_private(r_);
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src)
- {
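- /* XOR dest with src's sign bit, flipping dest's sign wherever src is negative. */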
- return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src),
- dest);
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128 simde_mm_castpd_ps(simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_castpd_ps(a);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- return vreinterpretq_f32_f64(a);
- #else
- simde__m128 r;
- simde_memcpy(&r, &a, sizeof(a));
- return r;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_castpd_ps(a) simde_mm_castpd_ps(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_castpd_si128(simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_castpd_si128(a);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- return vreinterpretq_s64_f64(a);
- #else
- simde__m128i r;
- simde_memcpy(&r, &a, sizeof(a));
- return r;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_castpd_si128(a) simde_mm_castpd_si128(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_castps_pd(simde__m128 a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_castps_pd(a);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- return vreinterpretq_f64_f32(a);
- #else
- simde__m128d r;
- simde_memcpy(&r, &a, sizeof(a));
- return r;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_castps_pd(a) simde_mm_castps_pd(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_castps_si128(simde__m128 a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_castps_si128(a);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32);
- #else
- simde__m128i r;
- simde_memcpy(&r, &a, sizeof(a));
- return r;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_castps_si128(a) simde_mm_castps_si128(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_castsi128_pd(simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_castsi128_pd(a);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- return vreinterpretq_f64_s64(a);
- #else
- simde__m128d r;
- simde_memcpy(&r, &a, sizeof(a));
- return r;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128 simde_mm_castsi128_ps(simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_castsi128_ps(a);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32);
- #else
- simde__m128 r;
- simde_memcpy(&r, &a, sizeof(a));
- return r;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_cmpeq_epi8(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpeq_epi8(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(
- SIMDE_POWER_ALTIVEC_VECTOR(signed char),
- vec_cmpeq(a_.altivec_i8, b_.altivec_i8));
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i8 = HEDLEY_STATIC_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
- r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_cmpeq_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpeq_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(
- SIMDE_POWER_ALTIVEC_VECTOR(signed short),
- vec_cmpeq(a_.altivec_i16, b_.altivec_i16));
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i16 = (a_.i16 == b_.i16);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_cmpeq_epi32(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpeq_epi32(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(
- SIMDE_POWER_ALTIVEC_VECTOR(signed int),
- vec_cmpeq(a_.altivec_i32, b_.altivec_i32));
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), a_.i32 == b_.i32);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpeq_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpeq_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(
- SIMDE_POWER_ALTIVEC_VECTOR(double),
- vec_cmpeq(a_.altivec_f64, b_.altivec_f64));
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0)
- : UINT64_C(0);
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpeq_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpeq_sd(a, b);
- #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
- return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- r_.u64[0] = (a_.f64[0] == b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);
- r_.u64[1] = a_.u64[1];
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpneq_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpneq_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_u32 = vmvnq_u32(
- vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64)));
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0)
- : UINT64_C(0);
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpneq_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpneq_sd(a, b);
- #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
- return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);
- r_.u64[1] = a_.u64[1];
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_cmplt_epi8(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmplt_epi8(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(
- SIMDE_POWER_ALTIVEC_VECTOR(signed char),
- vec_cmplt(a_.altivec_i8, b_.altivec_i8));
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i8 = HEDLEY_STATIC_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
- r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_cmplt_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmplt_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(
- SIMDE_POWER_ALTIVEC_VECTOR(signed short),
- vec_cmplt(a_.altivec_i16, b_.altivec_i16));
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i16 = HEDLEY_STATIC_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_cmplt_epi32(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmplt_epi32(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(
- SIMDE_POWER_ALTIVEC_VECTOR(signed int),
- vec_cmplt(a_.altivec_i32, b_.altivec_i32));
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmplt_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmplt_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64));
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0)
- : UINT64_C(0);
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmplt_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmplt_sd(a, b);
- #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
- return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);
- r_.u64[1] = a_.u64[1];
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmple_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmple_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64));
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(
- SIMDE_POWER_ALTIVEC_VECTOR(double),
- vec_cmple(a_.altivec_f64, b_.altivec_f64));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0)
- : UINT64_C(0);
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmple_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmple_sd(a, b);
- #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
- return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);
- r_.u64[1] = a_.u64[1];
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_cmpgt_epi8(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpgt_epi8(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(
- SIMDE_POWER_ALTIVEC_VECTOR(signed char),
- vec_cmpgt(a_.altivec_i8, b_.altivec_i8));
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i8 = HEDLEY_STATIC_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
- r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_cmpgt_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpgt_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(
- SIMDE_POWER_ALTIVEC_VECTOR(signed short),
- vec_cmpgt(a_.altivec_i16, b_.altivec_i16));
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i16 = HEDLEY_STATIC_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_cmpgt_epi32(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpgt_epi32(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(
- SIMDE_POWER_ALTIVEC_VECTOR(signed int),
- vec_cmpgt(a_.altivec_i32, b_.altivec_i32));
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpgt_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpgt_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64));
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_f64 =
- HEDLEY_STATIC_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double),
- vec_cmpgt(a_.altivec_f64, b_.altivec_f64));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0)
- : UINT64_C(0);
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpgt_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
- return _mm_cmpgt_sd(a, b);
- #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
- return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);
- r_.u64[1] = a_.u64[1];
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpge_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpge_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64));
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_f64 =
- HEDLEY_STATIC_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double),
- vec_cmpge(a_.altivec_f64, b_.altivec_f64));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0)
- : UINT64_C(0);
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpge_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
- return _mm_cmpge_sd(a, b);
- #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
- return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);
- r_.u64[1] = a_.u64[1];
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpngt_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpngt_pd(a, b);
- #else
- return simde_mm_cmple_pd(a, b);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpngt_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
- return _mm_cmpngt_sd(a, b);
- #else
- return simde_mm_cmple_sd(a, b);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpnge_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpnge_pd(a, b);
- #else
- return simde_mm_cmplt_pd(a, b);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpnge_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
- return _mm_cmpnge_sd(a, b);
- #else
- return simde_mm_cmplt_sd(a, b);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpnlt_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpnlt_pd(a, b);
- #else
- return simde_mm_cmpge_pd(a, b);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpnlt_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpnlt_sd(a, b);
- #else
- return simde_mm_cmpge_sd(a, b);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpnle_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpnle_pd(a, b);
- #else
- return simde_mm_cmpgt_pd(a, b);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpnle_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpnle_sd(a, b);
- #else
- return simde_mm_cmpgt_sd(a, b);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpord_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpord_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- /* NEON has no ordered-compare builtin, so compare a == a and
- b == b to detect NaNs, then AND the two results together. */
- uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64);
- uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64);
- r_.neon_u64 = vandq_u64(ceqaa, ceqbb);
- #elif defined(simde_math_isnan)
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.u64[i] = (!simde_math_isnan(a_.f64[i]) &&
- !simde_math_isnan(b_.f64[i]))
- ? ~UINT64_C(0)
- : UINT64_C(0);
- }
- #else
- HEDLEY_UNREACHABLE();
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde_float64 simde_mm_cvtsd_f64(simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
- return _mm_cvtsd_f64(a);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- return HEDLEY_STATIC_CAST(simde_float64,
- vgetq_lane_f64(a_.neon_f64, 0));
- #else
- return a_.f64[0];
- #endif
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpord_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpord_sd(a, b);
- #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
- return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(simde_math_isnan)
- r_.u64[0] =
- (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0]))
- ? ~UINT64_C(0)
- : UINT64_C(0);
- r_.u64[1] = a_.u64[1];
- #else
- HEDLEY_UNREACHABLE();
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpunord_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpunord_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
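- /* Unordered means at least one operand is NaN; NaN fails self-equality, so invert (a == a) & (b == b). */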
- uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64);
- uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64);
- r_.neon_u64 = vreinterpretq_u64_u32(
- vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb))));
- #elif defined(simde_math_isnan)
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.u64[i] = (simde_math_isnan(a_.f64[i]) ||
- simde_math_isnan(b_.f64[i]))
- ? ~UINT64_C(0)
- : UINT64_C(0);
- }
- #else
- HEDLEY_UNREACHABLE();
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cmpunord_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cmpunord_sd(a, b);
- #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
- return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(simde_math_isnan)
- r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0]))
- ? ~UINT64_C(0)
- : UINT64_C(0);
- r_.u64[1] = a_.u64[1];
- #else
- HEDLEY_UNREACHABLE();
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cvtepi32_pd(simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cvtepi32_pd(a);
- #else
- simde__m128d_private r_;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_CONVERT_VECTOR_)
- SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.f64[i] = (simde_float64)a_.i32[i];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128 simde_mm_cvtepi32_ps(simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cvtepi32_ps(a);
- #else
- simde__m128_private r_;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- HEDLEY_DIAGNOSTIC_PUSH
- #if HEDLEY_HAS_WARNING("-Wc11-extensions")
- #pragma clang diagnostic ignored "-Wc11-extensions"
- #endif
- r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0);
- HEDLEY_DIAGNOSTIC_POP
- #elif defined(SIMDE_CONVERT_VECTOR_)
- SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) {
- r_.f32[i] = (simde_float32)a_.i32[i];
- }
- #endif
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_cvtpd_pi32(simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
- return _mm_cvtpd_pi32(a);
- #else
- simde__m64_private r_;
- simde__m128d_private a_ = simde__m128d_to_private(a);
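- /* Round to nearest; unless SIMDE_FAST_CONVERSION_RANGE is defined, out-of-range values saturate to INT32_MIN. */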
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- simde_float64 v = simde_math_round(a_.f64[i]);
- #if defined(SIMDE_FAST_CONVERSION_RANGE)
- r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
- #else
- r_.i32[i] =
- ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) &&
- (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX)))
- ? SIMDE_CONVERT_FTOI(int32_t, v)
- : INT32_MIN;
- #endif
- }
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_cvtpd_epi32(simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cvtpd_epi32(a);
- #else
- simde__m128i_private r_;
- r_.m64[0] = simde_mm_cvtpd_pi32(a);
- r_.m64[1] = simde_mm_setzero_si64();
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128 simde_mm_cvtpd_ps(simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cvtpd_ps(a);
- #else
- simde__m128_private r_;
- simde__m128d_private a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_CONVERT_VECTOR_)
- SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.f64);
- r_.m64_private[1] = simde__m64_to_private(simde_mm_setzero_si64());
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f32 = vreinterpretq_f32_f64(
- vcombine_f64(vreinterpret_f64_f32(vcvtx_f32_f64(a_.neon_f64)),
- vdup_n_f64(0)));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(a_.f64) / sizeof(a_.f64[0])); i++) {
- r_.f32[i] = (simde_float32)a_.f64[i];
- }
- simde_memset(&(r_.m64_private[1]), 0, sizeof(r_.m64_private[1]));
- #endif
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cvtpi32_pd(simde__m64 a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
- return _mm_cvtpi32_pd(a);
- #else
- simde__m128d_private r_;
- simde__m64_private a_ = simde__m64_to_private(a);
- #if defined(SIMDE_CONVERT_VECTOR_)
- SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.f64[i] = (simde_float64)a_.i32[i];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a)
- #endif
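- /* _mm_cvtps_epi32 converts with rounding to nearest; in this implementation,
-  * out-of-range inputs yield INT32_MIN (the x86 "integer indefinite" value)
-  * unless SIMDE_FAST_CONVERSION_RANGE is defined, which skips the range check. */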
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_cvtps_epi32(simde__m128 a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cvtps_epi32(a);
- #else
- simde__m128i_private r_;
- simde__m128_private a_ = simde__m128_to_private(a);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)
- r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \
- defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES)
- r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && \
- defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES)
- HEDLEY_DIAGNOSTIC_PUSH
- SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_
- SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_
- r_.altivec_i32 = vec_cts(a_.altivec_f32, 1);
- HEDLEY_DIAGNOSTIC_POP
- #else
- a_ = simde__m128_to_private(
- simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1));
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- simde_float32 v = simde_math_roundf(a_.f32[i]);
- #if defined(SIMDE_FAST_CONVERSION_RANGE)
- r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
- #else
- r_.i32[i] =
- ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) &&
- (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX)))
- ? SIMDE_CONVERT_FTOI(int32_t, v)
- : INT32_MIN;
- #endif
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cvtps_pd(simde__m128 a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cvtps_pd(a);
- #else
- simde__m128d_private r_;
- simde__m128_private a_ = simde__m128_to_private(a);
- #if defined(SIMDE_CONVERT_VECTOR_)
- SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.f64[i] = a_.f32[i];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int32_t simde_mm_cvtsd_si32(simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cvtsd_si32(a);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a);
- simde_float64 v = simde_math_round(a_.f64[0]);
- #if defined(SIMDE_FAST_CONVERSION_RANGE)
- return SIMDE_CONVERT_FTOI(int32_t, v);
- #else
- return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) &&
- (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX)))
- ? SIMDE_CONVERT_FTOI(int32_t, v)
- : INT32_MIN;
- #endif
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int64_t simde_mm_cvtsd_si64(simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)
- #if defined(__PGI)
- return _mm_cvtsd_si64x(a);
- #else
- return _mm_cvtsd_si64(a);
- #endif
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a);
- return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0]));
- #endif
- }
- #define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a)
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a)
- #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128 simde_mm_cvtsd_ss(simde__m128 a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cvtsd_ss(a, b);
- #else
- simde__m128_private r_, a_ = simde__m128_to_private(a);
- simde__m128d_private b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f32 = vsetq_lane_f32(
- vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0);
- #else
- r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]);
- SIMDE_VECTORIZE
- for (size_t i = 1; i < (sizeof(r_) / sizeof(r_.i32[0])); i++) {
- r_.i32[i] = a_.i32[i];
- }
- #endif
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int16_t simde_x_mm_cvtsi128_si16(simde__m128i a)
- {
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- return vgetq_lane_s16(a_.neon_i16, 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return HEDLEY_STATIC_CAST(int16_t,
- wasm_i16x8_extract_lane(a_.wasm_v128, 0));
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- #if defined(SIMDE_BUG_GCC_95227)
- (void)a_;
- #endif
- return vec_extract(a_.altivec_i16, 0);
- #else
- return a_.i16[0];
- #endif
- }
- SIMDE_FUNCTION_ATTRIBUTES
- int32_t simde_mm_cvtsi128_si32(simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cvtsi128_si32(a);
- #else
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- return vgetq_lane_s32(a_.neon_i32, 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return HEDLEY_STATIC_CAST(int32_t,
- wasm_i32x4_extract_lane(a_.wasm_v128, 0));
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- #if defined(SIMDE_BUG_GCC_95227)
- (void)a_;
- #endif
- return vec_extract(a_.altivec_i32, 0);
- #else
- return a_.i32[0];
- #endif
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int64_t simde_mm_cvtsi128_si64(simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)
- #if defined(__PGI)
- return _mm_cvtsi128_si64x(a);
- #else
- return _mm_cvtsi128_si64(a);
- #endif
- #else
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION)
- return vec_extract(HEDLEY_REINTERPRET_CAST(
- SIMDE_POWER_ALTIVEC_VECTOR(signed long long),
- a_.i64),
- 0);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- return vgetq_lane_s64(a_.neon_i64, 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return HEDLEY_STATIC_CAST(int64_t,
- wasm_i64x2_extract_lane(a_.wasm_v128, 0));
- #endif
- return a_.i64[0];
- #endif
- }
- #define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a)
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a)
- #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cvtsi32_sd(simde__m128d a, int32_t b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cvtsi32_sd(a, b);
- #else
- simde__m128d_private r_;
- simde__m128d_private a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b),
- a_.neon_f64, 0);
- #else
- r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b);
- r_.i64[1] = a_.i64[1];
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_cvtsi16_si128(int16_t a)
- {
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0);
- #else
- r_.i16[0] = a;
- r_.i16[1] = 0;
- r_.i16[2] = 0;
- r_.i16[3] = 0;
- r_.i16[4] = 0;
- r_.i16[5] = 0;
- r_.i16[6] = 0;
- r_.i16[7] = 0;
- #endif
- return simde__m128i_from_private(r_);
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_cvtsi32_si128(int32_t a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cvtsi32_si128(a);
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0);
- #else
- r_.i32[0] = a;
- r_.i32[1] = 0;
- r_.i32[2] = 0;
- r_.i32[3] = 0;
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cvtsi64_sd(simde__m128d a, int64_t b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)
- #if !defined(__PGI)
- return _mm_cvtsi64_sd(a, b);
- #else
- return _mm_cvtsi64x_sd(a, b);
- #endif
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b),
- a_.neon_f64, 0);
- #else
- r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b);
- r_.f64[1] = a_.f64[1];
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b)
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b)
- #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_cvtsi64_si128(int64_t a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)
- #if !defined(__PGI)
- return _mm_cvtsi64_si128(a);
- #else
- return _mm_cvtsi64x_si128(a);
- #endif
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i64x2_make(a, 0);
- #else
- r_.i64[0] = a;
- r_.i64[1] = 0;
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a)
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a)
- #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_cvtss_sd(simde__m128d a, simde__m128 b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cvtss_sd(a, b);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- float64x2_t temp = vcvt_f64_f32(vset_lane_f32(
- vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0),
- vdup_n_f32(0), 0));
- return vsetq_lane_f64(
- vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp,
- 1);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a);
- simde__m128_private b_ = simde__m128_to_private(b);
- a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]);
- return simde__m128d_from_private(a_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b)
- #endif
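- /* The cvtt* ("truncate") variants below convert by truncating toward zero
-  * instead of rounding to nearest. */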
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_cvttpd_pi32(simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
- return _mm_cvttpd_pi32(a);
- #else
- simde__m64_private r_;
- simde__m128d_private a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE)
- SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64);
- #else
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- simde_float64 v = a_.f64[i];
- #if defined(SIMDE_FAST_CONVERSION_RANGE)
- r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
- #else
- r_.i32[i] =
- ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) &&
- (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX)))
- ? SIMDE_CONVERT_FTOI(int32_t, v)
- : INT32_MIN;
- #endif
- }
- #endif
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_cvttpd_epi32(simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cvttpd_epi32(a);
- #else
- simde__m128i_private r_;
- r_.m64[0] = simde_mm_cvttpd_pi32(a);
- r_.m64[1] = simde_mm_setzero_si64();
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_cvttps_epi32(simde__m128 a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cvttps_epi32(a);
- #else
- simde__m128i_private r_;
- simde__m128_private a_ = simde__m128_to_private(a);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)
- r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32);
- #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE)
- SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32);
- #else
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- simde_float32 v = a_.f32[i];
- #if defined(SIMDE_FAST_CONVERSION_RANGE)
- r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
- #else
- r_.i32[i] =
- ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) &&
- (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX)))
- ? SIMDE_CONVERT_FTOI(int32_t, v)
- : INT32_MIN;
- #endif
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int32_t simde_mm_cvttsd_si32(simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_cvttsd_si32(a);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a);
- simde_float64 v = a_.f64[0];
- #if defined(SIMDE_FAST_CONVERSION_RANGE)
- return SIMDE_CONVERT_FTOI(int32_t, v);
- #else
- return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) &&
- (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX)))
- ? SIMDE_CONVERT_FTOI(int32_t, v)
- : INT32_MIN;
- #endif
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int64_t simde_mm_cvttsd_si64(simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)
- #if !defined(__PGI)
- return _mm_cvttsd_si64(a);
- #else
- return _mm_cvttsd_si64x(a);
- #endif
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a);
- return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]);
- #endif
- }
- #define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a)
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a)
- #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_div_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_div_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.f64 = a_.f64 / b_.f64;
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.f64[i] = a_.f64[i] / b_.f64[i];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_div_pd(a, b) simde_mm_div_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_div_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_div_sd(a, b);
- #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
- return simde_mm_move_sd(a, simde_mm_div_pd(a, b));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64);
- r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(a_.neon_f64, 1), temp, 1);
- #else
- r_.f64[0] = a_.f64[0] / b_.f64[0];
- r_.f64[1] = a_.f64[1];
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_div_sd(a, b) simde_mm_div_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int32_t simde_mm_extract_epi16(simde__m128i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7)
- {
- uint16_t r;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- #if defined(SIMDE_BUG_GCC_95227)
- (void)a_;
- (void)imm8;
- #endif
- r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8));
- #else
- r = a_.u16[imm8 & 7];
- #endif
- return HEDLEY_STATIC_CAST(int32_t, r);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE) && \
- (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4, 6, 0))
- #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8)
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- #define simde_mm_extract_epi16(a, imm8) \
- (HEDLEY_STATIC_CAST( \
- int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, \
- (imm8))) & \
- (INT32_C(0x0000ffff)))
- #endif
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_insert_epi16(simde__m128i a, int16_t i, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7)
- {
- simde__m128i_private a_ = simde__m128i_to_private(a);
- a_.i16[imm8 & 7] = i;
- return simde__m128i_from_private(a_);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
- #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8))
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- #define simde_mm_insert_epi16(a, i, imm8) \
- simde__m128i_from_neon_i16( \
- vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8)))
- #endif
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d
- simde_mm_load_pd(simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)])
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_load_pd(mem_addr);
- #else
- simde__m128d_private r_;
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vld1q_f64(mem_addr);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u32 =
- vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const *, mem_addr));
- #else
- simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d),
- sizeof(r_));
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_load1_pd(simde_float64 const *mem_addr)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_load1_pd(mem_addr);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr));
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return simde__m128d_from_wasm_v128(wasm_v64x2_load_splat(mem_addr));
- #else
- return simde_mm_set1_pd(*mem_addr);
- #endif
- }
- #define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr)
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr)
- #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_load_sd(simde_float64 const *mem_addr)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_load_sd(mem_addr);
- #else
- simde__m128d_private r_;
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0);
- #else
- r_.f64[0] = *mem_addr;
- r_.u64[1] = UINT64_C(0);
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_load_si128(simde__m128i const *mem_addr)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_load_si128(
- HEDLEY_REINTERPRET_CAST(__m128i const *, mem_addr));
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i32 = vec_ld(
- 0, HEDLEY_REINTERPRET_CAST(
- SIMDE_POWER_ALTIVEC_VECTOR(int) const *, mem_addr));
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 =
- vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const *, mem_addr));
- #else
- simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i),
- sizeof(simde__m128i));
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_loadh_pd(simde__m128d a, simde_float64 const *mem_addr)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_loadh_pd(a, mem_addr);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vcombine_f64(
- vget_low_f64(a_.neon_f64),
- vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t *, mem_addr)));
- #else
- simde_float64 t;
- simde_memcpy(&t, mem_addr, sizeof(t));
- r_.f64[0] = a_.f64[0];
- r_.f64[1] = t;
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_loadl_epi64(simde__m128i const *mem_addr)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_loadl_epi64(mem_addr);
- #else
- simde__m128i_private r_;
- int64_t value;
- simde_memcpy(&value, mem_addr, sizeof(value));
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i64 = vcombine_s64(
- vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)),
- vdup_n_s64(0));
- #else
- r_.i64[0] = value;
- r_.i64[1] = 0;
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_loadl_pd(simde__m128d a, simde_float64 const *mem_addr)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_loadl_pd(a, mem_addr);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vcombine_f64(
- vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t *, mem_addr)),
- vget_high_f64(a_.neon_f64));
- #else
- r_.f64[0] = *mem_addr;
- r_.u64[1] = a_.u64[1];
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d
- simde_mm_loadr_pd(simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)])
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_loadr_pd(mem_addr);
- #else
- simde__m128d_private r_;
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vld1q_f64(mem_addr);
- r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i64 =
- vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr));
- r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- v128_t tmp = wasm_v128_load(mem_addr);
- r_.wasm_v128 = wasm_v64x2_shuffle(tmp, tmp, 1, 0);
- #else
- r_.f64[0] = mem_addr[1];
- r_.f64[1] = mem_addr[0];
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d
- simde_mm_loadu_pd(simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)])
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_loadu_pd(mem_addr);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- return vld1q_f64(mem_addr);
- #else
- simde__m128d_private r_;
- simde_memcpy(&r_, mem_addr, sizeof(r_));
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_loadu_epi8(int8_t const *mem_addr)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_loadu_si128(
- SIMDE_ALIGN_CAST(simde__m128i const *, mem_addr));
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i8 =
- vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const *, mem_addr));
- #else
- simde_memcpy(&r_, mem_addr, sizeof(r_));
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_loadu_epi16(int16_t const *mem_addr)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_loadu_si128(
- SIMDE_ALIGN_CAST(simde__m128i const *, mem_addr));
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i16 =
- vld1q_s16(HEDLEY_REINTERPRET_CAST(int16_t const *, mem_addr));
- #else
- simde_memcpy(&r_, mem_addr, sizeof(r_));
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_loadu_epi32(int32_t const *mem_addr)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_loadu_si128(
- SIMDE_ALIGN_CAST(simde__m128i const *, mem_addr));
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 =
- vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const *, mem_addr));
- #else
- simde_memcpy(&r_, mem_addr, sizeof(r_));
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_loadu_epi64(int64_t const *mem_addr)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_loadu_si128(
- SIMDE_ALIGN_CAST(simde__m128i const *, mem_addr));
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i64 =
- vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr));
- #else
- simde_memcpy(&r_, mem_addr, sizeof(r_));
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
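- /* The packed, __may_alias__ struct used below lets GCC-compatible compilers
-  * emit an unaligned load without the undefined behaviour a plain type pun
-  * would introduce. */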
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_loadu_si128(void const *mem_addr)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const *, mem_addr));
- #else
- simde__m128i_private r_;
- #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias, 3, 3, 0)
- HEDLEY_DIAGNOSTIC_PUSH
- SIMDE_DIAGNOSTIC_DISABLE_PACKED_
- struct simde_mm_loadu_si128_s {
- __typeof__(r_) v;
- } __attribute__((__packed__, __may_alias__));
- r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *,
- mem_addr)
- ->v;
- HEDLEY_DIAGNOSTIC_POP
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- /* Note that this is a lower priority than the struct above since
- * clang assumes mem_addr is aligned (since it is a __m128i*). */
- r_.neon_i32 =
- vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const *, mem_addr));
- #else
- simde_memcpy(&r_, mem_addr, sizeof(r_));
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr)
- #endif
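- /* _mm_madd_epi16: multiply corresponding signed 16-bit elements and add
-  * adjacent pairs of the 32-bit products, i.e.
-  * r.i32[j] = a.i16[2j] * b.i16[2j] + a.i16[2j+1] * b.i16[2j+1]. */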
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_madd_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_madd_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- int32x4_t pl =
- vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16));
- int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16);
- r_.neon_i32 = vpaddq_s32(pl, ph);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- int32x4_t pl =
- vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16));
- int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16),
- vget_high_s16(b_.neon_i16));
- int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl));
- int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph));
- r_.neon_i32 = vcombine_s32(rl, rh);
- #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- static const SIMDE_POWER_ALTIVEC_VECTOR(int) tz = {0, 0, 0, 0};
- r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, tz);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i16[0])); i += 2) {
- r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) +
- (a_.i16[i + 1] * b_.i16[i + 1]);
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b)
- #endif
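- /* _mm_maskmoveu_si128: store a.i8[i] to mem_addr[i] only where the high bit
-  * of mask.u8[i] is set (the native instruction is a non-temporal store). */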
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_maskmoveu_si128(simde__m128i a, simde__m128i mask,
- int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)])
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char *, mem_addr));
- #else
- simde__m128i_private a_ = simde__m128i_to_private(a),
- mask_ = simde__m128i_to_private(mask);
- for (size_t i = 0; i < (sizeof(a_.i8) / sizeof(a_.i8[0])); i++) {
- if (mask_.u8[i] & 0x80) {
- mem_addr[i] = a_.i8[i];
- }
- }
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_maskmoveu_si128(a, mask, mem_addr) \
- simde_mm_maskmoveu_si128( \
- (a), (mask), \
- SIMDE_CHECKED_REINTERPRET_CAST(int8_t *, char *, (mem_addr)))
- #endif
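- /* _mm_movemask_epi8: collect the most significant bit of each of the 16
-  * bytes into bits 0..15 of the result. */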
- SIMDE_FUNCTION_ATTRIBUTES
- int32_t simde_mm_movemask_epi8(simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER)
- /* ICC has trouble with _mm_movemask_epi8 at -O2 and above. */
- return _mm_movemask_epi8(a);
- #else
- int32_t r = 0;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- uint8x16_t input = a_.neon_u8;
- const int8_t xr[16] = {-7, -6, -5, -4, -3, -2, -1, 0,
- -7, -6, -5, -4, -3, -2, -1, 0};
- const uint8x16_t mask_and = vdupq_n_u8(0x80);
- const int8x16_t mask_shift = vld1q_s8(xr);
- const uint8x16_t mask_result =
- vshlq_u8(vandq_u8(input, mask_and), mask_shift);
- uint8x8_t lo = vget_low_u8(mask_result);
- uint8x8_t hi = vget_high_u8(mask_result);
- r = vaddv_u8(lo) + (vaddv_u8(hi) << 8);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- // Use increasingly wide shifts+adds to collect the sign bits
- // together.
- // Since the widening shifts would be rather confusing to follow in little endian, everything
- // will be illustrated in big endian order instead. This has a different result - the bits
- // would actually be reversed on a big endian machine.
- // Starting input (only half the elements are shown):
- // 89 ff 1d c0 00 10 99 33
- uint8x16_t input = a_.neon_u8;
- // Shift out everything but the sign bits with an unsigned shift right.
- //
- // Bytes of the vector::
- // 89 ff 1d c0 00 10 99 33
- // \ \ \ \ \ \ \ \ high_bits = (uint16x4_t)(input >> 7)
- // | | | | | | | |
- // 01 01 00 01 00 00 01 00
- //
- // Bits of first important lane(s):
- // 10001001 (89)
- // \______
- // |
- // 00000001 (01)
- uint16x8_t high_bits = vreinterpretq_u16_u8(vshrq_n_u8(input, 7));
- // Merge the even lanes together with a 16-bit unsigned shift right + add.
- // 'xx' represents garbage data which will be ignored in the final result.
- // In the important bytes, the add functions like a binary OR.
- //
- // 01 01 00 01 00 00 01 00
- // \_ | \_ | \_ | \_ | paired16 = (uint32x4_t)(input + (input >> 7))
- // \| \| \| \|
- // xx 03 xx 01 xx 00 xx 02
- //
- // 00000001 00000001 (01 01)
- // \_______ |
- // \|
- // xxxxxxxx xxxxxx11 (xx 03)
- uint32x4_t paired16 =
- vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 7));
- // Repeat with a wider 32-bit shift + add.
- // xx 03 xx 01 xx 00 xx 02
- // \____ | \____ | paired32 = (uint64x1_t)(paired16 + (paired16 >> 14))
- // \| \|
- // xx xx xx 0d xx xx xx 02
- //
- // 00000011 00000001 (03 01)
- // \\_____ ||
- // '----.\||
- // xxxxxxxx xxxx1101 (xx 0d)
- uint64x2_t paired32 =
- vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14));
- // Last, an even wider 64-bit shift + add to get our result in the low 8 bit lanes.
- // xx xx xx 0d xx xx xx 02
- // \_________ | paired64 = (uint8x8_t)(paired32 + (paired32 >> 28))
- // \|
- // xx xx xx xx xx xx xx d2
- //
- // 00001101 00000010 (0d 02)
- // \ \___ | |
- // '---. \| |
- // xxxxxxxx 11010010 (xx d2)
- uint8x16_t paired64 =
- vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28));
- // Extract the low 8 bits from each 64-bit lane with 2 8-bit extracts.
- // xx xx xx xx xx xx xx d2
- // || return paired64[0]
- // d2
- // Note: Little endian would return the correct value 4b (01001011) instead.
- r = vgetq_lane_u8(paired64, 0) |
- (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u8(paired64, 8)) << 8);
- #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \
- !defined(HEDLEY_IBM_VERSION) && \
- (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)
- static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char)
- perm = {120, 112, 104, 96, 88, 80, 72, 64,
- 56, 48, 40, 32, 24, 16, 8, 0};
- r = HEDLEY_STATIC_CAST(
- int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1));
- #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \
- !defined(HEDLEY_IBM_VERSION) && \
- (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG)
- static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char)
- perm = {120, 112, 104, 96, 88, 80, 72, 64,
- 56, 48, 40, 32, 24, 16, 8, 0};
- r = HEDLEY_STATIC_CAST(
- int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14));
- #else
- SIMDE_VECTORIZE_REDUCTION(| : r)
- for (size_t i = 0; i < (sizeof(a_.u8) / sizeof(a_.u8[0])); i++) {
- r |= (a_.u8[15 - i] >> 7) << (15 - i);
- }
- #endif
- return r;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int32_t simde_mm_movemask_pd(simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_movemask_pd(a);
- #else
- int32_t r = 0;
- simde__m128d_private a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- static const int64_t shift_amount[] = {0, 1};
- const int64x2_t shift = vld1q_s64(shift_amount);
- uint64x2_t tmp = vshrq_n_u64(a_.neon_u64, 63);
- return HEDLEY_STATIC_CAST(int32_t, vaddvq_u64(vshlq_u64(tmp, shift)));
- #else
- SIMDE_VECTORIZE_REDUCTION(| : r)
- for (size_t i = 0; i < (sizeof(a_.u64) / sizeof(a_.u64[0])); i++) {
- r |= (a_.u64[i] >> 63) << i;
- }
- #endif
- return r;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_movemask_pd(a) simde_mm_movemask_pd(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_movepi64_pi64(simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
- return _mm_movepi64_pi64(a);
- #else
- simde__m64_private r_;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_i64 = vget_low_s64(a_.neon_i64);
- #else
- r_.i64[0] = a_.i64[0];
- #endif
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_movpi64_epi64(simde__m64 a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
- return _mm_movpi64_epi64(a);
- #else
- simde__m128i_private r_;
- simde__m64_private a_ = simde__m64_to_private(a);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0));
- #else
- r_.i64[0] = a_.i64[0];
- r_.i64[1] = 0;
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_min_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_min_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_min_epu8(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_min_epu8(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
- r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_min_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_min_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_min_pd(a, b) simde_mm_min_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_min_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_min_sd(a, b);
- #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
- return simde_mm_move_sd(a, simde_mm_min_pd(a, b));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64);
- r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(a_.neon_f64, 1), temp, 1);
- #else
- r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0];
- r_.f64[1] = a_.f64[1];
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_min_sd(a, b) simde_mm_min_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_max_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_max_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_max_epu8(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_max_epu8(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
- r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_max_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_max_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_max_pd(a, b) simde_mm_max_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_max_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_max_sd(a, b);
- #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
- return simde_mm_move_sd(a, simde_mm_max_pd(a, b));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64);
- r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(a_.neon_f64, 1), temp, 1);
- #else
- r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? a_.f64[0] : b_.f64[0];
- r_.f64[1] = a_.f64[1];
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_max_sd(a, b) simde_mm_max_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_move_epi64(simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_move_epi64(a);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1);
- #else
- r_.i64[0] = a_.i64[0];
- r_.i64[1] = 0;
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_move_epi64(a) simde_mm_move_epi64(a)
- #endif
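- /* _mm_mul_epu32: multiply the low (even-indexed) unsigned 32-bit element of
-  * each 64-bit lane, producing full 64-bit products. */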
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_mul_epu32(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_mul_epu32(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- uint32x2_t a_lo = vmovn_u64(a_.neon_u64);
- uint32x2_t b_lo = vmovn_u64(b_.neon_u64);
- r_.neon_u64 = vmull_u32(a_lo, b_lo);
- #elif defined(SIMDE_SHUFFLE_VECTOR_) && \
- (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)
- __typeof__(a_.u32) z = {
- 0,
- };
- a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6);
- b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6);
- r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) *
- HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.u64) / sizeof(r_.u64[0])); i++) {
- r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) *
- HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]);
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_mul_epi64(simde__m128i a, simde__m128i b)
- {
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- /* Note: NEON has no 64-bit integer multiply intrinsic (there is no
-  * vmulq_s64), so only the vector-extension and scalar paths are provided. */
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i64 = a_.i64 * b_.i64;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
- r_.i64[i] = a_.i64[i] * b_.i64[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_mod_epi64(simde__m128i a, simde__m128i b)
- {
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i64 = a_.i64 % b_.i64;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
- r_.i64[i] = a_.i64[i] % b_.i64[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_mul_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_mul_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.f64 = a_.f64 * b_.f64;
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.f64[i] = a_.f64[i] * b_.f64[i];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_mul_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_mul_sd(a, b);
- #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
- return simde_mm_move_sd(a, simde_mm_mul_pd(a, b));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64);
- r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(a_.neon_f64, 1), temp, 1);
- #else
- r_.f64[0] = a_.f64[0] * b_.f64[0];
- r_.f64[1] = a_.f64[1];
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_mul_su32(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && \
- !defined(__PGI)
- return _mm_mul_su32(a, b);
- #else
- simde__m64_private r_, a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.u64[0] = vget_lane_u64(
- vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64),
- vreinterpret_u32_s64(b_.neon_i64))),
- 0);
- #else
- r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) *
- HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]);
- #endif
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b)
- #endif
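- /* _mm_mulhi_epi16 / _mm_mulhi_epu16: keep the high 16 bits of each 32-bit
-  * product of corresponding 16-bit elements. */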
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_mulhi_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_mulhi_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- int16x4_t a3210 = vget_low_s16(a_.neon_i16);
- int16x4_t b3210 = vget_low_s16(b_.neon_i16);
- int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16);
- r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210),
- vreinterpretq_s16_s32(ab7654));
- #else
- int16x4_t a7654 = vget_high_s16(a_.neon_i16);
- int16x4_t b7654 = vget_high_s16(b_.neon_i16);
- int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */
- uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210),
- vreinterpretq_u16_s32(ab7654));
- r_.neon_u16 = rv.val[1];
- #endif
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.u16[i] = HEDLEY_STATIC_CAST(
- uint16_t,
- (HEDLEY_STATIC_CAST(
- uint32_t,
- HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) *
- HEDLEY_STATIC_CAST(int32_t,
- b_.i16[i])) >>
- 16));
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_mulhi_epu16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
- return _mm_mulhi_epu16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- uint16x4_t a3210 = vget_low_u16(a_.neon_u16);
- uint16x4_t b3210 = vget_low_u16(b_.neon_u16);
- uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16);
- r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210),
- vreinterpretq_u16_u32(ab7654));
- #else
- uint16x4_t a7654 = vget_high_u16(a_.neon_u16);
- uint16x4_t b7654 = vget_high_u16(b_.neon_u16);
- uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */
- uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210),
- vreinterpretq_u16_u32(ab7654));
- r_.neon_u16 = neon_r.val[1];
- #endif
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
- r_.u16[i] = HEDLEY_STATIC_CAST(
- uint16_t,
- HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) *
- HEDLEY_STATIC_CAST(uint32_t,
- b_.u16[i]) >>
- 16);
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_mullo_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_mullo_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- (void)a_;
- (void)b_;
- r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.u16[i] = HEDLEY_STATIC_CAST(
- uint16_t,
- HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) *
- HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]));
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_or_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_or_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32f = a_.i32f | b_.i32f;
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
- r_.i32f[i] = a_.i32f[i] | b_.i32f[i];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_or_pd(a, b) simde_mm_or_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_or_si128(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_or_si128(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32f = a_.i32f | b_.i32f;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
- r_.i32f[i] = a_.i32f[i] | b_.i32f[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_or_si128(a, b) simde_mm_or_si128(a, b)
- #endif
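- /* _mm_packs_epi16: narrow 16-bit elements to 8 bits with signed saturation;
-  * a fills the low 8 bytes of the result and b the high 8 bytes. */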
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_packs_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_packs_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i8 =
- vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.i8[i] = (a_.i16[i] > INT8_MAX)
- ? INT8_MAX
- : ((a_.i16[i] < INT8_MIN)
- ? INT8_MIN
- : HEDLEY_STATIC_CAST(int8_t,
- a_.i16[i]));
- r_.i8[i + 8] = (b_.i16[i] > INT8_MAX)
- ? INT8_MAX
- : ((b_.i16[i] < INT8_MIN)
- ? INT8_MIN
- : HEDLEY_STATIC_CAST(
- int8_t, b_.i16[i]));
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_packs_epi32(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_packs_epi32(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i16 =
- vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32));
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- r_.i16[i] = (a_.i32[i] > INT16_MAX)
- ? INT16_MAX
- : ((a_.i32[i] < INT16_MIN)
- ? INT16_MIN
- : HEDLEY_STATIC_CAST(int16_t,
- a_.i32[i]));
- r_.i16[i + 4] =
- (b_.i32[i] > INT16_MAX)
- ? INT16_MAX
- : ((b_.i32[i] < INT16_MIN)
- ? INT16_MIN
- : HEDLEY_STATIC_CAST(int16_t,
- b_.i32[i]));
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b)
- #endif
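- /* _mm_packus_epi16: like _mm_packs_epi16, but saturates to the unsigned
-  * 0..255 range. */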
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_packus_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_packus_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u8 =
- vcombine_u8(vqmovun_s16(a_.neon_i16), vqmovun_s16(b_.neon_i16));
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.u8[i] = (a_.i16[i] > UINT8_MAX)
- ? UINT8_MAX
- : ((a_.i16[i] < 0)
- ? UINT8_C(0)
- : HEDLEY_STATIC_CAST(uint8_t,
- a_.i16[i]));
- r_.u8[i + 8] =
- (b_.i16[i] > UINT8_MAX)
- ? UINT8_MAX
- : ((b_.i16[i] < 0)
- ? UINT8_C(0)
- : HEDLEY_STATIC_CAST(uint8_t,
- b_.i16[i]));
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b)
- #endif
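- /* _mm_pause: spin-wait hint for the processor; there is no portable equivalent,
-  * so the fallback is simply a no-op. */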
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_pause(void)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_pause();
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_pause() (simde_mm_pause())
- #endif
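- /* _mm_sad_epu8: sum of absolute differences; each 64-bit result lane holds the sum of
-  * |a - b| over the corresponding group of eight unsigned bytes. */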
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_sad_epu8(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_sad_epu8(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8));
- r_.neon_u64 = vcombine_u64(vpaddl_u32(vpaddl_u16(vget_low_u16(t))),
- vpaddl_u32(vpaddl_u16(vget_high_u16(t))));
- #else
- for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
- uint16_t tmp = 0;
- SIMDE_VECTORIZE_REDUCTION(+ : tmp)
- for (size_t j = 0; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2);
- j++) {
- const size_t e = j + (i * 8);
- tmp += (a_.u8[e] > b_.u8[e]) ? (a_.u8[e] - b_.u8[e])
- : (b_.u8[e] - a_.u8[e]);
- }
- r_.i64[i] = tmp;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_set_epi8(int8_t e15, int8_t e14, int8_t e13, int8_t e12,
- int8_t e11, int8_t e10, int8_t e9, int8_t e8,
- int8_t e7, int8_t e6, int8_t e5, int8_t e4,
- int8_t e3, int8_t e2, int8_t e1, int8_t e0)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5,
- e4, e3, e2, e1, e0);
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9,
- e10, e11, e12, e13, e14, e15);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- SIMDE_ALIGN_LIKE_16(int8x16_t)
- int8_t data[16] = {e0, e1, e2, e3, e4, e5, e6, e7,
- e8, e9, e10, e11, e12, e13, e14, e15};
- r_.neon_i8 = vld1q_s8(data);
- #else
- r_.i8[0] = e0;
- r_.i8[1] = e1;
- r_.i8[2] = e2;
- r_.i8[3] = e3;
- r_.i8[4] = e4;
- r_.i8[5] = e5;
- r_.i8[6] = e6;
- r_.i8[7] = e7;
- r_.i8[8] = e8;
- r_.i8[9] = e9;
- r_.i8[10] = e10;
- r_.i8[11] = e11;
- r_.i8[12] = e12;
- r_.i8[13] = e13;
- r_.i8[14] = e14;
- r_.i8[15] = e15;
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, \
- e2, e1, e0) \
- simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, \
- e4, e3, e2, e1, e0)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_set_epi16(int16_t e7, int16_t e6, int16_t e5, int16_t e4,
- int16_t e3, int16_t e2, int16_t e1, int16_t e0)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0);
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- SIMDE_ALIGN_LIKE_16(int16x8_t)
- int16_t data[8] = {e0, e1, e2, e3, e4, e5, e6, e7};
- r_.neon_i16 = vld1q_s16(data);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7);
- #else
- r_.i16[0] = e0;
- r_.i16[1] = e1;
- r_.i16[2] = e2;
- r_.i16[3] = e3;
- r_.i16[4] = e4;
- r_.i16[5] = e5;
- r_.i16[6] = e6;
- r_.i16[7] = e7;
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) \
- simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0)
- #endif
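- /* The _mm_loadu_siNN helpers below read an unaligned scalar via memcpy and zero-extend it
-  * into the low lanes of the vector; the native intrinsics only exist in newer compilers,
-  * hence the compiler-version checks. */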
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_loadu_si16(void const *mem_addr)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && \
- (SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \
- HEDLEY_GCC_VERSION_CHECK(11, 0, 0) || \
- HEDLEY_INTEL_VERSION_CHECK(20, 21, 1))
- return _mm_loadu_si16(mem_addr);
- #else
- int16_t val;
- simde_memcpy(&val, mem_addr, sizeof(val));
- return simde_x_mm_cvtsi16_si128(val);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_set_epi32(int32_t e3, int32_t e2, int32_t e1, int32_t e0)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_set_epi32(e3, e2, e1, e0);
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = {e0, e1, e2, e3};
- r_.neon_i32 = vld1q_s32(data);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3);
- #else
- r_.i32[0] = e0;
- r_.i32[1] = e1;
- r_.i32[2] = e2;
- r_.i32[3] = e3;
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_loadu_si32(void const *mem_addr)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && \
- (SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \
- HEDLEY_GCC_VERSION_CHECK(11, 0, 0) || \
- HEDLEY_INTEL_VERSION_CHECK(20, 21, 1))
- return _mm_loadu_si32(mem_addr);
- #else
- int32_t val;
- simde_memcpy(&val, mem_addr, sizeof(val));
- return simde_mm_cvtsi32_si128(val);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_set_epi64(simde__m64 e1, simde__m64 e0)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
- return _mm_set_epi64(e1, e0);
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0),
- simde__m64_to_neon_i64(e1));
- #else
- r_.m64[0] = e0;
- r_.m64[1] = e1;
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0)))
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_set_epi64x(int64_t e1, int64_t e0)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && \
- (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19, 0, 0))
- return _mm_set_epi64x(e1, e0);
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1};
- r_.neon_i64 = vld1q_s64(data);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i64x2_make(e0, e1);
- #else
- r_.i64[0] = e0;
- r_.i64[1] = e1;
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_loadu_si64(void const *mem_addr)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && \
- (SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \
- HEDLEY_GCC_VERSION_CHECK(11, 0, 0) || \
- HEDLEY_INTEL_VERSION_CHECK(20, 21, 1))
- return _mm_loadu_si64(mem_addr);
- #else
- int64_t val;
- simde_memcpy(&val, mem_addr, sizeof(val));
- return simde_mm_cvtsi64_si128(val);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr)
- #endif
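- /* simde_x_mm_set_epu{8,16,32,64x} are SIMDe-internal helpers (not part of the Intel API)
-  * that build a vector from unsigned elements, highest lane first like _mm_set_*. */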
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_set_epu8(uint8_t e15, uint8_t e14, uint8_t e13,
- uint8_t e12, uint8_t e11, uint8_t e10,
- uint8_t e9, uint8_t e8, uint8_t e7, uint8_t e6,
- uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2,
- uint8_t e1, uint8_t e0)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_set_epi8(
- HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14),
- HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12),
- HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10),
- HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8),
- HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6),
- HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4),
- HEDLEY_STATIC_CAST(char, e3), HEDLEY_STATIC_CAST(char, e2),
- HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0));
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- SIMDE_ALIGN_LIKE_16(uint8x16_t)
- uint8_t data[16] = {e0, e1, e2, e3, e4, e5, e6, e7,
- e8, e9, e10, e11, e12, e13, e14, e15};
- r_.neon_u8 = vld1q_u8(data);
- #else
- r_.u8[0] = e0;
- r_.u8[1] = e1;
- r_.u8[2] = e2;
- r_.u8[3] = e3;
- r_.u8[4] = e4;
- r_.u8[5] = e5;
- r_.u8[6] = e6;
- r_.u8[7] = e7;
- r_.u8[8] = e8;
- r_.u8[9] = e9;
- r_.u8[10] = e10;
- r_.u8[11] = e11;
- r_.u8[12] = e12;
- r_.u8[13] = e13;
- r_.u8[14] = e14;
- r_.u8[15] = e15;
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_set_epu16(uint16_t e7, uint16_t e6, uint16_t e5,
- uint16_t e4, uint16_t e3, uint16_t e2,
- uint16_t e1, uint16_t e0)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_set_epi16(
- HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6),
- HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4),
- HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2),
- HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0));
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- SIMDE_ALIGN_LIKE_16(uint16x8_t)
- uint16_t data[8] = {e0, e1, e2, e3, e4, e5, e6, e7};
- r_.neon_u16 = vld1q_u16(data);
- #else
- r_.u16[0] = e0;
- r_.u16[1] = e1;
- r_.u16[2] = e2;
- r_.u16[3] = e3;
- r_.u16[4] = e4;
- r_.u16[5] = e5;
- r_.u16[6] = e6;
- r_.u16[7] = e7;
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_set_epu32(uint32_t e3, uint32_t e2, uint32_t e1,
- uint32_t e0)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_set_epi32(HEDLEY_STATIC_CAST(int, e3),
- HEDLEY_STATIC_CAST(int, e2),
- HEDLEY_STATIC_CAST(int, e1),
- HEDLEY_STATIC_CAST(int, e0));
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = {e0, e1, e2, e3};
- r_.neon_u32 = vld1q_u32(data);
- #else
- r_.u32[0] = e0;
- r_.u32[1] = e1;
- r_.u32[2] = e2;
- r_.u32[3] = e3;
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_set_epu64x(uint64_t e1, uint64_t e0)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && \
- (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19, 0, 0))
- return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1),
- HEDLEY_STATIC_CAST(int64_t, e0));
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1};
- r_.neon_u64 = vld1q_u64(data);
- #else
- r_.u64[0] = e0;
- r_.u64[1] = e1;
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_set_sd(simde_float64 a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_set_sd(a);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0);
- #else
- return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_set_sd(a) simde_mm_set_sd(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_set1_epi8(int8_t a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_set1_epi8(a);
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i8 = vdupq_n_s8(a);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i8x16_splat(a);
- #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
- r_.i8[i] = a;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_set1_epi8(a) simde_mm_set1_epi8(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_set1_epi16(int16_t a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_set1_epi16(a);
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i16 = vdupq_n_s16(a);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i16x8_splat(a);
- #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.i16[i] = a;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_set1_epi16(a) simde_mm_set1_epi16(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_set1_epi32(int32_t a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_set1_epi32(a);
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 = vdupq_n_s32(a);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i32x4_splat(a);
- #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- r_.i32[i] = a;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_set1_epi32(a) simde_mm_set1_epi32(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_set1_epi64x(int64_t a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && \
- (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19, 0, 0))
- return _mm_set1_epi64x(a);
- #else
- simde__m128i_private r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i64 = vdupq_n_s64(a);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i64x2_splat(a);
- #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
- r_.i64[i] = a;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_set1_epi64(simde__m64 a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
- return _mm_set1_epi64(a);
- #else
- simde__m64_private a_ = simde__m64_to_private(a);
- return simde_mm_set1_epi64x(a_.i64[0]);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_set1_epi64(a) simde_mm_set1_epi64(a)
- #endif
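- /* simde_x_mm_set1_epu{8,16,32,64} are SIMDe-internal unsigned splat helpers; they defer to
-  * the signed set1 functions except on POWER, where vec_splats takes the unsigned type directly. */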
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_set1_epu8(uint8_t value)
- {
- #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- return simde__m128i_from_altivec_u8(
- vec_splats(HEDLEY_STATIC_CAST(unsigned char, value)));
- #else
- return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value));
- #endif
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_set1_epu16(uint16_t value)
- {
- #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- return simde__m128i_from_altivec_u16(
- vec_splats(HEDLEY_STATIC_CAST(unsigned short, value)));
- #else
- return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value));
- #endif
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_set1_epu32(uint32_t value)
- {
- #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- return simde__m128i_from_altivec_u32(
- vec_splats(HEDLEY_STATIC_CAST(unsigned int, value)));
- #else
- return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value));
- #endif
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_set1_epu64(uint64_t value)
- {
- #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- return simde__m128i_from_altivec_u64(
- vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value)));
- #else
- return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value));
- #endif
- }
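- /* The _mm_setr_* variants take their arguments in memory (low-to-high) order,
-  * i.e. reversed relative to _mm_set_*. */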
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_setr_epi8(int8_t e15, int8_t e14, int8_t e13, int8_t e12,
- int8_t e11, int8_t e10, int8_t e9, int8_t e8,
- int8_t e7, int8_t e6, int8_t e5, int8_t e4,
- int8_t e3, int8_t e2, int8_t e1, int8_t e0)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5,
- e4, e3, e2, e1, e0);
- #else
- return simde_mm_set_epi8(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10,
- e11, e12, e13, e14, e15);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, \
- e3, e2, e1, e0) \
- simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, \
- e4, e3, e2, e1, e0)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_setr_epi16(int16_t e7, int16_t e6, int16_t e5, int16_t e4,
- int16_t e3, int16_t e2, int16_t e1, int16_t e0)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0);
- #else
- return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) \
- simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_setr_epi32(int32_t e3, int32_t e2, int32_t e1, int32_t e0)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_setr_epi32(e3, e2, e1, e0);
- #else
- return simde_mm_set_epi32(e0, e1, e2, e3);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_setr_epi64(simde__m64 e1, simde__m64 e0)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
- return _mm_setr_epi64(e1, e0);
- #else
- return simde_mm_set_epi64(e0, e1);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0)))
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_setr_pd(simde_float64 e1, simde_float64 e0)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_setr_pd(e1, e0);
- #else
- return simde_mm_set_pd(e0, e1);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_setzero_pd(void)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_setzero_pd();
- #else
- return simde_mm_castsi128_pd(simde_mm_setzero_si128());
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_setzero_pd() simde_mm_setzero_pd()
- #endif
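- /* _mm_undefined_pd/_mm_undefined_si128 return a vector with unspecified contents; the
-  * fallback zero-initializes unless uninitialized-variable diagnostics are being suppressed. */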
- #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
- HEDLEY_DIAGNOSTIC_PUSH
- SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_undefined_pd(void)
- {
- simde__m128d_private r_;
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128)
- r_.n = _mm_undefined_pd();
- #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
- r_ = simde__m128d_to_private(simde_mm_setzero_pd());
- #endif
- return simde__m128d_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_undefined_pd() simde_mm_undefined_pd()
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_undefined_si128(void)
- {
- simde__m128i_private r_;
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128)
- r_.n = _mm_undefined_si128();
- #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
- r_ = simde__m128i_to_private(simde_mm_setzero_si128());
- #endif
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_undefined_si128() (simde_mm_undefined_si128())
- #endif
- #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
- HEDLEY_DIAGNOSTIC_POP
- #endif
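- /* simde_x_mm_setone_pd/si128 are SIMDe-internal helpers returning a vector with every bit set. */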
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_x_mm_setone_pd(void)
- {
- return simde_mm_castps_pd(simde_x_mm_setone_ps());
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_setone_si128(void)
- {
- return simde_mm_castps_si128(simde_x_mm_setone_ps());
- }
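- /* _mm_shuffle_epi32: each 2-bit field of imm8 selects which 32-bit lane of a is written to the
-  * corresponding result lane; e.g. imm8 == 0x1B (selectors 3,2,1,0) reverses the lane order. */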
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_shuffle_epi32(simde__m128i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
- {
- simde__m128i_private r_, a_ = simde__m128i_to_private(a);
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3];
- }
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE)
- #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8))
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- #define simde_mm_shuffle_epi32(a, imm8) \
- __extension__({ \
- int32x4_t ret; \
- ret = vmovq_n_s32(vgetq_lane_s32(vreinterpretq_s32_s64(a), \
- (imm8) & (0x3))); \
- ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_s64(a), \
- ((imm8) >> 2) & 0x3), \
- ret, 1); \
- ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_s64(a), \
- ((imm8) >> 4) & 0x3), \
- ret, 2); \
- ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_s64(a), \
- ((imm8) >> 6) & 0x3), \
- ret, 3); \
- vreinterpretq_s64_s32(ret); \
- })
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- #define simde_mm_shuffle_epi32(a, imm8) \
- (__extension__({ \
- const simde__m128i_private simde__tmp_a_ = \
- simde__m128i_to_private(a); \
- simde__m128i_from_private((simde__m128i_private){ \
- .i32 = SIMDE_SHUFFLE_VECTOR_( \
- 32, 16, (simde__tmp_a_).i32, \
- (simde__tmp_a_).i32, ((imm8)) & 3, \
- ((imm8) >> 2) & 3, ((imm8) >> 4) & 3, \
- ((imm8) >> 6) & 3)}); \
- }))
- #endif
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_shuffle_pd(simde__m128d a, simde__m128d b, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3)
- {
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1];
- r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1];
- return simde__m128d_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
- #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8))
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- #define simde_mm_shuffle_pd(a, b, imm8) \
- (__extension__({ \
- simde__m128d_from_private((simde__m128d_private){ \
- .f64 = SIMDE_SHUFFLE_VECTOR_( \
- 64, 16, simde__m128d_to_private(a).f64, \
- simde__m128d_to_private(b).f64, \
- (((imm8)) & 1), (((imm8) >> 1) & 1) + 2)}); \
- }))
- #endif
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8)
- #endif
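- /* _mm_shufflehi_epi16 copies the low four 16-bit lanes unchanged and shuffles the high four
-  * using 2-bit selectors from imm8; _mm_shufflelo_epi16 further below is the mirror image. */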
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_shufflehi_epi16(simde__m128i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
- {
- simde__m128i_private r_, a_ = simde__m128i_to_private(a);
- SIMDE_VECTORIZE
- for (size_t i = 0; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2);
- i++) {
- r_.i16[i] = a_.i16[i];
- }
- for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2);
- i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4];
- }
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE)
- #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8))
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- #define simde_mm_shufflehi_epi16(a, imm8) \
- __extension__({ \
- int16x8_t ret = vreinterpretq_s16_s64(a); \
- int16x4_t highBits = vget_high_s16(ret); \
- ret = vsetq_lane_s16(vget_lane_s16(highBits, (imm8) & (0x3)), \
- ret, 4); \
- ret = vsetq_lane_s16( \
- vget_lane_s16(highBits, ((imm8) >> 2) & 0x3), ret, 5); \
- ret = vsetq_lane_s16( \
- vget_lane_s16(highBits, ((imm8) >> 4) & 0x3), ret, 6); \
- ret = vsetq_lane_s16( \
- vget_lane_s16(highBits, ((imm8) >> 6) & 0x3), ret, 7); \
- vreinterpretq_s64_s16(ret); \
- })
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- #define simde_mm_shufflehi_epi16(a, imm8) \
- (__extension__({ \
- const simde__m128i_private simde__tmp_a_ = \
- simde__m128i_to_private(a); \
- simde__m128i_from_private((simde__m128i_private){ \
- .i16 = SIMDE_SHUFFLE_VECTOR_( \
- 16, 16, (simde__tmp_a_).i16, \
- (simde__tmp_a_).i16, 0, 1, 2, 3, \
- (((imm8)) & 3) + 4, (((imm8) >> 2) & 3) + 4, \
- (((imm8) >> 4) & 3) + 4, \
- (((imm8) >> 6) & 3) + 4)}); \
- }))
- #endif
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_shufflelo_epi16(simde__m128i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
- {
- simde__m128i_private r_, a_ = simde__m128i_to_private(a);
- for (size_t i = 0; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2);
- i++) {
- r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)];
- }
- SIMDE_VECTORIZE
- for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2);
- i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.i16[i] = a_.i16[i];
- }
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE)
- #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8))
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- #define simde_mm_shufflelo_epi16(a, imm8) \
- __extension__({ \
- int16x8_t ret = vreinterpretq_s16_s64(a); \
- int16x4_t lowBits = vget_low_s16(ret); \
- ret = vsetq_lane_s16(vget_lane_s16(lowBits, (imm8) & (0x3)), \
- ret, 0); \
- ret = vsetq_lane_s16( \
- vget_lane_s16(lowBits, ((imm8) >> 2) & 0x3), ret, 1); \
- ret = vsetq_lane_s16( \
- vget_lane_s16(lowBits, ((imm8) >> 4) & 0x3), ret, 2); \
- ret = vsetq_lane_s16( \
- vget_lane_s16(lowBits, ((imm8) >> 6) & 0x3), ret, 3); \
- vreinterpretq_s64_s16(ret); \
- })
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- #define simde_mm_shufflelo_epi16(a, imm8) \
- (__extension__({ \
- const simde__m128i_private simde__tmp_a_ = \
- simde__m128i_to_private(a); \
- simde__m128i_from_private((simde__m128i_private){ \
- .i16 = SIMDE_SHUFFLE_VECTOR_( \
- 16, 16, (simde__tmp_a_).i16, \
- (simde__tmp_a_).i16, (((imm8)) & 3), \
- (((imm8) >> 2) & 3), (((imm8) >> 4) & 3), \
- (((imm8) >> 6) & 3), 4, 5, 6, 7)}); \
- }))
- #endif
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8)
- #endif
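- /* The _mm_sll_epi{16,32,64} family shifts left by the count held in the low 64 bits of
-  * `count`; counts at or above the element width produce zero. */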
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_sll_epi16(simde__m128i a, simde__m128i count)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_sll_epi16(a, count);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- count_ = simde__m128i_to_private(count);
- if (count_.u64[0] > 15)
- return simde_mm_setzero_si128();
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u16 = (a_.u16 << count_.u64[0]);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(
- int16_t, count_.u64[0])));
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 =
- ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16)
- ? wasm_i16x8_shl(a_.wasm_v128,
- HEDLEY_STATIC_CAST(
- int32_t,
- wasm_i64x2_extract_lane(
- count_.wasm_v128, 0)))
- : wasm_i16x8_const(0, 0, 0, 0, 0, 0, 0, 0));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
- r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t,
- (a_.u16[i] << count_.u64[0]));
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count))
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_sll_epi32(simde__m128i a, simde__m128i count)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_sll_epi32(a, count);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- count_ = simde__m128i_to_private(count);
- if (count_.u64[0] > 31)
- return simde_mm_setzero_si128();
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u32 = (a_.u32 << count_.u64[0]);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(
- int32_t, count_.u64[0])));
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 =
- ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32)
- ? wasm_i32x4_shl(a_.wasm_v128,
- HEDLEY_STATIC_CAST(
- int32_t,
- wasm_i64x2_extract_lane(
- count_.wasm_v128, 0)))
- : wasm_i32x4_const(0, 0, 0, 0));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
- r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t,
- (a_.u32[i] << count_.u64[0]));
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count)))
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_sll_epi64(simde__m128i a, simde__m128i count)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_sll_epi64(a, count);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- count_ = simde__m128i_to_private(count);
- if (count_.u64[0] > 63)
- return simde_mm_setzero_si128();
- const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u64 = vshlq_u64(a_.neon_u64,
- vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s)));
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = (s < 64) ? wasm_i64x2_shl(a_.wasm_v128, s)
- : wasm_i64x2_const(0, 0);
- #else
- #if !defined(SIMDE_BUG_GCC_94488)
- SIMDE_VECTORIZE
- #endif
- for (size_t i = 0; i < (sizeof(r_.u64) / sizeof(r_.u64[0])); i++) {
- r_.u64[i] = a_.u64[i] << s;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count)))
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_sqrt_pd(simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_sqrt_pd(a);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vsqrtq_f64(a_.neon_f64);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128);
- #elif defined(simde_math_sqrt)
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.f64[i] = simde_math_sqrt(a_.f64[i]);
- }
- #else
- HEDLEY_UNREACHABLE();
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_sqrt_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_sqrt_sd(a, b);
- #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
- return simde_mm_move_sd(a, simde_mm_sqrt_pd(b));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(simde_math_sqrt)
- r_.f64[0] = simde_math_sqrt(b_.f64[0]);
- r_.f64[1] = a_.f64[1];
- #else
- HEDLEY_UNREACHABLE();
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b)
- #endif
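- /* The _mm_srl_epi{16,32,64} family performs logical right shifts by the count in the low
-  * 64 bits of `count`, clamped so that oversized counts yield zero. */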
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_srl_epi16(simde__m128i a, simde__m128i count)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_srl_epi16(a, count);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- count_ = simde__m128i_to_private(count);
- const int cnt = HEDLEY_STATIC_CAST(
- int, (count_.i64[0] > 16 ? 16 : count_.i64[0]));
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u16 = vshlq_u16(a_.neon_u16,
- vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt)));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
- r_.u16[i] = a_.u16[i] >> cnt;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count)))
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_srl_epi32(simde__m128i a, simde__m128i count)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_srl_epi32(a, count);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- count_ = simde__m128i_to_private(count);
- const int cnt = HEDLEY_STATIC_CAST(
- int, (count_.i64[0] > 32 ? 32 : count_.i64[0]));
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u32 = vshlq_u32(a_.neon_u32,
- vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt)));
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, cnt);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
- r_.u32[i] = a_.u32[i] >> cnt;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count)))
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_srl_epi64(simde__m128i a, simde__m128i count)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_srl_epi64(a, count);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- count_ = simde__m128i_to_private(count);
- const int cnt = HEDLEY_STATIC_CAST(
- int, (count_.i64[0] > 64 ? 64 : count_.i64[0]));
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u64 = vshlq_u64(a_.neon_u64,
- vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt)));
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, cnt);
- #else
- #if !defined(SIMDE_BUG_GCC_94488)
- SIMDE_VECTORIZE
- #endif
- for (size_t i = 0; i < (sizeof(r_.u64) / sizeof(r_.u64[0])); i++) {
- r_.u64[i] = a_.u64[i] >> cnt;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count)))
- #endif
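- /* The arithmetic right shifts (_mm_srai_* / _mm_sra_*) clamp the count to width-1, so
-  * oversized counts fill every lane with its sign bit rather than zero. */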
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_srai_epi16(simde__m128i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
- {
- /* MSVC requires imm8 to be an integer in the range 0 through 255. */
- simde__m128i_private r_, a_ = simde__m128i_to_private(a);
- const int cnt = (imm8 & ~15) ? 15 : imm8;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i16 = vshlq_s16(a_.neon_i16,
- vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt)));
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, cnt);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i16[0])); i++) {
- r_.i16[i] = a_.i16[i] >> cnt;
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE)
- #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8))
- #endif
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_srai_epi32(simde__m128i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
- {
- /* MSVC requires imm8 to be an integer in the range 0 through 255. */
- simde__m128i_private r_, a_ = simde__m128i_to_private(a);
- const int cnt = (imm8 & ~31) ? 31 : imm8;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt));
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, cnt);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i32[0])); i++) {
- r_.i32[i] = a_.i32[i] >> cnt;
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE)
- #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8))
- #endif
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_sra_epi16(simde__m128i a, simde__m128i count)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_sra_epi16(a, count);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- count_ = simde__m128i_to_private(count);
- const int cnt = HEDLEY_STATIC_CAST(
- int, (count_.i64[0] > 15 ? 15 : count_.i64[0]));
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i16 = vshlq_s16(a_.neon_i16,
- vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt)));
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, cnt);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.i16[i] = a_.i16[i] >> cnt;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count))
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_sra_epi32(simde__m128i a, simde__m128i count)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32)
- return _mm_sra_epi32(a, count);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- count_ = simde__m128i_to_private(count);
- const int cnt = count_.u64[0] > 31
- ? 31
- : HEDLEY_STATIC_CAST(int, count_.u64[0]);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 = vshlq_s32(a_.neon_i32,
- vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt)));
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, cnt);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- r_.i32[i] = a_.i32[i] >> cnt;
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count)))
- #endif
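- /* The immediate shifts (_mm_slli_* / _mm_srli_*) take a compile-time count; counts larger
-  * than the element width return zero, matching the native instructions. */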
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_slli_epi16(simde__m128i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
- {
- if (HEDLEY_UNLIKELY((imm8 > 15))) {
- return simde_mm_setzero_si128();
- }
- simde__m128i_private r_, a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.i16 = a_.i16 << (imm8 & 0xff);
- #else
- const int s =
- (imm8 >
- HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1)
- ? 0
- : imm8;
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s);
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE)
- #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8)
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- #define simde_mm_slli_epi16(a, imm8) \
- (__extension__({ \
- simde__m128i ret; \
- if ((imm8) <= 0) { \
- ret = a; \
- } else if ((imm8) > 15) { \
- ret = simde_mm_setzero_si128(); \
- } else { \
- ret = simde__m128i_from_neon_i16(vshlq_n_s16( \
- simde__m128i_to_neon_i16(a), ((imm8)&15))); \
- } \
- ret; \
- }))
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- #define simde_mm_slli_epi16(a, imm8) \
- (((imm8) < 16) \
- ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, (imm8)) \
- : wasm_i16x8_const(0, 0, 0, 0, 0, 0, 0, 0))
- #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- #define simde_mm_slli_epi16(a, imm8) \
- (((imm8) & ~15) ? simde_mm_setzero_si128() \
- : simde__m128i_from_altivec_i16( \
- vec_sl(simde__m128i_to_altivec_i16(a), \
- vec_splat_u16(HEDLEY_STATIC_CAST( \
- unsigned short, imm8)))))
- #endif
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_slli_epi32(simde__m128i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
- {
- if (HEDLEY_UNLIKELY((imm8 > 31))) {
- return simde_mm_setzero_si128();
- }
- simde__m128i_private r_, a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.i32 = a_.i32 << imm8;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- r_.i32[i] = a_.i32[i] << (imm8 & 0xff);
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE)
- #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8)
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- #define simde_mm_slli_epi32(a, imm8) \
- (__extension__({ \
- simde__m128i ret; \
- if ((imm8) <= 0) { \
- ret = a; \
- } else if ((imm8) > 31) { \
- ret = simde_mm_setzero_si128(); \
- } else { \
- ret = simde__m128i_from_neon_i32(vshlq_n_s32( \
- simde__m128i_to_neon_i32(a), ((imm8)&31))); \
- } \
- ret; \
- }))
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- #define simde_mm_slli_epi32(a, imm8) \
- (((imm8) < 32) \
- ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, (imm8)) \
- : wasm_i32x4_const(0, 0, 0, 0))
- #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- #define simde_mm_slli_epi32(a, imm8) \
- (__extension__({ \
- simde__m128i ret; \
- if ((imm8) <= 0) { \
- ret = a; \
- } else if ((imm8) > 31) { \
- ret = simde_mm_setzero_si128(); \
- } else { \
- ret = simde__m128i_from_altivec_i32( \
- vec_sl(simde__m128i_to_altivec_i32(a), \
- vec_splats(HEDLEY_STATIC_CAST( \
- unsigned int, (imm8)&31)))); \
- } \
- ret; \
- }))
- #endif
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_slli_epi64(simde__m128i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
- {
- if (HEDLEY_UNLIKELY((imm8 > 63))) {
- return simde_mm_setzero_si128();
- }
- simde__m128i_private r_, a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.i64 = a_.i64 << imm8;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
- r_.i64[i] = a_.i64[i] << (imm8 & 0xff);
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE)
- #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8)
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- #define simde_mm_slli_epi64(a, imm8) \
- (__extension__({ \
- simde__m128i ret; \
- if ((imm8) <= 0) { \
- ret = a; \
- } else if ((imm8) > 63) { \
- ret = simde_mm_setzero_si128(); \
- } else { \
- ret = simde__m128i_from_neon_i64(vshlq_n_s64( \
- simde__m128i_to_neon_i64(a), ((imm8)&63))); \
- } \
- ret; \
- }))
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- #define simde_mm_slli_epi64(a, imm8) \
- (((imm8) < 64) \
- ? wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, (imm8)) \
- : wasm_i64x2_const(0, 0))
- #endif
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_srli_epi16(simde__m128i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
- {
- if (HEDLEY_UNLIKELY((imm8 > 15))) {
- return simde_mm_setzero_si128();
- }
- simde__m128i_private r_, a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u16 = a_.u16 >> imm8;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.u16[i] = a_.u16[i] >> (imm8 & 0xff);
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE)
- #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8)
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- #define simde_mm_srli_epi16(a, imm8) \
- (__extension__({ \
- simde__m128i ret; \
- if ((imm8) <= 0) { \
- ret = a; \
- } else if ((imm8) > 15) { \
- ret = simde_mm_setzero_si128(); \
- } else { \
- ret = simde__m128i_from_neon_u16(vshrq_n_u16( \
- simde__m128i_to_neon_u16(a), \
- (((imm8)&15) | (((imm8)&15) == 0)))); \
- } \
- ret; \
- }))
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- #define simde_mm_srli_epi16(a, imm8) \
- (((imm8) < 16) \
- ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, (imm8)) \
- : wasm_i16x8_const(0, 0, 0, 0, 0, 0, 0, 0))
- #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- #define simde_mm_srli_epi16(a, imm8) \
- (((imm8) & ~15) ? simde_mm_setzero_si128() \
- : simde__m128i_from_altivec_i16( \
- vec_sr(simde__m128i_to_altivec_i16(a), \
- vec_splat_u16(HEDLEY_STATIC_CAST( \
- unsigned short, imm8)))))
- #endif
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_srli_epi32(simde__m128i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
- {
- if (HEDLEY_UNLIKELY((imm8 > 31))) {
- return simde_mm_setzero_si128();
- }
- simde__m128i_private r_, a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u32 = a_.u32 >> (imm8 & 0xff);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- r_.u32[i] = a_.u32[i] >> (imm8 & 0xff);
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE)
- #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8)
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- #define simde_mm_srli_epi32(a, imm8) \
- (__extension__({ \
- simde__m128i ret; \
- if ((imm8) <= 0) { \
- ret = a; \
- } else if ((imm8) > 31) { \
- ret = simde_mm_setzero_si128(); \
- } else { \
- ret = simde__m128i_from_neon_u32(vshrq_n_u32( \
- simde__m128i_to_neon_u32(a), \
- (((imm8)&31) | (((imm8)&31) == 0)))); \
- } \
- ret; \
- }))
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- #define simde_mm_srli_epi32(a, imm8) \
- (((imm8) < 32) \
- ? wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, (imm8)) \
- : wasm_i32x4_const(0, 0, 0, 0))
- #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- #define simde_mm_srli_epi32(a, imm8) \
- (__extension__({ \
- simde__m128i ret; \
- if ((imm8) <= 0) { \
- ret = a; \
- } else if ((imm8) > 31) { \
- ret = simde_mm_setzero_si128(); \
- } else { \
- ret = simde__m128i_from_altivec_i32( \
- vec_sr(simde__m128i_to_altivec_i32(a), \
- vec_splats(HEDLEY_STATIC_CAST( \
- unsigned int, (imm8)&31)))); \
- } \
- ret; \
- }))
- #endif
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_srli_epi64(simde__m128i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)
- {
- simde__m128i_private r_, a_ = simde__m128i_to_private(a);
- if (HEDLEY_UNLIKELY((imm8 & 63) != imm8))
- return simde_mm_setzero_si128();
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8));
- #else
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488)
- r_.u64 = a_.u64 >> imm8;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
- r_.u64[i] = a_.u64[i] >> imm8;
- }
- #endif
- #endif
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_SSE2_NATIVE)
- #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8)
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- #define simde_mm_srli_epi64(a, imm8) \
- (__extension__({ \
- simde__m128i ret; \
- if ((imm8) <= 0) { \
- ret = a; \
- } else if ((imm8) > 63) { \
- ret = simde_mm_setzero_si128(); \
- } else { \
- ret = simde__m128i_from_neon_u64(vshrq_n_u64( \
- simde__m128i_to_neon_u64(a), \
- (((imm8)&63) | (((imm8)&63) == 0)))); \
- } \
- ret; \
- }))
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- #define simde_mm_srli_epi64(a, imm8) \
- (((imm8) < 64) \
- ? wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, (imm8)) \
- : wasm_i64x2_const(0, 0))
- #endif
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8)
- #endif
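- /* Store helpers: _mm_store_pd and _mm_store_si128 require 16-byte aligned mem_addr, while the
-  * _mm_storeu_* variants do not; the portable fallbacks go through memcpy to avoid aliasing issues. */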
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_store_pd(simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)],
- simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_store_pd(mem_addr, a);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr),
- simde__m128d_to_private(a).neon_i64);
- #else
- simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a,
- sizeof(a));
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_store_pd(mem_addr, a) \
- simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_store1_pd(simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)],
- simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_store1_pd(mem_addr, a);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0));
- #else
- mem_addr[0] = a_.f64[0];
- mem_addr[1] = a_.f64[0];
- #endif
- #endif
- }
- #define simde_mm_store_pd1(mem_addr, a) \
- simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_store1_pd(mem_addr, a) \
- simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
- #define _mm_store_pd1(mem_addr, a) \
- simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_store_sd(simde_float64 *mem_addr, simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_store_sd(mem_addr, a);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0);
- simde_memcpy(mem_addr, &v, sizeof(v));
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- const int64_t v = vgetq_lane_s64(a_.neon_i64, 0);
- simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), &v,
- sizeof(v));
- #else
- simde_float64 v = a_.f64[0];
- simde_memcpy(mem_addr, &v, sizeof(simde_float64));
- #endif
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_store_sd(mem_addr, a) \
- simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_store_si128(simde__m128i *mem_addr, simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_store_si128(HEDLEY_STATIC_CAST(__m128i *, mem_addr), a);
- #else
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), a_.neon_i32);
- #else
- simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_,
- sizeof(a_));
- #endif
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_storeh_pd(simde_float64 *mem_addr, simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_storeh_pd(mem_addr, a);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- *mem_addr = vgetq_lane_f64(a_.neon_f64, 1);
- #else
- *mem_addr = a_.f64[1];
- #endif
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_storeh_pd(mem_addr, a) \
- simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_storel_epi64(simde__m128i *mem_addr, simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i *, mem_addr), a);
- #else
- simde__m128i_private a_ = simde__m128i_to_private(a);
- int64_t tmp;
- /* memcpy to prevent aliasing, tmp because we can't take the
- * address of a vector element. */
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- tmp = vgetq_lane_s64(a_.neon_i64, 0);
- #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
- #if defined(SIMDE_BUG_GCC_95227)
- (void)a_;
- #endif
- tmp = vec_extract(a_.altivec_i64, 0);
- #else
- tmp = a_.i64[0];
- #endif
- simde_memcpy(mem_addr, &tmp, sizeof(tmp));
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_storel_pd(simde_float64 *mem_addr, simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_storel_pd(mem_addr, a);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a);
- simde_float64 tmp;
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- tmp = vgetq_lane_f64(a_.neon_f64, 0);
- #else
- tmp = a_.f64[0];
- #endif
- simde_memcpy(mem_addr, &tmp, sizeof(tmp));
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_storel_pd(mem_addr, a) \
- simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_storer_pd(simde_float64 mem_addr[2], simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_storer_pd(mem_addr, a);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr),
- vextq_s64(a_.neon_i64, a_.neon_i64, 1));
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0);
- simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_));
- #else
- mem_addr[0] = a_.f64[1];
- mem_addr[1] = a_.f64[0];
- #endif
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_storer_pd(mem_addr, a) \
- simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_storeu_pd(simde_float64 *mem_addr, simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_storeu_pd(mem_addr, a);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64);
- #else
- simde_memcpy(mem_addr, &a, sizeof(a));
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_storeu_pd(mem_addr, a) \
- simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_storeu_si128(simde__m128i *mem_addr, simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i *, mem_addr), a);
- #else
- simde_memcpy(mem_addr, &a, sizeof(a));
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_storeu_si16(void *mem_addr, simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && \
- (SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \
- HEDLEY_GCC_VERSION_CHECK(11, 0, 0) || \
- HEDLEY_INTEL_VERSION_CHECK(20, 21, 1))
- _mm_storeu_si16(mem_addr, a);
- #else
- int16_t val = simde_x_mm_cvtsi128_si16(a);
- simde_memcpy(mem_addr, &val, sizeof(val));
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_storeu_si32(void *mem_addr, simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && \
- (SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \
- HEDLEY_GCC_VERSION_CHECK(11, 0, 0) || \
- HEDLEY_INTEL_VERSION_CHECK(20, 21, 1))
- _mm_storeu_si32(mem_addr, a);
- #else
- int32_t val = simde_mm_cvtsi128_si32(a);
- simde_memcpy(mem_addr, &val, sizeof(val));
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_storeu_si64(void *mem_addr, simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && \
- (SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \
- HEDLEY_GCC_VERSION_CHECK(11, 0, 0) || \
- HEDLEY_INTEL_VERSION_CHECK(20, 21, 1))
- _mm_storeu_si64(mem_addr, a);
- #else
- int64_t val = simde_mm_cvtsi128_si64(a);
- simde_memcpy(mem_addr, &val, sizeof(val));
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a)
- #endif
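- /* The _mm_stream_* functions are non-temporal (cache-bypassing) stores on x86; the portable
-  * fallbacks are ordinary stores, which is functionally equivalent minus the cache hint. */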
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_stream_pd(simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)],
- simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_stream_pd(mem_addr, a);
- #else
- simde_memcpy(mem_addr, &a, sizeof(a));
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_stream_pd(mem_addr, a) \
- simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_stream_si128(simde__m128i *mem_addr, simde__m128i a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)
- _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i *, mem_addr), a);
- #else
- simde_memcpy(mem_addr, &a, sizeof(a));
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_stream_si32(int32_t *mem_addr, int32_t a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_stream_si32(mem_addr, a);
- #else
- *mem_addr = a;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_stream_si64(int64_t *mem_addr, int64_t a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && \
- !defined(HEDLEY_MSVC_VERSION)
- _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int *,
- int64_t *, mem_addr),
- a);
- #else
- *mem_addr = a;
- #endif
- }
- #define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a)
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_stream_si64(mem_addr, a) \
- simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST( \
- int64_t *, __int64 *, mem_addr), \
- a)
- #define _mm_stream_si64x(mem_addr, a) \
- simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST( \
- int64_t *, __int64 *, mem_addr), \
- a)
- #endif
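- /* Element-wise subtraction.  Each width follows the same tiering: NEON where
-  * available, GCC-style vector operators where the compiler supports them,
-  * and a scalar loop otherwise. */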
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_sub_epi8(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_sub_epi8(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i8 = a_.i8 - b_.i8;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
- r_.i8[i] = a_.i8[i] - b_.i8[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_sub_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_sub_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i16 = a_.i16 - b_.i16;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
- r_.i16[i] = a_.i16[i] - b_.i16[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_sub_epi32(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_sub_epi32(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32 = a_.i32 - b_.i32;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- r_.i32[i] = a_.i32[i] - b_.i32[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_sub_epi64(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_sub_epi64(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i64 = a_.i64 - b_.i64;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
- r_.i64[i] = a_.i64[i] - b_.i64[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_sub_epu32(simde__m128i a, simde__m128i b)
- {
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.u32 = a_.u32 - b_.u32;
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
- r_.u32[i] = a_.u32[i] - b_.u32[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_sub_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_sub_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.f64 = a_.f64 - b_.f64;
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.f64[i] = a_.f64[i] - b_.f64[i];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_sub_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_sub_sd(a, b);
- #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
- return simde_mm_move_sd(a, simde_mm_sub_pd(a, b));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- r_.f64[0] = a_.f64[0] - b_.f64[0];
- r_.f64[1] = a_.f64[1];
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m64 simde_mm_sub_si64(simde__m64 a, simde__m64 b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
- return _mm_sub_si64(a, b);
- #else
- simde__m64_private r_, a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i64 = a_.i64 - b_.i64;
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64);
- #else
- r_.i64[0] = a_.i64[0] - b_.i64[0];
- #endif
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b)
- #endif
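- /* Saturating signed subtraction: the scalar fallback clamps results that
-  * would underflow INT8_MIN/INT16_MIN or overflow INT8_MAX/INT16_MAX. */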
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_subs_epi8(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_subs_epi8(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i8x16_sub_saturate(a_.wasm_v128, b_.wasm_v128);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i8[0])); i++) {
- if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) {
- r_.i8[i] = INT8_MIN;
- } else if ((b_.i8[i]) < 0 &&
- (a_.i8[i]) > INT8_MAX + (b_.i8[i])) {
- r_.i8[i] = INT8_MAX;
- } else {
- r_.i8[i] = (a_.i8[i]) - (b_.i8[i]);
- }
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_subs_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_subs_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_i16x8_sub_saturate(a_.wasm_v128, b_.wasm_v128);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i16[0])); i++) {
- if (((b_.i16[i]) > 0 &&
- (a_.i16[i]) < INT16_MIN + (b_.i16[i]))) {
- r_.i16[i] = INT16_MIN;
- } else if ((b_.i16[i]) < 0 &&
- (a_.i16[i]) > INT16_MAX + (b_.i16[i])) {
- r_.i16[i] = INT16_MAX;
- } else {
- r_.i16[i] = (a_.i16[i]) - (b_.i16[i]);
- }
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b)
- #endif
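- /* Saturating unsigned subtraction: the scalar fallback widens to int32_t and
-  * clamps the difference to [0, UINT8_MAX] / [0, UINT16_MAX]. */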
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_subs_epu8(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_subs_epu8(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_u8x16_sub_saturate(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i8[0])); i++) {
- const int32_t x = a_.u8[i] - b_.u8[i];
- if (x < 0) {
- r_.u8[i] = 0;
- } else if (x > UINT8_MAX) {
- r_.u8[i] = UINT8_MAX;
- } else {
- r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);
- }
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_subs_epu16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_subs_epu16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_u16x8_sub_saturate(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i16[0])); i++) {
- const int32_t x = a_.u16[i] - b_.u16[i];
- if (x < 0) {
- r_.u16[i] = 0;
- } else if (x > UINT16_MAX) {
- r_.u16[i] = UINT16_MAX;
- } else {
- r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);
- }
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b)
- #endif
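- /* Unordered scalar comparisons.  The NEON paths detect NaNs via
-  * self-comparison; the fenv paths wrap the compare in
-  * feholdexcept()/fesetenv() so it does not disturb the floating-point
-  * exception state. */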
- SIMDE_FUNCTION_ATTRIBUTES
- int simde_mm_ucomieq_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_ucomieq_sd(a, b);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- int r;
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);
- uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);
- uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(
- vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan))));
- uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64);
- r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return wasm_f64x2_extract_lane(a_.wasm_v128, 0) ==
- wasm_f64x2_extract_lane(b_.wasm_v128, 0);
- #elif defined(SIMDE_HAVE_FENV_H)
- fenv_t envp;
- int x = feholdexcept(&envp);
- r = a_.f64[0] == b_.f64[0];
- if (HEDLEY_LIKELY(x == 0))
- fesetenv(&envp);
- #else
- r = a_.f64[0] == b_.f64[0];
- #endif
- return r;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int simde_mm_ucomige_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_ucomige_sd(a, b);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- int r;
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);
- uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);
- uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan);
- uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64);
- r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >=
- wasm_f64x2_extract_lane(b_.wasm_v128, 0);
- #elif defined(SIMDE_HAVE_FENV_H)
- fenv_t envp;
- int x = feholdexcept(&envp);
- r = a_.f64[0] >= b_.f64[0];
- if (HEDLEY_LIKELY(x == 0))
- fesetenv(&envp);
- #else
- r = a_.f64[0] >= b_.f64[0];
- #endif
- return r;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int simde_mm_ucomigt_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_ucomigt_sd(a, b);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- int r;
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);
- uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);
- uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan);
- uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64);
- r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >
- wasm_f64x2_extract_lane(b_.wasm_v128, 0);
- #elif defined(SIMDE_HAVE_FENV_H)
- fenv_t envp;
- int x = feholdexcept(&envp);
- r = a_.f64[0] > b_.f64[0];
- if (HEDLEY_LIKELY(x == 0))
- fesetenv(&envp);
- #else
- r = a_.f64[0] > b_.f64[0];
- #endif
- return r;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int simde_mm_ucomile_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_ucomile_sd(a, b);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- int r;
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);
- uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);
- uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(
- vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan))));
- uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64);
- r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <=
- wasm_f64x2_extract_lane(b_.wasm_v128, 0);
- #elif defined(SIMDE_HAVE_FENV_H)
- fenv_t envp;
- int x = feholdexcept(&envp);
- r = a_.f64[0] <= b_.f64[0];
- if (HEDLEY_LIKELY(x == 0))
- fesetenv(&envp);
- #else
- r = a_.f64[0] <= b_.f64[0];
- #endif
- return r;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int simde_mm_ucomilt_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_ucomilt_sd(a, b);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- int r;
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);
- uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);
- uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(
- vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan))));
- uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64);
- r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <
- wasm_f64x2_extract_lane(b_.wasm_v128, 0);
- #elif defined(SIMDE_HAVE_FENV_H)
- fenv_t envp;
- int x = feholdexcept(&envp);
- r = a_.f64[0] < b_.f64[0];
- if (HEDLEY_LIKELY(x == 0))
- fesetenv(&envp);
- #else
- r = a_.f64[0] < b_.f64[0];
- #endif
- return r;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- int simde_mm_ucomineq_sd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_ucomineq_sd(a, b);
- #else
- simde__m128d_private a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- int r;
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);
- uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);
- uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan);
- uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(
- vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64))));
- r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- return wasm_f64x2_extract_lane(a_.wasm_v128, 0) !=
- wasm_f64x2_extract_lane(b_.wasm_v128, 0);
- #elif defined(SIMDE_HAVE_FENV_H)
- fenv_t envp;
- int x = feholdexcept(&envp);
- r = a_.f64[0] != b_.f64[0];
- if (HEDLEY_LIKELY(x == 0))
- fesetenv(&envp);
- #else
- r = a_.f64[0] != b_.f64[0];
- #endif
- return r;
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b)
- #endif
- #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
- HEDLEY_DIAGNOSTIC_PUSH
- SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
- #endif
- #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
- HEDLEY_DIAGNOSTIC_POP
- #endif
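- /* Without native support, the load (lfence) and full (mfence) fences fall
-  * back to simde_mm_sfence(). */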
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_lfence(void)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_lfence();
- #else
- simde_mm_sfence();
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_lfence() simde_mm_lfence()
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void simde_mm_mfence(void)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- _mm_mfence();
- #else
- simde_mm_sfence();
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_mfence() simde_mm_mfence()
- #endif
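- /* The unpackhi_* family interleaves the elements of the high halves of a and
-  * b: a_hi[0], b_hi[0], a_hi[1], b_hi[1], ... */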
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_unpackhi_epi8(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_unpackhi_epi8(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16));
- int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16));
- int8x8x2_t result = vzip_s8(a1, b1);
- r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26,
- 11, 27, 12, 28, 13, 29, 14, 30, 15, 31);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2); i++) {
- r_.i8[(i * 2)] =
- a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)];
- r_.i8[(i * 2) + 1] =
- b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_unpackhi_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_unpackhi_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- int16x4_t a1 = vget_high_s16(a_.neon_i16);
- int16x4_t b1 = vget_high_s16(b_.neon_i16);
- int16x4x2_t result = vzip_s16(a1, b1);
- r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6,
- 14, 7, 15);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2); i++) {
- r_.i16[(i * 2)] =
- a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)];
- r_.i16[(i * 2) + 1] =
- b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_unpackhi_epi32(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_unpackhi_epi32(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- int32x2_t a1 = vget_high_s32(a_.neon_i32);
- int32x2_t b1 = vget_high_s32(b_.neon_i32);
- int32x2x2_t result = vzip_s32(a1, b1);
- r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2); i++) {
- r_.i32[(i * 2)] =
- a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)];
- r_.i32[(i * 2) + 1] =
- b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_unpackhi_epi64(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_unpackhi_epi64(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- int64x1_t a_h = vget_high_s64(a_.neon_i64);
- int64x1_t b_h = vget_high_s64(b_.neon_i64);
- r_.neon_i64 = vcombine_s64(a_h, b_h);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2); i++) {
- r_.i64[(i * 2)] =
- a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)];
- r_.i64[(i * 2) + 1] =
- b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_unpackhi_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_unpackhi_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- float64x1_t a_l = vget_high_f64(a_.neon_f64);
- float64x1_t b_l = vget_high_f64(b_.neon_f64);
- r_.neon_f64 = vcombine_f64(a_l, b_l);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_v64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2); i++) {
- r_.f64[(i * 2)] =
- a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)];
- r_.f64[(i * 2) + 1] =
- b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b)
- #endif
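- /* The unpacklo_* family interleaves the elements of the low halves of a and
-  * b: a_lo[0], b_lo[0], a_lo[1], b_lo[1], ... */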
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_unpacklo_epi8(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_unpacklo_epi8(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16));
- int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16));
- int8x8x2_t result = vzip_s8(a1, b1);
- r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18,
- 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2); i++) {
- r_.i8[(i * 2)] = a_.i8[i];
- r_.i8[(i * 2) + 1] = b_.i8[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_unpacklo_epi16(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_unpacklo_epi16(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- int16x4_t a1 = vget_low_s16(a_.neon_i16);
- int16x4_t b1 = vget_low_s16(b_.neon_i16);
- int16x4x2_t result = vzip_s16(a1, b1);
- r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2,
- 10, 3, 11);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2); i++) {
- r_.i16[(i * 2)] = a_.i16[i];
- r_.i16[(i * 2) + 1] = b_.i16[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_unpacklo_epi32(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_unpacklo_epi32(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32);
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- int32x2_t a1 = vget_low_s32(a_.neon_i32);
- int32x2_t b1 = vget_low_s32(b_.neon_i32);
- int32x2x2_t result = vzip_s32(a1, b1);
- r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2); i++) {
- r_.i32[(i * 2)] = a_.i32[i];
- r_.i32[(i * 2) + 1] = b_.i32[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_unpacklo_epi64(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_unpacklo_epi64(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- int64x1_t a_l = vget_low_s64(a_.neon_i64);
- int64x1_t b_l = vget_low_s64(b_.neon_i64);
- r_.neon_i64 = vcombine_s64(a_l, b_l);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2); i++) {
- r_.i64[(i * 2)] = a_.i64[i];
- r_.i64[(i * 2) + 1] = b_.i64[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_mm_unpacklo_pd(simde__m128d a, simde__m128d b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_unpacklo_pd(a, b);
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a),
- b_ = simde__m128d_to_private(b);
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- float64x1_t a_l = vget_low_f64(a_.neon_f64);
- float64x1_t b_l = vget_low_f64(b_.neon_f64);
- r_.neon_f64 = vcombine_f64(a_l, b_l);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2); i++) {
- r_.f64[(i * 2)] = a_.f64[i];
- r_.f64[(i * 2) + 1] = b_.f64[i];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b)
- #endif
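- /* SIMDe-specific helper (SSE2 has no _mm_negate_pd): negates both lanes,
-  * via XOR with -0.0 on x86 and native negation elsewhere. */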
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d simde_x_mm_negate_pd(simde__m128d a)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0)));
- #else
- simde__m128d_private r_, a_ = simde__m128d_to_private(a);
- #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \
- (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8, 1, 0))
- r_.altivec_f64 = vec_neg(a_.altivec_f64);
- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_f64 = vnegq_f64(a_.neon_f64);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128);
- #elif defined(SIMDE_VECTOR_NEGATE)
- r_.f64 = -a_.f64;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) {
- r_.f64[i] = -a_.f64[i];
- }
- #endif
- return simde__m128d_from_private(r_);
- #endif
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_mm_xor_si128(simde__m128i a, simde__m128i b)
- {
- #if defined(SIMDE_X86_SSE2_NATIVE)
- return _mm_xor_si128(a, b);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32f = a_.i32f ^ b_.i32f;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
- r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i];
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i simde_x_mm_not_si128(simde__m128i a)
- {
- #if defined(SIMDE_X86_AVX512VL_NATIVE)
- return _mm_ternarylogic_epi32(a, a, a, 0x55);
- #else
- simde__m128i_private r_, a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_i32 = vmvnq_s32(a_.neon_i32);
- #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32);
- #elif defined(SIMDE_WASM_SIMD128_NATIVE)
- r_.wasm_v128 = wasm_v128_not(a_.wasm_v128);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32f = ~a_.i32f;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) {
- r_.i32f[i] = ~(a_.i32f[i]);
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
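- /* Packs two single-bit lane selectors into the immediate expected by
-  * _mm_shuffle_pd: bit 1 = x, bit 0 = y. */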
- #define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y))
- #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
- #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y)
- #endif
- SIMDE_END_DECLS_
- HEDLEY_DIAGNOSTIC_POP
- #endif /* !defined(SIMDE_X86_SSE2_H) */