#! /usr/bin/env perl
# Copyright 2007-2025 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# AES for s390x.
# April 2007.
#
# Software performance improvement over gcc-generated code is ~70% and
# in absolute terms is ~73 cycles per byte processed with 128-bit key.
# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
# *strictly* in-order execution machines, and an issued instruction [in
# this case the load from memory is the critical one] has to complete
# before execution flow proceeds. S-boxes are compressed to 2KB[+256B].
#
# As for hardware acceleration support: it's basically a "teaser," as
# it can and should be improved in several ways. Most notably, support
# for CBC is not utilized, nor are multiple blocks ever processed.
# Also, the software key schedule could be postponed until hardware
# support detection... Performance improvement over assembler is
# reportedly ~2.5x, but can reach >8x [naturally on larger chunks] if
# proper support is implemented.
# May 2007.
#
# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
# for 128-bit keys if hardware support is detected.
# January 2009.
#
# Add support for hardware AES192/256 and reschedule instructions to
# minimize/avoid Address Generation Interlock hazard and to favour the
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
# almost 50% on z9. The gain is smaller on z10 because, being dual-
# issue, z10 makes it impossible to eliminate the interlock condition:
# the critical path is not long enough. Yet it spends ~24 cycles per
# byte processed with a 128-bit key.
#
# Unlike the previous version, hardware support detection takes place
# only at the moment of key schedule setup, which is denoted in
# key->rounds. This is done because deferred key setup can't be made
# MT-safe, not for keys longer than 128 bits.
#
# Add AES_cbc_encrypt, which gives an incredible performance
# improvement; it was measured to be ~6.6x. It's less than the
# previously mentioned 8x because the software implementation was
# optimized.
# May 2010.
#
# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
# performance improvement over the "generic" counter mode routine
# relying on the single-block, also hardware-assisted, AES_encrypt.
# "Up to" refers to the fact that the exact throughput value depends
# on the current stack frame alignment within a 4KB page. In the worst
# case you get ~75% of the maximum, but *on average* it would be as
# much as ~98%. Meaning that the worst case is unlikely; it's like
# hitting a ravine on a plateau.
# November 2010.
#
# Adapt for the -m31 build. If the kernel supports what's called the
# "highgprs" feature on Linux [see /proc/cpuinfo], it's possible to
# use 64-bit instructions and achieve "64-bit" performance even in a
# 31-bit legacy application context. The feature is not specific to
# any particular processor, as long as it's a "z-CPU". The latter
# implies that the code remains z/Architecture specific. On z990 it
# was measured to perform 2x better than code generated by gcc 4.3.
# December 2010.
#
# Add support for the z196 "cipher message with counter" instruction.
# Note however that it's disengaged, because it was measured to
# perform ~12% worse than vanilla km-based code...
# February 2011.
#
# Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes
# instructions, which deliver ~70% improvement over vanilla km-based
# code at 8KB block size, and ~37% at 512-byte block sizes.
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour =~ /3[12]/) {
$SIZE_T=4;
$g="";
} else {
$SIZE_T=8;
$g="g";
}
$output and open STDOUT,">$output";
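# Invocation sketch (an assumption based on other perlasm modules, not
# something this file spells out): the script takes an optional flavour
# and an optional output file, e.g.
#
#   perl aes-s390x.pl linux64 aes-s390x.S   # 64-bit ABI
#   perl aes-s390x.pl linux32 aes-s390x.S   # 31-bit "highgprs" ABI
#
# A flavour matching /3[12]/ selects 4-byte $SIZE_T and the plain
# instruction forms; anything else selects 8-byte $SIZE_T and the
# 64-bit "g" forms via $g.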
$softonly=0; # allow hardware support
$t0="%r0"; $mask="%r0";
$t1="%r1";
$t2="%r2"; $inp="%r2";
$t3="%r3"; $out="%r3"; $bits="%r3";
$key="%r4";
$i1="%r5";
$i2="%r6";
$i3="%r7";
$s0="%r8";
$s1="%r9";
$s2="%r10";
$s3="%r11";
$tbl="%r12";
$rounds="%r13";
$ra="%r14";
$sp="%r15";
$stdframe=16*$SIZE_T+4*8;
sub _data_word()
{ my $i;
while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
}
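# Note on _data_word: every table word is emitted twice, so each Te/Td
# entry occupies 8 bytes. An index pre-scaled by 8 can then fetch any
# byte rotation of the entry as a 4-byte load at displacement 0..3,
# letting one 2KB table serve as all four rotated tables Te0..Te3.
# A minimal Perl sketch of the idea (illustrative only, not used by
# the generator):
#
#   my $w = 0xc66363a5;                    # first Te entry
#   my $pair = pack("NN", $w, $w);         # stored as 0xc66363a5c66363a5
#   my @rot = map { unpack("N", substr($pair, $_, 4)) } 0..3;
#   # $rot[$d] is $w rotated left by 8*$d bits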
$code=<<___;
#include "s390x_arch.h"
.text
.type AES_Te,\@object
.align 256
AES_Te:
___
&_data_word(
0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
$code.=<<___;
# Te4[256]
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
# rcon[]
.long 0x01000000, 0x02000000, 0x04000000, 0x08000000
.long 0x10000000, 0x20000000, 0x40000000, 0x80000000
.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
.align 256
.size AES_Te,.-AES_Te
# void AES_encrypt(const unsigned char *inp, unsigned char *out,
# const AES_KEY *key) {
.globl AES_encrypt
.type AES_encrypt,\@function
AES_encrypt:
___
$code.=<<___ if (!$softonly);
l %r0,240($key)
lhi %r1,16
clr %r0,%r1
jl .Lesoft
la %r1,0($key)
#la %r2,0($inp)
la %r4,0($out)
lghi %r3,16 # single block length
.long 0xb92e0042 # km %r4,%r2
brc 1,.-4 # can this happen?
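# Annotation: 0xb92e0042 is the RRE-format KM (cipher message)
# instruction, opcode 0xb92e with R1=%r4 (output) and R2=%r2 (input),
# hand-encoded so the module also assembles with toolchains that do
# not know the mnemonic. KM sets CC=3 on "partial completion", and
# "brc 1,.-4" (branch on CC3 back to the km) would resume it, although
# a single 16-byte block should complete in one go, hence the remark.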
br %r14
.align 64
.Lesoft:
___
$code.=<<___;
stm${g} %r3,$ra,3*$SIZE_T($sp)
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
larl $tbl,AES_Te
bras $ra,_s390x_AES_encrypt
l${g} $out,3*$SIZE_T($sp)
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
lm${g} %r6,$ra,6*$SIZE_T($sp)
br $ra
.size AES_encrypt,.-AES_encrypt
.type _s390x_AES_encrypt,\@function
.align 16
_s390x_AES_encrypt:
st${g} $ra,15*$SIZE_T($sp)
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $s3,12($key)
l $rounds,240($key)
llill $mask,`0xff<<3`
aghi $rounds,-1
j .Lenc_loop
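# Each .Lenc_loop iteration is one T-table AES round: every output
# word gathers four table lookups, one per input byte, plus the round
# key, e.g. s0' = Te0[s0>>24] ^ Te1[(s1>>16)&0xff] ^ Te2[(s2>>8)&0xff]
# ^ Te3[s3&0xff] ^ rk[0]. The byte indices are kept pre-shifted by 3
# ($mask is 0xff<<3), so they serve directly as displacements into the
# 8-byte-per-entry doubled table.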
.align 16
.Lenc_loop:
sllg $t1,$s0,`0+3`
srlg $t2,$s0,`8-3`
srlg $t3,$s0,`16-3`
srl $s0,`24-3`
nr $s0,$mask
ngr $t1,$mask
nr $t2,$mask
nr $t3,$mask
srlg $i1,$s1,`16-3` # i0
sllg $i2,$s1,`0+3`
srlg $i3,$s1,`8-3`
srl $s1,`24-3`
nr $i1,$mask
nr $s1,$mask
ngr $i2,$mask
nr $i3,$mask
l $s0,0($s0,$tbl) # Te0[s0>>24]
l $t1,1($t1,$tbl) # Te3[s0>>0]
l $t2,2($t2,$tbl) # Te2[s0>>8]
l $t3,3($t3,$tbl) # Te1[s0>>16]
x $s0,3($i1,$tbl) # Te1[s1>>16]
l $s1,0($s1,$tbl) # Te0[s1>>24]
x $t2,1($i2,$tbl) # Te3[s1>>0]
x $t3,2($i3,$tbl) # Te2[s1>>8]
srlg $i1,$s2,`8-3` # i0
srlg $i2,$s2,`16-3` # i1
nr $i1,$mask
nr $i2,$mask
sllg $i3,$s2,`0+3`
srl $s2,`24-3`
nr $s2,$mask
ngr $i3,$mask
xr $s1,$t1
srlg $ra,$s3,`8-3` # i1
sllg $t1,$s3,`0+3` # i0
nr $ra,$mask
la $key,16($key)
ngr $t1,$mask
x $s0,2($i1,$tbl) # Te2[s2>>8]
x $s1,3($i2,$tbl) # Te1[s2>>16]
l $s2,0($s2,$tbl) # Te0[s2>>24]
x $t3,1($i3,$tbl) # Te3[s2>>0]
srlg $i3,$s3,`16-3` # i2
xr $s2,$t2
srl $s3,`24-3`
nr $i3,$mask
nr $s3,$mask
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $t3,12($key)
x $s0,1($t1,$tbl) # Te3[s3>>0]
x $s1,2($ra,$tbl) # Te2[s3>>8]
x $s2,3($i3,$tbl) # Te1[s3>>16]
l $s3,0($s3,$tbl) # Te0[s3>>24]
xr $s3,$t3
brct $rounds,.Lenc_loop
.align 16
sllg $t1,$s0,`0+3`
srlg $t2,$s0,`8-3`
ngr $t1,$mask
srlg $t3,$s0,`16-3`
srl $s0,`24-3`
nr $s0,$mask
nr $t2,$mask
nr $t3,$mask
srlg $i1,$s1,`16-3` # i0
sllg $i2,$s1,`0+3`
ngr $i2,$mask
srlg $i3,$s1,`8-3`
srl $s1,`24-3`
nr $i1,$mask
nr $s1,$mask
nr $i3,$mask
llgc $s0,2($s0,$tbl) # Te4[s0>>24]
llgc $t1,2($t1,$tbl) # Te4[s0>>0]
sll $s0,24
llgc $t2,2($t2,$tbl) # Te4[s0>>8]
llgc $t3,2($t3,$tbl) # Te4[s0>>16]
sll $t2,8
sll $t3,16
llgc $i1,2($i1,$tbl) # Te4[s1>>16]
llgc $s1,2($s1,$tbl) # Te4[s1>>24]
llgc $i2,2($i2,$tbl) # Te4[s1>>0]
llgc $i3,2($i3,$tbl) # Te4[s1>>8]
sll $i1,16
sll $s1,24
sll $i3,8
or $s0,$i1
or $s1,$t1
or $t2,$i2
or $t3,$i3
srlg $i1,$s2,`8-3` # i0
srlg $i2,$s2,`16-3` # i1
nr $i1,$mask
nr $i2,$mask
sllg $i3,$s2,`0+3`
srl $s2,`24-3`
ngr $i3,$mask
nr $s2,$mask
sllg $t1,$s3,`0+3` # i0
srlg $ra,$s3,`8-3` # i1
ngr $t1,$mask
llgc $i1,2($i1,$tbl) # Te4[s2>>8]
llgc $i2,2($i2,$tbl) # Te4[s2>>16]
sll $i1,8
llgc $s2,2($s2,$tbl) # Te4[s2>>24]
llgc $i3,2($i3,$tbl) # Te4[s2>>0]
sll $i2,16
nr $ra,$mask
sll $s2,24
or $s0,$i1
or $s1,$i2
or $s2,$t2
or $t3,$i3
srlg $i3,$s3,`16-3` # i2
srl $s3,`24-3`
nr $i3,$mask
nr $s3,$mask
l $t0,16($key)
l $t2,20($key)
llgc $i1,2($t1,$tbl) # Te4[s3>>0]
llgc $i2,2($ra,$tbl) # Te4[s3>>8]
llgc $i3,2($i3,$tbl) # Te4[s3>>16]
llgc $s3,2($s3,$tbl) # Te4[s3>>24]
sll $i2,8
sll $i3,16
sll $s3,24
or $s0,$i1
or $s1,$i2
or $s2,$i3
or $s3,$t3
l${g} $ra,15*$SIZE_T($sp)
xr $s0,$t0
xr $s1,$t2
x $s2,24($key)
x $s3,28($key)
br $ra
.size _s390x_AES_encrypt,.-_s390x_AES_encrypt
___
$code.=<<___;
.type AES_Td,\@object
.align 256
AES_Td:
___
&_data_word(
0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
$code.=<<___;
# Td4[256]
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size AES_Td,.-AES_Td
# void AES_decrypt(const unsigned char *inp, unsigned char *out,
# const AES_KEY *key) {
.globl AES_decrypt
.type AES_decrypt,\@function
AES_decrypt:
___
$code.=<<___ if (!$softonly);
l %r0,240($key)
lhi %r1,16
clr %r0,%r1
jl .Ldsoft
la %r1,0($key)
#la %r2,0($inp)
la %r4,0($out)
lghi %r3,16 # single block length
.long 0xb92e0042 # km %r4,%r2
brc 1,.-4 # can this happen?
br %r14
.align 64
.Ldsoft:
___
$code.=<<___;
stm${g} %r3,$ra,3*$SIZE_T($sp)
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
larl $tbl,AES_Td
bras $ra,_s390x_AES_decrypt
l${g} $out,3*$SIZE_T($sp)
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
lm${g} %r6,$ra,6*$SIZE_T($sp)
br $ra
.size AES_decrypt,.-AES_decrypt
.type _s390x_AES_decrypt,\@function
.align 16
_s390x_AES_decrypt:
st${g} $ra,15*$SIZE_T($sp)
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $s3,12($key)
l $rounds,240($key)
llill $mask,`0xff<<3`
aghi $rounds,-1
j .Ldec_loop
.align 16
.Ldec_loop:
srlg $t1,$s0,`16-3`
srlg $t2,$s0,`8-3`
sllg $t3,$s0,`0+3`
srl $s0,`24-3`
nr $s0,$mask
nr $t1,$mask
nr $t2,$mask
ngr $t3,$mask
sllg $i1,$s1,`0+3` # i0
srlg $i2,$s1,`16-3`
srlg $i3,$s1,`8-3`
srl $s1,`24-3`
ngr $i1,$mask
nr $s1,$mask
nr $i2,$mask
nr $i3,$mask
l $s0,0($s0,$tbl) # Td0[s0>>24]
l $t1,3($t1,$tbl) # Td1[s0>>16]
l $t2,2($t2,$tbl) # Td2[s0>>8]
l $t3,1($t3,$tbl) # Td3[s0>>0]
x $s0,1($i1,$tbl) # Td3[s1>>0]
l $s1,0($s1,$tbl) # Td0[s1>>24]
x $t2,3($i2,$tbl) # Td1[s1>>16]
x $t3,2($i3,$tbl) # Td2[s1>>8]
srlg $i1,$s2,`8-3` # i0
sllg $i2,$s2,`0+3` # i1
srlg $i3,$s2,`16-3`
srl $s2,`24-3`
nr $i1,$mask
ngr $i2,$mask
nr $s2,$mask
nr $i3,$mask
xr $s1,$t1
srlg $ra,$s3,`8-3` # i1
srlg $t1,$s3,`16-3` # i0
nr $ra,$mask
la $key,16($key)
nr $t1,$mask
x $s0,2($i1,$tbl) # Td2[s2>>8]
x $s1,1($i2,$tbl) # Td3[s2>>0]
l $s2,0($s2,$tbl) # Td0[s2>>24]
x $t3,3($i3,$tbl) # Td1[s2>>16]
sllg $i3,$s3,`0+3` # i2
srl $s3,`24-3`
ngr $i3,$mask
nr $s3,$mask
xr $s2,$t2
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
x $t3,12($key)
x $s0,3($t1,$tbl) # Td1[s3>>16]
x $s1,2($ra,$tbl) # Td2[s3>>8]
x $s2,1($i3,$tbl) # Td3[s3>>0]
l $s3,0($s3,$tbl) # Td0[s3>>24]
xr $s3,$t3
brct $rounds,.Ldec_loop
.align 16
l $t1,`2048+0`($tbl) # prefetch Td4
l $t2,`2048+64`($tbl)
l $t3,`2048+128`($tbl)
l $i1,`2048+192`($tbl)
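# The four loads above touch the 256-byte Td4 table at 64-byte
# intervals, so it is presumably cache-resident before the final-round
# byte lookups that follow.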
llill $mask,0xff
srlg $i3,$s0,24 # i0
srlg $t1,$s0,16
srlg $t2,$s0,8
nr $s0,$mask # i3
nr $t1,$mask
srlg $i1,$s1,24
nr $t2,$mask
srlg $i2,$s1,16
srlg $ra,$s1,8
nr $s1,$mask # i0
nr $i2,$mask
nr $ra,$mask
llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
sll $t1,16
llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
sllg $s0,$i3,24
sll $t2,8
llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
sll $i1,24
llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
sll $i2,16
sll $i3,8
or $s0,$s1
or $t1,$i1
or $t2,$i2
or $t3,$i3
srlg $i1,$s2,8 # i0
srlg $i2,$s2,24
srlg $i3,$s2,16
nr $s2,$mask # i1
nr $i1,$mask
nr $i3,$mask
llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
sll $i1,8
sll $i2,24
or $s0,$i1
sll $i3,16
or $t2,$i2
or $t3,$i3
srlg $i1,$s3,16 # i0
srlg $i2,$s3,8 # i1
srlg $i3,$s3,24
nr $s3,$mask # i2
nr $i1,$mask
nr $i2,$mask
l${g} $ra,15*$SIZE_T($sp)
or $s1,$t1
l $t0,16($key)
l $t1,20($key)
llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
sll $i1,16
llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
sll $i2,8
sll $s3,24
or $s0,$i1
or $s1,$i2
or $s2,$t2
or $s3,$t3
xr $s0,$t0
xr $s1,$t1
x $s2,24($key)
x $s3,28($key)
br $ra
.size _s390x_AES_decrypt,.-_s390x_AES_decrypt
___
$code.=<<___;
# void AES_set_encrypt_key(const unsigned char *in, int bits,
# AES_KEY *key) {
.globl AES_set_encrypt_key
.type AES_set_encrypt_key,\@function
.align 16
AES_set_encrypt_key:
_s390x_AES_set_encrypt_key:
lghi $t0,0
cl${g}r $inp,$t0
je .Lminus1
cl${g}r $key,$t0
je .Lminus1
lghi $t0,128
clr $bits,$t0
je .Lproceed
lghi $t0,192
clr $bits,$t0
je .Lproceed
lghi $t0,256
clr $bits,$t0
je .Lproceed
lghi %r2,-2
br %r14
.align 16
.Lproceed:
___
$code.=<<___ if (!$softonly);
# convert bits to km(c) code, [128,192,256]->[18,19,20]
lhi %r5,-128
lhi %r0,18
ar %r5,$bits
srl %r5,6
ar %r5,%r0
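# i.e. %r5 = (bits-128)/64 + 18, mapping bits to the KM/KMC function
# codes for AES: 128 -> 18, 192 -> 19, 256 -> 20.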
larl %r1,OPENSSL_s390xcap_P
llihh %r0,0x8000
srlg %r0,%r0,0(%r5)
ng %r0,S390X_KM(%r1) # check availability of both km...
ng %r0,S390X_KMC(%r1) # ...and kmc support for given key length
jz .Lekey_internal
lmg %r0,%r1,0($inp) # just copy 128 bits...
stmg %r0,%r1,0($key)
lhi %r0,192
cr $bits,%r0
jl 1f
lg %r1,16($inp)
stg %r1,16($key)
je 1f
lg %r1,24($inp)
stg %r1,24($key)
1: st $bits,236($key) # save bits [for debugging purposes]
lgr $t0,%r5
st %r5,240($key) # save km(c) code
lghi %r2,0
br %r14
___
$code.=<<___;
.align 16
.Lekey_internal:
stm${g} %r4,%r13,4*$SIZE_T($sp) # all non-volatile regs and $key
larl $tbl,AES_Te+2048
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
st $s0,0($key)
st $s1,4($key)
st $s2,8($key)
st $s3,12($key)
lghi $t0,128
cr $bits,$t0
jne .Lnot128
llill $mask,0xff
lghi $t3,0 # i=0
lghi $rounds,10
st $rounds,240($key)
llgfr $t2,$s3 # temp=rk[3]
srlg $i1,$s3,8
srlg $i2,$s3,16
srlg $i3,$s3,24
nr $t2,$mask
nr $i1,$mask
nr $i2,$mask
.align 16
.L128_loop:
la $t2,0($t2,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
icm $t2,1,0($i3) # Te4[rk[3]>>24]
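# The four icm (insert characters under mask) above assemble
# SubWord(RotWord(rk[3])): each byte of rk[3] goes through the Te4
# S-box and lands one byte position to the left of where it came from
# (with wrap-around), which is exactly the RotWord rotation. The rcon
# XOR below completes temp = SubWord(RotWord(rk[3])) ^ rcon[i] of the
# standard AES-128 key expansion.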
x $t2,256($t3,$tbl) # rcon[i]
xr $s0,$t2 # rk[4]=rk[0]^...
xr $s1,$s0 # rk[5]=rk[1]^rk[4]
xr $s2,$s1 # rk[6]=rk[2]^rk[5]
xr $s3,$s2 # rk[7]=rk[3]^rk[6]
llgfr $t2,$s3 # temp=rk[3]
srlg $i1,$s3,8
srlg $i2,$s3,16
nr $t2,$mask
nr $i1,$mask
srlg $i3,$s3,24
nr $i2,$mask
st $s0,16($key)
st $s1,20($key)
st $s2,24($key)
st $s3,28($key)
la $key,16($key) # key+=4
la $t3,4($t3) # i++
brct $rounds,.L128_loop
lghi $t0,10
lghi %r2,0
lm${g} %r4,%r13,4*$SIZE_T($sp)
br $ra
.align 16
.Lnot128:
llgf $t0,16($inp)
llgf $t1,20($inp)
st $t0,16($key)
st $t1,20($key)
lghi $t0,192
cr $bits,$t0
jne .Lnot192
llill $mask,0xff
lghi $t3,0 # i=0
lghi $rounds,12
st $rounds,240($key)
lghi $rounds,8
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
.align 16
.L192_loop:
la $t1,0($t1,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
icm $t1,1,0($i3) # Te4[rk[5]>>24]
x $t1,256($t3,$tbl) # rcon[i]
xr $s0,$t1 # rk[6]=rk[0]^...
xr $s1,$s0 # rk[7]=rk[1]^rk[6]
xr $s2,$s1 # rk[8]=rk[2]^rk[7]
xr $s3,$s2 # rk[9]=rk[3]^rk[8]
st $s0,24($key)
st $s1,28($key)
st $s2,32($key)
st $s3,36($key)
brct $rounds,.L192_continue
lghi $t0,12
lghi %r2,0
lm${g} %r4,%r13,4*$SIZE_T($sp)
br $ra
.align 16
.L192_continue:
lgr $t1,$s3
x $t1,16($key) # rk[10]=rk[4]^rk[9]
st $t1,40($key)
x $t1,20($key) # rk[11]=rk[5]^rk[10]
st $t1,44($key)
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
la $key,24($key) # key+=6
la $t3,4($t3) # i++
j .L192_loop
.align 16
.Lnot192:
llgf $t0,24($inp)
llgf $t1,28($inp)
st $t0,24($key)
st $t1,28($key)
llill $mask,0xff
lghi $t3,0 # i=0
lghi $rounds,14
st $rounds,240($key)
lghi $rounds,7
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
.align 16
.L256_loop:
la $t1,0($t1,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
icm $t1,1,0($i3) # Te4[rk[7]>>24]
x $t1,256($t3,$tbl) # rcon[i]
xr $s0,$t1 # rk[8]=rk[0]^...
xr $s1,$s0 # rk[9]=rk[1]^rk[8]
xr $s2,$s1 # rk[10]=rk[2]^rk[9]
xr $s3,$s2 # rk[11]=rk[3]^rk[10]
st $s0,32($key)
st $s1,36($key)
st $s2,40($key)
st $s3,44($key)
brct $rounds,.L256_continue
lghi $t0,14
lghi %r2,0
lm${g} %r4,%r13,4*$SIZE_T($sp)
br $ra
.align 16
.L256_continue:
lgr $t1,$s3 # temp=rk[11]
srlg $i1,$s3,8
srlg $i2,$s3,16
srlg $i3,$s3,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
la $t1,0($t1,$tbl)
la $i1,0($i1,$tbl)
la $i2,0($i2,$tbl)
la $i3,0($i3,$tbl)
llgc $t1,0($t1) # Te4[rk[11]>>0]
icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
x $t1,16($key) # rk[12]=rk[4]^...
st $t1,48($key)
x $t1,20($key) # rk[13]=rk[5]^rk[12]
st $t1,52($key)
x $t1,24($key) # rk[14]=rk[6]^rk[13]
st $t1,56($key)
x $t1,28($key) # rk[15]=rk[7]^rk[14]
st $t1,60($key)
srlg $i1,$t1,8
srlg $i2,$t1,16
srlg $i3,$t1,24
nr $t1,$mask
nr $i1,$mask
nr $i2,$mask
la $key,32($key) # key+=8
la $t3,4($t3) # i++
j .L256_loop
.Lminus1:
lghi %r2,-1
br $ra
.size AES_set_encrypt_key,.-AES_set_encrypt_key
# void AES_set_decrypt_key(const unsigned char *in, int bits,
# AES_KEY *key) {
.globl AES_set_decrypt_key
.type AES_set_decrypt_key,\@function
.align 16
AES_set_decrypt_key:
#st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to
st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key!
bras $ra,_s390x_AES_set_encrypt_key
#l${g} $key,4*$SIZE_T($sp)
l${g} $ra,14*$SIZE_T($sp)
ltgr %r2,%r2
bnzr $ra
___
$code.=<<___ if (!$softonly);
#l $t0,240($key)
lhi $t1,16
cr $t0,$t1
jl .Lgo
oill $t0,S390X_DECRYPT # set "decrypt" bit
st $t0,240($key)
br $ra
___
$code.=<<___;
.align 16
.Lgo: lgr $rounds,$t0 #llgf $rounds,240($key)
la $i1,0($key)
sllg $i2,$rounds,4
la $i2,0($i2,$key)
srl $rounds,1
lghi $t1,-16
.align 16
.Linv: lmg $s0,$s1,0($i1)
lmg $s2,$s3,0($i2)
stmg $s0,$s1,0($i2)
stmg $s2,$s3,0($i1)
la $i1,16($i1)
la $i2,0($t1,$i2)
brct $rounds,.Linv
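# The .Linv loop above swaps 16-byte round key i with round key
# (rounds-i), reversing the schedule so decryption can walk it
# forward; the .Lmix loop below then applies InvMixColumns to the
# inner round keys, as in the usual equivalent-inverse-cipher setup.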
___
$mask80=$i1;
$mask1b=$i2;
$maskfe=$i3;
$code.=<<___;
llgf $rounds,240($key)
aghi $rounds,-1
sll $rounds,2 # (rounds-1)*4
llilh $mask80,0x8080
llilh $mask1b,0x1b1b
llilh $maskfe,0xfefe
oill $mask80,0x8080
oill $mask1b,0x1b1b
oill $maskfe,0xfefe
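# The masks implement branch-free GF(2^8) doubling ("xtime") on four
# packed bytes at once: xtime(x) = (x<<1) ^ (x&0x80 ? 0x1b : 0).
# $mask80 isolates each byte's top bit, the srlg/slr pair turns every
# 0x80 into 0x7f, $mask1b reduces that to the 0x1b reduction
# polynomial, and $maskfe keeps the left shift from bleeding across
# byte boundaries. tp2/tp4/tp8 below are tp1 times 2, 4 and 8 in
# GF(2^8), from which InvMixColumns is assembled.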
.align 16
.Lmix: l $s0,16($key) # tp1
lr $s1,$s0
ngr $s1,$mask80
srlg $t1,$s1,7
slr $s1,$t1
nr $s1,$mask1b
sllg $t1,$s0,1
nr $t1,$maskfe
xr $s1,$t1 # tp2
lr $s2,$s1
ngr $s2,$mask80
srlg $t1,$s2,7
slr $s2,$t1
nr $s2,$mask1b
sllg $t1,$s1,1
nr $t1,$maskfe
xr $s2,$t1 # tp4
lr $s3,$s2
ngr $s3,$mask80
srlg $t1,$s3,7
slr $s3,$t1
nr $s3,$mask1b
sllg $t1,$s2,1
nr $t1,$maskfe
xr $s3,$t1 # tp8
xr $s1,$s0 # tp2^tp1
xr $s2,$s0 # tp4^tp1
rll $s0,$s0,24 # = ROTATE(tp1,8)
xr $s2,$s3 # ^=tp8
xr $s0,$s1 # ^=tp2^tp1
xr $s1,$s3 # tp2^tp1^tp8
xr $s0,$s2 # ^=tp4^tp1^tp8
rll $s1,$s1,8
rll $s2,$s2,16
xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
rll $s3,$s3,24
xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
xr $s0,$s3 # ^= ROTATE(tp8,8)
st $s0,16($key)
la $key,4($key)
brct $rounds,.Lmix
lm${g} %r6,%r13,6*$SIZE_T($sp) # as was saved by AES_set_encrypt_key!
lghi %r2,0
br $ra
.size AES_set_decrypt_key,.-AES_set_decrypt_key
___
########################################################################
# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
# size_t length, const AES_KEY *key,
# unsigned char *ivec, const int enc)
{
my $inp="%r2";
my $out="%r4"; # length and out are swapped
my $len="%r3";
my $key="%r5";
my $ivp="%r6";
$code.=<<___;
.globl AES_cbc_encrypt
.type AES_cbc_encrypt,\@function
.align 16
AES_cbc_encrypt:
xgr %r3,%r4 # flip %r3 and %r4, out and len
xgr %r4,%r3
xgr %r3,%r4
___
$code.=<<___ if (!$softonly);
lhi %r0,16
cl %r0,240($key)
jh .Lcbc_software
lg %r0,0($ivp) # copy ivec
lg %r1,8($ivp)
stmg %r0,%r1,16($sp)
lmg %r0,%r1,0($key) # copy key, cover 256 bit
stmg %r0,%r1,32($sp)
lmg %r0,%r1,16($key)
stmg %r0,%r1,48($sp)
l %r0,240($key) # load kmc code
lghi $key,15 # res=len%16, len-=res;
ngr $key,$len
sl${g}r $len,$key
la %r1,16($sp) # parameter block - ivec || key
jz .Lkmc_truncated
.long 0xb92f0042 # kmc %r4,%r2
brc 1,.-4 # pay attention to "partial completion"
ltr $key,$key
jnz .Lkmc_truncated
.Lkmc_done:
lmg %r0,%r1,16($sp) # copy ivec to caller
stg %r0,0($ivp)
stg %r1,8($ivp)
br $ra
.align 16
.Lkmc_truncated:
ahi $key,-1 # it's the way it's encoded in mvc
tmll %r0,S390X_DECRYPT
jnz .Lkmc_truncated_dec
lghi %r1,0
stg %r1,16*$SIZE_T($sp)
stg %r1,16*$SIZE_T+8($sp)
bras %r1,1f
mvc 16*$SIZE_T(1,$sp),0($inp)
1: ex $key,0(%r1)
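# Annotation: the bras/mvc/ex pattern is the classic s390 variable-
# length copy. "ex" executes the out-of-line mvc with its length byte
# OR-ed in from $key, and since mvc encodes length-1, $key was
# decremented beforehand ("it's the way it's encoded in mvc"). Here it
# copies the trailing len%16 input bytes over a zeroed 16-byte buffer,
# so the final short block is zero-padded before kmc processes it.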
la %r1,16($sp) # restore parameter block
la $inp,16*$SIZE_T($sp)
lghi $len,16
.long 0xb92f0042 # kmc %r4,%r2
j .Lkmc_done
.align 16
.Lkmc_truncated_dec:
st${g} $out,4*$SIZE_T($sp)
la $out,16*$SIZE_T($sp)
lghi $len,16
.long 0xb92f0042 # kmc %r4,%r2
l${g} $out,4*$SIZE_T($sp)
bras %r1,2f
mvc 0(1,$out),16*$SIZE_T($sp)
2: ex $key,0(%r1)
j .Lkmc_done
.align 16
.Lcbc_software:
___
$code.=<<___;
stm${g} $key,$ra,5*$SIZE_T($sp)
lhi %r0,0
cl %r0,`$stdframe+$SIZE_T-4`($sp)
je .Lcbc_decrypt
larl $tbl,AES_Te
llgf $s0,0($ivp)
llgf $s1,4($ivp)
llgf $s2,8($ivp)
llgf $s3,12($ivp)
lghi $t0,16
sl${g}r $len,$t0
brc 4,.Lcbc_enc_tail # if borrow
.Lcbc_enc_loop:
stm${g} $inp,$out,2*$SIZE_T($sp)
x $s0,0($inp)
x $s1,4($inp)
x $s2,8($inp)
x $s3,12($inp)
lgr %r4,$key
bras $ra,_s390x_AES_encrypt
lm${g} $inp,$key,2*$SIZE_T($sp)
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
la $inp,16($inp)
la $out,16($out)
lghi $t0,16
lt${g}r $len,$len
jz .Lcbc_enc_done
sl${g}r $len,$t0
brc 4,.Lcbc_enc_tail # if borrow
j .Lcbc_enc_loop
.align 16
.Lcbc_enc_done:
l${g} $ivp,6*$SIZE_T($sp)
st $s0,0($ivp)
st $s1,4($ivp)
st $s2,8($ivp)
st $s3,12($ivp)
lm${g} %r7,$ra,7*$SIZE_T($sp)
br $ra
.align 16
.Lcbc_enc_tail:
aghi $len,15
lghi $t0,0
stg $t0,16*$SIZE_T($sp)
stg $t0,16*$SIZE_T+8($sp)
bras $t1,3f
mvc 16*$SIZE_T(1,$sp),0($inp)
3: ex $len,0($t1)
lghi $len,0
la $inp,16*$SIZE_T($sp)
j .Lcbc_enc_loop
.align 16
.Lcbc_decrypt:
larl $tbl,AES_Td
lg $t0,0($ivp)
lg $t1,8($ivp)
stmg $t0,$t1,16*$SIZE_T($sp)
.Lcbc_dec_loop:
stm${g} $inp,$out,2*$SIZE_T($sp)
llgf $s0,0($inp)
llgf $s1,4($inp)
llgf $s2,8($inp)
llgf $s3,12($inp)
lgr %r4,$key
bras $ra,_s390x_AES_decrypt
lm${g} $inp,$key,2*$SIZE_T($sp)
sllg $s0,$s0,32
sllg $s2,$s2,32
lr $s0,$s1
lr $s2,$s3
lg $t0,0($inp)
lg $t1,8($inp)
xg $s0,16*$SIZE_T($sp)
xg $s2,16*$SIZE_T+8($sp)
lghi $s1,16
sl${g}r $len,$s1
brc 4,.Lcbc_dec_tail # if borrow
brc 2,.Lcbc_dec_done # if zero
stg $s0,0($out)
stg $s2,8($out)
stmg $t0,$t1,16*$SIZE_T($sp)
la $inp,16($inp)
la $out,16($out)
j .Lcbc_dec_loop
.Lcbc_dec_done:
stg $s0,0($out)
stg $s2,8($out)
.Lcbc_dec_exit:
lm${g} %r6,$ra,6*$SIZE_T($sp)
stmg $t0,$t1,0($ivp)
br $ra
.align 16
.Lcbc_dec_tail:
aghi $len,15
stg $s0,16*$SIZE_T($sp)
stg $s2,16*$SIZE_T+8($sp)
bras $s1,4f
mvc 0(1,$out),16*$SIZE_T($sp)
4: ex $len,0($s1)
j .Lcbc_dec_exit
.size AES_cbc_encrypt,.-AES_cbc_encrypt
___
}
########################################################################
# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
# size_t blocks, const AES_KEY *key,
# const unsigned char *ivec)
{
my $inp="%r2";
my $out="%r4"; # blocks and out are swapped
my $len="%r3";
my $key="%r5"; my $iv0="%r5";
my $ivp="%r6";
my $fp ="%r7";
$code.=<<___;
.globl AES_ctr32_encrypt
.type AES_ctr32_encrypt,\@function
.align 16
AES_ctr32_encrypt:
xgr %r3,%r4 # flip %r3 and %r4, $out and $len
xgr %r4,%r3
xgr %r3,%r4
llgfr $len,$len # safe in ctr32 subroutine even in 64-bit case
___
$code.=<<___ if (!$softonly);
l %r0,240($key)
lhi %r1,16
clr %r0,%r1
jl .Lctr32_software
st${g} $s2,10*$SIZE_T($sp)
st${g} $s3,11*$SIZE_T($sp)
clr $len,%r1 # does work even in 64-bit mode
jle .Lctr32_nokma # kma is slower for <= 16 blocks
larl %r1,OPENSSL_s390xcap_P
lr $s2,%r0
llihh $s3,0x8000
srlg $s3,$s3,0($s2)
ng $s3,S390X_KMA(%r1) # check kma capability vector
jz .Lctr32_nokma
l${g}hi %r1,-$stdframe-112
l${g}r $s3,$sp
la $sp,0(%r1,$sp) # prepare parameter block
lhi %r1,0x0600
sllg $len,$len,4
or %r0,%r1 # set HS and LAAD flags
st${g} $s3,0($sp) # backchain
la %r1,$stdframe($sp)
xc $stdframe+0(64,$sp),$stdframe+0($sp) # clear reserved/unused
# in parameter block
lmg $s2,$s3,0($key) # copy key
stg $s2,$stdframe+80($sp)
stg $s3,$stdframe+88($sp)
lmg $s2,$s3,16($key)
stg $s2,$stdframe+96($sp)
stg $s3,$stdframe+104($sp)
lmg $s2,$s3,0($ivp) # copy iv
stg $s2,$stdframe+64($sp)
ahi $s3,-1 # kma requires counter-1
stg $s3,$stdframe+72($sp)
st $s3,$stdframe+12($sp) # copy counter
lghi $s2,0 # no AAD
lghi $s3,0
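# Annotation on the parameter block built above (offsets match the
# stores): the first 64 bytes (reserved fields, tag, hash subkey and
# length fields) are cleared, the 32-bit counter sits at offset 12,
# the IV at 64..79 and the key material from 80 on. The counter word
# is stored pre-decremented because kma increments it before use. The
# 0x0600 OR-ed into the function code sets HS (hash subkey supplied,
# so kma skips deriving H, whose value is irrelevant here as only the
# CTR keystream is consumed) and LAAD (the empty AAD is final).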
  1307. .long 0xb929a042 # kma $out,$s2,$inp
  1308. brc 1,.-4 # pay attention to "partial completion"
  1309. stg %r0,$stdframe+80($sp) # wipe key
  1310. stg %r0,$stdframe+88($sp)
  1311. stg %r0,$stdframe+96($sp)
  1312. stg %r0,$stdframe+104($sp)
  1313. la $sp,$stdframe+112($sp)
  1314. lm${g} $s2,$s3,10*$SIZE_T($sp)
  1315. br $ra
  1316. .align 16
  1317. .Lctr32_nokma:
  1318. stm${g} %r6,$s1,6*$SIZE_T($sp)
  1319. slgr $out,$inp
  1320. la %r1,0($key) # %r1 is permanent copy of $key
  1321. lg $iv0,0($ivp) # load ivec
  1322. lg $ivp,8($ivp)
  1323. # prepare and allocate stack frame at the top of 4K page
  1324. # with 1K reserved for eventual signal handling
  1325. lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer
  1326. lghi $s1,-4096
  1327. algr $s0,$sp
  1328. lgr $fp,$sp
  1329. ngr $s0,$s1 # align at page boundary
  1330. slgr $fp,$s0 # total buffer size
  1331. lgr $s2,$sp
  1332. lghi $s1,1024+16 # sl[g]fi is extended-immediate facility
  1333. slgr $fp,$s1 # deduct reservation to get usable buffer size
  1334. # buffer size is at lest 256 and at most 3072+256-16
  1335. la $sp,1024($s0) # alloca
  1336. srlg $fp,$fp,4 # convert bytes to blocks, minimum 16
  1337. st${g} $s2,0($sp) # back-chain
  1338. st${g} $fp,$SIZE_T($sp)
  1339. slgr $len,$fp
  1340. brc 1,.Lctr32_hw_switch # not zero, no borrow
  1341. algr $fp,$len # input is shorter than allocated buffer
  1342. lghi $len,0
  1343. st${g} $fp,$SIZE_T($sp)
  1344. .Lctr32_hw_switch:
  1345. ___
  1346. $code.=<<___ if (!$softonly && 0);# kmctr code was measured to be ~12% slower
  1347. llgfr $s0,%r0
  1348. lgr $s1,%r1
  1349. larl %r1,OPENSSL_s390xcap_P
  1350. llihh %r0,0x8000 # check if kmctr supports the function code
  1351. srlg %r0,%r0,0($s0)
  1352. ng %r0,S390X_KMCTR(%r1) # check kmctr capability vector
  1353. lgr %r0,$s0
  1354. lgr %r1,$s1
  1355. jz .Lctr32_km_loop
  1356. ####### kmctr code
  1357. algr $out,$inp # restore $out
  1358. lgr $s1,$len # $s1 undertakes $len
  1359. j .Lctr32_kmctr_loop
  1360. .align 16
  1361. .Lctr32_kmctr_loop:
  1362. la $s2,16($sp)
  1363. lgr $s3,$fp
  1364. .Lctr32_kmctr_prepare:
  1365. stg $iv0,0($s2)
  1366. stg $ivp,8($s2)
  1367. la $s2,16($s2)
  1368. ahi $ivp,1 # 32-bit increment, preserves upper half
  1369. brct $s3,.Lctr32_kmctr_prepare
  1370. #la $inp,0($inp) # inp
  1371. sllg $len,$fp,4 # len
  1372. #la $out,0($out) # out
  1373. la $s2,16($sp) # iv
  1374. .long 0xb92da042 # kmctr $out,$s2,$inp
  1375. brc 1,.-4 # pay attention to "partial completion"
  1376. slgr $s1,$fp
  1377. brc 1,.Lctr32_kmctr_loop # not zero, no borrow
  1378. algr $fp,$s1
  1379. lghi $s1,0
  1380. brc 4+1,.Lctr32_kmctr_loop # not zero
  1381. l${g} $sp,0($sp)
  1382. lm${g} %r6,$s3,6*$SIZE_T($sp)
  1383. br $ra
  1384. .align 16
  1385. ___
  1386. $code.=<<___ if (!$softonly);
  1387. .Lctr32_km_loop:
  1388. la $s2,16($sp)
  1389. lgr $s3,$fp
  1390. .Lctr32_km_prepare:
  1391. stg $iv0,0($s2)
  1392. stg $ivp,8($s2)
  1393. la $s2,16($s2)
  1394. ahi $ivp,1 # 32-bit increment, preserves upper half
  1395. brct $s3,.Lctr32_km_prepare
  1396. la $s0,16($sp) # inp
  1397. sllg $s1,$fp,4 # len
  1398. la $s2,16($sp) # out
  1399. .long 0xb92e00a8 # km %r10,%r8
  1400. brc 1,.-4 # pay attention to "partial completion"
  1401. la $s2,16($sp)
  1402. lgr $s3,$fp
  1403. slgr $s2,$inp
  1404. .Lctr32_km_xor:
  1405. lg $s0,0($inp)
  1406. lg $s1,8($inp)
  1407. xg $s0,0($s2,$inp)
  1408. xg $s1,8($s2,$inp)
  1409. stg $s0,0($out,$inp)
  1410. stg $s1,8($out,$inp)
  1411. la $inp,16($inp)
  1412. brct $s3,.Lctr32_km_xor
  1413. slgr $len,$fp
  1414. brc 1,.Lctr32_km_loop # not zero, no borrow
  1415. algr $fp,$len
  1416. lghi $len,0
  1417. brc 4+1,.Lctr32_km_loop # not zero
  1418. l${g} $s0,0($sp)
  1419. l${g} $s1,$SIZE_T($sp)
  1420. la $s2,16($sp)
  1421. .Lctr32_km_zap:
  1422. stg $s0,0($s2)
  1423. stg $s0,8($s2)
  1424. la $s2,16($s2)
  1425. brct $s1,.Lctr32_km_zap
  1426. la $sp,0($s0)
  1427. lm${g} %r6,$s3,6*$SIZE_T($sp)
  1428. br $ra
  1429. .align 16
  1430. .Lctr32_software:
  1431. ___
  1432. $code.=<<___;
  1433. stm${g} $key,$ra,5*$SIZE_T($sp)
  1434. sl${g}r $inp,$out
  1435. larl $tbl,AES_Te
  1436. llgf $t1,12($ivp)
  1437. .Lctr32_loop:
  1438. stm${g} $inp,$out,2*$SIZE_T($sp)
  1439. llgf $s0,0($ivp)
  1440. llgf $s1,4($ivp)
  1441. llgf $s2,8($ivp)
  1442. lgr $s3,$t1
  1443. st $t1,16*$SIZE_T($sp)
  1444. lgr %r4,$key
  1445. bras $ra,_s390x_AES_encrypt
  1446. lm${g} $inp,$ivp,2*$SIZE_T($sp)
  1447. llgf $t1,16*$SIZE_T($sp)
  1448. x $s0,0($inp,$out)
  1449. x $s1,4($inp,$out)
  1450. x $s2,8($inp,$out)
  1451. x $s3,12($inp,$out)
  1452. stm $s0,$s3,0($out)
  1453. la $out,16($out)
  1454. ahi $t1,1 # 32-bit increment
  1455. brct $len,.Lctr32_loop
  1456. lm${g} %r6,$ra,6*$SIZE_T($sp)
  1457. br $ra
  1458. .size AES_ctr32_encrypt,.-AES_ctr32_encrypt
  1459. ___
}
########################################################################
# void AES_xts_encrypt(const unsigned char *inp, unsigned char *out,
# size_t len, const AES_KEY *key1, const AES_KEY *key2,
# const unsigned char iv[16]);
#
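# XTS-AES (IEEE P1619) in brief: with T(0) = AES-enc(key2, iv) and
# T(i+1) = T(i)*x in GF(2^128),
#
#	C(i) = AES-enc(key1, P(i) ^ T(i)) ^ T(i)
#
# plus ciphertext stealing for a trailing partial block.  The helper
# _s390x_xts_km below processes the bulk of the data with the km
# instruction whenever an XTS-capable function code is available.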
{
my $inp="%r2";
my $out="%r4"; # len and out are swapped
my $len="%r3";
my $key1="%r5"; # $i1
my $key2="%r6"; # $i2
my $fp="%r7"; # $i3
my $tweak=16*$SIZE_T+16; # or $stdframe-16, bottom of the frame...
$code.=<<___;
.type _s390x_xts_km,\@function
.align 16
_s390x_xts_km:
___
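# The probe below tests the KM capability vector for the XTS flavour of
# the current function code: for AES the XTS code is the plain ECB code
# plus 32 (KM-AES-128 is 18, KM-XTS-AES-128 is 50, and so on).  With
# the first 8 bytes of the vector viewed as a big-endian 64-bit word,
# the test amounts to (the 0x80 decrypt-modifier bit masked off first):
#
#	int has_xts = (cap_km >> (63 - (32 + (fc & 0x7f)))) & 1;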
$code.=<<___ if(1);
llgfr $s0,%r0 # put aside the function code
lghi $s1,0x7f
nr $s1,%r0
larl %r1,OPENSSL_s390xcap_P
llihh %r0,0x8000
srlg %r0,%r0,32($s1) # check for 32+function code
ng %r0,S390X_KM(%r1) # check km capability vector
lgr %r0,$s0 # restore the function code
la %r1,0($key1) # restore $key1
jz .Lxts_km_vanilla
lmg $i2,$i3,$tweak($sp) # put aside the tweak value
algr $out,$inp
oill %r0,32 # switch to xts function code
aghi $s1,-18
sllg $s1,$s1,3 # (function code - 18)*8, 0 or 16
la %r1,$tweak-16($sp)
slgr %r1,$s1 # parameter block position
lmg $s0,$s3,0($key1) # load 256 bits of key material,
stmg $s0,$s3,0(%r1) # and copy it to parameter block.
# yes, it contains junk and overlaps
# with the tweak in 128-bit case.
# it's done to avoid conditional
# branch.
stmg $i2,$i3,$tweak($sp) # "re-seat" the tweak value
.long 0xb92e0042 # km %r4,%r2
brc 1,.-4 # pay attention to "partial completion"
lrvg $s0,$tweak+0($sp) # load the last tweak
lrvg $s1,$tweak+8($sp)
stmg %r0,%r3,$tweak-32($sp) # wipe copy of the key
nill %r0,0xffdf # switch back to original function code
la %r1,0($key1) # restore pointer to $key1
slgr $out,$inp
llgc $len,`2*$SIZE_T-1`($sp)
nill $len,0x0f # $len%=16
br $ra
.align 16
.Lxts_km_vanilla:
___
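# The vanilla path below precomputes a vector of consecutive tweak
# values in a page-aligned stack buffer; each step multiplies the
# current tweak by x in GF(2^128) (the "broadcast upper bit"/"rem"
# sequence at .Lxts_km_prepare).  A minimal C sketch of that doubling,
# assuming the tweak is held as two little-endian 64-bit halves lo/hi:
#
#	uint64_t carry = (uint64_t)((int64_t)hi >> 63) & 0x87; /* srag+ngr */
#	hi = (hi << 1) | (lo >> 63);                           /* alcgr    */
#	lo = (lo << 1) ^ carry;                                /* algr+xgr */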
$code.=<<___;
# prepare and allocate stack frame at the top of 4K page
# with 1K reserved for eventual signal handling
lghi $s0,-1024-256-16 # guarantee at least a 256-byte buffer
lghi $s1,-4096
algr $s0,$sp
lgr $fp,$sp
ngr $s0,$s1 # align at page boundary
slgr $fp,$s0 # total buffer size
lgr $s2,$sp
lghi $s1,1024+16 # sl[g]fi would need extended-immediate facility
slgr $fp,$s1 # deduct reservation to get usable buffer size
# buffer size is at least 256 and at most 3072+256-16
la $sp,1024($s0) # alloca
nill $fp,0xfff0 # round to 16*n
st${g} $s2,0($sp) # back-chain
nill $len,0xfff0 # redundant
st${g} $fp,$SIZE_T($sp)
slgr $len,$fp
brc 1,.Lxts_km_go # not zero, no borrow
algr $fp,$len # input is shorter than allocated buffer
lghi $len,0
st${g} $fp,$SIZE_T($sp)
.Lxts_km_go:
lrvg $s0,$tweak+0($s2) # load the tweak value in little-endian
lrvg $s1,$tweak+8($s2)
la $s2,16($sp) # vector of ascending tweak values
slgr $s2,$inp
srlg $s3,$fp,4
j .Lxts_km_start
.Lxts_km_loop:
la $s2,16($sp)
slgr $s2,$inp
srlg $s3,$fp,4
.Lxts_km_prepare:
lghi $i1,0x87
srag $i2,$s1,63 # broadcast upper bit
ngr $i1,$i2 # rem
algr $s0,$s0
alcgr $s1,$s1
xgr $s0,$i1
.Lxts_km_start:
lrvgr $i1,$s0 # flip byte order
lrvgr $i2,$s1
stg $i1,0($s2,$inp)
stg $i2,8($s2,$inp)
xg $i1,0($inp)
xg $i2,8($inp)
stg $i1,0($out,$inp)
stg $i2,8($out,$inp)
la $inp,16($inp)
brct $s3,.Lxts_km_prepare
slgr $inp,$fp # rewind $inp
la $s2,0($out,$inp)
lgr $s3,$fp
.long 0xb92e00aa # km $s2,$s2
brc 1,.-4 # pay attention to "partial completion"
la $s2,16($sp)
slgr $s2,$inp
srlg $s3,$fp,4
.Lxts_km_xor:
lg $i1,0($out,$inp)
lg $i2,8($out,$inp)
xg $i1,0($s2,$inp)
xg $i2,8($s2,$inp)
stg $i1,0($out,$inp)
stg $i2,8($out,$inp)
la $inp,16($inp)
brct $s3,.Lxts_km_xor
slgr $len,$fp
brc 1,.Lxts_km_loop # not zero, no borrow
algr $fp,$len
lghi $len,0
brc 4+1,.Lxts_km_loop # not zero
l${g} $i1,0($sp) # back-chain
llgf $fp,`2*$SIZE_T-4`($sp) # bytes used
la $i2,16($sp)
srlg $fp,$fp,4
.Lxts_km_zap:
stg $i1,0($i2)
stg $i1,8($i2)
la $i2,16($i2)
brct $fp,.Lxts_km_zap
la $sp,0($i1)
llgc $len,`2*$SIZE_T-1`($i1)
nill $len,0x0f # $len%=16
bzr $ra
# generate one more tweak...
lghi $i1,0x87
srag $i2,$s1,63 # broadcast upper bit
ngr $i1,$i2 # rem
algr $s0,$s0
alcgr $s1,$s1
xgr $s0,$i1
ltr $len,$len # clear zero flag
br $ra
.size _s390x_xts_km,.-_s390x_xts_km
.globl AES_xts_encrypt
.type AES_xts_encrypt,\@function
.align 16
AES_xts_encrypt:
xgr %r3,%r4 # flip %r3 and %r4, $out and $len
xgr %r4,%r3
xgr %r3,%r4
___
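# The three xgr above are the classic xor-swap, exchanging %r3 and %r4
# ($out and $len) without a scratch register:
#
#	a ^= b; b ^= a; a ^= b;		/* leaves a and b exchanged */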
$code.=<<___ if ($SIZE_T==4);
llgfr $len,$len
___
$code.=<<___;
st${g} $len,1*$SIZE_T($sp) # save copy of $len
srag $len,$len,4 # formally wrong, because it expands
# the sign bit, but who can afford asking
# to process more than 2^63-1 bytes?
# I use it, because it sets condition
# code...
bcr 8,$ra # abort if zero (i.e. less than 16)
___
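# srag by 4 divides $len by 16 *and* sets the condition code, which a
# plain srlg would not, so the bcr 8 above doubles as the early exit:
#
#	blocks = (int64_t)len >> 4;	/* arithmetic shift sets flags */
#	if (blocks == 0) return;	/* fewer than 16 bytes */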
$code.=<<___ if (!$softonly);
llgf %r0,240($key2)
lhi %r1,16
clr %r0,%r1
jl .Lxts_enc_software
st${g} $ra,5*$SIZE_T($sp)
stm${g} %r6,$s3,6*$SIZE_T($sp)
sllg $len,$len,4 # $len&=~15
slgr $out,$inp
# generate the tweak value
l${g} $s3,$stdframe($sp) # pointer to iv
la $s2,$tweak($sp)
lmg $s0,$s1,0($s3)
lghi $s3,16
stmg $s0,$s1,0($s2)
la %r1,0($key2) # $key2 is not needed anymore
.long 0xb92e00aa # km $s2,$s2, generate the tweak
brc 1,.-4 # can this happen?
l %r0,240($key1)
la %r1,0($key1) # $key1 is not needed anymore
bras $ra,_s390x_xts_km
jz .Lxts_enc_km_done
aghi $inp,-16 # take one step back
la $i3,0($out,$inp) # put aside real $out
.Lxts_enc_km_steal:
llgc $i1,16($inp)
llgc $i2,0($out,$inp)
stc $i1,0($out,$inp)
stc $i2,16($out,$inp)
la $inp,1($inp)
brct $len,.Lxts_enc_km_steal
la $s2,0($i3)
lghi $s3,16
lrvgr $i1,$s0 # flip byte order
lrvgr $i2,$s1
xg $i1,0($s2)
xg $i2,8($s2)
stg $i1,0($s2)
stg $i2,8($s2)
.long 0xb92e00aa # km $s2,$s2
brc 1,.-4 # can this happen?
lrvgr $i1,$s0 # flip byte order
lrvgr $i2,$s1
xg $i1,0($i3)
xg $i2,8($i3)
stg $i1,0($i3)
stg $i2,8($i3)
.Lxts_enc_km_done:
stg $sp,$tweak+0($sp) # wipe tweak
stg $sp,$tweak+8($sp)
l${g} $ra,5*$SIZE_T($sp)
lm${g} %r6,$s3,6*$SIZE_T($sp)
br $ra
.align 16
.Lxts_enc_software:
___
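# .Lxts_enc_km_steal above (and .Lxts_enc_steal below) implement
# standard XTS ciphertext stealing: the head of the last full
# ciphertext block is displaced by the tail plaintext bytes and
# re-encrypted, while the displaced bytes become the final, partial
# ciphertext block.  In C terms, with out pointing at the last full
# block and tail = len%16:
#
#	for (i = 0; i < tail; i++) {
#		unsigned char c = out[i];	/* steal a ciphertext byte */
#		out[i]      = inp[16 + i];	/* tail plaintext moves in */
#		out[16 + i] = c;		/* stolen byte is final C  */
#	}
#	/* then re-encrypt out[0..15] with the extra tweak */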
$code.=<<___;
stm${g} %r6,$ra,6*$SIZE_T($sp)
slgr $out,$inp
l${g} $s3,$stdframe($sp) # ivp
llgf $s0,0($s3) # load iv
llgf $s1,4($s3)
llgf $s2,8($s3)
llgf $s3,12($s3)
stm${g} %r2,%r5,2*$SIZE_T($sp)
la $key,0($key2)
larl $tbl,AES_Te
bras $ra,_s390x_AES_encrypt # generate the tweak
lm${g} %r2,%r5,2*$SIZE_T($sp)
stm $s0,$s3,$tweak($sp) # save the tweak
j .Lxts_enc_enter
.align 16
.Lxts_enc_loop:
lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
lrvg $s3,$tweak+8($sp)
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
algr $s1,$s1
alcgr $s3,$s3
xgr $s1,%r1
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits
stg $s1,$tweak+0($sp) # save the tweak
llgfr $s1,$s1
srlg $s2,$s3,32
stg $s3,$tweak+8($sp)
llgfr $s3,$s3
la $inp,16($inp) # $inp+=16
.Lxts_enc_enter:
x $s0,0($inp) # ^=*($inp)
x $s1,4($inp)
x $s2,8($inp)
x $s3,12($inp)
stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing
la $key,0($key1)
bras $ra,_s390x_AES_encrypt
lm${g} %r2,%r5,2*$SIZE_T($sp)
x $s0,$tweak+0($sp) # ^=tweak
x $s1,$tweak+4($sp)
x $s2,$tweak+8($sp)
x $s3,$tweak+12($sp)
st $s0,0($out,$inp)
st $s1,4($out,$inp)
st $s2,8($out,$inp)
st $s3,12($out,$inp)
brct${g} $len,.Lxts_enc_loop
llgc $len,`2*$SIZE_T-1`($sp)
nill $len,0x0f # $len%16
jz .Lxts_enc_done
la $i3,0($inp,$out) # put aside real $out
.Lxts_enc_steal:
llgc %r0,16($inp)
llgc %r1,0($out,$inp)
stc %r0,0($out,$inp)
stc %r1,16($out,$inp)
la $inp,1($inp)
brct $len,.Lxts_enc_steal
la $out,0($i3) # restore real $out
# generate last tweak...
lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
lrvg $s3,$tweak+8($sp)
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
algr $s1,$s1
alcgr $s3,$s3
xgr $s1,%r1
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits
stg $s1,$tweak+0($sp) # save the tweak
llgfr $s1,$s1
srlg $s2,$s3,32
stg $s3,$tweak+8($sp)
llgfr $s3,$s3
x $s0,0($out) # ^=*(inp)|stolen cipher-text
x $s1,4($out)
x $s2,8($out)
x $s3,12($out)
st${g} $out,4*$SIZE_T($sp)
la $key,0($key1)
bras $ra,_s390x_AES_encrypt
l${g} $out,4*$SIZE_T($sp)
x $s0,`$tweak+0`($sp) # ^=tweak
x $s1,`$tweak+4`($sp)
x $s2,`$tweak+8`($sp)
x $s3,`$tweak+12`($sp)
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
.Lxts_enc_done:
stg $sp,$tweak+0($sp) # wipe tweak
stg $sp,$tweak+8($sp)
lm${g} %r6,$ra,6*$SIZE_T($sp)
br $ra
.size AES_xts_encrypt,.-AES_xts_encrypt
___
# void AES_xts_decrypt(const unsigned char *inp, unsigned char *out,
# size_t len, const AES_KEY *key1, const AES_KEY *key2,
# const unsigned char iv[16]);
#
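# With a partial final block, XTS decryption reverses the tweak order
# at the tail: the last full ciphertext block is decrypted under the
# *second* tweak T(m), the stealing swap is undone, and the reassembled
# block is decrypted under T(m-1) (see .Lxts_dec_2ndtweak and the
# steal/xor sequences below).  Roughly:
#
#	B       = AES-dec(key1, C(m-1) ^ T(m)) ^ T(m)
#	P(tail) = B[0..tail-1];  B[0..tail-1] = C(tail)
#	P(m-1)  = AES-dec(key1, B ^ T(m-1)) ^ T(m-1)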
$code.=<<___;
.globl AES_xts_decrypt
.type AES_xts_decrypt,\@function
.align 16
AES_xts_decrypt:
xgr %r3,%r4 # flip %r3 and %r4, $out and $len
xgr %r4,%r3
xgr %r3,%r4
___
$code.=<<___ if ($SIZE_T==4);
llgfr $len,$len
___
$code.=<<___;
st${g} $len,1*$SIZE_T($sp) # save copy of $len
aghi $len,-16
bcr 4,$ra # abort if less than zero. formally
# wrong, because $len is unsigned,
# but who can afford asking to
# process more than 2^63-1 bytes?
tmll $len,0x0f
jnz .Lxts_dec_proceed
aghi $len,16
.Lxts_dec_proceed:
___
$code.=<<___ if (!$softonly);
llgf %r0,240($key2)
lhi %r1,16
clr %r0,%r1
jl .Lxts_dec_software
st${g} $ra,5*$SIZE_T($sp)
stm${g} %r6,$s3,6*$SIZE_T($sp)
nill $len,0xfff0 # $len&=~15
slgr $out,$inp
# generate the tweak value
l${g} $s3,$stdframe($sp) # pointer to iv
la $s2,$tweak($sp)
lmg $s0,$s1,0($s3)
lghi $s3,16
stmg $s0,$s1,0($s2)
la %r1,0($key2) # $key2 is not needed past this point
.long 0xb92e00aa # km $s2,$s2, generate the tweak
brc 1,.-4 # can this happen?
l %r0,240($key1)
la %r1,0($key1) # $key1 is not needed anymore
ltgr $len,$len
jz .Lxts_dec_km_short
bras $ra,_s390x_xts_km
jz .Lxts_dec_km_done
lrvgr $s2,$s0 # make copy in reverse byte order
lrvgr $s3,$s1
j .Lxts_dec_km_2ndtweak
.Lxts_dec_km_short:
llgc $len,`2*$SIZE_T-1`($sp)
nill $len,0x0f # $len%=16
lrvg $s0,$tweak+0($sp) # load the tweak
lrvg $s1,$tweak+8($sp)
lrvgr $s2,$s0 # make copy in reverse byte order
lrvgr $s3,$s1
.Lxts_dec_km_2ndtweak:
lghi $i1,0x87
srag $i2,$s1,63 # broadcast upper bit
ngr $i1,$i2 # rem
algr $s0,$s0
alcgr $s1,$s1
xgr $s0,$i1
lrvgr $i1,$s0 # flip byte order
lrvgr $i2,$s1
xg $i1,0($inp)
xg $i2,8($inp)
stg $i1,0($out,$inp)
stg $i2,8($out,$inp)
la $i2,0($out,$inp)
lghi $i3,16
.long 0xb92e0066 # km $i2,$i2
brc 1,.-4 # can this happen?
lrvgr $i1,$s0
lrvgr $i2,$s1
xg $i1,0($out,$inp)
xg $i2,8($out,$inp)
stg $i1,0($out,$inp)
stg $i2,8($out,$inp)
la $i3,0($out,$inp) # put aside real $out
.Lxts_dec_km_steal:
llgc $i1,16($inp)
llgc $i2,0($out,$inp)
stc $i1,0($out,$inp)
stc $i2,16($out,$inp)
la $inp,1($inp)
brct $len,.Lxts_dec_km_steal
lgr $s0,$s2
lgr $s1,$s3
xg $s0,0($i3)
xg $s1,8($i3)
stg $s0,0($i3)
stg $s1,8($i3)
la $s0,0($i3)
lghi $s1,16
.long 0xb92e0088 # km $s0,$s0
brc 1,.-4 # can this happen?
xg $s2,0($i3)
xg $s3,8($i3)
stg $s2,0($i3)
stg $s3,8($i3)
.Lxts_dec_km_done:
stg $sp,$tweak+0($sp) # wipe tweak
stg $sp,$tweak+8($sp)
l${g} $ra,5*$SIZE_T($sp)
lm${g} %r6,$s3,6*$SIZE_T($sp)
br $ra
.align 16
.Lxts_dec_software:
___
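# Note that even on the decrypt path the tweak is produced by AES
# *encryption* of the IV under key2 (the code below selects AES_Te for
# the tweak and only then switches to AES_Td for the payload):
#
#	T(0) = AES-enc(key2, iv);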
$code.=<<___;
stm${g} %r6,$ra,6*$SIZE_T($sp)
srlg $len,$len,4
slgr $out,$inp
l${g} $s3,$stdframe($sp) # ivp
llgf $s0,0($s3) # load iv
llgf $s1,4($s3)
llgf $s2,8($s3)
llgf $s3,12($s3)
stm${g} %r2,%r5,2*$SIZE_T($sp)
la $key,0($key2)
larl $tbl,AES_Te
bras $ra,_s390x_AES_encrypt # generate the tweak
lm${g} %r2,%r5,2*$SIZE_T($sp)
larl $tbl,AES_Td
lt${g}r $len,$len
stm $s0,$s3,$tweak($sp) # save the tweak
jz .Lxts_dec_short
j .Lxts_dec_enter
.align 16
.Lxts_dec_loop:
lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
lrvg $s3,$tweak+8($sp)
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
algr $s1,$s1
alcgr $s3,$s3
xgr $s1,%r1
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits
stg $s1,$tweak+0($sp) # save the tweak
llgfr $s1,$s1
srlg $s2,$s3,32
stg $s3,$tweak+8($sp)
llgfr $s3,$s3
.Lxts_dec_enter:
x $s0,0($inp) # tweak^=*(inp)
x $s1,4($inp)
x $s2,8($inp)
x $s3,12($inp)
stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing
la $key,0($key1)
bras $ra,_s390x_AES_decrypt
lm${g} %r2,%r5,2*$SIZE_T($sp)
x $s0,$tweak+0($sp) # ^=tweak
x $s1,$tweak+4($sp)
x $s2,$tweak+8($sp)
x $s3,$tweak+12($sp)
st $s0,0($out,$inp)
st $s1,4($out,$inp)
st $s2,8($out,$inp)
st $s3,12($out,$inp)
la $inp,16($inp)
brct${g} $len,.Lxts_dec_loop
llgc $len,`2*$SIZE_T-1`($sp)
nill $len,0x0f # $len%16
jz .Lxts_dec_done
# generate pair of tweaks...
lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
lrvg $s3,$tweak+8($sp)
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
algr $s1,$s1
alcgr $s3,$s3
xgr $s1,%r1
lrvgr $i2,$s1 # flip byte order
lrvgr $i3,$s3
stmg $i2,$i3,$tweak($sp) # save the 1st tweak
j .Lxts_dec_2ndtweak
.align 16
.Lxts_dec_short:
llgc $len,`2*$SIZE_T-1`($sp)
nill $len,0x0f # $len%16
lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
lrvg $s3,$tweak+8($sp)
.Lxts_dec_2ndtweak:
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
algr $s1,$s1
alcgr $s3,$s3
xgr $s1,%r1
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits
stg $s1,$tweak-16+0($sp) # save the 2nd tweak
llgfr $s1,$s1
srlg $s2,$s3,32
stg $s3,$tweak-16+8($sp)
llgfr $s3,$s3
x $s0,0($inp) # tweak_the_2nd^=*(inp)
x $s1,4($inp)
x $s2,8($inp)
x $s3,12($inp)
stm${g} %r2,%r3,2*$SIZE_T($sp)
la $key,0($key1)
bras $ra,_s390x_AES_decrypt
lm${g} %r2,%r5,2*$SIZE_T($sp)
x $s0,$tweak-16+0($sp) # ^=tweak_the_2nd
x $s1,$tweak-16+4($sp)
x $s2,$tweak-16+8($sp)
x $s3,$tweak-16+12($sp)
st $s0,0($out,$inp)
st $s1,4($out,$inp)
st $s2,8($out,$inp)
st $s3,12($out,$inp)
la $i3,0($out,$inp) # put aside real $out
.Lxts_dec_steal:
llgc %r0,16($inp)
llgc %r1,0($out,$inp)
stc %r0,0($out,$inp)
stc %r1,16($out,$inp)
la $inp,1($inp)
brct $len,.Lxts_dec_steal
la $out,0($i3) # restore real $out
lm $s0,$s3,$tweak($sp) # load the 1st tweak
x $s0,0($out) # tweak^=*(inp)|stolen cipher-text
x $s1,4($out)
x $s2,8($out)
x $s3,12($out)
st${g} $out,4*$SIZE_T($sp)
la $key,0($key1)
bras $ra,_s390x_AES_decrypt
l${g} $out,4*$SIZE_T($sp)
x $s0,$tweak+0($sp) # ^=tweak
x $s1,$tweak+4($sp)
x $s2,$tweak+8($sp)
x $s3,$tweak+12($sp)
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
stg $sp,$tweak-16+0($sp) # wipe 2nd tweak
stg $sp,$tweak-16+8($sp)
.Lxts_dec_done:
stg $sp,$tweak+0($sp) # wipe tweak
stg $sp,$tweak+8($sp)
lm${g} %r6,$ra,6*$SIZE_T($sp)
br $ra
.size AES_xts_decrypt,.-AES_xts_decrypt
___
}
$code.=<<___;
.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!"; # force flush