sha256blockAvx2_amd64.s 74 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442
  1. //+build !noasm !appengine
  2. // SHA256 implementation for AVX2
  3. //
  4. // Minio Cloud Storage, (C) 2016 Minio, Inc.
  5. //
  6. // Licensed under the Apache License, Version 2.0 (the "License");
  7. // you may not use this file except in compliance with the License.
  8. // You may obtain a copy of the License at
  9. //
  10. // http://www.apache.org/licenses/LICENSE-2.0
  11. //
  12. // Unless required by applicable law or agreed to in writing, software
  13. // distributed under the License is distributed on an "AS IS" BASIS,
  14. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. // See the License for the specific language governing permissions and
  16. // limitations under the License.
  17. //
  18. //
  19. // This code is based on an Intel White-Paper:
  20. // "Fast SHA-256 Implementations on Intel Architecture Processors"
  21. //
  22. // together with the reference implementation from the following authors:
  23. // James Guilford <james.guilford@intel.com>
  24. // Kirk Yap <kirk.s.yap@intel.com>
  25. // Tim Chen <tim.c.chen@linux.intel.com>
  26. //
  27. // For Golang it has been converted to Plan 9 assembly with the help of
  28. // github.com/minio/asm2plan9s to assemble Intel instructions to their Plan9
  29. // equivalents
  30. //
  31. #include "textflag.h"
  // K256<> is the file-local constant table that blockAvx2 addresses through BP.
  //
  // 0x000-0x1ff: the 64 SHA-256 round constants. Each quadword packs two
  // consecutive 32-bit constants (lower index in the low dword). Every 16-byte
  // group is stored twice in a row, so a 32-byte load puts the same four
  // constants into both 128-bit halves of a YMM register; the code adds one
  // such row to a YMM register holding message words from two input blocks.
  32. DATA K256<>+0x000(SB)/8, $0x71374491428a2f98
  33. DATA K256<>+0x008(SB)/8, $0xe9b5dba5b5c0fbcf
  34. DATA K256<>+0x010(SB)/8, $0x71374491428a2f98
  35. DATA K256<>+0x018(SB)/8, $0xe9b5dba5b5c0fbcf
  36. DATA K256<>+0x020(SB)/8, $0x59f111f13956c25b
  37. DATA K256<>+0x028(SB)/8, $0xab1c5ed5923f82a4
  38. DATA K256<>+0x030(SB)/8, $0x59f111f13956c25b
  39. DATA K256<>+0x038(SB)/8, $0xab1c5ed5923f82a4
  40. DATA K256<>+0x040(SB)/8, $0x12835b01d807aa98
  41. DATA K256<>+0x048(SB)/8, $0x550c7dc3243185be
  42. DATA K256<>+0x050(SB)/8, $0x12835b01d807aa98
  43. DATA K256<>+0x058(SB)/8, $0x550c7dc3243185be
  44. DATA K256<>+0x060(SB)/8, $0x80deb1fe72be5d74
  45. DATA K256<>+0x068(SB)/8, $0xc19bf1749bdc06a7
  46. DATA K256<>+0x070(SB)/8, $0x80deb1fe72be5d74
  47. DATA K256<>+0x078(SB)/8, $0xc19bf1749bdc06a7
  48. DATA K256<>+0x080(SB)/8, $0xefbe4786e49b69c1
  49. DATA K256<>+0x088(SB)/8, $0x240ca1cc0fc19dc6
  50. DATA K256<>+0x090(SB)/8, $0xefbe4786e49b69c1
  51. DATA K256<>+0x098(SB)/8, $0x240ca1cc0fc19dc6
  52. DATA K256<>+0x0a0(SB)/8, $0x4a7484aa2de92c6f
  53. DATA K256<>+0x0a8(SB)/8, $0x76f988da5cb0a9dc
  54. DATA K256<>+0x0b0(SB)/8, $0x4a7484aa2de92c6f
  55. DATA K256<>+0x0b8(SB)/8, $0x76f988da5cb0a9dc
  56. DATA K256<>+0x0c0(SB)/8, $0xa831c66d983e5152
  57. DATA K256<>+0x0c8(SB)/8, $0xbf597fc7b00327c8
  58. DATA K256<>+0x0d0(SB)/8, $0xa831c66d983e5152
  59. DATA K256<>+0x0d8(SB)/8, $0xbf597fc7b00327c8
  60. DATA K256<>+0x0e0(SB)/8, $0xd5a79147c6e00bf3
  61. DATA K256<>+0x0e8(SB)/8, $0x1429296706ca6351
  62. DATA K256<>+0x0f0(SB)/8, $0xd5a79147c6e00bf3
  63. DATA K256<>+0x0f8(SB)/8, $0x1429296706ca6351
  64. DATA K256<>+0x100(SB)/8, $0x2e1b213827b70a85
  65. DATA K256<>+0x108(SB)/8, $0x53380d134d2c6dfc
  66. DATA K256<>+0x110(SB)/8, $0x2e1b213827b70a85
  67. DATA K256<>+0x118(SB)/8, $0x53380d134d2c6dfc
  68. DATA K256<>+0x120(SB)/8, $0x766a0abb650a7354
  69. DATA K256<>+0x128(SB)/8, $0x92722c8581c2c92e
  70. DATA K256<>+0x130(SB)/8, $0x766a0abb650a7354
  71. DATA K256<>+0x138(SB)/8, $0x92722c8581c2c92e
  72. DATA K256<>+0x140(SB)/8, $0xa81a664ba2bfe8a1
  73. DATA K256<>+0x148(SB)/8, $0xc76c51a3c24b8b70
  74. DATA K256<>+0x150(SB)/8, $0xa81a664ba2bfe8a1
  75. DATA K256<>+0x158(SB)/8, $0xc76c51a3c24b8b70
  76. DATA K256<>+0x160(SB)/8, $0xd6990624d192e819
  77. DATA K256<>+0x168(SB)/8, $0x106aa070f40e3585
  78. DATA K256<>+0x170(SB)/8, $0xd6990624d192e819
  79. DATA K256<>+0x178(SB)/8, $0x106aa070f40e3585
  80. DATA K256<>+0x180(SB)/8, $0x1e376c0819a4c116
  81. DATA K256<>+0x188(SB)/8, $0x34b0bcb52748774c
  82. DATA K256<>+0x190(SB)/8, $0x1e376c0819a4c116
  83. DATA K256<>+0x198(SB)/8, $0x34b0bcb52748774c
  84. DATA K256<>+0x1a0(SB)/8, $0x4ed8aa4a391c0cb3
  85. DATA K256<>+0x1a8(SB)/8, $0x682e6ff35b9cca4f
  86. DATA K256<>+0x1b0(SB)/8, $0x4ed8aa4a391c0cb3
  87. DATA K256<>+0x1b8(SB)/8, $0x682e6ff35b9cca4f
  88. DATA K256<>+0x1c0(SB)/8, $0x78a5636f748f82ee
  89. DATA K256<>+0x1c8(SB)/8, $0x8cc7020884c87814
  90. DATA K256<>+0x1d0(SB)/8, $0x78a5636f748f82ee
  91. DATA K256<>+0x1d8(SB)/8, $0x8cc7020884c87814
  92. DATA K256<>+0x1e0(SB)/8, $0xa4506ceb90befffa
  93. DATA K256<>+0x1e8(SB)/8, $0xc67178f2bef9a3f7
  94. DATA K256<>+0x1f0(SB)/8, $0xa4506ceb90befffa
  95. DATA K256<>+0x1f8(SB)/8, $0xc67178f2bef9a3f7
  // 0x200: VPSHUFB mask that reverses the four bytes of every 32-bit word.
  // blockAvx2 loads it into YMM10 and applies it to the message words as they
  // are read (the "byte swap" step). Same 16-byte pattern in both lanes.
  96. DATA K256<>+0x200(SB)/8, $0x0405060700010203
  97. DATA K256<>+0x208(SB)/8, $0x0c0d0e0f08090a0b
  98. DATA K256<>+0x210(SB)/8, $0x0405060700010203
  99. DATA K256<>+0x218(SB)/8, $0x0c0d0e0f08090a0b
  // 0x220: VPSHUFB mask (kept in YMM8). Within each 128-bit lane it moves
  // source dwords 0 and 2 into dwords 0 and 1 and zeroes dwords 2 and 3
  // (selector bytes of 0xff produce zero bytes).
  100. DATA K256<>+0x220(SB)/8, $0x0b0a090803020100
  101. DATA K256<>+0x228(SB)/8, $0xffffffffffffffff
  102. DATA K256<>+0x230(SB)/8, $0x0b0a090803020100
  103. DATA K256<>+0x238(SB)/8, $0xffffffffffffffff
  // 0x240: VPSHUFB mask (kept in YMM9). Mirror image of the mask above: it
  // moves source dwords 0 and 2 into dwords 2 and 3 and zeroes dwords 0 and 1.
  104. DATA K256<>+0x240(SB)/8, $0xffffffffffffffff
  105. DATA K256<>+0x248(SB)/8, $0x0b0a090803020100
  106. DATA K256<>+0x250(SB)/8, $0xffffffffffffffff
  107. DATA K256<>+0x258(SB)/8, $0x0b0a090803020100
  // Total size is 608 bytes (0x260), i.e. the end of the last DATA entry above.
  // Flag 8 is RODATA in Go's textflag.h. The hand-encoded instructions below
  // hard-code offsets into this table, so its layout must not change.
  108. GLOBL K256<>(SB), 8, $608
  109. // func blockAvx2(h []uint32, message []uint8)
  110. TEXT ·blockAvx2(SB), 7, $0
  111. MOVQ ctx+0(FP), DI // DI: &h
  112. MOVQ inp+24(FP), SI // SI: &message
  113. MOVQ inplength+32(FP), DX // len(message)
  114. ADDQ SI, DX // end pointer of input
  115. MOVQ SP, R11 // copy stack pointer
  116. SUBQ $0x220, SP // sp -= 0x220
  117. ANDQ $0xfffffffffffffc00, SP // align stack frame
  118. ADDQ $0x1c0, SP
  119. MOVQ DI, 0x40(SP) // save ctx
  120. MOVQ SI, 0x48(SP) // save input
  121. MOVQ DX, 0x50(SP) // save end pointer
  122. MOVQ R11, 0x58(SP) // save copy of stack pointer
  123. WORD $0xf8c5; BYTE $0x77 // vzeroupper
  124. ADDQ $0x40, SI // input++
  125. MOVL (DI), AX
  126. MOVQ SI, R12 // borrow $T1
  127. MOVL 4(DI), BX
  128. CMPQ SI, DX // $_end
  129. MOVL 8(DI), CX
  130. LONG $0xe4440f4c // cmove r12,rsp /* next block or random data */
  131. MOVL 12(DI), DX
  132. MOVL 16(DI), R8
  133. MOVL 20(DI), R9
  134. MOVL 24(DI), R10
  135. MOVL 28(DI), R11
  136. LEAQ K256<>(SB), BP
  137. LONG $0x856f7dc5; LONG $0x00000220 // VMOVDQA YMM8, 0x220[rbp] /* vmovdqa ymm8,YMMWORD PTR [rip+0x220] */
  138. LONG $0x8d6f7dc5; LONG $0x00000240 // VMOVDQA YMM9, 0x240[rbp] /* vmovdqa ymm9,YMMWORD PTR [rip+0x240] */
  139. LONG $0x956f7dc5; LONG $0x00000200 // VMOVDQA YMM10, 0x200[rbp] /* vmovdqa ymm7,YMMWORD PTR [rip+0x200] */
  140. loop0:
  141. LONG $0x6f7dc1c4; BYTE $0xfa // VMOVDQA YMM7, YMM10
  142. // Load first 16 dwords from two blocks
  143. MOVOU -64(SI), X0 // vmovdqu xmm0,XMMWORD PTR [rsi-0x40]
  144. MOVOU -48(SI), X1 // vmovdqu xmm1,XMMWORD PTR [rsi-0x30]
  145. MOVOU -32(SI), X2 // vmovdqu xmm2,XMMWORD PTR [rsi-0x20]
  146. MOVOU -16(SI), X3 // vmovdqu xmm3,XMMWORD PTR [rsi-0x10]
  147. // Byte swap data and transpose data into high/low
  148. LONG $0x387dc3c4; WORD $0x2404; BYTE $0x01 // vinserti128 ymm0,ymm0,[r12],0x1
  149. LONG $0x3875c3c4; LONG $0x0110244c // vinserti128 ymm1,ymm1,0x10[r12],0x1
  150. LONG $0x007de2c4; BYTE $0xc7 // vpshufb ymm0,ymm0,ymm7
  151. LONG $0x386dc3c4; LONG $0x01202454 // vinserti128 ymm2,ymm2,0x20[r12],0x1
  152. LONG $0x0075e2c4; BYTE $0xcf // vpshufb ymm1,ymm1,ymm7
  153. LONG $0x3865c3c4; LONG $0x0130245c // vinserti128 ymm3,ymm3,0x30[r12],0x1
  154. LEAQ K256<>(SB), BP
  155. LONG $0x006de2c4; BYTE $0xd7 // vpshufb ymm2,ymm2,ymm7
  156. LONG $0x65fefdc5; BYTE $0x00 // vpaddd ymm4,ymm0,[rbp]
  157. LONG $0x0065e2c4; BYTE $0xdf // vpshufb ymm3,ymm3,ymm7
  158. LONG $0x6dfef5c5; BYTE $0x20 // vpaddd ymm5,ymm1,0x20[rbp]
  159. LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,0x40[rbp]
  160. LONG $0x7dfee5c5; BYTE $0x60 // vpaddd ymm7,ymm3,0x60[rbp]
  161. LONG $0x247ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm4
  162. XORQ R14, R14
  163. LONG $0x6c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm5
  164. ADDQ $-0x40, SP
  165. MOVQ BX, DI
  166. LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6
  167. XORQ CX, DI // magic
  168. LONG $0x7c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm7
  169. MOVQ R9, R12
  170. ADDQ $0x80,BP
  171. loop1:
  172. // Schedule 48 input dwords, by doing 3 rounds of 12 each
  173. // Note: SIMD instructions are interleaved with the SHA calculations
  174. ADDQ $-0x40, SP
  175. LONG $0x0f75e3c4; WORD $0x04e0 // vpalignr ymm4,ymm1,ymm0,0x4
  176. // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80)
  177. LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80]
  178. WORD $0x2145; BYTE $0xc4 // and r12d,r8d
  179. LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
  180. LONG $0x0f65e3c4; WORD $0x04fa // vpalignr ymm7,ymm3,ymm2,0x4
  181. LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
  182. LONG $0x30048d42 // lea eax,[rax+r14*1]
  183. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  184. LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
  185. LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
  186. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  187. LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
  188. LONG $0xc7fefdc5 // vpaddd ymm0,ymm0,ymm7
  189. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  190. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  191. WORD $0x8941; BYTE $0xc7 // mov r15d,eax
  192. LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
  193. LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
  194. LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
  195. WORD $0x3141; BYTE $0xdf // xor r15d,ebx
  196. LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
  197. LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
  198. LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
  199. LONG $0x1a148d42 // lea edx,[rdx+r11*1]
  200. LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
  201. WORD $0x2144; BYTE $0xff // and edi,r15d
  202. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  203. WORD $0xdf31 // xor edi,ebx
  204. LONG $0xfb70fdc5; BYTE $0xfa // vpshufd ymm7,ymm3,0xfa
  205. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  206. LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
  207. WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
  208. LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
  209. // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84)
  210. LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84]
  211. WORD $0x2141; BYTE $0xd4 // and r12d,edx
  212. LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
  213. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  214. LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
  215. LONG $0x331c8d47 // lea r11d,[r11+r14*1]
  216. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  217. LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
  218. LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
  219. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  220. LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
  221. LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
  222. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  223. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  224. WORD $0x8944; BYTE $0xdf // mov edi,r11d
  225. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  226. LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
  227. LONG $0x2a148d47 // lea r10d,[r10+r13*1]
  228. WORD $0xc731 // xor edi,eax
  229. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  230. LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
  231. LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
  232. LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
  233. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  234. WORD $0x2141; BYTE $0xff // and r15d,edi
  235. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  236. WORD $0x3141; BYTE $0xc7 // xor r15d,eax
  237. LONG $0xc4fefdc5 // vpaddd ymm0,ymm0,ymm4
  238. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  239. LONG $0x3a148d47 // lea r10d,[r10+r15*1]
  240. WORD $0x8941; BYTE $0xd4 // mov r12d,edx
  241. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  242. // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88)
  243. LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88]
  244. WORD $0x2141; BYTE $0xcc // and r12d,ecx
  245. LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
  246. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  247. LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
  248. LONG $0x32148d47 // lea r10d,[r10+r14*1]
  249. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  250. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  251. LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
  252. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  253. LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
  254. LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
  255. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  256. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  257. WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
  258. LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6
  259. LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
  260. LONG $0x290c8d47 // lea r9d,[r9+r13*1]
  261. WORD $0x3145; BYTE $0xdf // xor r15d,r11d
  262. LONG $0xf870fdc5; BYTE $0x50 // vpshufd ymm7,ymm0,0x50
  263. LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
  264. LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
  265. LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
  266. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  267. WORD $0x2144; BYTE $0xff // and edi,r15d
  268. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  269. WORD $0x3144; BYTE $0xdf // xor edi,r11d
  270. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  271. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  272. LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
  273. WORD $0x8941; BYTE $0xcc // mov r12d,ecx
  274. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  275. // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c)
  276. LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c]
  277. WORD $0x2141; BYTE $0xdc // and r12d,ebx
  278. LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
  279. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  280. LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
  281. LONG $0x310c8d47 // lea r9d,[r9+r14*1]
  282. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  283. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  284. LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
  285. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  286. LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
  287. LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
  288. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  289. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  290. WORD $0x8944; BYTE $0xcf // mov edi,r9d
  291. LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6
  292. LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
  293. LONG $0x28048d47 // lea r8d,[r8+r13*1]
  294. WORD $0x3144; BYTE $0xd7 // xor edi,r10d
  295. LONG $0x75fefdc5; BYTE $0x00 // vpaddd ymm6,ymm0,[rbp+0x0]
  296. LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
  297. LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
  298. LONG $0x00048d42 // lea eax,[rax+r8*1]
  299. WORD $0x2141; BYTE $0xff // and r15d,edi
  300. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  301. WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
  302. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  303. LONG $0x38048d47 // lea r8d,[r8+r15*1]
  304. WORD $0x8941; BYTE $0xdc // mov r12d,ebx
  305. LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6
  306. LONG $0x0f6de3c4; WORD $0x04e1 // vpalignr ymm4,ymm2,ymm1,0x4
  307. // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0)
  308. LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0]
  309. WORD $0x2141; BYTE $0xc4 // and r12d,eax
  310. LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
  311. LONG $0x0f7de3c4; WORD $0x04fb // vpalignr ymm7,ymm0,ymm3,0x4
  312. LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
  313. LONG $0x30048d47 // lea r8d,[r8+r14*1]
  314. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  315. LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
  316. LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
  317. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  318. LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
  319. LONG $0xcffef5c5 // vpaddd ymm1,ymm1,ymm7
  320. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  321. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  322. WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
  323. LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
  324. LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
  325. LONG $0x2a148d42 // lea edx,[rdx+r13*1]
  326. WORD $0x3145; BYTE $0xcf // xor r15d,r9d
  327. LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
  328. LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
  329. LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
  330. LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
  331. LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
  332. WORD $0x2144; BYTE $0xff // and edi,r15d
  333. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  334. WORD $0x3144; BYTE $0xcf // xor edi,r9d
  335. LONG $0xf870fdc5; BYTE $0xfa // vpshufd ymm7,ymm0,0xfa
  336. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  337. WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
  338. WORD $0x8941; BYTE $0xc4 // mov r12d,eax
  339. LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
  340. // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4)
  341. LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4]
  342. WORD $0x2145; BYTE $0xdc // and r12d,r11d
  343. LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
  344. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  345. LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
  346. LONG $0x32148d42 // lea edx,[rdx+r14*1]
  347. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  348. LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
  349. LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
  350. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  351. LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
  352. LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
  353. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  354. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  355. WORD $0xd789 // mov edi,edx
  356. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  357. LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
  358. LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
  359. WORD $0x3144; BYTE $0xc7 // xor edi,r8d
  360. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  361. LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
  362. LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
  363. LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
  364. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  365. WORD $0x2141; BYTE $0xff // and r15d,edi
  366. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  367. WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
  368. LONG $0xccfef5c5 // vpaddd ymm1,ymm1,ymm4
  369. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  370. LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
  371. WORD $0x8945; BYTE $0xdc // mov r12d,r11d
  372. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  373. // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8)
  374. LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8]
  375. WORD $0x2145; BYTE $0xd4 // and r12d,r10d
  376. LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
  377. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  378. LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
  379. LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
  380. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  381. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  382. LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
  383. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  384. LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
  385. LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
  386. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  387. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  388. WORD $0x8941; BYTE $0xcf // mov r15d,ecx
  389. LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6
  390. LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
  391. LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
  392. WORD $0x3141; BYTE $0xd7 // xor r15d,edx
  393. LONG $0xf970fdc5; BYTE $0x50 // vpshufd ymm7,ymm1,0x50
  394. LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
  395. LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
  396. LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
  397. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  398. WORD $0x2144; BYTE $0xff // and edi,r15d
  399. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  400. WORD $0xd731 // xor edi,edx
  401. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  402. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  403. WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
  404. WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
  405. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  406. // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac)
  407. LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac]
  408. WORD $0x2145; BYTE $0xcc // and r12d,r9d
  409. LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
  410. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  411. LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
  412. LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
  413. LONG $0x20048d42 // lea eax,[rax+r12*1]
  414. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  415. LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
  416. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  417. LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
  418. LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
  419. LONG $0x20048d42 // lea eax,[rax+r12*1]
  420. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  421. WORD $0xdf89 // mov edi,ebx
  422. LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6
  423. LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
  424. LONG $0x28048d42 // lea eax,[rax+r13*1]
  425. WORD $0xcf31 // xor edi,ecx
  426. LONG $0x75fef5c5; BYTE $0x20 // vpaddd ymm6,ymm1,[rbp+0x20]
  427. LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
  428. LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
  429. LONG $0x00048d45 // lea r8d,[r8+rax*1]
  430. WORD $0x2141; BYTE $0xff // and r15d,edi
  431. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  432. WORD $0x3141; BYTE $0xcf // xor r15d,ecx
  433. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  434. LONG $0x38048d42 // lea eax,[rax+r15*1]
  435. WORD $0x8945; BYTE $0xcc // mov r12d,r9d
  436. LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6
  437. LONG $0x24648d48; BYTE $0xc0 // lea rsp,[rsp-0x40]
  438. LONG $0x0f65e3c4; WORD $0x04e2 // vpalignr ymm4,ymm3,ymm2,0x4
  439. // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80)
  440. LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80]
  441. WORD $0x2145; BYTE $0xc4 // and r12d,r8d
  442. LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
  443. LONG $0x0f75e3c4; WORD $0x04f8 // vpalignr ymm7,ymm1,ymm0,0x4
  444. LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
  445. LONG $0x30048d42 // lea eax,[rax+r14*1]
  446. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  447. LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
  448. LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
  449. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  450. LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
  451. LONG $0xd7feedc5 // vpaddd ymm2,ymm2,ymm7
  452. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  453. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  454. WORD $0x8941; BYTE $0xc7 // mov r15d,eax
  455. LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
  456. LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
  457. LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
  458. WORD $0x3141; BYTE $0xdf // xor r15d,ebx
  459. LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
  460. LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
  461. LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
  462. LONG $0x1a148d42 // lea edx,[rdx+r11*1]
  463. LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
  464. WORD $0x2144; BYTE $0xff // and edi,r15d
  465. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  466. WORD $0xdf31 // xor edi,ebx
  467. LONG $0xf970fdc5; BYTE $0xfa // vpshufd ymm7,ymm1,0xfa
  468. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  469. LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
  470. WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
  471. LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
  472. // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84)
  473. LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84]
  474. WORD $0x2141; BYTE $0xd4 // and r12d,edx
  475. LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
  476. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  477. LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
  478. LONG $0x331c8d47 // lea r11d,[r11+r14*1]
  479. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  480. LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
  481. LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
  482. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  483. LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
  484. LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
  485. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  486. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  487. WORD $0x8944; BYTE $0xdf // mov edi,r11d
  488. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  489. LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
  490. LONG $0x2a148d47 // lea r10d,[r10+r13*1]
  491. WORD $0xc731 // xor edi,eax
  492. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  493. LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
  494. LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
  495. LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
  496. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  497. WORD $0x2141; BYTE $0xff // and r15d,edi
  498. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  499. WORD $0x3141; BYTE $0xc7 // xor r15d,eax
  500. LONG $0xd4feedc5 // vpaddd ymm2,ymm2,ymm4
  501. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  502. LONG $0x3a148d47 // lea r10d,[r10+r15*1]
  503. WORD $0x8941; BYTE $0xd4 // mov r12d,edx
  504. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  505. // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88)
  506. LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88]
  507. WORD $0x2141; BYTE $0xcc // and r12d,ecx
  508. LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
  509. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  510. LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
  511. LONG $0x32148d47 // lea r10d,[r10+r14*1]
  512. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  513. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  514. LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
  515. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  516. LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
  517. LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
  518. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  519. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  520. WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
  521. LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6
  522. LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
  523. LONG $0x290c8d47 // lea r9d,[r9+r13*1]
  524. WORD $0x3145; BYTE $0xdf // xor r15d,r11d
  525. LONG $0xfa70fdc5; BYTE $0x50 // vpshufd ymm7,ymm2,0x50
  526. LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
  527. LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
  528. LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
  529. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  530. WORD $0x2144; BYTE $0xff // and edi,r15d
  531. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  532. WORD $0x3144; BYTE $0xdf // xor edi,r11d
  533. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  534. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  535. LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
  536. WORD $0x8941; BYTE $0xcc // mov r12d,ecx
  537. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  538. // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c)
  539. LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c]
  540. WORD $0x2141; BYTE $0xdc // and r12d,ebx
  541. LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
  542. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  543. LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
  544. LONG $0x310c8d47 // lea r9d,[r9+r14*1]
  545. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  546. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  547. LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
  548. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  549. LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
  550. LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
  551. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  552. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  553. WORD $0x8944; BYTE $0xcf // mov edi,r9d
  554. LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6
  555. LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
  556. LONG $0x28048d47 // lea r8d,[r8+r13*1]
  557. WORD $0x3144; BYTE $0xd7 // xor edi,r10d
  558. LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,[rbp+0x40]
  559. LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
  560. LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
  561. LONG $0x00048d42 // lea eax,[rax+r8*1]
  562. WORD $0x2141; BYTE $0xff // and r15d,edi
  563. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  564. WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
  565. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  566. LONG $0x38048d47 // lea r8d,[r8+r15*1]
  567. WORD $0x8941; BYTE $0xdc // mov r12d,ebx
  568. LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6
  569. LONG $0x0f7de3c4; WORD $0x04e3 // vpalignr ymm4,ymm0,ymm3,0x4
  570. // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0)
  571. LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0]
  572. WORD $0x2141; BYTE $0xc4 // and r12d,eax
  573. LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
  574. LONG $0x0f6de3c4; WORD $0x04f9 // vpalignr ymm7,ymm2,ymm1,0x4
  575. LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
  576. LONG $0x30048d47 // lea r8d,[r8+r14*1]
  577. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  578. LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
  579. LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
  580. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  581. LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
  582. LONG $0xdffee5c5 // vpaddd ymm3,ymm3,ymm7
  583. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  584. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  585. WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
  586. LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
  587. LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
  588. LONG $0x2a148d42 // lea edx,[rdx+r13*1]
  589. WORD $0x3145; BYTE $0xcf // xor r15d,r9d
  590. LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
  591. LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
  592. LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
  593. LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
  594. LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
  595. WORD $0x2144; BYTE $0xff // and edi,r15d
  596. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  597. WORD $0x3144; BYTE $0xcf // xor edi,r9d
  598. LONG $0xfa70fdc5; BYTE $0xfa // vpshufd ymm7,ymm2,0xfa
  599. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  600. WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
  601. WORD $0x8941; BYTE $0xc4 // mov r12d,eax
  602. LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
  603. // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4)
  604. LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4]
  605. WORD $0x2145; BYTE $0xdc // and r12d,r11d
  606. LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
  607. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  608. LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
  609. LONG $0x32148d42 // lea edx,[rdx+r14*1]
  610. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  611. LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
  612. LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
  613. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  614. LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
  615. LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
  616. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  617. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  618. WORD $0xd789 // mov edi,edx
  619. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  620. LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
  621. LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
  622. WORD $0x3144; BYTE $0xc7 // xor edi,r8d
  623. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  624. LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
  625. LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
  626. LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
  627. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  628. WORD $0x2141; BYTE $0xff // and r15d,edi
  629. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  630. WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
  631. LONG $0xdcfee5c5 // vpaddd ymm3,ymm3,ymm4
  632. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  633. LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
  634. WORD $0x8945; BYTE $0xdc // mov r12d,r11d
  635. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  636. // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8)
  637. LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8]
  638. WORD $0x2145; BYTE $0xd4 // and r12d,r10d
  639. LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
  640. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  641. LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
  642. LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
  643. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  644. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  645. LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
  646. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  647. LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
  648. LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
  649. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  650. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  651. WORD $0x8941; BYTE $0xcf // mov r15d,ecx
  652. LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6
  653. LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
  654. LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
  655. WORD $0x3141; BYTE $0xd7 // xor r15d,edx
  656. LONG $0xfb70fdc5; BYTE $0x50 // vpshufd ymm7,ymm3,0x50
  657. LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
  658. LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
  659. LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
  660. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  661. WORD $0x2144; BYTE $0xff // and edi,r15d
  662. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  663. WORD $0xd731 // xor edi,edx
  664. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  665. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  666. WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
  667. WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
  668. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  669. // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac)
  670. LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac]
  671. WORD $0x2145; BYTE $0xcc // and r12d,r9d
  672. LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
  673. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  674. LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
  675. LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
  676. LONG $0x20048d42 // lea eax,[rax+r12*1]
  677. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  678. LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
  679. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  680. LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
  681. LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
  682. LONG $0x20048d42 // lea eax,[rax+r12*1]
  683. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  684. WORD $0xdf89 // mov edi,ebx
  685. LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6
  686. LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
  687. LONG $0x28048d42 // lea eax,[rax+r13*1]
  688. WORD $0xcf31 // xor edi,ecx
  689. LONG $0x75fee5c5; BYTE $0x60 // vpaddd ymm6,ymm3,[rbp+0x60]
  690. LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
  691. LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
  692. LONG $0x00048d45 // lea r8d,[r8+rax*1]
  693. WORD $0x2141; BYTE $0xff // and r15d,edi
  694. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  695. WORD $0x3141; BYTE $0xcf // xor r15d,ecx
  696. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  697. LONG $0x38048d42 // lea eax,[rax+r15*1]
  698. WORD $0x8945; BYTE $0xcc // mov r12d,r9d
  699. LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6
  700. ADDQ $0x80, BP
  701. CMPB 0x3(BP),$0x0
  702. JNE loop1
  703. // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x40)
  704. LONG $0x245c0344; BYTE $0x40 // add r11d,[rsp+0x40]
  705. WORD $0x2145; BYTE $0xc4 // and r12d,r8d
  706. LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
  707. LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
  708. LONG $0x30048d42 // lea eax,[rax+r14*1]
  709. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  710. LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
  711. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  712. LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
  713. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  714. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  715. WORD $0x8941; BYTE $0xc7 // mov r15d,eax
  716. LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
  717. LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
  718. WORD $0x3141; BYTE $0xdf // xor r15d,ebx
  719. LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
  720. LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
  721. LONG $0x1a148d42 // lea edx,[rdx+r11*1]
  722. WORD $0x2144; BYTE $0xff // and edi,r15d
  723. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  724. WORD $0xdf31 // xor edi,ebx
  725. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  726. LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
  727. WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
  728. // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x44)
  729. LONG $0x24540344; BYTE $0x44 // add r10d,[rsp+0x44]
  730. WORD $0x2141; BYTE $0xd4 // and r12d,edx
  731. LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
  732. LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
  733. LONG $0x331c8d47 // lea r11d,[r11+r14*1]
  734. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  735. LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
  736. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  737. LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
  738. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  739. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  740. WORD $0x8944; BYTE $0xdf // mov edi,r11d
  741. LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
  742. LONG $0x2a148d47 // lea r10d,[r10+r13*1]
  743. WORD $0xc731 // xor edi,eax
  744. LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
  745. LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
  746. LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
  747. WORD $0x2141; BYTE $0xff // and r15d,edi
  748. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  749. WORD $0x3141; BYTE $0xc7 // xor r15d,eax
  750. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  751. LONG $0x3a148d47 // lea r10d,[r10+r15*1]
  752. WORD $0x8941; BYTE $0xd4 // mov r12d,edx
  753. // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x48)
  754. LONG $0x244c0344; BYTE $0x48 // add r9d,[rsp+0x48]
  755. WORD $0x2141; BYTE $0xcc // and r12d,ecx
  756. LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
  757. LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
  758. LONG $0x32148d47 // lea r10d,[r10+r14*1]
  759. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  760. LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
  761. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  762. LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
  763. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  764. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  765. WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
  766. LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
  767. LONG $0x290c8d47 // lea r9d,[r9+r13*1]
  768. WORD $0x3145; BYTE $0xdf // xor r15d,r11d
  769. LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
  770. LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
  771. LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
  772. WORD $0x2144; BYTE $0xff // and edi,r15d
  773. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  774. WORD $0x3144; BYTE $0xdf // xor edi,r11d
  775. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  776. LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
  777. WORD $0x8941; BYTE $0xcc // mov r12d,ecx
  778. // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x4c)
  779. LONG $0x24440344; BYTE $0x4c // add r8d,[rsp+0x4c]
  780. WORD $0x2141; BYTE $0xdc // and r12d,ebx
  781. LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
  782. LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
  783. LONG $0x310c8d47 // lea r9d,[r9+r14*1]
  784. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  785. LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
  786. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  787. LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
  788. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  789. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  790. WORD $0x8944; BYTE $0xcf // mov edi,r9d
  791. LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
  792. LONG $0x28048d47 // lea r8d,[r8+r13*1]
  793. WORD $0x3144; BYTE $0xd7 // xor edi,r10d
  794. LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
  795. LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
  796. LONG $0x00048d42 // lea eax,[rax+r8*1]
  797. WORD $0x2141; BYTE $0xff // and r15d,edi
  798. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  799. WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
  800. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  801. LONG $0x38048d47 // lea r8d,[r8+r15*1]
  802. WORD $0x8941; BYTE $0xdc // mov r12d,ebx
  803. // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x60)
  804. LONG $0x60245403 // add edx,[rsp+0x60]
  805. WORD $0x2141; BYTE $0xc4 // and r12d,eax
  806. LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
  807. LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
  808. LONG $0x30048d47 // lea r8d,[r8+r14*1]
  809. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  810. LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
  811. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  812. LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
  813. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  814. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  815. WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
  816. LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
  817. LONG $0x2a148d42 // lea edx,[rdx+r13*1]
  818. WORD $0x3145; BYTE $0xcf // xor r15d,r9d
  819. LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
  820. LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
  821. LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
  822. WORD $0x2144; BYTE $0xff // and edi,r15d
  823. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  824. WORD $0x3144; BYTE $0xcf // xor edi,r9d
  825. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  826. WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
  827. WORD $0x8941; BYTE $0xc4 // mov r12d,eax
  828. // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x64)
  829. LONG $0x64244c03 // add ecx,[rsp+0x64]
  830. WORD $0x2145; BYTE $0xdc // and r12d,r11d
  831. LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
  832. LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
  833. LONG $0x32148d42 // lea edx,[rdx+r14*1]
  834. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  835. LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
  836. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  837. LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
  838. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  839. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  840. WORD $0xd789 // mov edi,edx
  841. LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
  842. LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
  843. WORD $0x3144; BYTE $0xc7 // xor edi,r8d
  844. LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
  845. LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
  846. LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
  847. WORD $0x2141; BYTE $0xff // and r15d,edi
  848. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  849. WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
  850. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  851. LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
  852. WORD $0x8945; BYTE $0xdc // mov r12d,r11d
  853. // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x68)
  854. LONG $0x68245c03 // add ebx,[rsp+0x68]
  855. WORD $0x2145; BYTE $0xd4 // and r12d,r10d
  856. LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
  857. LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
  858. LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
  859. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  860. LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
  861. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  862. LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
  863. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  864. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  865. WORD $0x8941; BYTE $0xcf // mov r15d,ecx
  866. LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
  867. LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
  868. WORD $0x3141; BYTE $0xd7 // xor r15d,edx
  869. LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
  870. LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
  871. LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
  872. WORD $0x2144; BYTE $0xff // and edi,r15d
  873. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  874. WORD $0xd731 // xor edi,edx
  875. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  876. WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
  877. WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
  878. // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x6c)
  879. LONG $0x6c244403 // add eax,[rsp+0x6c]
  880. WORD $0x2145; BYTE $0xcc // and r12d,r9d
  881. LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
  882. LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
  883. LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
  884. LONG $0x20048d42 // lea eax,[rax+r12*1]
  885. LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
  886. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  887. LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
  888. LONG $0x20048d42 // lea eax,[rax+r12*1]
  889. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  890. WORD $0xdf89 // mov edi,ebx
  891. LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
  892. LONG $0x28048d42 // lea eax,[rax+r13*1]
  893. WORD $0xcf31 // xor edi,ecx
  894. LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
  895. LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
  896. LONG $0x00048d45 // lea r8d,[r8+rax*1]
  897. WORD $0x2141; BYTE $0xff // and r15d,edi
  898. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  899. WORD $0x3141; BYTE $0xcf // xor r15d,ecx
  900. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  901. LONG $0x38048d42 // lea eax,[rax+r15*1]
  902. WORD $0x8945; BYTE $0xcc // mov r12d,r9d
  903. // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x00)
  904. LONG $0x241c0344 // add r11d,[rsp]
  905. WORD $0x2145; BYTE $0xc4 // and r12d,r8d
  906. LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
  907. LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
  908. LONG $0x30048d42 // lea eax,[rax+r14*1]
  909. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  910. LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
  911. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  912. LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
  913. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  914. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  915. WORD $0x8941; BYTE $0xc7 // mov r15d,eax
  916. LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
  917. LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
  918. WORD $0x3141; BYTE $0xdf // xor r15d,ebx
  919. LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
  920. LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
  921. LONG $0x1a148d42 // lea edx,[rdx+r11*1]
  922. WORD $0x2144; BYTE $0xff // and edi,r15d
  923. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  924. WORD $0xdf31 // xor edi,ebx
  925. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  926. LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
  927. WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
  928. // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x04)
  929. LONG $0x24540344; BYTE $0x04 // add r10d,[rsp+0x4]
  930. WORD $0x2141; BYTE $0xd4 // and r12d,edx
  931. LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
  932. LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
  933. LONG $0x331c8d47 // lea r11d,[r11+r14*1]
  934. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  935. LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
  936. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  937. LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
  938. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  939. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  940. WORD $0x8944; BYTE $0xdf // mov edi,r11d
  941. LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
  942. LONG $0x2a148d47 // lea r10d,[r10+r13*1]
  943. WORD $0xc731 // xor edi,eax
  944. LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
  945. LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
  946. LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
  947. WORD $0x2141; BYTE $0xff // and r15d,edi
  948. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  949. WORD $0x3141; BYTE $0xc7 // xor r15d,eax
  950. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  951. LONG $0x3a148d47 // lea r10d,[r10+r15*1]
  952. WORD $0x8941; BYTE $0xd4 // mov r12d,edx
  953. // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x08)
  954. LONG $0x244c0344; BYTE $0x08 // add r9d,[rsp+0x8]
  955. WORD $0x2141; BYTE $0xcc // and r12d,ecx
  956. LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
  957. LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
  958. LONG $0x32148d47 // lea r10d,[r10+r14*1]
  959. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  960. LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
  961. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  962. LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
  963. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  964. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  965. WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
  966. LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
  967. LONG $0x290c8d47 // lea r9d,[r9+r13*1]
  968. WORD $0x3145; BYTE $0xdf // xor r15d,r11d
  969. LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
  970. LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
  971. LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
  972. WORD $0x2144; BYTE $0xff // and edi,r15d
  973. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  974. WORD $0x3144; BYTE $0xdf // xor edi,r11d
  975. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  976. LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
  977. WORD $0x8941; BYTE $0xcc // mov r12d,ecx
  978. // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x0c)
  979. LONG $0x24440344; BYTE $0x0c // add r8d,[rsp+0xc]
  980. WORD $0x2141; BYTE $0xdc // and r12d,ebx
  981. LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
  982. LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
  983. LONG $0x310c8d47 // lea r9d,[r9+r14*1]
  984. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  985. LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
  986. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  987. LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
  988. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  989. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  990. WORD $0x8944; BYTE $0xcf // mov edi,r9d
  991. LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
  992. LONG $0x28048d47 // lea r8d,[r8+r13*1]
  993. WORD $0x3144; BYTE $0xd7 // xor edi,r10d
  994. LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
  995. LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
  996. LONG $0x00048d42 // lea eax,[rax+r8*1]
  997. WORD $0x2141; BYTE $0xff // and r15d,edi
  998. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  999. WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
  1000. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1001. LONG $0x38048d47 // lea r8d,[r8+r15*1]
  1002. WORD $0x8941; BYTE $0xdc // mov r12d,ebx
  1003. // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x20)
  1004. LONG $0x20245403 // add edx,[rsp+0x20]
  1005. WORD $0x2141; BYTE $0xc4 // and r12d,eax
  1006. LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
  1007. LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
  1008. LONG $0x30048d47 // lea r8d,[r8+r14*1]
  1009. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  1010. LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
  1011. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  1012. LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
  1013. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  1014. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1015. WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
  1016. LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
  1017. LONG $0x2a148d42 // lea edx,[rdx+r13*1]
  1018. WORD $0x3145; BYTE $0xcf // xor r15d,r9d
  1019. LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
  1020. LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
  1021. LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
  1022. WORD $0x2144; BYTE $0xff // and edi,r15d
  1023. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1024. WORD $0x3144; BYTE $0xcf // xor edi,r9d
  1025. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1026. WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
  1027. WORD $0x8941; BYTE $0xc4 // mov r12d,eax
  1028. // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x24)
  1029. LONG $0x24244c03 // add ecx,[rsp+0x24]
  1030. WORD $0x2145; BYTE $0xdc // and r12d,r11d
  1031. LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
  1032. LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
  1033. LONG $0x32148d42 // lea edx,[rdx+r14*1]
  1034. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  1035. LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
  1036. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  1037. LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
  1038. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  1039. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1040. WORD $0xd789 // mov edi,edx
  1041. LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
  1042. LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
  1043. WORD $0x3144; BYTE $0xc7 // xor edi,r8d
  1044. LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
  1045. LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
  1046. LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
  1047. WORD $0x2141; BYTE $0xff // and r15d,edi
  1048. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1049. WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
  1050. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1051. LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
  1052. WORD $0x8945; BYTE $0xdc // mov r12d,r11d
  1053. // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x28)
  1054. LONG $0x28245c03 // add ebx,[rsp+0x28]
  1055. WORD $0x2145; BYTE $0xd4 // and r12d,r10d
  1056. LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
  1057. LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
  1058. LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
  1059. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  1060. LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
  1061. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  1062. LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
  1063. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  1064. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1065. WORD $0x8941; BYTE $0xcf // mov r15d,ecx
  1066. LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
  1067. LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
  1068. WORD $0x3141; BYTE $0xd7 // xor r15d,edx
  1069. LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
  1070. LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
  1071. LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
  1072. WORD $0x2144; BYTE $0xff // and edi,r15d
  1073. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1074. WORD $0xd731 // xor edi,edx
  1075. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1076. WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
  1077. WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
  1078. // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x2c)
  1079. LONG $0x2c244403 // add eax,[rsp+0x2c]
  1080. WORD $0x2145; BYTE $0xcc // and r12d,r9d
  1081. LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
  1082. LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
  1083. LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
  1084. LONG $0x20048d42 // lea eax,[rax+r12*1]
  1085. LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
  1086. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  1087. LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
  1088. LONG $0x20048d42 // lea eax,[rax+r12*1]
  1089. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1090. WORD $0xdf89 // mov edi,ebx
  1091. LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
  1092. LONG $0x28048d42 // lea eax,[rax+r13*1]
  1093. WORD $0xcf31 // xor edi,ecx
  1094. LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
  1095. LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
  1096. LONG $0x00048d45 // lea r8d,[r8+rax*1]
  1097. WORD $0x2141; BYTE $0xff // and r15d,edi
  1098. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1099. WORD $0x3141; BYTE $0xcf // xor r15d,ecx
  1100. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1101. LONG $0x38048d42 // lea eax,[rax+r15*1]
  1102. WORD $0x8945; BYTE $0xcc // mov r12d,r9d
  // Fold the eight 32-bit working registers (AX, BX, CX, DX, R8..R11) back into
  // the eight 32-bit words at 0..28(DI), then either branch to `done` or prime
  // R14/DI/R12 for the first round of loop2.
  // NOTE(review): presumably these eight words are the SHA-256 chaining state
  // (going by the file name) -- confirm against the TEXT header/caller.
  1103. MOVQ 0x200(SP), DI // $_ctx: DI = pointer loaded from stack slot 0x200(SP)
  1104. ADDQ R14, AX // add the term left in R14 by the preceding round (xor of the three RORX rotations of EBX); earlier rounds fold it in with `lea ...,[...+r14*1]` at the start of the next round. 64-bit add, but only the low 32 bits are consumed by the ADDL/MOVL below
  1105. LEAQ 0x1c0(SP), BP // BP = SP+0x1c0: base for the 0x50(BP) compare below and for the [rbp+...] loads in loop2
  1106. ADDL (DI), AX // AX  += word at DI+0
  1107. ADDL 4(DI), BX // BX  += word at DI+4
  1108. ADDL 8(DI), CX // CX  += word at DI+8
  1109. ADDL 12(DI), DX // DX  += word at DI+12
  1110. ADDL 16(DI), R8 // R8  += word at DI+16
  1111. ADDL 20(DI), R9 // R9  += word at DI+20
  1112. ADDL 24(DI), R10 // R10 += word at DI+24
  1113. ADDL 28(DI), R11 // R11 += word at DI+28
  1114. MOVL AX, (DI) // write the eight updated words back to the same locations
  1115. MOVL BX, 4(DI)
  1116. MOVL CX, 8(DI)
  1117. MOVL DX, 12(DI)
  1118. MOVL R8, 16(DI)
  1119. MOVL R9, 20(DI)
  1120. MOVL R10, 24(DI)
  1121. MOVL R11, 28(DI)
  1122. CMPQ SI, 0x50(BP) // $_end: compare SI with the qword stored at 0x50(BP)
  1123. JE done // equal: branch to the `done` label (defined outside this excerpt)
  // Register setup consumed by the first ROUND of loop2. The registers keep the
  // summed values just stored, so loop2 starts from the updated state.
  1124. XORQ R14, R14 // R14 = 0, so the first `lea eax,[rax+r14*1]` in loop2 adds nothing
  1125. MOVQ BX, DI // DI is reused as scratch from here on; it no longer holds the $_ctx pointer
  1126. XORQ CX, DI // "magic": DI = BX ^ CX, the value the first loop2 round feeds to `and edi,r15d` / `xor edi,ebx`
  1127. MOVQ R9, R12 // R12 = R9, the operand of the first loop2 instruction pair (`and r12d,r8d`)
  1128. loop2:
  1129. // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, BP, 0x10)
  1130. LONG $0x105d0344 // add r11d,[rbp+0x10]
  1131. WORD $0x2145; BYTE $0xc4 // and r12d,r8d
  1132. LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
  1133. LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
  1134. LONG $0x30048d42 // lea eax,[rax+r14*1]
  1135. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  1136. LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
  1137. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  1138. LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
  1139. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  1140. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1141. WORD $0x8941; BYTE $0xc7 // mov r15d,eax
  1142. LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
  1143. LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
  1144. WORD $0x3141; BYTE $0xdf // xor r15d,ebx
  1145. LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
  1146. LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
  1147. LONG $0x1a148d42 // lea edx,[rdx+r11*1]
  1148. WORD $0x2144; BYTE $0xff // and edi,r15d
  1149. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1150. WORD $0xdf31 // xor edi,ebx
  1151. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1152. LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
  1153. WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
  1154. // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, BP, 0x14)
  1155. LONG $0x14550344 // add r10d,[rbp+0x14]
  1156. WORD $0x2141; BYTE $0xd4 // and r12d,edx
  1157. LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
  1158. LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
  1159. LONG $0x331c8d47 // lea r11d,[r11+r14*1]
  1160. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  1161. LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
  1162. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  1163. LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
  1164. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  1165. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1166. WORD $0x8944; BYTE $0xdf // mov edi,r11d
  1167. LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
  1168. LONG $0x2a148d47 // lea r10d,[r10+r13*1]
  1169. WORD $0xc731 // xor edi,eax
  1170. LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
  1171. LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
  1172. LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
  1173. WORD $0x2141; BYTE $0xff // and r15d,edi
  1174. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1175. WORD $0x3141; BYTE $0xc7 // xor r15d,eax
  1176. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1177. LONG $0x3a148d47 // lea r10d,[r10+r15*1]
  1178. WORD $0x8941; BYTE $0xd4 // mov r12d,edx
  1179. // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, BP, 0x18)
  1180. LONG $0x184d0344 // add r9d,[rbp+0x18]
  1181. WORD $0x2141; BYTE $0xcc // and r12d,ecx
  1182. LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
  1183. LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
  1184. LONG $0x32148d47 // lea r10d,[r10+r14*1]
  1185. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  1186. LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
  1187. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  1188. LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
  1189. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  1190. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1191. WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
  1192. LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
  1193. LONG $0x290c8d47 // lea r9d,[r9+r13*1]
  1194. WORD $0x3145; BYTE $0xdf // xor r15d,r11d
  1195. LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
  1196. LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
  1197. LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
  1198. WORD $0x2144; BYTE $0xff // and edi,r15d
  1199. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1200. WORD $0x3144; BYTE $0xdf // xor edi,r11d
  1201. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1202. LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
  1203. WORD $0x8941; BYTE $0xcc // mov r12d,ecx
  1204. // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, BP, 0x1c)
  1205. LONG $0x1c450344 // add r8d,[rbp+0x1c]
  1206. WORD $0x2141; BYTE $0xdc // and r12d,ebx
  1207. LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
  1208. LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
  1209. LONG $0x310c8d47 // lea r9d,[r9+r14*1]
  1210. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  1211. LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
  1212. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  1213. LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
  1214. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  1215. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1216. WORD $0x8944; BYTE $0xcf // mov edi,r9d
  1217. LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
  1218. LONG $0x28048d47 // lea r8d,[r8+r13*1]
  1219. WORD $0x3144; BYTE $0xd7 // xor edi,r10d
  1220. LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
  1221. LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
  1222. LONG $0x00048d42 // lea eax,[rax+r8*1]
  1223. WORD $0x2141; BYTE $0xff // and r15d,edi
  1224. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1225. WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
  1226. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1227. LONG $0x38048d47 // lea r8d,[r8+r15*1]
  1228. WORD $0x8941; BYTE $0xdc // mov r12d,ebx
  1229. // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, BP, 0x30)
  1230. WORD $0x5503; BYTE $0x30 // add edx,[rbp+0x30]
  1231. WORD $0x2141; BYTE $0xc4 // and r12d,eax
  1232. LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
  1233. LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
  1234. LONG $0x30048d47 // lea r8d,[r8+r14*1]
  1235. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  1236. LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
  1237. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  1238. LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
  1239. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  1240. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1241. WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
  1242. LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
  1243. LONG $0x2a148d42 // lea edx,[rdx+r13*1]
  1244. WORD $0x3145; BYTE $0xcf // xor r15d,r9d
  1245. LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
  1246. LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
  1247. LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
  1248. WORD $0x2144; BYTE $0xff // and edi,r15d
  1249. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1250. WORD $0x3144; BYTE $0xcf // xor edi,r9d
  1251. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1252. WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
  1253. WORD $0x8941; BYTE $0xc4 // mov r12d,eax
  1254. // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, BP, 0x34)
  1255. WORD $0x4d03; BYTE $0x34 // add ecx,[rbp+0x34]
  1256. WORD $0x2145; BYTE $0xdc // and r12d,r11d
  1257. LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
  1258. LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
  1259. LONG $0x32148d42 // lea edx,[rdx+r14*1]
  1260. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  1261. LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
  1262. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  1263. LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
  1264. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  1265. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1266. WORD $0xd789 // mov edi,edx
  1267. LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
  1268. LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
  1269. WORD $0x3144; BYTE $0xc7 // xor edi,r8d
  1270. LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
  1271. LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
  1272. LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
  1273. WORD $0x2141; BYTE $0xff // and r15d,edi
  1274. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1275. WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
  1276. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1277. LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
  1278. WORD $0x8945; BYTE $0xdc // mov r12d,r11d
  1279. // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, BP, 0x38)
  1280. WORD $0x5d03; BYTE $0x38 // add ebx,[rbp+0x38]
  1281. WORD $0x2145; BYTE $0xd4 // and r12d,r10d
  1282. LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
  1283. LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
  1284. LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
  1285. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  1286. LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
  1287. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  1288. LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
  1289. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  1290. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1291. WORD $0x8941; BYTE $0xcf // mov r15d,ecx
  1292. LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
  1293. LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
  1294. WORD $0x3141; BYTE $0xd7 // xor r15d,edx
  1295. LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
  1296. LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
  1297. LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
  1298. WORD $0x2144; BYTE $0xff // and edi,r15d
  1299. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1300. WORD $0xd731 // xor edi,edx
  1301. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1302. WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
  1303. WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
  1304. // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, BP, 0x3c)
  1305. WORD $0x4503; BYTE $0x3c // add eax,[rbp+0x3c]
  1306. WORD $0x2145; BYTE $0xcc // and r12d,r9d
  1307. LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
  1308. LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
  1309. LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
  1310. LONG $0x20048d42 // lea eax,[rax+r12*1]
  1311. LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
  1312. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  1313. LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
  1314. LONG $0x20048d42 // lea eax,[rax+r12*1]
  1315. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1316. WORD $0xdf89 // mov edi,ebx
  1317. LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
  1318. LONG $0x28048d42 // lea eax,[rax+r13*1]
  1319. WORD $0xcf31 // xor edi,ecx
  1320. LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
  1321. LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
  1322. LONG $0x00048d45 // lea r8d,[r8+rax*1]
  1323. WORD $0x2141; BYTE $0xff // and r15d,edi
  1324. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1325. WORD $0x3141; BYTE $0xcf // xor r15d,ecx
  1326. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1327. LONG $0x38048d42 // lea eax,[rax+r15*1]
  1328. WORD $0x8945; BYTE $0xcc // mov r12d,r9d
  1329. ADDQ $-0x40, BP
  1330. CMPQ BP, SP
  1331. JAE loop2
  1332. MOVQ 0x200(SP), DI // $_ctx
  1333. ADDQ R14, AX
  1334. ADDQ $0x1c0, SP
  1335. ADDL (DI), AX
  1336. ADDL 4(DI), BX
  1337. ADDL 8(DI), CX
  1338. ADDL 12(DI), DX
  1339. ADDL 16(DI), R8
  1340. ADDL 20(DI), R9
  1341. ADDQ $0x80, SI // input += 2
  1342. ADDL 24(DI), R10
  1343. MOVQ SI, R12
  1344. ADDL 28(DI), R11
  1345. CMPQ SI, 0x50(SP) // input == _end
  1346. MOVL AX, (DI)
  1347. LONG $0xe4440f4c // cmove r12,rsp /* next block or stale data */
  1348. MOVL AX, (DI)
  1349. MOVL BX, 4(DI)
  1350. MOVL CX, 8(DI)
  1351. MOVL DX, 12(DI)
  1352. MOVL R8, 16(DI)
  1353. MOVL R9, 20(DI)
  1354. MOVL R10, 24(DI)
  1355. MOVL R11, 28(DI)
  1356. JBE loop0
  1357. LEAQ (SP), BP
  1358. done:
  1359. MOVQ BP, SP
  1360. MOVQ 0x58(SP), SP
  1361. WORD $0xf8c5; BYTE $0x77 // vzeroupper
  1362. RET