; co_586.asm
  1. %ifidn __OUTPUT_FORMAT__,obj
  2. section code use32 class=code align=256
  3. %elifidn __OUTPUT_FORMAT__,win32
  4. [email protected] equ 1
  5. section .text code align=256
  6. %else
  7. section .text code
  8. %endif
  9. global _bn_mul_comba8
  10. align 16
  11. _bn_mul_comba8:
  12. L$_bn_mul_comba8_begin:
  13. push esi
  14. mov esi,DWORD [12+esp]
  15. push edi
  16. mov edi,DWORD [20+esp]
  17. push ebp
  18. push ebx
  19. xor ebx,ebx
  20. mov eax,DWORD [esi]
  21. xor ecx,ecx
  22. mov edx,DWORD [edi]
  23. ; ################## Calculate word 0
  24. xor ebp,ebp
  25. ; mul a[0]*b[0]
  26. mul edx
  27. add ebx,eax
  28. mov eax,DWORD [20+esp]
  29. adc ecx,edx
  30. mov edx,DWORD [edi]
  31. adc ebp,0
  32. mov DWORD [eax],ebx
  33. mov eax,DWORD [4+esi]
  34. ; saved r[0]
  35. ; ################## Calculate word 1
  36. xor ebx,ebx
  37. ; mul a[1]*b[0]
  38. mul edx
  39. add ecx,eax
  40. mov eax,DWORD [esi]
  41. adc ebp,edx
  42. mov edx,DWORD [4+edi]
  43. adc ebx,0
  44. ; mul a[0]*b[1]
  45. mul edx
  46. add ecx,eax
  47. mov eax,DWORD [20+esp]
  48. adc ebp,edx
  49. mov edx,DWORD [edi]
  50. adc ebx,0
  51. mov DWORD [4+eax],ecx
  52. mov eax,DWORD [8+esi]
  53. ; saved r[1]
  54. ; ################## Calculate word 2
  55. xor ecx,ecx
  56. ; mul a[2]*b[0]
  57. mul edx
  58. add ebp,eax
  59. mov eax,DWORD [4+esi]
  60. adc ebx,edx
  61. mov edx,DWORD [4+edi]
  62. adc ecx,0
  63. ; mul a[1]*b[1]
  64. mul edx
  65. add ebp,eax
  66. mov eax,DWORD [esi]
  67. adc ebx,edx
  68. mov edx,DWORD [8+edi]
  69. adc ecx,0
  70. ; mul a[0]*b[2]
  71. mul edx
  72. add ebp,eax
  73. mov eax,DWORD [20+esp]
  74. adc ebx,edx
  75. mov edx,DWORD [edi]
  76. adc ecx,0
  77. mov DWORD [8+eax],ebp
  78. mov eax,DWORD [12+esi]
  79. ; saved r[2]
  80. ; ################## Calculate word 3
  81. xor ebp,ebp
  82. ; mul a[3]*b[0]
  83. mul edx
  84. add ebx,eax
  85. mov eax,DWORD [8+esi]
  86. adc ecx,edx
  87. mov edx,DWORD [4+edi]
  88. adc ebp,0
  89. ; mul a[2]*b[1]
  90. mul edx
  91. add ebx,eax
  92. mov eax,DWORD [4+esi]
  93. adc ecx,edx
  94. mov edx,DWORD [8+edi]
  95. adc ebp,0
  96. ; mul a[1]*b[2]
  97. mul edx
  98. add ebx,eax
  99. mov eax,DWORD [esi]
  100. adc ecx,edx
  101. mov edx,DWORD [12+edi]
  102. adc ebp,0
  103. ; mul a[0]*b[3]
  104. mul edx
  105. add ebx,eax
  106. mov eax,DWORD [20+esp]
  107. adc ecx,edx
  108. mov edx,DWORD [edi]
  109. adc ebp,0
  110. mov DWORD [12+eax],ebx
  111. mov eax,DWORD [16+esi]
  112. ; saved r[3]
  113. ; ################## Calculate word 4
  114. xor ebx,ebx
  115. ; mul a[4]*b[0]
  116. mul edx
  117. add ecx,eax
  118. mov eax,DWORD [12+esi]
  119. adc ebp,edx
  120. mov edx,DWORD [4+edi]
  121. adc ebx,0
  122. ; mul a[3]*b[1]
  123. mul edx
  124. add ecx,eax
  125. mov eax,DWORD [8+esi]
  126. adc ebp,edx
  127. mov edx,DWORD [8+edi]
  128. adc ebx,0
  129. ; mul a[2]*b[2]
  130. mul edx
  131. add ecx,eax
  132. mov eax,DWORD [4+esi]
  133. adc ebp,edx
  134. mov edx,DWORD [12+edi]
  135. adc ebx,0
  136. ; mul a[1]*b[3]
  137. mul edx
  138. add ecx,eax
  139. mov eax,DWORD [esi]
  140. adc ebp,edx
  141. mov edx,DWORD [16+edi]
  142. adc ebx,0
  143. ; mul a[0]*b[4]
  144. mul edx
  145. add ecx,eax
  146. mov eax,DWORD [20+esp]
  147. adc ebp,edx
  148. mov edx,DWORD [edi]
  149. adc ebx,0
  150. mov DWORD [16+eax],ecx
  151. mov eax,DWORD [20+esi]
  152. ; saved r[4]
  153. ; ################## Calculate word 5
  154. xor ecx,ecx
  155. ; mul a[5]*b[0]
  156. mul edx
  157. add ebp,eax
  158. mov eax,DWORD [16+esi]
  159. adc ebx,edx
  160. mov edx,DWORD [4+edi]
  161. adc ecx,0
  162. ; mul a[4]*b[1]
  163. mul edx
  164. add ebp,eax
  165. mov eax,DWORD [12+esi]
  166. adc ebx,edx
  167. mov edx,DWORD [8+edi]
  168. adc ecx,0
  169. ; mul a[3]*b[2]
  170. mul edx
  171. add ebp,eax
  172. mov eax,DWORD [8+esi]
  173. adc ebx,edx
  174. mov edx,DWORD [12+edi]
  175. adc ecx,0
  176. ; mul a[2]*b[3]
  177. mul edx
  178. add ebp,eax
  179. mov eax,DWORD [4+esi]
  180. adc ebx,edx
  181. mov edx,DWORD [16+edi]
  182. adc ecx,0
  183. ; mul a[1]*b[4]
  184. mul edx
  185. add ebp,eax
  186. mov eax,DWORD [esi]
  187. adc ebx,edx
  188. mov edx,DWORD [20+edi]
  189. adc ecx,0
  190. ; mul a[0]*b[5]
  191. mul edx
  192. add ebp,eax
  193. mov eax,DWORD [20+esp]
  194. adc ebx,edx
  195. mov edx,DWORD [edi]
  196. adc ecx,0
  197. mov DWORD [20+eax],ebp
  198. mov eax,DWORD [24+esi]
  199. ; saved r[5]
  200. ; ################## Calculate word 6
  201. xor ebp,ebp
  202. ; mul a[6]*b[0]
  203. mul edx
  204. add ebx,eax
  205. mov eax,DWORD [20+esi]
  206. adc ecx,edx
  207. mov edx,DWORD [4+edi]
  208. adc ebp,0
  209. ; mul a[5]*b[1]
  210. mul edx
  211. add ebx,eax
  212. mov eax,DWORD [16+esi]
  213. adc ecx,edx
  214. mov edx,DWORD [8+edi]
  215. adc ebp,0
  216. ; mul a[4]*b[2]
  217. mul edx
  218. add ebx,eax
  219. mov eax,DWORD [12+esi]
  220. adc ecx,edx
  221. mov edx,DWORD [12+edi]
  222. adc ebp,0
  223. ; mul a[3]*b[3]
  224. mul edx
  225. add ebx,eax
  226. mov eax,DWORD [8+esi]
  227. adc ecx,edx
  228. mov edx,DWORD [16+edi]
  229. adc ebp,0
  230. ; mul a[2]*b[4]
  231. mul edx
  232. add ebx,eax
  233. mov eax,DWORD [4+esi]
  234. adc ecx,edx
  235. mov edx,DWORD [20+edi]
  236. adc ebp,0
  237. ; mul a[1]*b[5]
  238. mul edx
  239. add ebx,eax
  240. mov eax,DWORD [esi]
  241. adc ecx,edx
  242. mov edx,DWORD [24+edi]
  243. adc ebp,0
  244. ; mul a[0]*b[6]
  245. mul edx
  246. add ebx,eax
  247. mov eax,DWORD [20+esp]
  248. adc ecx,edx
  249. mov edx,DWORD [edi]
  250. adc ebp,0
  251. mov DWORD [24+eax],ebx
  252. mov eax,DWORD [28+esi]
  253. ; saved r[6]
  254. ; ################## Calculate word 7
  255. xor ebx,ebx
  256. ; mul a[7]*b[0]
  257. mul edx
  258. add ecx,eax
  259. mov eax,DWORD [24+esi]
  260. adc ebp,edx
  261. mov edx,DWORD [4+edi]
  262. adc ebx,0
  263. ; mul a[6]*b[1]
  264. mul edx
  265. add ecx,eax
  266. mov eax,DWORD [20+esi]
  267. adc ebp,edx
  268. mov edx,DWORD [8+edi]
  269. adc ebx,0
  270. ; mul a[5]*b[2]
  271. mul edx
  272. add ecx,eax
  273. mov eax,DWORD [16+esi]
  274. adc ebp,edx
  275. mov edx,DWORD [12+edi]
  276. adc ebx,0
  277. ; mul a[4]*b[3]
  278. mul edx
  279. add ecx,eax
  280. mov eax,DWORD [12+esi]
  281. adc ebp,edx
  282. mov edx,DWORD [16+edi]
  283. adc ebx,0
  284. ; mul a[3]*b[4]
  285. mul edx
  286. add ecx,eax
  287. mov eax,DWORD [8+esi]
  288. adc ebp,edx
  289. mov edx,DWORD [20+edi]
  290. adc ebx,0
  291. ; mul a[2]*b[5]
  292. mul edx
  293. add ecx,eax
  294. mov eax,DWORD [4+esi]
  295. adc ebp,edx
  296. mov edx,DWORD [24+edi]
  297. adc ebx,0
  298. ; mul a[1]*b[6]
  299. mul edx
  300. add ecx,eax
  301. mov eax,DWORD [esi]
  302. adc ebp,edx
  303. mov edx,DWORD [28+edi]
  304. adc ebx,0
  305. ; mul a[0]*b[7]
  306. mul edx
  307. add ecx,eax
  308. mov eax,DWORD [20+esp]
  309. adc ebp,edx
  310. mov edx,DWORD [4+edi]
  311. adc ebx,0
  312. mov DWORD [28+eax],ecx
  313. mov eax,DWORD [28+esi]
  314. ; saved r[7]
  315. ; ################## Calculate word 8
  316. xor ecx,ecx
  317. ; mul a[7]*b[1]
  318. mul edx
  319. add ebp,eax
  320. mov eax,DWORD [24+esi]
  321. adc ebx,edx
  322. mov edx,DWORD [8+edi]
  323. adc ecx,0
  324. ; mul a[6]*b[2]
  325. mul edx
  326. add ebp,eax
  327. mov eax,DWORD [20+esi]
  328. adc ebx,edx
  329. mov edx,DWORD [12+edi]
  330. adc ecx,0
  331. ; mul a[5]*b[3]
  332. mul edx
  333. add ebp,eax
  334. mov eax,DWORD [16+esi]
  335. adc ebx,edx
  336. mov edx,DWORD [16+edi]
  337. adc ecx,0
  338. ; mul a[4]*b[4]
  339. mul edx
  340. add ebp,eax
  341. mov eax,DWORD [12+esi]
  342. adc ebx,edx
  343. mov edx,DWORD [20+edi]
  344. adc ecx,0
  345. ; mul a[3]*b[5]
  346. mul edx
  347. add ebp,eax
  348. mov eax,DWORD [8+esi]
  349. adc ebx,edx
  350. mov edx,DWORD [24+edi]
  351. adc ecx,0
  352. ; mul a[2]*b[6]
  353. mul edx
  354. add ebp,eax
  355. mov eax,DWORD [4+esi]
  356. adc ebx,edx
  357. mov edx,DWORD [28+edi]
  358. adc ecx,0
  359. ; mul a[1]*b[7]
  360. mul edx
  361. add ebp,eax
  362. mov eax,DWORD [20+esp]
  363. adc ebx,edx
  364. mov edx,DWORD [8+edi]
  365. adc ecx,0
  366. mov DWORD [32+eax],ebp
  367. mov eax,DWORD [28+esi]
  368. ; saved r[8]
  369. ; ################## Calculate word 9
  370. xor ebp,ebp
  371. ; mul a[7]*b[2]
  372. mul edx
  373. add ebx,eax
  374. mov eax,DWORD [24+esi]
  375. adc ecx,edx
  376. mov edx,DWORD [12+edi]
  377. adc ebp,0
  378. ; mul a[6]*b[3]
  379. mul edx
  380. add ebx,eax
  381. mov eax,DWORD [20+esi]
  382. adc ecx,edx
  383. mov edx,DWORD [16+edi]
  384. adc ebp,0
  385. ; mul a[5]*b[4]
  386. mul edx
  387. add ebx,eax
  388. mov eax,DWORD [16+esi]
  389. adc ecx,edx
  390. mov edx,DWORD [20+edi]
  391. adc ebp,0
  392. ; mul a[4]*b[5]
  393. mul edx
  394. add ebx,eax
  395. mov eax,DWORD [12+esi]
  396. adc ecx,edx
  397. mov edx,DWORD [24+edi]
  398. adc ebp,0
  399. ; mul a[3]*b[6]
  400. mul edx
  401. add ebx,eax
  402. mov eax,DWORD [8+esi]
  403. adc ecx,edx
  404. mov edx,DWORD [28+edi]
  405. adc ebp,0
  406. ; mul a[2]*b[7]
  407. mul edx
  408. add ebx,eax
  409. mov eax,DWORD [20+esp]
  410. adc ecx,edx
  411. mov edx,DWORD [12+edi]
  412. adc ebp,0
  413. mov DWORD [36+eax],ebx
  414. mov eax,DWORD [28+esi]
  415. ; saved r[9]
  416. ; ################## Calculate word 10
  417. xor ebx,ebx
  418. ; mul a[7]*b[3]
  419. mul edx
  420. add ecx,eax
  421. mov eax,DWORD [24+esi]
  422. adc ebp,edx
  423. mov edx,DWORD [16+edi]
  424. adc ebx,0
  425. ; mul a[6]*b[4]
  426. mul edx
  427. add ecx,eax
  428. mov eax,DWORD [20+esi]
  429. adc ebp,edx
  430. mov edx,DWORD [20+edi]
  431. adc ebx,0
  432. ; mul a[5]*b[5]
  433. mul edx
  434. add ecx,eax
  435. mov eax,DWORD [16+esi]
  436. adc ebp,edx
  437. mov edx,DWORD [24+edi]
  438. adc ebx,0
  439. ; mul a[4]*b[6]
  440. mul edx
  441. add ecx,eax
  442. mov eax,DWORD [12+esi]
  443. adc ebp,edx
  444. mov edx,DWORD [28+edi]
  445. adc ebx,0
  446. ; mul a[3]*b[7]
  447. mul edx
  448. add ecx,eax
  449. mov eax,DWORD [20+esp]
  450. adc ebp,edx
  451. mov edx,DWORD [16+edi]
  452. adc ebx,0
  453. mov DWORD [40+eax],ecx
  454. mov eax,DWORD [28+esi]
  455. ; saved r[10]
  456. ; ################## Calculate word 11
  457. xor ecx,ecx
  458. ; mul a[7]*b[4]
  459. mul edx
  460. add ebp,eax
  461. mov eax,DWORD [24+esi]
  462. adc ebx,edx
  463. mov edx,DWORD [20+edi]
  464. adc ecx,0
  465. ; mul a[6]*b[5]
  466. mul edx
  467. add ebp,eax
  468. mov eax,DWORD [20+esi]
  469. adc ebx,edx
  470. mov edx,DWORD [24+edi]
  471. adc ecx,0
  472. ; mul a[5]*b[6]
  473. mul edx
  474. add ebp,eax
  475. mov eax,DWORD [16+esi]
  476. adc ebx,edx
  477. mov edx,DWORD [28+edi]
  478. adc ecx,0
  479. ; mul a[4]*b[7]
  480. mul edx
  481. add ebp,eax
  482. mov eax,DWORD [20+esp]
  483. adc ebx,edx
  484. mov edx,DWORD [20+edi]
  485. adc ecx,0
  486. mov DWORD [44+eax],ebp
  487. mov eax,DWORD [28+esi]
  488. ; saved r[11]
  489. ; ################## Calculate word 12
  490. xor ebp,ebp
  491. ; mul a[7]*b[5]
  492. mul edx
  493. add ebx,eax
  494. mov eax,DWORD [24+esi]
  495. adc ecx,edx
  496. mov edx,DWORD [24+edi]
  497. adc ebp,0
  498. ; mul a[6]*b[6]
  499. mul edx
  500. add ebx,eax
  501. mov eax,DWORD [20+esi]
  502. adc ecx,edx
  503. mov edx,DWORD [28+edi]
  504. adc ebp,0
  505. ; mul a[5]*b[7]
  506. mul edx
  507. add ebx,eax
  508. mov eax,DWORD [20+esp]
  509. adc ecx,edx
  510. mov edx,DWORD [24+edi]
  511. adc ebp,0
  512. mov DWORD [48+eax],ebx
  513. mov eax,DWORD [28+esi]
  514. ; saved r[12]
  515. ; ################## Calculate word 13
  516. xor ebx,ebx
  517. ; mul a[7]*b[6]
  518. mul edx
  519. add ecx,eax
  520. mov eax,DWORD [24+esi]
  521. adc ebp,edx
  522. mov edx,DWORD [28+edi]
  523. adc ebx,0
  524. ; mul a[6]*b[7]
  525. mul edx
  526. add ecx,eax
  527. mov eax,DWORD [20+esp]
  528. adc ebp,edx
  529. mov edx,DWORD [28+edi]
  530. adc ebx,0
  531. mov DWORD [52+eax],ecx
  532. mov eax,DWORD [28+esi]
  533. ; saved r[13]
  534. ; ################## Calculate word 14
  535. xor ecx,ecx
  536. ; mul a[7]*b[7]
  537. mul edx
  538. add ebp,eax
  539. mov eax,DWORD [20+esp]
  540. adc ebx,edx
  541. adc ecx,0
  542. mov DWORD [56+eax],ebp
  543. ; saved r[14]
  544. ; save r[15]
  545. mov DWORD [60+eax],ebx
  546. pop ebx
  547. pop ebp
  548. pop edi
  549. pop esi
  550. ret
  551. global _bn_mul_comba4
  552. align 16
  553. _bn_mul_comba4:
  554. L$_bn_mul_comba4_begin:
  555. push esi
  556. mov esi,DWORD [12+esp]
  557. push edi
  558. mov edi,DWORD [20+esp]
  559. push ebp
  560. push ebx
  561. xor ebx,ebx
  562. mov eax,DWORD [esi]
  563. xor ecx,ecx
  564. mov edx,DWORD [edi]
  565. ; ################## Calculate word 0
  566. xor ebp,ebp
  567. ; mul a[0]*b[0]
  568. mul edx
  569. add ebx,eax
  570. mov eax,DWORD [20+esp]
  571. adc ecx,edx
  572. mov edx,DWORD [edi]
  573. adc ebp,0
  574. mov DWORD [eax],ebx
  575. mov eax,DWORD [4+esi]
  576. ; saved r[0]
  577. ; ################## Calculate word 1
  578. xor ebx,ebx
  579. ; mul a[1]*b[0]
  580. mul edx
  581. add ecx,eax
  582. mov eax,DWORD [esi]
  583. adc ebp,edx
  584. mov edx,DWORD [4+edi]
  585. adc ebx,0
  586. ; mul a[0]*b[1]
  587. mul edx
  588. add ecx,eax
  589. mov eax,DWORD [20+esp]
  590. adc ebp,edx
  591. mov edx,DWORD [edi]
  592. adc ebx,0
  593. mov DWORD [4+eax],ecx
  594. mov eax,DWORD [8+esi]
  595. ; saved r[1]
  596. ; ################## Calculate word 2
  597. xor ecx,ecx
  598. ; mul a[2]*b[0]
  599. mul edx
  600. add ebp,eax
  601. mov eax,DWORD [4+esi]
  602. adc ebx,edx
  603. mov edx,DWORD [4+edi]
  604. adc ecx,0
  605. ; mul a[1]*b[1]
  606. mul edx
  607. add ebp,eax
  608. mov eax,DWORD [esi]
  609. adc ebx,edx
  610. mov edx,DWORD [8+edi]
  611. adc ecx,0
  612. ; mul a[0]*b[2]
  613. mul edx
  614. add ebp,eax
  615. mov eax,DWORD [20+esp]
  616. adc ebx,edx
  617. mov edx,DWORD [edi]
  618. adc ecx,0
  619. mov DWORD [8+eax],ebp
  620. mov eax,DWORD [12+esi]
  621. ; saved r[2]
  622. ; ################## Calculate word 3
  623. xor ebp,ebp
  624. ; mul a[3]*b[0]
  625. mul edx
  626. add ebx,eax
  627. mov eax,DWORD [8+esi]
  628. adc ecx,edx
  629. mov edx,DWORD [4+edi]
  630. adc ebp,0
  631. ; mul a[2]*b[1]
  632. mul edx
  633. add ebx,eax
  634. mov eax,DWORD [4+esi]
  635. adc ecx,edx
  636. mov edx,DWORD [8+edi]
  637. adc ebp,0
  638. ; mul a[1]*b[2]
  639. mul edx
  640. add ebx,eax
  641. mov eax,DWORD [esi]
  642. adc ecx,edx
  643. mov edx,DWORD [12+edi]
  644. adc ebp,0
  645. ; mul a[0]*b[3]
  646. mul edx
  647. add ebx,eax
  648. mov eax,DWORD [20+esp]
  649. adc ecx,edx
  650. mov edx,DWORD [4+edi]
  651. adc ebp,0
  652. mov DWORD [12+eax],ebx
  653. mov eax,DWORD [12+esi]
  654. ; saved r[3]
  655. ; ################## Calculate word 4
  656. xor ebx,ebx
  657. ; mul a[3]*b[1]
  658. mul edx
  659. add ecx,eax
  660. mov eax,DWORD [8+esi]
  661. adc ebp,edx
  662. mov edx,DWORD [8+edi]
  663. adc ebx,0
  664. ; mul a[2]*b[2]
  665. mul edx
  666. add ecx,eax
  667. mov eax,DWORD [4+esi]
  668. adc ebp,edx
  669. mov edx,DWORD [12+edi]
  670. adc ebx,0
  671. ; mul a[1]*b[3]
  672. mul edx
  673. add ecx,eax
  674. mov eax,DWORD [20+esp]
  675. adc ebp,edx
  676. mov edx,DWORD [8+edi]
  677. adc ebx,0
  678. mov DWORD [16+eax],ecx
  679. mov eax,DWORD [12+esi]
  680. ; saved r[4]
  681. ; ################## Calculate word 5
  682. xor ecx,ecx
  683. ; mul a[3]*b[2]
  684. mul edx
  685. add ebp,eax
  686. mov eax,DWORD [8+esi]
  687. adc ebx,edx
  688. mov edx,DWORD [12+edi]
  689. adc ecx,0
  690. ; mul a[2]*b[3]
  691. mul edx
  692. add ebp,eax
  693. mov eax,DWORD [20+esp]
  694. adc ebx,edx
  695. mov edx,DWORD [12+edi]
  696. adc ecx,0
  697. mov DWORD [20+eax],ebp
  698. mov eax,DWORD [12+esi]
  699. ; saved r[5]
  700. ; ################## Calculate word 6
  701. xor ebp,ebp
  702. ; mul a[3]*b[3]
  703. mul edx
  704. add ebx,eax
  705. mov eax,DWORD [20+esp]
  706. adc ecx,edx
  707. adc ebp,0
  708. mov DWORD [24+eax],ebx
  709. ; saved r[6]
  710. ; save r[7]
  711. mov DWORD [28+eax],ecx
  712. pop ebx
  713. pop ebp
  714. pop edi
  715. pop esi
  716. ret
  717. global _bn_sqr_comba8
  718. align 16
  719. _bn_sqr_comba8:
  720. L$_bn_sqr_comba8_begin:
  721. push esi
  722. push edi
  723. push ebp
  724. push ebx
  725. mov edi,DWORD [20+esp]
  726. mov esi,DWORD [24+esp]
  727. xor ebx,ebx
  728. xor ecx,ecx
  729. mov eax,DWORD [esi]
  730. ; ############### Calculate word 0
  731. xor ebp,ebp
  732. ; sqr a[0]*a[0]
  733. mul eax
  734. add ebx,eax
  735. adc ecx,edx
  736. mov edx,DWORD [esi]
  737. adc ebp,0
  738. mov DWORD [edi],ebx
  739. mov eax,DWORD [4+esi]
  740. ; saved r[0]
  741. ; ############### Calculate word 1
  742. xor ebx,ebx
  743. ; sqr a[1]*a[0]
  744. mul edx
  745. add eax,eax
  746. adc edx,edx
  747. adc ebx,0
  748. add ecx,eax
  749. adc ebp,edx
  750. mov eax,DWORD [8+esi]
  751. adc ebx,0
  752. mov DWORD [4+edi],ecx
  753. mov edx,DWORD [esi]
  754. ; saved r[1]
  755. ; ############### Calculate word 2
  756. xor ecx,ecx
  757. ; sqr a[2]*a[0]
  758. mul edx
  759. add eax,eax
  760. adc edx,edx
  761. adc ecx,0
  762. add ebp,eax
  763. adc ebx,edx
  764. mov eax,DWORD [4+esi]
  765. adc ecx,0
  766. ; sqr a[1]*a[1]
  767. mul eax
  768. add ebp,eax
  769. adc ebx,edx
  770. mov edx,DWORD [esi]
  771. adc ecx,0
  772. mov DWORD [8+edi],ebp
  773. mov eax,DWORD [12+esi]
  774. ; saved r[2]
  775. ; ############### Calculate word 3
  776. xor ebp,ebp
  777. ; sqr a[3]*a[0]
  778. mul edx
  779. add eax,eax
  780. adc edx,edx
  781. adc ebp,0
  782. add ebx,eax
  783. adc ecx,edx
  784. mov eax,DWORD [8+esi]
  785. adc ebp,0
  786. mov edx,DWORD [4+esi]
  787. ; sqr a[2]*a[1]
  788. mul edx
  789. add eax,eax
  790. adc edx,edx
  791. adc ebp,0
  792. add ebx,eax
  793. adc ecx,edx
  794. mov eax,DWORD [16+esi]
  795. adc ebp,0
  796. mov DWORD [12+edi],ebx
  797. mov edx,DWORD [esi]
  798. ; saved r[3]
  799. ; ############### Calculate word 4
  800. xor ebx,ebx
  801. ; sqr a[4]*a[0]
  802. mul edx
  803. add eax,eax
  804. adc edx,edx
  805. adc ebx,0
  806. add ecx,eax
  807. adc ebp,edx
  808. mov eax,DWORD [12+esi]
  809. adc ebx,0
  810. mov edx,DWORD [4+esi]
  811. ; sqr a[3]*a[1]
  812. mul edx
  813. add eax,eax
  814. adc edx,edx
  815. adc ebx,0
  816. add ecx,eax
  817. adc ebp,edx
  818. mov eax,DWORD [8+esi]
  819. adc ebx,0
  820. ; sqr a[2]*a[2]
  821. mul eax
  822. add ecx,eax
  823. adc ebp,edx
  824. mov edx,DWORD [esi]
  825. adc ebx,0
  826. mov DWORD [16+edi],ecx
  827. mov eax,DWORD [20+esi]
  828. ; saved r[4]
  829. ; ############### Calculate word 5
  830. xor ecx,ecx
  831. ; sqr a[5]*a[0]
  832. mul edx
  833. add eax,eax
  834. adc edx,edx
  835. adc ecx,0
  836. add ebp,eax
  837. adc ebx,edx
  838. mov eax,DWORD [16+esi]
  839. adc ecx,0
  840. mov edx,DWORD [4+esi]
  841. ; sqr a[4]*a[1]
  842. mul edx
  843. add eax,eax
  844. adc edx,edx
  845. adc ecx,0
  846. add ebp,eax
  847. adc ebx,edx
  848. mov eax,DWORD [12+esi]
  849. adc ecx,0
  850. mov edx,DWORD [8+esi]
  851. ; sqr a[3]*a[2]
  852. mul edx
  853. add eax,eax
  854. adc edx,edx
  855. adc ecx,0
  856. add ebp,eax
  857. adc ebx,edx
  858. mov eax,DWORD [24+esi]
  859. adc ecx,0
  860. mov DWORD [20+edi],ebp
  861. mov edx,DWORD [esi]
  862. ; saved r[5]
  863. ; ############### Calculate word 6
  864. xor ebp,ebp
  865. ; sqr a[6]*a[0]
  866. mul edx
  867. add eax,eax
  868. adc edx,edx
  869. adc ebp,0
  870. add ebx,eax
  871. adc ecx,edx
  872. mov eax,DWORD [20+esi]
  873. adc ebp,0
  874. mov edx,DWORD [4+esi]
  875. ; sqr a[5]*a[1]
  876. mul edx
  877. add eax,eax
  878. adc edx,edx
  879. adc ebp,0
  880. add ebx,eax
  881. adc ecx,edx
  882. mov eax,DWORD [16+esi]
  883. adc ebp,0
  884. mov edx,DWORD [8+esi]
  885. ; sqr a[4]*a[2]
  886. mul edx
  887. add eax,eax
  888. adc edx,edx
  889. adc ebp,0
  890. add ebx,eax
  891. adc ecx,edx
  892. mov eax,DWORD [12+esi]
  893. adc ebp,0
  894. ; sqr a[3]*a[3]
  895. mul eax
  896. add ebx,eax
  897. adc ecx,edx
  898. mov edx,DWORD [esi]
  899. adc ebp,0
  900. mov DWORD [24+edi],ebx
  901. mov eax,DWORD [28+esi]
  902. ; saved r[6]
  903. ; ############### Calculate word 7
  904. xor ebx,ebx
  905. ; sqr a[7]*a[0]
  906. mul edx
  907. add eax,eax
  908. adc edx,edx
  909. adc ebx,0
  910. add ecx,eax
  911. adc ebp,edx
  912. mov eax,DWORD [24+esi]
  913. adc ebx,0
  914. mov edx,DWORD [4+esi]
  915. ; sqr a[6]*a[1]
  916. mul edx
  917. add eax,eax
  918. adc edx,edx
  919. adc ebx,0
  920. add ecx,eax
  921. adc ebp,edx
  922. mov eax,DWORD [20+esi]
  923. adc ebx,0
  924. mov edx,DWORD [8+esi]
  925. ; sqr a[5]*a[2]
  926. mul edx
  927. add eax,eax
  928. adc edx,edx
  929. adc ebx,0
  930. add ecx,eax
  931. adc ebp,edx
  932. mov eax,DWORD [16+esi]
  933. adc ebx,0
  934. mov edx,DWORD [12+esi]
  935. ; sqr a[4]*a[3]
  936. mul edx
  937. add eax,eax
  938. adc edx,edx
  939. adc ebx,0
  940. add ecx,eax
  941. adc ebp,edx
  942. mov eax,DWORD [28+esi]
  943. adc ebx,0
  944. mov DWORD [28+edi],ecx
  945. mov edx,DWORD [4+esi]
  946. ; saved r[7]
  947. ; ############### Calculate word 8
  948. xor ecx,ecx
  949. ; sqr a[7]*a[1]
  950. mul edx
  951. add eax,eax
  952. adc edx,edx
  953. adc ecx,0
  954. add ebp,eax
  955. adc ebx,edx
  956. mov eax,DWORD [24+esi]
  957. adc ecx,0
  958. mov edx,DWORD [8+esi]
  959. ; sqr a[6]*a[2]
  960. mul edx
  961. add eax,eax
  962. adc edx,edx
  963. adc ecx,0
  964. add ebp,eax
  965. adc ebx,edx
  966. mov eax,DWORD [20+esi]
  967. adc ecx,0
  968. mov edx,DWORD [12+esi]
  969. ; sqr a[5]*a[3]
  970. mul edx
  971. add eax,eax
  972. adc edx,edx
  973. adc ecx,0
  974. add ebp,eax
  975. adc ebx,edx
  976. mov eax,DWORD [16+esi]
  977. adc ecx,0
  978. ; sqr a[4]*a[4]
  979. mul eax
  980. add ebp,eax
  981. adc ebx,edx
  982. mov edx,DWORD [8+esi]
  983. adc ecx,0
  984. mov DWORD [32+edi],ebp
  985. mov eax,DWORD [28+esi]
  986. ; saved r[8]
  987. ; ############### Calculate word 9
  988. xor ebp,ebp
  989. ; sqr a[7]*a[2]
  990. mul edx
  991. add eax,eax
  992. adc edx,edx
  993. adc ebp,0
  994. add ebx,eax
  995. adc ecx,edx
  996. mov eax,DWORD [24+esi]
  997. adc ebp,0
  998. mov edx,DWORD [12+esi]
  999. ; sqr a[6]*a[3]
  1000. mul edx
  1001. add eax,eax
  1002. adc edx,edx
  1003. adc ebp,0
  1004. add ebx,eax
  1005. adc ecx,edx
  1006. mov eax,DWORD [20+esi]
  1007. adc ebp,0
  1008. mov edx,DWORD [16+esi]
  1009. ; sqr a[5]*a[4]
  1010. mul edx
  1011. add eax,eax
  1012. adc edx,edx
  1013. adc ebp,0
  1014. add ebx,eax
  1015. adc ecx,edx
  1016. mov eax,DWORD [28+esi]
  1017. adc ebp,0
  1018. mov DWORD [36+edi],ebx
  1019. mov edx,DWORD [12+esi]
  1020. ; saved r[9]
  1021. ; ############### Calculate word 10
  1022. xor ebx,ebx
  1023. ; sqr a[7]*a[3]
  1024. mul edx
  1025. add eax,eax
  1026. adc edx,edx
  1027. adc ebx,0
  1028. add ecx,eax
  1029. adc ebp,edx
  1030. mov eax,DWORD [24+esi]
  1031. adc ebx,0
  1032. mov edx,DWORD [16+esi]
  1033. ; sqr a[6]*a[4]
  1034. mul edx
  1035. add eax,eax
  1036. adc edx,edx
  1037. adc ebx,0
  1038. add ecx,eax
  1039. adc ebp,edx
  1040. mov eax,DWORD [20+esi]
  1041. adc ebx,0
  1042. ; sqr a[5]*a[5]
  1043. mul eax
  1044. add ecx,eax
  1045. adc ebp,edx
  1046. mov edx,DWORD [16+esi]
  1047. adc ebx,0
  1048. mov DWORD [40+edi],ecx
  1049. mov eax,DWORD [28+esi]
  1050. ; saved r[10]
  1051. ; ############### Calculate word 11
  1052. xor ecx,ecx
  1053. ; sqr a[7]*a[4]
  1054. mul edx
  1055. add eax,eax
  1056. adc edx,edx
  1057. adc ecx,0
  1058. add ebp,eax
  1059. adc ebx,edx
  1060. mov eax,DWORD [24+esi]
  1061. adc ecx,0
  1062. mov edx,DWORD [20+esi]
  1063. ; sqr a[6]*a[5]
  1064. mul edx
  1065. add eax,eax
  1066. adc edx,edx
  1067. adc ecx,0
  1068. add ebp,eax
  1069. adc ebx,edx
  1070. mov eax,DWORD [28+esi]
  1071. adc ecx,0
  1072. mov DWORD [44+edi],ebp
  1073. mov edx,DWORD [20+esi]
  1074. ; saved r[11]
  1075. ; ############### Calculate word 12
  1076. xor ebp,ebp
  1077. ; sqr a[7]*a[5]
  1078. mul edx
  1079. add eax,eax
  1080. adc edx,edx
  1081. adc ebp,0
  1082. add ebx,eax
  1083. adc ecx,edx
  1084. mov eax,DWORD [24+esi]
  1085. adc ebp,0
  1086. ; sqr a[6]*a[6]
  1087. mul eax
  1088. add ebx,eax
  1089. adc ecx,edx
  1090. mov edx,DWORD [24+esi]
  1091. adc ebp,0
  1092. mov DWORD [48+edi],ebx
  1093. mov eax,DWORD [28+esi]
  1094. ; saved r[12]
  1095. ; ############### Calculate word 13
  1096. xor ebx,ebx
  1097. ; sqr a[7]*a[6]
  1098. mul edx
  1099. add eax,eax
  1100. adc edx,edx
  1101. adc ebx,0
  1102. add ecx,eax
  1103. adc ebp,edx
  1104. mov eax,DWORD [28+esi]
  1105. adc ebx,0
  1106. mov DWORD [52+edi],ecx
  1107. ; saved r[13]
  1108. ; ############### Calculate word 14
  1109. xor ecx,ecx
  1110. ; sqr a[7]*a[7]
  1111. mul eax
  1112. add ebp,eax
  1113. adc ebx,edx
  1114. adc ecx,0
  1115. mov DWORD [56+edi],ebp
  1116. ; saved r[14]
  1117. mov DWORD [60+edi],ebx
  1118. pop ebx
  1119. pop ebp
  1120. pop edi
  1121. pop esi
  1122. ret
  1123. global _bn_sqr_comba4
  1124. align 16
  1125. _bn_sqr_comba4:
  1126. L$_bn_sqr_comba4_begin:
  1127. push esi
  1128. push edi
  1129. push ebp
  1130. push ebx
  1131. mov edi,DWORD [20+esp]
  1132. mov esi,DWORD [24+esp]
  1133. xor ebx,ebx
  1134. xor ecx,ecx
  1135. mov eax,DWORD [esi]
  1136. ; ############### Calculate word 0
  1137. xor ebp,ebp
  1138. ; sqr a[0]*a[0]
  1139. mul eax
  1140. add ebx,eax
  1141. adc ecx,edx
  1142. mov edx,DWORD [esi]
  1143. adc ebp,0
  1144. mov DWORD [edi],ebx
  1145. mov eax,DWORD [4+esi]
  1146. ; saved r[0]
  1147. ; ############### Calculate word 1
  1148. xor ebx,ebx
  1149. ; sqr a[1]*a[0]
  1150. mul edx
  1151. add eax,eax
  1152. adc edx,edx
  1153. adc ebx,0
  1154. add ecx,eax
  1155. adc ebp,edx
  1156. mov eax,DWORD [8+esi]
  1157. adc ebx,0
  1158. mov DWORD [4+edi],ecx
  1159. mov edx,DWORD [esi]
  1160. ; saved r[1]
  1161. ; ############### Calculate word 2
  1162. xor ecx,ecx
  1163. ; sqr a[2]*a[0]
  1164. mul edx
  1165. add eax,eax
  1166. adc edx,edx
  1167. adc ecx,0
  1168. add ebp,eax
  1169. adc ebx,edx
  1170. mov eax,DWORD [4+esi]
  1171. adc ecx,0
  1172. ; sqr a[1]*a[1]
  1173. mul eax
  1174. add ebp,eax
  1175. adc ebx,edx
  1176. mov edx,DWORD [esi]
  1177. adc ecx,0
  1178. mov DWORD [8+edi],ebp
  1179. mov eax,DWORD [12+esi]
  1180. ; saved r[2]
  1181. ; ############### Calculate word 3
  1182. xor ebp,ebp
  1183. ; sqr a[3]*a[0]
  1184. mul edx
  1185. add eax,eax
  1186. adc edx,edx
  1187. adc ebp,0
  1188. add ebx,eax
  1189. adc ecx,edx
  1190. mov eax,DWORD [8+esi]
  1191. adc ebp,0
  1192. mov edx,DWORD [4+esi]
  1193. ; sqr a[2]*a[1]
  1194. mul edx
  1195. add eax,eax
  1196. adc edx,edx
  1197. adc ebp,0
  1198. add ebx,eax
  1199. adc ecx,edx
  1200. mov eax,DWORD [12+esi]
  1201. adc ebp,0
  1202. mov DWORD [12+edi],ebx
  1203. mov edx,DWORD [4+esi]
  1204. ; saved r[3]
  1205. ; ############### Calculate word 4
  1206. xor ebx,ebx
  1207. ; sqr a[3]*a[1]
  1208. mul edx
  1209. add eax,eax
  1210. adc edx,edx
  1211. adc ebx,0
  1212. add ecx,eax
  1213. adc ebp,edx
  1214. mov eax,DWORD [8+esi]
  1215. adc ebx,0
  1216. ; sqr a[2]*a[2]
  1217. mul eax
  1218. add ecx,eax
  1219. adc ebp,edx
  1220. mov edx,DWORD [8+esi]
  1221. adc ebx,0
  1222. mov DWORD [16+edi],ecx
  1223. mov eax,DWORD [12+esi]
  1224. ; saved r[4]
  1225. ; ############### Calculate word 5
  1226. xor ecx,ecx
  1227. ; sqr a[3]*a[2]
  1228. mul edx
  1229. add eax,eax
  1230. adc edx,edx
  1231. adc ecx,0
  1232. add ebp,eax
  1233. adc ebx,edx
  1234. mov eax,DWORD [12+esi]
  1235. adc ecx,0
  1236. mov DWORD [20+edi],ebp
  1237. ; saved r[5]
  1238. ; ############### Calculate word 6
  1239. xor ebp,ebp
  1240. ; sqr a[3]*a[3]
  1241. mul eax
  1242. add ebx,eax
  1243. adc ecx,edx
  1244. adc ebp,0
  1245. mov DWORD [24+edi],ebx
  1246. ; saved r[6]
  1247. mov DWORD [28+edi],ecx
  1248. pop ebx
  1249. pop ebp
  1250. pop edi
  1251. pop esi
  1252. ret