divsufsort.c 54 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924
  1. /*
  2. * divsufsort.c for libdivsufsort-lite
  3. * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
  4. *
  5. * Permission is hereby granted, free of charge, to any person
  6. * obtaining a copy of this software and associated documentation
  7. * files (the "Software"), to deal in the Software without
  8. * restriction, including without limitation the rights to use,
  9. * copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the
  11. * Software is furnished to do so, subject to the following
  12. * conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be
  15. * included in all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  19. * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  20. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  21. * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  22. * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  23. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24. * OTHER DEALINGS IN THE SOFTWARE.
  25. */
  26. /*- Compiler specifics -*/
  27. #ifdef __clang__
  28. #pragma clang diagnostic ignored "-Wshorten-64-to-32"
  29. #endif
  30. #if defined(_MSC_VER)
  31. # pragma warning(disable : 4244)
  32. # pragma warning(disable : 4127) /* C4127 : Condition expression is constant */
  33. #endif
  34. /*- Dependencies -*/
  35. #include <assert.h>
  36. #include <stdio.h>
  37. #include <stdlib.h>
  38. #ifdef __clang_analyzer__
  39. #include <string.h>
  40. #endif
  41. #include "divsufsort.h"
  42. /*- Constants -*/
  43. #if defined(INLINE)
  44. # undef INLINE
  45. #endif
  46. #if !defined(INLINE)
  47. # define INLINE __inline
  48. #endif
  49. #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
  50. # undef ALPHABET_SIZE
  51. #endif
  52. #if !defined(ALPHABET_SIZE)
  53. # define ALPHABET_SIZE (256)
  54. #endif
  55. #define BUCKET_A_SIZE (ALPHABET_SIZE)
  56. #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
  57. #if defined(SS_INSERTIONSORT_THRESHOLD)
  58. # if SS_INSERTIONSORT_THRESHOLD < 1
  59. # undef SS_INSERTIONSORT_THRESHOLD
  60. # define SS_INSERTIONSORT_THRESHOLD (1)
  61. # endif
  62. #else
  63. # define SS_INSERTIONSORT_THRESHOLD (8)
  64. #endif
  65. #if defined(SS_BLOCKSIZE)
  66. # if SS_BLOCKSIZE < 0
  67. # undef SS_BLOCKSIZE
  68. # define SS_BLOCKSIZE (0)
  69. # elif 32768 <= SS_BLOCKSIZE
  70. # undef SS_BLOCKSIZE
  71. # define SS_BLOCKSIZE (32767)
  72. # endif
  73. #else
  74. # define SS_BLOCKSIZE (1024)
  75. #endif
  76. /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
  77. #if SS_BLOCKSIZE == 0
  78. # define SS_MISORT_STACKSIZE (96)
  79. #elif SS_BLOCKSIZE <= 4096
  80. # define SS_MISORT_STACKSIZE (16)
  81. #else
  82. # define SS_MISORT_STACKSIZE (24)
  83. #endif
  84. #define SS_SMERGE_STACKSIZE (32)
  85. #define TR_INSERTIONSORT_THRESHOLD (8)
  86. #define TR_STACKSIZE (64)
  87. /*- Macros -*/
  88. #ifndef SWAP
  89. # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
  90. #endif /* SWAP */
  91. #ifndef MIN
  92. # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
  93. #endif /* MIN */
  94. #ifndef MAX
  95. # define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
  96. #endif /* MAX */
  97. #define STACK_PUSH(_a, _b, _c, _d)\
  98. do {\
  99. assert(ssize < STACK_SIZE);\
  100. stack[ssize].a = (_a), stack[ssize].b = (_b),\
  101. stack[ssize].c = (_c), stack[ssize++].d = (_d);\
  102. } while(0)
  103. #define STACK_PUSH5(_a, _b, _c, _d, _e)\
  104. do {\
  105. assert(ssize < STACK_SIZE);\
  106. stack[ssize].a = (_a), stack[ssize].b = (_b),\
  107. stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
  108. } while(0)
  109. #define STACK_POP(_a, _b, _c, _d)\
  110. do {\
  111. assert(0 <= ssize);\
  112. if(ssize == 0) { return; }\
  113. (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
  114. (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
  115. } while(0)
  116. #define STACK_POP5(_a, _b, _c, _d, _e)\
  117. do {\
  118. assert(0 <= ssize);\
  119. if(ssize == 0) { return; }\
  120. (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
  121. (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
  122. } while(0)
  123. #define BUCKET_A(_c0) bucket_A[(_c0)]
  124. #if ALPHABET_SIZE == 256
  125. #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
  126. #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
  127. #else
  128. #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
  129. #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
  130. #endif
  131. /*- Private Functions -*/
  132. static const int lg_table[256]= {
  133. -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
  134. 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  135. 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  136. 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  137. 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  138. 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  139. 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  140. 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
  141. };
  142. #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
  143. static INLINE
  144. int
  145. ss_ilg(int n) {
  146. #if SS_BLOCKSIZE == 0
  147. return (n & 0xffff0000) ?
  148. ((n & 0xff000000) ?
  149. 24 + lg_table[(n >> 24) & 0xff] :
  150. 16 + lg_table[(n >> 16) & 0xff]) :
  151. ((n & 0x0000ff00) ?
  152. 8 + lg_table[(n >> 8) & 0xff] :
  153. 0 + lg_table[(n >> 0) & 0xff]);
  154. #elif SS_BLOCKSIZE < 256
  155. return lg_table[n];
  156. #else
  157. return (n & 0xff00) ?
  158. 8 + lg_table[(n >> 8) & 0xff] :
  159. 0 + lg_table[(n >> 0) & 0xff];
  160. #endif
  161. }
  162. #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
  163. #if SS_BLOCKSIZE != 0
  164. static const int sqq_table[256] = {
  165. 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
  166. 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
  167. 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
  168. 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
  169. 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
  170. 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
  171. 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
  172. 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
  173. 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
  174. 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
  175. 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
  176. 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
  177. 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
  178. 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
  179. 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
  180. 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
  181. };
  182. static INLINE
  183. int
  184. ss_isqrt(int x) {
  185. int y, e;
  186. if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
  187. e = (x & 0xffff0000) ?
  188. ((x & 0xff000000) ?
  189. 24 + lg_table[(x >> 24) & 0xff] :
  190. 16 + lg_table[(x >> 16) & 0xff]) :
  191. ((x & 0x0000ff00) ?
  192. 8 + lg_table[(x >> 8) & 0xff] :
  193. 0 + lg_table[(x >> 0) & 0xff]);
  194. if(e >= 16) {
  195. y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
  196. if(e >= 24) { y = (y + 1 + x / y) >> 1; }
  197. y = (y + 1 + x / y) >> 1;
  198. } else if(e >= 8) {
  199. y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
  200. } else {
  201. return sqq_table[x] >> 4;
  202. }
  203. return (x < (y * y)) ? y - 1 : y;
  204. }
  205. #endif /* SS_BLOCKSIZE != 0 */
  206. /*---------------------------------------------------------------------------*/
  207. /* Compares two suffixes. */
  208. static INLINE
  209. int
  210. ss_compare(const unsigned char *T,
  211. const int *p1, const int *p2,
  212. int depth) {
  213. const unsigned char *U1, *U2, *U1n, *U2n;
  214. for(U1 = T + depth + *p1,
  215. U2 = T + depth + *p2,
  216. U1n = T + *(p1 + 1) + 2,
  217. U2n = T + *(p2 + 1) + 2;
  218. (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
  219. ++U1, ++U2) {
  220. }
  221. return U1 < U1n ?
  222. (U2 < U2n ? *U1 - *U2 : 1) :
  223. (U2 < U2n ? -1 : 0);
  224. }
  225. /*---------------------------------------------------------------------------*/
  226. #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
  227. /* Insertionsort for small size groups */
  228. static
  229. void
  230. ss_insertionsort(const unsigned char *T, const int *PA,
  231. int *first, int *last, int depth) {
  232. int *i, *j;
  233. int t;
  234. int r;
  235. for(i = last - 2; first <= i; --i) {
  236. for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
  237. do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
  238. if(last <= j) { break; }
  239. }
  240. if(r == 0) { *j = ~*j; }
  241. *(j - 1) = t;
  242. }
  243. }
  244. #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
  245. /*---------------------------------------------------------------------------*/
  246. #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
  247. static INLINE
  248. void
  249. ss_fixdown(const unsigned char *Td, const int *PA,
  250. int *SA, int i, int size) {
  251. int j, k;
  252. int v;
  253. int c, d, e;
  254. for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
  255. d = Td[PA[SA[k = j++]]];
  256. if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
  257. if(d <= c) { break; }
  258. }
  259. SA[i] = v;
  260. }
  261. /* Simple top-down heapsort. */
  262. static
  263. void
  264. ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
  265. int i, m;
  266. int t;
  267. m = size;
  268. if((size % 2) == 0) {
  269. m--;
  270. if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
  271. }
  272. for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
  273. if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
  274. for(i = m - 1; 0 < i; --i) {
  275. t = SA[0], SA[0] = SA[i];
  276. ss_fixdown(Td, PA, SA, 0, i);
  277. SA[i] = t;
  278. }
  279. }
  280. /*---------------------------------------------------------------------------*/
  281. /* Returns the median of three elements. */
  282. static INLINE
  283. int *
  284. ss_median3(const unsigned char *Td, const int *PA,
  285. int *v1, int *v2, int *v3) {
  286. int *t;
  287. if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
  288. if(Td[PA[*v2]] > Td[PA[*v3]]) {
  289. if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
  290. else { return v3; }
  291. }
  292. return v2;
  293. }
  294. /* Returns the median of five elements. */
  295. static INLINE
  296. int *
  297. ss_median5(const unsigned char *Td, const int *PA,
  298. int *v1, int *v2, int *v3, int *v4, int *v5) {
  299. int *t;
  300. if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
  301. if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
  302. if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
  303. if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
  304. if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
  305. if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
  306. return v3;
  307. }
  308. /* Returns the pivot element. */
  309. static INLINE
  310. int *
  311. ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
  312. int *middle;
  313. int t;
  314. t = last - first;
  315. middle = first + t / 2;
  316. if(t <= 512) {
  317. if(t <= 32) {
  318. return ss_median3(Td, PA, first, middle, last - 1);
  319. } else {
  320. t >>= 2;
  321. return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
  322. }
  323. }
  324. t >>= 3;
  325. first = ss_median3(Td, PA, first, first + t, first + (t << 1));
  326. middle = ss_median3(Td, PA, middle - t, middle, middle + t);
  327. last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
  328. return ss_median3(Td, PA, first, middle, last);
  329. }
  330. /*---------------------------------------------------------------------------*/
  331. /* Binary partition for substrings. */
  332. static INLINE
  333. int *
  334. ss_partition(const int *PA,
  335. int *first, int *last, int depth) {
  336. int *a, *b;
  337. int t;
  338. for(a = first - 1, b = last;;) {
  339. for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
  340. for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
  341. if(b <= a) { break; }
  342. t = ~*b;
  343. *b = *a;
  344. *a = t;
  345. }
  346. if(first < a) { *first = ~*first; }
  347. return a;
  348. }
  349. /* Multikey introsort for medium size groups. */
  350. static
  351. void
  352. ss_mintrosort(const unsigned char *T, const int *PA,
  353. int *first, int *last,
  354. int depth) {
  355. #define STACK_SIZE SS_MISORT_STACKSIZE
  356. struct { int *a, *b, c; int d; } stack[STACK_SIZE];
  357. const unsigned char *Td;
  358. int *a, *b, *c, *d, *e, *f;
  359. int s, t;
  360. int ssize;
  361. int limit;
  362. int v, x = 0;
  363. for(ssize = 0, limit = ss_ilg(last - first);;) {
  364. if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
  365. #if 1 < SS_INSERTIONSORT_THRESHOLD
  366. if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
  367. #endif
  368. STACK_POP(first, last, depth, limit);
  369. continue;
  370. }
  371. Td = T + depth;
  372. if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
  373. if(limit < 0) {
  374. for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
  375. if((x = Td[PA[*a]]) != v) {
  376. if(1 < (a - first)) { break; }
  377. v = x;
  378. first = a;
  379. }
  380. }
  381. if(Td[PA[*first] - 1] < v) {
  382. first = ss_partition(PA, first, a, depth);
  383. }
  384. if((a - first) <= (last - a)) {
  385. if(1 < (a - first)) {
  386. STACK_PUSH(a, last, depth, -1);
  387. last = a, depth += 1, limit = ss_ilg(a - first);
  388. } else {
  389. first = a, limit = -1;
  390. }
  391. } else {
  392. if(1 < (last - a)) {
  393. STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
  394. first = a, limit = -1;
  395. } else {
  396. last = a, depth += 1, limit = ss_ilg(a - first);
  397. }
  398. }
  399. continue;
  400. }
  401. /* choose pivot */
  402. a = ss_pivot(Td, PA, first, last);
  403. v = Td[PA[*a]];
  404. SWAP(*first, *a);
  405. /* partition */
  406. for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
  407. if(((a = b) < last) && (x < v)) {
  408. for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
  409. if(x == v) { SWAP(*b, *a); ++a; }
  410. }
  411. }
  412. for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
  413. if((b < (d = c)) && (x > v)) {
  414. for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
  415. if(x == v) { SWAP(*c, *d); --d; }
  416. }
  417. }
  418. for(; b < c;) {
  419. SWAP(*b, *c);
  420. for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
  421. if(x == v) { SWAP(*b, *a); ++a; }
  422. }
  423. for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
  424. if(x == v) { SWAP(*c, *d); --d; }
  425. }
  426. }
  427. if(a <= d) {
  428. c = b - 1;
  429. if((s = a - first) > (t = b - a)) { s = t; }
  430. for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
  431. if((s = d - c) > (t = last - d - 1)) { s = t; }
  432. for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
  433. a = first + (b - a), c = last - (d - c);
  434. b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
  435. if((a - first) <= (last - c)) {
  436. if((last - c) <= (c - b)) {
  437. STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
  438. STACK_PUSH(c, last, depth, limit);
  439. last = a;
  440. } else if((a - first) <= (c - b)) {
  441. STACK_PUSH(c, last, depth, limit);
  442. STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
  443. last = a;
  444. } else {
  445. STACK_PUSH(c, last, depth, limit);
  446. STACK_PUSH(first, a, depth, limit);
  447. first = b, last = c, depth += 1, limit = ss_ilg(c - b);
  448. }
  449. } else {
  450. if((a - first) <= (c - b)) {
  451. STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
  452. STACK_PUSH(first, a, depth, limit);
  453. first = c;
  454. } else if((last - c) <= (c - b)) {
  455. STACK_PUSH(first, a, depth, limit);
  456. STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
  457. first = c;
  458. } else {
  459. STACK_PUSH(first, a, depth, limit);
  460. STACK_PUSH(c, last, depth, limit);
  461. first = b, last = c, depth += 1, limit = ss_ilg(c - b);
  462. }
  463. }
  464. } else {
  465. limit += 1;
  466. if(Td[PA[*first] - 1] < v) {
  467. first = ss_partition(PA, first, last, depth);
  468. limit = ss_ilg(last - first);
  469. }
  470. depth += 1;
  471. }
  472. }
  473. #undef STACK_SIZE
  474. }
  475. #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
  476. /*---------------------------------------------------------------------------*/
  477. #if SS_BLOCKSIZE != 0
  478. static INLINE
  479. void
  480. ss_blockswap(int *a, int *b, int n) {
  481. int t;
  482. for(; 0 < n; --n, ++a, ++b) {
  483. t = *a, *a = *b, *b = t;
  484. }
  485. }
  486. static INLINE
  487. void
  488. ss_rotate(int *first, int *middle, int *last) {
  489. int *a, *b, t;
  490. int l, r;
  491. l = middle - first, r = last - middle;
  492. for(; (0 < l) && (0 < r);) {
  493. if(l == r) { ss_blockswap(first, middle, l); break; }
  494. if(l < r) {
  495. a = last - 1, b = middle - 1;
  496. t = *a;
  497. do {
  498. *a-- = *b, *b-- = *a;
  499. if(b < first) {
  500. *a = t;
  501. last = a;
  502. if((r -= l + 1) <= l) { break; }
  503. a -= 1, b = middle - 1;
  504. t = *a;
  505. }
  506. } while(1);
  507. } else {
  508. a = first, b = middle;
  509. t = *a;
  510. do {
  511. *a++ = *b, *b++ = *a;
  512. if(last <= b) {
  513. *a = t;
  514. first = a + 1;
  515. if((l -= r + 1) <= r) { break; }
  516. a += 1, b = middle;
  517. t = *a;
  518. }
  519. } while(1);
  520. }
  521. }
  522. }
  523. /*---------------------------------------------------------------------------*/
  524. static
  525. void
  526. ss_inplacemerge(const unsigned char *T, const int *PA,
  527. int *first, int *middle, int *last,
  528. int depth) {
  529. const int *p;
  530. int *a, *b;
  531. int len, half;
  532. int q, r;
  533. int x;
  534. for(;;) {
  535. if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
  536. else { x = 0; p = PA + *(last - 1); }
  537. for(a = first, len = middle - first, half = len >> 1, r = -1;
  538. 0 < len;
  539. len = half, half >>= 1) {
  540. b = a + half;
  541. q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
  542. if(q < 0) {
  543. a = b + 1;
  544. half -= (len & 1) ^ 1;
  545. } else {
  546. r = q;
  547. }
  548. }
  549. if(a < middle) {
  550. if(r == 0) { *a = ~*a; }
  551. ss_rotate(a, middle, last);
  552. last -= middle - a;
  553. middle = a;
  554. if(first == middle) { break; }
  555. }
  556. --last;
  557. if(x != 0) { while(*--last < 0) { } }
  558. if(middle == last) { break; }
  559. }
  560. }
  561. /*---------------------------------------------------------------------------*/
  562. /* Merge-forward with internal buffer. */
  563. static
  564. void
  565. ss_mergeforward(const unsigned char *T, const int *PA,
  566. int *first, int *middle, int *last,
  567. int *buf, int depth) {
  568. int *a, *b, *c, *bufend;
  569. int t;
  570. int r;
  571. bufend = buf + (middle - first) - 1;
  572. ss_blockswap(buf, first, middle - first);
  573. for(t = *(a = first), b = buf, c = middle;;) {
  574. r = ss_compare(T, PA + *b, PA + *c, depth);
  575. if(r < 0) {
  576. do {
  577. *a++ = *b;
  578. if(bufend <= b) { *bufend = t; return; }
  579. *b++ = *a;
  580. } while(*b < 0);
  581. } else if(r > 0) {
  582. do {
  583. *a++ = *c, *c++ = *a;
  584. if(last <= c) {
  585. while(b < bufend) { *a++ = *b, *b++ = *a; }
  586. *a = *b, *b = t;
  587. return;
  588. }
  589. } while(*c < 0);
  590. } else {
  591. *c = ~*c;
  592. do {
  593. *a++ = *b;
  594. if(bufend <= b) { *bufend = t; return; }
  595. *b++ = *a;
  596. } while(*b < 0);
  597. do {
  598. *a++ = *c, *c++ = *a;
  599. if(last <= c) {
  600. while(b < bufend) { *a++ = *b, *b++ = *a; }
  601. *a = *b, *b = t;
  602. return;
  603. }
  604. } while(*c < 0);
  605. }
  606. }
  607. }
  608. /* Merge-backward with internal buffer. */
  609. static
  610. void
  611. ss_mergebackward(const unsigned char *T, const int *PA,
  612. int *first, int *middle, int *last,
  613. int *buf, int depth) {
  614. const int *p1, *p2;
  615. int *a, *b, *c, *bufend;
  616. int t;
  617. int r;
  618. int x;
  619. bufend = buf + (last - middle) - 1;
  620. ss_blockswap(buf, middle, last - middle);
  621. x = 0;
  622. if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
  623. else { p1 = PA + *bufend; }
  624. if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
  625. else { p2 = PA + *(middle - 1); }
  626. for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
  627. r = ss_compare(T, p1, p2, depth);
  628. if(0 < r) {
  629. if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
  630. *a-- = *b;
  631. if(b <= buf) { *buf = t; break; }
  632. *b-- = *a;
  633. if(*b < 0) { p1 = PA + ~*b; x |= 1; }
  634. else { p1 = PA + *b; }
  635. } else if(r < 0) {
  636. if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
  637. *a-- = *c, *c-- = *a;
  638. if(c < first) {
  639. while(buf < b) { *a-- = *b, *b-- = *a; }
  640. *a = *b, *b = t;
  641. break;
  642. }
  643. if(*c < 0) { p2 = PA + ~*c; x |= 2; }
  644. else { p2 = PA + *c; }
  645. } else {
  646. if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
  647. *a-- = ~*b;
  648. if(b <= buf) { *buf = t; break; }
  649. *b-- = *a;
  650. if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
  651. *a-- = *c, *c-- = *a;
  652. if(c < first) {
  653. while(buf < b) { *a-- = *b, *b-- = *a; }
  654. *a = *b, *b = t;
  655. break;
  656. }
  657. if(*b < 0) { p1 = PA + ~*b; x |= 1; }
  658. else { p1 = PA + *b; }
  659. if(*c < 0) { p2 = PA + ~*c; x |= 2; }
  660. else { p2 = PA + *c; }
  661. }
  662. }
  663. }
  664. /* D&C based merge. */
  665. static
  666. void
  667. ss_swapmerge(const unsigned char *T, const int *PA,
  668. int *first, int *middle, int *last,
  669. int *buf, int bufsize, int depth) {
  670. #define STACK_SIZE SS_SMERGE_STACKSIZE
  671. #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
  672. #define MERGE_CHECK(a, b, c)\
  673. do {\
  674. if(((c) & 1) ||\
  675. (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
  676. *(a) = ~*(a);\
  677. }\
  678. if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
  679. *(b) = ~*(b);\
  680. }\
  681. } while(0)
  682. struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
  683. int *l, *r, *lm, *rm;
  684. int m, len, half;
  685. int ssize;
  686. int check, next;
  687. for(check = 0, ssize = 0;;) {
  688. if((last - middle) <= bufsize) {
  689. if((first < middle) && (middle < last)) {
  690. ss_mergebackward(T, PA, first, middle, last, buf, depth);
  691. }
  692. MERGE_CHECK(first, last, check);
  693. STACK_POP(first, middle, last, check);
  694. continue;
  695. }
  696. if((middle - first) <= bufsize) {
  697. if(first < middle) {
  698. ss_mergeforward(T, PA, first, middle, last, buf, depth);
  699. }
  700. MERGE_CHECK(first, last, check);
  701. STACK_POP(first, middle, last, check);
  702. continue;
  703. }
  704. for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
  705. 0 < len;
  706. len = half, half >>= 1) {
  707. if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
  708. PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
  709. m += half + 1;
  710. half -= (len & 1) ^ 1;
  711. }
  712. }
  713. if(0 < m) {
  714. lm = middle - m, rm = middle + m;
  715. ss_blockswap(lm, middle, m);
  716. l = r = middle, next = 0;
  717. if(rm < last) {
  718. if(*rm < 0) {
  719. *rm = ~*rm;
  720. if(first < lm) { for(; *--l < 0;) { } next |= 4; }
  721. next |= 1;
  722. } else if(first < lm) {
  723. for(; *r < 0; ++r) { }
  724. next |= 2;
  725. }
  726. }
  727. if((l - first) <= (last - r)) {
  728. STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
  729. middle = lm, last = l, check = (check & 3) | (next & 4);
  730. } else {
  731. if((next & 2) && (r == middle)) { next ^= 6; }
  732. STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
  733. first = r, middle = rm, check = (next & 3) | (check & 4);
  734. }
  735. } else {
  736. if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
  737. *middle = ~*middle;
  738. }
  739. MERGE_CHECK(first, last, check);
  740. STACK_POP(first, middle, last, check);
  741. }
  742. }
  743. #undef STACK_SIZE
  744. }
  745. #endif /* SS_BLOCKSIZE != 0 */
  746. /*---------------------------------------------------------------------------*/
  747. /* Substring sort */
  748. static
  749. void
  750. sssort(const unsigned char *T, const int *PA,
  751. int *first, int *last,
  752. int *buf, int bufsize,
  753. int depth, int n, int lastsuffix) {
  754. int *a;
  755. #if SS_BLOCKSIZE != 0
  756. int *b, *middle, *curbuf;
  757. int j, k, curbufsize, limit;
  758. #endif
  759. int i;
  760. if(lastsuffix != 0) { ++first; }
  761. #if SS_BLOCKSIZE == 0
  762. ss_mintrosort(T, PA, first, last, depth);
  763. #else
  764. if((bufsize < SS_BLOCKSIZE) &&
  765. (bufsize < (last - first)) &&
  766. (bufsize < (limit = ss_isqrt(last - first)))) {
  767. if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
  768. buf = middle = last - limit, bufsize = limit;
  769. } else {
  770. middle = last, limit = 0;
  771. }
  772. for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
  773. #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
  774. ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
  775. #elif 1 < SS_BLOCKSIZE
  776. ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
  777. #endif
  778. curbufsize = last - (a + SS_BLOCKSIZE);
  779. curbuf = a + SS_BLOCKSIZE;
  780. if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
  781. for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
  782. ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
  783. }
  784. }
  785. #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
  786. ss_mintrosort(T, PA, a, middle, depth);
  787. #elif 1 < SS_BLOCKSIZE
  788. ss_insertionsort(T, PA, a, middle, depth);
  789. #endif
  790. for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
  791. if(i & 1) {
  792. ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
  793. a -= k;
  794. }
  795. }
  796. if(limit != 0) {
  797. #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
  798. ss_mintrosort(T, PA, middle, last, depth);
  799. #elif 1 < SS_BLOCKSIZE
  800. ss_insertionsort(T, PA, middle, last, depth);
  801. #endif
  802. ss_inplacemerge(T, PA, first, middle, last, depth);
  803. }
  804. #endif
  805. if(lastsuffix != 0) {
  806. /* Insert last type B* suffix. */
  807. int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
  808. for(a = first, i = *(first - 1);
  809. (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
  810. ++a) {
  811. *(a - 1) = *a;
  812. }
  813. *(a - 1) = i;
  814. }
  815. }
  816. /*---------------------------------------------------------------------------*/
  817. static INLINE
  818. int
  819. tr_ilg(int n) {
  820. return (n & 0xffff0000) ?
  821. ((n & 0xff000000) ?
  822. 24 + lg_table[(n >> 24) & 0xff] :
  823. 16 + lg_table[(n >> 16) & 0xff]) :
  824. ((n & 0x0000ff00) ?
  825. 8 + lg_table[(n >> 8) & 0xff] :
  826. 0 + lg_table[(n >> 0) & 0xff]);
  827. }
  828. /*---------------------------------------------------------------------------*/
  829. /* Simple insertionsort for small size groups. */
  830. static
  831. void
  832. tr_insertionsort(const int *ISAd, int *first, int *last) {
  833. int *a, *b;
  834. int t, r;
  835. for(a = first + 1; a < last; ++a) {
  836. for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
  837. do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
  838. if(b < first) { break; }
  839. }
  840. if(r == 0) { *b = ~*b; }
  841. *(b + 1) = t;
  842. }
  843. }
  844. /*---------------------------------------------------------------------------*/
  845. static INLINE
  846. void
  847. tr_fixdown(const int *ISAd, int *SA, int i, int size) {
  848. int j, k;
  849. int v;
  850. int c, d, e;
  851. for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
  852. d = ISAd[SA[k = j++]];
  853. if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
  854. if(d <= c) { break; }
  855. }
  856. SA[i] = v;
  857. }
  858. /* Simple top-down heapsort. */
  859. static
  860. void
  861. tr_heapsort(const int *ISAd, int *SA, int size) {
  862. int i, m;
  863. int t;
  864. m = size;
  865. if((size % 2) == 0) {
  866. m--;
  867. if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
  868. }
  869. for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
  870. if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
  871. for(i = m - 1; 0 < i; --i) {
  872. t = SA[0], SA[0] = SA[i];
  873. tr_fixdown(ISAd, SA, 0, i);
  874. SA[i] = t;
  875. }
  876. }
  877. /*---------------------------------------------------------------------------*/
  878. /* Returns the median of three elements. */
  879. static INLINE
  880. int *
  881. tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
  882. int *t;
  883. if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
  884. if(ISAd[*v2] > ISAd[*v3]) {
  885. if(ISAd[*v1] > ISAd[*v3]) { return v1; }
  886. else { return v3; }
  887. }
  888. return v2;
  889. }
  890. /* Returns the median of five elements. */
  891. static INLINE
  892. int *
  893. tr_median5(const int *ISAd,
  894. int *v1, int *v2, int *v3, int *v4, int *v5) {
  895. int *t;
  896. if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
  897. if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
  898. if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
  899. if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
  900. if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
  901. if(ISAd[*v3] > ISAd[*v4]) { return v4; }
  902. return v3;
  903. }
  904. /* Returns the pivot element. */
  905. static INLINE
  906. int *
  907. tr_pivot(const int *ISAd, int *first, int *last) {
  908. int *middle;
  909. int t;
  910. t = last - first;
  911. middle = first + t / 2;
  912. if(t <= 512) {
  913. if(t <= 32) {
  914. return tr_median3(ISAd, first, middle, last - 1);
  915. } else {
  916. t >>= 2;
  917. return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
  918. }
  919. }
  920. t >>= 3;
  921. first = tr_median3(ISAd, first, first + t, first + (t << 1));
  922. middle = tr_median3(ISAd, middle - t, middle, middle + t);
  923. last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
  924. return tr_median3(ISAd, first, middle, last);
  925. }
  926. /*---------------------------------------------------------------------------*/
  927. typedef struct _trbudget_t trbudget_t;
  928. struct _trbudget_t {
  929. int chance;
  930. int remain;
  931. int incval;
  932. int count;
  933. };
  934. static INLINE
  935. void
  936. trbudget_init(trbudget_t *budget, int chance, int incval) {
  937. budget->chance = chance;
  938. budget->remain = budget->incval = incval;
  939. }
  940. static INLINE
  941. int
  942. trbudget_check(trbudget_t *budget, int size) {
  943. if(size <= budget->remain) { budget->remain -= size; return 1; }
  944. if(budget->chance == 0) { budget->count += size; return 0; }
  945. budget->remain += budget->incval - size;
  946. budget->chance -= 1;
  947. return 1;
  948. }
  949. /*---------------------------------------------------------------------------*/
  950. static INLINE
  951. void
  952. tr_partition(const int *ISAd,
  953. int *first, int *middle, int *last,
  954. int **pa, int **pb, int v) {
  955. int *a, *b, *c, *d, *e, *f;
  956. int t, s;
  957. int x = 0;
  958. for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
  959. if(((a = b) < last) && (x < v)) {
  960. for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
  961. if(x == v) { SWAP(*b, *a); ++a; }
  962. }
  963. }
  964. for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
  965. if((b < (d = c)) && (x > v)) {
  966. for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
  967. if(x == v) { SWAP(*c, *d); --d; }
  968. }
  969. }
  970. for(; b < c;) {
  971. SWAP(*b, *c);
  972. for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
  973. if(x == v) { SWAP(*b, *a); ++a; }
  974. }
  975. for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
  976. if(x == v) { SWAP(*c, *d); --d; }
  977. }
  978. }
  979. if(a <= d) {
  980. c = b - 1;
  981. if((s = a - first) > (t = b - a)) { s = t; }
  982. for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
  983. if((s = d - c) > (t = last - d - 1)) { s = t; }
  984. for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
  985. first += (b - a), last -= (d - c);
  986. }
  987. *pa = first, *pb = last;
  988. }
  989. static
  990. void
  991. tr_copy(int *ISA, const int *SA,
  992. int *first, int *a, int *b, int *last,
  993. int depth) {
  994. /* sort suffixes of middle partition
  995. by using sorted order of suffixes of left and right partition. */
  996. int *c, *d, *e;
  997. int s, v;
  998. v = b - SA - 1;
  999. for(c = first, d = a - 1; c <= d; ++c) {
  1000. #ifdef __clang_analyzer__
  1001. assert(c);
  1002. #endif
  1003. if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
  1004. *++d = s;
  1005. ISA[s] = d - SA;
  1006. }
  1007. }
  1008. for(c = last - 1, e = d + 1, d = b; e < d; --c) {
  1009. if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
  1010. *--d = s;
  1011. ISA[s] = d - SA;
  1012. }
  1013. }
  1014. }
  1015. static
  1016. void
  1017. tr_partialcopy(int *ISA, const int *SA,
  1018. int *first, int *a, int *b, int *last,
  1019. int depth) {
  1020. int *c, *d, *e;
  1021. int s, v;
  1022. int rank, lastrank, newrank = -1;
  1023. v = b - SA - 1;
  1024. lastrank = -1;
  1025. for(c = first, d = a - 1; c <= d; ++c) {
  1026. if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
  1027. *++d = s;
  1028. rank = ISA[s + depth];
  1029. if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
  1030. ISA[s] = newrank;
  1031. }
  1032. }
  1033. lastrank = -1;
  1034. for(e = d; first <= e; --e) {
  1035. rank = ISA[*e];
  1036. if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
  1037. if(newrank != rank) { ISA[*e] = newrank; }
  1038. }
  1039. lastrank = -1;
  1040. for(c = last - 1, e = d + 1, d = b; e < d; --c) {
  1041. if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
  1042. *--d = s;
  1043. rank = ISA[s + depth];
  1044. if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
  1045. ISA[s] = newrank;
  1046. }
  1047. }
  1048. }
  1049. static
  1050. void
  1051. tr_introsort(int *ISA, const int *ISAd,
  1052. int *SA, int *first, int *last,
  1053. trbudget_t *budget) {
  1054. #define STACK_SIZE TR_STACKSIZE
  1055. struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
  1056. int *a, *b, *c;
  1057. int t;
  1058. int v, x = 0;
  1059. int incr = ISAd - ISA;
  1060. int limit, next;
  1061. int ssize, trlink = -1;
  1062. #ifdef __clang_analyzer__
  1063. memset(stack, 0, sizeof(stack));
  1064. #endif
  1065. for(ssize = 0, limit = tr_ilg(last - first);;) {
  1066. if(limit < 0) {
  1067. if(limit == -1) {
  1068. /* tandem repeat partition */
  1069. tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
  1070. /* update ranks */
  1071. if(a < last) {
  1072. for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
  1073. }
  1074. if(b < last) {
  1075. for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
  1076. }
  1077. /* push */
  1078. if(1 < (b - a)) {
  1079. STACK_PUSH5(NULL, a, b, 0, 0);
  1080. STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
  1081. trlink = ssize - 2;
  1082. }
  1083. if((a - first) <= (last - b)) {
  1084. if(1 < (a - first)) {
  1085. STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
  1086. last = a, limit = tr_ilg(a - first);
  1087. } else if(1 < (last - b)) {
  1088. first = b, limit = tr_ilg(last - b);
  1089. } else {
  1090. STACK_POP5(ISAd, first, last, limit, trlink);
  1091. }
  1092. } else {
  1093. if(1 < (last - b)) {
  1094. STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
  1095. first = b, limit = tr_ilg(last - b);
  1096. } else if(1 < (a - first)) {
  1097. last = a, limit = tr_ilg(a - first);
  1098. } else {
  1099. STACK_POP5(ISAd, first, last, limit, trlink);
  1100. }
  1101. }
  1102. } else if(limit == -2) {
  1103. /* tandem repeat copy */
  1104. a = stack[--ssize].b, b = stack[ssize].c;
  1105. if(stack[ssize].d == 0) {
  1106. tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
  1107. } else {
  1108. if(0 <= trlink) { stack[trlink].d = -1; }
  1109. tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
  1110. }
  1111. STACK_POP5(ISAd, first, last, limit, trlink);
  1112. } else {
  1113. /* sorted partition */
  1114. if(0 <= *first) {
  1115. a = first;
  1116. do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
  1117. first = a;
  1118. }
  1119. if(first < last) {
  1120. a = first; do { *a = ~*a; } while(*++a < 0);
  1121. next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
  1122. if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
  1123. /* push */
  1124. if(trbudget_check(budget, a - first)) {
  1125. if((a - first) <= (last - a)) {
  1126. STACK_PUSH5(ISAd, a, last, -3, trlink);
  1127. ISAd += incr, last = a, limit = next;
  1128. } else {
  1129. if(1 < (last - a)) {
  1130. STACK_PUSH5(ISAd + incr, first, a, next, trlink);
  1131. first = a, limit = -3;
  1132. } else {
  1133. ISAd += incr, last = a, limit = next;
  1134. }
  1135. }
  1136. } else {
  1137. if(0 <= trlink) { stack[trlink].d = -1; }
  1138. if(1 < (last - a)) {
  1139. first = a, limit = -3;
  1140. } else {
  1141. STACK_POP5(ISAd, first, last, limit, trlink);
  1142. }
  1143. }
  1144. } else {
  1145. STACK_POP5(ISAd, first, last, limit, trlink);
  1146. }
  1147. }
  1148. continue;
  1149. }
  1150. if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
  1151. tr_insertionsort(ISAd, first, last);
  1152. limit = -3;
  1153. continue;
  1154. }
  1155. if(limit-- == 0) {
  1156. tr_heapsort(ISAd, first, last - first);
  1157. for(a = last - 1; first < a; a = b) {
  1158. for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
  1159. }
  1160. limit = -3;
  1161. continue;
  1162. }
  1163. /* choose pivot */
  1164. a = tr_pivot(ISAd, first, last);
  1165. SWAP(*first, *a);
  1166. v = ISAd[*first];
  1167. /* partition */
  1168. tr_partition(ISAd, first, first + 1, last, &a, &b, v);
  1169. if((last - first) != (b - a)) {
  1170. next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
  1171. /* update ranks */
  1172. for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
  1173. if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
  1174. /* push */
  1175. if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
  1176. if((a - first) <= (last - b)) {
  1177. if((last - b) <= (b - a)) {
  1178. if(1 < (a - first)) {
  1179. STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1180. STACK_PUSH5(ISAd, b, last, limit, trlink);
  1181. last = a;
  1182. } else if(1 < (last - b)) {
  1183. STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1184. first = b;
  1185. } else {
  1186. ISAd += incr, first = a, last = b, limit = next;
  1187. }
  1188. } else if((a - first) <= (b - a)) {
  1189. if(1 < (a - first)) {
  1190. STACK_PUSH5(ISAd, b, last, limit, trlink);
  1191. STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1192. last = a;
  1193. } else {
  1194. STACK_PUSH5(ISAd, b, last, limit, trlink);
  1195. ISAd += incr, first = a, last = b, limit = next;
  1196. }
  1197. } else {
  1198. STACK_PUSH5(ISAd, b, last, limit, trlink);
  1199. STACK_PUSH5(ISAd, first, a, limit, trlink);
  1200. ISAd += incr, first = a, last = b, limit = next;
  1201. }
  1202. } else {
  1203. if((a - first) <= (b - a)) {
  1204. if(1 < (last - b)) {
  1205. STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1206. STACK_PUSH5(ISAd, first, a, limit, trlink);
  1207. first = b;
  1208. } else if(1 < (a - first)) {
  1209. STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1210. last = a;
  1211. } else {
  1212. ISAd += incr, first = a, last = b, limit = next;
  1213. }
  1214. } else if((last - b) <= (b - a)) {
  1215. if(1 < (last - b)) {
  1216. STACK_PUSH5(ISAd, first, a, limit, trlink);
  1217. STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1218. first = b;
  1219. } else {
  1220. STACK_PUSH5(ISAd, first, a, limit, trlink);
  1221. ISAd += incr, first = a, last = b, limit = next;
  1222. }
  1223. } else {
  1224. STACK_PUSH5(ISAd, first, a, limit, trlink);
  1225. STACK_PUSH5(ISAd, b, last, limit, trlink);
  1226. ISAd += incr, first = a, last = b, limit = next;
  1227. }
  1228. }
  1229. } else {
  1230. if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
  1231. if((a - first) <= (last - b)) {
  1232. if(1 < (a - first)) {
  1233. STACK_PUSH5(ISAd, b, last, limit, trlink);
  1234. last = a;
  1235. } else if(1 < (last - b)) {
  1236. first = b;
  1237. } else {
  1238. STACK_POP5(ISAd, first, last, limit, trlink);
  1239. }
  1240. } else {
  1241. if(1 < (last - b)) {
  1242. STACK_PUSH5(ISAd, first, a, limit, trlink);
  1243. first = b;
  1244. } else if(1 < (a - first)) {
  1245. last = a;
  1246. } else {
  1247. STACK_POP5(ISAd, first, last, limit, trlink);
  1248. }
  1249. }
  1250. }
  1251. } else {
  1252. if(trbudget_check(budget, last - first)) {
  1253. limit = tr_ilg(last - first), ISAd += incr;
  1254. } else {
  1255. if(0 <= trlink) { stack[trlink].d = -1; }
  1256. STACK_POP5(ISAd, first, last, limit, trlink);
  1257. }
  1258. }
  1259. }
  1260. #undef STACK_SIZE
  1261. }
  1262. /*---------------------------------------------------------------------------*/
  1263. /* Tandem repeat sort */
  1264. static
  1265. void
  1266. trsort(int *ISA, int *SA, int n, int depth) {
  1267. int *ISAd;
  1268. int *first, *last;
  1269. trbudget_t budget;
  1270. int t, skip, unsorted;
  1271. trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
  1272. /* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
  1273. for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
  1274. first = SA;
  1275. skip = 0;
  1276. unsorted = 0;
  1277. do {
  1278. if((t = *first) < 0) { first -= t; skip += t; }
  1279. else {
  1280. if(skip != 0) { *(first + skip) = skip; skip = 0; }
  1281. last = SA + ISA[t] + 1;
  1282. if(1 < (last - first)) {
  1283. budget.count = 0;
  1284. tr_introsort(ISA, ISAd, SA, first, last, &budget);
  1285. if(budget.count != 0) { unsorted += budget.count; }
  1286. else { skip = first - last; }
  1287. } else if((last - first) == 1) {
  1288. skip = -1;
  1289. }
  1290. first = last;
  1291. }
  1292. } while(first < (SA + n));
  1293. if(skip != 0) { *(first + skip) = skip; }
  1294. if(unsorted == 0) { break; }
  1295. }
  1296. }
  1297. /*---------------------------------------------------------------------------*/
  1298. /* Sorts suffixes of type B*. */
  1299. static
  1300. int
  1301. sort_typeBstar(const unsigned char *T, int *SA,
  1302. int *bucket_A, int *bucket_B,
  1303. int n, int openMP) {
  1304. int *PAb, *ISAb, *buf;
  1305. #ifdef LIBBSC_OPENMP
  1306. int *curbuf;
  1307. int l;
  1308. #endif
  1309. int i, j, k, t, m, bufsize;
  1310. int c0, c1;
  1311. #ifdef LIBBSC_OPENMP
  1312. int d0, d1;
  1313. #endif
  1314. (void)openMP;
  1315. /* Initialize bucket arrays. */
  1316. for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
  1317. for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
  1318. /* Count the number of occurrences of the first one or two characters of each
  1319. type A, B and B* suffix. Moreover, store the beginning position of all
  1320. type B* suffixes into the array SA. */
  1321. for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
  1322. /* type A suffix. */
  1323. do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
  1324. if(0 <= i) {
  1325. /* type B* suffix. */
  1326. ++BUCKET_BSTAR(c0, c1);
  1327. SA[--m] = i;
  1328. /* type B suffix. */
  1329. for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
  1330. ++BUCKET_B(c0, c1);
  1331. }
  1332. }
  1333. }
  1334. m = n - m;
  1335. /*
  1336. note:
  1337. A type B* suffix is lexicographically smaller than a type B suffix that
  1338. begins with the same first two characters.
  1339. */
  1340. /* Calculate the index of start/end point of each bucket. */
  1341. for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
  1342. t = i + BUCKET_A(c0);
  1343. BUCKET_A(c0) = i + j; /* start point */
  1344. i = t + BUCKET_B(c0, c0);
  1345. for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
  1346. j += BUCKET_BSTAR(c0, c1);
  1347. BUCKET_BSTAR(c0, c1) = j; /* end point */
  1348. i += BUCKET_B(c0, c1);
  1349. }
  1350. }
  1351. if(0 < m) {
  1352. /* Sort the type B* suffixes by their first two characters. */
  1353. PAb = SA + n - m; ISAb = SA + m;
  1354. for(i = m - 2; 0 <= i; --i) {
  1355. t = PAb[i], c0 = T[t], c1 = T[t + 1];
  1356. SA[--BUCKET_BSTAR(c0, c1)] = i;
  1357. }
  1358. t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
  1359. SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
  1360. /* Sort the type B* substrings using sssort. */
  1361. #ifdef LIBBSC_OPENMP
  1362. if (openMP)
  1363. {
  1364. buf = SA + m;
  1365. c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
  1366. #pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
  1367. {
  1368. bufsize = (n - (2 * m)) / omp_get_num_threads();
  1369. curbuf = buf + omp_get_thread_num() * bufsize;
  1370. k = 0;
  1371. for(;;) {
  1372. #pragma omp critical(sssort_lock)
  1373. {
  1374. if(0 < (l = j)) {
  1375. d0 = c0, d1 = c1;
  1376. do {
  1377. k = BUCKET_BSTAR(d0, d1);
  1378. if(--d1 <= d0) {
  1379. d1 = ALPHABET_SIZE - 1;
  1380. if(--d0 < 0) { break; }
  1381. }
  1382. } while(((l - k) <= 1) && (0 < (l = k)));
  1383. c0 = d0, c1 = d1, j = k;
  1384. }
  1385. }
  1386. if(l == 0) { break; }
  1387. sssort(T, PAb, SA + k, SA + l,
  1388. curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
  1389. }
  1390. }
  1391. }
  1392. else
  1393. {
  1394. buf = SA + m, bufsize = n - (2 * m);
  1395. for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
  1396. for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
  1397. i = BUCKET_BSTAR(c0, c1);
  1398. if(1 < (j - i)) {
  1399. sssort(T, PAb, SA + i, SA + j,
  1400. buf, bufsize, 2, n, *(SA + i) == (m - 1));
  1401. }
  1402. }
  1403. }
  1404. }
  1405. #else
  1406. buf = SA + m, bufsize = n - (2 * m);
  1407. for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
  1408. for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
  1409. i = BUCKET_BSTAR(c0, c1);
  1410. if(1 < (j - i)) {
  1411. sssort(T, PAb, SA + i, SA + j,
  1412. buf, bufsize, 2, n, *(SA + i) == (m - 1));
  1413. }
  1414. }
  1415. }
  1416. #endif
  1417. /* Compute ranks of type B* substrings. */
  1418. for(i = m - 1; 0 <= i; --i) {
  1419. if(0 <= SA[i]) {
  1420. j = i;
  1421. do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
  1422. SA[i + 1] = i - j;
  1423. if(i <= 0) { break; }
  1424. }
  1425. j = i;
  1426. do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
  1427. ISAb[SA[i]] = j;
  1428. }
  1429. /* Construct the inverse suffix array of type B* suffixes using trsort. */
  1430. trsort(ISAb, SA, m, 1);
  1431. /* Set the sorted order of type B* suffixes. */
  1432. for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
  1433. for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
  1434. if(0 <= i) {
  1435. t = i;
  1436. for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
  1437. SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
  1438. }
  1439. }
  1440. /* Calculate the index of start/end point of each bucket. */
  1441. BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
  1442. for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
  1443. i = BUCKET_A(c0 + 1) - 1;
  1444. for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
  1445. t = i - BUCKET_B(c0, c1);
  1446. BUCKET_B(c0, c1) = i; /* end point */
  1447. /* Move all type B* suffixes to the correct position. */
  1448. for(i = t, j = BUCKET_BSTAR(c0, c1);
  1449. j <= k;
  1450. --i, --k) { SA[i] = SA[k]; }
  1451. }
  1452. BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
  1453. BUCKET_B(c0, c0) = i; /* end point */
  1454. }
  1455. }
  1456. return m;
  1457. }
  1458. /* Constructs the suffix array by using the sorted order of type B* suffixes. */
  1459. static
  1460. void
  1461. construct_SA(const unsigned char *T, int *SA,
  1462. int *bucket_A, int *bucket_B,
  1463. int n, int m) {
  1464. int *i, *j, *k;
  1465. int s;
  1466. int c0, c1, c2;
  1467. if(0 < m) {
  1468. /* Construct the sorted order of type B suffixes by using
  1469. the sorted order of type B* suffixes. */
  1470. for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
  1471. /* Scan the suffix array from right to left. */
  1472. for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
  1473. j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
  1474. i <= j;
  1475. --j) {
  1476. if(0 < (s = *j)) {
  1477. assert(T[s] == c1);
  1478. assert(((s + 1) < n) && (T[s] <= T[s + 1]));
  1479. assert(T[s - 1] <= T[s]);
  1480. *j = ~s;
  1481. c0 = T[--s];
  1482. if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
  1483. if(c0 != c2) {
  1484. if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
  1485. k = SA + BUCKET_B(c2 = c0, c1);
  1486. }
  1487. assert(k < j); assert(k != NULL);
  1488. *k-- = s;
  1489. } else {
  1490. assert(((s == 0) && (T[s] == c1)) || (s < 0));
  1491. *j = ~s;
  1492. }
  1493. }
  1494. }
  1495. }
  1496. /* Construct the suffix array by using
  1497. the sorted order of type B suffixes. */
  1498. k = SA + BUCKET_A(c2 = T[n - 1]);
  1499. *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
  1500. /* Scan the suffix array from left to right. */
  1501. for(i = SA, j = SA + n; i < j; ++i) {
  1502. if(0 < (s = *i)) {
  1503. assert(T[s - 1] >= T[s]);
  1504. c0 = T[--s];
  1505. if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
  1506. if(c0 != c2) {
  1507. BUCKET_A(c2) = k - SA;
  1508. k = SA + BUCKET_A(c2 = c0);
  1509. }
  1510. assert(i < k);
  1511. *k++ = s;
  1512. } else {
  1513. assert(s < 0);
  1514. *i = ~s;
  1515. }
  1516. }
  1517. }
  1518. /* Constructs the burrows-wheeler transformed string directly
  1519. by using the sorted order of type B* suffixes. */
  1520. static
  1521. int
  1522. construct_BWT(const unsigned char *T, int *SA,
  1523. int *bucket_A, int *bucket_B,
  1524. int n, int m) {
  1525. int *i, *j, *k, *orig;
  1526. int s;
  1527. int c0, c1, c2;
  1528. if(0 < m) {
  1529. /* Construct the sorted order of type B suffixes by using
  1530. the sorted order of type B* suffixes. */
  1531. for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
  1532. /* Scan the suffix array from right to left. */
  1533. for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
  1534. j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
  1535. i <= j;
  1536. --j) {
  1537. if(0 < (s = *j)) {
  1538. assert(T[s] == c1);
  1539. assert(((s + 1) < n) && (T[s] <= T[s + 1]));
  1540. assert(T[s - 1] <= T[s]);
  1541. c0 = T[--s];
  1542. *j = ~((int)c0);
  1543. if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
  1544. if(c0 != c2) {
  1545. if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
  1546. k = SA + BUCKET_B(c2 = c0, c1);
  1547. }
  1548. assert(k < j); assert(k != NULL);
  1549. *k-- = s;
  1550. } else if(s != 0) {
  1551. *j = ~s;
  1552. #ifndef NDEBUG
  1553. } else {
  1554. assert(T[s] == c1);
  1555. #endif
  1556. }
  1557. }
  1558. }
  1559. }
  1560. /* Construct the BWTed string by using
  1561. the sorted order of type B suffixes. */
  1562. k = SA + BUCKET_A(c2 = T[n - 1]);
  1563. *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1);
  1564. /* Scan the suffix array from left to right. */
  1565. for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
  1566. if(0 < (s = *i)) {
  1567. assert(T[s - 1] >= T[s]);
  1568. c0 = T[--s];
  1569. *i = c0;
  1570. if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
  1571. if(c0 != c2) {
  1572. BUCKET_A(c2) = k - SA;
  1573. k = SA + BUCKET_A(c2 = c0);
  1574. }
  1575. assert(i < k);
  1576. *k++ = s;
  1577. } else if(s != 0) {
  1578. *i = ~s;
  1579. } else {
  1580. orig = i;
  1581. }
  1582. }
  1583. return orig - SA;
  1584. }
  1585. /* Constructs the burrows-wheeler transformed string directly
  1586. by using the sorted order of type B* suffixes. */
  1587. static
  1588. int
  1589. construct_BWT_indexes(const unsigned char *T, int *SA,
  1590. int *bucket_A, int *bucket_B,
  1591. int n, int m,
  1592. unsigned char * num_indexes, int * indexes) {
  1593. int *i, *j, *k, *orig;
  1594. int s;
  1595. int c0, c1, c2;
  1596. int mod = n / 8;
  1597. {
  1598. mod |= mod >> 1; mod |= mod >> 2;
  1599. mod |= mod >> 4; mod |= mod >> 8;
  1600. mod |= mod >> 16; mod >>= 1;
  1601. *num_indexes = (unsigned char)((n - 1) / (mod + 1));
  1602. }
  1603. if(0 < m) {
  1604. /* Construct the sorted order of type B suffixes by using
  1605. the sorted order of type B* suffixes. */
  1606. for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
  1607. /* Scan the suffix array from right to left. */
  1608. for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
  1609. j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
  1610. i <= j;
  1611. --j) {
  1612. if(0 < (s = *j)) {
  1613. assert(T[s] == c1);
  1614. assert(((s + 1) < n) && (T[s] <= T[s + 1]));
  1615. assert(T[s - 1] <= T[s]);
  1616. if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA;
  1617. c0 = T[--s];
  1618. *j = ~((int)c0);
  1619. if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
  1620. if(c0 != c2) {
  1621. if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
  1622. k = SA + BUCKET_B(c2 = c0, c1);
  1623. }
  1624. assert(k < j); assert(k != NULL);
  1625. *k-- = s;
  1626. } else if(s != 0) {
  1627. *j = ~s;
  1628. #ifndef NDEBUG
  1629. } else {
  1630. assert(T[s] == c1);
  1631. #endif
  1632. }
  1633. }
  1634. }
  1635. }
  1636. /* Construct the BWTed string by using
  1637. the sorted order of type B suffixes. */
  1638. k = SA + BUCKET_A(c2 = T[n - 1]);
  1639. if (T[n - 2] < c2) {
  1640. if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA;
  1641. *k++ = ~((int)T[n - 2]);
  1642. }
  1643. else {
  1644. *k++ = n - 1;
  1645. }
  1646. /* Scan the suffix array from left to right. */
  1647. for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
  1648. if(0 < (s = *i)) {
  1649. assert(T[s - 1] >= T[s]);
  1650. if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA;
  1651. c0 = T[--s];
  1652. *i = c0;
  1653. if(c0 != c2) {
  1654. BUCKET_A(c2) = k - SA;
  1655. k = SA + BUCKET_A(c2 = c0);
  1656. }
  1657. assert(i < k);
  1658. if((0 < s) && (T[s - 1] < c0)) {
  1659. if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA;
  1660. *k++ = ~((int)T[s - 1]);
  1661. } else
  1662. *k++ = s;
  1663. } else if(s != 0) {
  1664. *i = ~s;
  1665. } else {
  1666. orig = i;
  1667. }
  1668. }
  1669. return orig - SA;
  1670. }
  1671. /*---------------------------------------------------------------------------*/
  1672. /*- Function -*/
  1673. int
  1674. divsufsort(const unsigned char *T, int *SA, int n, int openMP) {
  1675. int *bucket_A, *bucket_B;
  1676. int m;
  1677. int err = 0;
  1678. /* Check arguments. */
  1679. if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
  1680. else if(n == 0) { return 0; }
  1681. else if(n == 1) { SA[0] = 0; return 0; }
  1682. else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
  1683. bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
  1684. bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
  1685. /* Suffixsort. */
  1686. if((bucket_A != NULL) && (bucket_B != NULL)) {
  1687. m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP);
  1688. construct_SA(T, SA, bucket_A, bucket_B, n, m);
  1689. } else {
  1690. err = -2;
  1691. }
  1692. free(bucket_B);
  1693. free(bucket_A);
  1694. return err;
  1695. }
  1696. int
  1697. divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) {
  1698. int *B;
  1699. int *bucket_A, *bucket_B;
  1700. int m, pidx, i;
  1701. /* Check arguments. */
  1702. if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
  1703. else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
  1704. if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); }
  1705. bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
  1706. bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
  1707. /* Burrows-Wheeler Transform. */
  1708. if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
  1709. m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP);
  1710. if (num_indexes == NULL || indexes == NULL) {
  1711. pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
  1712. } else {
  1713. pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes);
  1714. }
  1715. /* Copy to output string. */
  1716. U[0] = T[n - 1];
  1717. for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
  1718. for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
  1719. pidx += 1;
  1720. } else {
  1721. pidx = -2;
  1722. }
  1723. free(bucket_B);
  1724. free(bucket_A);
  1725. if(A == NULL) { free(B); }
  1726. return pidx;
  1727. }