utf8compare.c 70 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235
  1. /** --- BEGIN COPYRIGHT BLOCK ---
  2. * Copyright (C) 2001 Sun Microsystems, Inc. Used by permission.
  3. * Copyright (C) 2005 Red Hat, Inc.
  4. * All rights reserved.
  5. --- END COPYRIGHT BLOCK --- */
  6. #include <stdio.h>
  7. #include <string.h>
  8. #include <ctype.h>
  9. #include "ldap.h"
  10. #include "dsgw.h"
  11. typedef struct sUpperLowerTbl {
  12. char *upper, *lower;
  13. int tsz; /* target size */
  14. } UpperLowerTbl_t;
  15. /*
  16. * dsgw_has8thBit: check the input string
  17. * return 1 if the string contains 8-bit character
  18. * return 0 otherwise
  19. */
  20. int
  21. dsgw_has8thBit(unsigned char *s)
  22. {
  23. unsigned char *p, *tail;
  24. tail = s + strlen((char *)s);
  25. for (p = s; p < tail; p++) {
  26. if (0x80 & *p) {
  27. return 1;
  28. }
  29. }
  30. return 0;
  31. }
  32. /*
  33. * UpperToLower Tables: sorted by upper characters
  34. */
  35. UpperLowerTbl_t Upper2LowerTbl20[] = {
  36. /* upper, lower */
  37. {"\303\200", "\303\240", 2},
  38. {"\303\201", "\303\241", 2},
  39. {"\303\202", "\303\242", 2},
  40. {"\303\203", "\303\243", 2},
  41. {"\303\204", "\303\244", 2},
  42. {"\303\205", "\303\245", 2},
  43. {"\303\206", "\303\246", 2},
  44. {"\303\207", "\303\247", 2},
  45. {"\303\210", "\303\250", 2},
  46. {"\303\211", "\303\251", 2},
  47. {"\303\212", "\303\252", 2},
  48. {"\303\213", "\303\253", 2},
  49. {"\303\214", "\303\254", 2},
  50. {"\303\215", "\303\255", 2},
  51. {"\303\216", "\303\256", 2},
  52. {"\303\217", "\303\257", 2},
  53. {"\303\220", "\303\260", 2},
  54. {"\303\221", "\303\261", 2},
  55. {"\303\222", "\303\262", 2},
  56. {"\303\223", "\303\263", 2},
  57. {"\303\224", "\303\264", 2},
  58. {"\303\225", "\303\265", 2},
  59. {"\303\226", "\303\266", 2},
  60. {"\303\230", "\303\270", 2},
  61. {"\303\231", "\303\271", 2},
  62. {"\303\232", "\303\272", 2},
  63. {"\303\233", "\303\273", 2},
  64. {"\303\234", "\303\274", 2},
  65. {"\303\235", "\303\275", 2},
  66. {"\303\236", "\303\276", 2},
  67. {NULL, NULL, 0}
  68. };
  69. UpperLowerTbl_t Upper2LowerTbl21[] = {
  70. {"\304\200", "\304\201", 2},
  71. {"\304\202", "\304\203", 2},
  72. {"\304\204", "\304\205", 2},
  73. {"\304\206", "\304\207", 2},
  74. {"\304\210", "\304\211", 2},
  75. {"\304\212", "\304\213", 2},
  76. {"\304\214", "\304\215", 2},
  77. {"\304\216", "\304\217", 2},
  78. {"\304\220", "\304\221", 2},
  79. {"\304\222", "\304\223", 2},
  80. {"\304\224", "\304\225", 2},
  81. {"\304\226", "\304\227", 2},
  82. {"\304\230", "\304\231", 2},
  83. {"\304\232", "\304\233", 2},
  84. {"\304\234", "\304\235", 2},
  85. {"\304\236", "\304\237", 2},
  86. {"\304\240", "\304\241", 2},
  87. {"\304\242", "\304\243", 2},
  88. {"\304\244", "\304\245", 2},
  89. {"\304\246", "\304\247", 2},
  90. {"\304\250", "\304\251", 2},
  91. {"\304\252", "\304\253", 2},
  92. {"\304\254", "\304\255", 2},
  93. {"\304\256", "\304\257", 2},
  94. {"\304\260", "\151", 1},
  95. {"\304\262", "\304\263", 2},
  96. {"\304\264", "\304\265", 2},
  97. {"\304\266", "\304\267", 2},
  98. {"\304\271", "\304\272", 2},
  99. {"\304\273", "\304\274", 2},
  100. {"\304\275", "\304\276", 2},
  101. {"\304\277", "\305\200", 2},
  102. {NULL, NULL, 0}
  103. };
  104. UpperLowerTbl_t Upper2LowerTbl22[] = {
  105. {"\305\201", "\305\202", 2},
  106. {"\305\203", "\305\204", 2},
  107. {"\305\205", "\305\206", 2},
  108. {"\305\207", "\305\210", 2},
  109. {"\305\212", "\305\213", 2},
  110. {"\305\214", "\305\215", 2},
  111. {"\305\216", "\305\217", 2},
  112. {"\305\220", "\305\221", 2},
  113. {"\305\222", "\305\223", 2},
  114. {"\305\224", "\305\225", 2},
  115. {"\305\226", "\305\227", 2},
  116. {"\305\230", "\305\231", 2},
  117. {"\305\232", "\305\233", 2},
  118. {"\305\234", "\305\235", 2},
  119. {"\305\236", "\305\237", 2},
  120. {"\305\240", "\305\241", 2},
  121. {"\305\242", "\305\243", 2},
  122. {"\305\244", "\305\245", 2},
  123. {"\305\246", "\305\247", 2},
  124. {"\305\250", "\305\251", 2},
  125. {"\305\252", "\305\253", 2},
  126. {"\305\254", "\305\255", 2},
  127. {"\305\256", "\305\257", 2},
  128. {"\305\260", "\305\261", 2},
  129. {"\305\262", "\305\263", 2},
  130. {"\305\264", "\305\265", 2},
  131. {"\305\266", "\305\267", 2},
  132. {"\305\270", "\303\277", 2},
  133. {"\305\271", "\305\272", 2},
  134. {"\305\273", "\305\274", 2},
  135. {"\305\275", "\305\276", 2},
  136. {NULL, NULL, 0}
  137. };
  138. UpperLowerTbl_t Upper2LowerTbl23[] = {
  139. {"\306\201", "\311\223", 2},
  140. {"\306\202", "\306\203", 2},
  141. {"\306\204", "\306\205", 2},
  142. {"\306\206", "\311\224", 2},
  143. {"\306\207", "\306\210", 2},
  144. {"\306\211", "\311\226", 2},
  145. {"\306\212", "\311\227", 2},
  146. {"\306\213", "\306\214", 2},
  147. {"\306\216", "\311\230", 2},
  148. {"\306\217", "\311\231", 2},
  149. {"\306\220", "\311\233", 2},
  150. {"\306\221", "\306\222", 2},
  151. {"\306\223", "\311\240", 2},
  152. {"\306\224", "\311\243", 2},
  153. {"\306\226", "\311\251", 2},
  154. {"\306\227", "\311\250", 2},
  155. {"\306\230", "\306\231", 2},
  156. {"\306\234", "\311\257", 2},
  157. {"\306\235", "\311\262", 2},
  158. {"\306\237", "\306\237", 2},
  159. {"\306\240", "\306\241", 2},
  160. {"\306\242", "\306\243", 2},
  161. {"\306\244", "\306\245", 2},
  162. {"\306\246", "\306\246", 2},
  163. {"\306\247", "\306\250", 2},
  164. {"\306\251", "\312\203", 2},
  165. {"\306\254", "\306\255", 2},
  166. {"\306\256", "\312\210", 2},
  167. {"\306\257", "\306\260", 2},
  168. {"\306\261", "\312\212", 2},
  169. {"\306\262", "\312\213", 2},
  170. {"\306\263", "\306\264", 2},
  171. {"\306\265", "\306\266", 2},
  172. {"\306\267", "\312\222", 2},
  173. {"\306\270", "\306\271", 2},
  174. {"\306\274", "\306\275", 2},
  175. {NULL, NULL, 0}
  176. };
  177. UpperLowerTbl_t Upper2LowerTbl24[] = {
  178. {"\307\204", "\307\205", 2},
  179. {"\307\205", "\307\204", 2},
  180. {"\307\207", "\307\210", 2},
  181. {"\307\210", "\307\207", 2},
  182. {"\307\212", "\307\213", 2},
  183. {"\307\213", "\307\212", 2},
  184. {"\307\215", "\307\216", 2},
  185. {"\307\217", "\307\220", 2},
  186. {"\307\221", "\307\222", 2},
  187. {"\307\223", "\307\224", 2},
  188. {"\307\225", "\307\226", 2},
  189. {"\307\227", "\307\230", 2},
  190. {"\307\231", "\307\232", 2},
  191. {"\307\233", "\307\234", 2},
  192. {"\307\236", "\307\237", 2},
  193. {"\307\240", "\307\241", 2},
  194. {"\307\242", "\307\243", 2},
  195. {"\307\244", "\307\245", 2},
  196. {"\307\246", "\307\247", 2},
  197. {"\307\250", "\307\251", 2},
  198. {"\307\252", "\307\253", 2},
  199. {"\307\254", "\307\255", 2},
  200. {"\307\256", "\307\257", 2},
  201. {"\307\261", "\307\262", 2},
  202. {"\307\262", "\307\261", 2},
  203. {"\307\264", "\307\265", 2},
  204. {"\307\272", "\307\273", 2},
  205. {"\307\274", "\307\275", 2},
  206. {"\307\276", "\307\277", 2},
  207. {NULL, NULL, 0}
  208. };
  209. UpperLowerTbl_t Upper2LowerTbl25[] = {
  210. {"\310\200", "\310\201", 2},
  211. {"\310\202", "\310\203", 2},
  212. {"\310\204", "\310\205", 2},
  213. {"\310\206", "\310\207", 2},
  214. {"\310\210", "\310\211", 2},
  215. {"\310\212", "\310\213", 2},
  216. {"\310\214", "\310\215", 2},
  217. {"\310\216", "\310\217", 2},
  218. {"\310\220", "\310\221", 2},
  219. {"\310\222", "\310\223", 2},
  220. {"\310\224", "\310\225", 2},
  221. {"\310\226", "\310\227", 2},
  222. {NULL, NULL, 0}
  223. };
  224. UpperLowerTbl_t Upper2LowerTbl26[] = {
  225. {"\316\206", "\316\254", 2},
  226. {"\316\210", "\316\255", 2},
  227. {"\316\211", "\316\256", 2},
  228. {"\316\212", "\316\257", 2},
  229. {"\316\214", "\317\214", 2},
  230. {"\316\216", "\317\215", 2},
  231. {"\316\217", "\317\216", 2},
  232. {"\316\221", "\316\261", 2},
  233. {"\316\222", "\316\262", 2},
  234. {"\316\223", "\316\263", 2},
  235. {"\316\224", "\316\264", 2},
  236. {"\316\225", "\316\265", 2},
  237. {"\316\226", "\316\266", 2},
  238. {"\316\227", "\316\267", 2},
  239. {"\316\230", "\316\270", 2},
  240. {"\316\231", "\316\271", 2},
  241. {"\316\232", "\316\272", 2},
  242. {"\316\233", "\316\273", 2},
  243. {"\316\234", "\316\274", 2},
  244. {"\316\235", "\316\275", 2},
  245. {"\316\236", "\316\276", 2},
  246. {"\316\237", "\316\277", 2},
  247. {"\316\240", "\317\200", 2},
  248. {"\316\241", "\317\201", 2},
  249. {"\316\243", "\317\203", 2},
  250. {"\316\244", "\317\204", 2},
  251. {"\316\245", "\317\205", 2},
  252. {"\316\246", "\317\206", 2},
  253. {"\316\247", "\317\207", 2},
  254. {"\316\250", "\317\210", 2},
  255. {"\316\251", "\317\211", 2},
  256. {"\316\252", "\317\212", 2},
  257. {"\316\253", "\317\213", 2},
  258. {NULL, NULL, 0}
  259. };
  260. UpperLowerTbl_t Upper2LowerTbl27[] = {
  261. {"\317\222", "\317\222", 2},
  262. {"\317\223", "\317\223", 2},
  263. {"\317\224", "\317\224", 2},
  264. {"\317\232", "\317\232", 2},
  265. {"\317\234", "\317\234", 2},
  266. {"\317\236", "\317\236", 2},
  267. {"\317\240", "\317\240", 2},
  268. {"\317\242", "\317\243", 2},
  269. {"\317\244", "\317\245", 2},
  270. {"\317\246", "\317\247", 2},
  271. {"\317\250", "\317\251", 2},
  272. {"\317\252", "\317\253", 2},
  273. {"\317\254", "\317\255", 2},
  274. {"\317\256", "\317\257", 2},
  275. {NULL, NULL, 0}
  276. };
  277. UpperLowerTbl_t Upper2LowerTbl28[] = {
  278. {"\320\201", "\321\221", 2},
  279. {"\320\202", "\321\222", 2},
  280. {"\320\203", "\321\223", 2},
  281. {"\320\204", "\321\224", 2},
  282. {"\320\205", "\321\225", 2},
  283. {"\320\206", "\321\226", 2},
  284. {"\320\207", "\321\227", 2},
  285. {"\320\210", "\321\230", 2},
  286. {"\320\211", "\321\231", 2},
  287. {"\320\212", "\321\232", 2},
  288. {"\320\213", "\321\233", 2},
  289. {"\320\214", "\321\234", 2},
  290. {"\320\216", "\321\236", 2},
  291. {"\320\217", "\321\237", 2},
  292. {"\320\220", "\320\260", 2},
  293. {"\320\221", "\320\261", 2},
  294. {"\320\222", "\320\262", 2},
  295. {"\320\223", "\320\263", 2},
  296. {"\320\224", "\320\264", 2},
  297. {"\320\225", "\320\265", 2},
  298. {"\320\226", "\320\266", 2},
  299. {"\320\227", "\320\267", 2},
  300. {"\320\230", "\320\270", 2},
  301. {"\320\231", "\320\271", 2},
  302. {"\320\232", "\320\272", 2},
  303. {"\320\233", "\320\273", 2},
  304. {"\320\234", "\320\274", 2},
  305. {"\320\235", "\320\275", 2},
  306. {"\320\236", "\320\276", 2},
  307. {"\320\237", "\320\277", 2},
  308. {"\320\240", "\321\200", 2},
  309. {"\320\241", "\321\201", 2},
  310. {"\320\242", "\321\202", 2},
  311. {"\320\243", "\321\203", 2},
  312. {"\320\244", "\321\204", 2},
  313. {"\320\245", "\321\205", 2},
  314. {"\320\246", "\321\206", 2},
  315. {"\320\247", "\321\207", 2},
  316. {"\320\250", "\321\210", 2},
  317. {"\320\251", "\321\211", 2},
  318. {"\320\252", "\321\212", 2},
  319. {"\320\253", "\321\213", 2},
  320. {"\320\254", "\321\214", 2},
  321. {"\320\255", "\321\215", 2},
  322. {"\320\256", "\321\216", 2},
  323. {"\320\257", "\321\217", 2},
  324. {NULL, NULL, 0}
  325. };
  326. UpperLowerTbl_t Upper2LowerTbl29[] = {
  327. {"\321\240", "\321\241", 2},
  328. {"\321\242", "\321\243", 2},
  329. {"\321\244", "\321\245", 2},
  330. {"\321\246", "\321\247", 2},
  331. {"\321\250", "\321\251", 2},
  332. {"\321\252", "\321\253", 2},
  333. {"\321\254", "\321\255", 2},
  334. {"\321\256", "\321\257", 2},
  335. {"\321\260", "\321\261", 2},
  336. {"\321\262", "\321\263", 2},
  337. {"\321\264", "\321\265", 2},
  338. {"\321\266", "\321\267", 2},
  339. {"\321\270", "\321\271", 2},
  340. {"\321\272", "\321\273", 2},
  341. {"\321\274", "\321\275", 2},
  342. {"\321\276", "\321\277", 2},
  343. {NULL, NULL, 0}
  344. };
  345. UpperLowerTbl_t Upper2LowerTbl2a[] = {
  346. {"\322\200", "\322\201", 2},
  347. {"\322\220", "\322\221", 2},
  348. {"\322\222", "\322\223", 2},
  349. {"\322\224", "\322\225", 2},
  350. {"\322\226", "\322\227", 2},
  351. {"\322\230", "\322\231", 2},
  352. {"\322\232", "\322\233", 2},
  353. {"\322\234", "\322\235", 2},
  354. {"\322\236", "\322\237", 2},
  355. {"\322\240", "\322\241", 2},
  356. {"\322\242", "\322\243", 2},
  357. {"\322\244", "\322\245", 2},
  358. {"\322\246", "\322\247", 2},
  359. {"\322\250", "\322\251", 2},
  360. {"\322\252", "\322\253", 2},
  361. {"\322\254", "\322\255", 2},
  362. {"\322\256", "\322\257", 2},
  363. {"\322\260", "\322\261", 2},
  364. {"\322\262", "\322\263", 2},
  365. {"\322\264", "\322\265", 2},
  366. {"\322\266", "\322\267", 2},
  367. {"\322\270", "\322\271", 2},
  368. {"\322\272", "\322\273", 2},
  369. {"\322\274", "\322\275", 2},
  370. {"\322\276", "\322\277", 2},
  371. {NULL, NULL, 0}
  372. };
  373. UpperLowerTbl_t Upper2LowerTbl2b[] = {
  374. {"\323\201", "\323\202", 2},
  375. {"\323\203", "\323\204", 2},
  376. {"\323\207", "\323\210", 2},
  377. {"\323\213", "\323\214", 2},
  378. {"\323\220", "\323\221", 2},
  379. {"\323\222", "\323\223", 2},
  380. {"\323\224", "\323\225", 2},
  381. {"\323\226", "\323\227", 2},
  382. {"\323\230", "\323\231", 2},
  383. {"\323\232", "\323\233", 2},
  384. {"\323\234", "\323\235", 2},
  385. {"\323\236", "\323\237", 2},
  386. {"\323\240", "\323\241", 2},
  387. {"\323\242", "\323\243", 2},
  388. {"\323\244", "\323\245", 2},
  389. {"\323\246", "\323\247", 2},
  390. {"\323\250", "\323\251", 2},
  391. {"\323\252", "\323\253", 2},
  392. {"\323\256", "\323\257", 2},
  393. {"\323\260", "\323\261", 2},
  394. {"\323\262", "\323\263", 2},
  395. {"\323\264", "\323\265", 2},
  396. {"\323\270", "\323\271", 2},
  397. {NULL, NULL, 0}
  398. };
  399. UpperLowerTbl_t Upper2LowerTbl2c[] = {
  400. {"\324\261", "\325\241", 2},
  401. {"\324\262", "\325\242", 2},
  402. {"\324\263", "\325\243", 2},
  403. {"\324\264", "\325\244", 2},
  404. {"\324\265", "\325\245", 2},
  405. {"\324\266", "\325\246", 2},
  406. {"\324\267", "\325\247", 2},
  407. {"\324\270", "\325\250", 2},
  408. {"\324\271", "\325\251", 2},
  409. {"\324\272", "\325\252", 2},
  410. {"\324\273", "\325\253", 2},
  411. {"\324\274", "\325\254", 2},
  412. {"\324\275", "\325\255", 2},
  413. {"\324\276", "\325\256", 2},
  414. {"\324\277", "\325\257", 2},
  415. {NULL, NULL, 0}
  416. };
  417. UpperLowerTbl_t Upper2LowerTbl2d[] = {
  418. {"\325\200", "\325\260", 2},
  419. {"\325\201", "\325\261", 2},
  420. {"\325\202", "\325\262", 2},
  421. {"\325\203", "\325\263", 2},
  422. {"\325\204", "\325\264", 2},
  423. {"\325\205", "\325\265", 2},
  424. {"\325\206", "\325\266", 2},
  425. {"\325\207", "\325\267", 2},
  426. {"\325\210", "\325\270", 2},
  427. {"\325\211", "\325\271", 2},
  428. {"\325\212", "\325\272", 2},
  429. {"\325\213", "\325\273", 2},
  430. {"\325\214", "\325\274", 2},
  431. {"\325\215", "\325\275", 2},
  432. {"\325\216", "\325\276", 2},
  433. {"\325\217", "\325\277", 2},
  434. {"\325\220", "\326\200", 2},
  435. {"\325\221", "\326\201", 2},
  436. {"\325\222", "\326\202", 2},
  437. {"\325\223", "\326\203", 2},
  438. {"\325\224", "\326\204", 2},
  439. {"\325\225", "\326\205", 2},
  440. {"\325\226", "\326\206", 2},
  441. {NULL, NULL, 0}
  442. /* upper, lower */
  443. };
  444. UpperLowerTbl_t Upper2LowerTbl30[] = {
  445. /* upper, lower */
  446. {"\341\202\240", "\341\203\220", 3},
  447. {"\341\202\241", "\341\203\221", 3},
  448. {"\341\202\242", "\341\203\222", 3},
  449. {"\341\202\243", "\341\203\223", 3},
  450. {"\341\202\244", "\341\203\224", 3},
  451. {"\341\202\245", "\341\203\225", 3},
  452. {"\341\202\246", "\341\203\226", 3},
  453. {"\341\202\247", "\341\203\227", 3},
  454. {"\341\202\250", "\341\203\230", 3},
  455. {"\341\202\251", "\341\203\231", 3},
  456. {"\341\202\252", "\341\203\232", 3},
  457. {"\341\202\253", "\341\203\233", 3},
  458. {"\341\202\254", "\341\203\234", 3},
  459. {"\341\202\255", "\341\203\235", 3},
  460. {"\341\202\256", "\341\203\236", 3},
  461. {"\341\202\257", "\341\203\237", 3},
  462. {"\341\202\260", "\341\203\240", 3},
  463. {"\341\202\261", "\341\203\241", 3},
  464. {"\341\202\262", "\341\203\242", 3},
  465. {"\341\202\263", "\341\203\243", 3},
  466. {"\341\202\264", "\341\203\244", 3},
  467. {"\341\202\265", "\341\203\245", 3},
  468. {"\341\202\266", "\341\203\246", 3},
  469. {"\341\202\267", "\341\203\247", 3},
  470. {"\341\202\270", "\341\203\250", 3},
  471. {"\341\202\271", "\341\203\251", 3},
  472. {"\341\202\272", "\341\203\252", 3},
  473. {"\341\202\273", "\341\203\253", 3},
  474. {"\341\202\274", "\341\203\254", 3},
  475. {"\341\202\275", "\341\203\255", 3},
  476. {"\341\202\276", "\341\203\256", 3},
  477. {"\341\202\277", "\341\203\257", 3},
  478. {"\341\203\200", "\341\203\260", 3},
  479. {"\341\203\201", "\341\203\261", 3},
  480. {"\341\203\202", "\341\203\262", 3},
  481. {"\341\203\203", "\341\203\263", 3},
  482. {"\341\203\204", "\341\203\264", 3},
  483. {"\341\203\205", "\341\203\265", 3},
  484. {"\341\270\200", "\341\270\201", 3},
  485. {"\341\270\202", "\341\270\203", 3},
  486. {"\341\270\204", "\341\270\205", 3},
  487. {"\341\270\206", "\341\270\207", 3},
  488. {"\341\270\210", "\341\270\211", 3},
  489. {"\341\270\212", "\341\270\213", 3},
  490. {"\341\270\214", "\341\270\215", 3},
  491. {"\341\270\216", "\341\270\217", 3},
  492. {"\341\270\220", "\341\270\221", 3},
  493. {"\341\270\222", "\341\270\223", 3},
  494. {"\341\270\224", "\341\270\225", 3},
  495. {"\341\270\226", "\341\270\227", 3},
  496. {"\341\270\230", "\341\270\231", 3},
  497. {"\341\270\232", "\341\270\233", 3},
  498. {"\341\270\234", "\341\270\235", 3},
  499. {"\341\270\236", "\341\270\237", 3},
  500. {"\341\270\240", "\341\270\241", 3},
  501. {"\341\270\242", "\341\270\243", 3},
  502. {"\341\270\244", "\341\270\245", 3},
  503. {"\341\270\246", "\341\270\247", 3},
  504. {"\341\270\250", "\341\270\251", 3},
  505. {"\341\270\252", "\341\270\253", 3},
  506. {"\341\270\254", "\341\270\255", 3},
  507. {"\341\270\256", "\341\270\257", 3},
  508. {"\341\270\260", "\341\270\261", 3},
  509. {"\341\270\262", "\341\270\263", 3},
  510. {"\341\270\264", "\341\270\265", 3},
  511. {"\341\270\266", "\341\270\267", 3},
  512. {"\341\270\270", "\341\270\271", 3},
  513. {"\341\270\272", "\341\270\273", 3},
  514. {"\341\270\274", "\341\270\275", 3},
  515. {"\341\270\276", "\341\270\277", 3},
  516. {"\341\271\200", "\341\271\201", 3},
  517. {"\341\271\202", "\341\271\203", 3},
  518. {"\341\271\204", "\341\271\205", 3},
  519. {"\341\271\206", "\341\271\207", 3},
  520. {"\341\271\210", "\341\271\211", 3},
  521. {"\341\271\212", "\341\271\213", 3},
  522. {"\341\271\214", "\341\271\215", 3},
  523. {"\341\271\216", "\341\271\217", 3},
  524. {"\341\271\220", "\341\271\221", 3},
  525. {"\341\271\222", "\341\271\223", 3},
  526. {"\341\271\224", "\341\271\225", 3},
  527. {"\341\271\226", "\341\271\227", 3},
  528. {"\341\271\230", "\341\271\231", 3},
  529. {"\341\271\232", "\341\271\233", 3},
  530. {"\341\271\234", "\341\271\235", 3},
  531. {"\341\271\236", "\341\271\237", 3},
  532. {"\341\271\240", "\341\271\241", 3},
  533. {"\341\271\242", "\341\271\243", 3},
  534. {"\341\271\244", "\341\271\245", 3},
  535. {"\341\271\246", "\341\271\247", 3},
  536. {"\341\271\250", "\341\271\251", 3},
  537. {"\341\271\252", "\341\271\253", 3},
  538. {"\341\271\254", "\341\271\255", 3},
  539. {"\341\271\256", "\341\271\257", 3},
  540. {"\341\271\260", "\341\271\261", 3},
  541. {"\341\271\262", "\341\271\263", 3},
  542. {"\341\271\264", "\341\271\265", 3},
  543. {"\341\271\266", "\341\271\267", 3},
  544. {"\341\271\270", "\341\271\271", 3},
  545. {"\341\271\272", "\341\271\273", 3},
  546. {"\341\271\274", "\341\271\275", 3},
  547. {"\341\271\276", "\341\271\277", 3},
  548. {"\341\272\200", "\341\272\201", 3},
  549. {"\341\272\202", "\341\272\203", 3},
  550. {"\341\272\204", "\341\272\205", 3},
  551. {"\341\272\206", "\341\272\207", 3},
  552. {"\341\272\210", "\341\272\211", 3},
  553. {"\341\272\212", "\341\272\213", 3},
  554. {"\341\272\214", "\341\272\215", 3},
  555. {"\341\272\216", "\341\272\217", 3},
  556. {"\341\272\220", "\341\272\221", 3},
  557. {"\341\272\222", "\341\272\223", 3},
  558. {"\341\272\224", "\341\272\225", 3},
  559. {"\341\272\240", "\341\272\241", 3},
  560. {"\341\272\242", "\341\272\243", 3},
  561. {"\341\272\244", "\341\272\245", 3},
  562. {"\341\272\246", "\341\272\247", 3},
  563. {"\341\272\250", "\341\272\251", 3},
  564. {"\341\272\252", "\341\272\253", 3},
  565. {"\341\272\254", "\341\272\255", 3},
  566. {"\341\272\256", "\341\272\257", 3},
  567. {"\341\272\260", "\341\272\261", 3},
  568. {"\341\272\262", "\341\272\263", 3},
  569. {"\341\272\264", "\341\272\265", 3},
  570. {"\341\272\266", "\341\272\267", 3},
  571. {"\341\272\270", "\341\272\271", 3},
  572. {"\341\272\272", "\341\272\273", 3},
  573. {"\341\272\274", "\341\272\275", 3},
  574. {"\341\272\276", "\341\272\277", 3},
  575. {"\341\273\200", "\341\273\201", 3},
  576. {"\341\273\202", "\341\273\203", 3},
  577. {"\341\273\204", "\341\273\205", 3},
  578. {"\341\273\206", "\341\273\207", 3},
  579. {"\341\273\210", "\341\273\211", 3},
  580. {"\341\273\212", "\341\273\213", 3},
  581. {"\341\273\214", "\341\273\215", 3},
  582. {"\341\273\216", "\341\273\217", 3},
  583. {"\341\273\220", "\341\273\221", 3},
  584. {"\341\273\222", "\341\273\223", 3},
  585. {"\341\273\224", "\341\273\225", 3},
  586. {"\341\273\226", "\341\273\227", 3},
  587. {"\341\273\230", "\341\273\231", 3},
  588. {"\341\273\232", "\341\273\233", 3},
  589. {"\341\273\234", "\341\273\235", 3},
  590. {"\341\273\236", "\341\273\237", 3},
  591. {"\341\273\240", "\341\273\241", 3},
  592. {"\341\273\242", "\341\273\243", 3},
  593. {"\341\273\244", "\341\273\245", 3},
  594. {"\341\273\246", "\341\273\247", 3},
  595. {"\341\273\250", "\341\273\251", 3},
  596. {"\341\273\252", "\341\273\253", 3},
  597. {"\341\273\254", "\341\273\255", 3},
  598. {"\341\273\256", "\341\273\257", 3},
  599. {"\341\273\260", "\341\273\261", 3},
  600. {"\341\273\262", "\341\273\263", 3},
  601. {"\341\273\264", "\341\273\265", 3},
  602. {"\341\273\266", "\341\273\267", 3},
  603. {"\341\273\270", "\341\273\271", 3},
  604. {"\341\274\210", "\341\274\200", 3},
  605. {"\341\274\211", "\341\274\201", 3},
  606. {"\341\274\212", "\341\274\202", 3},
  607. {"\341\274\213", "\341\274\203", 3},
  608. {"\341\274\214", "\341\274\204", 3},
  609. {"\341\274\215", "\341\274\205", 3},
  610. {"\341\274\216", "\341\274\206", 3},
  611. {"\341\274\217", "\341\274\207", 3},
  612. {"\341\274\230", "\341\274\220", 3},
  613. {"\341\274\231", "\341\274\221", 3},
  614. {"\341\274\232", "\341\274\222", 3},
  615. {"\341\274\233", "\341\274\223", 3},
  616. {"\341\274\234", "\341\274\224", 3},
  617. {"\341\274\235", "\341\274\225", 3},
  618. {"\341\274\250", "\341\274\240", 3},
  619. {"\341\274\251", "\341\274\241", 3},
  620. {"\341\274\252", "\341\274\242", 3},
  621. {"\341\274\253", "\341\274\243", 3},
  622. {"\341\274\254", "\341\274\244", 3},
  623. {"\341\274\255", "\341\274\245", 3},
  624. {"\341\274\256", "\341\274\246", 3},
  625. {"\341\274\257", "\341\274\247", 3},
  626. {"\341\274\270", "\341\274\260", 3},
  627. {"\341\274\271", "\341\274\261", 3},
  628. {"\341\274\272", "\341\274\262", 3},
  629. {"\341\274\273", "\341\274\263", 3},
  630. {"\341\274\274", "\341\274\264", 3},
  631. {"\341\274\275", "\341\274\265", 3},
  632. {"\341\274\276", "\341\274\266", 3},
  633. {"\341\274\277", "\341\274\267", 3},
  634. {"\341\275\210", "\341\275\200", 3},
  635. {"\341\275\211", "\341\275\201", 3},
  636. {"\341\275\212", "\341\275\202", 3},
  637. {"\341\275\213", "\341\275\203", 3},
  638. {"\341\275\214", "\341\275\204", 3},
  639. {"\341\275\215", "\341\275\205", 3},
  640. {"\341\275\231", "\341\275\221", 3},
  641. {"\341\275\233", "\341\275\223", 3},
  642. {"\341\275\235", "\341\275\225", 3},
  643. {"\341\275\237", "\341\275\227", 3},
  644. {"\341\275\250", "\341\275\240", 3},
  645. {"\341\275\251", "\341\275\241", 3},
  646. {"\341\275\252", "\341\275\242", 3},
  647. {"\341\275\253", "\341\275\243", 3},
  648. {"\341\275\254", "\341\275\244", 3},
  649. {"\341\275\255", "\341\275\245", 3},
  650. {"\341\275\256", "\341\275\246", 3},
  651. {"\341\275\257", "\341\275\247", 3},
  652. {"\341\276\210", "\341\276\200", 3},
  653. {"\341\276\211", "\341\276\201", 3},
  654. {"\341\276\212", "\341\276\202", 3},
  655. {"\341\276\213", "\341\276\203", 3},
  656. {"\341\276\214", "\341\276\204", 3},
  657. {"\341\276\215", "\341\276\205", 3},
  658. {"\341\276\216", "\341\276\206", 3},
  659. {"\341\276\217", "\341\276\207", 3},
  660. {"\341\276\230", "\341\276\220", 3},
  661. {"\341\276\231", "\341\276\221", 3},
  662. {"\341\276\232", "\341\276\222", 3},
  663. {"\341\276\233", "\341\276\223", 3},
  664. {"\341\276\234", "\341\276\224", 3},
  665. {"\341\276\235", "\341\276\225", 3},
  666. {"\341\276\236", "\341\276\226", 3},
  667. {"\341\276\237", "\341\276\227", 3},
  668. {"\341\276\250", "\341\276\240", 3},
  669. {"\341\276\251", "\341\276\241", 3},
  670. {"\341\276\252", "\341\276\242", 3},
  671. {"\341\276\253", "\341\276\243", 3},
  672. {"\341\276\254", "\341\276\244", 3},
  673. {"\341\276\255", "\341\276\245", 3},
  674. {"\341\276\256", "\341\276\246", 3},
  675. {"\341\276\257", "\341\276\247", 3},
  676. {"\341\276\270", "\341\276\260", 3},
  677. {"\341\276\271", "\341\276\261", 3},
  678. {"\341\276\272", "\341\275\260", 3},
  679. {"\341\276\273", "\341\275\261", 3},
  680. {"\341\276\274", "\341\276\263", 3},
  681. {"\341\276\276", "\341\276\276", 3},
  682. {"\341\277\210", "\341\275\262", 3},
  683. {"\341\277\211", "\341\275\263", 3},
  684. {"\341\277\212", "\341\275\264", 3},
  685. {"\341\277\213", "\341\275\265", 3},
  686. {"\341\277\214", "\341\277\203", 3},
  687. {"\341\277\230", "\341\277\220", 3},
  688. {"\341\277\231", "\341\277\221", 3},
  689. {"\341\277\232", "\341\275\266", 3},
  690. {"\341\277\233", "\341\275\267", 3},
  691. {"\341\277\250", "\341\277\240", 3},
  692. {"\341\277\251", "\341\277\241", 3},
  693. {"\341\277\252", "\341\275\272", 3},
  694. {"\341\277\253", "\341\275\273", 3},
  695. {"\341\277\254", "\341\277\245", 3},
  696. {"\341\277\270", "\341\275\270", 3},
  697. {"\341\277\271", "\341\275\271", 3},
  698. {"\341\277\272", "\341\275\274", 3},
  699. {"\341\277\273", "\341\275\275", 3},
  700. {"\341\277\274", "\341\277\263", 3},
  701. {NULL, NULL, 0}
  702. };
  /*
   * Upper2LowerTbl31: upper -> lower pairs for 3-byte characters whose lead
   * byte is \357.  The entries cover U+FF21..U+FF3A (fullwidth Latin capital
   * letters A-Z) mapped to U+FF41..U+FF5A.
   * Each entry is {upper, lower, tsz}; tsz is 3 in every entry here, which is
   * the byte length of the lower-case form.
   */
  703. UpperLowerTbl_t Upper2LowerTbl31[] = {
  704. {"\357\274\241", "\357\275\201", 3},
  705. {"\357\274\242", "\357\275\202", 3},
  706. {"\357\274\243", "\357\275\203", 3},
  707. {"\357\274\244", "\357\275\204", 3},
  708. {"\357\274\245", "\357\275\205", 3},
  709. {"\357\274\246", "\357\275\206", 3},
  710. {"\357\274\247", "\357\275\207", 3},
  711. {"\357\274\250", "\357\275\210", 3},
  712. {"\357\274\251", "\357\275\211", 3},
  713. {"\357\274\252", "\357\275\212", 3},
  714. {"\357\274\253", "\357\275\213", 3},
  715. {"\357\274\254", "\357\275\214", 3},
  716. {"\357\274\255", "\357\275\215", 3},
  717. {"\357\274\256", "\357\275\216", 3},
  718. {"\357\274\257", "\357\275\217", 3},
  719. {"\357\274\260", "\357\275\220", 3},
  720. {"\357\274\261", "\357\275\221", 3},
  721. {"\357\274\262", "\357\275\222", 3},
  722. {"\357\274\263", "\357\275\223", 3},
  723. {"\357\274\264", "\357\275\224", 3},
  724. {"\357\274\265", "\357\275\225", 3},
  725. {"\357\274\266", "\357\275\226", 3},
  726. {"\357\274\267", "\357\275\227", 3},
  727. {"\357\274\270", "\357\275\230", 3},
  728. {"\357\274\271", "\357\275\231", 3},
  729. {"\357\274\272", "\357\275\232", 3},
  730. {NULL, NULL, 0}
  731. /* column order of the entries above: upper, lower; the {NULL, NULL, 0} entry ends the table */
  732. };
  /*
   * Upper2LowerTbl2: per-lead-byte dispatch for 2-byte characters.
   * The conversion routines index it with (lead byte - UL2S), so slot 0 is
   * lead byte \303 and the last slot is \325 (UL2E); the trailing comment on
   * each slot names its lead byte.  A NULL slot means there is no table for
   * that lead byte and the callers leave such characters unchanged.
   */
  733. UpperLowerTbl_t *Upper2LowerTbl2[] = {
  734. Upper2LowerTbl20, /* \303 */
  735. Upper2LowerTbl21, /* \304 */
  736. Upper2LowerTbl22, /* \305 */
  737. Upper2LowerTbl23, /* \306 */
  738. Upper2LowerTbl24, /* \307 */
  739. Upper2LowerTbl25, /* \310 */
  740. NULL, /* \311 */
  741. NULL, /* \312 */
  742. NULL, /* \313 */
  743. NULL, /* \314 */
  744. NULL, /* \315 */
  745. Upper2LowerTbl26, /* \316 */
  746. Upper2LowerTbl27, /* \317 */
  747. Upper2LowerTbl28, /* \320 */
  748. Upper2LowerTbl29, /* \321 */
  749. Upper2LowerTbl2a, /* \322 */
  750. Upper2LowerTbl2b, /* \323 */
  751. Upper2LowerTbl2c, /* \324 */
  752. Upper2LowerTbl2d /* \325 */
  753. };
  /*
   * Upper2LowerTbl3: per-lead-byte dispatch for 3-byte characters.
   * Indexed with (lead byte - UL3S): slot 0 is lead byte \341 and the last
   * slot is \357 (UL3E).  Only those two lead bytes have a table; every slot
   * in between is NULL, which the callers treat as "no case mapping".
   */
  754. UpperLowerTbl_t *Upper2LowerTbl3[] = {
  755. Upper2LowerTbl30, /* \341 */
  756. NULL, /* \342 */
  757. NULL, /* \343 */
  758. NULL, /* \344 */
  759. NULL, /* \345 */
  760. NULL, /* \346 */
  761. NULL, /* \347 */
  762. NULL, /* \350 */
  763. NULL, /* \351 */
  764. NULL, /* \352 */
  765. NULL, /* \353 */
  766. NULL, /* \354 */
  767. NULL, /* \355 */
  768. NULL, /* \356 */
  769. Upper2LowerTbl31 /* \357 */
  770. };
  /*
   * Lead-byte bounds of the Upper2Lower dispatch tables.
   * UL2S..UL2E: first/last lead byte covered by Upper2LowerTbl2 (2-byte chars).
   * UL3S..UL3E: first/last lead byte covered by Upper2LowerTbl3 (3-byte chars).
   * The cast keeps the value in 0..255 even where plain char is signed; the
   * outer parentheses make each macro a single primary expression so it can
   * be combined with any operator (including sizeof) safely.
   */
  #define UL2S ((unsigned char)'\303')
  #define UL2E ((unsigned char)'\325')
  #define UL3S ((unsigned char)'\341')
  #define UL3E ((unsigned char)'\357')
  775. /*
  776. * dsgw_utf8StrToLower: translate upper-case string to lower-case
  777. *
  778. * input: a null terminated UTF-8 string
  779. * output: a null terminated UTF-8 string which characters are
  780. * converted to lower-case; characters which are not
  781. * upper-case are copied as is. If it's not considered
  782. * a UTF-8 string, NULL is returned.
  783. *
  784. * Notes: This function takes a string (made of multiple UTF-8 characters)
  785. * for the input (not one character as in "tolower").
  786. * Output string is allocated in this function, which needs to be
  787. * released when it's not needed any more.
  788. */
  789. unsigned char *
  790. dsgw_utf8StrToLower(unsigned char *s)
  791. {
  792. UpperLowerTbl_t *ultp;
  793. unsigned char *p, *np, *tail;
  794. unsigned char *lp, *lphead;
  795. int len, sz;
  796. if (s == NULL || *s == '\0') {
  797. return s;
  798. }
  799. len = strlen((char *)s);
  800. tail = s + len;
  801. lphead = lp = (unsigned char *)dsgw_ch_malloc(len + 1);
  802. p = s;
  803. while ((np = (unsigned char *)ldap_utf8next((char *)p)) <= tail) {
  804. switch(sz = np - p) {
  805. case 1:
  806. sprintf((char *)lp, "%c", tolower(*p));
  807. break;
  808. case 2:
  809. if (*p < UL2S || *p > UL2E) { /* out of range */
  810. memcpy(lp, p, sz);
  811. break;
  812. }
  813. for (ultp = Upper2LowerTbl2[*p - UL2S];
  814. ultp && ultp->upper && memcmp(p, ultp->upper, sz);
  815. ultp++)
  816. ;
  817. if (!ultp) { /* out of range */
  818. memcpy(lp, p, sz);
  819. } if (ultp->upper) { /* matched */
  820. memcpy(lp, ultp->lower, ultp->tsz);
  821. sz = ultp->tsz;
  822. } else {
  823. memcpy(lp, p, sz);
  824. }
  825. break;
  826. case 3:
  827. if (*p != UL3S && *p != UL3E) { /* out of range */
  828. memcpy(lp, p, sz);
  829. break;
  830. }
  831. for (ultp = Upper2LowerTbl3[*p - UL3S];
  832. ultp && ultp->upper && memcmp(p, ultp->upper, sz);
  833. ultp++)
  834. ;
  835. if (!ultp) { /* out of range */
  836. memcpy(lp, p, sz);
  837. } if (ultp->upper) { /* matched */
  838. memcpy(lp, ultp->lower, sz);
  839. } else {
  840. memcpy(lp, p, sz);
  841. }
  842. break;
  843. case 4:
  844. memcpy(lp, p, sz);
  845. break;
  846. default: /* not UTF-8 */
  847. free(lphead);
  848. return NULL;
  849. }
  850. lp += sz;
  851. p = np;
  852. if (p == tail) {
  853. break;
  854. }
  855. }
  856. *lp = '\0';
  857. return lphead;
  858. }
  859. /*
  860. * dsgw_utf8ToLower: translate upper-case character to lower-case
  861. *
  862. * input: a UTF-8 character (s)
  863. * output: a UTF-8 character which is converted to lower-case (d)
  864. * length (in bytes) of input character (ssz) and
  865. * output character (dsz)
  866. *
  867. * Notes: This function takes a UTF-8 character (could be multiple bytes)
  868. * for the input. Memory for the output character is NOT allocated
  869. * in this function, caller should have allocated it (d).
  870. * "memmove" is used since (s) and (d) are overlapped.
  871. */
  872. void
  873. dsgw_utf8ToLower(unsigned char *s, unsigned char *d, int *ssz, int *dsz)
  874. {
  875. UpperLowerTbl_t *ultp;
  876. unsigned char *tail;
  877. if (s == NULL || *s == '\0') {
  878. *ssz = *dsz = 0;
  879. return;
  880. }
  881. if (!(*s & 0x80)) { /* ASCII */
  882. *dsz = *ssz = 1;
  883. *d = tolower(*s);
  884. return;
  885. }
  886. tail = (unsigned char *)ldap_utf8next((char *)s);
  887. *dsz = *ssz = tail - s;
  888. switch(*ssz) {
  889. case 1: /* ASCII */
  890. *d = tolower(*s);
  891. break;
  892. case 2: /* 2 bytes */
  893. if (*s < UL2S || *s > UL2E) { /* out of range */
  894. memmove(d, s, *ssz);
  895. break;
  896. }
  897. for (ultp = Upper2LowerTbl2[*s - UL2S];
  898. ultp && ultp->upper && memcmp(s, ultp->upper, *ssz);
  899. ultp++)
  900. ;
  901. if (!ultp) { /* out of range */
  902. memmove(d, s, *ssz);
  903. } else if (ultp->upper) { /* matched */
  904. memmove(d, ultp->lower, ultp->tsz);
  905. *dsz = ultp->tsz;
  906. } else {
  907. memmove(d, s, *ssz);
  908. }
  909. break;
  910. case 3: /* 3 bytes */
  911. if (*s != UL3S && *s != UL3E) { /* out of range */
  912. memmove(d, s, *ssz);
  913. break;
  914. }
  915. for (ultp = Upper2LowerTbl3[*s - UL3S];
  916. ultp && ultp->upper && memcmp(s, ultp->upper, *ssz);
  917. ultp++)
  918. ;
  919. if (!ultp) { /* out of range */
  920. memmove(d, s, *ssz);
  921. } else if (ultp->upper) { /* matched */
  922. memmove(d, ultp->lower, *ssz);
  923. } else {
  924. memmove(d, s, *ssz);
  925. }
  926. break;
  927. }
  928. return;
  929. }
  930. /*
  931. * dsgw_utf8isUpper: tests for a character that is a upper-case letter in
  932. * UTF-8
  933. *
  934. * input: a UTF-8 character (could be multi-byte)
  935. * output: 1 if the character is a upper-case letter
  936. * 0 if the character is not a upper-case letter
  937. */
  938. int
  939. dsgw_utf8isUpper(unsigned char *s)
  940. {
  941. UpperLowerTbl_t *ultp;
  942. unsigned char *next;
  943. int sz;
  944. if (s == NULL || *s == '\0') {
  945. return 0;
  946. }
  947. if (!(*s & 0x80)) { /* ASCII */
  948. return isupper(*s);
  949. }
  950. next = (unsigned char *)ldap_utf8next((char *)s);
  951. switch(sz = next - s) {
  952. case 1: /* ASCII */
  953. return isupper(*s);
  954. case 2:
  955. if (*s < UL2S || *s > UL2E) { /* out of range */
  956. return 0;
  957. }
  958. for (ultp = Upper2LowerTbl2[*s - UL2S];
  959. ultp && ultp->upper && memcmp(s, ultp->upper, sz);
  960. ultp++)
  961. ;
  962. if (!ultp) { /* out of range */
  963. return 0;
  964. } if (ultp->upper) { /* matched */
  965. return 1;
  966. } else {
  967. return 0;
  968. }
  969. case 3:
  970. if (*s < UL3S || *s > UL3E) { /* out of range */
  971. return 0;
  972. }
  973. for (ultp = Upper2LowerTbl3[*s - UL3S];
  974. ultp && ultp->upper && memcmp(s, ultp->upper, sz);
  975. ultp++)
  976. ;
  977. if (!ultp) { /* out of range */
  978. return 0;
  979. } if (ultp->upper) { /* matched */
  980. return 1;
  981. } else {
  982. return 0;
  983. }
  984. default:
  985. return 0;
  986. }
  987. }
  988. /*
  989. * Lower2Upper Tables: sorted by lower characters
   *
   * Every entry is {upper, lower, tsz}.  In the entries of these tables tsz
   * equals the byte length of the upper string, and each table is closed by
   * a NULL sentinel entry.  The tables are grouped by the lead byte of the
   * lower-case form; the code that consults them is not part of this section.
   *
   * Lower2UpperTbl20: lower-case lead byte \303, i.e. U+00E0..U+00FF.
   * \303\267 (U+00F7) has no entry, and the last entry maps \303\277
   * (U+00FF) to \305\270 (U+0178), an upper-case form with a different
   * lead byte.
  990. */
  991. UpperLowerTbl_t Lower2UpperTbl20[] = {
  992. /* upper, lower */
  993. {"\303\200", "\303\240", 2},
  994. {"\303\201", "\303\241", 2},
  995. {"\303\202", "\303\242", 2},
  996. {"\303\203", "\303\243", 2},
  997. {"\303\204", "\303\244", 2},
  998. {"\303\205", "\303\245", 2},
  999. {"\303\206", "\303\246", 2},
  1000. {"\303\207", "\303\247", 2},
  1001. {"\303\210", "\303\250", 2},
  1002. {"\303\211", "\303\251", 2},
  1003. {"\303\212", "\303\252", 2},
  1004. {"\303\213", "\303\253", 2},
  1005. {"\303\214", "\303\254", 2},
  1006. {"\303\215", "\303\255", 2},
  1007. {"\303\216", "\303\256", 2},
  1008. {"\303\217", "\303\257", 2},
  1009. {"\303\220", "\303\260", 2},
  1010. {"\303\221", "\303\261", 2},
  1011. {"\303\222", "\303\262", 2},
  1012. {"\303\223", "\303\263", 2},
  1013. {"\303\224", "\303\264", 2},
  1014. {"\303\225", "\303\265", 2},
  1015. {"\303\226", "\303\266", 2},
  1016. {"\303\230", "\303\270", 2},
  1017. {"\303\231", "\303\271", 2},
  1018. {"\303\232", "\303\272", 2},
  1019. {"\303\233", "\303\273", 2},
  1020. {"\303\234", "\303\274", 2},
  1021. {"\303\235", "\303\275", 2},
  1022. {"\303\236", "\303\276", 2},
  1023. {"\305\270", "\303\277", 2},
  1024. {NULL, NULL, 0}
  1025. };
  1026. UpperLowerTbl_t Lower2UpperTbl21[] = {
  1027. {"\304\200", "\304\201", 2},
  1028. {"\304\202", "\304\203", 2},
  1029. {"\304\204", "\304\205", 2},
  1030. {"\304\206", "\304\207", 2},
  1031. {"\304\210", "\304\211", 2},
  1032. {"\304\212", "\304\213", 2},
  1033. {"\304\214", "\304\215", 2},
  1034. {"\304\216", "\304\217", 2},
  1035. {"\304\220", "\304\221", 2},
  1036. {"\304\222", "\304\223", 2},
  1037. {"\304\224", "\304\225", 2},
  1038. {"\304\226", "\304\227", 2},
  1039. {"\304\230", "\304\231", 2},
  1040. {"\304\232", "\304\233", 2},
  1041. {"\304\234", "\304\235", 2},
  1042. {"\304\236", "\304\237", 2},
  1043. {"\304\240", "\304\241", 2},
  1044. {"\304\242", "\304\243", 2},
  1045. {"\304\244", "\304\245", 2},
  1046. {"\304\246", "\304\247", 2},
  1047. {"\304\250", "\304\251", 2},
  1048. {"\304\252", "\304\253", 2},
  1049. {"\304\254", "\304\255", 2},
  1050. {"\304\256", "\304\257", 2},
  1051. {"\111", "\304\261", 1},
  1052. {"\304\262", "\304\263", 2},
  1053. {"\304\264", "\304\265", 2},
  1054. {"\304\266", "\304\267", 2},
  1055. {"\304\271", "\304\272", 2},
  1056. {"\304\273", "\304\274", 2},
  1057. {"\304\275", "\304\276", 2},
  1058. {NULL, NULL}
  1059. };
  /*
   * Lower2UpperTbl22: {upper, lower, tsz} pairs whose lower-case form has
   * lead byte \305 (U+0140..U+017F).  tsz equals the byte length of the
   * upper string: 2 everywhere except the last pair, where \305\277
   * (U+017F) maps to the single ASCII byte \123 ('S').
   * The first pair's upper-case form (\304\277) has a different lead byte
   * from its lower-case form.
   */
  1060. UpperLowerTbl_t Lower2UpperTbl22[] = {
  1061. {"\304\277", "\305\200", 2},
  1062. {"\305\201", "\305\202", 2},
  1063. {"\305\203", "\305\204", 2},
  1064. {"\305\205", "\305\206", 2},
  1065. {"\305\207", "\305\210", 2},
  1066. {"\305\212", "\305\213", 2},
  1067. {"\305\214", "\305\215", 2},
  1068. {"\305\216", "\305\217", 2},
  1069. {"\305\220", "\305\221", 2},
  1070. {"\305\222", "\305\223", 2},
  1071. {"\305\224", "\305\225", 2},
  1072. {"\305\226", "\305\227", 2},
  1073. {"\305\230", "\305\231", 2},
  1074. {"\305\232", "\305\233", 2},
  1075. {"\305\234", "\305\235", 2},
  1076. {"\305\236", "\305\237", 2},
  1077. {"\305\240", "\305\241", 2},
  1078. {"\305\242", "\305\243", 2},
  1079. {"\305\244", "\305\245", 2},
  1080. {"\305\246", "\305\247", 2},
  1081. {"\305\250", "\305\251", 2},
  1082. {"\305\252", "\305\253", 2},
  1083. {"\305\254", "\305\255", 2},
  1084. {"\305\256", "\305\257", 2},
  1085. {"\305\260", "\305\261", 2},
  1086. {"\305\262", "\305\263", 2},
  1087. {"\305\264", "\305\265", 2},
  1088. {"\305\266", "\305\267", 2},
  1089. {"\305\271", "\305\272", 2},
  1090. {"\305\273", "\305\274", 2},
  1091. {"\305\275", "\305\276", 2},
  1092. {"\123", "\305\277", 1},
  1093. {NULL, NULL, 0}
  1094. };
  /*
   * Lower2UpperTbl23: {upper, lower, tsz} pairs whose lower-case form has
   * lead byte \306 (U+0183..U+01BD).  Both forms are 2 bytes long in every
   * entry; the NULL entry ends the table.
   */
  1095. UpperLowerTbl_t Lower2UpperTbl23[] = {
  1096. {"\306\202", "\306\203", 2},
  1097. {"\306\204", "\306\205", 2},
  1098. {"\306\207", "\306\210", 2},
  1099. {"\306\213", "\306\214", 2},
  1100. {"\306\221", "\306\222", 2},
  1101. {"\306\230", "\306\231", 2},
  1102. {"\306\240", "\306\241", 2},
  1103. {"\306\242", "\306\243", 2},
  1104. {"\306\244", "\306\245", 2},
  1105. {"\306\247", "\306\250", 2},
  1106. {"\306\254", "\306\255", 2},
  1107. {"\306\257", "\306\260", 2},
  1108. {"\306\263", "\306\264", 2},
  1109. {"\306\265", "\306\266", 2},
  1110. {"\306\270", "\306\271", 2},
  1111. {"\306\274", "\306\275", 2},
  1112. {NULL, NULL, 0}
  1113. };
  /*
   * Lower2UpperTbl24: {upper, lower, tsz} pairs whose lower-case form has
   * lead byte \307 (U+01C6..U+01FF).  Both forms are 2 bytes long in every
   * entry.  In the first three pairs and in the \307\261/\307\263 pair the
   * two code points are two apart instead of adjacent; the code point in
   * between has no entry in this table.
   */
  1114. UpperLowerTbl_t Lower2UpperTbl24[] = {
  1115. {"\307\204", "\307\206", 2},
  1116. {"\307\207", "\307\211", 2},
  1117. {"\307\212", "\307\214", 2},
  1118. {"\307\215", "\307\216", 2},
  1119. {"\307\217", "\307\220", 2},
  1120. {"\307\221", "\307\222", 2},
  1121. {"\307\223", "\307\224", 2},
  1122. {"\307\225", "\307\226", 2},
  1123. {"\307\227", "\307\230", 2},
  1124. {"\307\231", "\307\232", 2},
  1125. {"\307\233", "\307\234", 2},
  1126. {"\307\236", "\307\237", 2},
  1127. {"\307\240", "\307\241", 2},
  1128. {"\307\242", "\307\243", 2},
  1129. {"\307\244", "\307\245", 2},
  1130. {"\307\246", "\307\247", 2},
  1131. {"\307\250", "\307\251", 2},
  1132. {"\307\252", "\307\253", 2},
  1133. {"\307\254", "\307\255", 2},
  1134. {"\307\256", "\307\257", 2},
  1135. {"\307\261", "\307\263", 2},
  1136. {"\307\264", "\307\265", 2},
  1137. {"\307\272", "\307\273", 2},
  1138. {"\307\274", "\307\275", 2},
  1139. {"\307\276", "\307\277", 2},
  1140. {NULL, NULL, 0}
  1141. };
  /*
   * Lower2UpperTbl25: {upper, lower, tsz} pairs whose lower-case form has
   * lead byte \310 (U+0201..U+0217).  Each upper-case form is the code point
   * directly before its lower-case form; both are 2 bytes long.
   */
  1142. UpperLowerTbl_t Lower2UpperTbl25[] = {
  1143. {"\310\200", "\310\201", 2},
  1144. {"\310\202", "\310\203", 2},
  1145. {"\310\204", "\310\205", 2},
  1146. {"\310\206", "\310\207", 2},
  1147. {"\310\210", "\310\211", 2},
  1148. {"\310\212", "\310\213", 2},
  1149. {"\310\214", "\310\215", 2},
  1150. {"\310\216", "\310\217", 2},
  1151. {"\310\220", "\310\221", 2},
  1152. {"\310\222", "\310\223", 2},
  1153. {"\310\224", "\310\225", 2},
  1154. {"\310\226", "\310\227", 2},
  1155. {NULL, NULL, 0}
  1156. };
  /*
   * Lower2UpperTbl26: {upper, lower, tsz} pairs whose lower-case form has
   * lead byte \311 (U+0253..U+0272).  All upper-case forms here have lead
   * byte \306, so the two columns never share a lead byte.  The rows follow
   * the order of the lower column, which is why \306\227 precedes \306\226.
   *
   * NOTE(review): the pair \306\216 / \311\230 (U+018E / U+0258) looks like
   * an early-Unicode mapping; current Unicode data pairs U+018E with U+01DD
   * instead.  Confirm against the matching Upper2Lower table before changing.
   */
  1157. UpperLowerTbl_t Lower2UpperTbl26[] = {
  1158. {"\306\201", "\311\223", 2},
  1159. {"\306\206", "\311\224", 2},
  1160. {"\306\211", "\311\226", 2},
  1161. {"\306\212", "\311\227", 2},
  1162. {"\306\216", "\311\230", 2},
  1163. {"\306\217", "\311\231", 2},
  1164. {"\306\220", "\311\233", 2},
  1165. {"\306\223", "\311\240", 2},
  1166. {"\306\224", "\311\243", 2},
  1167. {"\306\227", "\311\250", 2},
  1168. {"\306\226", "\311\251", 2},
  1169. {"\306\234", "\311\257", 2},
  1170. {"\306\235", "\311\262", 2},
  1171. {NULL, NULL, 0}
  1172. };
  /*
   * Lower2UpperTbl27: {upper, lower, tsz} pairs whose lower-case form has
   * lead byte \312 (U+0283..U+0292).  All upper-case forms have lead byte
   * \306; both forms are 2 bytes long.
   */
  1173. UpperLowerTbl_t Lower2UpperTbl27[] = {
  1174. {"\306\251", "\312\203", 2},
  1175. {"\306\256", "\312\210", 2},
  1176. {"\306\261", "\312\212", 2},
  1177. {"\306\262", "\312\213", 2},
  1178. {"\306\267", "\312\222", 2},
  1179. {NULL, NULL, 0}
  1180. };
  /*
   * Lower2UpperTbl28: {upper, lower, tsz} pairs whose lower-case form has
   * lead byte \316 (U+03AC..U+03BF, Greek).  Upper-case forms share the same
   * lead byte; both forms are 2 bytes long.
   */
  1181. UpperLowerTbl_t Lower2UpperTbl28[] = {
  1182. {"\316\206", "\316\254", 2},
  1183. {"\316\210", "\316\255", 2},
  1184. {"\316\211", "\316\256", 2},
  1185. {"\316\212", "\316\257", 2},
  1186. {"\316\221", "\316\261", 2},
  1187. {"\316\222", "\316\262", 2},
  1188. {"\316\223", "\316\263", 2},
  1189. {"\316\224", "\316\264", 2},
  1190. {"\316\225", "\316\265", 2},
  1191. {"\316\226", "\316\266", 2},
  1192. {"\316\227", "\316\267", 2},
  1193. {"\316\230", "\316\270", 2},
  1194. {"\316\231", "\316\271", 2},
  1195. {"\316\232", "\316\272", 2},
  1196. {"\316\233", "\316\273", 2},
  1197. {"\316\234", "\316\274", 2},
  1198. {"\316\235", "\316\275", 2},
  1199. {"\316\236", "\316\276", 2},
  1200. {"\316\237", "\316\277", 2},
  1201. {NULL, NULL, 0}
  1202. };
  /*
   * Lower2UpperTbl29: {upper, lower, tsz} pairs whose lower-case form has
   * lead byte \317 (U+03C0..U+03F1, Greek).  Both forms are 2 bytes long.
   * Several lower-case forms share one upper-case form, so the upper column
   * contains repeats (e.g. \316\243 appears for both \317\202 and \317\203);
   * the table is therefore many-to-one in the lower -> upper direction.
   */
  1203. UpperLowerTbl_t Lower2UpperTbl29[] = {
  1204. {"\316\240", "\317\200", 2},
  1205. {"\316\241", "\317\201", 2},
  1206. {"\316\243", "\317\202", 2},
  1207. {"\316\243", "\317\203", 2},
  1208. {"\316\244", "\317\204", 2},
  1209. {"\316\245", "\317\205", 2},
  1210. {"\316\246", "\317\206", 2},
  1211. {"\316\247", "\317\207", 2},
  1212. {"\316\250", "\317\210", 2},
  1213. {"\316\251", "\317\211", 2},
  1214. {"\316\252", "\317\212", 2},
  1215. {"\316\253", "\317\213", 2},
  1216. {"\316\214", "\317\214", 2},
  1217. {"\316\216", "\317\215", 2},
  1218. {"\316\217", "\317\216", 2},
  1219. {"\316\222", "\317\220", 2},
  1220. {"\316\230", "\317\221", 2},
  1221. {"\316\246", "\317\225", 2},
  1222. {"\316\240", "\317\226", 2},
  1223. {"\317\242", "\317\243", 2},
  1224. {"\317\244", "\317\245", 2},
  1225. {"\317\246", "\317\247", 2},
  1226. {"\317\250", "\317\251", 2},
  1227. {"\317\252", "\317\253", 2},
  1228. {"\317\254", "\317\255", 2},
  1229. {"\317\256", "\317\257", 2},
  1230. {"\316\232", "\317\260", 2},
  1231. {"\316\241", "\317\261", 2},
  1232. {NULL, NULL, 0}
  1233. };
  /*
   * Lower2UpperTbl2a: {upper, lower, tsz} pairs whose lower-case form has
   * lead byte \320 (U+0430..U+043F, Cyrillic).  The upper-case forms are
   * U+0410..U+041F with the same lead byte; both forms are 2 bytes long.
   */
  1234. UpperLowerTbl_t Lower2UpperTbl2a[] = {
  1235. {"\320\220", "\320\260", 2},
  1236. {"\320\221", "\320\261", 2},
  1237. {"\320\222", "\320\262", 2},
  1238. {"\320\223", "\320\263", 2},
  1239. {"\320\224", "\320\264", 2},
  1240. {"\320\225", "\320\265", 2},
  1241. {"\320\226", "\320\266", 2},
  1242. {"\320\227", "\320\267", 2},
  1243. {"\320\230", "\320\270", 2},
  1244. {"\320\231", "\320\271", 2},
  1245. {"\320\232", "\320\272", 2},
  1246. {"\320\233", "\320\273", 2},
  1247. {"\320\234", "\320\274", 2},
  1248. {"\320\235", "\320\275", 2},
  1249. {"\320\236", "\320\276", 2},
  1250. {"\320\237", "\320\277", 2},
  1251. {NULL, NULL, 0}
  1252. };
  /*
   * Lower2UpperTbl2b: {upper, lower, tsz} pairs whose lower-case form has
   * lead byte \321 (U+0440..U+047F, Cyrillic).  For lower-case forms up to
   * \321\237 the upper-case form has lead byte \320; from \321\241 on the
   * upper-case form is the code point directly before the lower-case one.
   * Both forms are 2 bytes long in every entry.
   */
  1253. UpperLowerTbl_t Lower2UpperTbl2b[] = {
  1254. {"\320\240", "\321\200", 2},
  1255. {"\320\241", "\321\201", 2},
  1256. {"\320\242", "\321\202", 2},
  1257. {"\320\243", "\321\203", 2},
  1258. {"\320\244", "\321\204", 2},
  1259. {"\320\245", "\321\205", 2},
  1260. {"\320\246", "\321\206", 2},
  1261. {"\320\247", "\321\207", 2},
  1262. {"\320\250", "\321\210", 2},
  1263. {"\320\251", "\321\211", 2},
  1264. {"\320\252", "\321\212", 2},
  1265. {"\320\253", "\321\213", 2},
  1266. {"\320\254", "\321\214", 2},
  1267. {"\320\255", "\321\215", 2},
  1268. {"\320\256", "\321\216", 2},
  1269. {"\320\257", "\321\217", 2},
  1270. {"\320\201", "\321\221", 2},
  1271. {"\320\202", "\321\222", 2},
  1272. {"\320\203", "\321\223", 2},
  1273. {"\320\204", "\321\224", 2},
  1274. {"\320\205", "\321\225", 2},
  1275. {"\320\206", "\321\226", 2},
  1276. {"\320\207", "\321\227", 2},
  1277. {"\320\210", "\321\230", 2},
  1278. {"\320\211", "\321\231", 2},
  1279. {"\320\212", "\321\232", 2},
  1280. {"\320\213", "\321\233", 2},
  1281. {"\320\214", "\321\234", 2},
  1282. {"\320\216", "\321\236", 2},
  1283. {"\320\217", "\321\237", 2},
  1284. {"\321\240", "\321\241", 2},
  1285. {"\321\242", "\321\243", 2},
  1286. {"\321\244", "\321\245", 2},
  1287. {"\321\246", "\321\247", 2},
  1288. {"\321\250", "\321\251", 2},
  1289. {"\321\252", "\321\253", 2},
  1290. {"\321\254", "\321\255", 2},
  1291. {"\321\256", "\321\257", 2},
  1292. {"\321\260", "\321\261", 2},
  1293. {"\321\262", "\321\263", 2},
  1294. {"\321\264", "\321\265", 2},
  1295. {"\321\266", "\321\267", 2},
  1296. {"\321\270", "\321\271", 2},
  1297. {"\321\272", "\321\273", 2},
  1298. {"\321\274", "\321\275", 2},
  1299. {"\321\276", "\321\277", 2},
  1300. {NULL, NULL, 0}
  1301. };
  /*
   * Lower2UpperTbl2c: {upper, lower, tsz} pairs whose lower-case form has
   * lead byte \322 (U+0481..U+04BF, Cyrillic).  Each upper-case form is the
   * code point directly before its lower-case form; both are 2 bytes long.
   */
  1302. UpperLowerTbl_t Lower2UpperTbl2c[] = {
  1303. {"\322\200", "\322\201", 2},
  1304. {"\322\220", "\322\221", 2},
  1305. {"\322\222", "\322\223", 2},
  1306. {"\322\224", "\322\225", 2},
  1307. {"\322\226", "\322\227", 2},
  1308. {"\322\230", "\322\231", 2},
  1309. {"\322\232", "\322\233", 2},
  1310. {"\322\234", "\322\235", 2},
  1311. {"\322\236", "\322\237", 2},
  1312. {"\322\240", "\322\241", 2},
  1313. {"\322\242", "\322\243", 2},
  1314. {"\322\244", "\322\245", 2},
  1315. {"\322\246", "\322\247", 2},
  1316. {"\322\250", "\322\251", 2},
  1317. {"\322\252", "\322\253", 2},
  1318. {"\322\254", "\322\255", 2},
  1319. {"\322\256", "\322\257", 2},
  1320. {"\322\260", "\322\261", 2},
  1321. {"\322\262", "\322\263", 2},
  1322. {"\322\264", "\322\265", 2},
  1323. {"\322\266", "\322\267", 2},
  1324. {"\322\270", "\322\271", 2},
  1325. {"\322\272", "\322\273", 2},
  1326. {"\322\274", "\322\275", 2},
  1327. {"\322\276", "\322\277", 2},
  1328. {NULL, NULL, 0}
  1329. };
  /*
   * Lower2UpperTbl2d: {upper, lower, tsz} pairs whose lower-case form has
   * lead byte \323 (U+04C2..U+04F9, Cyrillic).  Each upper-case form is the
   * code point directly before its lower-case form; both are 2 bytes long.
   */
  1330. UpperLowerTbl_t Lower2UpperTbl2d[] = {
  1331. {"\323\201", "\323\202", 2},
  1332. {"\323\203", "\323\204", 2},
  1333. {"\323\207", "\323\210", 2},
  1334. {"\323\213", "\323\214", 2},
  1335. {"\323\220", "\323\221", 2},
  1336. {"\323\222", "\323\223", 2},
  1337. {"\323\224", "\323\225", 2},
  1338. {"\323\226", "\323\227", 2},
  1339. {"\323\230", "\323\231", 2},
  1340. {"\323\232", "\323\233", 2},
  1341. {"\323\234", "\323\235", 2},
  1342. {"\323\236", "\323\237", 2},
  1343. {"\323\240", "\323\241", 2},
  1344. {"\323\242", "\323\243", 2},
  1345. {"\323\244", "\323\245", 2},
  1346. {"\323\246", "\323\247", 2},
  1347. {"\323\250", "\323\251", 2},
  1348. {"\323\252", "\323\253", 2},
  1349. {"\323\256", "\323\257", 2},
  1350. {"\323\260", "\323\261", 2},
  1351. {"\323\262", "\323\263", 2},
  1352. {"\323\264", "\323\265", 2},
  1353. {"\323\270", "\323\271", 2},
  1354. {NULL, NULL, 0}
  1355. };
  /*
   * Lower2UpperTbl2e: {upper, lower, tsz} pairs whose lower-case form has
   * lead byte \325 (U+0561..U+057F, Armenian).  The upper-case forms are
   * U+0531..U+054F and start under lead byte \324 before continuing under
   * \325.  Both forms are 2 bytes long in every entry.
   */
  1356. UpperLowerTbl_t Lower2UpperTbl2e[] = {
  1357. {"\324\261", "\325\241", 2},
  1358. {"\324\262", "\325\242", 2},
  1359. {"\324\263", "\325\243", 2},
  1360. {"\324\264", "\325\244", 2},
  1361. {"\324\265", "\325\245", 2},
  1362. {"\324\266", "\325\246", 2},
  1363. {"\324\267", "\325\247", 2},
  1364. {"\324\270", "\325\250", 2},
  1365. {"\324\271", "\325\251", 2},
  1366. {"\324\272", "\325\252", 2},
  1367. {"\324\273", "\325\253", 2},
  1368. {"\324\274", "\325\254", 2},
  1369. {"\324\275", "\325\255", 2},
  1370. {"\324\276", "\325\256", 2},
  1371. {"\324\277", "\325\257", 2},
  1372. {"\325\200", "\325\260", 2},
  1373. {"\325\201", "\325\261", 2},
  1374. {"\325\202", "\325\262", 2},
  1375. {"\325\203", "\325\263", 2},
  1376. {"\325\204", "\325\264", 2},
  1377. {"\325\205", "\325\265", 2},
  1378. {"\325\206", "\325\266", 2},
  1379. {"\325\207", "\325\267", 2},
  1380. {"\325\210", "\325\270", 2},
  1381. {"\325\211", "\325\271", 2},
  1382. {"\325\212", "\325\272", 2},
  1383. {"\325\213", "\325\273", 2},
  1384. {"\325\214", "\325\274", 2},
  1385. {"\325\215", "\325\275", 2},
  1386. {"\325\216", "\325\276", 2},
  1387. {"\325\217", "\325\277", 2},
  1388. {NULL, NULL, 0}
  1389. };
  /*
   * Lower2UpperTbl2f: {upper, lower, tsz} pairs whose lower-case form has
   * lead byte \326 (U+0580..U+0586, Armenian).  The upper-case forms are
   * U+0550..U+0556 under lead byte \325; both forms are 2 bytes long.
   */
  1390. UpperLowerTbl_t Lower2UpperTbl2f[] = {
  1391. {"\325\220", "\326\200", 2},
  1392. {"\325\221", "\326\201", 2},
  1393. {"\325\222", "\326\202", 2},
  1394. {"\325\223", "\326\203", 2},
  1395. {"\325\224", "\326\204", 2},
  1396. {"\325\225", "\326\205", 2},
  1397. {"\325\226", "\326\206", 2},
  1398. {NULL, NULL, 0}
  1399. };
  1400. UpperLowerTbl_t Lower2UpperTbl30[] = {
  1401. {"\341\202\240", "\341\203\220", 3},
  1402. {"\341\202\241", "\341\203\221", 3},
  1403. {"\341\202\242", "\341\203\222", 3},
  1404. {"\341\202\243", "\341\203\223", 3},
  1405. {"\341\202\244", "\341\203\224", 3},
  1406. {"\341\202\245", "\341\203\225", 3},
  1407. {"\341\202\246", "\341\203\226", 3},
  1408. {"\341\202\247", "\341\203\227", 3},
  1409. {"\341\202\250", "\341\203\230", 3},
  1410. {"\341\202\251", "\341\203\231", 3},
  1411. {"\341\202\252", "\341\203\232", 3},
  1412. {"\341\202\253", "\341\203\233", 3},
  1413. {"\341\202\254", "\341\203\234", 3},
  1414. {"\341\202\255", "\341\203\235", 3},
  1415. {"\341\202\256", "\341\203\236", 3},
  1416. {"\341\202\257", "\341\203\237", 3},
  1417. {"\341\202\260", "\341\203\240", 3},
  1418. {"\341\202\261", "\341\203\241", 3},
  1419. {"\341\202\262", "\341\203\242", 3},
  1420. {"\341\202\263", "\341\203\243", 3},
  1421. {"\341\202\264", "\341\203\244", 3},
  1422. {"\341\202\265", "\341\203\245", 3},
  1423. {"\341\202\266", "\341\203\246", 3},
  1424. {"\341\202\267", "\341\203\247", 3},
  1425. {"\341\202\270", "\341\203\250", 3},
  1426. {"\341\202\271", "\341\203\251", 3},
  1427. {"\341\202\272", "\341\203\252", 3},
  1428. {"\341\202\273", "\341\203\253", 3},
  1429. {"\341\202\274", "\341\203\254", 3},
  1430. {"\341\202\275", "\341\203\255", 3},
  1431. {"\341\202\276", "\341\203\256", 3},
  1432. {"\341\202\277", "\341\203\257", 3},
  1433. {"\341\203\200", "\341\203\260", 3},
  1434. {"\341\203\201", "\341\203\261", 3},
  1435. {"\341\203\202", "\341\203\262", 3},
  1436. {"\341\203\203", "\341\203\263", 3},
  1437. {"\341\203\204", "\341\203\264", 3},
  1438. {"\341\203\205", "\341\203\265", 3},
  1439. {"\341\270\200", "\341\270\201", 3},
  1440. {"\341\270\202", "\341\270\203", 3},
  1441. {"\341\270\204", "\341\270\205", 3},
  1442. {"\341\270\206", "\341\270\207", 3},
  1443. {"\341\270\210", "\341\270\211", 3},
  1444. {"\341\270\212", "\341\270\213", 3},
  1445. {"\341\270\214", "\341\270\215", 3},
  1446. {"\341\270\216", "\341\270\217", 3},
  1447. {"\341\270\220", "\341\270\221", 3},
  1448. {"\341\270\222", "\341\270\223", 3},
  1449. {"\341\270\224", "\341\270\225", 3},
  1450. {"\341\270\226", "\341\270\227", 3},
  1451. {"\341\270\230", "\341\270\231", 3},
  1452. {"\341\270\232", "\341\270\233", 3},
  1453. {"\341\270\234", "\341\270\235", 3},
  1454. {"\341\270\236", "\341\270\237", 3},
  1455. {"\341\270\240", "\341\270\241", 3},
  1456. {"\341\270\242", "\341\270\243", 3},
  1457. {"\341\270\244", "\341\270\245", 3},
  1458. {"\341\270\246", "\341\270\247", 3},
  1459. {"\341\270\250", "\341\270\251", 3},
  1460. {"\341\270\252", "\341\270\253", 3},
  1461. {"\341\270\254", "\341\270\255", 3},
  1462. {"\341\270\256", "\341\270\257", 3},
  1463. {"\341\270\260", "\341\270\261", 3},
  1464. {"\341\270\262", "\341\270\263", 3},
  1465. {"\341\270\264", "\341\270\265", 3},
  1466. {"\341\270\266", "\341\270\267", 3},
  1467. {"\341\270\270", "\341\270\271", 3},
  1468. {"\341\270\272", "\341\270\273", 3},
  1469. {"\341\270\274", "\341\270\275", 3},
  1470. {"\341\270\276", "\341\270\277", 3},
  1471. {"\341\271\200", "\341\271\201", 3},
  1472. {"\341\271\202", "\341\271\203", 3},
  1473. {"\341\271\204", "\341\271\205", 3},
  1474. {"\341\271\206", "\341\271\207", 3},
  1475. {"\341\271\210", "\341\271\211", 3},
  1476. {"\341\271\212", "\341\271\213", 3},
  1477. {"\341\271\214", "\341\271\215", 3},
  1478. {"\341\271\216", "\341\271\217", 3},
  1479. {"\341\271\220", "\341\271\221", 3},
  1480. {"\341\271\222", "\341\271\223", 3},
  1481. {"\341\271\224", "\341\271\225", 3},
  1482. {"\341\271\226", "\341\271\227", 3},
  1483. {"\341\271\230", "\341\271\231", 3},
  1484. {"\341\271\232", "\341\271\233", 3},
  1485. {"\341\271\234", "\341\271\235", 3},
  1486. {"\341\271\236", "\341\271\237", 3},
  1487. {"\341\271\240", "\341\271\241", 3},
  1488. {"\341\271\242", "\341\271\243", 3},
  1489. {"\341\271\244", "\341\271\245", 3},
  1490. {"\341\271\246", "\341\271\247", 3},
  1491. {"\341\271\250", "\341\271\251", 3},
  1492. {"\341\271\252", "\341\271\253", 3},
  1493. {"\341\271\254", "\341\271\255", 3},
  1494. {"\341\271\256", "\341\271\257", 3},
  1495. {"\341\271\260", "\341\271\261", 3},
  1496. {"\341\271\262", "\341\271\263", 3},
  1497. {"\341\271\264", "\341\271\265", 3},
  1498. {"\341\271\266", "\341\271\267", 3},
  1499. {"\341\271\270", "\341\271\271", 3},
  1500. {"\341\271\272", "\341\271\273", 3},
  1501. {"\341\271\274", "\341\271\275", 3},
  1502. {"\341\271\276", "\341\271\277", 3},
  1503. {"\341\272\200", "\341\272\201", 3},
  1504. {"\341\272\202", "\341\272\203", 3},
  1505. {"\341\272\204", "\341\272\205", 3},
  1506. {"\341\272\206", "\341\272\207", 3},
  1507. {"\341\272\210", "\341\272\211", 3},
  1508. {"\341\272\212", "\341\272\213", 3},
  1509. {"\341\272\214", "\341\272\215", 3},
  1510. {"\341\272\216", "\341\272\217", 3},
  1511. {"\341\272\220", "\341\272\221", 3},
  1512. {"\341\272\222", "\341\272\223", 3},
  1513. {"\341\272\224", "\341\272\225", 3},
  1514. {"\341\272\240", "\341\272\241", 3},
  1515. {"\341\272\242", "\341\272\243", 3},
  1516. {"\341\272\244", "\341\272\245", 3},
  1517. {"\341\272\246", "\341\272\247", 3},
  1518. {"\341\272\250", "\341\272\251", 3},
  1519. {"\341\272\252", "\341\272\253", 3},
  1520. {"\341\272\254", "\341\272\255", 3},
  1521. {"\341\272\256", "\341\272\257", 3},
  1522. {"\341\272\260", "\341\272\261", 3},
  1523. {"\341\272\262", "\341\272\263", 3},
  1524. {"\341\272\264", "\341\272\265", 3},
  1525. {"\341\272\266", "\341\272\267", 3},
  1526. {"\341\272\270", "\341\272\271", 3},
  1527. {"\341\272\272", "\341\272\273", 3},
  1528. {"\341\272\274", "\341\272\275", 3},
  1529. {"\341\272\276", "\341\272\277", 3},
  1530. {"\341\273\200", "\341\273\201", 3},
  1531. {"\341\273\202", "\341\273\203", 3},
  1532. {"\341\273\204", "\341\273\205", 3},
  1533. {"\341\273\206", "\341\273\207", 3},
  1534. {"\341\273\210", "\341\273\211", 3},
  1535. {"\341\273\212", "\341\273\213", 3},
  1536. {"\341\273\214", "\341\273\215", 3},
  1537. {"\341\273\216", "\341\273\217", 3},
  1538. {"\341\273\220", "\341\273\221", 3},
  1539. {"\341\273\222", "\341\273\223", 3},
  1540. {"\341\273\224", "\341\273\225", 3},
  1541. {"\341\273\226", "\341\273\227", 3},
  1542. {"\341\273\230", "\341\273\231", 3},
  1543. {"\341\273\232", "\341\273\233", 3},
  1544. {"\341\273\234", "\341\273\235", 3},
  1545. {"\341\273\236", "\341\273\237", 3},
  1546. {"\341\273\240", "\341\273\241", 3},
  1547. {"\341\273\242", "\341\273\243", 3},
  1548. {"\341\273\244", "\341\273\245", 3},
  1549. {"\341\273\246", "\341\273\247", 3},
  1550. {"\341\273\250", "\341\273\251", 3},
  1551. {"\341\273\252", "\341\273\253", 3},
  1552. {"\341\273\254", "\341\273\255", 3},
  1553. {"\341\273\256", "\341\273\257", 3},
  1554. {"\341\273\260", "\341\273\261", 3},
  1555. {"\341\273\262", "\341\273\263", 3},
  1556. {"\341\273\264", "\341\273\265", 3},
  1557. {"\341\273\266", "\341\273\267", 3},
  1558. {"\341\273\270", "\341\273\271", 3},
  1559. {"\341\274\210", "\341\274\200", 3},
  1560. {"\341\274\211", "\341\274\201", 3},
  1561. {"\341\274\212", "\341\274\202", 3},
  1562. {"\341\274\213", "\341\274\203", 3},
  1563. {"\341\274\214", "\341\274\204", 3},
  1564. {"\341\274\215", "\341\274\205", 3},
  1565. {"\341\274\216", "\341\274\206", 3},
  1566. {"\341\274\217", "\341\274\207", 3},
  1567. {"\341\274\230", "\341\274\220", 3},
  1568. {"\341\274\231", "\341\274\221", 3},
  1569. {"\341\274\232", "\341\274\222", 3},
  1570. {"\341\274\233", "\341\274\223", 3},
  1571. {"\341\274\234", "\341\274\224", 3},
  1572. {"\341\274\235", "\341\274\225", 3},
  1573. {"\341\274\250", "\341\274\240", 3},
  1574. {"\341\274\251", "\341\274\241", 3},
  1575. {"\341\274\252", "\341\274\242", 3},
  1576. {"\341\274\253", "\341\274\243", 3},
  1577. {"\341\274\254", "\341\274\244", 3},
  1578. {"\341\274\255", "\341\274\245", 3},
  1579. {"\341\274\256", "\341\274\246", 3},
  1580. {"\341\274\257", "\341\274\247", 3},
  1581. {"\341\274\270", "\341\274\260", 3},
  1582. {"\341\274\271", "\341\274\261", 3},
  1583. {"\341\274\272", "\341\274\262", 3},
  1584. {"\341\274\273", "\341\274\263", 3},
  1585. {"\341\274\274", "\341\274\264", 3},
  1586. {"\341\274\275", "\341\274\265", 3},
  1587. {"\341\274\276", "\341\274\266", 3},
  1588. {"\341\274\277", "\341\274\267", 3},
  1589. {"\341\275\210", "\341\275\200", 3},
  1590. {"\341\275\211", "\341\275\201", 3},
  1591. {"\341\275\212", "\341\275\202", 3},
  1592. {"\341\275\213", "\341\275\203", 3},
  1593. {"\341\275\214", "\341\275\204", 3},
  1594. {"\341\275\215", "\341\275\205", 3},
  1595. {"\341\275\231", "\341\275\221", 3},
  1596. {"\341\275\233", "\341\275\223", 3},
  1597. {"\341\275\235", "\341\275\225", 3},
  1598. {"\341\275\237", "\341\275\227", 3},
  1599. {"\341\275\250", "\341\275\240", 3},
  1600. {"\341\275\251", "\341\275\241", 3},
  1601. {"\341\275\252", "\341\275\242", 3},
  1602. {"\341\275\253", "\341\275\243", 3},
  1603. {"\341\275\254", "\341\275\244", 3},
  1604. {"\341\275\255", "\341\275\245", 3},
  1605. {"\341\275\256", "\341\275\246", 3},
  1606. {"\341\275\257", "\341\275\247", 3},
  1607. {"\341\276\272", "\341\275\260", 3},
  1608. {"\341\276\273", "\341\275\261", 3},
  1609. {"\341\277\210", "\341\275\262", 3},
  1610. {"\341\277\211", "\341\275\263", 3},
  1611. {"\341\277\212", "\341\275\264", 3},
  1612. {"\341\277\213", "\341\275\265", 3},
  1613. {"\341\277\232", "\341\275\266", 3},
  1614. {"\341\277\233", "\341\275\267", 3},
  1615. {"\341\277\270", "\341\275\270", 3},
  1616. {"\341\277\271", "\341\275\271", 3},
  1617. {"\341\277\252", "\341\275\272", 3},
  1618. {"\341\277\253", "\341\275\273", 3},
  1619. {"\341\277\272", "\341\275\274", 3},
  1620. {"\341\277\273", "\341\275\275", 3},
  1621. {"\341\276\210", "\341\276\200", 3},
  1622. {"\341\276\211", "\341\276\201", 3},
  1623. {"\341\276\212", "\341\276\202", 3},
  1624. {"\341\276\213", "\341\276\203", 3},
  1625. {"\341\276\214", "\341\276\204", 3},
  1626. {"\341\276\215", "\341\276\205", 3},
  1627. {"\341\276\216", "\341\276\206", 3},
  1628. {"\341\276\217", "\341\276\207", 3},
  1629. {"\341\276\230", "\341\276\220", 3},
  1630. {"\341\276\231", "\341\276\221", 3},
  1631. {"\341\276\232", "\341\276\222", 3},
  1632. {"\341\276\233", "\341\276\223", 3},
  1633. {"\341\276\234", "\341\276\224", 3},
  1634. {"\341\276\235", "\341\276\225", 3},
  1635. {"\341\276\236", "\341\276\226", 3},
  1636. {"\341\276\237", "\341\276\227", 3},
  1637. {"\341\276\250", "\341\276\240", 3},
  1638. {"\341\276\251", "\341\276\241", 3},
  1639. {"\341\276\252", "\341\276\242", 3},
  1640. {"\341\276\253", "\341\276\243", 3},
  1641. {"\341\276\254", "\341\276\244", 3},
  1642. {"\341\276\255", "\341\276\245", 3},
  1643. {"\341\276\256", "\341\276\246", 3},
  1644. {"\341\276\257", "\341\276\247", 3},
  1645. {"\341\276\270", "\341\276\260", 3},
  1646. {"\341\276\271", "\341\276\261", 3},
  1647. {"\341\276\274", "\341\276\263", 3},
  1648. {"\341\277\214", "\341\277\203", 3},
  1649. {"\341\277\230", "\341\277\220", 3},
  1650. {"\341\277\231", "\341\277\221", 3},
  1651. {"\341\277\250", "\341\277\240", 3},
  1652. {"\341\277\251", "\341\277\241", 3},
  1653. {"\341\277\254", "\341\277\245", 3},
  1654. {"\341\277\274", "\341\277\263", 3},
  1655. {NULL, NULL, 0}
  1656. };
  1657. UpperLowerTbl_t Lower2UpperTbl31[] = {
  1658. {"\357\274\241", "\357\275\201", 3},
  1659. {"\357\274\242", "\357\275\202", 3},
  1660. {"\357\274\243", "\357\275\203", 3},
  1661. {"\357\274\244", "\357\275\204", 3},
  1662. {"\357\274\245", "\357\275\205", 3},
  1663. {"\357\274\246", "\357\275\206", 3},
  1664. {"\357\274\247", "\357\275\207", 3},
  1665. {"\357\274\250", "\357\275\210", 3},
  1666. {"\357\274\251", "\357\275\211", 3},
  1667. {"\357\274\252", "\357\275\212", 3},
  1668. {"\357\274\253", "\357\275\213", 3},
  1669. {"\357\274\254", "\357\275\214", 3},
  1670. {"\357\274\255", "\357\275\215", 3},
  1671. {"\357\274\256", "\357\275\216", 3},
  1672. {"\357\274\257", "\357\275\217", 3},
  1673. {"\357\274\260", "\357\275\220", 3},
  1674. {"\357\274\261", "\357\275\221", 3},
  1675. {"\357\274\262", "\357\275\222", 3},
  1676. {"\357\274\263", "\357\275\223", 3},
  1677. {"\357\274\264", "\357\275\224", 3},
  1678. {"\357\274\265", "\357\275\225", 3},
  1679. {"\357\274\266", "\357\275\226", 3},
  1680. {"\357\274\267", "\357\275\227", 3},
  1681. {"\357\274\270", "\357\275\230", 3},
  1682. {"\357\274\271", "\357\275\231", 3},
  1683. {"\357\274\272", "\357\275\232", 3},
  1684. {NULL, NULL, 0}
  1685. /* upper, lower */
  1686. };
  1687. UpperLowerTbl_t *Lower2UpperTbl2[] = {
  1688. Lower2UpperTbl20, /* \303 */
  1689. Lower2UpperTbl21, /* \304 */
  1690. Lower2UpperTbl22, /* \305 */
  1691. Lower2UpperTbl23, /* \306 */
  1692. Lower2UpperTbl24, /* \307 */
  1693. Lower2UpperTbl25, /* \310 */
  1694. Lower2UpperTbl26, /* \311 */
  1695. Lower2UpperTbl27, /* \312 */
  1696. NULL, /* \313 */
  1697. NULL, /* \314 */
  1698. NULL, /* \315 */
  1699. Lower2UpperTbl28, /* \316 */
  1700. Lower2UpperTbl29, /* \317 */
  1701. Lower2UpperTbl2a, /* \320 */
  1702. Lower2UpperTbl2b, /* \321 */
  1703. Lower2UpperTbl2c, /* \322 */
  1704. Lower2UpperTbl2d, /* \323 */
  1705. NULL, /* \324 */
  1706. Lower2UpperTbl2e, /* \325 */
  1707. Lower2UpperTbl2f /* \326 */
  1708. };
  1709. UpperLowerTbl_t *Lower2UpperTbl3[] = {
  1710. Lower2UpperTbl30, /* \341 */
  1711. NULL, /* \342 */
  1712. NULL, /* \343 */
  1713. NULL, /* \344 */
  1714. NULL, /* \345 */
  1715. NULL, /* \346 */
  1716. NULL, /* \347 */
  1717. NULL, /* \350 */
  1718. NULL, /* \351 */
  1719. NULL, /* \352 */
  1720. NULL, /* \353 */
  1721. NULL, /* \354 */
  1722. NULL, /* \355 */
  1723. NULL, /* \356 */
  1724. Lower2UpperTbl31 /* \357 */
  1725. };
  1726. #define LU2S (unsigned char)'\303'
  1727. #define LU2E (unsigned char)'\326'
  1728. #define LU3S (unsigned char)'\341'
  1729. #define LU3E (unsigned char)'\357'
  1730. /*
  1731. * dsgw_utf8StrToUpper: translate lower-case string to upper-case
  1732. *
  1733. * input: a null terminated UTF-8 string
  1734. * output: a null terminated UTF-8 string which characters are
  1735. * converted to upper-case; characters which are not
  1736. * lower-case are copied as is. If it's not considered
  1737. * a UTF-8 string, NULL is returned.
  1738. *
  1739. * Notes: This function takes a string (made of multiple UTF-8 characters)
  1740. * for the input (not one character as in "toupper").
  1741. * Output string is allocated in this function, which needs to be
  1742. * released when it's not needed any more.
  1743. */
  1744. unsigned char *
  1745. dsgw_utf8StrToUpper(unsigned char *s)
  1746. {
  1747. UpperLowerTbl_t *ultp;
  1748. unsigned char *p, *np, *tail;
  1749. unsigned char *up, *uphead;
  1750. int len, sz;
  1751. if (s == NULL || *s == '\0') {
  1752. return s;
  1753. }
  1754. len = strlen((char *)s);
  1755. tail = s + len;
  1756. uphead = up = (unsigned char *)dsgw_ch_malloc(len + 1);
  1757. p = s;
  1758. while ((np = (unsigned char *)ldap_utf8next((char *)p)) <= tail) {
  1759. switch(sz = np - p) {
  1760. case 1: /* ASCII */
  1761. sprintf((char *)up, "%c", toupper(*p));
  1762. break;
  1763. case 2: /* 2 bytes */
  1764. if (*p < LU2S || *p > LU2E) { /* out of range */
  1765. memcpy(up, p, sz);
  1766. break;
  1767. }
  1768. for (ultp = Lower2UpperTbl2[*p - LU2S];
  1769. ultp && ultp->lower && memcmp(p, ultp->lower, sz);
  1770. ultp++)
  1771. ;
  1772. if (!ultp) { /* out of range */
  1773. memcpy(up, p, sz);
  1774. } if (ultp->lower) { /* matched */
  1775. memcpy(up, ultp->upper, ultp->tsz);
  1776. sz = ultp->tsz;
  1777. } else {
  1778. memcpy(up, p, sz);
  1779. }
  1780. break;
  1781. case 3: /* 3 bytes */
  1782. if (*p != LU3S && *p != LU3E) { /* out of range */
  1783. memcpy(up, p, sz);
  1784. break;
  1785. }
  1786. for (ultp = Lower2UpperTbl3[*p - LU3S];
  1787. ultp && ultp->lower && memcmp(p, ultp->lower, sz);
  1788. ultp++)
  1789. ;
  1790. if (!ultp) { /* out of range */
  1791. memcpy(up, p, sz);
  1792. } if (ultp->lower) { /* matched */
  1793. memcpy(up, ultp->upper, sz);
  1794. } else {
  1795. memcpy(up, p, sz);
  1796. }
  1797. break;
  1798. case 4:
  1799. memcpy(up, p, sz);
  1800. break;
  1801. default: /* not UTF-8 */
  1802. free(uphead);
  1803. return NULL;
  1804. }
  1805. up += sz;
  1806. p = np;
  1807. if (p == tail) {
  1808. break;
  1809. }
  1810. }
  1811. *up = '\0';
  1812. return uphead;
  1813. }
  1814. /*
  1815. * dsgw_utf8ToUpper: translate lower-case character to upper-case
  1816. *
  1817. * input: a UTF-8 character (s)
  1818. * output: a UTF-8 character which is converted to upper-case (d)
  1819. * length (in bytes) of input character (ssz) and
  1820. * output character (dsz)
  1821. *
  1822. * Notes: This function takes a UTF-8 character (could be multiple bytes)
  1823. * for the input. Memory for the output character is NOT allocated
  1824. * in this function, caller should have allocated it (d).
  1825. * "memmove" is used since (s) and (d) are overlapped.
  1826. */
  1827. void
  1828. dsgw_utf8ToUpper(unsigned char *s, unsigned char *d, int *ssz, int *dsz)
  1829. {
  1830. UpperLowerTbl_t *ultp;
  1831. unsigned char *tail;
  1832. if (s == NULL || *s == '\0') {
  1833. *ssz = *dsz = 0;
  1834. return;
  1835. }
  1836. if (!(*s & 0x80)) { /* ASCII */
  1837. *dsz = *ssz = 1;
  1838. *d = toupper(*s);
  1839. return;
  1840. }
  1841. tail = (unsigned char *)ldap_utf8next((char *)s);
  1842. *dsz = *ssz = tail - s;
  1843. switch(*ssz) {
  1844. case 1: /* ASCII */
  1845. *d = toupper(*s);
  1846. break;
  1847. case 2: /* 2 bytes */
  1848. if (*s < LU2S || *s > LU2E) { /* out of range */
  1849. memmove(d, s, *ssz);
  1850. break;
  1851. }
  1852. for (ultp = Lower2UpperTbl2[*s - LU2S];
  1853. ultp && ultp->lower && memcmp(s, ultp->lower, *ssz);
  1854. ultp++)
  1855. ;
  1856. if (!ultp) { /* out of range */
  1857. memmove(d, s, *ssz);
  1858. } else if (ultp->lower) { /* matched */
  1859. memmove(d, ultp->upper, ultp->tsz);
  1860. *dsz = ultp->tsz;
  1861. } else {
  1862. memmove(d, s, *ssz);
  1863. }
  1864. break;
  1865. case 3: /* 3 bytes */
  1866. if (*s != LU3S && *s != LU3E) { /* out of range */
  1867. memmove(d, s, *ssz);
  1868. break;
  1869. }
  1870. for (ultp = Lower2UpperTbl3[*s - LU3S];
  1871. ultp && ultp->lower && memcmp(s, ultp->lower, *ssz);
  1872. ultp++)
  1873. ;
  1874. if (!ultp) { /* out of range */
  1875. memmove(d, s, *ssz);
  1876. } else if (ultp->lower) { /* matched */
  1877. memmove(d, ultp->upper, *ssz);
  1878. } else {
  1879. memmove(d, s, *ssz);
  1880. }
  1881. break;
  1882. }
  1883. return;
  1884. }
  1885. /*
  1886. * dsgw_utf8isLower: tests for a character that is a lower-case letter in
  1887. * UTF-8
  1888. *
  1889. * input: a UTF-8 character (could be multi-byte)
  1890. * output: 1 if the character is a lower-case letter
  1891. * 0 if the character is not a lower-case letter
  1892. */
  1893. int
  1894. dsgw_utf8isLower(unsigned char *s)
  1895. {
  1896. UpperLowerTbl_t *ultp;
  1897. unsigned char *next;
  1898. int sz;
  1899. if (s == NULL || *s == '\0') {
  1900. return 0;
  1901. }
  1902. if (!(*s & 0x80)) { /* ASCII */
  1903. return islower(*s);
  1904. }
  1905. next = (unsigned char *)ldap_utf8next((char *)s);
  1906. switch(sz = next - s) {
  1907. case 1: /* ASCII */
  1908. return islower(*s);
  1909. case 2:
  1910. if (*s < LU2S || *s > LU2E) { /* out of range */
  1911. return 0;
  1912. }
  1913. for (ultp = Lower2UpperTbl2[*s - LU2S];
  1914. ultp && ultp->lower && memcmp(s, ultp->lower, sz);
  1915. ultp++)
  1916. ;
  1917. if (!ultp) { /* out of range */
  1918. return 0;
  1919. } if (ultp->lower) { /* matched */
  1920. return 1;
  1921. } else {
  1922. return 0;
  1923. }
  1924. case 3:
  1925. if (*s < LU3S || *s > LU3E) { /* out of range */
  1926. return 0;
  1927. }
  1928. for (ultp = Lower2UpperTbl3[*s - LU3S];
  1929. ultp && ultp->lower && memcmp(s, ultp->lower, sz);
  1930. ultp++)
  1931. ;
  1932. if (!ultp) { /* out of range */
  1933. return 0;
  1934. } if (ultp->lower) { /* matched */
  1935. return 1;
  1936. } else {
  1937. return 0;
  1938. }
  1939. default:
  1940. return 0;
  1941. }
  1942. }
  1943. /*
  1944. * dsgw_utf8casecmp: case-insensitive string compare for UTF-8 strings
  1945. *
  1946. * input: two UTF-8 strings (s0, s1) to be compared
  1947. * output: positive number, if s0 is after s1
  1948. * 0, if the two strings are identical ignoring the case
  1949. * negative number, if s1 is after s0
  1950. *
  1951. * Rules: If both UTF-8 strings are NULL or 0-length, 0 is returned.
  1952. * If one of the strings is NULL or 0-length, the NULL/0-length
  1953. * string is smaller.
  1954. * If one or both of the strings are not UTF-8, system provided
  1955. * strcasecmp is used.
  1956. * If one of the two strings contains no 8-bit characters,
  1957. * strcasecmp is used.
  1958. * The strings are compared after converted to lower-case UTF-8.
  1959. * Each character is compared from the beginning.
  1960. * Evaluation goes in this order:
  1961. * If the length of one character is shorter then the other,
  1962. * the difference of the two lengths is returned.
  1963. * If the length of the corresponsing characters is same,
  1964. * each byte in the characters is compared.
  1965. * If there's a difference between two bytes,
  1966. * the diff is returned.
  1967. * If one string is shorter then the other, the diff is returned.
  1968. *
  1969. * Notes: Don't use this function for collation
  1970. * 1) there's no notion of locale in this function.
  1971. * 2) it's UTF-8 code order, which is different from the locale
  1972. * based collation.
  1973. */
  1974. int
  1975. dsgw_utf8casecmp(unsigned char *s0, unsigned char *s1)
  1976. {
  1977. unsigned char *d0, *d1; /* store lower-case strings */
  1978. unsigned char *p0, *p1; /* current UTF-8 char */
  1979. unsigned char *n0, *n1; /* next UTF-8 char */
  1980. unsigned char *t0, *t1; /* tail of the strings */
  1981. unsigned char *x0, *x1; /* current byte in a char */
  1982. int i0, i1; /* length of characters */
  1983. int l0, l1; /* length of leftover */
  1984. int rval;
  1985. int has8_s0;
  1986. int has8_s1;
  1987. d0 = d1 = NULL;
  1988. if (s0 == NULL || *s0 == '\0') {
  1989. if (s1 == NULL || *s1 == '\0') {
  1990. rval = 0;
  1991. } else {
  1992. rval = -1; /* regardless s1, s0 < s1 */
  1993. }
  1994. goto end;
  1995. } else if (s1 == NULL || *s1 == '\0') {
  1996. rval = 1; /* regardless s0, s0 > s1 */
  1997. goto end;
  1998. }
  1999. has8_s0 = dsgw_has8thBit(s0);
  2000. has8_s1 = dsgw_has8thBit(s1);
  2001. if (has8_s0 == has8_s1) { /* both has-8th-bit or both do not */
  2002. if (has8_s0 == 0) { /* neither has-8th-bit */
  2003. rval = strcasecmp((char *)s0, (char *)s1);
  2004. goto end;
  2005. }
  2006. } else { /* one has and the other do not */
  2007. rval = strcasecmp((char *)s0, (char *)s1);
  2008. goto end;
  2009. }
  2010. d0 = dsgw_utf8StrToLower(s0);
  2011. d1 = dsgw_utf8StrToLower(s1);
  2012. if (d0 == NULL || d1 == NULL || /* either is not a UTF-8 string */
  2013. (d0 && *d0 == '\0') || (d1 && *d1 == '\0')) {
  2014. rval = strcasecmp((char *)s0, (char *)s1);
  2015. goto end;
  2016. }
  2017. p0 = d0;
  2018. p1 = d1;
  2019. t0 = d0 + strlen((char *)d0);
  2020. t1 = d1 + strlen((char *)d1);
  2021. rval = 0;
  2022. while (1) {
  2023. n0 = (unsigned char *)ldap_utf8next((char *)p0);
  2024. n1 = (unsigned char *)ldap_utf8next((char *)p1);
  2025. if (n0 > t0 || n1 > t1) {
  2026. break;
  2027. }
  2028. i0 = n0 - p0;
  2029. i1 = n1 - p1;
  2030. rval = i0 - i1;
  2031. if (rval) { /* length is different */
  2032. goto end;
  2033. }
  2034. /* i0 == i1: same length */
  2035. for (x0 = p0, x1 = p1; x0 < n0; x0++, x1++) {
  2036. rval = *x0 - *x1;
  2037. if (rval) {
  2038. goto end;
  2039. }
  2040. }
  2041. p0 = n0; p1 = n1; /* goto next */
  2042. }
  2043. /* finished scanning the shared part and check the leftover */
  2044. l0 = t0 - n0;
  2045. l1 = t1 - n1;
  2046. rval = l0 - l1;
  2047. end:
  2048. if (d0)
  2049. free(d0);
  2050. if (d1)
  2051. free(d1);
  2052. return rval;
  2053. }
  2054. /*
  2055. * dsgw_utf8ncasecmp: case-insensitive string compare (n chars) for UTF-8
  2056. * strings
  2057. *
  2058. * input: two UTF-8 strings (s0, s1) to be compared
  2059. * number or characters
  2060. * output: positive number, if s0 is after s1
  2061. * 0, if the two strings are identical ignoring the case
  2062. * negative number, if s1 is after s0
  2063. *
  2064. * Rules: Same as dsgw_utf8casecmp except the n characters limit.
  2065. *
  2066. * Notes: Don't use this function for collation
  2067. * 1) there's no notion of locale in this function.
  2068. * 2) it's UTF-8 code order, which is different from the locale
  2069. * based collation.
  2070. * n characters, NOT n bytes
  2071. */
  2072. int
  2073. dsgw_utf8ncasecmp(unsigned char *s0, unsigned char *s1, int n)
  2074. {
  2075. unsigned char *d0, *d1; /* store lower-case strings */
  2076. unsigned char *p0, *p1; /* current UTF-8 char */
  2077. unsigned char *n0, *n1; /* next UTF-8 char */
  2078. unsigned char *t0, *t1; /* tail of the strings */
  2079. unsigned char *x0, *x1; /* current byte in a char */
  2080. int i0, i1; /* length of characters */
  2081. int l0, l1; /* length of leftover */
  2082. int cnt;
  2083. int rval;
  2084. int has8_s0;
  2085. int has8_s1;
  2086. d0 = d1 = NULL;
  2087. if (s0 == NULL || *s0 == '\0') {
  2088. if (s1 == NULL || *s1 == '\0') {
  2089. rval = 0;
  2090. } else {
  2091. rval = -1; /* regardless s1, s0 < s1 */
  2092. }
  2093. goto end;
  2094. } else if (s1 == NULL || *s1 == '\0') {
  2095. rval = 1; /* regardless s0, s0 > s1 */
  2096. goto end;
  2097. }
  2098. has8_s0 = dsgw_has8thBit(s0);
  2099. has8_s1 = dsgw_has8thBit(s1);
  2100. if (has8_s0 == has8_s1) { /* both has-8th-bit or both do not */
  2101. if (has8_s0 == 0) { /* neither has-8th-bit */
  2102. rval = strncasecmp((char *)s0, (char *)s1, n);
  2103. goto end;
  2104. }
  2105. } else { /* one has and the other do not */
  2106. rval = strncasecmp((char *)s0, (char *)s1, n);
  2107. goto end;
  2108. }
  2109. d0 = dsgw_utf8StrToLower(s0);
  2110. d1 = dsgw_utf8StrToLower(s1);
  2111. if (d0 == NULL || d1 == NULL || /* either is not a UTF-8 string */
  2112. (d0 && *d0 == '\0') || (d1 && *d1 == '\0')) {
  2113. rval = strncasecmp((char *)s0, (char *)s1, n);
  2114. goto end;
  2115. }
  2116. p0 = d0;
  2117. p1 = d1;
  2118. t0 = d0 + strlen((char *)d0);
  2119. t1 = d1 + strlen((char *)d1);
  2120. rval = 0;
  2121. cnt = 0;
  2122. while (1) {
  2123. n0 = (unsigned char *)ldap_utf8next((char *)p0);
  2124. n1 = (unsigned char *)ldap_utf8next((char *)p1);
  2125. if (n0 > t0 || n1 > t1 || cnt == n) {
  2126. break;
  2127. }
  2128. i0 = n0 - p0;
  2129. i1 = n1 - p1;
  2130. rval = i0 - i1;
  2131. if (rval) /* length is different */
  2132. goto end;
  2133. /* i0 == i1: same length */
  2134. for (x0 = p0, x1 = p1; x0 < n0; x0++, x1++) {
  2135. rval = *x0 - *x1;
  2136. if (rval)
  2137. goto end;
  2138. }
  2139. p0 = n0; p1 = n1; /* goto next */
  2140. cnt++;
  2141. }
  2142. if (cnt == n)
  2143. rval = 0;
  2144. else {
  2145. /* finished scanning the shared part and check the leftover */
  2146. l0 = t0 - n0;
  2147. l1 = t1 - n1;
  2148. rval = l0 - l1;
  2149. }
  2150. end:
  2151. if (d0)
  2152. free(d0);
  2153. if (d1)
  2154. free(d1);
  2155. return rval;
  2156. }