utf8compare.c 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315
  1. /** BEGIN COPYRIGHT BLOCK
  2. * Copyright (C) 2001 Sun Microsystems, Inc. Used by permission.
  3. * Copyright (C) 2005 Red Hat, Inc.
  4. * All rights reserved.
  5. *
  6. * License: GPL (version 3 or any later version).
  7. * See LICENSE for details.
  8. * END COPYRIGHT BLOCK **/
  9. #ifdef HAVE_CONFIG_H
  10. # include <config.h>
  11. #endif
  12. #include <stdio.h>
  13. #include <string.h>
  14. #include <ctype.h>
  15. #include "ldap.h"
  16. #include "slap.h"
  17. #include "slapi-plugin.h"
  /*
   * One row of a case-conversion table.
   *
   * upper and lower point at NUL-terminated UTF-8 byte sequences; a row whose
   * pointers are both NULL marks the end of a table.
   */
  typedef struct sUpperLowerTbl
  {
      char *upper; /* UTF-8 bytes of the character to convert from */
      char *lower; /* UTF-8 bytes of the character to convert to */
      int tsz;     /* target size: in the tables in this file, the byte
                    * length of the sequence stored in lower */
  } UpperLowerTbl_t;
  /*
   * slapi_has8thBit: check whether a NUL-terminated byte string contains any
   * byte with its high (0x80) bit set.
   *
   * s: string to scan. A NULL pointer is treated like an empty string.
   *
   * return 1 if the string contains an 8-bit character
   * return 0 otherwise
   */
  int
  slapi_has8thBit(unsigned char *s)
  {
      unsigned char *p, *tail;

      if (NULL == s) {
          return 0;
      }
      p = s;
      tail = s + strlen((const char *)s);

  #if (defined(CPU_x86) || defined(CPU_x86_64))
      {
          /*
           * Fast path: examine four bytes per iteration.  The bytes are
           * copied into a local word with memcpy() instead of being read
           * through a cast pointer, so the access is valid for any alignment
           * of s and does not break the aliasing rules.  The mask has the
           * same value in every byte, so byte order does not matter.
           */
          PRUint32 word;
          unsigned char *wtail = tail - ((size_t)(tail - s) % sizeof(word));

          for (; p < wtail; p += sizeof(word)) {
              memcpy(&word, p, sizeof(word));
              if (word & 0x80808080U) {
                  return 1;
              }
          }
      }
  #endif

      /* Remaining bytes (the whole string when the fast path is disabled). */
      for (; p < tail; p++) {
          if (0x80 & *p) {
              return 1;
          }
      }
      return 0;
  }
  59. /*
  60. * UpperToLower Tables: sorted by upper characters
  61. */
  62. UpperLowerTbl_t Upper2LowerTbl20[] = {
  63. /* upper, lower */
  64. {"\303\200", "\303\240", 2},
  65. {"\303\201", "\303\241", 2},
  66. {"\303\202", "\303\242", 2},
  67. {"\303\203", "\303\243", 2},
  68. {"\303\204", "\303\244", 2},
  69. {"\303\205", "\303\245", 2},
  70. {"\303\206", "\303\246", 2},
  71. {"\303\207", "\303\247", 2},
  72. {"\303\210", "\303\250", 2},
  73. {"\303\211", "\303\251", 2},
  74. {"\303\212", "\303\252", 2},
  75. {"\303\213", "\303\253", 2},
  76. {"\303\214", "\303\254", 2},
  77. {"\303\215", "\303\255", 2},
  78. {"\303\216", "\303\256", 2},
  79. {"\303\217", "\303\257", 2},
  80. {"\303\220", "\303\260", 2},
  81. {"\303\221", "\303\261", 2},
  82. {"\303\222", "\303\262", 2},
  83. {"\303\223", "\303\263", 2},
  84. {"\303\224", "\303\264", 2},
  85. {"\303\225", "\303\265", 2},
  86. {"\303\226", "\303\266", 2},
  87. {"\303\230", "\303\270", 2},
  88. {"\303\231", "\303\271", 2},
  89. {"\303\232", "\303\272", 2},
  90. {"\303\233", "\303\273", 2},
  91. {"\303\234", "\303\274", 2},
  92. {"\303\235", "\303\275", 2},
  93. {"\303\236", "\303\276", 2},
  94. {NULL, NULL, 0}
  95. };
  96. UpperLowerTbl_t Upper2LowerTbl21[] = {
  97. {"\304\200", "\304\201", 2},
  98. {"\304\202", "\304\203", 2},
  99. {"\304\204", "\304\205", 2},
  100. {"\304\206", "\304\207", 2},
  101. {"\304\210", "\304\211", 2},
  102. {"\304\212", "\304\213", 2},
  103. {"\304\214", "\304\215", 2},
  104. {"\304\216", "\304\217", 2},
  105. {"\304\220", "\304\221", 2},
  106. {"\304\222", "\304\223", 2},
  107. {"\304\224", "\304\225", 2},
  108. {"\304\226", "\304\227", 2},
  109. {"\304\230", "\304\231", 2},
  110. {"\304\232", "\304\233", 2},
  111. {"\304\234", "\304\235", 2},
  112. {"\304\236", "\304\237", 2},
  113. {"\304\240", "\304\241", 2},
  114. {"\304\242", "\304\243", 2},
  115. {"\304\244", "\304\245", 2},
  116. {"\304\246", "\304\247", 2},
  117. {"\304\250", "\304\251", 2},
  118. {"\304\252", "\304\253", 2},
  119. {"\304\254", "\304\255", 2},
  120. {"\304\256", "\304\257", 2},
  121. {"\304\260", "\304\261", 2},
  122. {"\304\262", "\304\263", 2},
  123. {"\304\264", "\304\265", 2},
  124. {"\304\266", "\304\267", 2},
  125. {"\304\271", "\304\272", 2},
  126. {"\304\273", "\304\274", 2},
  127. {"\304\275", "\304\276", 2},
  128. {"\304\277", "\305\200", 2},
  129. {NULL, NULL, 0}
  130. };
  131. UpperLowerTbl_t Upper2LowerTbl22[] = {
  132. {"\305\201", "\305\202", 2},
  133. {"\305\203", "\305\204", 2},
  134. {"\305\205", "\305\206", 2},
  135. {"\305\207", "\305\210", 2},
  136. {"\305\212", "\305\213", 2},
  137. {"\305\214", "\305\215", 2},
  138. {"\305\216", "\305\217", 2},
  139. {"\305\220", "\305\221", 2},
  140. {"\305\222", "\305\223", 2},
  141. {"\305\224", "\305\225", 2},
  142. {"\305\226", "\305\227", 2},
  143. {"\305\230", "\305\231", 2},
  144. {"\305\232", "\305\233", 2},
  145. {"\305\234", "\305\235", 2},
  146. {"\305\236", "\305\237", 2},
  147. {"\305\240", "\305\241", 2},
  148. {"\305\242", "\305\243", 2},
  149. {"\305\244", "\305\245", 2},
  150. {"\305\246", "\305\247", 2},
  151. {"\305\250", "\305\251", 2},
  152. {"\305\252", "\305\253", 2},
  153. {"\305\254", "\305\255", 2},
  154. {"\305\256", "\305\257", 2},
  155. {"\305\260", "\305\261", 2},
  156. {"\305\262", "\305\263", 2},
  157. {"\305\264", "\305\265", 2},
  158. {"\305\266", "\305\267", 2},
  159. {"\305\270", "\303\277", 2},
  160. {"\305\271", "\305\272", 2},
  161. {"\305\273", "\305\274", 2},
  162. {"\305\275", "\305\276", 2},
  163. {NULL, NULL, 0}
  164. };
  165. UpperLowerTbl_t Upper2LowerTbl23[] = {
  166. {"\306\201", "\311\223", 2},
  167. {"\306\202", "\306\203", 2},
  168. {"\306\204", "\306\205", 2},
  169. {"\306\206", "\311\224", 2},
  170. {"\306\207", "\306\210", 2},
  171. {"\306\211", "\311\226", 2},
  172. {"\306\212", "\311\227", 2},
  173. {"\306\213", "\306\214", 2},
  174. {"\306\216", "\311\230", 2},
  175. {"\306\217", "\311\231", 2},
  176. {"\306\220", "\311\233", 2},
  177. {"\306\221", "\306\222", 2},
  178. {"\306\223", "\311\240", 2},
  179. {"\306\224", "\311\243", 2},
  180. {"\306\226", "\311\251", 2},
  181. {"\306\227", "\311\250", 2},
  182. {"\306\230", "\306\231", 2},
  183. {"\306\234", "\311\257", 2},
  184. {"\306\235", "\311\262", 2},
  185. {"\306\237", "\306\237", 2},
  186. {"\306\240", "\306\241", 2},
  187. {"\306\242", "\306\243", 2},
  188. {"\306\244", "\306\245", 2},
  189. {"\306\246", "\306\246", 2},
  190. {"\306\247", "\306\250", 2},
  191. {"\306\251", "\312\203", 2},
  192. {"\306\254", "\306\255", 2},
  193. {"\306\256", "\312\210", 2},
  194. {"\306\257", "\306\260", 2},
  195. {"\306\261", "\312\212", 2},
  196. {"\306\262", "\312\213", 2},
  197. {"\306\263", "\306\264", 2},
  198. {"\306\265", "\306\266", 2},
  199. {"\306\267", "\312\222", 2},
  200. {"\306\270", "\306\271", 2},
  201. {"\306\274", "\306\275", 2},
  202. {NULL, NULL, 0}
  203. };
  204. UpperLowerTbl_t Upper2LowerTbl24[] = {
  205. {"\307\204", "\307\205", 2},
  206. {"\307\205", "\307\204", 2},
  207. {"\307\207", "\307\210", 2},
  208. {"\307\210", "\307\207", 2},
  209. {"\307\212", "\307\213", 2},
  210. {"\307\213", "\307\212", 2},
  211. {"\307\215", "\307\216", 2},
  212. {"\307\217", "\307\220", 2},
  213. {"\307\221", "\307\222", 2},
  214. {"\307\223", "\307\224", 2},
  215. {"\307\225", "\307\226", 2},
  216. {"\307\227", "\307\230", 2},
  217. {"\307\231", "\307\232", 2},
  218. {"\307\233", "\307\234", 2},
  219. {"\307\236", "\307\237", 2},
  220. {"\307\240", "\307\241", 2},
  221. {"\307\242", "\307\243", 2},
  222. {"\307\244", "\307\245", 2},
  223. {"\307\246", "\307\247", 2},
  224. {"\307\250", "\307\251", 2},
  225. {"\307\252", "\307\253", 2},
  226. {"\307\254", "\307\255", 2},
  227. {"\307\256", "\307\257", 2},
  228. {"\307\261", "\307\262", 2},
  229. {"\307\262", "\307\261", 2},
  230. {"\307\264", "\307\265", 2},
  231. {"\307\272", "\307\273", 2},
  232. {"\307\274", "\307\275", 2},
  233. {"\307\276", "\307\277", 2},
  234. {NULL, NULL, 0}
  235. };
  236. UpperLowerTbl_t Upper2LowerTbl25[] = {
  237. {"\310\200", "\310\201", 2},
  238. {"\310\202", "\310\203", 2},
  239. {"\310\204", "\310\205", 2},
  240. {"\310\206", "\310\207", 2},
  241. {"\310\210", "\310\211", 2},
  242. {"\310\212", "\310\213", 2},
  243. {"\310\214", "\310\215", 2},
  244. {"\310\216", "\310\217", 2},
  245. {"\310\220", "\310\221", 2},
  246. {"\310\222", "\310\223", 2},
  247. {"\310\224", "\310\225", 2},
  248. {"\310\226", "\310\227", 2},
  249. {NULL, NULL, 0}
  250. };
  251. UpperLowerTbl_t Upper2LowerTbl26[] = {
  252. {"\316\206", "\316\254", 2},
  253. {"\316\210", "\316\255", 2},
  254. {"\316\211", "\316\256", 2},
  255. {"\316\212", "\316\257", 2},
  256. {"\316\214", "\317\214", 2},
  257. {"\316\216", "\317\215", 2},
  258. {"\316\217", "\317\216", 2},
  259. {"\316\221", "\316\261", 2},
  260. {"\316\222", "\316\262", 2},
  261. {"\316\223", "\316\263", 2},
  262. {"\316\224", "\316\264", 2},
  263. {"\316\225", "\316\265", 2},
  264. {"\316\226", "\316\266", 2},
  265. {"\316\227", "\316\267", 2},
  266. {"\316\230", "\316\270", 2},
  267. {"\316\231", "\316\271", 2},
  268. {"\316\232", "\316\272", 2},
  269. {"\316\233", "\316\273", 2},
  270. {"\316\234", "\316\274", 2},
  271. {"\316\235", "\316\275", 2},
  272. {"\316\236", "\316\276", 2},
  273. {"\316\237", "\316\277", 2},
  274. {"\316\240", "\317\200", 2},
  275. {"\316\241", "\317\201", 2},
  276. {"\316\243", "\317\203", 2},
  277. {"\316\244", "\317\204", 2},
  278. {"\316\245", "\317\205", 2},
  279. {"\316\246", "\317\206", 2},
  280. {"\316\247", "\317\207", 2},
  281. {"\316\250", "\317\210", 2},
  282. {"\316\251", "\317\211", 2},
  283. {"\316\252", "\317\212", 2},
  284. {"\316\253", "\317\213", 2},
  285. {NULL, NULL, 0}
  286. };
  287. UpperLowerTbl_t Upper2LowerTbl27[] = {
  288. {"\317\222", "\317\222", 2},
  289. {"\317\223", "\317\223", 2},
  290. {"\317\224", "\317\224", 2},
  291. {"\317\232", "\317\232", 2},
  292. {"\317\234", "\317\234", 2},
  293. {"\317\236", "\317\236", 2},
  294. {"\317\240", "\317\240", 2},
  295. {"\317\242", "\317\243", 2},
  296. {"\317\244", "\317\245", 2},
  297. {"\317\246", "\317\247", 2},
  298. {"\317\250", "\317\251", 2},
  299. {"\317\252", "\317\253", 2},
  300. {"\317\254", "\317\255", 2},
  301. {"\317\256", "\317\257", 2},
  302. {NULL, NULL, 0}
  303. };
  304. UpperLowerTbl_t Upper2LowerTbl28[] = {
  305. {"\320\201", "\321\221", 2},
  306. {"\320\202", "\321\222", 2},
  307. {"\320\203", "\321\223", 2},
  308. {"\320\204", "\321\224", 2},
  309. {"\320\205", "\321\225", 2},
  310. {"\320\206", "\321\226", 2},
  311. {"\320\207", "\321\227", 2},
  312. {"\320\210", "\321\230", 2},
  313. {"\320\211", "\321\231", 2},
  314. {"\320\212", "\321\232", 2},
  315. {"\320\213", "\321\233", 2},
  316. {"\320\214", "\321\234", 2},
  317. {"\320\216", "\321\236", 2},
  318. {"\320\217", "\321\237", 2},
  319. {"\320\220", "\320\260", 2},
  320. {"\320\221", "\320\261", 2},
  321. {"\320\222", "\320\262", 2},
  322. {"\320\223", "\320\263", 2},
  323. {"\320\224", "\320\264", 2},
  324. {"\320\225", "\320\265", 2},
  325. {"\320\226", "\320\266", 2},
  326. {"\320\227", "\320\267", 2},
  327. {"\320\230", "\320\270", 2},
  328. {"\320\231", "\320\271", 2},
  329. {"\320\232", "\320\272", 2},
  330. {"\320\233", "\320\273", 2},
  331. {"\320\234", "\320\274", 2},
  332. {"\320\235", "\320\275", 2},
  333. {"\320\236", "\320\276", 2},
  334. {"\320\237", "\320\277", 2},
  335. {"\320\240", "\321\200", 2},
  336. {"\320\241", "\321\201", 2},
  337. {"\320\242", "\321\202", 2},
  338. {"\320\243", "\321\203", 2},
  339. {"\320\244", "\321\204", 2},
  340. {"\320\245", "\321\205", 2},
  341. {"\320\246", "\321\206", 2},
  342. {"\320\247", "\321\207", 2},
  343. {"\320\250", "\321\210", 2},
  344. {"\320\251", "\321\211", 2},
  345. {"\320\252", "\321\212", 2},
  346. {"\320\253", "\321\213", 2},
  347. {"\320\254", "\321\214", 2},
  348. {"\320\255", "\321\215", 2},
  349. {"\320\256", "\321\216", 2},
  350. {"\320\257", "\321\217", 2},
  351. {NULL, NULL, 0}
  352. };
  353. UpperLowerTbl_t Upper2LowerTbl29[] = {
  354. {"\321\240", "\321\241", 2},
  355. {"\321\242", "\321\243", 2},
  356. {"\321\244", "\321\245", 2},
  357. {"\321\246", "\321\247", 2},
  358. {"\321\250", "\321\251", 2},
  359. {"\321\252", "\321\253", 2},
  360. {"\321\254", "\321\255", 2},
  361. {"\321\256", "\321\257", 2},
  362. {"\321\260", "\321\261", 2},
  363. {"\321\262", "\321\263", 2},
  364. {"\321\264", "\321\265", 2},
  365. {"\321\266", "\321\267", 2},
  366. {"\321\270", "\321\271", 2},
  367. {"\321\272", "\321\273", 2},
  368. {"\321\274", "\321\275", 2},
  369. {"\321\276", "\321\277", 2},
  370. {NULL, NULL, 0}
  371. };
  372. UpperLowerTbl_t Upper2LowerTbl2a[] = {
  373. {"\322\200", "\322\201", 2},
  374. {"\322\220", "\322\221", 2},
  375. {"\322\222", "\322\223", 2},
  376. {"\322\224", "\322\225", 2},
  377. {"\322\226", "\322\227", 2},
  378. {"\322\230", "\322\231", 2},
  379. {"\322\232", "\322\233", 2},
  380. {"\322\234", "\322\235", 2},
  381. {"\322\236", "\322\237", 2},
  382. {"\322\240", "\322\241", 2},
  383. {"\322\242", "\322\243", 2},
  384. {"\322\244", "\322\245", 2},
  385. {"\322\246", "\322\247", 2},
  386. {"\322\250", "\322\251", 2},
  387. {"\322\252", "\322\253", 2},
  388. {"\322\254", "\322\255", 2},
  389. {"\322\256", "\322\257", 2},
  390. {"\322\260", "\322\261", 2},
  391. {"\322\262", "\322\263", 2},
  392. {"\322\264", "\322\265", 2},
  393. {"\322\266", "\322\267", 2},
  394. {"\322\270", "\322\271", 2},
  395. {"\322\272", "\322\273", 2},
  396. {"\322\274", "\322\275", 2},
  397. {"\322\276", "\322\277", 2},
  398. {NULL, NULL, 0}
  399. };
  400. UpperLowerTbl_t Upper2LowerTbl2b[] = {
  401. {"\323\201", "\323\202", 2},
  402. {"\323\203", "\323\204", 2},
  403. {"\323\207", "\323\210", 2},
  404. {"\323\213", "\323\214", 2},
  405. {"\323\220", "\323\221", 2},
  406. {"\323\222", "\323\223", 2},
  407. {"\323\224", "\323\225", 2},
  408. {"\323\226", "\323\227", 2},
  409. {"\323\230", "\323\231", 2},
  410. {"\323\232", "\323\233", 2},
  411. {"\323\234", "\323\235", 2},
  412. {"\323\236", "\323\237", 2},
  413. {"\323\240", "\323\241", 2},
  414. {"\323\242", "\323\243", 2},
  415. {"\323\244", "\323\245", 2},
  416. {"\323\246", "\323\247", 2},
  417. {"\323\250", "\323\251", 2},
  418. {"\323\252", "\323\253", 2},
  419. {"\323\256", "\323\257", 2},
  420. {"\323\260", "\323\261", 2},
  421. {"\323\262", "\323\263", 2},
  422. {"\323\264", "\323\265", 2},
  423. {"\323\270", "\323\271", 2},
  424. {NULL, NULL, 0}
  425. };
  426. UpperLowerTbl_t Upper2LowerTbl2c[] = {
  427. {"\324\261", "\325\241", 2},
  428. {"\324\262", "\325\242", 2},
  429. {"\324\263", "\325\243", 2},
  430. {"\324\264", "\325\244", 2},
  431. {"\324\265", "\325\245", 2},
  432. {"\324\266", "\325\246", 2},
  433. {"\324\267", "\325\247", 2},
  434. {"\324\270", "\325\250", 2},
  435. {"\324\271", "\325\251", 2},
  436. {"\324\272", "\325\252", 2},
  437. {"\324\273", "\325\253", 2},
  438. {"\324\274", "\325\254", 2},
  439. {"\324\275", "\325\255", 2},
  440. {"\324\276", "\325\256", 2},
  441. {"\324\277", "\325\257", 2},
  442. {NULL, NULL, 0}
  443. };
  444. UpperLowerTbl_t Upper2LowerTbl2d[] = {
  445. {"\325\200", "\325\260", 2},
  446. {"\325\201", "\325\261", 2},
  447. {"\325\202", "\325\262", 2},
  448. {"\325\203", "\325\263", 2},
  449. {"\325\204", "\325\264", 2},
  450. {"\325\205", "\325\265", 2},
  451. {"\325\206", "\325\266", 2},
  452. {"\325\207", "\325\267", 2},
  453. {"\325\210", "\325\270", 2},
  454. {"\325\211", "\325\271", 2},
  455. {"\325\212", "\325\272", 2},
  456. {"\325\213", "\325\273", 2},
  457. {"\325\214", "\325\274", 2},
  458. {"\325\215", "\325\275", 2},
  459. {"\325\216", "\325\276", 2},
  460. {"\325\217", "\325\277", 2},
  461. {"\325\220", "\326\200", 2},
  462. {"\325\221", "\326\201", 2},
  463. {"\325\222", "\326\202", 2},
  464. {"\325\223", "\326\203", 2},
  465. {"\325\224", "\326\204", 2},
  466. {"\325\225", "\326\205", 2},
  467. {"\325\226", "\326\206", 2},
  468. {NULL, NULL, 0}
  469. /* upper, lower */
  470. };
  471. UpperLowerTbl_t Upper2LowerTbl30[] = {
  472. /* upper, lower */
  473. {"\341\202\240", "\341\203\220", 3},
  474. {"\341\202\241", "\341\203\221", 3},
  475. {"\341\202\242", "\341\203\222", 3},
  476. {"\341\202\243", "\341\203\223", 3},
  477. {"\341\202\244", "\341\203\224", 3},
  478. {"\341\202\245", "\341\203\225", 3},
  479. {"\341\202\246", "\341\203\226", 3},
  480. {"\341\202\247", "\341\203\227", 3},
  481. {"\341\202\250", "\341\203\230", 3},
  482. {"\341\202\251", "\341\203\231", 3},
  483. {"\341\202\252", "\341\203\232", 3},
  484. {"\341\202\253", "\341\203\233", 3},
  485. {"\341\202\254", "\341\203\234", 3},
  486. {"\341\202\255", "\341\203\235", 3},
  487. {"\341\202\256", "\341\203\236", 3},
  488. {"\341\202\257", "\341\203\237", 3},
  489. {"\341\202\260", "\341\203\240", 3},
  490. {"\341\202\261", "\341\203\241", 3},
  491. {"\341\202\262", "\341\203\242", 3},
  492. {"\341\202\263", "\341\203\243", 3},
  493. {"\341\202\264", "\341\203\244", 3},
  494. {"\341\202\265", "\341\203\245", 3},
  495. {"\341\202\266", "\341\203\246", 3},
  496. {"\341\202\267", "\341\203\247", 3},
  497. {"\341\202\270", "\341\203\250", 3},
  498. {"\341\202\271", "\341\203\251", 3},
  499. {"\341\202\272", "\341\203\252", 3},
  500. {"\341\202\273", "\341\203\253", 3},
  501. {"\341\202\274", "\341\203\254", 3},
  502. {"\341\202\275", "\341\203\255", 3},
  503. {"\341\202\276", "\341\203\256", 3},
  504. {"\341\202\277", "\341\203\257", 3},
  505. {"\341\203\200", "\341\203\260", 3},
  506. {"\341\203\201", "\341\203\261", 3},
  507. {"\341\203\202", "\341\203\262", 3},
  508. {"\341\203\203", "\341\203\263", 3},
  509. {"\341\203\204", "\341\203\264", 3},
  510. {"\341\203\205", "\341\203\265", 3},
  511. {"\341\270\200", "\341\270\201", 3},
  512. {"\341\270\202", "\341\270\203", 3},
  513. {"\341\270\204", "\341\270\205", 3},
  514. {"\341\270\206", "\341\270\207", 3},
  515. {"\341\270\210", "\341\270\211", 3},
  516. {"\341\270\212", "\341\270\213", 3},
  517. {"\341\270\214", "\341\270\215", 3},
  518. {"\341\270\216", "\341\270\217", 3},
  519. {"\341\270\220", "\341\270\221", 3},
  520. {"\341\270\222", "\341\270\223", 3},
  521. {"\341\270\224", "\341\270\225", 3},
  522. {"\341\270\226", "\341\270\227", 3},
  523. {"\341\270\230", "\341\270\231", 3},
  524. {"\341\270\232", "\341\270\233", 3},
  525. {"\341\270\234", "\341\270\235", 3},
  526. {"\341\270\236", "\341\270\237", 3},
  527. {"\341\270\240", "\341\270\241", 3},
  528. {"\341\270\242", "\341\270\243", 3},
  529. {"\341\270\244", "\341\270\245", 3},
  530. {"\341\270\246", "\341\270\247", 3},
  531. {"\341\270\250", "\341\270\251", 3},
  532. {"\341\270\252", "\341\270\253", 3},
  533. {"\341\270\254", "\341\270\255", 3},
  534. {"\341\270\256", "\341\270\257", 3},
  535. {"\341\270\260", "\341\270\261", 3},
  536. {"\341\270\262", "\341\270\263", 3},
  537. {"\341\270\264", "\341\270\265", 3},
  538. {"\341\270\266", "\341\270\267", 3},
  539. {"\341\270\270", "\341\270\271", 3},
  540. {"\341\270\272", "\341\270\273", 3},
  541. {"\341\270\274", "\341\270\275", 3},
  542. {"\341\270\276", "\341\270\277", 3},
  543. {"\341\271\200", "\341\271\201", 3},
  544. {"\341\271\202", "\341\271\203", 3},
  545. {"\341\271\204", "\341\271\205", 3},
  546. {"\341\271\206", "\341\271\207", 3},
  547. {"\341\271\210", "\341\271\211", 3},
  548. {"\341\271\212", "\341\271\213", 3},
  549. {"\341\271\214", "\341\271\215", 3},
  550. {"\341\271\216", "\341\271\217", 3},
  551. {"\341\271\220", "\341\271\221", 3},
  552. {"\341\271\222", "\341\271\223", 3},
  553. {"\341\271\224", "\341\271\225", 3},
  554. {"\341\271\226", "\341\271\227", 3},
  555. {"\341\271\230", "\341\271\231", 3},
  556. {"\341\271\232", "\341\271\233", 3},
  557. {"\341\271\234", "\341\271\235", 3},
  558. {"\341\271\236", "\341\271\237", 3},
  559. {"\341\271\240", "\341\271\241", 3},
  560. {"\341\271\242", "\341\271\243", 3},
  561. {"\341\271\244", "\341\271\245", 3},
  562. {"\341\271\246", "\341\271\247", 3},
  563. {"\341\271\250", "\341\271\251", 3},
  564. {"\341\271\252", "\341\271\253", 3},
  565. {"\341\271\254", "\341\271\255", 3},
  566. {"\341\271\256", "\341\271\257", 3},
  567. {"\341\271\260", "\341\271\261", 3},
  568. {"\341\271\262", "\341\271\263", 3},
  569. {"\341\271\264", "\341\271\265", 3},
  570. {"\341\271\266", "\341\271\267", 3},
  571. {"\341\271\270", "\341\271\271", 3},
  572. {"\341\271\272", "\341\271\273", 3},
  573. {"\341\271\274", "\341\271\275", 3},
  574. {"\341\271\276", "\341\271\277", 3},
  575. {"\341\272\200", "\341\272\201", 3},
  576. {"\341\272\202", "\341\272\203", 3},
  577. {"\341\272\204", "\341\272\205", 3},
  578. {"\341\272\206", "\341\272\207", 3},
  579. {"\341\272\210", "\341\272\211", 3},
  580. {"\341\272\212", "\341\272\213", 3},
  581. {"\341\272\214", "\341\272\215", 3},
  582. {"\341\272\216", "\341\272\217", 3},
  583. {"\341\272\220", "\341\272\221", 3},
  584. {"\341\272\222", "\341\272\223", 3},
  585. {"\341\272\224", "\341\272\225", 3},
  586. {"\341\272\240", "\341\272\241", 3},
  587. {"\341\272\242", "\341\272\243", 3},
  588. {"\341\272\244", "\341\272\245", 3},
  589. {"\341\272\246", "\341\272\247", 3},
  590. {"\341\272\250", "\341\272\251", 3},
  591. {"\341\272\252", "\341\272\253", 3},
  592. {"\341\272\254", "\341\272\255", 3},
  593. {"\341\272\256", "\341\272\257", 3},
  594. {"\341\272\260", "\341\272\261", 3},
  595. {"\341\272\262", "\341\272\263", 3},
  596. {"\341\272\264", "\341\272\265", 3},
  597. {"\341\272\266", "\341\272\267", 3},
  598. {"\341\272\270", "\341\272\271", 3},
  599. {"\341\272\272", "\341\272\273", 3},
  600. {"\341\272\274", "\341\272\275", 3},
  601. {"\341\272\276", "\341\272\277", 3},
  602. {"\341\273\200", "\341\273\201", 3},
  603. {"\341\273\202", "\341\273\203", 3},
  604. {"\341\273\204", "\341\273\205", 3},
  605. {"\341\273\206", "\341\273\207", 3},
  606. {"\341\273\210", "\341\273\211", 3},
  607. {"\341\273\212", "\341\273\213", 3},
  608. {"\341\273\214", "\341\273\215", 3},
  609. {"\341\273\216", "\341\273\217", 3},
  610. {"\341\273\220", "\341\273\221", 3},
  611. {"\341\273\222", "\341\273\223", 3},
  612. {"\341\273\224", "\341\273\225", 3},
  613. {"\341\273\226", "\341\273\227", 3},
  614. {"\341\273\230", "\341\273\231", 3},
  615. {"\341\273\232", "\341\273\233", 3},
  616. {"\341\273\234", "\341\273\235", 3},
  617. {"\341\273\236", "\341\273\237", 3},
  618. {"\341\273\240", "\341\273\241", 3},
  619. {"\341\273\242", "\341\273\243", 3},
  620. {"\341\273\244", "\341\273\245", 3},
  621. {"\341\273\246", "\341\273\247", 3},
  622. {"\341\273\250", "\341\273\251", 3},
  623. {"\341\273\252", "\341\273\253", 3},
  624. {"\341\273\254", "\341\273\255", 3},
  625. {"\341\273\256", "\341\273\257", 3},
  626. {"\341\273\260", "\341\273\261", 3},
  627. {"\341\273\262", "\341\273\263", 3},
  628. {"\341\273\264", "\341\273\265", 3},
  629. {"\341\273\266", "\341\273\267", 3},
  630. {"\341\273\270", "\341\273\271", 3},
  631. {"\341\274\210", "\341\274\200", 3},
  632. {"\341\274\211", "\341\274\201", 3},
  633. {"\341\274\212", "\341\274\202", 3},
  634. {"\341\274\213", "\341\274\203", 3},
  635. {"\341\274\214", "\341\274\204", 3},
  636. {"\341\274\215", "\341\274\205", 3},
  637. {"\341\274\216", "\341\274\206", 3},
  638. {"\341\274\217", "\341\274\207", 3},
  639. {"\341\274\230", "\341\274\220", 3},
  640. {"\341\274\231", "\341\274\221", 3},
  641. {"\341\274\232", "\341\274\222", 3},
  642. {"\341\274\233", "\341\274\223", 3},
  643. {"\341\274\234", "\341\274\224", 3},
  644. {"\341\274\235", "\341\274\225", 3},
  645. {"\341\274\250", "\341\274\240", 3},
  646. {"\341\274\251", "\341\274\241", 3},
  647. {"\341\274\252", "\341\274\242", 3},
  648. {"\341\274\253", "\341\274\243", 3},
  649. {"\341\274\254", "\341\274\244", 3},
  650. {"\341\274\255", "\341\274\245", 3},
  651. {"\341\274\256", "\341\274\246", 3},
  652. {"\341\274\257", "\341\274\247", 3},
  653. {"\341\274\270", "\341\274\260", 3},
  654. {"\341\274\271", "\341\274\261", 3},
  655. {"\341\274\272", "\341\274\262", 3},
  656. {"\341\274\273", "\341\274\263", 3},
  657. {"\341\274\274", "\341\274\264", 3},
  658. {"\341\274\275", "\341\274\265", 3},
  659. {"\341\274\276", "\341\274\266", 3},
  660. {"\341\274\277", "\341\274\267", 3},
  661. {"\341\275\210", "\341\275\200", 3},
  662. {"\341\275\211", "\341\275\201", 3},
  663. {"\341\275\212", "\341\275\202", 3},
  664. {"\341\275\213", "\341\275\203", 3},
  665. {"\341\275\214", "\341\275\204", 3},
  666. {"\341\275\215", "\341\275\205", 3},
  667. {"\341\275\231", "\341\275\221", 3},
  668. {"\341\275\233", "\341\275\223", 3},
  669. {"\341\275\235", "\341\275\225", 3},
  670. {"\341\275\237", "\341\275\227", 3},
  671. {"\341\275\250", "\341\275\240", 3},
  672. {"\341\275\251", "\341\275\241", 3},
  673. {"\341\275\252", "\341\275\242", 3},
  674. {"\341\275\253", "\341\275\243", 3},
  675. {"\341\275\254", "\341\275\244", 3},
  676. {"\341\275\255", "\341\275\245", 3},
  677. {"\341\275\256", "\341\275\246", 3},
  678. {"\341\275\257", "\341\275\247", 3},
  679. {"\341\276\210", "\341\276\200", 3},
  680. {"\341\276\211", "\341\276\201", 3},
  681. {"\341\276\212", "\341\276\202", 3},
  682. {"\341\276\213", "\341\276\203", 3},
  683. {"\341\276\214", "\341\276\204", 3},
  684. {"\341\276\215", "\341\276\205", 3},
  685. {"\341\276\216", "\341\276\206", 3},
  686. {"\341\276\217", "\341\276\207", 3},
  687. {"\341\276\230", "\341\276\220", 3},
  688. {"\341\276\231", "\341\276\221", 3},
  689. {"\341\276\232", "\341\276\222", 3},
  690. {"\341\276\233", "\341\276\223", 3},
  691. {"\341\276\234", "\341\276\224", 3},
  692. {"\341\276\235", "\341\276\225", 3},
  693. {"\341\276\236", "\341\276\226", 3},
  694. {"\341\276\237", "\341\276\227", 3},
  695. {"\341\276\250", "\341\276\240", 3},
  696. {"\341\276\251", "\341\276\241", 3},
  697. {"\341\276\252", "\341\276\242", 3},
  698. {"\341\276\253", "\341\276\243", 3},
  699. {"\341\276\254", "\341\276\244", 3},
  700. {"\341\276\255", "\341\276\245", 3},
  701. {"\341\276\256", "\341\276\246", 3},
  702. {"\341\276\257", "\341\276\247", 3},
  703. {"\341\276\270", "\341\276\260", 3},
  704. {"\341\276\271", "\341\276\261", 3},
  705. {"\341\276\272", "\341\275\260", 3},
  706. {"\341\276\273", "\341\275\261", 3},
  707. {"\341\276\274", "\341\276\263", 3},
  708. {"\341\276\276", "\341\276\276", 3},
  709. {"\341\277\210", "\341\275\262", 3},
  710. {"\341\277\211", "\341\275\263", 3},
  711. {"\341\277\212", "\341\275\264", 3},
  712. {"\341\277\213", "\341\275\265", 3},
  713. {"\341\277\214", "\341\277\203", 3},
  714. {"\341\277\230", "\341\277\220", 3},
  715. {"\341\277\231", "\341\277\221", 3},
  716. {"\341\277\232", "\341\275\266", 3},
  717. {"\341\277\233", "\341\275\267", 3},
  718. {"\341\277\250", "\341\277\240", 3},
  719. {"\341\277\251", "\341\277\241", 3},
  720. {"\341\277\252", "\341\275\272", 3},
  721. {"\341\277\253", "\341\275\273", 3},
  722. {"\341\277\254", "\341\277\245", 3},
  723. {"\341\277\270", "\341\275\270", 3},
  724. {"\341\277\271", "\341\275\271", 3},
  725. {"\341\277\272", "\341\275\274", 3},
  726. {"\341\277\273", "\341\275\275", 3},
  727. {"\341\277\274", "\341\277\263", 3},
  728. {NULL, NULL, 0}
  729. };
  /*
   * Upper-to-lower table for 3-byte characters whose lead byte is \357.
   * Each row is {upper, lower, tsz}; the all-NULL row ends the table.
   */
  730. UpperLowerTbl_t Upper2LowerTbl31[] = {
  731. {"\357\274\241", "\357\275\201", 3},
  732. {"\357\274\242", "\357\275\202", 3},
  733. {"\357\274\243", "\357\275\203", 3},
  734. {"\357\274\244", "\357\275\204", 3},
  735. {"\357\274\245", "\357\275\205", 3},
  736. {"\357\274\246", "\357\275\206", 3},
  737. {"\357\274\247", "\357\275\207", 3},
  738. {"\357\274\250", "\357\275\210", 3},
  739. {"\357\274\251", "\357\275\211", 3},
  740. {"\357\274\252", "\357\275\212", 3},
  741. {"\357\274\253", "\357\275\213", 3},
  742. {"\357\274\254", "\357\275\214", 3},
  743. {"\357\274\255", "\357\275\215", 3},
  744. {"\357\274\256", "\357\275\216", 3},
  745. {"\357\274\257", "\357\275\217", 3},
  746. {"\357\274\260", "\357\275\220", 3},
  747. {"\357\274\261", "\357\275\221", 3},
  748. {"\357\274\262", "\357\275\222", 3},
  749. {"\357\274\263", "\357\275\223", 3},
  750. {"\357\274\264", "\357\275\224", 3},
  751. {"\357\274\265", "\357\275\225", 3},
  752. {"\357\274\266", "\357\275\226", 3},
  753. {"\357\274\267", "\357\275\227", 3},
  754. {"\357\274\270", "\357\275\230", 3},
  755. {"\357\274\271", "\357\275\231", 3},
  756. {"\357\274\272", "\357\275\232", 3},
  757. {NULL, NULL, 0}
  758. /* row layout: upper, lower (the all-NULL row above is the terminator) */
  759. };
  /*
   * Index of the 2-byte upper-to-lower tables, one slot per lead byte.
   * The conversion routines index it with (lead byte - UL2S); the lead byte
   * for each slot is noted on its row. A NULL slot means there is no table
   * for that lead byte.
   */
  760. UpperLowerTbl_t *Upper2LowerTbl2[] = {
  761. Upper2LowerTbl20, /* \303 */
  762. Upper2LowerTbl21, /* \304 */
  763. Upper2LowerTbl22, /* \305 */
  764. Upper2LowerTbl23, /* \306 */
  765. Upper2LowerTbl24, /* \307 */
  766. Upper2LowerTbl25, /* \310 */
  767. NULL, /* \311 */
  768. NULL, /* \312 */
  769. NULL, /* \313 */
  770. NULL, /* \314 */
  771. NULL, /* \315 */
  772. Upper2LowerTbl26, /* \316 */
  773. Upper2LowerTbl27, /* \317 */
  774. Upper2LowerTbl28, /* \320 */
  775. Upper2LowerTbl29, /* \321 */
  776. Upper2LowerTbl2a, /* \322 */
  777. Upper2LowerTbl2b, /* \323 */
  778. Upper2LowerTbl2c, /* \324 */
  779. Upper2LowerTbl2d /* \325 */
  780. };
  /*
   * Index of the 3-byte upper-to-lower tables, one slot per lead byte.
   * The conversion routines index it with (lead byte - UL3S); only the
   * first (\341) and last (\357) slots have a table, the rest are NULL.
   */
  781. UpperLowerTbl_t *Upper2LowerTbl3[] = {
  782. Upper2LowerTbl30, /* \341 */
  783. NULL, /* \342 */
  784. NULL, /* \343 */
  785. NULL, /* \344 */
  786. NULL, /* \345 */
  787. NULL, /* \346 */
  788. NULL, /* \347 */
  789. NULL, /* \350 */
  790. NULL, /* \351 */
  791. NULL, /* \352 */
  792. NULL, /* \353 */
  793. NULL, /* \354 */
  794. NULL, /* \355 */
  795. NULL, /* \356 */
  796. Upper2LowerTbl31 /* \357 */
  797. };
  /*
   * Lead-byte bounds of the Upper2Lower index arrays:
   *   UL2S..UL2E  first/last lead byte covered by Upper2LowerTbl2 (2-byte chars)
   *   UL3S..UL3E  first/last lead byte covered by Upper2LowerTbl3 (3-byte chars)
   * Each expansion is fully parenthesized so it behaves as a single operand
   * in any expression (e.g. "sizeof UL2S").
   */
  #define UL2S ((unsigned char)'\303')
  #define UL2E ((unsigned char)'\325')
  #define UL3S ((unsigned char)'\341')
  #define UL3E ((unsigned char)'\357')
  802. /*
  803. * slapi_utf8StrToLower: translate an upper-case string to lower-case
  804. *
  805. * input: a null-terminated UTF-8 string
  806. * output: a null-terminated UTF-8 string whose upper-case characters
  807. * are converted to lower-case; characters which are not
  808. * upper-case are copied as is. If the input is not considered
  809. * a UTF-8 string, NULL is returned.
  810. *
  811. * Notes: This function takes a string (made of multiple UTF-8 characters)
  812. * for the input (not one character as in "tolower").
  813. * The output string is allocated in this function and needs to be
  814. * released by the caller when it is no longer needed.
  815. */
  /*
   * char-typed entry point: reinterprets s as unsigned bytes and hands it
   * to slapi_utf8StrToLower(), returning whatever that function returns.
   */
  unsigned char *
  slapi_UTF8STRTOLOWER(char *s)
  {
      unsigned char *input = (unsigned char *)s;

      return slapi_utf8StrToLower(input);
  }
  821. unsigned char *
  822. slapi_utf8StrToLower(unsigned char *s)
  823. {
  824. UpperLowerTbl_t *ultp;
  825. unsigned char *p, *np, *tail;
  826. unsigned char *lp, *lphead;
  827. int len, sz;
  828. if (s == NULL || *s == '\0') {
  829. return s;
  830. }
  831. len = strlen((char *)s);
  832. tail = s + len;
  833. lphead = lp = (unsigned char *)slapi_ch_malloc(len + 1);
  834. p = s;
  835. while ((np = (unsigned char *)ldap_utf8next((char *)p)) <= tail) {
  836. switch(sz = np - p) {
  837. case 1:
  838. *lp = tolower(*p);
  839. break;
  840. case 2:
  841. if (*p < UL2S || *p > UL2E) { /* out of range */
  842. memcpy(lp, p, sz);
  843. break;
  844. }
  845. for (ultp = Upper2LowerTbl2[*p - UL2S];
  846. ultp && ultp->upper && memcmp(p, ultp->upper, sz);
  847. ultp++)
  848. ;
  849. if (!ultp) { /* out of range */
  850. memcpy(lp, p, sz);
  851. } else if (ultp->upper) { /* matched */
  852. memcpy(lp, ultp->lower, ultp->tsz);
  853. sz = ultp->tsz;
  854. } else {
  855. memcpy(lp, p, sz);
  856. }
  857. break;
  858. case 3:
  859. if (*p != UL3S && *p != UL3E) { /* out of range */
  860. memcpy(lp, p, sz);
  861. break;
  862. }
  863. for (ultp = Upper2LowerTbl3[*p - UL3S];
  864. ultp && ultp->upper && memcmp(p, ultp->upper, sz);
  865. ultp++)
  866. ;
  867. if (!ultp) { /* out of range */
  868. memcpy(lp, p, sz);
  869. } else if (ultp->upper) { /* matched */
  870. memcpy(lp, ultp->lower, sz);
  871. } else {
  872. memcpy(lp, p, sz);
  873. }
  874. break;
  875. case 4:
  876. memcpy(lp, p, sz);
  877. break;
  878. default: /* not UTF-8 */
  879. slapi_ch_free((void **)&lphead);
  880. return NULL;
  881. }
  882. lp += sz;
  883. p = np;
  884. if (p == tail) {
  885. break;
  886. }
  887. }
  888. *lp = '\0';
  889. return lphead;
  890. }
  891. /*
  892. * slapi_utf8ToLower: translate upper-case character to lower-case
  893. *
  894. * input: a UTF-8 character (s)
  895. * output: a UTF-8 character which is converted to lower-case (d)
  896. * length (in bytes) of input character (ssz) and
  897. * output character (dsz)
  898. *
  899. * Notes: This function takes a UTF-8 character (could be multiple bytes)
  900. * for the input. Memory for the output character is NOT allocated
  901. * in this function; the caller must have allocated it (d).
  902. * "memmove" is used since (s) and (d) may overlap.
  903. */
  /*
   * char-typed entry point: casts both character buffers to unsigned bytes
   * and defers to slapi_utf8ToLower(); ssz and dsz are passed straight through.
   */
  void
  slapi_UTF8TOLOWER(char *s, char *d, int *ssz, int *dsz)
  {
      unsigned char *src = (unsigned char *)s;
      unsigned char *dst = (unsigned char *)d;

      slapi_utf8ToLower(src, dst, ssz, dsz);
  }
  910. void
  911. slapi_utf8ToLower(unsigned char *s, unsigned char *d, int *ssz, int *dsz)
  912. {
  913. UpperLowerTbl_t *ultp;
  914. unsigned char *tail;
  915. if (s == NULL || *s == '\0') {
  916. *ssz = *dsz = 0;
  917. return;
  918. }
  919. if (!(*s & 0x80)) { /* ASCII */
  920. *dsz = *ssz = 1;
  921. *d = tolower(*s);
  922. return;
  923. }
  924. tail = (unsigned char *)ldap_utf8next((char *)s);
  925. *dsz = *ssz = tail - s;
  926. switch(*ssz) {
  927. case 1: /* ASCII */
  928. *d = tolower(*s);
  929. break;
  930. case 2: /* 2 bytes */
  931. if (*s < UL2S || *s > UL2E) { /* out of range */
  932. memmove(d, s, *ssz);
  933. break;
  934. }
  935. for (ultp = Upper2LowerTbl2[*s - UL2S];
  936. ultp && ultp->upper && memcmp(s, ultp->upper, *ssz);
  937. ultp++)
  938. ;
  939. if (!ultp) { /* out of range */
  940. memmove(d, s, *ssz);
  941. } else if (ultp->upper) { /* matched */
  942. memmove(d, ultp->lower, ultp->tsz);
  943. *dsz = ultp->tsz;
  944. } else {
  945. memmove(d, s, *ssz);
  946. }
  947. break;
  948. case 3: /* 3 bytes */
  949. if (*s != UL3S && *s != UL3E) { /* out of range */
  950. memmove(d, s, *ssz);
  951. break;
  952. }
  953. for (ultp = Upper2LowerTbl3[*s - UL3S];
  954. ultp && ultp->upper && memcmp(s, ultp->upper, *ssz);
  955. ultp++)
  956. ;
  957. if (!ultp) { /* out of range */
  958. memmove(d, s, *ssz);
  959. } else if (ultp->upper) { /* matched */
  960. memmove(d, ultp->lower, *ssz);
  961. } else {
  962. memmove(d, s, *ssz);
  963. }
  964. break;
  965. }
  966. return;
  967. }
  968. /*
  969. * slapi_utf8isUpper: tests for a character that is an upper-case letter in
  970. * UTF-8
  971. *
  972. * input: a UTF-8 character (could be multi-byte)
  973. * output: 1 if the character is an upper-case letter
  974. * 0 if the character is not an upper-case letter
  975. */
  /*
   * char-typed entry point: reinterprets s as unsigned bytes and returns the
   * result of slapi_utf8isUpper() for it.
   */
  int
  slapi_UTF8ISUPPER(char *s)
  {
      unsigned char *ch = (unsigned char *)s;
      int is_upper = slapi_utf8isUpper(ch);

      return is_upper;
  }
  981. int
  982. slapi_utf8isUpper(unsigned char *s)
  983. {
  984. UpperLowerTbl_t *ultp;
  985. unsigned char *next;
  986. int sz;
  987. if (s == NULL || *s == '\0') {
  988. return 0;
  989. }
  990. if (!(*s & 0x80)) { /* ASCII */
  991. return isupper(*s);
  992. }
  993. next = (unsigned char *)ldap_utf8next((char *)s);
  994. switch(sz = next - s) {
  995. case 1: /* ASCII */
  996. return isupper(*s);
  997. case 2:
  998. if (*s < UL2S || *s > UL2E) { /* out of range */
  999. return 0;
  1000. }
  1001. for (ultp = Upper2LowerTbl2[*s - UL2S];
  1002. ultp && ultp->upper && memcmp(s, ultp->upper, sz);
  1003. ultp++)
  1004. ;
  1005. if (!ultp) { /* out of range */
  1006. return 0;
  1007. } else if (ultp->upper) { /* matched */
  1008. return 1;
  1009. } else {
  1010. return 0;
  1011. }
  1012. case 3:
  1013. if (*s < UL3S || *s > UL3E) { /* out of range */
  1014. return 0;
  1015. }
  1016. for (ultp = Upper2LowerTbl3[*s - UL3S];
  1017. ultp && ultp->upper && memcmp(s, ultp->upper, sz);
  1018. ultp++)
  1019. ;
  1020. if (!ultp) { /* out of range */
  1021. return 0;
  1022. } else if (ultp->upper) { /* matched */
  1023. return 1;
  1024. } else {
  1025. return 0;
  1026. }
  1027. default:
  1028. return 0;
  1029. }
  1030. }
  1031. /*
  1032. * Lower2Upper Tables: sorted by lower characters
  1033. */
  /*
   * Lower-to-upper table for lower-case characters whose lead byte is \303.
   * Rows are {upper, lower, tsz}; the all-NULL row ends the table.
   * NOTE(review): tsz looks like the byte length of the upper string - confirm.
   */
  1034. UpperLowerTbl_t Lower2UpperTbl20[] = {
  1035. /* upper, lower */
  1036. {"\303\200", "\303\240", 2},
  1037. {"\303\201", "\303\241", 2},
  1038. {"\303\202", "\303\242", 2},
  1039. {"\303\203", "\303\243", 2},
  1040. {"\303\204", "\303\244", 2},
  1041. {"\303\205", "\303\245", 2},
  1042. {"\303\206", "\303\246", 2},
  1043. {"\303\207", "\303\247", 2},
  1044. {"\303\210", "\303\250", 2},
  1045. {"\303\211", "\303\251", 2},
  1046. {"\303\212", "\303\252", 2},
  1047. {"\303\213", "\303\253", 2},
  1048. {"\303\214", "\303\254", 2},
  1049. {"\303\215", "\303\255", 2},
  1050. {"\303\216", "\303\256", 2},
  1051. {"\303\217", "\303\257", 2},
  1052. {"\303\220", "\303\260", 2},
  1053. {"\303\221", "\303\261", 2},
  1054. {"\303\222", "\303\262", 2},
  1055. {"\303\223", "\303\263", 2},
  1056. {"\303\224", "\303\264", 2},
  1057. {"\303\225", "\303\265", 2},
  1058. {"\303\226", "\303\266", 2},
  1059. {"\303\230", "\303\270", 2},
  1060. {"\303\231", "\303\271", 2},
  1061. {"\303\232", "\303\272", 2},
  1062. {"\303\233", "\303\273", 2},
  1063. {"\303\234", "\303\274", 2},
  1064. {"\303\235", "\303\275", 2},
  1065. {"\303\236", "\303\276", 2},
  1066. {"\305\270", "\303\277", 2},
  1067. {NULL, NULL, 0}
  1068. };
  1069. UpperLowerTbl_t Lower2UpperTbl21[] = {
  1070. {"\304\200", "\304\201", 2},
  1071. {"\304\202", "\304\203", 2},
  1072. {"\304\204", "\304\205", 2},
  1073. {"\304\206", "\304\207", 2},
  1074. {"\304\210", "\304\211", 2},
  1075. {"\304\212", "\304\213", 2},
  1076. {"\304\214", "\304\215", 2},
  1077. {"\304\216", "\304\217", 2},
  1078. {"\304\220", "\304\221", 2},
  1079. {"\304\222", "\304\223", 2},
  1080. {"\304\224", "\304\225", 2},
  1081. {"\304\226", "\304\227", 2},
  1082. {"\304\230", "\304\231", 2},
  1083. {"\304\232", "\304\233", 2},
  1084. {"\304\234", "\304\235", 2},
  1085. {"\304\236", "\304\237", 2},
  1086. {"\304\240", "\304\241", 2},
  1087. {"\304\242", "\304\243", 2},
  1088. {"\304\244", "\304\245", 2},
  1089. {"\304\246", "\304\247", 2},
  1090. {"\304\250", "\304\251", 2},
  1091. {"\304\252", "\304\253", 2},
  1092. {"\304\254", "\304\255", 2},
  1093. {"\304\256", "\304\257", 2},
  1094. {"\111", "\304\261", 1},
  1095. {"\304\262", "\304\263", 2},
  1096. {"\304\264", "\304\265", 2},
  1097. {"\304\266", "\304\267", 2},
  1098. {"\304\271", "\304\272", 2},
  1099. {"\304\273", "\304\274", 2},
  1100. {"\304\275", "\304\276", 2},
  1101. {NULL, NULL}
  1102. };
  /*
   * Lower-to-upper table for lower-case characters whose lead byte is \305.
   * Rows are {upper, lower, tsz}; the all-NULL row ends the table.
   * The last data row maps to the single-byte "S" and carries tsz 1.
   */
  1103. UpperLowerTbl_t Lower2UpperTbl22[] = {
  1104. {"\304\277", "\305\200", 2},
  1105. {"\305\201", "\305\202", 2},
  1106. {"\305\203", "\305\204", 2},
  1107. {"\305\205", "\305\206", 2},
  1108. {"\305\207", "\305\210", 2},
  1109. {"\305\212", "\305\213", 2},
  1110. {"\305\214", "\305\215", 2},
  1111. {"\305\216", "\305\217", 2},
  1112. {"\305\220", "\305\221", 2},
  1113. {"\305\222", "\305\223", 2},
  1114. {"\305\224", "\305\225", 2},
  1115. {"\305\226", "\305\227", 2},
  1116. {"\305\230", "\305\231", 2},
  1117. {"\305\232", "\305\233", 2},
  1118. {"\305\234", "\305\235", 2},
  1119. {"\305\236", "\305\237", 2},
  1120. {"\305\240", "\305\241", 2},
  1121. {"\305\242", "\305\243", 2},
  1122. {"\305\244", "\305\245", 2},
  1123. {"\305\246", "\305\247", 2},
  1124. {"\305\250", "\305\251", 2},
  1125. {"\305\252", "\305\253", 2},
  1126. {"\305\254", "\305\255", 2},
  1127. {"\305\256", "\305\257", 2},
  1128. {"\305\260", "\305\261", 2},
  1129. {"\305\262", "\305\263", 2},
  1130. {"\305\264", "\305\265", 2},
  1131. {"\305\266", "\305\267", 2},
  1132. {"\305\271", "\305\272", 2},
  1133. {"\305\273", "\305\274", 2},
  1134. {"\305\275", "\305\276", 2},
  1135. {"\123", "\305\277", 1},
  1136. {NULL, NULL, 0}
  1137. };
  /*
   * Lower-to-upper table for lower-case characters whose lead byte is \306.
   * Rows are {upper, lower, tsz}; the all-NULL row ends the table.
   */
  1138. UpperLowerTbl_t Lower2UpperTbl23[] = {
  1139. {"\306\202", "\306\203", 2},
  1140. {"\306\204", "\306\205", 2},
  1141. {"\306\207", "\306\210", 2},
  1142. {"\306\213", "\306\214", 2},
  1143. {"\306\221", "\306\222", 2},
  1144. {"\306\230", "\306\231", 2},
  1145. {"\306\240", "\306\241", 2},
  1146. {"\306\242", "\306\243", 2},
  1147. {"\306\244", "\306\245", 2},
  1148. {"\306\247", "\306\250", 2},
  1149. {"\306\254", "\306\255", 2},
  1150. {"\306\257", "\306\260", 2},
  1151. {"\306\263", "\306\264", 2},
  1152. {"\306\265", "\306\266", 2},
  1153. {"\306\270", "\306\271", 2},
  1154. {"\306\274", "\306\275", 2},
  1155. {NULL, NULL, 0}
  1156. };
  /*
   * Lower-to-upper table for lower-case characters whose lead byte is \307.
   * Rows are {upper, lower, tsz}; the all-NULL row ends the table.
   */
  1157. UpperLowerTbl_t Lower2UpperTbl24[] = {
  1158. {"\307\204", "\307\206", 2},
  1159. {"\307\207", "\307\211", 2},
  1160. {"\307\212", "\307\214", 2},
  1161. {"\307\215", "\307\216", 2},
  1162. {"\307\217", "\307\220", 2},
  1163. {"\307\221", "\307\222", 2},
  1164. {"\307\223", "\307\224", 2},
  1165. {"\307\225", "\307\226", 2},
  1166. {"\307\227", "\307\230", 2},
  1167. {"\307\231", "\307\232", 2},
  1168. {"\307\233", "\307\234", 2},
  1169. {"\307\236", "\307\237", 2},
  1170. {"\307\240", "\307\241", 2},
  1171. {"\307\242", "\307\243", 2},
  1172. {"\307\244", "\307\245", 2},
  1173. {"\307\246", "\307\247", 2},
  1174. {"\307\250", "\307\251", 2},
  1175. {"\307\252", "\307\253", 2},
  1176. {"\307\254", "\307\255", 2},
  1177. {"\307\256", "\307\257", 2},
  1178. {"\307\261", "\307\263", 2},
  1179. {"\307\264", "\307\265", 2},
  1180. {"\307\272", "\307\273", 2},
  1181. {"\307\274", "\307\275", 2},
  1182. {"\307\276", "\307\277", 2},
  1183. {NULL, NULL, 0}
  1184. };
  /*
   * Lower-to-upper table for lower-case characters whose lead byte is \310.
   * Rows are {upper, lower, tsz}; the all-NULL row ends the table.
   */
  1185. UpperLowerTbl_t Lower2UpperTbl25[] = {
  1186. {"\310\200", "\310\201", 2},
  1187. {"\310\202", "\310\203", 2},
  1188. {"\310\204", "\310\205", 2},
  1189. {"\310\206", "\310\207", 2},
  1190. {"\310\210", "\310\211", 2},
  1191. {"\310\212", "\310\213", 2},
  1192. {"\310\214", "\310\215", 2},
  1193. {"\310\216", "\310\217", 2},
  1194. {"\310\220", "\310\221", 2},
  1195. {"\310\222", "\310\223", 2},
  1196. {"\310\224", "\310\225", 2},
  1197. {"\310\226", "\310\227", 2},
  1198. {NULL, NULL, 0}
  1199. };
  /*
   * Lower-to-upper table for lower-case characters whose lead byte is \311;
   * every upper-case form here has lead byte \306.
   * Rows are {upper, lower, tsz}, ordered by the lower column; the all-NULL
   * row ends the table.
   */
  1200. UpperLowerTbl_t Lower2UpperTbl26[] = {
  1201. {"\306\201", "\311\223", 2},
  1202. {"\306\206", "\311\224", 2},
  1203. {"\306\211", "\311\226", 2},
  1204. {"\306\212", "\311\227", 2},
  1205. {"\306\216", "\311\230", 2},
  1206. {"\306\217", "\311\231", 2},
  1207. {"\306\220", "\311\233", 2},
  1208. {"\306\223", "\311\240", 2},
  1209. {"\306\224", "\311\243", 2},
  1210. {"\306\227", "\311\250", 2},
  1211. {"\306\226", "\311\251", 2},
  1212. {"\306\234", "\311\257", 2},
  1213. {"\306\235", "\311\262", 2},
  1214. {NULL, NULL, 0}
  1215. };
  /*
   * Lower-to-upper table for lower-case characters whose lead byte is \312;
   * every upper-case form here has lead byte \306.
   * Rows are {upper, lower, tsz}; the all-NULL row ends the table.
   */
  1216. UpperLowerTbl_t Lower2UpperTbl27[] = {
  1217. {"\306\251", "\312\203", 2},
  1218. {"\306\256", "\312\210", 2},
  1219. {"\306\261", "\312\212", 2},
  1220. {"\306\262", "\312\213", 2},
  1221. {"\306\267", "\312\222", 2},
  1222. {NULL, NULL, 0}
  1223. };
  /*
   * Lower-to-upper table for lower-case characters whose lead byte is \316.
   * Rows are {upper, lower, tsz}; the all-NULL row ends the table.
   */
  1224. UpperLowerTbl_t Lower2UpperTbl28[] = {
  1225. {"\316\206", "\316\254", 2},
  1226. {"\316\210", "\316\255", 2},
  1227. {"\316\211", "\316\256", 2},
  1228. {"\316\212", "\316\257", 2},
  1229. {"\316\221", "\316\261", 2},
  1230. {"\316\222", "\316\262", 2},
  1231. {"\316\223", "\316\263", 2},
  1232. {"\316\224", "\316\264", 2},
  1233. {"\316\225", "\316\265", 2},
  1234. {"\316\226", "\316\266", 2},
  1235. {"\316\227", "\316\267", 2},
  1236. {"\316\230", "\316\270", 2},
  1237. {"\316\231", "\316\271", 2},
  1238. {"\316\232", "\316\272", 2},
  1239. {"\316\233", "\316\273", 2},
  1240. {"\316\234", "\316\274", 2},
  1241. {"\316\235", "\316\275", 2},
  1242. {"\316\236", "\316\276", 2},
  1243. {"\316\237", "\316\277", 2},
  1244. {NULL, NULL, 0}
  1245. };
  /*
   * Lower-to-upper table for lower-case characters whose lead byte is \317.
   * Rows are {upper, lower, tsz}; the all-NULL row ends the table.
   * Several lower-case forms share one upper-case form (e.g. "\317\202" and
   * "\317\203" both map to "\316\243"), so the upper column has repeats.
   */
  1246. UpperLowerTbl_t Lower2UpperTbl29[] = {
  1247. {"\316\240", "\317\200", 2},
  1248. {"\316\241", "\317\201", 2},
  1249. {"\316\243", "\317\202", 2},
  1250. {"\316\243", "\317\203", 2},
  1251. {"\316\244", "\317\204", 2},
  1252. {"\316\245", "\317\205", 2},
  1253. {"\316\246", "\317\206", 2},
  1254. {"\316\247", "\317\207", 2},
  1255. {"\316\250", "\317\210", 2},
  1256. {"\316\251", "\317\211", 2},
  1257. {"\316\252", "\317\212", 2},
  1258. {"\316\253", "\317\213", 2},
  1259. {"\316\214", "\317\214", 2},
  1260. {"\316\216", "\317\215", 2},
  1261. {"\316\217", "\317\216", 2},
  1262. {"\316\222", "\317\220", 2},
  1263. {"\316\230", "\317\221", 2},
  1264. {"\316\246", "\317\225", 2},
  1265. {"\316\240", "\317\226", 2},
  1266. {"\317\242", "\317\243", 2},
  1267. {"\317\244", "\317\245", 2},
  1268. {"\317\246", "\317\247", 2},
  1269. {"\317\250", "\317\251", 2},
  1270. {"\317\252", "\317\253", 2},
  1271. {"\317\254", "\317\255", 2},
  1272. {"\317\256", "\317\257", 2},
  1273. {"\316\232", "\317\260", 2},
  1274. {"\316\241", "\317\261", 2},
  1275. {NULL, NULL, 0}
  1276. };
  /*
   * Lower-to-upper table for lower-case characters whose lead byte is \320.
   * Rows are {upper, lower, tsz}; the all-NULL row ends the table.
   */
  1277. UpperLowerTbl_t Lower2UpperTbl2a[] = {
  1278. {"\320\220", "\320\260", 2},
  1279. {"\320\221", "\320\261", 2},
  1280. {"\320\222", "\320\262", 2},
  1281. {"\320\223", "\320\263", 2},
  1282. {"\320\224", "\320\264", 2},
  1283. {"\320\225", "\320\265", 2},
  1284. {"\320\226", "\320\266", 2},
  1285. {"\320\227", "\320\267", 2},
  1286. {"\320\230", "\320\270", 2},
  1287. {"\320\231", "\320\271", 2},
  1288. {"\320\232", "\320\272", 2},
  1289. {"\320\233", "\320\273", 2},
  1290. {"\320\234", "\320\274", 2},
  1291. {"\320\235", "\320\275", 2},
  1292. {"\320\236", "\320\276", 2},
  1293. {"\320\237", "\320\277", 2},
  1294. {NULL, NULL, 0}
  1295. };
  /*
   * Lower-to-upper table for lower-case characters whose lead byte is \321.
   * Rows are {upper, lower, tsz}; the all-NULL row ends the table.
   */
  1296. UpperLowerTbl_t Lower2UpperTbl2b[] = {
  1297. {"\320\240", "\321\200", 2},
  1298. {"\320\241", "\321\201", 2},
  1299. {"\320\242", "\321\202", 2},
  1300. {"\320\243", "\321\203", 2},
  1301. {"\320\244", "\321\204", 2},
  1302. {"\320\245", "\321\205", 2},
  1303. {"\320\246", "\321\206", 2},
  1304. {"\320\247", "\321\207", 2},
  1305. {"\320\250", "\321\210", 2},
  1306. {"\320\251", "\321\211", 2},
  1307. {"\320\252", "\321\212", 2},
  1308. {"\320\253", "\321\213", 2},
  1309. {"\320\254", "\321\214", 2},
  1310. {"\320\255", "\321\215", 2},
  1311. {"\320\256", "\321\216", 2},
  1312. {"\320\257", "\321\217", 2},
  1313. {"\320\201", "\321\221", 2},
  1314. {"\320\202", "\321\222", 2},
  1315. {"\320\203", "\321\223", 2},
  1316. {"\320\204", "\321\224", 2},
  1317. {"\320\205", "\321\225", 2},
  1318. {"\320\206", "\321\226", 2},
  1319. {"\320\207", "\321\227", 2},
  1320. {"\320\210", "\321\230", 2},
  1321. {"\320\211", "\321\231", 2},
  1322. {"\320\212", "\321\232", 2},
  1323. {"\320\213", "\321\233", 2},
  1324. {"\320\214", "\321\234", 2},
  1325. {"\320\216", "\321\236", 2},
  1326. {"\320\217", "\321\237", 2},
  1327. {"\321\240", "\321\241", 2},
  1328. {"\321\242", "\321\243", 2},
  1329. {"\321\244", "\321\245", 2},
  1330. {"\321\246", "\321\247", 2},
  1331. {"\321\250", "\321\251", 2},
  1332. {"\321\252", "\321\253", 2},
  1333. {"\321\254", "\321\255", 2},
  1334. {"\321\256", "\321\257", 2},
  1335. {"\321\260", "\321\261", 2},
  1336. {"\321\262", "\321\263", 2},
  1337. {"\321\264", "\321\265", 2},
  1338. {"\321\266", "\321\267", 2},
  1339. {"\321\270", "\321\271", 2},
  1340. {"\321\272", "\321\273", 2},
  1341. {"\321\274", "\321\275", 2},
  1342. {"\321\276", "\321\277", 2},
  1343. {NULL, NULL, 0}
  1344. };
  /*
   * Lower-to-upper table for lower-case characters whose lead byte is \322.
   * Rows are {upper, lower, tsz}; the all-NULL row ends the table.
   */
  1345. UpperLowerTbl_t Lower2UpperTbl2c[] = {
  1346. {"\322\200", "\322\201", 2},
  1347. {"\322\220", "\322\221", 2},
  1348. {"\322\222", "\322\223", 2},
  1349. {"\322\224", "\322\225", 2},
  1350. {"\322\226", "\322\227", 2},
  1351. {"\322\230", "\322\231", 2},
  1352. {"\322\232", "\322\233", 2},
  1353. {"\322\234", "\322\235", 2},
  1354. {"\322\236", "\322\237", 2},
  1355. {"\322\240", "\322\241", 2},
  1356. {"\322\242", "\322\243", 2},
  1357. {"\322\244", "\322\245", 2},
  1358. {"\322\246", "\322\247", 2},
  1359. {"\322\250", "\322\251", 2},
  1360. {"\322\252", "\322\253", 2},
  1361. {"\322\254", "\322\255", 2},
  1362. {"\322\256", "\322\257", 2},
  1363. {"\322\260", "\322\261", 2},
  1364. {"\322\262", "\322\263", 2},
  1365. {"\322\264", "\322\265", 2},
  1366. {"\322\266", "\322\267", 2},
  1367. {"\322\270", "\322\271", 2},
  1368. {"\322\272", "\322\273", 2},
  1369. {"\322\274", "\322\275", 2},
  1370. {"\322\276", "\322\277", 2},
  1371. {NULL, NULL, 0}
  1372. };
  /*
   * Lower-to-upper table for lower-case characters whose lead byte is \323.
   * Rows are {upper, lower, tsz}; the all-NULL row ends the table.
   */
  1373. UpperLowerTbl_t Lower2UpperTbl2d[] = {
  1374. {"\323\201", "\323\202", 2},
  1375. {"\323\203", "\323\204", 2},
  1376. {"\323\207", "\323\210", 2},
  1377. {"\323\213", "\323\214", 2},
  1378. {"\323\220", "\323\221", 2},
  1379. {"\323\222", "\323\223", 2},
  1380. {"\323\224", "\323\225", 2},
  1381. {"\323\226", "\323\227", 2},
  1382. {"\323\230", "\323\231", 2},
  1383. {"\323\232", "\323\233", 2},
  1384. {"\323\234", "\323\235", 2},
  1385. {"\323\236", "\323\237", 2},
  1386. {"\323\240", "\323\241", 2},
  1387. {"\323\242", "\323\243", 2},
  1388. {"\323\244", "\323\245", 2},
  1389. {"\323\246", "\323\247", 2},
  1390. {"\323\250", "\323\251", 2},
  1391. {"\323\252", "\323\253", 2},
  1392. {"\323\256", "\323\257", 2},
  1393. {"\323\260", "\323\261", 2},
  1394. {"\323\262", "\323\263", 2},
  1395. {"\323\264", "\323\265", 2},
  1396. {"\323\270", "\323\271", 2},
  1397. {NULL, NULL, 0}
  1398. };
  /*
   * Lower-to-upper table for lower-case characters whose lead byte is \325;
   * the upper-case forms have lead byte \324 or \325.
   * Rows are {upper, lower, tsz}; the all-NULL row ends the table.
   */
  1399. UpperLowerTbl_t Lower2UpperTbl2e[] = {
  1400. {"\324\261", "\325\241", 2},
  1401. {"\324\262", "\325\242", 2},
  1402. {"\324\263", "\325\243", 2},
  1403. {"\324\264", "\325\244", 2},
  1404. {"\324\265", "\325\245", 2},
  1405. {"\324\266", "\325\246", 2},
  1406. {"\324\267", "\325\247", 2},
  1407. {"\324\270", "\325\250", 2},
  1408. {"\324\271", "\325\251", 2},
  1409. {"\324\272", "\325\252", 2},
  1410. {"\324\273", "\325\253", 2},
  1411. {"\324\274", "\325\254", 2},
  1412. {"\324\275", "\325\255", 2},
  1413. {"\324\276", "\325\256", 2},
  1414. {"\324\277", "\325\257", 2},
  1415. {"\325\200", "\325\260", 2},
  1416. {"\325\201", "\325\261", 2},
  1417. {"\325\202", "\325\262", 2},
  1418. {"\325\203", "\325\263", 2},
  1419. {"\325\204", "\325\264", 2},
  1420. {"\325\205", "\325\265", 2},
  1421. {"\325\206", "\325\266", 2},
  1422. {"\325\207", "\325\267", 2},
  1423. {"\325\210", "\325\270", 2},
  1424. {"\325\211", "\325\271", 2},
  1425. {"\325\212", "\325\272", 2},
  1426. {"\325\213", "\325\273", 2},
  1427. {"\325\214", "\325\274", 2},
  1428. {"\325\215", "\325\275", 2},
  1429. {"\325\216", "\325\276", 2},
  1430. {"\325\217", "\325\277", 2},
  1431. {NULL, NULL, 0}
  1432. };
  /*
   * Lower-to-upper table for lower-case characters whose lead byte is \326;
   * every upper-case form here has lead byte \325.
   * Rows are {upper, lower, tsz}; the all-NULL row ends the table.
   */
  1433. UpperLowerTbl_t Lower2UpperTbl2f[] = {
  1434. {"\325\220", "\326\200", 2},
  1435. {"\325\221", "\326\201", 2},
  1436. {"\325\222", "\326\202", 2},
  1437. {"\325\223", "\326\203", 2},
  1438. {"\325\224", "\326\204", 2},
  1439. {"\325\225", "\326\205", 2},
  1440. {"\325\226", "\326\206", 2},
  1441. {NULL, NULL, 0}
  1442. };
  1443. UpperLowerTbl_t Lower2UpperTbl30[] = {
  1444. {"\341\202\240", "\341\203\220", 3},
  1445. {"\341\202\241", "\341\203\221", 3},
  1446. {"\341\202\242", "\341\203\222", 3},
  1447. {"\341\202\243", "\341\203\223", 3},
  1448. {"\341\202\244", "\341\203\224", 3},
  1449. {"\341\202\245", "\341\203\225", 3},
  1450. {"\341\202\246", "\341\203\226", 3},
  1451. {"\341\202\247", "\341\203\227", 3},
  1452. {"\341\202\250", "\341\203\230", 3},
  1453. {"\341\202\251", "\341\203\231", 3},
  1454. {"\341\202\252", "\341\203\232", 3},
  1455. {"\341\202\253", "\341\203\233", 3},
  1456. {"\341\202\254", "\341\203\234", 3},
  1457. {"\341\202\255", "\341\203\235", 3},
  1458. {"\341\202\256", "\341\203\236", 3},
  1459. {"\341\202\257", "\341\203\237", 3},
  1460. {"\341\202\260", "\341\203\240", 3},
  1461. {"\341\202\261", "\341\203\241", 3},
  1462. {"\341\202\262", "\341\203\242", 3},
  1463. {"\341\202\263", "\341\203\243", 3},
  1464. {"\341\202\264", "\341\203\244", 3},
  1465. {"\341\202\265", "\341\203\245", 3},
  1466. {"\341\202\266", "\341\203\246", 3},
  1467. {"\341\202\267", "\341\203\247", 3},
  1468. {"\341\202\270", "\341\203\250", 3},
  1469. {"\341\202\271", "\341\203\251", 3},
  1470. {"\341\202\272", "\341\203\252", 3},
  1471. {"\341\202\273", "\341\203\253", 3},
  1472. {"\341\202\274", "\341\203\254", 3},
  1473. {"\341\202\275", "\341\203\255", 3},
  1474. {"\341\202\276", "\341\203\256", 3},
  1475. {"\341\202\277", "\341\203\257", 3},
  1476. {"\341\203\200", "\341\203\260", 3},
  1477. {"\341\203\201", "\341\203\261", 3},
  1478. {"\341\203\202", "\341\203\262", 3},
  1479. {"\341\203\203", "\341\203\263", 3},
  1480. {"\341\203\204", "\341\203\264", 3},
  1481. {"\341\203\205", "\341\203\265", 3},
  1482. {"\341\270\200", "\341\270\201", 3},
  1483. {"\341\270\202", "\341\270\203", 3},
  1484. {"\341\270\204", "\341\270\205", 3},
  1485. {"\341\270\206", "\341\270\207", 3},
  1486. {"\341\270\210", "\341\270\211", 3},
  1487. {"\341\270\212", "\341\270\213", 3},
  1488. {"\341\270\214", "\341\270\215", 3},
  1489. {"\341\270\216", "\341\270\217", 3},
  1490. {"\341\270\220", "\341\270\221", 3},
  1491. {"\341\270\222", "\341\270\223", 3},
  1492. {"\341\270\224", "\341\270\225", 3},
  1493. {"\341\270\226", "\341\270\227", 3},
  1494. {"\341\270\230", "\341\270\231", 3},
  1495. {"\341\270\232", "\341\270\233", 3},
  1496. {"\341\270\234", "\341\270\235", 3},
  1497. {"\341\270\236", "\341\270\237", 3},
  1498. {"\341\270\240", "\341\270\241", 3},
  1499. {"\341\270\242", "\341\270\243", 3},
  1500. {"\341\270\244", "\341\270\245", 3},
  1501. {"\341\270\246", "\341\270\247", 3},
  1502. {"\341\270\250", "\341\270\251", 3},
  1503. {"\341\270\252", "\341\270\253", 3},
  1504. {"\341\270\254", "\341\270\255", 3},
  1505. {"\341\270\256", "\341\270\257", 3},
  1506. {"\341\270\260", "\341\270\261", 3},
  1507. {"\341\270\262", "\341\270\263", 3},
  1508. {"\341\270\264", "\341\270\265", 3},
  1509. {"\341\270\266", "\341\270\267", 3},
  1510. {"\341\270\270", "\341\270\271", 3},
  1511. {"\341\270\272", "\341\270\273", 3},
  1512. {"\341\270\274", "\341\270\275", 3},
  1513. {"\341\270\276", "\341\270\277", 3},
  1514. {"\341\271\200", "\341\271\201", 3},
  1515. {"\341\271\202", "\341\271\203", 3},
  1516. {"\341\271\204", "\341\271\205", 3},
  1517. {"\341\271\206", "\341\271\207", 3},
  1518. {"\341\271\210", "\341\271\211", 3},
  1519. {"\341\271\212", "\341\271\213", 3},
  1520. {"\341\271\214", "\341\271\215", 3},
  1521. {"\341\271\216", "\341\271\217", 3},
  1522. {"\341\271\220", "\341\271\221", 3},
  1523. {"\341\271\222", "\341\271\223", 3},
  1524. {"\341\271\224", "\341\271\225", 3},
  1525. {"\341\271\226", "\341\271\227", 3},
  1526. {"\341\271\230", "\341\271\231", 3},
  1527. {"\341\271\232", "\341\271\233", 3},
  1528. {"\341\271\234", "\341\271\235", 3},
  1529. {"\341\271\236", "\341\271\237", 3},
  1530. {"\341\271\240", "\341\271\241", 3},
  1531. {"\341\271\242", "\341\271\243", 3},
  1532. {"\341\271\244", "\341\271\245", 3},
  1533. {"\341\271\246", "\341\271\247", 3},
  1534. {"\341\271\250", "\341\271\251", 3},
  1535. {"\341\271\252", "\341\271\253", 3},
  1536. {"\341\271\254", "\341\271\255", 3},
  1537. {"\341\271\256", "\341\271\257", 3},
  1538. {"\341\271\260", "\341\271\261", 3},
  1539. {"\341\271\262", "\341\271\263", 3},
  1540. {"\341\271\264", "\341\271\265", 3},
  1541. {"\341\271\266", "\341\271\267", 3},
  1542. {"\341\271\270", "\341\271\271", 3},
  1543. {"\341\271\272", "\341\271\273", 3},
  1544. {"\341\271\274", "\341\271\275", 3},
  1545. {"\341\271\276", "\341\271\277", 3},
  1546. {"\341\272\200", "\341\272\201", 3},
  1547. {"\341\272\202", "\341\272\203", 3},
  1548. {"\341\272\204", "\341\272\205", 3},
  1549. {"\341\272\206", "\341\272\207", 3},
  1550. {"\341\272\210", "\341\272\211", 3},
  1551. {"\341\272\212", "\341\272\213", 3},
  1552. {"\341\272\214", "\341\272\215", 3},
  1553. {"\341\272\216", "\341\272\217", 3},
  1554. {"\341\272\220", "\341\272\221", 3},
  1555. {"\341\272\222", "\341\272\223", 3},
  1556. {"\341\272\224", "\341\272\225", 3},
  1557. {"\341\272\240", "\341\272\241", 3},
  1558. {"\341\272\242", "\341\272\243", 3},
  1559. {"\341\272\244", "\341\272\245", 3},
  1560. {"\341\272\246", "\341\272\247", 3},
  1561. {"\341\272\250", "\341\272\251", 3},
  1562. {"\341\272\252", "\341\272\253", 3},
  1563. {"\341\272\254", "\341\272\255", 3},
  1564. {"\341\272\256", "\341\272\257", 3},
  1565. {"\341\272\260", "\341\272\261", 3},
  1566. {"\341\272\262", "\341\272\263", 3},
  1567. {"\341\272\264", "\341\272\265", 3},
  1568. {"\341\272\266", "\341\272\267", 3},
  1569. {"\341\272\270", "\341\272\271", 3},
  1570. {"\341\272\272", "\341\272\273", 3},
  1571. {"\341\272\274", "\341\272\275", 3},
  1572. {"\341\272\276", "\341\272\277", 3},
  1573. {"\341\273\200", "\341\273\201", 3},
  1574. {"\341\273\202", "\341\273\203", 3},
  1575. {"\341\273\204", "\341\273\205", 3},
  1576. {"\341\273\206", "\341\273\207", 3},
  1577. {"\341\273\210", "\341\273\211", 3},
  1578. {"\341\273\212", "\341\273\213", 3},
  1579. {"\341\273\214", "\341\273\215", 3},
  1580. {"\341\273\216", "\341\273\217", 3},
  1581. {"\341\273\220", "\341\273\221", 3},
  1582. {"\341\273\222", "\341\273\223", 3},
  1583. {"\341\273\224", "\341\273\225", 3},
  1584. {"\341\273\226", "\341\273\227", 3},
  1585. {"\341\273\230", "\341\273\231", 3},
  1586. {"\341\273\232", "\341\273\233", 3},
  1587. {"\341\273\234", "\341\273\235", 3},
  1588. {"\341\273\236", "\341\273\237", 3},
  1589. {"\341\273\240", "\341\273\241", 3},
  1590. {"\341\273\242", "\341\273\243", 3},
  1591. {"\341\273\244", "\341\273\245", 3},
  1592. {"\341\273\246", "\341\273\247", 3},
  1593. {"\341\273\250", "\341\273\251", 3},
  1594. {"\341\273\252", "\341\273\253", 3},
  1595. {"\341\273\254", "\341\273\255", 3},
  1596. {"\341\273\256", "\341\273\257", 3},
  1597. {"\341\273\260", "\341\273\261", 3},
  1598. {"\341\273\262", "\341\273\263", 3},
  1599. {"\341\273\264", "\341\273\265", 3},
  1600. {"\341\273\266", "\341\273\267", 3},
  1601. {"\341\273\270", "\341\273\271", 3},
  1602. {"\341\274\210", "\341\274\200", 3},
  1603. {"\341\274\211", "\341\274\201", 3},
  1604. {"\341\274\212", "\341\274\202", 3},
  1605. {"\341\274\213", "\341\274\203", 3},
  1606. {"\341\274\214", "\341\274\204", 3},
  1607. {"\341\274\215", "\341\274\205", 3},
  1608. {"\341\274\216", "\341\274\206", 3},
  1609. {"\341\274\217", "\341\274\207", 3},
  1610. {"\341\274\230", "\341\274\220", 3},
  1611. {"\341\274\231", "\341\274\221", 3},
  1612. {"\341\274\232", "\341\274\222", 3},
  1613. {"\341\274\233", "\341\274\223", 3},
  1614. {"\341\274\234", "\341\274\224", 3},
  1615. {"\341\274\235", "\341\274\225", 3},
  1616. {"\341\274\250", "\341\274\240", 3},
  1617. {"\341\274\251", "\341\274\241", 3},
  1618. {"\341\274\252", "\341\274\242", 3},
  1619. {"\341\274\253", "\341\274\243", 3},
  1620. {"\341\274\254", "\341\274\244", 3},
  1621. {"\341\274\255", "\341\274\245", 3},
  1622. {"\341\274\256", "\341\274\246", 3},
  1623. {"\341\274\257", "\341\274\247", 3},
  1624. {"\341\274\270", "\341\274\260", 3},
  1625. {"\341\274\271", "\341\274\261", 3},
  1626. {"\341\274\272", "\341\274\262", 3},
  1627. {"\341\274\273", "\341\274\263", 3},
  1628. {"\341\274\274", "\341\274\264", 3},
  1629. {"\341\274\275", "\341\274\265", 3},
  1630. {"\341\274\276", "\341\274\266", 3},
  1631. {"\341\274\277", "\341\274\267", 3},
  1632. {"\341\275\210", "\341\275\200", 3},
  1633. {"\341\275\211", "\341\275\201", 3},
  1634. {"\341\275\212", "\341\275\202", 3},
  1635. {"\341\275\213", "\341\275\203", 3},
  1636. {"\341\275\214", "\341\275\204", 3},
  1637. {"\341\275\215", "\341\275\205", 3},
  1638. {"\341\275\231", "\341\275\221", 3},
  1639. {"\341\275\233", "\341\275\223", 3},
  1640. {"\341\275\235", "\341\275\225", 3},
  1641. {"\341\275\237", "\341\275\227", 3},
  1642. {"\341\275\250", "\341\275\240", 3},
  1643. {"\341\275\251", "\341\275\241", 3},
  1644. {"\341\275\252", "\341\275\242", 3},
  1645. {"\341\275\253", "\341\275\243", 3},
  1646. {"\341\275\254", "\341\275\244", 3},
  1647. {"\341\275\255", "\341\275\245", 3},
  1648. {"\341\275\256", "\341\275\246", 3},
  1649. {"\341\275\257", "\341\275\247", 3},
  1650. {"\341\276\272", "\341\275\260", 3},
  1651. {"\341\276\273", "\341\275\261", 3},
  1652. {"\341\277\210", "\341\275\262", 3},
  1653. {"\341\277\211", "\341\275\263", 3},
  1654. {"\341\277\212", "\341\275\264", 3},
  1655. {"\341\277\213", "\341\275\265", 3},
  1656. {"\341\277\232", "\341\275\266", 3},
  1657. {"\341\277\233", "\341\275\267", 3},
  1658. {"\341\277\270", "\341\275\270", 3},
  1659. {"\341\277\271", "\341\275\271", 3},
  1660. {"\341\277\252", "\341\275\272", 3},
  1661. {"\341\277\253", "\341\275\273", 3},
  1662. {"\341\277\272", "\341\275\274", 3},
  1663. {"\341\277\273", "\341\275\275", 3},
  1664. {"\341\276\210", "\341\276\200", 3},
  1665. {"\341\276\211", "\341\276\201", 3},
  1666. {"\341\276\212", "\341\276\202", 3},
  1667. {"\341\276\213", "\341\276\203", 3},
  1668. {"\341\276\214", "\341\276\204", 3},
  1669. {"\341\276\215", "\341\276\205", 3},
  1670. {"\341\276\216", "\341\276\206", 3},
  1671. {"\341\276\217", "\341\276\207", 3},
  1672. {"\341\276\230", "\341\276\220", 3},
  1673. {"\341\276\231", "\341\276\221", 3},
  1674. {"\341\276\232", "\341\276\222", 3},
  1675. {"\341\276\233", "\341\276\223", 3},
  1676. {"\341\276\234", "\341\276\224", 3},
  1677. {"\341\276\235", "\341\276\225", 3},
  1678. {"\341\276\236", "\341\276\226", 3},
  1679. {"\341\276\237", "\341\276\227", 3},
  1680. {"\341\276\250", "\341\276\240", 3},
  1681. {"\341\276\251", "\341\276\241", 3},
  1682. {"\341\276\252", "\341\276\242", 3},
  1683. {"\341\276\253", "\341\276\243", 3},
  1684. {"\341\276\254", "\341\276\244", 3},
  1685. {"\341\276\255", "\341\276\245", 3},
  1686. {"\341\276\256", "\341\276\246", 3},
  1687. {"\341\276\257", "\341\276\247", 3},
  1688. {"\341\276\270", "\341\276\260", 3},
  1689. {"\341\276\271", "\341\276\261", 3},
  1690. {"\341\276\274", "\341\276\263", 3},
  1691. {"\341\277\214", "\341\277\203", 3},
  1692. {"\341\277\230", "\341\277\220", 3},
  1693. {"\341\277\231", "\341\277\221", 3},
  1694. {"\341\277\250", "\341\277\240", 3},
  1695. {"\341\277\251", "\341\277\241", 3},
  1696. {"\341\277\254", "\341\277\245", 3},
  1697. {"\341\277\274", "\341\277\263", 3},
  1698. {NULL, NULL, 0}
  1699. };
  1700. UpperLowerTbl_t Lower2UpperTbl31[] = {
  1701. {"\357\274\241", "\357\275\201", 3},
  1702. {"\357\274\242", "\357\275\202", 3},
  1703. {"\357\274\243", "\357\275\203", 3},
  1704. {"\357\274\244", "\357\275\204", 3},
  1705. {"\357\274\245", "\357\275\205", 3},
  1706. {"\357\274\246", "\357\275\206", 3},
  1707. {"\357\274\247", "\357\275\207", 3},
  1708. {"\357\274\250", "\357\275\210", 3},
  1709. {"\357\274\251", "\357\275\211", 3},
  1710. {"\357\274\252", "\357\275\212", 3},
  1711. {"\357\274\253", "\357\275\213", 3},
  1712. {"\357\274\254", "\357\275\214", 3},
  1713. {"\357\274\255", "\357\275\215", 3},
  1714. {"\357\274\256", "\357\275\216", 3},
  1715. {"\357\274\257", "\357\275\217", 3},
  1716. {"\357\274\260", "\357\275\220", 3},
  1717. {"\357\274\261", "\357\275\221", 3},
  1718. {"\357\274\262", "\357\275\222", 3},
  1719. {"\357\274\263", "\357\275\223", 3},
  1720. {"\357\274\264", "\357\275\224", 3},
  1721. {"\357\274\265", "\357\275\225", 3},
  1722. {"\357\274\266", "\357\275\226", 3},
  1723. {"\357\274\267", "\357\275\227", 3},
  1724. {"\357\274\270", "\357\275\230", 3},
  1725. {"\357\274\271", "\357\275\231", 3},
  1726. {"\357\274\272", "\357\275\232", 3},
  1727. {NULL, NULL, 0}
  1728. /* upper, lower */
  1729. };
  1730. UpperLowerTbl_t *Lower2UpperTbl2[] = {
  1731. Lower2UpperTbl20, /* \303 */
  1732. Lower2UpperTbl21, /* \304 */
  1733. Lower2UpperTbl22, /* \305 */
  1734. Lower2UpperTbl23, /* \306 */
  1735. Lower2UpperTbl24, /* \307 */
  1736. Lower2UpperTbl25, /* \310 */
  1737. Lower2UpperTbl26, /* \311 */
  1738. Lower2UpperTbl27, /* \312 */
  1739. NULL, /* \313 */
  1740. NULL, /* \314 */
  1741. NULL, /* \315 */
  1742. Lower2UpperTbl28, /* \316 */
  1743. Lower2UpperTbl29, /* \317 */
  1744. Lower2UpperTbl2a, /* \320 */
  1745. Lower2UpperTbl2b, /* \321 */
  1746. Lower2UpperTbl2c, /* \322 */
  1747. Lower2UpperTbl2d, /* \323 */
  1748. NULL, /* \324 */
  1749. Lower2UpperTbl2e, /* \325 */
  1750. Lower2UpperTbl2f /* \326 */
  1751. };
  1752. UpperLowerTbl_t *Lower2UpperTbl3[] = {
  1753. Lower2UpperTbl30, /* \341 */
  1754. NULL, /* \342 */
  1755. NULL, /* \343 */
  1756. NULL, /* \344 */
  1757. NULL, /* \345 */
  1758. NULL, /* \346 */
  1759. NULL, /* \347 */
  1760. NULL, /* \350 */
  1761. NULL, /* \351 */
  1762. NULL, /* \352 */
  1763. NULL, /* \353 */
  1764. NULL, /* \354 */
  1765. NULL, /* \355 */
  1766. NULL, /* \356 */
  1767. Lower2UpperTbl31 /* \357 */
  1768. };
  /*
   * Lead-byte bounds used to index the Lower2Upper dispatch arrays:
   *   LU2S .. LU2E  first / last lead byte served by Lower2UpperTbl2
   *   LU3S .. LU3E  first / last lead byte served by Lower2UpperTbl3
   * Each expansion is fully parenthesized so that it stays a single operand
   * wherever it is substituted.
   */
  #define LU2S ((unsigned char)'\303')
  #define LU2E ((unsigned char)'\326')
  #define LU3S ((unsigned char)'\341')
  #define LU3E ((unsigned char)'\357')
  1773. /*
  1774. * slapi_utf8StrToUpper: translate lower-case string to upper-case
  1775. *
  1776. * input: a null terminated UTF-8 string
  1777. * output: a null terminated UTF-8 string whose characters are
  1778. * converted to upper-case; characters which are not
  1779. * lower-case are copied as is. If the input is not considered
  1780. * a UTF-8 string, NULL is returned.
  1781. *
  1782. * Notes: This function takes a string (made of multiple UTF-8 characters)
  1783. * for the input (not one character as in "toupper").
  1784. * Output string is allocated in this function and needs to be
  1785. * released when it's not needed any more. A NULL or empty input is returned as is, without any allocation.
  1786. */
  /*
   * slapi_UTF8STRTOUPPER: "char *" flavoured entry point for
   * slapi_utf8StrToUpper().  The argument is reinterpreted as unsigned
   * bytes and the callee's result is handed back untouched.
   */
  unsigned char *
  slapi_UTF8STRTOUPPER(char *s)
  {
      unsigned char *bytes = (unsigned char *)s;

      return slapi_utf8StrToUpper(bytes);
  }
  1792. unsigned char *
  1793. slapi_utf8StrToUpper(unsigned char *s)
  1794. {
  1795. UpperLowerTbl_t *ultp;
  1796. unsigned char *p, *np, *tail;
  1797. unsigned char *up, *uphead;
  1798. int len, sz;
  1799. if (s == NULL || *s == '\0') {
  1800. return s;
  1801. }
  1802. len = strlen((char *)s);
  1803. tail = s + len;
  1804. uphead = up = (unsigned char *)slapi_ch_malloc(len + 1);
  1805. p = s;
  1806. while ((np = (unsigned char *)ldap_utf8next((char *)p)) <= tail) {
  1807. switch(sz = np - p) {
  1808. case 1: /* ASCII */
  1809. *up = toupper(*p);
  1810. break;
  1811. case 2: /* 2 bytes */
  1812. if (*p < LU2S || *p > LU2E) { /* out of range */
  1813. memcpy(up, p, sz);
  1814. break;
  1815. }
  1816. for (ultp = Lower2UpperTbl2[*p - LU2S];
  1817. ultp && ultp->lower && memcmp(p, ultp->lower, sz);
  1818. ultp++)
  1819. ;
  1820. if (!ultp) { /* out of range */
  1821. memcpy(up, p, sz);
  1822. } else if (ultp->lower) { /* matched */
  1823. memcpy(up, ultp->upper, ultp->tsz);
  1824. sz = ultp->tsz;
  1825. } else {
  1826. memcpy(up, p, sz);
  1827. }
  1828. break;
  1829. case 3: /* 3 bytes */
  1830. if (*p != LU3S && *p != LU3E) { /* out of range */
  1831. memcpy(up, p, sz);
  1832. break;
  1833. }
  1834. for (ultp = Lower2UpperTbl3[*p - LU3S];
  1835. ultp && ultp->lower && memcmp(p, ultp->lower, sz);
  1836. ultp++)
  1837. ;
  1838. if (!ultp) { /* out of range */
  1839. memcpy(up, p, sz);
  1840. } else if (ultp->lower) { /* matched */
  1841. memcpy(up, ultp->upper, sz);
  1842. } else {
  1843. memcpy(up, p, sz);
  1844. }
  1845. break;
  1846. case 4:
  1847. memcpy(up, p, sz);
  1848. break;
  1849. default: /* not UTF-8 */
  1850. slapi_ch_free((void **)&uphead);
  1851. return NULL;
  1852. }
  1853. up += sz;
  1854. p = np;
  1855. if (p == tail) {
  1856. break;
  1857. }
  1858. }
  1859. *up = '\0';
  1860. return uphead;
  1861. }
  1862. /*
  1863. * slapi_utf8ToUpper: translate lower-case character to upper-case
  1864. *
  1865. * input: a UTF-8 character (s)
  1866. * output: a UTF-8 character which is converted to upper-case (d)
  1867. * length (in bytes) of input character (ssz) and
  1868. * output character (dsz)
  1869. *
  1870. * Notes: This function takes a UTF-8 character (could be multiple bytes)
  1871. * for the input. Memory for the output character is NOT allocated
  1872. * in this function, caller should have allocated it (d).
  1873. * "memmove" is used since (s) and (d) may overlap.
  1874. */
  /*
   * slapi_UTF8TOUPPER: "char *" flavoured entry point for
   * slapi_utf8ToUpper().  Source and destination are reinterpreted as
   * unsigned bytes; the two size outputs are passed straight through.
   */
  void
  slapi_UTF8TOUPPER(char *s, char *d, int *ssz, int *dsz)
  {
      unsigned char *src = (unsigned char *)s;
      unsigned char *dst = (unsigned char *)d;

      slapi_utf8ToUpper(src, dst, ssz, dsz);
  }
  1881. void
  1882. slapi_utf8ToUpper(unsigned char *s, unsigned char *d, int *ssz, int *dsz)
  1883. {
  1884. UpperLowerTbl_t *ultp;
  1885. unsigned char *tail;
  1886. if (s == NULL || *s == '\0') {
  1887. *ssz = *dsz = 0;
  1888. return;
  1889. }
  1890. if (!(*s & 0x80)) { /* ASCII */
  1891. *dsz = *ssz = 1;
  1892. *d = toupper(*s);
  1893. return;
  1894. }
  1895. tail = (unsigned char *)ldap_utf8next((char *)s);
  1896. *dsz = *ssz = tail - s;
  1897. switch(*ssz) {
  1898. case 1: /* ASCII */
  1899. *d = toupper(*s);
  1900. break;
  1901. case 2: /* 2 bytes */
  1902. if (*s < LU2S || *s > LU2E) { /* out of range */
  1903. memmove(d, s, *ssz);
  1904. break;
  1905. }
  1906. for (ultp = Lower2UpperTbl2[*s - LU2S];
  1907. ultp && ultp->lower && memcmp(s, ultp->lower, *ssz);
  1908. ultp++)
  1909. ;
  1910. if (!ultp) { /* out of range */
  1911. memmove(d, s, *ssz);
  1912. } else if (ultp->lower) { /* matched */
  1913. memmove(d, ultp->upper, ultp->tsz);
  1914. *dsz = ultp->tsz;
  1915. } else {
  1916. memmove(d, s, *ssz);
  1917. }
  1918. break;
  1919. case 3: /* 3 bytes */
  1920. if (*s != LU3S && *s != LU3E) { /* out of range */
  1921. memmove(d, s, *ssz);
  1922. break;
  1923. }
  1924. for (ultp = Lower2UpperTbl3[*s - LU3S];
  1925. ultp && ultp->lower && memcmp(s, ultp->lower, *ssz);
  1926. ultp++)
  1927. ;
  1928. if (!ultp) { /* out of range */
  1929. memmove(d, s, *ssz);
  1930. } else if (ultp->lower) { /* matched */
  1931. memmove(d, ultp->upper, *ssz);
  1932. } else {
  1933. memmove(d, s, *ssz);
  1934. }
  1935. break;
  1936. }
  1937. return;
  1938. }
  1939. /*
  1940. * slapi_utf8isLower: tests for a character that is a lower-case letter in
  1941. * UTF-8
  1942. *
  1943. * input: a UTF-8 character (could be multi-byte)
  1944. * output: 1 if the character is a lower-case letter
  1945. * 0 if the character is not a lower-case letter
  1946. */
  /*
   * slapi_UTF8ISLOWER: "char *" flavoured entry point for
   * slapi_utf8isLower(); the callee's answer is returned as is.
   */
  int
  slapi_UTF8ISLOWER(char *s)
  {
      unsigned char *bytes = (unsigned char *)s;

      return slapi_utf8isLower(bytes);
  }
  1952. int
  1953. slapi_utf8isLower(unsigned char *s)
  1954. {
  1955. UpperLowerTbl_t *ultp;
  1956. unsigned char *next;
  1957. int sz;
  1958. if (s == NULL || *s == '\0') {
  1959. return 0;
  1960. }
  1961. if (!(*s & 0x80)) { /* ASCII */
  1962. return islower(*s);
  1963. }
  1964. next = (unsigned char *)ldap_utf8next((char *)s);
  1965. switch(sz = next - s) {
  1966. case 1: /* ASCII */
  1967. return islower(*s);
  1968. case 2:
  1969. if (*s < LU2S || *s > LU2E) { /* out of range */
  1970. return 0;
  1971. }
  1972. for (ultp = Lower2UpperTbl2[*s - LU2S];
  1973. ultp && ultp->lower && memcmp(s, ultp->lower, sz);
  1974. ultp++)
  1975. ;
  1976. if (!ultp) { /* out of range */
  1977. return 0;
  1978. } else if (ultp->lower) { /* matched */
  1979. return 1;
  1980. } else {
  1981. return 0;
  1982. }
  1983. case 3:
  1984. if (*s < LU3S || *s > LU3E) { /* out of range */
  1985. return 0;
  1986. }
  1987. for (ultp = Lower2UpperTbl3[*s - LU3S];
  1988. ultp && ultp->lower && memcmp(s, ultp->lower, sz);
  1989. ultp++)
  1990. ;
  1991. if (!ultp) { /* out of range */
  1992. return 0;
  1993. } else if (ultp->lower) { /* matched */
  1994. return 1;
  1995. } else {
  1996. return 0;
  1997. }
  1998. default:
  1999. return 0;
  2000. }
  2001. }
  2002. /*
  2003. * slapi_utf8casecmp: case-insensitive string compare for UTF-8 strings
  2004. *
  2005. * input: two UTF-8 strings (s0, s1) to be compared
  2006. * output: positive number, if s0 is after s1
  2007. * 0, if the two strings are identical ignoring the case
  2008. * negative number, if s1 is after s0
  2009. *
  2010. * Rules: If both UTF-8 strings are NULL or 0-length, 0 is returned.
  2011. * If one of the strings is NULL or 0-length, the NULL/0-length
  2012. * string is smaller.
  2013. * If one or both of the strings are not UTF-8, system provided
  2014. * strcasecmp is used.
  2015. * If one of the two strings contains no 8-bit characters,
  2016. * strcasecmp is used.
  2017. * The strings are compared after converted to lower-case UTF-8.
  2018. * Each character is compared from the beginning.
  2019. * Evaluation goes in this order:
  2020. * If the length of one character is shorter than the other,
  2021. * the difference of the two lengths is returned.
  2022. * If the length of the corresponding characters is same,
  2023. * each byte in the characters is compared.
  2024. * If there's a difference between two bytes,
  2025. * the diff is returned.
  2026. * If one string is shorter than the other, the diff is returned.
  2027. *
  2028. * Notes: Don't use this function for collation
  2029. * 1) there's no notion of locale in this function.
  2030. * 2) it's UTF-8 code order, which is different from the locale
  2031. * based collation.
  2032. */
  /*
   * slapi_UTF8CASECMP: "char *" flavoured entry point for
   * slapi_utf8casecmp(); both strings are reinterpreted as unsigned bytes
   * and the callee's result is returned as is.
   */
  int
  slapi_UTF8CASECMP(char *s0, char *s1)
  {
      unsigned char *left = (unsigned char *)s0;
      unsigned char *right = (unsigned char *)s1;

      return slapi_utf8casecmp(left, right);
  }
  /*
   * slapi_utf8casecmp: compare two UTF-8 strings ignoring case.
   *
   * Returns 0 when the strings match, a negative value when s0 sorts
   * before s1 and a positive value when it sorts after.
   *
   *  - A NULL/empty string is smaller than any non-empty one; two such
   *    strings are equal.
   *  - When the two slapi_has8thBit() results differ, or both are 0,
   *    strcasecmp() decides.
   *  - Otherwise both strings are lower-cased with slapi_utf8StrToLower();
   *    if either conversion yields NULL or an empty string, strcasecmp()
   *    on the original strings decides.
   *  - The lower-cased copies are then compared character by character:
   *    first by byte width, then byte by byte; when one copy runs out, the
   *    difference of the leftovers is returned.
   *
   * The lower-cased copies are released before returning.
   */
  int
  slapi_utf8casecmp(unsigned char *s0, unsigned char *s1)
  {
      unsigned char *low0 = NULL, *low1 = NULL; /* lower-cased copies */
      unsigned char *cur0, *cur1;               /* current byte / character */
      unsigned char *nxt0, *nxt1;               /* start of following character */
      unsigned char *end0, *end1;               /* terminating NUL of each copy */
      int empty0, empty1;
      int bit8_0, bit8_1;
      int result;

      empty0 = (s0 == NULL || *s0 == '\0');
      empty1 = (s1 == NULL || *s1 == '\0');
      if (empty0 || empty1) {
          /* both empty: 0; only s0 empty: -1; only s1 empty: 1 */
          result = empty1 - empty0;
          goto done;
      }

      bit8_0 = slapi_has8thBit(s0);
      bit8_1 = slapi_has8thBit(s1);
      if (bit8_0 != bit8_1 || bit8_0 == 0) {
          result = strcasecmp((char *)s0, (char *)s1);
          goto done;
      }

      low0 = slapi_utf8StrToLower(s0);
      low1 = slapi_utf8StrToLower(s1);
      if (!low0 || !low1 || *low0 == '\0' || *low1 == '\0') {
          /* at least one side could not be handled as UTF-8 */
          result = strcasecmp((char *)s0, (char *)s1);
          goto done;
      }

      end0 = low0 + strlen((char *)low0);
      end1 = low1 + strlen((char *)low1);
      cur0 = low0;
      cur1 = low1;
      for (;;) {
          nxt0 = (unsigned char *)ldap_utf8next((char *)cur0);
          nxt1 = (unsigned char *)ldap_utf8next((char *)cur1);
          if (nxt0 > end0 || nxt1 > end1) {
              /* one side is exhausted: compare what is left over */
              result = (int)(end0 - nxt0) - (int)(end1 - nxt1);
              break;
          }
          /* a narrower character sorts first */
          result = (int)(nxt0 - cur0) - (int)(nxt1 - cur1);
          if (result) {
              break;
          }
          /* same width: the first differing byte decides */
          while (cur0 < nxt0 && *cur0 == *cur1) {
              cur0++;
              cur1++;
          }
          if (cur0 < nxt0) {
              result = *cur0 - *cur1;
              break;
          }
          /* here cur0 == nxt0 and cur1 == nxt1: move on to the next pair */
      }

  done:
      if (low0) {
          slapi_ch_free((void **)&low0);
      }
      if (low1) {
          slapi_ch_free((void **)&low1);
      }
      return result;
  }
  2118. /*
  2119. * slapi_utf8ncasecmp: case-insensitive string compare (n chars) for UTF-8
  2120. * strings
  2121. *
  2122. * input: two UTF-8 strings (s0, s1) to be compared
  2123. * number of characters (n) to compare
  2124. * output: positive number, if s0 is after s1
  2125. * 0, if the two strings are identical ignoring the case
  2126. * negative number, if s1 is after s0
  2127. *
  2128. * Rules: Same as slapi_utf8casecmp except the n characters limit.
  2129. *
  2130. * Notes: Don't use this function for collation
  2131. * 1) there's no notion of locale in this function.
  2132. * 2) it's UTF-8 code order, which is different from the locale
  2133. * based collation.
  2134. * n characters, NOT n bytes
  2135. */
  /*
   * slapi_UTF8NCASECMP: "char *" flavoured entry point for
   * slapi_utf8ncasecmp(); both strings are reinterpreted as unsigned bytes,
   * n is forwarded unchanged and the callee's result is returned as is.
   */
  int
  slapi_UTF8NCASECMP(char *s0, char *s1, int n)
  {
      unsigned char *left = (unsigned char *)s0;
      unsigned char *right = (unsigned char *)s1;

      return slapi_utf8ncasecmp(left, right, n);
  }
  /*
   * slapi_utf8ncasecmp: compare at most n CHARACTERS (not bytes) of two
   * UTF-8 strings ignoring case.
   *
   * Returns 0 when the compared parts match, a negative value when s0
   * sorts before s1 and a positive value when it sorts after.
   *
   *  - A NULL/empty string is smaller than any non-empty one; two such
   *    strings are equal.
   *  - When the two slapi_has8thBit() results differ, or both are 0,
   *    strncasecmp(s0, s1, n) decides.
   *  - Otherwise both strings are lower-cased with slapi_utf8StrToLower();
   *    if either conversion yields NULL or an empty string,
   *    strncasecmp() on the original strings decides.
   *  - The lower-cased copies are then compared character by character:
   *    first by byte width, then byte by byte.  Once n characters have
   *    matched the result is 0; when one copy runs out earlier, the
   *    difference of the leftovers is returned.
   *
   * The lower-cased copies are released before returning.
   */
  int
  slapi_utf8ncasecmp(unsigned char *s0, unsigned char *s1, int n)
  {
      unsigned char *low0 = NULL, *low1 = NULL; /* lower-cased copies */
      unsigned char *cur0, *cur1;               /* current byte / character */
      unsigned char *nxt0, *nxt1;               /* start of following character */
      unsigned char *end0, *end1;               /* terminating NUL of each copy */
      int empty0, empty1;
      int bit8_0, bit8_1;
      int seen; /* characters matched so far */
      int result;

      empty0 = (s0 == NULL || *s0 == '\0');
      empty1 = (s1 == NULL || *s1 == '\0');
      if (empty0 || empty1) {
          /* both empty: 0; only s0 empty: -1; only s1 empty: 1 */
          result = empty1 - empty0;
          goto done;
      }

      bit8_0 = slapi_has8thBit(s0);
      bit8_1 = slapi_has8thBit(s1);
      if (bit8_0 != bit8_1 || bit8_0 == 0) {
          result = strncasecmp((char *)s0, (char *)s1, n);
          goto done;
      }

      low0 = slapi_utf8StrToLower(s0);
      low1 = slapi_utf8StrToLower(s1);
      if (!low0 || !low1 || *low0 == '\0' || *low1 == '\0') {
          /* at least one side could not be handled as UTF-8 */
          result = strncasecmp((char *)s0, (char *)s1, n);
          goto done;
      }

      end0 = low0 + strlen((char *)low0);
      end1 = low1 + strlen((char *)low1);
      cur0 = low0;
      cur1 = low1;
      for (seen = 0;; seen++) {
          nxt0 = (unsigned char *)ldap_utf8next((char *)cur0);
          nxt1 = (unsigned char *)ldap_utf8next((char *)cur1);
          if (seen == n) {
              /* the requested number of characters matched */
              result = 0;
              break;
          }
          if (nxt0 > end0 || nxt1 > end1) {
              /* one side is exhausted: compare what is left over */
              result = (int)(end0 - nxt0) - (int)(end1 - nxt1);
              break;
          }
          /* a narrower character sorts first */
          result = (int)(nxt0 - cur0) - (int)(nxt1 - cur1);
          if (result) {
              break;
          }
          /* same width: the first differing byte decides */
          while (cur0 < nxt0 && *cur0 == *cur1) {
              cur0++;
              cur1++;
          }
          if (cur0 < nxt0) {
              result = *cur0 - *cur1;
              break;
          }
          /* here cur0 == nxt0 and cur1 == nxt1: move on to the next pair */
      }

  done:
      if (low0) {
          slapi_ch_free((void **)&low0);
      }
      if (low1) {
          slapi_ch_free((void **)&low1);
      }
      return result;
  }