/* utf8compare.c */
/** BEGIN COPYRIGHT BLOCK
 * This Program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation; version 2 of the License.
 *
 * This Program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this Program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * In addition, as a special exception, Red Hat, Inc. gives You the additional
 * right to link the code of this Program with code not covered under the GNU
 * General Public License ("Non-GPL Code") and to distribute linked combinations
 * including the two, subject to the limitations in this paragraph. Non-GPL Code
 * permitted under this exception must only link to the code of this Program
 * through those well defined interfaces identified in the file named EXCEPTION
 * found in the source code files (the "Approved Interfaces"). The files of
 * Non-GPL Code may instantiate templates or use macros or inline functions from
 * the Approved Interfaces without causing the resulting work to be covered by
 * the GNU General Public License. Only Red Hat, Inc. may make changes or
 * additions to the list of Approved Interfaces. You must obey the GNU General
 * Public License in all respects for all of the Program code and other code used
 * in conjunction with the Program except the Non-GPL Code covered by this
 * exception. If you modify this file, you may extend this exception to your
 * version of the file, but you are not obligated to do so. If you do not wish to
 * provide this exception without modification, you must delete this exception
 * statement from your version and license this file solely under the GPL without
 * exception.
 *
 *
 * Copyright (C) 2001 Sun Microsystems, Inc. Used by permission.
 * Copyright (C) 2005 Red Hat, Inc.
 * All rights reserved.
 * END COPYRIGHT BLOCK **/
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "ldap.h"
#include "slap.h"
#include "slapi-plugin.h"
  47. typedef struct sUpperLowerTbl {
  48. char *upper, *lower;
  49. int tsz; /* target size */
  50. } UpperLowerTbl_t;
  51. /*
  52. * slapi_has8thBit: check the input string
  53. * return 1 if the string contains 8-bit character
  54. * return 0 otherwise
  55. */
  56. int
  57. slapi_has8thBit(unsigned char *s)
  58. {
  59. unsigned char *p, *tail;
  60. tail = s + strlen((char *)s);
  61. for (p = s; p < tail; p++) {
  62. if (0x80 & *p) {
  63. return 1;
  64. }
  65. }
  66. return 0;
  67. }
  68. /*
  69. * UpperToLower Tables: sorted by upper characters
  70. */
  71. UpperLowerTbl_t Upper2LowerTbl20[] = {
  72. /* upper, lower */
  73. {"\303\200", "\303\240", 2},
  74. {"\303\201", "\303\241", 2},
  75. {"\303\202", "\303\242", 2},
  76. {"\303\203", "\303\243", 2},
  77. {"\303\204", "\303\244", 2},
  78. {"\303\205", "\303\245", 2},
  79. {"\303\206", "\303\246", 2},
  80. {"\303\207", "\303\247", 2},
  81. {"\303\210", "\303\250", 2},
  82. {"\303\211", "\303\251", 2},
  83. {"\303\212", "\303\252", 2},
  84. {"\303\213", "\303\253", 2},
  85. {"\303\214", "\303\254", 2},
  86. {"\303\215", "\303\255", 2},
  87. {"\303\216", "\303\256", 2},
  88. {"\303\217", "\303\257", 2},
  89. {"\303\220", "\303\260", 2},
  90. {"\303\221", "\303\261", 2},
  91. {"\303\222", "\303\262", 2},
  92. {"\303\223", "\303\263", 2},
  93. {"\303\224", "\303\264", 2},
  94. {"\303\225", "\303\265", 2},
  95. {"\303\226", "\303\266", 2},
  96. {"\303\230", "\303\270", 2},
  97. {"\303\231", "\303\271", 2},
  98. {"\303\232", "\303\272", 2},
  99. {"\303\233", "\303\273", 2},
  100. {"\303\234", "\303\274", 2},
  101. {"\303\235", "\303\275", 2},
  102. {"\303\236", "\303\276", 2},
  103. {NULL, NULL, 0}
  104. };
  105. UpperLowerTbl_t Upper2LowerTbl21[] = {
  106. {"\304\200", "\304\201", 2},
  107. {"\304\202", "\304\203", 2},
  108. {"\304\204", "\304\205", 2},
  109. {"\304\206", "\304\207", 2},
  110. {"\304\210", "\304\211", 2},
  111. {"\304\212", "\304\213", 2},
  112. {"\304\214", "\304\215", 2},
  113. {"\304\216", "\304\217", 2},
  114. {"\304\220", "\304\221", 2},
  115. {"\304\222", "\304\223", 2},
  116. {"\304\224", "\304\225", 2},
  117. {"\304\226", "\304\227", 2},
  118. {"\304\230", "\304\231", 2},
  119. {"\304\232", "\304\233", 2},
  120. {"\304\234", "\304\235", 2},
  121. {"\304\236", "\304\237", 2},
  122. {"\304\240", "\304\241", 2},
  123. {"\304\242", "\304\243", 2},
  124. {"\304\244", "\304\245", 2},
  125. {"\304\246", "\304\247", 2},
  126. {"\304\250", "\304\251", 2},
  127. {"\304\252", "\304\253", 2},
  128. {"\304\254", "\304\255", 2},
  129. {"\304\256", "\304\257", 2},
  130. {"\304\260", "\151", 1},
  131. {"\304\262", "\304\263", 2},
  132. {"\304\264", "\304\265", 2},
  133. {"\304\266", "\304\267", 2},
  134. {"\304\271", "\304\272", 2},
  135. {"\304\273", "\304\274", 2},
  136. {"\304\275", "\304\276", 2},
  137. {"\304\277", "\305\200", 2},
  138. {NULL, NULL, 0}
  139. };
  140. UpperLowerTbl_t Upper2LowerTbl22[] = {
  141. {"\305\201", "\305\202", 2},
  142. {"\305\203", "\305\204", 2},
  143. {"\305\205", "\305\206", 2},
  144. {"\305\207", "\305\210", 2},
  145. {"\305\212", "\305\213", 2},
  146. {"\305\214", "\305\215", 2},
  147. {"\305\216", "\305\217", 2},
  148. {"\305\220", "\305\221", 2},
  149. {"\305\222", "\305\223", 2},
  150. {"\305\224", "\305\225", 2},
  151. {"\305\226", "\305\227", 2},
  152. {"\305\230", "\305\231", 2},
  153. {"\305\232", "\305\233", 2},
  154. {"\305\234", "\305\235", 2},
  155. {"\305\236", "\305\237", 2},
  156. {"\305\240", "\305\241", 2},
  157. {"\305\242", "\305\243", 2},
  158. {"\305\244", "\305\245", 2},
  159. {"\305\246", "\305\247", 2},
  160. {"\305\250", "\305\251", 2},
  161. {"\305\252", "\305\253", 2},
  162. {"\305\254", "\305\255", 2},
  163. {"\305\256", "\305\257", 2},
  164. {"\305\260", "\305\261", 2},
  165. {"\305\262", "\305\263", 2},
  166. {"\305\264", "\305\265", 2},
  167. {"\305\266", "\305\267", 2},
  168. {"\305\270", "\303\277", 2},
  169. {"\305\271", "\305\272", 2},
  170. {"\305\273", "\305\274", 2},
  171. {"\305\275", "\305\276", 2},
  172. {NULL, NULL, 0}
  173. };
  174. UpperLowerTbl_t Upper2LowerTbl23[] = {
  175. {"\306\201", "\311\223", 2},
  176. {"\306\202", "\306\203", 2},
  177. {"\306\204", "\306\205", 2},
  178. {"\306\206", "\311\224", 2},
  179. {"\306\207", "\306\210", 2},
  180. {"\306\211", "\311\226", 2},
  181. {"\306\212", "\311\227", 2},
  182. {"\306\213", "\306\214", 2},
  183. {"\306\216", "\311\230", 2},
  184. {"\306\217", "\311\231", 2},
  185. {"\306\220", "\311\233", 2},
  186. {"\306\221", "\306\222", 2},
  187. {"\306\223", "\311\240", 2},
  188. {"\306\224", "\311\243", 2},
  189. {"\306\226", "\311\251", 2},
  190. {"\306\227", "\311\250", 2},
  191. {"\306\230", "\306\231", 2},
  192. {"\306\234", "\311\257", 2},
  193. {"\306\235", "\311\262", 2},
  194. {"\306\237", "\306\237", 2},
  195. {"\306\240", "\306\241", 2},
  196. {"\306\242", "\306\243", 2},
  197. {"\306\244", "\306\245", 2},
  198. {"\306\246", "\306\246", 2},
  199. {"\306\247", "\306\250", 2},
  200. {"\306\251", "\312\203", 2},
  201. {"\306\254", "\306\255", 2},
  202. {"\306\256", "\312\210", 2},
  203. {"\306\257", "\306\260", 2},
  204. {"\306\261", "\312\212", 2},
  205. {"\306\262", "\312\213", 2},
  206. {"\306\263", "\306\264", 2},
  207. {"\306\265", "\306\266", 2},
  208. {"\306\267", "\312\222", 2},
  209. {"\306\270", "\306\271", 2},
  210. {"\306\274", "\306\275", 2},
  211. {NULL, NULL, 0}
  212. };
  213. UpperLowerTbl_t Upper2LowerTbl24[] = {
  214. {"\307\204", "\307\205", 2},
  215. {"\307\205", "\307\204", 2},
  216. {"\307\207", "\307\210", 2},
  217. {"\307\210", "\307\207", 2},
  218. {"\307\212", "\307\213", 2},
  219. {"\307\213", "\307\212", 2},
  220. {"\307\215", "\307\216", 2},
  221. {"\307\217", "\307\220", 2},
  222. {"\307\221", "\307\222", 2},
  223. {"\307\223", "\307\224", 2},
  224. {"\307\225", "\307\226", 2},
  225. {"\307\227", "\307\230", 2},
  226. {"\307\231", "\307\232", 2},
  227. {"\307\233", "\307\234", 2},
  228. {"\307\236", "\307\237", 2},
  229. {"\307\240", "\307\241", 2},
  230. {"\307\242", "\307\243", 2},
  231. {"\307\244", "\307\245", 2},
  232. {"\307\246", "\307\247", 2},
  233. {"\307\250", "\307\251", 2},
  234. {"\307\252", "\307\253", 2},
  235. {"\307\254", "\307\255", 2},
  236. {"\307\256", "\307\257", 2},
  237. {"\307\261", "\307\262", 2},
  238. {"\307\262", "\307\261", 2},
  239. {"\307\264", "\307\265", 2},
  240. {"\307\272", "\307\273", 2},
  241. {"\307\274", "\307\275", 2},
  242. {"\307\276", "\307\277", 2},
  243. {NULL, NULL, 0}
  244. };
  245. UpperLowerTbl_t Upper2LowerTbl25[] = {
  246. {"\310\200", "\310\201", 2},
  247. {"\310\202", "\310\203", 2},
  248. {"\310\204", "\310\205", 2},
  249. {"\310\206", "\310\207", 2},
  250. {"\310\210", "\310\211", 2},
  251. {"\310\212", "\310\213", 2},
  252. {"\310\214", "\310\215", 2},
  253. {"\310\216", "\310\217", 2},
  254. {"\310\220", "\310\221", 2},
  255. {"\310\222", "\310\223", 2},
  256. {"\310\224", "\310\225", 2},
  257. {"\310\226", "\310\227", 2},
  258. {NULL, NULL, 0}
  259. };
  260. UpperLowerTbl_t Upper2LowerTbl26[] = {
  261. {"\316\206", "\316\254", 2},
  262. {"\316\210", "\316\255", 2},
  263. {"\316\211", "\316\256", 2},
  264. {"\316\212", "\316\257", 2},
  265. {"\316\214", "\317\214", 2},
  266. {"\316\216", "\317\215", 2},
  267. {"\316\217", "\317\216", 2},
  268. {"\316\221", "\316\261", 2},
  269. {"\316\222", "\316\262", 2},
  270. {"\316\223", "\316\263", 2},
  271. {"\316\224", "\316\264", 2},
  272. {"\316\225", "\316\265", 2},
  273. {"\316\226", "\316\266", 2},
  274. {"\316\227", "\316\267", 2},
  275. {"\316\230", "\316\270", 2},
  276. {"\316\231", "\316\271", 2},
  277. {"\316\232", "\316\272", 2},
  278. {"\316\233", "\316\273", 2},
  279. {"\316\234", "\316\274", 2},
  280. {"\316\235", "\316\275", 2},
  281. {"\316\236", "\316\276", 2},
  282. {"\316\237", "\316\277", 2},
  283. {"\316\240", "\317\200", 2},
  284. {"\316\241", "\317\201", 2},
  285. {"\316\243", "\317\203", 2},
  286. {"\316\244", "\317\204", 2},
  287. {"\316\245", "\317\205", 2},
  288. {"\316\246", "\317\206", 2},
  289. {"\316\247", "\317\207", 2},
  290. {"\316\250", "\317\210", 2},
  291. {"\316\251", "\317\211", 2},
  292. {"\316\252", "\317\212", 2},
  293. {"\316\253", "\317\213", 2},
  294. {NULL, NULL, 0}
  295. };
  296. UpperLowerTbl_t Upper2LowerTbl27[] = {
  297. {"\317\222", "\317\222", 2},
  298. {"\317\223", "\317\223", 2},
  299. {"\317\224", "\317\224", 2},
  300. {"\317\232", "\317\232", 2},
  301. {"\317\234", "\317\234", 2},
  302. {"\317\236", "\317\236", 2},
  303. {"\317\240", "\317\240", 2},
  304. {"\317\242", "\317\243", 2},
  305. {"\317\244", "\317\245", 2},
  306. {"\317\246", "\317\247", 2},
  307. {"\317\250", "\317\251", 2},
  308. {"\317\252", "\317\253", 2},
  309. {"\317\254", "\317\255", 2},
  310. {"\317\256", "\317\257", 2},
  311. {NULL, NULL, 0}
  312. };
  313. UpperLowerTbl_t Upper2LowerTbl28[] = {
  314. {"\320\201", "\321\221", 2},
  315. {"\320\202", "\321\222", 2},
  316. {"\320\203", "\321\223", 2},
  317. {"\320\204", "\321\224", 2},
  318. {"\320\205", "\321\225", 2},
  319. {"\320\206", "\321\226", 2},
  320. {"\320\207", "\321\227", 2},
  321. {"\320\210", "\321\230", 2},
  322. {"\320\211", "\321\231", 2},
  323. {"\320\212", "\321\232", 2},
  324. {"\320\213", "\321\233", 2},
  325. {"\320\214", "\321\234", 2},
  326. {"\320\216", "\321\236", 2},
  327. {"\320\217", "\321\237", 2},
  328. {"\320\220", "\320\260", 2},
  329. {"\320\221", "\320\261", 2},
  330. {"\320\222", "\320\262", 2},
  331. {"\320\223", "\320\263", 2},
  332. {"\320\224", "\320\264", 2},
  333. {"\320\225", "\320\265", 2},
  334. {"\320\226", "\320\266", 2},
  335. {"\320\227", "\320\267", 2},
  336. {"\320\230", "\320\270", 2},
  337. {"\320\231", "\320\271", 2},
  338. {"\320\232", "\320\272", 2},
  339. {"\320\233", "\320\273", 2},
  340. {"\320\234", "\320\274", 2},
  341. {"\320\235", "\320\275", 2},
  342. {"\320\236", "\320\276", 2},
  343. {"\320\237", "\320\277", 2},
  344. {"\320\240", "\321\200", 2},
  345. {"\320\241", "\321\201", 2},
  346. {"\320\242", "\321\202", 2},
  347. {"\320\243", "\321\203", 2},
  348. {"\320\244", "\321\204", 2},
  349. {"\320\245", "\321\205", 2},
  350. {"\320\246", "\321\206", 2},
  351. {"\320\247", "\321\207", 2},
  352. {"\320\250", "\321\210", 2},
  353. {"\320\251", "\321\211", 2},
  354. {"\320\252", "\321\212", 2},
  355. {"\320\253", "\321\213", 2},
  356. {"\320\254", "\321\214", 2},
  357. {"\320\255", "\321\215", 2},
  358. {"\320\256", "\321\216", 2},
  359. {"\320\257", "\321\217", 2},
  360. {NULL, NULL, 0}
  361. };
  362. UpperLowerTbl_t Upper2LowerTbl29[] = {
  363. {"\321\240", "\321\241", 2},
  364. {"\321\242", "\321\243", 2},
  365. {"\321\244", "\321\245", 2},
  366. {"\321\246", "\321\247", 2},
  367. {"\321\250", "\321\251", 2},
  368. {"\321\252", "\321\253", 2},
  369. {"\321\254", "\321\255", 2},
  370. {"\321\256", "\321\257", 2},
  371. {"\321\260", "\321\261", 2},
  372. {"\321\262", "\321\263", 2},
  373. {"\321\264", "\321\265", 2},
  374. {"\321\266", "\321\267", 2},
  375. {"\321\270", "\321\271", 2},
  376. {"\321\272", "\321\273", 2},
  377. {"\321\274", "\321\275", 2},
  378. {"\321\276", "\321\277", 2},
  379. {NULL, NULL, 0}
  380. };
  381. UpperLowerTbl_t Upper2LowerTbl2a[] = {
  382. {"\322\200", "\322\201", 2},
  383. {"\322\220", "\322\221", 2},
  384. {"\322\222", "\322\223", 2},
  385. {"\322\224", "\322\225", 2},
  386. {"\322\226", "\322\227", 2},
  387. {"\322\230", "\322\231", 2},
  388. {"\322\232", "\322\233", 2},
  389. {"\322\234", "\322\235", 2},
  390. {"\322\236", "\322\237", 2},
  391. {"\322\240", "\322\241", 2},
  392. {"\322\242", "\322\243", 2},
  393. {"\322\244", "\322\245", 2},
  394. {"\322\246", "\322\247", 2},
  395. {"\322\250", "\322\251", 2},
  396. {"\322\252", "\322\253", 2},
  397. {"\322\254", "\322\255", 2},
  398. {"\322\256", "\322\257", 2},
  399. {"\322\260", "\322\261", 2},
  400. {"\322\262", "\322\263", 2},
  401. {"\322\264", "\322\265", 2},
  402. {"\322\266", "\322\267", 2},
  403. {"\322\270", "\322\271", 2},
  404. {"\322\272", "\322\273", 2},
  405. {"\322\274", "\322\275", 2},
  406. {"\322\276", "\322\277", 2},
  407. {NULL, NULL, 0}
  408. };
  409. UpperLowerTbl_t Upper2LowerTbl2b[] = {
  410. {"\323\201", "\323\202", 2},
  411. {"\323\203", "\323\204", 2},
  412. {"\323\207", "\323\210", 2},
  413. {"\323\213", "\323\214", 2},
  414. {"\323\220", "\323\221", 2},
  415. {"\323\222", "\323\223", 2},
  416. {"\323\224", "\323\225", 2},
  417. {"\323\226", "\323\227", 2},
  418. {"\323\230", "\323\231", 2},
  419. {"\323\232", "\323\233", 2},
  420. {"\323\234", "\323\235", 2},
  421. {"\323\236", "\323\237", 2},
  422. {"\323\240", "\323\241", 2},
  423. {"\323\242", "\323\243", 2},
  424. {"\323\244", "\323\245", 2},
  425. {"\323\246", "\323\247", 2},
  426. {"\323\250", "\323\251", 2},
  427. {"\323\252", "\323\253", 2},
  428. {"\323\256", "\323\257", 2},
  429. {"\323\260", "\323\261", 2},
  430. {"\323\262", "\323\263", 2},
  431. {"\323\264", "\323\265", 2},
  432. {"\323\270", "\323\271", 2},
  433. {NULL, NULL, 0}
  434. };
  435. UpperLowerTbl_t Upper2LowerTbl2c[] = {
  436. {"\324\261", "\325\241", 2},
  437. {"\324\262", "\325\242", 2},
  438. {"\324\263", "\325\243", 2},
  439. {"\324\264", "\325\244", 2},
  440. {"\324\265", "\325\245", 2},
  441. {"\324\266", "\325\246", 2},
  442. {"\324\267", "\325\247", 2},
  443. {"\324\270", "\325\250", 2},
  444. {"\324\271", "\325\251", 2},
  445. {"\324\272", "\325\252", 2},
  446. {"\324\273", "\325\253", 2},
  447. {"\324\274", "\325\254", 2},
  448. {"\324\275", "\325\255", 2},
  449. {"\324\276", "\325\256", 2},
  450. {"\324\277", "\325\257", 2},
  451. {NULL, NULL, 0}
  452. };
  453. UpperLowerTbl_t Upper2LowerTbl2d[] = {
  454. {"\325\200", "\325\260", 2},
  455. {"\325\201", "\325\261", 2},
  456. {"\325\202", "\325\262", 2},
  457. {"\325\203", "\325\263", 2},
  458. {"\325\204", "\325\264", 2},
  459. {"\325\205", "\325\265", 2},
  460. {"\325\206", "\325\266", 2},
  461. {"\325\207", "\325\267", 2},
  462. {"\325\210", "\325\270", 2},
  463. {"\325\211", "\325\271", 2},
  464. {"\325\212", "\325\272", 2},
  465. {"\325\213", "\325\273", 2},
  466. {"\325\214", "\325\274", 2},
  467. {"\325\215", "\325\275", 2},
  468. {"\325\216", "\325\276", 2},
  469. {"\325\217", "\325\277", 2},
  470. {"\325\220", "\326\200", 2},
  471. {"\325\221", "\326\201", 2},
  472. {"\325\222", "\326\202", 2},
  473. {"\325\223", "\326\203", 2},
  474. {"\325\224", "\326\204", 2},
  475. {"\325\225", "\326\205", 2},
  476. {"\325\226", "\326\206", 2},
  477. {NULL, NULL, 0}
  478. /* upper, lower */
  479. };
  480. UpperLowerTbl_t Upper2LowerTbl30[] = {
  481. /* upper, lower */
  482. {"\341\202\240", "\341\203\220", 3},
  483. {"\341\202\241", "\341\203\221", 3},
  484. {"\341\202\242", "\341\203\222", 3},
  485. {"\341\202\243", "\341\203\223", 3},
  486. {"\341\202\244", "\341\203\224", 3},
  487. {"\341\202\245", "\341\203\225", 3},
  488. {"\341\202\246", "\341\203\226", 3},
  489. {"\341\202\247", "\341\203\227", 3},
  490. {"\341\202\250", "\341\203\230", 3},
  491. {"\341\202\251", "\341\203\231", 3},
  492. {"\341\202\252", "\341\203\232", 3},
  493. {"\341\202\253", "\341\203\233", 3},
  494. {"\341\202\254", "\341\203\234", 3},
  495. {"\341\202\255", "\341\203\235", 3},
  496. {"\341\202\256", "\341\203\236", 3},
  497. {"\341\202\257", "\341\203\237", 3},
  498. {"\341\202\260", "\341\203\240", 3},
  499. {"\341\202\261", "\341\203\241", 3},
  500. {"\341\202\262", "\341\203\242", 3},
  501. {"\341\202\263", "\341\203\243", 3},
  502. {"\341\202\264", "\341\203\244", 3},
  503. {"\341\202\265", "\341\203\245", 3},
  504. {"\341\202\266", "\341\203\246", 3},
  505. {"\341\202\267", "\341\203\247", 3},
  506. {"\341\202\270", "\341\203\250", 3},
  507. {"\341\202\271", "\341\203\251", 3},
  508. {"\341\202\272", "\341\203\252", 3},
  509. {"\341\202\273", "\341\203\253", 3},
  510. {"\341\202\274", "\341\203\254", 3},
  511. {"\341\202\275", "\341\203\255", 3},
  512. {"\341\202\276", "\341\203\256", 3},
  513. {"\341\202\277", "\341\203\257", 3},
  514. {"\341\203\200", "\341\203\260", 3},
  515. {"\341\203\201", "\341\203\261", 3},
  516. {"\341\203\202", "\341\203\262", 3},
  517. {"\341\203\203", "\341\203\263", 3},
  518. {"\341\203\204", "\341\203\264", 3},
  519. {"\341\203\205", "\341\203\265", 3},
  520. {"\341\270\200", "\341\270\201", 3},
  521. {"\341\270\202", "\341\270\203", 3},
  522. {"\341\270\204", "\341\270\205", 3},
  523. {"\341\270\206", "\341\270\207", 3},
  524. {"\341\270\210", "\341\270\211", 3},
  525. {"\341\270\212", "\341\270\213", 3},
  526. {"\341\270\214", "\341\270\215", 3},
  527. {"\341\270\216", "\341\270\217", 3},
  528. {"\341\270\220", "\341\270\221", 3},
  529. {"\341\270\222", "\341\270\223", 3},
  530. {"\341\270\224", "\341\270\225", 3},
  531. {"\341\270\226", "\341\270\227", 3},
  532. {"\341\270\230", "\341\270\231", 3},
  533. {"\341\270\232", "\341\270\233", 3},
  534. {"\341\270\234", "\341\270\235", 3},
  535. {"\341\270\236", "\341\270\237", 3},
  536. {"\341\270\240", "\341\270\241", 3},
  537. {"\341\270\242", "\341\270\243", 3},
  538. {"\341\270\244", "\341\270\245", 3},
  539. {"\341\270\246", "\341\270\247", 3},
  540. {"\341\270\250", "\341\270\251", 3},
  541. {"\341\270\252", "\341\270\253", 3},
  542. {"\341\270\254", "\341\270\255", 3},
  543. {"\341\270\256", "\341\270\257", 3},
  544. {"\341\270\260", "\341\270\261", 3},
  545. {"\341\270\262", "\341\270\263", 3},
  546. {"\341\270\264", "\341\270\265", 3},
  547. {"\341\270\266", "\341\270\267", 3},
  548. {"\341\270\270", "\341\270\271", 3},
  549. {"\341\270\272", "\341\270\273", 3},
  550. {"\341\270\274", "\341\270\275", 3},
  551. {"\341\270\276", "\341\270\277", 3},
  552. {"\341\271\200", "\341\271\201", 3},
  553. {"\341\271\202", "\341\271\203", 3},
  554. {"\341\271\204", "\341\271\205", 3},
  555. {"\341\271\206", "\341\271\207", 3},
  556. {"\341\271\210", "\341\271\211", 3},
  557. {"\341\271\212", "\341\271\213", 3},
  558. {"\341\271\214", "\341\271\215", 3},
  559. {"\341\271\216", "\341\271\217", 3},
  560. {"\341\271\220", "\341\271\221", 3},
  561. {"\341\271\222", "\341\271\223", 3},
  562. {"\341\271\224", "\341\271\225", 3},
  563. {"\341\271\226", "\341\271\227", 3},
  564. {"\341\271\230", "\341\271\231", 3},
  565. {"\341\271\232", "\341\271\233", 3},
  566. {"\341\271\234", "\341\271\235", 3},
  567. {"\341\271\236", "\341\271\237", 3},
  568. {"\341\271\240", "\341\271\241", 3},
  569. {"\341\271\242", "\341\271\243", 3},
  570. {"\341\271\244", "\341\271\245", 3},
  571. {"\341\271\246", "\341\271\247", 3},
  572. {"\341\271\250", "\341\271\251", 3},
  573. {"\341\271\252", "\341\271\253", 3},
  574. {"\341\271\254", "\341\271\255", 3},
  575. {"\341\271\256", "\341\271\257", 3},
  576. {"\341\271\260", "\341\271\261", 3},
  577. {"\341\271\262", "\341\271\263", 3},
  578. {"\341\271\264", "\341\271\265", 3},
  579. {"\341\271\266", "\341\271\267", 3},
  580. {"\341\271\270", "\341\271\271", 3},
  581. {"\341\271\272", "\341\271\273", 3},
  582. {"\341\271\274", "\341\271\275", 3},
  583. {"\341\271\276", "\341\271\277", 3},
  584. {"\341\272\200", "\341\272\201", 3},
  585. {"\341\272\202", "\341\272\203", 3},
  586. {"\341\272\204", "\341\272\205", 3},
  587. {"\341\272\206", "\341\272\207", 3},
  588. {"\341\272\210", "\341\272\211", 3},
  589. {"\341\272\212", "\341\272\213", 3},
  590. {"\341\272\214", "\341\272\215", 3},
  591. {"\341\272\216", "\341\272\217", 3},
  592. {"\341\272\220", "\341\272\221", 3},
  593. {"\341\272\222", "\341\272\223", 3},
  594. {"\341\272\224", "\341\272\225", 3},
  595. {"\341\272\240", "\341\272\241", 3},
  596. {"\341\272\242", "\341\272\243", 3},
  597. {"\341\272\244", "\341\272\245", 3},
  598. {"\341\272\246", "\341\272\247", 3},
  599. {"\341\272\250", "\341\272\251", 3},
  600. {"\341\272\252", "\341\272\253", 3},
  601. {"\341\272\254", "\341\272\255", 3},
  602. {"\341\272\256", "\341\272\257", 3},
  603. {"\341\272\260", "\341\272\261", 3},
  604. {"\341\272\262", "\341\272\263", 3},
  605. {"\341\272\264", "\341\272\265", 3},
  606. {"\341\272\266", "\341\272\267", 3},
  607. {"\341\272\270", "\341\272\271", 3},
  608. {"\341\272\272", "\341\272\273", 3},
  609. {"\341\272\274", "\341\272\275", 3},
  610. {"\341\272\276", "\341\272\277", 3},
  611. {"\341\273\200", "\341\273\201", 3},
  612. {"\341\273\202", "\341\273\203", 3},
  613. {"\341\273\204", "\341\273\205", 3},
  614. {"\341\273\206", "\341\273\207", 3},
  615. {"\341\273\210", "\341\273\211", 3},
  616. {"\341\273\212", "\341\273\213", 3},
  617. {"\341\273\214", "\341\273\215", 3},
  618. {"\341\273\216", "\341\273\217", 3},
  619. {"\341\273\220", "\341\273\221", 3},
  620. {"\341\273\222", "\341\273\223", 3},
  621. {"\341\273\224", "\341\273\225", 3},
  622. {"\341\273\226", "\341\273\227", 3},
  623. {"\341\273\230", "\341\273\231", 3},
  624. {"\341\273\232", "\341\273\233", 3},
  625. {"\341\273\234", "\341\273\235", 3},
  626. {"\341\273\236", "\341\273\237", 3},
  627. {"\341\273\240", "\341\273\241", 3},
  628. {"\341\273\242", "\341\273\243", 3},
  629. {"\341\273\244", "\341\273\245", 3},
  630. {"\341\273\246", "\341\273\247", 3},
  631. {"\341\273\250", "\341\273\251", 3},
  632. {"\341\273\252", "\341\273\253", 3},
  633. {"\341\273\254", "\341\273\255", 3},
  634. {"\341\273\256", "\341\273\257", 3},
  635. {"\341\273\260", "\341\273\261", 3},
  636. {"\341\273\262", "\341\273\263", 3},
  637. {"\341\273\264", "\341\273\265", 3},
  638. {"\341\273\266", "\341\273\267", 3},
  639. {"\341\273\270", "\341\273\271", 3},
  640. {"\341\274\210", "\341\274\200", 3},
  641. {"\341\274\211", "\341\274\201", 3},
  642. {"\341\274\212", "\341\274\202", 3},
  643. {"\341\274\213", "\341\274\203", 3},
  644. {"\341\274\214", "\341\274\204", 3},
  645. {"\341\274\215", "\341\274\205", 3},
  646. {"\341\274\216", "\341\274\206", 3},
  647. {"\341\274\217", "\341\274\207", 3},
  648. {"\341\274\230", "\341\274\220", 3},
  649. {"\341\274\231", "\341\274\221", 3},
  650. {"\341\274\232", "\341\274\222", 3},
  651. {"\341\274\233", "\341\274\223", 3},
  652. {"\341\274\234", "\341\274\224", 3},
  653. {"\341\274\235", "\341\274\225", 3},
  654. {"\341\274\250", "\341\274\240", 3},
  655. {"\341\274\251", "\341\274\241", 3},
  656. {"\341\274\252", "\341\274\242", 3},
  657. {"\341\274\253", "\341\274\243", 3},
  658. {"\341\274\254", "\341\274\244", 3},
  659. {"\341\274\255", "\341\274\245", 3},
  660. {"\341\274\256", "\341\274\246", 3},
  661. {"\341\274\257", "\341\274\247", 3},
  662. {"\341\274\270", "\341\274\260", 3},
  663. {"\341\274\271", "\341\274\261", 3},
  664. {"\341\274\272", "\341\274\262", 3},
  665. {"\341\274\273", "\341\274\263", 3},
  666. {"\341\274\274", "\341\274\264", 3},
  667. {"\341\274\275", "\341\274\265", 3},
  668. {"\341\274\276", "\341\274\266", 3},
  669. {"\341\274\277", "\341\274\267", 3},
  670. {"\341\275\210", "\341\275\200", 3},
  671. {"\341\275\211", "\341\275\201", 3},
  672. {"\341\275\212", "\341\275\202", 3},
  673. {"\341\275\213", "\341\275\203", 3},
  674. {"\341\275\214", "\341\275\204", 3},
  675. {"\341\275\215", "\341\275\205", 3},
  676. {"\341\275\231", "\341\275\221", 3},
  677. {"\341\275\233", "\341\275\223", 3},
  678. {"\341\275\235", "\341\275\225", 3},
  679. {"\341\275\237", "\341\275\227", 3},
  680. {"\341\275\250", "\341\275\240", 3},
  681. {"\341\275\251", "\341\275\241", 3},
  682. {"\341\275\252", "\341\275\242", 3},
  683. {"\341\275\253", "\341\275\243", 3},
  684. {"\341\275\254", "\341\275\244", 3},
  685. {"\341\275\255", "\341\275\245", 3},
  686. {"\341\275\256", "\341\275\246", 3},
  687. {"\341\275\257", "\341\275\247", 3},
  688. {"\341\276\210", "\341\276\200", 3},
  689. {"\341\276\211", "\341\276\201", 3},
  690. {"\341\276\212", "\341\276\202", 3},
  691. {"\341\276\213", "\341\276\203", 3},
  692. {"\341\276\214", "\341\276\204", 3},
  693. {"\341\276\215", "\341\276\205", 3},
  694. {"\341\276\216", "\341\276\206", 3},
  695. {"\341\276\217", "\341\276\207", 3},
  696. {"\341\276\230", "\341\276\220", 3},
  697. {"\341\276\231", "\341\276\221", 3},
  698. {"\341\276\232", "\341\276\222", 3},
  699. {"\341\276\233", "\341\276\223", 3},
  700. {"\341\276\234", "\341\276\224", 3},
  701. {"\341\276\235", "\341\276\225", 3},
  702. {"\341\276\236", "\341\276\226", 3},
  703. {"\341\276\237", "\341\276\227", 3},
  704. {"\341\276\250", "\341\276\240", 3},
  705. {"\341\276\251", "\341\276\241", 3},
  706. {"\341\276\252", "\341\276\242", 3},
  707. {"\341\276\253", "\341\276\243", 3},
  708. {"\341\276\254", "\341\276\244", 3},
  709. {"\341\276\255", "\341\276\245", 3},
  710. {"\341\276\256", "\341\276\246", 3},
  711. {"\341\276\257", "\341\276\247", 3},
  712. {"\341\276\270", "\341\276\260", 3},
  713. {"\341\276\271", "\341\276\261", 3},
  714. {"\341\276\272", "\341\275\260", 3},
  715. {"\341\276\273", "\341\275\261", 3},
  716. {"\341\276\274", "\341\276\263", 3},
  717. {"\341\276\276", "\341\276\276", 3},
  718. {"\341\277\210", "\341\275\262", 3},
  719. {"\341\277\211", "\341\275\263", 3},
  720. {"\341\277\212", "\341\275\264", 3},
  721. {"\341\277\213", "\341\275\265", 3},
  722. {"\341\277\214", "\341\277\203", 3},
  723. {"\341\277\230", "\341\277\220", 3},
  724. {"\341\277\231", "\341\277\221", 3},
  725. {"\341\277\232", "\341\275\266", 3},
  726. {"\341\277\233", "\341\275\267", 3},
  727. {"\341\277\250", "\341\277\240", 3},
  728. {"\341\277\251", "\341\277\241", 3},
  729. {"\341\277\252", "\341\275\272", 3},
  730. {"\341\277\253", "\341\275\273", 3},
  731. {"\341\277\254", "\341\277\245", 3},
  732. {"\341\277\270", "\341\275\270", 3},
  733. {"\341\277\271", "\341\275\271", 3},
  734. {"\341\277\272", "\341\275\274", 3},
  735. {"\341\277\273", "\341\275\275", 3},
  736. {"\341\277\274", "\341\277\263", 3},
  737. {NULL, NULL, 0}
  738. };
  739. UpperLowerTbl_t Upper2LowerTbl31[] = {
  740. {"\357\274\241", "\357\275\201", 3},
  741. {"\357\274\242", "\357\275\202", 3},
  742. {"\357\274\243", "\357\275\203", 3},
  743. {"\357\274\244", "\357\275\204", 3},
  744. {"\357\274\245", "\357\275\205", 3},
  745. {"\357\274\246", "\357\275\206", 3},
  746. {"\357\274\247", "\357\275\207", 3},
  747. {"\357\274\250", "\357\275\210", 3},
  748. {"\357\274\251", "\357\275\211", 3},
  749. {"\357\274\252", "\357\275\212", 3},
  750. {"\357\274\253", "\357\275\213", 3},
  751. {"\357\274\254", "\357\275\214", 3},
  752. {"\357\274\255", "\357\275\215", 3},
  753. {"\357\274\256", "\357\275\216", 3},
  754. {"\357\274\257", "\357\275\217", 3},
  755. {"\357\274\260", "\357\275\220", 3},
  756. {"\357\274\261", "\357\275\221", 3},
  757. {"\357\274\262", "\357\275\222", 3},
  758. {"\357\274\263", "\357\275\223", 3},
  759. {"\357\274\264", "\357\275\224", 3},
  760. {"\357\274\265", "\357\275\225", 3},
  761. {"\357\274\266", "\357\275\226", 3},
  762. {"\357\274\267", "\357\275\227", 3},
  763. {"\357\274\270", "\357\275\230", 3},
  764. {"\357\274\271", "\357\275\231", 3},
  765. {"\357\274\272", "\357\275\232", 3},
  766. {NULL, NULL, 0}
  767. /* upper, lower */
  768. };
  769. UpperLowerTbl_t *Upper2LowerTbl2[] = {
  770. Upper2LowerTbl20, /* \303 */
  771. Upper2LowerTbl21, /* \304 */
  772. Upper2LowerTbl22, /* \305 */
  773. Upper2LowerTbl23, /* \306 */
  774. Upper2LowerTbl24, /* \307 */
  775. Upper2LowerTbl25, /* \310 */
  776. NULL, /* \311 */
  777. NULL, /* \312 */
  778. NULL, /* \313 */
  779. NULL, /* \314 */
  780. NULL, /* \315 */
  781. Upper2LowerTbl26, /* \316 */
  782. Upper2LowerTbl27, /* \317 */
  783. Upper2LowerTbl28, /* \320 */
  784. Upper2LowerTbl29, /* \321 */
  785. Upper2LowerTbl2a, /* \322 */
  786. Upper2LowerTbl2b, /* \323 */
  787. Upper2LowerTbl2c, /* \324 */
  788. Upper2LowerTbl2d /* \325 */
  789. };
  790. UpperLowerTbl_t *Upper2LowerTbl3[] = {
  791. Upper2LowerTbl30, /* \341 */
  792. NULL, /* \342 */
  793. NULL, /* \343 */
  794. NULL, /* \344 */
  795. NULL, /* \345 */
  796. NULL, /* \346 */
  797. NULL, /* \347 */
  798. NULL, /* \350 */
  799. NULL, /* \351 */
  800. NULL, /* \352 */
  801. NULL, /* \353 */
  802. NULL, /* \354 */
  803. NULL, /* \355 */
  804. NULL, /* \356 */
  805. Upper2LowerTbl31 /* \357 */
  806. };
  /*
   * Lead-byte bounds used to index the dispatch arrays:
   *   UL2S..UL2E  first/last lead byte covered by Upper2LowerTbl2 (2-byte)
   *   UL3S..UL3E  first/last lead byte covered by Upper2LowerTbl3 (3-byte)
   * Each expansion is fully parenthesized so the macro behaves as a single
   * operand wherever it is used (e.g. after sizeof or next to a postfix
   * operator).
   */
  #define UL2S ((unsigned char)'\303')
  #define UL2E ((unsigned char)'\325')
  #define UL3S ((unsigned char)'\341')
  #define UL3E ((unsigned char)'\357')
  /*
   * slapi_utf8StrToLower: translate upper-case string to lower-case
   *
   * input: a null-terminated UTF-8 string
   * output: a null-terminated UTF-8 string whose characters are
   *         converted to lower-case; characters which are not
   *         upper-case are copied as is. If the input is not considered
   *         a UTF-8 string, NULL is returned.
   *
   * Notes: This function takes a string (made of multiple UTF-8 characters)
   *        as its input (not a single character as "tolower" does).
   *        The output string is allocated in this function and needs to be
   *        released when it is no longer needed.
   */
  /*
   * slapi_UTF8STRTOLOWER: char * entry point for slapi_utf8StrToLower.
   * Casts s to unsigned char * and returns whatever slapi_utf8StrToLower
   * returns for it.
   */
  unsigned char *
  slapi_UTF8STRTOLOWER(char *s)
  {
      unsigned char *lowered = slapi_utf8StrToLower((unsigned char *)s);

      return lowered;
  }
  830. unsigned char *
  831. slapi_utf8StrToLower(unsigned char *s)
  832. {
  833. UpperLowerTbl_t *ultp;
  834. unsigned char *p, *np, *tail;
  835. unsigned char *lp, *lphead;
  836. int len, sz;
  837. if (s == NULL || *s == '\0') {
  838. return s;
  839. }
  840. len = strlen((char *)s);
  841. tail = s + len;
  842. lphead = lp = (unsigned char *)slapi_ch_malloc(len + 1);
  843. p = s;
  844. while ((np = (unsigned char *)ldap_utf8next((char *)p)) <= tail) {
  845. switch(sz = np - p) {
  846. case 1:
  847. sprintf((char *)lp, "%c", tolower(*p));
  848. break;
  849. case 2:
  850. if (*p < UL2S || *p > UL2E) { /* out of range */
  851. memcpy(lp, p, sz);
  852. break;
  853. }
  854. for (ultp = Upper2LowerTbl2[*p - UL2S];
  855. ultp && ultp->upper && memcmp(p, ultp->upper, sz);
  856. ultp++)
  857. ;
  858. if (!ultp) { /* out of range */
  859. memcpy(lp, p, sz);
  860. } else if (ultp->upper) { /* matched */
  861. memcpy(lp, ultp->lower, ultp->tsz);
  862. sz = ultp->tsz;
  863. } else {
  864. memcpy(lp, p, sz);
  865. }
  866. break;
  867. case 3:
  868. if (*p != UL3S && *p != UL3E) { /* out of range */
  869. memcpy(lp, p, sz);
  870. break;
  871. }
  872. for (ultp = Upper2LowerTbl3[*p - UL3S];
  873. ultp && ultp->upper && memcmp(p, ultp->upper, sz);
  874. ultp++)
  875. ;
  876. if (!ultp) { /* out of range */
  877. memcpy(lp, p, sz);
  878. } else if (ultp->upper) { /* matched */
  879. memcpy(lp, ultp->lower, sz);
  880. } else {
  881. memcpy(lp, p, sz);
  882. }
  883. break;
  884. case 4:
  885. memcpy(lp, p, sz);
  886. break;
  887. default: /* not UTF-8 */
  888. slapi_ch_free((void **)&lphead);
  889. return NULL;
  890. }
  891. lp += sz;
  892. p = np;
  893. if (p == tail) {
  894. break;
  895. }
  896. }
  897. *lp = '\0';
  898. return lphead;
  899. }
  /*
   * slapi_utf8ToLower: translate upper-case character to lower-case
   *
   * input: a UTF-8 character (s)
   * output: a UTF-8 character which is converted to lower-case (d),
   *         the length (in bytes) of the input character (ssz) and
   *         of the output character (dsz)
   *
   * Notes: This function takes a UTF-8 character (could be multiple bytes)
   *        as its input. Memory for the output character is NOT allocated
   *        in this function; the caller should have allocated it (d).
   *        "memmove" is used since (s) and (d) may overlap.
   */
  /*
   * slapi_UTF8TOLOWER: char * entry point for slapi_utf8ToLower.
   * Casts s and d to unsigned char * and forwards all four arguments.
   */
  void
  slapi_UTF8TOLOWER(char *s, char *d, int *ssz, int *dsz)
  {
      unsigned char *src = (unsigned char *)s;
      unsigned char *dst = (unsigned char *)d;

      slapi_utf8ToLower(src, dst, ssz, dsz);
  }
  919. void
  920. slapi_utf8ToLower(unsigned char *s, unsigned char *d, int *ssz, int *dsz)
  921. {
  922. UpperLowerTbl_t *ultp;
  923. unsigned char *tail;
  924. if (s == NULL || *s == '\0') {
  925. *ssz = *dsz = 0;
  926. return;
  927. }
  928. if (!(*s & 0x80)) { /* ASCII */
  929. *dsz = *ssz = 1;
  930. *d = tolower(*s);
  931. return;
  932. }
  933. tail = (unsigned char *)ldap_utf8next((char *)s);
  934. *dsz = *ssz = tail - s;
  935. switch(*ssz) {
  936. case 1: /* ASCII */
  937. *d = tolower(*s);
  938. break;
  939. case 2: /* 2 bytes */
  940. if (*s < UL2S || *s > UL2E) { /* out of range */
  941. memmove(d, s, *ssz);
  942. break;
  943. }
  944. for (ultp = Upper2LowerTbl2[*s - UL2S];
  945. ultp && ultp->upper && memcmp(s, ultp->upper, *ssz);
  946. ultp++)
  947. ;
  948. if (!ultp) { /* out of range */
  949. memmove(d, s, *ssz);
  950. } else if (ultp->upper) { /* matched */
  951. memmove(d, ultp->lower, ultp->tsz);
  952. *dsz = ultp->tsz;
  953. } else {
  954. memmove(d, s, *ssz);
  955. }
  956. break;
  957. case 3: /* 3 bytes */
  958. if (*s != UL3S && *s != UL3E) { /* out of range */
  959. memmove(d, s, *ssz);
  960. break;
  961. }
  962. for (ultp = Upper2LowerTbl3[*s - UL3S];
  963. ultp && ultp->upper && memcmp(s, ultp->upper, *ssz);
  964. ultp++)
  965. ;
  966. if (!ultp) { /* out of range */
  967. memmove(d, s, *ssz);
  968. } else if (ultp->upper) { /* matched */
  969. memmove(d, ultp->lower, *ssz);
  970. } else {
  971. memmove(d, s, *ssz);
  972. }
  973. break;
  974. }
  975. return;
  976. }
  /*
   * slapi_utf8isUpper: tests for a character that is an upper-case letter
   *                    in UTF-8
   *
   * input: a UTF-8 character (could be multi-byte)
   * output: 1 if the character is an upper-case letter
   *         0 if the character is not an upper-case letter
   */
  /*
   * slapi_UTF8ISUPPER: char * entry point for slapi_utf8isUpper.
   * Casts s to unsigned char * and returns slapi_utf8isUpper's result.
   */
  int
  slapi_UTF8ISUPPER(char *s)
  {
      int is_upper = slapi_utf8isUpper((unsigned char *)s);

      return is_upper;
  }
  990. int
  991. slapi_utf8isUpper(unsigned char *s)
  992. {
  993. UpperLowerTbl_t *ultp;
  994. unsigned char *next;
  995. int sz;
  996. if (s == NULL || *s == '\0') {
  997. return 0;
  998. }
  999. if (!(*s & 0x80)) { /* ASCII */
  1000. return isupper(*s);
  1001. }
  1002. next = (unsigned char *)ldap_utf8next((char *)s);
  1003. switch(sz = next - s) {
  1004. case 1: /* ASCII */
  1005. return isupper(*s);
  1006. case 2:
  1007. if (*s < UL2S || *s > UL2E) { /* out of range */
  1008. return 0;
  1009. }
  1010. for (ultp = Upper2LowerTbl2[*s - UL2S];
  1011. ultp && ultp->upper && memcmp(s, ultp->upper, sz);
  1012. ultp++)
  1013. ;
  1014. if (!ultp) { /* out of range */
  1015. return 0;
  1016. } else if (ultp->upper) { /* matched */
  1017. return 1;
  1018. } else {
  1019. return 0;
  1020. }
  1021. case 3:
  1022. if (*s < UL3S || *s > UL3E) { /* out of range */
  1023. return 0;
  1024. }
  1025. for (ultp = Upper2LowerTbl3[*s - UL3S];
  1026. ultp && ultp->upper && memcmp(s, ultp->upper, sz);
  1027. ultp++)
  1028. ;
  1029. if (!ultp) { /* out of range */
  1030. return 0;
  1031. } else if (ultp->upper) { /* matched */
  1032. return 1;
  1033. } else {
  1034. return 0;
  1035. }
  1036. default:
  1037. return 0;
  1038. }
  1039. }
  1040. /*
  1041. * Lower2Upper Tables: sorted by lower characters
  1042. */
  1043. UpperLowerTbl_t Lower2UpperTbl20[] = {
  1044. /* upper, lower */
  1045. {"\303\200", "\303\240", 2},
  1046. {"\303\201", "\303\241", 2},
  1047. {"\303\202", "\303\242", 2},
  1048. {"\303\203", "\303\243", 2},
  1049. {"\303\204", "\303\244", 2},
  1050. {"\303\205", "\303\245", 2},
  1051. {"\303\206", "\303\246", 2},
  1052. {"\303\207", "\303\247", 2},
  1053. {"\303\210", "\303\250", 2},
  1054. {"\303\211", "\303\251", 2},
  1055. {"\303\212", "\303\252", 2},
  1056. {"\303\213", "\303\253", 2},
  1057. {"\303\214", "\303\254", 2},
  1058. {"\303\215", "\303\255", 2},
  1059. {"\303\216", "\303\256", 2},
  1060. {"\303\217", "\303\257", 2},
  1061. {"\303\220", "\303\260", 2},
  1062. {"\303\221", "\303\261", 2},
  1063. {"\303\222", "\303\262", 2},
  1064. {"\303\223", "\303\263", 2},
  1065. {"\303\224", "\303\264", 2},
  1066. {"\303\225", "\303\265", 2},
  1067. {"\303\226", "\303\266", 2},
  1068. {"\303\230", "\303\270", 2},
  1069. {"\303\231", "\303\271", 2},
  1070. {"\303\232", "\303\272", 2},
  1071. {"\303\233", "\303\273", 2},
  1072. {"\303\234", "\303\274", 2},
  1073. {"\303\235", "\303\275", 2},
  1074. {"\303\236", "\303\276", 2},
  1075. {"\305\270", "\303\277", 2},
  1076. {NULL, NULL, 0}
  1077. };
  1078. UpperLowerTbl_t Lower2UpperTbl21[] = {
  1079. {"\304\200", "\304\201", 2},
  1080. {"\304\202", "\304\203", 2},
  1081. {"\304\204", "\304\205", 2},
  1082. {"\304\206", "\304\207", 2},
  1083. {"\304\210", "\304\211", 2},
  1084. {"\304\212", "\304\213", 2},
  1085. {"\304\214", "\304\215", 2},
  1086. {"\304\216", "\304\217", 2},
  1087. {"\304\220", "\304\221", 2},
  1088. {"\304\222", "\304\223", 2},
  1089. {"\304\224", "\304\225", 2},
  1090. {"\304\226", "\304\227", 2},
  1091. {"\304\230", "\304\231", 2},
  1092. {"\304\232", "\304\233", 2},
  1093. {"\304\234", "\304\235", 2},
  1094. {"\304\236", "\304\237", 2},
  1095. {"\304\240", "\304\241", 2},
  1096. {"\304\242", "\304\243", 2},
  1097. {"\304\244", "\304\245", 2},
  1098. {"\304\246", "\304\247", 2},
  1099. {"\304\250", "\304\251", 2},
  1100. {"\304\252", "\304\253", 2},
  1101. {"\304\254", "\304\255", 2},
  1102. {"\304\256", "\304\257", 2},
  1103. {"\111", "\304\261", 1},
  1104. {"\304\262", "\304\263", 2},
  1105. {"\304\264", "\304\265", 2},
  1106. {"\304\266", "\304\267", 2},
  1107. {"\304\271", "\304\272", 2},
  1108. {"\304\273", "\304\274", 2},
  1109. {"\304\275", "\304\276", 2},
  1110. {NULL, NULL}
  1111. };
  1112. UpperLowerTbl_t Lower2UpperTbl22[] = {
  1113. {"\304\277", "\305\200", 2},
  1114. {"\305\201", "\305\202", 2},
  1115. {"\305\203", "\305\204", 2},
  1116. {"\305\205", "\305\206", 2},
  1117. {"\305\207", "\305\210", 2},
  1118. {"\305\212", "\305\213", 2},
  1119. {"\305\214", "\305\215", 2},
  1120. {"\305\216", "\305\217", 2},
  1121. {"\305\220", "\305\221", 2},
  1122. {"\305\222", "\305\223", 2},
  1123. {"\305\224", "\305\225", 2},
  1124. {"\305\226", "\305\227", 2},
  1125. {"\305\230", "\305\231", 2},
  1126. {"\305\232", "\305\233", 2},
  1127. {"\305\234", "\305\235", 2},
  1128. {"\305\236", "\305\237", 2},
  1129. {"\305\240", "\305\241", 2},
  1130. {"\305\242", "\305\243", 2},
  1131. {"\305\244", "\305\245", 2},
  1132. {"\305\246", "\305\247", 2},
  1133. {"\305\250", "\305\251", 2},
  1134. {"\305\252", "\305\253", 2},
  1135. {"\305\254", "\305\255", 2},
  1136. {"\305\256", "\305\257", 2},
  1137. {"\305\260", "\305\261", 2},
  1138. {"\305\262", "\305\263", 2},
  1139. {"\305\264", "\305\265", 2},
  1140. {"\305\266", "\305\267", 2},
  1141. {"\305\271", "\305\272", 2},
  1142. {"\305\273", "\305\274", 2},
  1143. {"\305\275", "\305\276", 2},
  1144. {"\123", "\305\277", 1},
  1145. {NULL, NULL, 0}
  1146. };
  1147. UpperLowerTbl_t Lower2UpperTbl23[] = {
  1148. {"\306\202", "\306\203", 2},
  1149. {"\306\204", "\306\205", 2},
  1150. {"\306\207", "\306\210", 2},
  1151. {"\306\213", "\306\214", 2},
  1152. {"\306\221", "\306\222", 2},
  1153. {"\306\230", "\306\231", 2},
  1154. {"\306\240", "\306\241", 2},
  1155. {"\306\242", "\306\243", 2},
  1156. {"\306\244", "\306\245", 2},
  1157. {"\306\247", "\306\250", 2},
  1158. {"\306\254", "\306\255", 2},
  1159. {"\306\257", "\306\260", 2},
  1160. {"\306\263", "\306\264", 2},
  1161. {"\306\265", "\306\266", 2},
  1162. {"\306\270", "\306\271", 2},
  1163. {"\306\274", "\306\275", 2},
  1164. {NULL, NULL, 0}
  1165. };
  1166. UpperLowerTbl_t Lower2UpperTbl24[] = {
  1167. {"\307\204", "\307\206", 2},
  1168. {"\307\207", "\307\211", 2},
  1169. {"\307\212", "\307\214", 2},
  1170. {"\307\215", "\307\216", 2},
  1171. {"\307\217", "\307\220", 2},
  1172. {"\307\221", "\307\222", 2},
  1173. {"\307\223", "\307\224", 2},
  1174. {"\307\225", "\307\226", 2},
  1175. {"\307\227", "\307\230", 2},
  1176. {"\307\231", "\307\232", 2},
  1177. {"\307\233", "\307\234", 2},
  1178. {"\307\236", "\307\237", 2},
  1179. {"\307\240", "\307\241", 2},
  1180. {"\307\242", "\307\243", 2},
  1181. {"\307\244", "\307\245", 2},
  1182. {"\307\246", "\307\247", 2},
  1183. {"\307\250", "\307\251", 2},
  1184. {"\307\252", "\307\253", 2},
  1185. {"\307\254", "\307\255", 2},
  1186. {"\307\256", "\307\257", 2},
  1187. {"\307\261", "\307\263", 2},
  1188. {"\307\264", "\307\265", 2},
  1189. {"\307\272", "\307\273", 2},
  1190. {"\307\274", "\307\275", 2},
  1191. {"\307\276", "\307\277", 2},
  1192. {NULL, NULL, 0}
  1193. };
  1194. UpperLowerTbl_t Lower2UpperTbl25[] = {
  1195. {"\310\200", "\310\201", 2},
  1196. {"\310\202", "\310\203", 2},
  1197. {"\310\204", "\310\205", 2},
  1198. {"\310\206", "\310\207", 2},
  1199. {"\310\210", "\310\211", 2},
  1200. {"\310\212", "\310\213", 2},
  1201. {"\310\214", "\310\215", 2},
  1202. {"\310\216", "\310\217", 2},
  1203. {"\310\220", "\310\221", 2},
  1204. {"\310\222", "\310\223", 2},
  1205. {"\310\224", "\310\225", 2},
  1206. {"\310\226", "\310\227", 2},
  1207. {NULL, NULL, 0}
  1208. };
  1209. UpperLowerTbl_t Lower2UpperTbl26[] = {
  1210. {"\306\201", "\311\223", 2},
  1211. {"\306\206", "\311\224", 2},
  1212. {"\306\211", "\311\226", 2},
  1213. {"\306\212", "\311\227", 2},
  1214. {"\306\216", "\311\230", 2},
  1215. {"\306\217", "\311\231", 2},
  1216. {"\306\220", "\311\233", 2},
  1217. {"\306\223", "\311\240", 2},
  1218. {"\306\224", "\311\243", 2},
  1219. {"\306\227", "\311\250", 2},
  1220. {"\306\226", "\311\251", 2},
  1221. {"\306\234", "\311\257", 2},
  1222. {"\306\235", "\311\262", 2},
  1223. {NULL, NULL, 0}
  1224. };
  1225. UpperLowerTbl_t Lower2UpperTbl27[] = {
  1226. {"\306\251", "\312\203", 2},
  1227. {"\306\256", "\312\210", 2},
  1228. {"\306\261", "\312\212", 2},
  1229. {"\306\262", "\312\213", 2},
  1230. {"\306\267", "\312\222", 2},
  1231. {NULL, NULL, 0}
  1232. };
  1233. UpperLowerTbl_t Lower2UpperTbl28[] = {
  1234. {"\316\206", "\316\254", 2},
  1235. {"\316\210", "\316\255", 2},
  1236. {"\316\211", "\316\256", 2},
  1237. {"\316\212", "\316\257", 2},
  1238. {"\316\221", "\316\261", 2},
  1239. {"\316\222", "\316\262", 2},
  1240. {"\316\223", "\316\263", 2},
  1241. {"\316\224", "\316\264", 2},
  1242. {"\316\225", "\316\265", 2},
  1243. {"\316\226", "\316\266", 2},
  1244. {"\316\227", "\316\267", 2},
  1245. {"\316\230", "\316\270", 2},
  1246. {"\316\231", "\316\271", 2},
  1247. {"\316\232", "\316\272", 2},
  1248. {"\316\233", "\316\273", 2},
  1249. {"\316\234", "\316\274", 2},
  1250. {"\316\235", "\316\275", 2},
  1251. {"\316\236", "\316\276", 2},
  1252. {"\316\237", "\316\277", 2},
  1253. {NULL, NULL, 0}
  1254. };
  1255. UpperLowerTbl_t Lower2UpperTbl29[] = {
  1256. {"\316\240", "\317\200", 2},
  1257. {"\316\241", "\317\201", 2},
  1258. {"\316\243", "\317\202", 2},
  1259. {"\316\243", "\317\203", 2},
  1260. {"\316\244", "\317\204", 2},
  1261. {"\316\245", "\317\205", 2},
  1262. {"\316\246", "\317\206", 2},
  1263. {"\316\247", "\317\207", 2},
  1264. {"\316\250", "\317\210", 2},
  1265. {"\316\251", "\317\211", 2},
  1266. {"\316\252", "\317\212", 2},
  1267. {"\316\253", "\317\213", 2},
  1268. {"\316\214", "\317\214", 2},
  1269. {"\316\216", "\317\215", 2},
  1270. {"\316\217", "\317\216", 2},
  1271. {"\316\222", "\317\220", 2},
  1272. {"\316\230", "\317\221", 2},
  1273. {"\316\246", "\317\225", 2},
  1274. {"\316\240", "\317\226", 2},
  1275. {"\317\242", "\317\243", 2},
  1276. {"\317\244", "\317\245", 2},
  1277. {"\317\246", "\317\247", 2},
  1278. {"\317\250", "\317\251", 2},
  1279. {"\317\252", "\317\253", 2},
  1280. {"\317\254", "\317\255", 2},
  1281. {"\317\256", "\317\257", 2},
  1282. {"\316\232", "\317\260", 2},
  1283. {"\316\241", "\317\261", 2},
  1284. {NULL, NULL, 0}
  1285. };
  1286. UpperLowerTbl_t Lower2UpperTbl2a[] = {
  1287. {"\320\220", "\320\260", 2},
  1288. {"\320\221", "\320\261", 2},
  1289. {"\320\222", "\320\262", 2},
  1290. {"\320\223", "\320\263", 2},
  1291. {"\320\224", "\320\264", 2},
  1292. {"\320\225", "\320\265", 2},
  1293. {"\320\226", "\320\266", 2},
  1294. {"\320\227", "\320\267", 2},
  1295. {"\320\230", "\320\270", 2},
  1296. {"\320\231", "\320\271", 2},
  1297. {"\320\232", "\320\272", 2},
  1298. {"\320\233", "\320\273", 2},
  1299. {"\320\234", "\320\274", 2},
  1300. {"\320\235", "\320\275", 2},
  1301. {"\320\236", "\320\276", 2},
  1302. {"\320\237", "\320\277", 2},
  1303. {NULL, NULL, 0}
  1304. };
  1305. UpperLowerTbl_t Lower2UpperTbl2b[] = {
  1306. {"\320\240", "\321\200", 2},
  1307. {"\320\241", "\321\201", 2},
  1308. {"\320\242", "\321\202", 2},
  1309. {"\320\243", "\321\203", 2},
  1310. {"\320\244", "\321\204", 2},
  1311. {"\320\245", "\321\205", 2},
  1312. {"\320\246", "\321\206", 2},
  1313. {"\320\247", "\321\207", 2},
  1314. {"\320\250", "\321\210", 2},
  1315. {"\320\251", "\321\211", 2},
  1316. {"\320\252", "\321\212", 2},
  1317. {"\320\253", "\321\213", 2},
  1318. {"\320\254", "\321\214", 2},
  1319. {"\320\255", "\321\215", 2},
  1320. {"\320\256", "\321\216", 2},
  1321. {"\320\257", "\321\217", 2},
  1322. {"\320\201", "\321\221", 2},
  1323. {"\320\202", "\321\222", 2},
  1324. {"\320\203", "\321\223", 2},
  1325. {"\320\204", "\321\224", 2},
  1326. {"\320\205", "\321\225", 2},
  1327. {"\320\206", "\321\226", 2},
  1328. {"\320\207", "\321\227", 2},
  1329. {"\320\210", "\321\230", 2},
  1330. {"\320\211", "\321\231", 2},
  1331. {"\320\212", "\321\232", 2},
  1332. {"\320\213", "\321\233", 2},
  1333. {"\320\214", "\321\234", 2},
  1334. {"\320\216", "\321\236", 2},
  1335. {"\320\217", "\321\237", 2},
  1336. {"\321\240", "\321\241", 2},
  1337. {"\321\242", "\321\243", 2},
  1338. {"\321\244", "\321\245", 2},
  1339. {"\321\246", "\321\247", 2},
  1340. {"\321\250", "\321\251", 2},
  1341. {"\321\252", "\321\253", 2},
  1342. {"\321\254", "\321\255", 2},
  1343. {"\321\256", "\321\257", 2},
  1344. {"\321\260", "\321\261", 2},
  1345. {"\321\262", "\321\263", 2},
  1346. {"\321\264", "\321\265", 2},
  1347. {"\321\266", "\321\267", 2},
  1348. {"\321\270", "\321\271", 2},
  1349. {"\321\272", "\321\273", 2},
  1350. {"\321\274", "\321\275", 2},
  1351. {"\321\276", "\321\277", 2},
  1352. {NULL, NULL, 0}
  1353. };
  1354. UpperLowerTbl_t Lower2UpperTbl2c[] = {
  1355. {"\322\200", "\322\201", 2},
  1356. {"\322\220", "\322\221", 2},
  1357. {"\322\222", "\322\223", 2},
  1358. {"\322\224", "\322\225", 2},
  1359. {"\322\226", "\322\227", 2},
  1360. {"\322\230", "\322\231", 2},
  1361. {"\322\232", "\322\233", 2},
  1362. {"\322\234", "\322\235", 2},
  1363. {"\322\236", "\322\237", 2},
  1364. {"\322\240", "\322\241", 2},
  1365. {"\322\242", "\322\243", 2},
  1366. {"\322\244", "\322\245", 2},
  1367. {"\322\246", "\322\247", 2},
  1368. {"\322\250", "\322\251", 2},
  1369. {"\322\252", "\322\253", 2},
  1370. {"\322\254", "\322\255", 2},
  1371. {"\322\256", "\322\257", 2},
  1372. {"\322\260", "\322\261", 2},
  1373. {"\322\262", "\322\263", 2},
  1374. {"\322\264", "\322\265", 2},
  1375. {"\322\266", "\322\267", 2},
  1376. {"\322\270", "\322\271", 2},
  1377. {"\322\272", "\322\273", 2},
  1378. {"\322\274", "\322\275", 2},
  1379. {"\322\276", "\322\277", 2},
  1380. {NULL, NULL, 0}
  1381. };
  1382. UpperLowerTbl_t Lower2UpperTbl2d[] = {
  1383. {"\323\201", "\323\202", 2},
  1384. {"\323\203", "\323\204", 2},
  1385. {"\323\207", "\323\210", 2},
  1386. {"\323\213", "\323\214", 2},
  1387. {"\323\220", "\323\221", 2},
  1388. {"\323\222", "\323\223", 2},
  1389. {"\323\224", "\323\225", 2},
  1390. {"\323\226", "\323\227", 2},
  1391. {"\323\230", "\323\231", 2},
  1392. {"\323\232", "\323\233", 2},
  1393. {"\323\234", "\323\235", 2},
  1394. {"\323\236", "\323\237", 2},
  1395. {"\323\240", "\323\241", 2},
  1396. {"\323\242", "\323\243", 2},
  1397. {"\323\244", "\323\245", 2},
  1398. {"\323\246", "\323\247", 2},
  1399. {"\323\250", "\323\251", 2},
  1400. {"\323\252", "\323\253", 2},
  1401. {"\323\256", "\323\257", 2},
  1402. {"\323\260", "\323\261", 2},
  1403. {"\323\262", "\323\263", 2},
  1404. {"\323\264", "\323\265", 2},
  1405. {"\323\270", "\323\271", 2},
  1406. {NULL, NULL, 0}
  1407. };
  1408. UpperLowerTbl_t Lower2UpperTbl2e[] = {
  1409. {"\324\261", "\325\241", 2},
  1410. {"\324\262", "\325\242", 2},
  1411. {"\324\263", "\325\243", 2},
  1412. {"\324\264", "\325\244", 2},
  1413. {"\324\265", "\325\245", 2},
  1414. {"\324\266", "\325\246", 2},
  1415. {"\324\267", "\325\247", 2},
  1416. {"\324\270", "\325\250", 2},
  1417. {"\324\271", "\325\251", 2},
  1418. {"\324\272", "\325\252", 2},
  1419. {"\324\273", "\325\253", 2},
  1420. {"\324\274", "\325\254", 2},
  1421. {"\324\275", "\325\255", 2},
  1422. {"\324\276", "\325\256", 2},
  1423. {"\324\277", "\325\257", 2},
  1424. {"\325\200", "\325\260", 2},
  1425. {"\325\201", "\325\261", 2},
  1426. {"\325\202", "\325\262", 2},
  1427. {"\325\203", "\325\263", 2},
  1428. {"\325\204", "\325\264", 2},
  1429. {"\325\205", "\325\265", 2},
  1430. {"\325\206", "\325\266", 2},
  1431. {"\325\207", "\325\267", 2},
  1432. {"\325\210", "\325\270", 2},
  1433. {"\325\211", "\325\271", 2},
  1434. {"\325\212", "\325\272", 2},
  1435. {"\325\213", "\325\273", 2},
  1436. {"\325\214", "\325\274", 2},
  1437. {"\325\215", "\325\275", 2},
  1438. {"\325\216", "\325\276", 2},
  1439. {"\325\217", "\325\277", 2},
  1440. {NULL, NULL, 0}
  1441. };
  1442. UpperLowerTbl_t Lower2UpperTbl2f[] = {
  1443. {"\325\220", "\326\200", 2},
  1444. {"\325\221", "\326\201", 2},
  1445. {"\325\222", "\326\202", 2},
  1446. {"\325\223", "\326\203", 2},
  1447. {"\325\224", "\326\204", 2},
  1448. {"\325\225", "\326\205", 2},
  1449. {"\325\226", "\326\206", 2},
  1450. {NULL, NULL, 0}
  1451. };
  1452. UpperLowerTbl_t Lower2UpperTbl30[] = {
  1453. {"\341\202\240", "\341\203\220", 3},
  1454. {"\341\202\241", "\341\203\221", 3},
  1455. {"\341\202\242", "\341\203\222", 3},
  1456. {"\341\202\243", "\341\203\223", 3},
  1457. {"\341\202\244", "\341\203\224", 3},
  1458. {"\341\202\245", "\341\203\225", 3},
  1459. {"\341\202\246", "\341\203\226", 3},
  1460. {"\341\202\247", "\341\203\227", 3},
  1461. {"\341\202\250", "\341\203\230", 3},
  1462. {"\341\202\251", "\341\203\231", 3},
  1463. {"\341\202\252", "\341\203\232", 3},
  1464. {"\341\202\253", "\341\203\233", 3},
  1465. {"\341\202\254", "\341\203\234", 3},
  1466. {"\341\202\255", "\341\203\235", 3},
  1467. {"\341\202\256", "\341\203\236", 3},
  1468. {"\341\202\257", "\341\203\237", 3},
  1469. {"\341\202\260", "\341\203\240", 3},
  1470. {"\341\202\261", "\341\203\241", 3},
  1471. {"\341\202\262", "\341\203\242", 3},
  1472. {"\341\202\263", "\341\203\243", 3},
  1473. {"\341\202\264", "\341\203\244", 3},
  1474. {"\341\202\265", "\341\203\245", 3},
  1475. {"\341\202\266", "\341\203\246", 3},
  1476. {"\341\202\267", "\341\203\247", 3},
  1477. {"\341\202\270", "\341\203\250", 3},
  1478. {"\341\202\271", "\341\203\251", 3},
  1479. {"\341\202\272", "\341\203\252", 3},
  1480. {"\341\202\273", "\341\203\253", 3},
  1481. {"\341\202\274", "\341\203\254", 3},
  1482. {"\341\202\275", "\341\203\255", 3},
  1483. {"\341\202\276", "\341\203\256", 3},
  1484. {"\341\202\277", "\341\203\257", 3},
  1485. {"\341\203\200", "\341\203\260", 3},
  1486. {"\341\203\201", "\341\203\261", 3},
  1487. {"\341\203\202", "\341\203\262", 3},
  1488. {"\341\203\203", "\341\203\263", 3},
  1489. {"\341\203\204", "\341\203\264", 3},
  1490. {"\341\203\205", "\341\203\265", 3},
  1491. {"\341\270\200", "\341\270\201", 3},
  1492. {"\341\270\202", "\341\270\203", 3},
  1493. {"\341\270\204", "\341\270\205", 3},
  1494. {"\341\270\206", "\341\270\207", 3},
  1495. {"\341\270\210", "\341\270\211", 3},
  1496. {"\341\270\212", "\341\270\213", 3},
  1497. {"\341\270\214", "\341\270\215", 3},
  1498. {"\341\270\216", "\341\270\217", 3},
  1499. {"\341\270\220", "\341\270\221", 3},
  1500. {"\341\270\222", "\341\270\223", 3},
  1501. {"\341\270\224", "\341\270\225", 3},
  1502. {"\341\270\226", "\341\270\227", 3},
  1503. {"\341\270\230", "\341\270\231", 3},
  1504. {"\341\270\232", "\341\270\233", 3},
  1505. {"\341\270\234", "\341\270\235", 3},
  1506. {"\341\270\236", "\341\270\237", 3},
  1507. {"\341\270\240", "\341\270\241", 3},
  1508. {"\341\270\242", "\341\270\243", 3},
  1509. {"\341\270\244", "\341\270\245", 3},
  1510. {"\341\270\246", "\341\270\247", 3},
  1511. {"\341\270\250", "\341\270\251", 3},
  1512. {"\341\270\252", "\341\270\253", 3},
  1513. {"\341\270\254", "\341\270\255", 3},
  1514. {"\341\270\256", "\341\270\257", 3},
  1515. {"\341\270\260", "\341\270\261", 3},
  1516. {"\341\270\262", "\341\270\263", 3},
  1517. {"\341\270\264", "\341\270\265", 3},
  1518. {"\341\270\266", "\341\270\267", 3},
  1519. {"\341\270\270", "\341\270\271", 3},
  1520. {"\341\270\272", "\341\270\273", 3},
  1521. {"\341\270\274", "\341\270\275", 3},
  1522. {"\341\270\276", "\341\270\277", 3},
  1523. {"\341\271\200", "\341\271\201", 3},
  1524. {"\341\271\202", "\341\271\203", 3},
  1525. {"\341\271\204", "\341\271\205", 3},
  1526. {"\341\271\206", "\341\271\207", 3},
  1527. {"\341\271\210", "\341\271\211", 3},
  1528. {"\341\271\212", "\341\271\213", 3},
  1529. {"\341\271\214", "\341\271\215", 3},
  1530. {"\341\271\216", "\341\271\217", 3},
  1531. {"\341\271\220", "\341\271\221", 3},
  1532. {"\341\271\222", "\341\271\223", 3},
  1533. {"\341\271\224", "\341\271\225", 3},
  1534. {"\341\271\226", "\341\271\227", 3},
  1535. {"\341\271\230", "\341\271\231", 3},
  1536. {"\341\271\232", "\341\271\233", 3},
  1537. {"\341\271\234", "\341\271\235", 3},
  1538. {"\341\271\236", "\341\271\237", 3},
  1539. {"\341\271\240", "\341\271\241", 3},
  1540. {"\341\271\242", "\341\271\243", 3},
  1541. {"\341\271\244", "\341\271\245", 3},
  1542. {"\341\271\246", "\341\271\247", 3},
  1543. {"\341\271\250", "\341\271\251", 3},
  1544. {"\341\271\252", "\341\271\253", 3},
  1545. {"\341\271\254", "\341\271\255", 3},
  1546. {"\341\271\256", "\341\271\257", 3},
  1547. {"\341\271\260", "\341\271\261", 3},
  1548. {"\341\271\262", "\341\271\263", 3},
  1549. {"\341\271\264", "\341\271\265", 3},
  1550. {"\341\271\266", "\341\271\267", 3},
  1551. {"\341\271\270", "\341\271\271", 3},
  1552. {"\341\271\272", "\341\271\273", 3},
  1553. {"\341\271\274", "\341\271\275", 3},
  1554. {"\341\271\276", "\341\271\277", 3},
  1555. {"\341\272\200", "\341\272\201", 3},
  1556. {"\341\272\202", "\341\272\203", 3},
  1557. {"\341\272\204", "\341\272\205", 3},
  1558. {"\341\272\206", "\341\272\207", 3},
  1559. {"\341\272\210", "\341\272\211", 3},
  1560. {"\341\272\212", "\341\272\213", 3},
  1561. {"\341\272\214", "\341\272\215", 3},
  1562. {"\341\272\216", "\341\272\217", 3},
  1563. {"\341\272\220", "\341\272\221", 3},
  1564. {"\341\272\222", "\341\272\223", 3},
  1565. {"\341\272\224", "\341\272\225", 3},
  1566. {"\341\272\240", "\341\272\241", 3},
  1567. {"\341\272\242", "\341\272\243", 3},
  1568. {"\341\272\244", "\341\272\245", 3},
  1569. {"\341\272\246", "\341\272\247", 3},
  1570. {"\341\272\250", "\341\272\251", 3},
  1571. {"\341\272\252", "\341\272\253", 3},
  1572. {"\341\272\254", "\341\272\255", 3},
  1573. {"\341\272\256", "\341\272\257", 3},
  1574. {"\341\272\260", "\341\272\261", 3},
  1575. {"\341\272\262", "\341\272\263", 3},
  1576. {"\341\272\264", "\341\272\265", 3},
  1577. {"\341\272\266", "\341\272\267", 3},
  1578. {"\341\272\270", "\341\272\271", 3},
  1579. {"\341\272\272", "\341\272\273", 3},
  1580. {"\341\272\274", "\341\272\275", 3},
  1581. {"\341\272\276", "\341\272\277", 3},
  1582. {"\341\273\200", "\341\273\201", 3},
  1583. {"\341\273\202", "\341\273\203", 3},
  1584. {"\341\273\204", "\341\273\205", 3},
  1585. {"\341\273\206", "\341\273\207", 3},
  1586. {"\341\273\210", "\341\273\211", 3},
  1587. {"\341\273\212", "\341\273\213", 3},
  1588. {"\341\273\214", "\341\273\215", 3},
  1589. {"\341\273\216", "\341\273\217", 3},
  1590. {"\341\273\220", "\341\273\221", 3},
  1591. {"\341\273\222", "\341\273\223", 3},
  1592. {"\341\273\224", "\341\273\225", 3},
  1593. {"\341\273\226", "\341\273\227", 3},
  1594. {"\341\273\230", "\341\273\231", 3},
  1595. {"\341\273\232", "\341\273\233", 3},
  1596. {"\341\273\234", "\341\273\235", 3},
  1597. {"\341\273\236", "\341\273\237", 3},
  1598. {"\341\273\240", "\341\273\241", 3},
  1599. {"\341\273\242", "\341\273\243", 3},
  1600. {"\341\273\244", "\341\273\245", 3},
  1601. {"\341\273\246", "\341\273\247", 3},
  1602. {"\341\273\250", "\341\273\251", 3},
  1603. {"\341\273\252", "\341\273\253", 3},
  1604. {"\341\273\254", "\341\273\255", 3},
  1605. {"\341\273\256", "\341\273\257", 3},
  1606. {"\341\273\260", "\341\273\261", 3},
  1607. {"\341\273\262", "\341\273\263", 3},
  1608. {"\341\273\264", "\341\273\265", 3},
  1609. {"\341\273\266", "\341\273\267", 3},
  1610. {"\341\273\270", "\341\273\271", 3},
  1611. {"\341\274\210", "\341\274\200", 3},
  1612. {"\341\274\211", "\341\274\201", 3},
  1613. {"\341\274\212", "\341\274\202", 3},
  1614. {"\341\274\213", "\341\274\203", 3},
  1615. {"\341\274\214", "\341\274\204", 3},
  1616. {"\341\274\215", "\341\274\205", 3},
  1617. {"\341\274\216", "\341\274\206", 3},
  1618. {"\341\274\217", "\341\274\207", 3},
  1619. {"\341\274\230", "\341\274\220", 3},
  1620. {"\341\274\231", "\341\274\221", 3},
  1621. {"\341\274\232", "\341\274\222", 3},
  1622. {"\341\274\233", "\341\274\223", 3},
  1623. {"\341\274\234", "\341\274\224", 3},
  1624. {"\341\274\235", "\341\274\225", 3},
  1625. {"\341\274\250", "\341\274\240", 3},
  1626. {"\341\274\251", "\341\274\241", 3},
  1627. {"\341\274\252", "\341\274\242", 3},
  1628. {"\341\274\253", "\341\274\243", 3},
  1629. {"\341\274\254", "\341\274\244", 3},
  1630. {"\341\274\255", "\341\274\245", 3},
  1631. {"\341\274\256", "\341\274\246", 3},
  1632. {"\341\274\257", "\341\274\247", 3},
  1633. {"\341\274\270", "\341\274\260", 3},
  1634. {"\341\274\271", "\341\274\261", 3},
  1635. {"\341\274\272", "\341\274\262", 3},
  1636. {"\341\274\273", "\341\274\263", 3},
  1637. {"\341\274\274", "\341\274\264", 3},
  1638. {"\341\274\275", "\341\274\265", 3},
  1639. {"\341\274\276", "\341\274\266", 3},
  1640. {"\341\274\277", "\341\274\267", 3},
  1641. {"\341\275\210", "\341\275\200", 3},
  1642. {"\341\275\211", "\341\275\201", 3},
  1643. {"\341\275\212", "\341\275\202", 3},
  1644. {"\341\275\213", "\341\275\203", 3},
  1645. {"\341\275\214", "\341\275\204", 3},
  1646. {"\341\275\215", "\341\275\205", 3},
  1647. {"\341\275\231", "\341\275\221", 3},
  1648. {"\341\275\233", "\341\275\223", 3},
  1649. {"\341\275\235", "\341\275\225", 3},
  1650. {"\341\275\237", "\341\275\227", 3},
  1651. {"\341\275\250", "\341\275\240", 3},
  1652. {"\341\275\251", "\341\275\241", 3},
  1653. {"\341\275\252", "\341\275\242", 3},
  1654. {"\341\275\253", "\341\275\243", 3},
  1655. {"\341\275\254", "\341\275\244", 3},
  1656. {"\341\275\255", "\341\275\245", 3},
  1657. {"\341\275\256", "\341\275\246", 3},
  1658. {"\341\275\257", "\341\275\247", 3},
  1659. {"\341\276\272", "\341\275\260", 3},
  1660. {"\341\276\273", "\341\275\261", 3},
  1661. {"\341\277\210", "\341\275\262", 3},
  1662. {"\341\277\211", "\341\275\263", 3},
  1663. {"\341\277\212", "\341\275\264", 3},
  1664. {"\341\277\213", "\341\275\265", 3},
  1665. {"\341\277\232", "\341\275\266", 3},
  1666. {"\341\277\233", "\341\275\267", 3},
  1667. {"\341\277\270", "\341\275\270", 3},
  1668. {"\341\277\271", "\341\275\271", 3},
  1669. {"\341\277\252", "\341\275\272", 3},
  1670. {"\341\277\253", "\341\275\273", 3},
  1671. {"\341\277\272", "\341\275\274", 3},
  1672. {"\341\277\273", "\341\275\275", 3},
  1673. {"\341\276\210", "\341\276\200", 3},
  1674. {"\341\276\211", "\341\276\201", 3},
  1675. {"\341\276\212", "\341\276\202", 3},
  1676. {"\341\276\213", "\341\276\203", 3},
  1677. {"\341\276\214", "\341\276\204", 3},
  1678. {"\341\276\215", "\341\276\205", 3},
  1679. {"\341\276\216", "\341\276\206", 3},
  1680. {"\341\276\217", "\341\276\207", 3},
  1681. {"\341\276\230", "\341\276\220", 3},
  1682. {"\341\276\231", "\341\276\221", 3},
  1683. {"\341\276\232", "\341\276\222", 3},
  1684. {"\341\276\233", "\341\276\223", 3},
  1685. {"\341\276\234", "\341\276\224", 3},
  1686. {"\341\276\235", "\341\276\225", 3},
  1687. {"\341\276\236", "\341\276\226", 3},
  1688. {"\341\276\237", "\341\276\227", 3},
  1689. {"\341\276\250", "\341\276\240", 3},
  1690. {"\341\276\251", "\341\276\241", 3},
  1691. {"\341\276\252", "\341\276\242", 3},
  1692. {"\341\276\253", "\341\276\243", 3},
  1693. {"\341\276\254", "\341\276\244", 3},
  1694. {"\341\276\255", "\341\276\245", 3},
  1695. {"\341\276\256", "\341\276\246", 3},
  1696. {"\341\276\257", "\341\276\247", 3},
  1697. {"\341\276\270", "\341\276\260", 3},
  1698. {"\341\276\271", "\341\276\261", 3},
  1699. {"\341\276\274", "\341\276\263", 3},
  1700. {"\341\277\214", "\341\277\203", 3},
  1701. {"\341\277\230", "\341\277\220", 3},
  1702. {"\341\277\231", "\341\277\221", 3},
  1703. {"\341\277\250", "\341\277\240", 3},
  1704. {"\341\277\251", "\341\277\241", 3},
  1705. {"\341\277\254", "\341\277\245", 3},
  1706. {"\341\277\274", "\341\277\263", 3},
  1707. {NULL, NULL, 0}
  1708. };
  1709. UpperLowerTbl_t Lower2UpperTbl31[] = {
  1710. {"\357\274\241", "\357\275\201", 3},
  1711. {"\357\274\242", "\357\275\202", 3},
  1712. {"\357\274\243", "\357\275\203", 3},
  1713. {"\357\274\244", "\357\275\204", 3},
  1714. {"\357\274\245", "\357\275\205", 3},
  1715. {"\357\274\246", "\357\275\206", 3},
  1716. {"\357\274\247", "\357\275\207", 3},
  1717. {"\357\274\250", "\357\275\210", 3},
  1718. {"\357\274\251", "\357\275\211", 3},
  1719. {"\357\274\252", "\357\275\212", 3},
  1720. {"\357\274\253", "\357\275\213", 3},
  1721. {"\357\274\254", "\357\275\214", 3},
  1722. {"\357\274\255", "\357\275\215", 3},
  1723. {"\357\274\256", "\357\275\216", 3},
  1724. {"\357\274\257", "\357\275\217", 3},
  1725. {"\357\274\260", "\357\275\220", 3},
  1726. {"\357\274\261", "\357\275\221", 3},
  1727. {"\357\274\262", "\357\275\222", 3},
  1728. {"\357\274\263", "\357\275\223", 3},
  1729. {"\357\274\264", "\357\275\224", 3},
  1730. {"\357\274\265", "\357\275\225", 3},
  1731. {"\357\274\266", "\357\275\226", 3},
  1732. {"\357\274\267", "\357\275\227", 3},
  1733. {"\357\274\270", "\357\275\230", 3},
  1734. {"\357\274\271", "\357\275\231", 3},
  1735. {"\357\274\272", "\357\275\232", 3},
  1736. {NULL, NULL, 0}
  1737. /* upper, lower */
  1738. };
  1739. UpperLowerTbl_t *Lower2UpperTbl2[] = {
  1740. Lower2UpperTbl20, /* \303 */
  1741. Lower2UpperTbl21, /* \304 */
  1742. Lower2UpperTbl22, /* \305 */
  1743. Lower2UpperTbl23, /* \306 */
  1744. Lower2UpperTbl24, /* \307 */
  1745. Lower2UpperTbl25, /* \310 */
  1746. Lower2UpperTbl26, /* \311 */
  1747. Lower2UpperTbl27, /* \312 */
  1748. NULL, /* \313 */
  1749. NULL, /* \314 */
  1750. NULL, /* \315 */
  1751. Lower2UpperTbl28, /* \316 */
  1752. Lower2UpperTbl29, /* \317 */
  1753. Lower2UpperTbl2a, /* \320 */
  1754. Lower2UpperTbl2b, /* \321 */
  1755. Lower2UpperTbl2c, /* \322 */
  1756. Lower2UpperTbl2d, /* \323 */
  1757. NULL, /* \324 */
  1758. Lower2UpperTbl2e, /* \325 */
  1759. Lower2UpperTbl2f /* \326 */
  1760. };
  1761. UpperLowerTbl_t *Lower2UpperTbl3[] = {
  1762. Lower2UpperTbl30, /* \341 */
  1763. NULL, /* \342 */
  1764. NULL, /* \343 */
  1765. NULL, /* \344 */
  1766. NULL, /* \345 */
  1767. NULL, /* \346 */
  1768. NULL, /* \347 */
  1769. NULL, /* \350 */
  1770. NULL, /* \351 */
  1771. NULL, /* \352 */
  1772. NULL, /* \353 */
  1773. NULL, /* \354 */
  1774. NULL, /* \355 */
  1775. NULL, /* \356 */
  1776. Lower2UpperTbl31 /* \357 */
  1777. };
  /*
   * Lead-byte bounds used to index the case-mapping dispatch tables:
   *   LU2S .. LU2E  first / last lead byte covered by Lower2UpperTbl2
   *   LU3S .. LU3E  first / last lead byte covered by Lower2UpperTbl3
   *
   * The character constants are cast to unsigned char so they compare
   * correctly against unsigned bytes even where plain char is signed.
   * Each expansion is fully parenthesized so that it behaves as a single
   * operand in whatever expression it is substituted into.
   */
  #define LU2S ((unsigned char)'\303')
  #define LU2E ((unsigned char)'\326')
  #define LU3S ((unsigned char)'\341')
  #define LU3E ((unsigned char)'\357')
  /*
   * slapi_utf8StrToUpper / slapi_UTF8STRTOUPPER: convert a UTF-8 string to
   * upper case
   *
   * input:  a NUL-terminated UTF-8 string
   * output: a NUL-terminated UTF-8 string in which every lower-case
   *         character has been replaced by its upper-case form; characters
   *         that are not lower-case are copied as they are.
   *         NULL is returned if the input is not considered UTF-8.
   *         A NULL or empty input is handed back unchanged.
   *
   * Notes:  The argument is a whole string (a sequence of UTF-8 characters),
   *         not a single character as with "toupper".
   *         Except for the NULL/empty case, the result is allocated by the
   *         callee and must be released by the caller once it is no longer
   *         needed.
   *
   * slapi_UTF8STRTOUPPER is the plain-char entry point: it only casts its
   * argument and forwards to slapi_utf8StrToUpper.
   */
  unsigned char *
  slapi_UTF8STRTOUPPER(char *s)
  {
      unsigned char *src = (unsigned char *)s;

      return slapi_utf8StrToUpper(src);
  }
  1801. unsigned char *
  1802. slapi_utf8StrToUpper(unsigned char *s)
  1803. {
  1804. UpperLowerTbl_t *ultp;
  1805. unsigned char *p, *np, *tail;
  1806. unsigned char *up, *uphead;
  1807. int len, sz;
  1808. if (s == NULL || *s == '\0') {
  1809. return s;
  1810. }
  1811. len = strlen((char *)s);
  1812. tail = s + len;
  1813. uphead = up = (unsigned char *)slapi_ch_malloc(len + 1);
  1814. p = s;
  1815. while ((np = (unsigned char *)ldap_utf8next((char *)p)) <= tail) {
  1816. switch(sz = np - p) {
  1817. case 1: /* ASCII */
  1818. sprintf((char *)up, "%c", toupper(*p));
  1819. break;
  1820. case 2: /* 2 bytes */
  1821. if (*p < LU2S || *p > LU2E) { /* out of range */
  1822. memcpy(up, p, sz);
  1823. break;
  1824. }
  1825. for (ultp = Lower2UpperTbl2[*p - LU2S];
  1826. ultp && ultp->lower && memcmp(p, ultp->lower, sz);
  1827. ultp++)
  1828. ;
  1829. if (!ultp) { /* out of range */
  1830. memcpy(up, p, sz);
  1831. } else if (ultp->lower) { /* matched */
  1832. memcpy(up, ultp->upper, ultp->tsz);
  1833. sz = ultp->tsz;
  1834. } else {
  1835. memcpy(up, p, sz);
  1836. }
  1837. break;
  1838. case 3: /* 3 bytes */
  1839. if (*p != LU3S && *p != LU3E) { /* out of range */
  1840. memcpy(up, p, sz);
  1841. break;
  1842. }
  1843. for (ultp = Lower2UpperTbl3[*p - LU3S];
  1844. ultp && ultp->lower && memcmp(p, ultp->lower, sz);
  1845. ultp++)
  1846. ;
  1847. if (!ultp) { /* out of range */
  1848. memcpy(up, p, sz);
  1849. } else if (ultp->lower) { /* matched */
  1850. memcpy(up, ultp->upper, sz);
  1851. } else {
  1852. memcpy(up, p, sz);
  1853. }
  1854. break;
  1855. case 4:
  1856. memcpy(up, p, sz);
  1857. break;
  1858. default: /* not UTF-8 */
  1859. slapi_ch_free((void **)&uphead);
  1860. return NULL;
  1861. }
  1862. up += sz;
  1863. p = np;
  1864. if (p == tail) {
  1865. break;
  1866. }
  1867. }
  1868. *up = '\0';
  1869. return uphead;
  1870. }
  /*
   * slapi_utf8ToUpper / slapi_UTF8TOUPPER: convert one UTF-8 character to
   * upper case
   *
   * input:  s   - a UTF-8 character
   * output: d   - the character converted to upper case
   *         ssz - length in bytes of the input character
   *         dsz - length in bytes of the output character
   *
   * Notes:  The input is a single UTF-8 character, which may occupy several
   *         bytes.  No memory is allocated for the output; the caller
   *         provides the storage that d points to.
   *         "memmove" is used because (s) and (d) may overlap.
   *
   * slapi_UTF8TOUPPER is the plain-char entry point: it only casts its
   * pointers and forwards to slapi_utf8ToUpper.
   */
  void
  slapi_UTF8TOUPPER(char *s, char *d, int *ssz, int *dsz)
  {
      unsigned char *src = (unsigned char *)s;
      unsigned char *dst = (unsigned char *)d;

      slapi_utf8ToUpper(src, dst, ssz, dsz);
  }
  1890. void
  1891. slapi_utf8ToUpper(unsigned char *s, unsigned char *d, int *ssz, int *dsz)
  1892. {
  1893. UpperLowerTbl_t *ultp;
  1894. unsigned char *tail;
  1895. if (s == NULL || *s == '\0') {
  1896. *ssz = *dsz = 0;
  1897. return;
  1898. }
  1899. if (!(*s & 0x80)) { /* ASCII */
  1900. *dsz = *ssz = 1;
  1901. *d = toupper(*s);
  1902. return;
  1903. }
  1904. tail = (unsigned char *)ldap_utf8next((char *)s);
  1905. *dsz = *ssz = tail - s;
  1906. switch(*ssz) {
  1907. case 1: /* ASCII */
  1908. *d = toupper(*s);
  1909. break;
  1910. case 2: /* 2 bytes */
  1911. if (*s < LU2S || *s > LU2E) { /* out of range */
  1912. memmove(d, s, *ssz);
  1913. break;
  1914. }
  1915. for (ultp = Lower2UpperTbl2[*s - LU2S];
  1916. ultp && ultp->lower && memcmp(s, ultp->lower, *ssz);
  1917. ultp++)
  1918. ;
  1919. if (!ultp) { /* out of range */
  1920. memmove(d, s, *ssz);
  1921. } else if (ultp->lower) { /* matched */
  1922. memmove(d, ultp->upper, ultp->tsz);
  1923. *dsz = ultp->tsz;
  1924. } else {
  1925. memmove(d, s, *ssz);
  1926. }
  1927. break;
  1928. case 3: /* 3 bytes */
  1929. if (*s != LU3S && *s != LU3E) { /* out of range */
  1930. memmove(d, s, *ssz);
  1931. break;
  1932. }
  1933. for (ultp = Lower2UpperTbl3[*s - LU3S];
  1934. ultp && ultp->lower && memcmp(s, ultp->lower, *ssz);
  1935. ultp++)
  1936. ;
  1937. if (!ultp) { /* out of range */
  1938. memmove(d, s, *ssz);
  1939. } else if (ultp->lower) { /* matched */
  1940. memmove(d, ultp->upper, *ssz);
  1941. } else {
  1942. memmove(d, s, *ssz);
  1943. }
  1944. break;
  1945. }
  1946. return;
  1947. }
  /*
   * slapi_utf8isLower / slapi_UTF8ISLOWER: test whether a UTF-8 character is
   * a lower-case letter
   *
   * input:  a UTF-8 character (may be multi-byte)
   * output: non-zero if the character is a lower-case letter
   *         0 if the character is not a lower-case letter
   *
   * slapi_UTF8ISLOWER is the plain-char entry point: it only casts its
   * argument and forwards to slapi_utf8isLower.
   */
  int
  slapi_UTF8ISLOWER(char *s)
  {
      unsigned char *ch = (unsigned char *)s;

      return slapi_utf8isLower(ch);
  }
  1961. int
  1962. slapi_utf8isLower(unsigned char *s)
  1963. {
  1964. UpperLowerTbl_t *ultp;
  1965. unsigned char *next;
  1966. int sz;
  1967. if (s == NULL || *s == '\0') {
  1968. return 0;
  1969. }
  1970. if (!(*s & 0x80)) { /* ASCII */
  1971. return islower(*s);
  1972. }
  1973. next = (unsigned char *)ldap_utf8next((char *)s);
  1974. switch(sz = next - s) {
  1975. case 1: /* ASCII */
  1976. return islower(*s);
  1977. case 2:
  1978. if (*s < LU2S || *s > LU2E) { /* out of range */
  1979. return 0;
  1980. }
  1981. for (ultp = Lower2UpperTbl2[*s - LU2S];
  1982. ultp && ultp->lower && memcmp(s, ultp->lower, sz);
  1983. ultp++)
  1984. ;
  1985. if (!ultp) { /* out of range */
  1986. return 0;
  1987. } else if (ultp->lower) { /* matched */
  1988. return 1;
  1989. } else {
  1990. return 0;
  1991. }
  1992. case 3:
  1993. if (*s < LU3S || *s > LU3E) { /* out of range */
  1994. return 0;
  1995. }
  1996. for (ultp = Lower2UpperTbl3[*s - LU3S];
  1997. ultp && ultp->lower && memcmp(s, ultp->lower, sz);
  1998. ultp++)
  1999. ;
  2000. if (!ultp) { /* out of range */
  2001. return 0;
  2002. } else if (ultp->lower) { /* matched */
  2003. return 1;
  2004. } else {
  2005. return 0;
  2006. }
  2007. default:
  2008. return 0;
  2009. }
  2010. }
  /*
   * slapi_utf8casecmp / slapi_UTF8CASECMP: case-insensitive comparison of two
   * UTF-8 strings
   *
   * input:  two UTF-8 strings (s0, s1) to be compared
   * output: a positive number if s0 comes after s1
   *         0 if the two strings are identical ignoring case
   *         a negative number if s1 comes after s0
   *
   * Rules:  If both strings are NULL or 0-length, 0 is returned.
   *         If only one string is NULL or 0-length, that string is the
   *         smaller one.
   *         If neither string, or only one of them, contains 8-bit
   *         characters, the system-provided strcasecmp is used.
   *         If one or both strings are not UTF-8, the system-provided
   *         strcasecmp is used.
   *         Otherwise both strings are converted to lower-case UTF-8 and
   *         compared character by character from the beginning:
   *           - if the corresponding characters differ in byte length, the
   *             difference of the two lengths is returned;
   *           - if they have the same length, their bytes are compared and
   *             the first non-zero byte difference is returned.
   *         If one string is shorter than the other, the difference of the
   *         leftover lengths is returned.
   *
   * Notes:  Do not use this function for collation:
   *         1) it has no notion of locale;
   *         2) it orders by UTF-8 code, which differs from locale-based
   *            collation.
   *
   * slapi_UTF8CASECMP is the plain-char entry point: it only casts its
   * arguments and forwards to slapi_utf8casecmp.
   */
  int
  slapi_UTF8CASECMP(char *s0, char *s1)
  {
      unsigned char *lhs = (unsigned char *)s0;
      unsigned char *rhs = (unsigned char *)s1;

      return slapi_utf8casecmp(lhs, rhs);
  }
  /*
   * slapi_utf8casecmp: compare two UTF-8 strings ignoring case.
   *
   * Returns 0 when the strings match, a negative number when s0 orders
   * before s1 and a positive number when it orders after.  NULL and empty
   * strings order before any non-empty string.  The system strcasecmp is
   * used unless both strings contain 8-bit characters and both can be
   * lower-cased as UTF-8; in that case the lower-cased copies are compared
   * character by character (length first, then bytes).
   */
  int
  slapi_utf8casecmp(unsigned char *s0, unsigned char *s1)
  {
      unsigned char *lower0 = NULL; /* lower-cased copy of s0 */
      unsigned char *lower1 = NULL; /* lower-cased copy of s1 */
      unsigned char *cur0, *cur1;   /* current character */
      unsigned char *nxt0, *nxt1;   /* character following the current one */
      unsigned char *end0, *end1;   /* terminating NUL of each copy */
      int empty0, empty1;
      int hi0, hi1;
      int width, k;
      int result;

      empty0 = (s0 == NULL || *s0 == '\0');
      empty1 = (s1 == NULL || *s1 == '\0');
      if (empty0 || empty1) {
          if (empty0 && empty1) {
              result = 0;
          } else if (empty0) {
              result = -1; /* whatever s1 holds, s0 < s1 */
          } else {
              result = 1; /* whatever s0 holds, s0 > s1 */
          }
          goto done;
      }

      hi0 = slapi_has8thBit(s0);
      hi1 = slapi_has8thBit(s1);
      if (hi0 != hi1 || hi0 == 0) {
          /* at most one of the strings carries 8-bit characters */
          result = strcasecmp((char *)s0, (char *)s1);
          goto done;
      }

      lower0 = slapi_utf8StrToLower(s0);
      lower1 = slapi_utf8StrToLower(s1);
      if (lower0 == NULL || lower1 == NULL || /* either is not UTF-8 */
          *lower0 == '\0' || *lower1 == '\0') {
          result = strcasecmp((char *)s0, (char *)s1);
          goto done;
      }

      cur0 = lower0;
      cur1 = lower1;
      end0 = lower0 + strlen((char *)lower0);
      end1 = lower1 + strlen((char *)lower1);

      for (;;) {
          nxt0 = (unsigned char *)ldap_utf8next((char *)cur0);
          nxt1 = (unsigned char *)ldap_utf8next((char *)cur1);
          if (nxt0 > end0 || nxt1 > end1) {
              break; /* at least one string is exhausted */
          }

          width = nxt0 - cur0;
          result = width - (int)(nxt1 - cur1);
          if (result != 0) { /* characters of different length */
              goto done;
          }

          /* same length: compare the bytes of the two characters */
          for (k = 0; k < width; k++) {
              result = cur0[k] - cur1[k];
              if (result != 0) {
                  goto done;
              }
          }

          cur0 = nxt0;
          cur1 = nxt1;
      }

      /* the common part matched: decide on what is left of each string */
      result = (int)(end0 - nxt0) - (int)(end1 - nxt1);

  done:
      if (lower0) {
          slapi_ch_free((void **)&lower0);
      }
      if (lower1) {
          slapi_ch_free((void **)&lower1);
      }
      return result;
  }
  /*
   * slapi_utf8ncasecmp / slapi_UTF8NCASECMP: case-insensitive comparison of
   * the first n characters of two UTF-8 strings
   *
   * input:  two UTF-8 strings (s0, s1) to be compared
   *         the number of characters to compare (n)
   * output: a positive number if s0 comes after s1
   *         0 if the two strings are identical ignoring case
   *         a negative number if s1 comes after s0
   *
   * Rules:  Same as slapi_utf8casecmp, except for the limit of n characters.
   *
   * Notes:  Do not use this function for collation:
   *         1) it has no notion of locale;
   *         2) it orders by UTF-8 code, which differs from locale-based
   *            collation.
   *         n counts characters, NOT bytes.
   *
   * slapi_UTF8NCASECMP is the plain-char entry point: it only casts its
   * string arguments and forwards to slapi_utf8ncasecmp.
   */
  int
  slapi_UTF8NCASECMP(char *s0, char *s1, int n)
  {
      unsigned char *lhs = (unsigned char *)s0;
      unsigned char *rhs = (unsigned char *)s1;

      return slapi_utf8ncasecmp(lhs, rhs, n);
  }
  /*
   * slapi_utf8ncasecmp: compare at most n characters of two UTF-8 strings,
   * ignoring case.
   *
   * input:  s0, s1 - the strings to compare (either may be NULL)
   *         n      - maximum number of characters (not bytes) to compare
   * output: 0 when the compared parts match, a negative number when s0
   *         orders before s1, a positive number when it orders after
   *
   * n == 0 compares nothing and therefore always yields 0, whatever the
   * strings are.  Otherwise NULL and empty strings order before any
   * non-empty string.  The system strncasecmp is used unless both strings
   * contain 8-bit characters and both can be lower-cased as UTF-8; note that
   * on those fallbacks n is handed to strncasecmp as is, i.e. as a byte
   * count.  In the remaining case the lower-cased copies are compared
   * character by character (length first, then bytes) until n characters
   * matched or one string is exhausted.
   */
  int
  slapi_utf8ncasecmp(unsigned char *s0, unsigned char *s1, int n)
  {
      unsigned char *d0, *d1; /* store lower-case strings */
      unsigned char *p0, *p1; /* current UTF-8 char */
      unsigned char *n0, *n1; /* next UTF-8 char */
      unsigned char *t0, *t1; /* tail of the strings */
      unsigned char *x0, *x1; /* current byte in a char */
      int cnt;                /* characters matched so far */
      int rval;
      int has8_s0;
      int has8_s1;

      d0 = d1 = NULL;

      if (n == 0) {
          /* nothing to compare: equal, even if one side is empty or NULL */
          return 0;
      }

      if (s0 == NULL || *s0 == '\0') {
          if (s1 == NULL || *s1 == '\0') {
              rval = 0;
          } else {
              rval = -1; /* regardless s1, s0 < s1 */
          }
          goto end;
      } else if (s1 == NULL || *s1 == '\0') {
          rval = 1; /* regardless s0, s0 > s1 */
          goto end;
      }

      has8_s0 = slapi_has8thBit(s0);
      has8_s1 = slapi_has8thBit(s1);
      if (has8_s0 != has8_s1 || has8_s0 == 0) {
          /* at most one of the strings carries 8-bit characters */
          rval = strncasecmp((char *)s0, (char *)s1, n);
          goto end;
      }

      d0 = slapi_utf8StrToLower(s0);
      d1 = slapi_utf8StrToLower(s1);
      if (d0 == NULL || d1 == NULL || /* either is not a UTF-8 string */
          *d0 == '\0' || *d1 == '\0') {
          rval = strncasecmp((char *)s0, (char *)s1, n);
          goto end;
      }

      p0 = d0;
      p1 = d1;
      t0 = d0 + strlen((char *)d0);
      t1 = d1 + strlen((char *)d1);
      cnt = 0;
      while (1) {
          n0 = (unsigned char *)ldap_utf8next((char *)p0);
          n1 = (unsigned char *)ldap_utf8next((char *)p1);
          if (n0 > t0 || n1 > t1 || cnt == n) {
              break;
          }

          rval = (int)(n0 - p0) - (int)(n1 - p1);
          if (rval) { /* length is different */
              goto end;
          }

          /* same length: compare the bytes of the two characters */
          for (x0 = p0, x1 = p1; x0 < n0; x0++, x1++) {
              rval = *x0 - *x1;
              if (rval) {
                  goto end;
              }
          }

          p0 = n0;
          p1 = n1; /* goto next */
          cnt++;
      }

      if (cnt == n) {
          /* the first n characters matched */
          rval = 0;
      } else {
          /* finished scanning the shared part and check the leftover */
          rval = (int)(t0 - n0) - (int)(t1 - n1);
      }

  end:
      if (d0) {
          slapi_ch_free((void **)&d0);
      }
      if (d1) {
          slapi_ch_free((void **)&d1);
      }
      return rval;
  }