utf8compare.c 74 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319
  1. /** BEGIN COPYRIGHT BLOCK
  2. * This Program is free software; you can redistribute it and/or modify it under
  3. * the terms of the GNU General Public License as published by the Free Software
  4. * Foundation; version 2 of the License.
  5. *
  6. * This Program is distributed in the hope that it will be useful, but WITHOUT
  7. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  8. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  9. *
  10. * You should have received a copy of the GNU General Public License along with
  11. * this Program; if not, write to the Free Software Foundation, Inc., 59 Temple
  12. * Place, Suite 330, Boston, MA 02111-1307 USA.
  13. *
  14. * In addition, as a special exception, Red Hat, Inc. gives You the additional
  15. * right to link the code of this Program with code not covered under the GNU
  16. * General Public License ("Non-GPL Code") and to distribute linked combinations
  17. * including the two, subject to the limitations in this paragraph. Non-GPL Code
  18. * permitted under this exception must only link to the code of this Program
  19. * through those well defined interfaces identified in the file named EXCEPTION
  20. * found in the source code files (the "Approved Interfaces"). The files of
  21. * Non-GPL Code may instantiate templates or use macros or inline functions from
  22. * the Approved Interfaces without causing the resulting work to be covered by
  23. * the GNU General Public License. Only Red Hat, Inc. may make changes or
  24. * additions to the list of Approved Interfaces. You must obey the GNU General
  25. * Public License in all respects for all of the Program code and other code used
  26. * in conjunction with the Program except the Non-GPL Code covered by this
  27. * exception. If you modify this file, you may extend this exception to your
  28. * version of the file, but you are not obligated to do so. If you do not wish to
  29. * provide this exception without modification, you must delete this exception
  30. * statement from your version and license this file solely under the GPL without
  31. * exception.
  32. *
  33. *
  34. * Copyright (C) 2001 Sun Microsystems, Inc. Used by permission.
  35. * Copyright (C) 2005 Red Hat, Inc.
  36. * All rights reserved.
  37. * END COPYRIGHT BLOCK **/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include <ctype.h>
  41. #include "ldap.h"
  42. #include "slap.h"
  43. #include "slapi-plugin.h"
  44. typedef struct sUpperLowerTbl {
  45. char *upper, *lower;
  46. int tsz; /* target size */
  47. } UpperLowerTbl_t;
  48. /*
  49. * slapi_has8thBit: check the input string
  50. * return 1 if the string contains 8-bit character
  51. * return 0 otherwise
  52. */
  53. int
  54. slapi_has8thBit(unsigned char *s)
  55. {
  56. unsigned char *p, *tail;
  57. tail = s + strlen((char *)s);
  58. for (p = s; p < tail; p++) {
  59. if (0x80 & *p) {
  60. return 1;
  61. }
  62. }
  63. return 0;
  64. }
/*
 * Upper-to-lower tables: each table is sorted by its upper-case characters.
 */
  68. UpperLowerTbl_t Upper2LowerTbl20[] = {
  69. /* upper, lower */
  70. {"\303\200", "\303\240", 2},
  71. {"\303\201", "\303\241", 2},
  72. {"\303\202", "\303\242", 2},
  73. {"\303\203", "\303\243", 2},
  74. {"\303\204", "\303\244", 2},
  75. {"\303\205", "\303\245", 2},
  76. {"\303\206", "\303\246", 2},
  77. {"\303\207", "\303\247", 2},
  78. {"\303\210", "\303\250", 2},
  79. {"\303\211", "\303\251", 2},
  80. {"\303\212", "\303\252", 2},
  81. {"\303\213", "\303\253", 2},
  82. {"\303\214", "\303\254", 2},
  83. {"\303\215", "\303\255", 2},
  84. {"\303\216", "\303\256", 2},
  85. {"\303\217", "\303\257", 2},
  86. {"\303\220", "\303\260", 2},
  87. {"\303\221", "\303\261", 2},
  88. {"\303\222", "\303\262", 2},
  89. {"\303\223", "\303\263", 2},
  90. {"\303\224", "\303\264", 2},
  91. {"\303\225", "\303\265", 2},
  92. {"\303\226", "\303\266", 2},
  93. {"\303\230", "\303\270", 2},
  94. {"\303\231", "\303\271", 2},
  95. {"\303\232", "\303\272", 2},
  96. {"\303\233", "\303\273", 2},
  97. {"\303\234", "\303\274", 2},
  98. {"\303\235", "\303\275", 2},
  99. {"\303\236", "\303\276", 2},
  100. {NULL, NULL, 0}
  101. };
  102. UpperLowerTbl_t Upper2LowerTbl21[] = {
  103. {"\304\200", "\304\201", 2},
  104. {"\304\202", "\304\203", 2},
  105. {"\304\204", "\304\205", 2},
  106. {"\304\206", "\304\207", 2},
  107. {"\304\210", "\304\211", 2},
  108. {"\304\212", "\304\213", 2},
  109. {"\304\214", "\304\215", 2},
  110. {"\304\216", "\304\217", 2},
  111. {"\304\220", "\304\221", 2},
  112. {"\304\222", "\304\223", 2},
  113. {"\304\224", "\304\225", 2},
  114. {"\304\226", "\304\227", 2},
  115. {"\304\230", "\304\231", 2},
  116. {"\304\232", "\304\233", 2},
  117. {"\304\234", "\304\235", 2},
  118. {"\304\236", "\304\237", 2},
  119. {"\304\240", "\304\241", 2},
  120. {"\304\242", "\304\243", 2},
  121. {"\304\244", "\304\245", 2},
  122. {"\304\246", "\304\247", 2},
  123. {"\304\250", "\304\251", 2},
  124. {"\304\252", "\304\253", 2},
  125. {"\304\254", "\304\255", 2},
  126. {"\304\256", "\304\257", 2},
  127. {"\304\260", "\151", 1},
  128. {"\304\262", "\304\263", 2},
  129. {"\304\264", "\304\265", 2},
  130. {"\304\266", "\304\267", 2},
  131. {"\304\271", "\304\272", 2},
  132. {"\304\273", "\304\274", 2},
  133. {"\304\275", "\304\276", 2},
  134. {"\304\277", "\305\200", 2},
  135. {NULL, NULL, 0}
  136. };
  137. UpperLowerTbl_t Upper2LowerTbl22[] = {
  138. {"\305\201", "\305\202", 2},
  139. {"\305\203", "\305\204", 2},
  140. {"\305\205", "\305\206", 2},
  141. {"\305\207", "\305\210", 2},
  142. {"\305\212", "\305\213", 2},
  143. {"\305\214", "\305\215", 2},
  144. {"\305\216", "\305\217", 2},
  145. {"\305\220", "\305\221", 2},
  146. {"\305\222", "\305\223", 2},
  147. {"\305\224", "\305\225", 2},
  148. {"\305\226", "\305\227", 2},
  149. {"\305\230", "\305\231", 2},
  150. {"\305\232", "\305\233", 2},
  151. {"\305\234", "\305\235", 2},
  152. {"\305\236", "\305\237", 2},
  153. {"\305\240", "\305\241", 2},
  154. {"\305\242", "\305\243", 2},
  155. {"\305\244", "\305\245", 2},
  156. {"\305\246", "\305\247", 2},
  157. {"\305\250", "\305\251", 2},
  158. {"\305\252", "\305\253", 2},
  159. {"\305\254", "\305\255", 2},
  160. {"\305\256", "\305\257", 2},
  161. {"\305\260", "\305\261", 2},
  162. {"\305\262", "\305\263", 2},
  163. {"\305\264", "\305\265", 2},
  164. {"\305\266", "\305\267", 2},
  165. {"\305\270", "\303\277", 2},
  166. {"\305\271", "\305\272", 2},
  167. {"\305\273", "\305\274", 2},
  168. {"\305\275", "\305\276", 2},
  169. {NULL, NULL, 0}
  170. };
  171. UpperLowerTbl_t Upper2LowerTbl23[] = {
  172. {"\306\201", "\311\223", 2},
  173. {"\306\202", "\306\203", 2},
  174. {"\306\204", "\306\205", 2},
  175. {"\306\206", "\311\224", 2},
  176. {"\306\207", "\306\210", 2},
  177. {"\306\211", "\311\226", 2},
  178. {"\306\212", "\311\227", 2},
  179. {"\306\213", "\306\214", 2},
  180. {"\306\216", "\311\230", 2},
  181. {"\306\217", "\311\231", 2},
  182. {"\306\220", "\311\233", 2},
  183. {"\306\221", "\306\222", 2},
  184. {"\306\223", "\311\240", 2},
  185. {"\306\224", "\311\243", 2},
  186. {"\306\226", "\311\251", 2},
  187. {"\306\227", "\311\250", 2},
  188. {"\306\230", "\306\231", 2},
  189. {"\306\234", "\311\257", 2},
  190. {"\306\235", "\311\262", 2},
  191. {"\306\237", "\306\237", 2},
  192. {"\306\240", "\306\241", 2},
  193. {"\306\242", "\306\243", 2},
  194. {"\306\244", "\306\245", 2},
  195. {"\306\246", "\306\246", 2},
  196. {"\306\247", "\306\250", 2},
  197. {"\306\251", "\312\203", 2},
  198. {"\306\254", "\306\255", 2},
  199. {"\306\256", "\312\210", 2},
  200. {"\306\257", "\306\260", 2},
  201. {"\306\261", "\312\212", 2},
  202. {"\306\262", "\312\213", 2},
  203. {"\306\263", "\306\264", 2},
  204. {"\306\265", "\306\266", 2},
  205. {"\306\267", "\312\222", 2},
  206. {"\306\270", "\306\271", 2},
  207. {"\306\274", "\306\275", 2},
  208. {NULL, NULL, 0}
  209. };
  210. UpperLowerTbl_t Upper2LowerTbl24[] = {
  211. {"\307\204", "\307\205", 2},
  212. {"\307\205", "\307\204", 2},
  213. {"\307\207", "\307\210", 2},
  214. {"\307\210", "\307\207", 2},
  215. {"\307\212", "\307\213", 2},
  216. {"\307\213", "\307\212", 2},
  217. {"\307\215", "\307\216", 2},
  218. {"\307\217", "\307\220", 2},
  219. {"\307\221", "\307\222", 2},
  220. {"\307\223", "\307\224", 2},
  221. {"\307\225", "\307\226", 2},
  222. {"\307\227", "\307\230", 2},
  223. {"\307\231", "\307\232", 2},
  224. {"\307\233", "\307\234", 2},
  225. {"\307\236", "\307\237", 2},
  226. {"\307\240", "\307\241", 2},
  227. {"\307\242", "\307\243", 2},
  228. {"\307\244", "\307\245", 2},
  229. {"\307\246", "\307\247", 2},
  230. {"\307\250", "\307\251", 2},
  231. {"\307\252", "\307\253", 2},
  232. {"\307\254", "\307\255", 2},
  233. {"\307\256", "\307\257", 2},
  234. {"\307\261", "\307\262", 2},
  235. {"\307\262", "\307\261", 2},
  236. {"\307\264", "\307\265", 2},
  237. {"\307\272", "\307\273", 2},
  238. {"\307\274", "\307\275", 2},
  239. {"\307\276", "\307\277", 2},
  240. {NULL, NULL, 0}
  241. };
  242. UpperLowerTbl_t Upper2LowerTbl25[] = {
  243. {"\310\200", "\310\201", 2},
  244. {"\310\202", "\310\203", 2},
  245. {"\310\204", "\310\205", 2},
  246. {"\310\206", "\310\207", 2},
  247. {"\310\210", "\310\211", 2},
  248. {"\310\212", "\310\213", 2},
  249. {"\310\214", "\310\215", 2},
  250. {"\310\216", "\310\217", 2},
  251. {"\310\220", "\310\221", 2},
  252. {"\310\222", "\310\223", 2},
  253. {"\310\224", "\310\225", 2},
  254. {"\310\226", "\310\227", 2},
  255. {NULL, NULL, 0}
  256. };
  257. UpperLowerTbl_t Upper2LowerTbl26[] = {
  258. {"\316\206", "\316\254", 2},
  259. {"\316\210", "\316\255", 2},
  260. {"\316\211", "\316\256", 2},
  261. {"\316\212", "\316\257", 2},
  262. {"\316\214", "\317\214", 2},
  263. {"\316\216", "\317\215", 2},
  264. {"\316\217", "\317\216", 2},
  265. {"\316\221", "\316\261", 2},
  266. {"\316\222", "\316\262", 2},
  267. {"\316\223", "\316\263", 2},
  268. {"\316\224", "\316\264", 2},
  269. {"\316\225", "\316\265", 2},
  270. {"\316\226", "\316\266", 2},
  271. {"\316\227", "\316\267", 2},
  272. {"\316\230", "\316\270", 2},
  273. {"\316\231", "\316\271", 2},
  274. {"\316\232", "\316\272", 2},
  275. {"\316\233", "\316\273", 2},
  276. {"\316\234", "\316\274", 2},
  277. {"\316\235", "\316\275", 2},
  278. {"\316\236", "\316\276", 2},
  279. {"\316\237", "\316\277", 2},
  280. {"\316\240", "\317\200", 2},
  281. {"\316\241", "\317\201", 2},
  282. {"\316\243", "\317\203", 2},
  283. {"\316\244", "\317\204", 2},
  284. {"\316\245", "\317\205", 2},
  285. {"\316\246", "\317\206", 2},
  286. {"\316\247", "\317\207", 2},
  287. {"\316\250", "\317\210", 2},
  288. {"\316\251", "\317\211", 2},
  289. {"\316\252", "\317\212", 2},
  290. {"\316\253", "\317\213", 2},
  291. {NULL, NULL, 0}
  292. };
  293. UpperLowerTbl_t Upper2LowerTbl27[] = {
  294. {"\317\222", "\317\222", 2},
  295. {"\317\223", "\317\223", 2},
  296. {"\317\224", "\317\224", 2},
  297. {"\317\232", "\317\232", 2},
  298. {"\317\234", "\317\234", 2},
  299. {"\317\236", "\317\236", 2},
  300. {"\317\240", "\317\240", 2},
  301. {"\317\242", "\317\243", 2},
  302. {"\317\244", "\317\245", 2},
  303. {"\317\246", "\317\247", 2},
  304. {"\317\250", "\317\251", 2},
  305. {"\317\252", "\317\253", 2},
  306. {"\317\254", "\317\255", 2},
  307. {"\317\256", "\317\257", 2},
  308. {NULL, NULL, 0}
  309. };
  310. UpperLowerTbl_t Upper2LowerTbl28[] = {
  311. {"\320\201", "\321\221", 2},
  312. {"\320\202", "\321\222", 2},
  313. {"\320\203", "\321\223", 2},
  314. {"\320\204", "\321\224", 2},
  315. {"\320\205", "\321\225", 2},
  316. {"\320\206", "\321\226", 2},
  317. {"\320\207", "\321\227", 2},
  318. {"\320\210", "\321\230", 2},
  319. {"\320\211", "\321\231", 2},
  320. {"\320\212", "\321\232", 2},
  321. {"\320\213", "\321\233", 2},
  322. {"\320\214", "\321\234", 2},
  323. {"\320\216", "\321\236", 2},
  324. {"\320\217", "\321\237", 2},
  325. {"\320\220", "\320\260", 2},
  326. {"\320\221", "\320\261", 2},
  327. {"\320\222", "\320\262", 2},
  328. {"\320\223", "\320\263", 2},
  329. {"\320\224", "\320\264", 2},
  330. {"\320\225", "\320\265", 2},
  331. {"\320\226", "\320\266", 2},
  332. {"\320\227", "\320\267", 2},
  333. {"\320\230", "\320\270", 2},
  334. {"\320\231", "\320\271", 2},
  335. {"\320\232", "\320\272", 2},
  336. {"\320\233", "\320\273", 2},
  337. {"\320\234", "\320\274", 2},
  338. {"\320\235", "\320\275", 2},
  339. {"\320\236", "\320\276", 2},
  340. {"\320\237", "\320\277", 2},
  341. {"\320\240", "\321\200", 2},
  342. {"\320\241", "\321\201", 2},
  343. {"\320\242", "\321\202", 2},
  344. {"\320\243", "\321\203", 2},
  345. {"\320\244", "\321\204", 2},
  346. {"\320\245", "\321\205", 2},
  347. {"\320\246", "\321\206", 2},
  348. {"\320\247", "\321\207", 2},
  349. {"\320\250", "\321\210", 2},
  350. {"\320\251", "\321\211", 2},
  351. {"\320\252", "\321\212", 2},
  352. {"\320\253", "\321\213", 2},
  353. {"\320\254", "\321\214", 2},
  354. {"\320\255", "\321\215", 2},
  355. {"\320\256", "\321\216", 2},
  356. {"\320\257", "\321\217", 2},
  357. {NULL, NULL, 0}
  358. };
  359. UpperLowerTbl_t Upper2LowerTbl29[] = {
  360. {"\321\240", "\321\241", 2},
  361. {"\321\242", "\321\243", 2},
  362. {"\321\244", "\321\245", 2},
  363. {"\321\246", "\321\247", 2},
  364. {"\321\250", "\321\251", 2},
  365. {"\321\252", "\321\253", 2},
  366. {"\321\254", "\321\255", 2},
  367. {"\321\256", "\321\257", 2},
  368. {"\321\260", "\321\261", 2},
  369. {"\321\262", "\321\263", 2},
  370. {"\321\264", "\321\265", 2},
  371. {"\321\266", "\321\267", 2},
  372. {"\321\270", "\321\271", 2},
  373. {"\321\272", "\321\273", 2},
  374. {"\321\274", "\321\275", 2},
  375. {"\321\276", "\321\277", 2},
  376. {NULL, NULL, 0}
  377. };
  378. UpperLowerTbl_t Upper2LowerTbl2a[] = {
  379. {"\322\200", "\322\201", 2},
  380. {"\322\220", "\322\221", 2},
  381. {"\322\222", "\322\223", 2},
  382. {"\322\224", "\322\225", 2},
  383. {"\322\226", "\322\227", 2},
  384. {"\322\230", "\322\231", 2},
  385. {"\322\232", "\322\233", 2},
  386. {"\322\234", "\322\235", 2},
  387. {"\322\236", "\322\237", 2},
  388. {"\322\240", "\322\241", 2},
  389. {"\322\242", "\322\243", 2},
  390. {"\322\244", "\322\245", 2},
  391. {"\322\246", "\322\247", 2},
  392. {"\322\250", "\322\251", 2},
  393. {"\322\252", "\322\253", 2},
  394. {"\322\254", "\322\255", 2},
  395. {"\322\256", "\322\257", 2},
  396. {"\322\260", "\322\261", 2},
  397. {"\322\262", "\322\263", 2},
  398. {"\322\264", "\322\265", 2},
  399. {"\322\266", "\322\267", 2},
  400. {"\322\270", "\322\271", 2},
  401. {"\322\272", "\322\273", 2},
  402. {"\322\274", "\322\275", 2},
  403. {"\322\276", "\322\277", 2},
  404. {NULL, NULL, 0}
  405. };
  406. UpperLowerTbl_t Upper2LowerTbl2b[] = {
  407. {"\323\201", "\323\202", 2},
  408. {"\323\203", "\323\204", 2},
  409. {"\323\207", "\323\210", 2},
  410. {"\323\213", "\323\214", 2},
  411. {"\323\220", "\323\221", 2},
  412. {"\323\222", "\323\223", 2},
  413. {"\323\224", "\323\225", 2},
  414. {"\323\226", "\323\227", 2},
  415. {"\323\230", "\323\231", 2},
  416. {"\323\232", "\323\233", 2},
  417. {"\323\234", "\323\235", 2},
  418. {"\323\236", "\323\237", 2},
  419. {"\323\240", "\323\241", 2},
  420. {"\323\242", "\323\243", 2},
  421. {"\323\244", "\323\245", 2},
  422. {"\323\246", "\323\247", 2},
  423. {"\323\250", "\323\251", 2},
  424. {"\323\252", "\323\253", 2},
  425. {"\323\256", "\323\257", 2},
  426. {"\323\260", "\323\261", 2},
  427. {"\323\262", "\323\263", 2},
  428. {"\323\264", "\323\265", 2},
  429. {"\323\270", "\323\271", 2},
  430. {NULL, NULL, 0}
  431. };
  432. UpperLowerTbl_t Upper2LowerTbl2c[] = {
  433. {"\324\261", "\325\241", 2},
  434. {"\324\262", "\325\242", 2},
  435. {"\324\263", "\325\243", 2},
  436. {"\324\264", "\325\244", 2},
  437. {"\324\265", "\325\245", 2},
  438. {"\324\266", "\325\246", 2},
  439. {"\324\267", "\325\247", 2},
  440. {"\324\270", "\325\250", 2},
  441. {"\324\271", "\325\251", 2},
  442. {"\324\272", "\325\252", 2},
  443. {"\324\273", "\325\253", 2},
  444. {"\324\274", "\325\254", 2},
  445. {"\324\275", "\325\255", 2},
  446. {"\324\276", "\325\256", 2},
  447. {"\324\277", "\325\257", 2},
  448. {NULL, NULL, 0}
  449. };
  450. UpperLowerTbl_t Upper2LowerTbl2d[] = {
  451. {"\325\200", "\325\260", 2},
  452. {"\325\201", "\325\261", 2},
  453. {"\325\202", "\325\262", 2},
  454. {"\325\203", "\325\263", 2},
  455. {"\325\204", "\325\264", 2},
  456. {"\325\205", "\325\265", 2},
  457. {"\325\206", "\325\266", 2},
  458. {"\325\207", "\325\267", 2},
  459. {"\325\210", "\325\270", 2},
  460. {"\325\211", "\325\271", 2},
  461. {"\325\212", "\325\272", 2},
  462. {"\325\213", "\325\273", 2},
  463. {"\325\214", "\325\274", 2},
  464. {"\325\215", "\325\275", 2},
  465. {"\325\216", "\325\276", 2},
  466. {"\325\217", "\325\277", 2},
  467. {"\325\220", "\326\200", 2},
  468. {"\325\221", "\326\201", 2},
  469. {"\325\222", "\326\202", 2},
  470. {"\325\223", "\326\203", 2},
  471. {"\325\224", "\326\204", 2},
  472. {"\325\225", "\326\205", 2},
  473. {"\325\226", "\326\206", 2},
  474. {NULL, NULL, 0}
  475. /* upper, lower */
  476. };
  477. UpperLowerTbl_t Upper2LowerTbl30[] = {
  478. /* upper, lower */
  479. {"\341\202\240", "\341\203\220", 3},
  480. {"\341\202\241", "\341\203\221", 3},
  481. {"\341\202\242", "\341\203\222", 3},
  482. {"\341\202\243", "\341\203\223", 3},
  483. {"\341\202\244", "\341\203\224", 3},
  484. {"\341\202\245", "\341\203\225", 3},
  485. {"\341\202\246", "\341\203\226", 3},
  486. {"\341\202\247", "\341\203\227", 3},
  487. {"\341\202\250", "\341\203\230", 3},
  488. {"\341\202\251", "\341\203\231", 3},
  489. {"\341\202\252", "\341\203\232", 3},
  490. {"\341\202\253", "\341\203\233", 3},
  491. {"\341\202\254", "\341\203\234", 3},
  492. {"\341\202\255", "\341\203\235", 3},
  493. {"\341\202\256", "\341\203\236", 3},
  494. {"\341\202\257", "\341\203\237", 3},
  495. {"\341\202\260", "\341\203\240", 3},
  496. {"\341\202\261", "\341\203\241", 3},
  497. {"\341\202\262", "\341\203\242", 3},
  498. {"\341\202\263", "\341\203\243", 3},
  499. {"\341\202\264", "\341\203\244", 3},
  500. {"\341\202\265", "\341\203\245", 3},
  501. {"\341\202\266", "\341\203\246", 3},
  502. {"\341\202\267", "\341\203\247", 3},
  503. {"\341\202\270", "\341\203\250", 3},
  504. {"\341\202\271", "\341\203\251", 3},
  505. {"\341\202\272", "\341\203\252", 3},
  506. {"\341\202\273", "\341\203\253", 3},
  507. {"\341\202\274", "\341\203\254", 3},
  508. {"\341\202\275", "\341\203\255", 3},
  509. {"\341\202\276", "\341\203\256", 3},
  510. {"\341\202\277", "\341\203\257", 3},
  511. {"\341\203\200", "\341\203\260", 3},
  512. {"\341\203\201", "\341\203\261", 3},
  513. {"\341\203\202", "\341\203\262", 3},
  514. {"\341\203\203", "\341\203\263", 3},
  515. {"\341\203\204", "\341\203\264", 3},
  516. {"\341\203\205", "\341\203\265", 3},
  517. {"\341\270\200", "\341\270\201", 3},
  518. {"\341\270\202", "\341\270\203", 3},
  519. {"\341\270\204", "\341\270\205", 3},
  520. {"\341\270\206", "\341\270\207", 3},
  521. {"\341\270\210", "\341\270\211", 3},
  522. {"\341\270\212", "\341\270\213", 3},
  523. {"\341\270\214", "\341\270\215", 3},
  524. {"\341\270\216", "\341\270\217", 3},
  525. {"\341\270\220", "\341\270\221", 3},
  526. {"\341\270\222", "\341\270\223", 3},
  527. {"\341\270\224", "\341\270\225", 3},
  528. {"\341\270\226", "\341\270\227", 3},
  529. {"\341\270\230", "\341\270\231", 3},
  530. {"\341\270\232", "\341\270\233", 3},
  531. {"\341\270\234", "\341\270\235", 3},
  532. {"\341\270\236", "\341\270\237", 3},
  533. {"\341\270\240", "\341\270\241", 3},
  534. {"\341\270\242", "\341\270\243", 3},
  535. {"\341\270\244", "\341\270\245", 3},
  536. {"\341\270\246", "\341\270\247", 3},
  537. {"\341\270\250", "\341\270\251", 3},
  538. {"\341\270\252", "\341\270\253", 3},
  539. {"\341\270\254", "\341\270\255", 3},
  540. {"\341\270\256", "\341\270\257", 3},
  541. {"\341\270\260", "\341\270\261", 3},
  542. {"\341\270\262", "\341\270\263", 3},
  543. {"\341\270\264", "\341\270\265", 3},
  544. {"\341\270\266", "\341\270\267", 3},
  545. {"\341\270\270", "\341\270\271", 3},
  546. {"\341\270\272", "\341\270\273", 3},
  547. {"\341\270\274", "\341\270\275", 3},
  548. {"\341\270\276", "\341\270\277", 3},
  549. {"\341\271\200", "\341\271\201", 3},
  550. {"\341\271\202", "\341\271\203", 3},
  551. {"\341\271\204", "\341\271\205", 3},
  552. {"\341\271\206", "\341\271\207", 3},
  553. {"\341\271\210", "\341\271\211", 3},
  554. {"\341\271\212", "\341\271\213", 3},
  555. {"\341\271\214", "\341\271\215", 3},
  556. {"\341\271\216", "\341\271\217", 3},
  557. {"\341\271\220", "\341\271\221", 3},
  558. {"\341\271\222", "\341\271\223", 3},
  559. {"\341\271\224", "\341\271\225", 3},
  560. {"\341\271\226", "\341\271\227", 3},
  561. {"\341\271\230", "\341\271\231", 3},
  562. {"\341\271\232", "\341\271\233", 3},
  563. {"\341\271\234", "\341\271\235", 3},
  564. {"\341\271\236", "\341\271\237", 3},
  565. {"\341\271\240", "\341\271\241", 3},
  566. {"\341\271\242", "\341\271\243", 3},
  567. {"\341\271\244", "\341\271\245", 3},
  568. {"\341\271\246", "\341\271\247", 3},
  569. {"\341\271\250", "\341\271\251", 3},
  570. {"\341\271\252", "\341\271\253", 3},
  571. {"\341\271\254", "\341\271\255", 3},
  572. {"\341\271\256", "\341\271\257", 3},
  573. {"\341\271\260", "\341\271\261", 3},
  574. {"\341\271\262", "\341\271\263", 3},
  575. {"\341\271\264", "\341\271\265", 3},
  576. {"\341\271\266", "\341\271\267", 3},
  577. {"\341\271\270", "\341\271\271", 3},
  578. {"\341\271\272", "\341\271\273", 3},
  579. {"\341\271\274", "\341\271\275", 3},
  580. {"\341\271\276", "\341\271\277", 3},
  581. {"\341\272\200", "\341\272\201", 3},
  582. {"\341\272\202", "\341\272\203", 3},
  583. {"\341\272\204", "\341\272\205", 3},
  584. {"\341\272\206", "\341\272\207", 3},
  585. {"\341\272\210", "\341\272\211", 3},
  586. {"\341\272\212", "\341\272\213", 3},
  587. {"\341\272\214", "\341\272\215", 3},
  588. {"\341\272\216", "\341\272\217", 3},
  589. {"\341\272\220", "\341\272\221", 3},
  590. {"\341\272\222", "\341\272\223", 3},
  591. {"\341\272\224", "\341\272\225", 3},
  592. {"\341\272\240", "\341\272\241", 3},
  593. {"\341\272\242", "\341\272\243", 3},
  594. {"\341\272\244", "\341\272\245", 3},
  595. {"\341\272\246", "\341\272\247", 3},
  596. {"\341\272\250", "\341\272\251", 3},
  597. {"\341\272\252", "\341\272\253", 3},
  598. {"\341\272\254", "\341\272\255", 3},
  599. {"\341\272\256", "\341\272\257", 3},
  600. {"\341\272\260", "\341\272\261", 3},
  601. {"\341\272\262", "\341\272\263", 3},
  602. {"\341\272\264", "\341\272\265", 3},
  603. {"\341\272\266", "\341\272\267", 3},
  604. {"\341\272\270", "\341\272\271", 3},
  605. {"\341\272\272", "\341\272\273", 3},
  606. {"\341\272\274", "\341\272\275", 3},
  607. {"\341\272\276", "\341\272\277", 3},
  608. {"\341\273\200", "\341\273\201", 3},
  609. {"\341\273\202", "\341\273\203", 3},
  610. {"\341\273\204", "\341\273\205", 3},
  611. {"\341\273\206", "\341\273\207", 3},
  612. {"\341\273\210", "\341\273\211", 3},
  613. {"\341\273\212", "\341\273\213", 3},
  614. {"\341\273\214", "\341\273\215", 3},
  615. {"\341\273\216", "\341\273\217", 3},
  616. {"\341\273\220", "\341\273\221", 3},
  617. {"\341\273\222", "\341\273\223", 3},
  618. {"\341\273\224", "\341\273\225", 3},
  619. {"\341\273\226", "\341\273\227", 3},
  620. {"\341\273\230", "\341\273\231", 3},
  621. {"\341\273\232", "\341\273\233", 3},
  622. {"\341\273\234", "\341\273\235", 3},
  623. {"\341\273\236", "\341\273\237", 3},
  624. {"\341\273\240", "\341\273\241", 3},
  625. {"\341\273\242", "\341\273\243", 3},
  626. {"\341\273\244", "\341\273\245", 3},
  627. {"\341\273\246", "\341\273\247", 3},
  628. {"\341\273\250", "\341\273\251", 3},
  629. {"\341\273\252", "\341\273\253", 3},
  630. {"\341\273\254", "\341\273\255", 3},
  631. {"\341\273\256", "\341\273\257", 3},
  632. {"\341\273\260", "\341\273\261", 3},
  633. {"\341\273\262", "\341\273\263", 3},
  634. {"\341\273\264", "\341\273\265", 3},
  635. {"\341\273\266", "\341\273\267", 3},
  636. {"\341\273\270", "\341\273\271", 3},
  637. {"\341\274\210", "\341\274\200", 3},
  638. {"\341\274\211", "\341\274\201", 3},
  639. {"\341\274\212", "\341\274\202", 3},
  640. {"\341\274\213", "\341\274\203", 3},
  641. {"\341\274\214", "\341\274\204", 3},
  642. {"\341\274\215", "\341\274\205", 3},
  643. {"\341\274\216", "\341\274\206", 3},
  644. {"\341\274\217", "\341\274\207", 3},
  645. {"\341\274\230", "\341\274\220", 3},
  646. {"\341\274\231", "\341\274\221", 3},
  647. {"\341\274\232", "\341\274\222", 3},
  648. {"\341\274\233", "\341\274\223", 3},
  649. {"\341\274\234", "\341\274\224", 3},
  650. {"\341\274\235", "\341\274\225", 3},
  651. {"\341\274\250", "\341\274\240", 3},
  652. {"\341\274\251", "\341\274\241", 3},
  653. {"\341\274\252", "\341\274\242", 3},
  654. {"\341\274\253", "\341\274\243", 3},
  655. {"\341\274\254", "\341\274\244", 3},
  656. {"\341\274\255", "\341\274\245", 3},
  657. {"\341\274\256", "\341\274\246", 3},
  658. {"\341\274\257", "\341\274\247", 3},
  659. {"\341\274\270", "\341\274\260", 3},
  660. {"\341\274\271", "\341\274\261", 3},
  661. {"\341\274\272", "\341\274\262", 3},
  662. {"\341\274\273", "\341\274\263", 3},
  663. {"\341\274\274", "\341\274\264", 3},
  664. {"\341\274\275", "\341\274\265", 3},
  665. {"\341\274\276", "\341\274\266", 3},
  666. {"\341\274\277", "\341\274\267", 3},
  667. {"\341\275\210", "\341\275\200", 3},
  668. {"\341\275\211", "\341\275\201", 3},
  669. {"\341\275\212", "\341\275\202", 3},
  670. {"\341\275\213", "\341\275\203", 3},
  671. {"\341\275\214", "\341\275\204", 3},
  672. {"\341\275\215", "\341\275\205", 3},
  673. {"\341\275\231", "\341\275\221", 3},
  674. {"\341\275\233", "\341\275\223", 3},
  675. {"\341\275\235", "\341\275\225", 3},
  676. {"\341\275\237", "\341\275\227", 3},
  677. {"\341\275\250", "\341\275\240", 3},
  678. {"\341\275\251", "\341\275\241", 3},
  679. {"\341\275\252", "\341\275\242", 3},
  680. {"\341\275\253", "\341\275\243", 3},
  681. {"\341\275\254", "\341\275\244", 3},
  682. {"\341\275\255", "\341\275\245", 3},
  683. {"\341\275\256", "\341\275\246", 3},
  684. {"\341\275\257", "\341\275\247", 3},
  685. {"\341\276\210", "\341\276\200", 3},
  686. {"\341\276\211", "\341\276\201", 3},
  687. {"\341\276\212", "\341\276\202", 3},
  688. {"\341\276\213", "\341\276\203", 3},
  689. {"\341\276\214", "\341\276\204", 3},
  690. {"\341\276\215", "\341\276\205", 3},
  691. {"\341\276\216", "\341\276\206", 3},
  692. {"\341\276\217", "\341\276\207", 3},
  693. {"\341\276\230", "\341\276\220", 3},
  694. {"\341\276\231", "\341\276\221", 3},
  695. {"\341\276\232", "\341\276\222", 3},
  696. {"\341\276\233", "\341\276\223", 3},
  697. {"\341\276\234", "\341\276\224", 3},
  698. {"\341\276\235", "\341\276\225", 3},
  699. {"\341\276\236", "\341\276\226", 3},
  700. {"\341\276\237", "\341\276\227", 3},
  701. {"\341\276\250", "\341\276\240", 3},
  702. {"\341\276\251", "\341\276\241", 3},
  703. {"\341\276\252", "\341\276\242", 3},
  704. {"\341\276\253", "\341\276\243", 3},
  705. {"\341\276\254", "\341\276\244", 3},
  706. {"\341\276\255", "\341\276\245", 3},
  707. {"\341\276\256", "\341\276\246", 3},
  708. {"\341\276\257", "\341\276\247", 3},
  709. {"\341\276\270", "\341\276\260", 3},
  710. {"\341\276\271", "\341\276\261", 3},
  711. {"\341\276\272", "\341\275\260", 3},
  712. {"\341\276\273", "\341\275\261", 3},
  713. {"\341\276\274", "\341\276\263", 3},
  714. {"\341\276\276", "\341\276\276", 3},
  715. {"\341\277\210", "\341\275\262", 3},
  716. {"\341\277\211", "\341\275\263", 3},
  717. {"\341\277\212", "\341\275\264", 3},
  718. {"\341\277\213", "\341\275\265", 3},
  719. {"\341\277\214", "\341\277\203", 3},
  720. {"\341\277\230", "\341\277\220", 3},
  721. {"\341\277\231", "\341\277\221", 3},
  722. {"\341\277\232", "\341\275\266", 3},
  723. {"\341\277\233", "\341\275\267", 3},
  724. {"\341\277\250", "\341\277\240", 3},
  725. {"\341\277\251", "\341\277\241", 3},
  726. {"\341\277\252", "\341\275\272", 3},
  727. {"\341\277\253", "\341\275\273", 3},
  728. {"\341\277\254", "\341\277\245", 3},
  729. {"\341\277\270", "\341\275\270", 3},
  730. {"\341\277\271", "\341\275\271", 3},
  731. {"\341\277\272", "\341\275\274", 3},
  732. {"\341\277\273", "\341\275\275", 3},
  733. {"\341\277\274", "\341\277\263", 3},
  734. {NULL, NULL, 0}
  735. };
  736. UpperLowerTbl_t Upper2LowerTbl31[] = {
  737. {"\357\274\241", "\357\275\201", 3},
  738. {"\357\274\242", "\357\275\202", 3},
  739. {"\357\274\243", "\357\275\203", 3},
  740. {"\357\274\244", "\357\275\204", 3},
  741. {"\357\274\245", "\357\275\205", 3},
  742. {"\357\274\246", "\357\275\206", 3},
  743. {"\357\274\247", "\357\275\207", 3},
  744. {"\357\274\250", "\357\275\210", 3},
  745. {"\357\274\251", "\357\275\211", 3},
  746. {"\357\274\252", "\357\275\212", 3},
  747. {"\357\274\253", "\357\275\213", 3},
  748. {"\357\274\254", "\357\275\214", 3},
  749. {"\357\274\255", "\357\275\215", 3},
  750. {"\357\274\256", "\357\275\216", 3},
  751. {"\357\274\257", "\357\275\217", 3},
  752. {"\357\274\260", "\357\275\220", 3},
  753. {"\357\274\261", "\357\275\221", 3},
  754. {"\357\274\262", "\357\275\222", 3},
  755. {"\357\274\263", "\357\275\223", 3},
  756. {"\357\274\264", "\357\275\224", 3},
  757. {"\357\274\265", "\357\275\225", 3},
  758. {"\357\274\266", "\357\275\226", 3},
  759. {"\357\274\267", "\357\275\227", 3},
  760. {"\357\274\270", "\357\275\230", 3},
  761. {"\357\274\271", "\357\275\231", 3},
  762. {"\357\274\272", "\357\275\232", 3},
  763. {NULL, NULL, 0}
  764. /* upper, lower */
  765. };
  766. UpperLowerTbl_t *Upper2LowerTbl2[] = {
  767. Upper2LowerTbl20, /* \303 */
  768. Upper2LowerTbl21, /* \304 */
  769. Upper2LowerTbl22, /* \305 */
  770. Upper2LowerTbl23, /* \306 */
  771. Upper2LowerTbl24, /* \307 */
  772. Upper2LowerTbl25, /* \310 */
  773. NULL, /* \311 */
  774. NULL, /* \312 */
  775. NULL, /* \313 */
  776. NULL, /* \314 */
  777. NULL, /* \315 */
  778. Upper2LowerTbl26, /* \316 */
  779. Upper2LowerTbl27, /* \317 */
  780. Upper2LowerTbl28, /* \320 */
  781. Upper2LowerTbl29, /* \321 */
  782. Upper2LowerTbl2a, /* \322 */
  783. Upper2LowerTbl2b, /* \323 */
  784. Upper2LowerTbl2c, /* \324 */
  785. Upper2LowerTbl2d /* \325 */
  786. };
  787. UpperLowerTbl_t *Upper2LowerTbl3[] = {
  788. Upper2LowerTbl30, /* \341 */
  789. NULL, /* \342 */
  790. NULL, /* \343 */
  791. NULL, /* \344 */
  792. NULL, /* \345 */
  793. NULL, /* \346 */
  794. NULL, /* \347 */
  795. NULL, /* \350 */
  796. NULL, /* \351 */
  797. NULL, /* \352 */
  798. NULL, /* \353 */
  799. NULL, /* \354 */
  800. NULL, /* \355 */
  801. NULL, /* \356 */
  802. Upper2LowerTbl31 /* \357 */
  803. };
  /*
   * Lead-byte bounds of the Upper2Lower dispatch arrays:
   * UL2S..UL2E index Upper2LowerTbl2, UL3S..UL3E index Upper2LowerTbl3.
   */
  #define UL2S ((unsigned char)0xC3) /* '\303' */
  #define UL2E ((unsigned char)0xD5) /* '\325' */
  #define UL3S ((unsigned char)0xE1) /* '\341' */
  #define UL3E ((unsigned char)0xEF) /* '\357' */
  /*
   * slapi_utf8StrToLower: translate an upper-case string to lower-case
   *
   * input: a null-terminated UTF-8 string
   * output: a null-terminated UTF-8 string in which every upper-case
   *         character is converted to lower-case; characters which are
   *         not upper-case are copied as is. If the input is not
   *         considered a UTF-8 string, NULL is returned.
   *
   * Notes: This function takes a string (made of multiple UTF-8 characters)
   *        for the input (not one character as in "tolower").
   *        The output string is allocated in this function and must be
   *        released by the caller when it is no longer needed.
   *        A NULL or empty input is returned as is; nothing is allocated
   *        in that case.
   */
  /* Plain-char entry point: casts s and forwards to slapi_utf8StrToLower(). */
  unsigned char *
  slapi_UTF8STRTOLOWER(char *s)
  {
      unsigned char *input = (unsigned char *)s;

      return slapi_utf8StrToLower(input);
  }
  827. unsigned char *
  828. slapi_utf8StrToLower(unsigned char *s)
  829. {
  830. UpperLowerTbl_t *ultp;
  831. unsigned char *p, *np, *tail;
  832. unsigned char *lp, *lphead;
  833. int len, sz;
  834. if (s == NULL || *s == '\0') {
  835. return s;
  836. }
  837. len = strlen((char *)s);
  838. tail = s + len;
  839. lphead = lp = (unsigned char *)slapi_ch_malloc(len + 1);
  840. p = s;
  841. while ((np = (unsigned char *)ldap_utf8next((char *)p)) <= tail) {
  842. switch(sz = np - p) {
  843. case 1:
  844. sprintf((char *)lp, "%c", tolower(*p));
  845. break;
  846. case 2:
  847. if (*p < UL2S || *p > UL2E) { /* out of range */
  848. memcpy(lp, p, sz);
  849. break;
  850. }
  851. for (ultp = Upper2LowerTbl2[*p - UL2S];
  852. ultp && ultp->upper && memcmp(p, ultp->upper, sz);
  853. ultp++)
  854. ;
  855. if (!ultp) { /* out of range */
  856. memcpy(lp, p, sz);
  857. } else if (ultp->upper) { /* matched */
  858. memcpy(lp, ultp->lower, ultp->tsz);
  859. sz = ultp->tsz;
  860. } else {
  861. memcpy(lp, p, sz);
  862. }
  863. break;
  864. case 3:
  865. if (*p != UL3S && *p != UL3E) { /* out of range */
  866. memcpy(lp, p, sz);
  867. break;
  868. }
  869. for (ultp = Upper2LowerTbl3[*p - UL3S];
  870. ultp && ultp->upper && memcmp(p, ultp->upper, sz);
  871. ultp++)
  872. ;
  873. if (!ultp) { /* out of range */
  874. memcpy(lp, p, sz);
  875. } else if (ultp->upper) { /* matched */
  876. memcpy(lp, ultp->lower, sz);
  877. } else {
  878. memcpy(lp, p, sz);
  879. }
  880. break;
  881. case 4:
  882. memcpy(lp, p, sz);
  883. break;
  884. default: /* not UTF-8 */
  885. slapi_ch_free((void **)&lphead);
  886. return NULL;
  887. }
  888. lp += sz;
  889. p = np;
  890. if (p == tail) {
  891. break;
  892. }
  893. }
  894. *lp = '\0';
  895. return lphead;
  896. }
  /*
   * slapi_utf8ToLower: translate an upper-case character to lower-case
   *
   * input: a UTF-8 character (s)
   * output: a UTF-8 character which is converted to lower-case (d)
   *         length (in bytes) of the input character (ssz) and
   *         of the output character (dsz)
   *
   * Notes: This function takes a UTF-8 character (could be multiple bytes)
   *        for the input. Memory for the output character is NOT allocated
   *        in this function; the caller must have allocated it (d).
   *        "memmove" is used because (s) and (d) may overlap.
   */
  /* Plain-char entry point: casts s and d and forwards to slapi_utf8ToLower(). */
  void
  slapi_UTF8TOLOWER(char *s, char *d, int *ssz, int *dsz)
  {
      unsigned char *src = (unsigned char *)s;
      unsigned char *dst = (unsigned char *)d;

      slapi_utf8ToLower(src, dst, ssz, dsz);
  }
  916. void
  917. slapi_utf8ToLower(unsigned char *s, unsigned char *d, int *ssz, int *dsz)
  918. {
  919. UpperLowerTbl_t *ultp;
  920. unsigned char *tail;
  921. if (s == NULL || *s == '\0') {
  922. *ssz = *dsz = 0;
  923. return;
  924. }
  925. if (!(*s & 0x80)) { /* ASCII */
  926. *dsz = *ssz = 1;
  927. *d = tolower(*s);
  928. return;
  929. }
  930. tail = (unsigned char *)ldap_utf8next((char *)s);
  931. *dsz = *ssz = tail - s;
  932. switch(*ssz) {
  933. case 1: /* ASCII */
  934. *d = tolower(*s);
  935. break;
  936. case 2: /* 2 bytes */
  937. if (*s < UL2S || *s > UL2E) { /* out of range */
  938. memmove(d, s, *ssz);
  939. break;
  940. }
  941. for (ultp = Upper2LowerTbl2[*s - UL2S];
  942. ultp && ultp->upper && memcmp(s, ultp->upper, *ssz);
  943. ultp++)
  944. ;
  945. if (!ultp) { /* out of range */
  946. memmove(d, s, *ssz);
  947. } else if (ultp->upper) { /* matched */
  948. memmove(d, ultp->lower, ultp->tsz);
  949. *dsz = ultp->tsz;
  950. } else {
  951. memmove(d, s, *ssz);
  952. }
  953. break;
  954. case 3: /* 3 bytes */
  955. if (*s != UL3S && *s != UL3E) { /* out of range */
  956. memmove(d, s, *ssz);
  957. break;
  958. }
  959. for (ultp = Upper2LowerTbl3[*s - UL3S];
  960. ultp && ultp->upper && memcmp(s, ultp->upper, *ssz);
  961. ultp++)
  962. ;
  963. if (!ultp) { /* out of range */
  964. memmove(d, s, *ssz);
  965. } else if (ultp->upper) { /* matched */
  966. memmove(d, ultp->lower, *ssz);
  967. } else {
  968. memmove(d, s, *ssz);
  969. }
  970. break;
  971. }
  972. return;
  973. }
  /*
   * slapi_utf8isUpper: tests whether a character is an upper-case letter in
   * UTF-8
   *
   * input: a UTF-8 character (could be multi-byte)
   * output: 1 if the character is an upper-case letter
   *         0 if the character is not an upper-case letter
   */
  /* Plain-char entry point: casts s and forwards to slapi_utf8isUpper(). */
  int
  slapi_UTF8ISUPPER(char *s)
  {
      unsigned char *input = (unsigned char *)s;

      return slapi_utf8isUpper(input);
  }
  987. int
  988. slapi_utf8isUpper(unsigned char *s)
  989. {
  990. UpperLowerTbl_t *ultp;
  991. unsigned char *next;
  992. int sz;
  993. if (s == NULL || *s == '\0') {
  994. return 0;
  995. }
  996. if (!(*s & 0x80)) { /* ASCII */
  997. return isupper(*s);
  998. }
  999. next = (unsigned char *)ldap_utf8next((char *)s);
  1000. switch(sz = next - s) {
  1001. case 1: /* ASCII */
  1002. return isupper(*s);
  1003. case 2:
  1004. if (*s < UL2S || *s > UL2E) { /* out of range */
  1005. return 0;
  1006. }
  1007. for (ultp = Upper2LowerTbl2[*s - UL2S];
  1008. ultp && ultp->upper && memcmp(s, ultp->upper, sz);
  1009. ultp++)
  1010. ;
  1011. if (!ultp) { /* out of range */
  1012. return 0;
  1013. } else if (ultp->upper) { /* matched */
  1014. return 1;
  1015. } else {
  1016. return 0;
  1017. }
  1018. case 3:
  1019. if (*s < UL3S || *s > UL3E) { /* out of range */
  1020. return 0;
  1021. }
  1022. for (ultp = Upper2LowerTbl3[*s - UL3S];
  1023. ultp && ultp->upper && memcmp(s, ultp->upper, sz);
  1024. ultp++)
  1025. ;
  1026. if (!ultp) { /* out of range */
  1027. return 0;
  1028. } else if (ultp->upper) { /* matched */
  1029. return 1;
  1030. } else {
  1031. return 0;
  1032. }
  1033. default:
  1034. return 0;
  1035. }
  1036. }
  1037. /*
  1038. * Lower2Upper Tables: sorted by lower characters
  1039. */
  1040. UpperLowerTbl_t Lower2UpperTbl20[] = {
  1041. /* upper, lower */
  1042. {"\303\200", "\303\240", 2},
  1043. {"\303\201", "\303\241", 2},
  1044. {"\303\202", "\303\242", 2},
  1045. {"\303\203", "\303\243", 2},
  1046. {"\303\204", "\303\244", 2},
  1047. {"\303\205", "\303\245", 2},
  1048. {"\303\206", "\303\246", 2},
  1049. {"\303\207", "\303\247", 2},
  1050. {"\303\210", "\303\250", 2},
  1051. {"\303\211", "\303\251", 2},
  1052. {"\303\212", "\303\252", 2},
  1053. {"\303\213", "\303\253", 2},
  1054. {"\303\214", "\303\254", 2},
  1055. {"\303\215", "\303\255", 2},
  1056. {"\303\216", "\303\256", 2},
  1057. {"\303\217", "\303\257", 2},
  1058. {"\303\220", "\303\260", 2},
  1059. {"\303\221", "\303\261", 2},
  1060. {"\303\222", "\303\262", 2},
  1061. {"\303\223", "\303\263", 2},
  1062. {"\303\224", "\303\264", 2},
  1063. {"\303\225", "\303\265", 2},
  1064. {"\303\226", "\303\266", 2},
  1065. {"\303\230", "\303\270", 2},
  1066. {"\303\231", "\303\271", 2},
  1067. {"\303\232", "\303\272", 2},
  1068. {"\303\233", "\303\273", 2},
  1069. {"\303\234", "\303\274", 2},
  1070. {"\303\235", "\303\275", 2},
  1071. {"\303\236", "\303\276", 2},
  1072. {"\305\270", "\303\277", 2},
  1073. {NULL, NULL, 0}
  1074. };
  1075. UpperLowerTbl_t Lower2UpperTbl21[] = {
  1076. {"\304\200", "\304\201", 2},
  1077. {"\304\202", "\304\203", 2},
  1078. {"\304\204", "\304\205", 2},
  1079. {"\304\206", "\304\207", 2},
  1080. {"\304\210", "\304\211", 2},
  1081. {"\304\212", "\304\213", 2},
  1082. {"\304\214", "\304\215", 2},
  1083. {"\304\216", "\304\217", 2},
  1084. {"\304\220", "\304\221", 2},
  1085. {"\304\222", "\304\223", 2},
  1086. {"\304\224", "\304\225", 2},
  1087. {"\304\226", "\304\227", 2},
  1088. {"\304\230", "\304\231", 2},
  1089. {"\304\232", "\304\233", 2},
  1090. {"\304\234", "\304\235", 2},
  1091. {"\304\236", "\304\237", 2},
  1092. {"\304\240", "\304\241", 2},
  1093. {"\304\242", "\304\243", 2},
  1094. {"\304\244", "\304\245", 2},
  1095. {"\304\246", "\304\247", 2},
  1096. {"\304\250", "\304\251", 2},
  1097. {"\304\252", "\304\253", 2},
  1098. {"\304\254", "\304\255", 2},
  1099. {"\304\256", "\304\257", 2},
  1100. {"\111", "\304\261", 1},
  1101. {"\304\262", "\304\263", 2},
  1102. {"\304\264", "\304\265", 2},
  1103. {"\304\266", "\304\267", 2},
  1104. {"\304\271", "\304\272", 2},
  1105. {"\304\273", "\304\274", 2},
  1106. {"\304\275", "\304\276", 2},
  1107. {NULL, NULL}
  1108. };
  1109. UpperLowerTbl_t Lower2UpperTbl22[] = {
  1110. {"\304\277", "\305\200", 2},
  1111. {"\305\201", "\305\202", 2},
  1112. {"\305\203", "\305\204", 2},
  1113. {"\305\205", "\305\206", 2},
  1114. {"\305\207", "\305\210", 2},
  1115. {"\305\212", "\305\213", 2},
  1116. {"\305\214", "\305\215", 2},
  1117. {"\305\216", "\305\217", 2},
  1118. {"\305\220", "\305\221", 2},
  1119. {"\305\222", "\305\223", 2},
  1120. {"\305\224", "\305\225", 2},
  1121. {"\305\226", "\305\227", 2},
  1122. {"\305\230", "\305\231", 2},
  1123. {"\305\232", "\305\233", 2},
  1124. {"\305\234", "\305\235", 2},
  1125. {"\305\236", "\305\237", 2},
  1126. {"\305\240", "\305\241", 2},
  1127. {"\305\242", "\305\243", 2},
  1128. {"\305\244", "\305\245", 2},
  1129. {"\305\246", "\305\247", 2},
  1130. {"\305\250", "\305\251", 2},
  1131. {"\305\252", "\305\253", 2},
  1132. {"\305\254", "\305\255", 2},
  1133. {"\305\256", "\305\257", 2},
  1134. {"\305\260", "\305\261", 2},
  1135. {"\305\262", "\305\263", 2},
  1136. {"\305\264", "\305\265", 2},
  1137. {"\305\266", "\305\267", 2},
  1138. {"\305\271", "\305\272", 2},
  1139. {"\305\273", "\305\274", 2},
  1140. {"\305\275", "\305\276", 2},
  1141. {"\123", "\305\277", 1},
  1142. {NULL, NULL, 0}
  1143. };
  1144. UpperLowerTbl_t Lower2UpperTbl23[] = {
  1145. {"\306\202", "\306\203", 2},
  1146. {"\306\204", "\306\205", 2},
  1147. {"\306\207", "\306\210", 2},
  1148. {"\306\213", "\306\214", 2},
  1149. {"\306\221", "\306\222", 2},
  1150. {"\306\230", "\306\231", 2},
  1151. {"\306\240", "\306\241", 2},
  1152. {"\306\242", "\306\243", 2},
  1153. {"\306\244", "\306\245", 2},
  1154. {"\306\247", "\306\250", 2},
  1155. {"\306\254", "\306\255", 2},
  1156. {"\306\257", "\306\260", 2},
  1157. {"\306\263", "\306\264", 2},
  1158. {"\306\265", "\306\266", 2},
  1159. {"\306\270", "\306\271", 2},
  1160. {"\306\274", "\306\275", 2},
  1161. {NULL, NULL, 0}
  1162. };
  1163. UpperLowerTbl_t Lower2UpperTbl24[] = {
  1164. {"\307\204", "\307\206", 2},
  1165. {"\307\207", "\307\211", 2},
  1166. {"\307\212", "\307\214", 2},
  1167. {"\307\215", "\307\216", 2},
  1168. {"\307\217", "\307\220", 2},
  1169. {"\307\221", "\307\222", 2},
  1170. {"\307\223", "\307\224", 2},
  1171. {"\307\225", "\307\226", 2},
  1172. {"\307\227", "\307\230", 2},
  1173. {"\307\231", "\307\232", 2},
  1174. {"\307\233", "\307\234", 2},
  1175. {"\307\236", "\307\237", 2},
  1176. {"\307\240", "\307\241", 2},
  1177. {"\307\242", "\307\243", 2},
  1178. {"\307\244", "\307\245", 2},
  1179. {"\307\246", "\307\247", 2},
  1180. {"\307\250", "\307\251", 2},
  1181. {"\307\252", "\307\253", 2},
  1182. {"\307\254", "\307\255", 2},
  1183. {"\307\256", "\307\257", 2},
  1184. {"\307\261", "\307\263", 2},
  1185. {"\307\264", "\307\265", 2},
  1186. {"\307\272", "\307\273", 2},
  1187. {"\307\274", "\307\275", 2},
  1188. {"\307\276", "\307\277", 2},
  1189. {NULL, NULL, 0}
  1190. };
  1191. UpperLowerTbl_t Lower2UpperTbl25[] = {
  1192. {"\310\200", "\310\201", 2},
  1193. {"\310\202", "\310\203", 2},
  1194. {"\310\204", "\310\205", 2},
  1195. {"\310\206", "\310\207", 2},
  1196. {"\310\210", "\310\211", 2},
  1197. {"\310\212", "\310\213", 2},
  1198. {"\310\214", "\310\215", 2},
  1199. {"\310\216", "\310\217", 2},
  1200. {"\310\220", "\310\221", 2},
  1201. {"\310\222", "\310\223", 2},
  1202. {"\310\224", "\310\225", 2},
  1203. {"\310\226", "\310\227", 2},
  1204. {NULL, NULL, 0}
  1205. };
  1206. UpperLowerTbl_t Lower2UpperTbl26[] = {
  1207. {"\306\201", "\311\223", 2},
  1208. {"\306\206", "\311\224", 2},
  1209. {"\306\211", "\311\226", 2},
  1210. {"\306\212", "\311\227", 2},
  1211. {"\306\216", "\311\230", 2},
  1212. {"\306\217", "\311\231", 2},
  1213. {"\306\220", "\311\233", 2},
  1214. {"\306\223", "\311\240", 2},
  1215. {"\306\224", "\311\243", 2},
  1216. {"\306\227", "\311\250", 2},
  1217. {"\306\226", "\311\251", 2},
  1218. {"\306\234", "\311\257", 2},
  1219. {"\306\235", "\311\262", 2},
  1220. {NULL, NULL, 0}
  1221. };
  1222. UpperLowerTbl_t Lower2UpperTbl27[] = {
  1223. {"\306\251", "\312\203", 2},
  1224. {"\306\256", "\312\210", 2},
  1225. {"\306\261", "\312\212", 2},
  1226. {"\306\262", "\312\213", 2},
  1227. {"\306\267", "\312\222", 2},
  1228. {NULL, NULL, 0}
  1229. };
  1230. UpperLowerTbl_t Lower2UpperTbl28[] = {
  1231. {"\316\206", "\316\254", 2},
  1232. {"\316\210", "\316\255", 2},
  1233. {"\316\211", "\316\256", 2},
  1234. {"\316\212", "\316\257", 2},
  1235. {"\316\221", "\316\261", 2},
  1236. {"\316\222", "\316\262", 2},
  1237. {"\316\223", "\316\263", 2},
  1238. {"\316\224", "\316\264", 2},
  1239. {"\316\225", "\316\265", 2},
  1240. {"\316\226", "\316\266", 2},
  1241. {"\316\227", "\316\267", 2},
  1242. {"\316\230", "\316\270", 2},
  1243. {"\316\231", "\316\271", 2},
  1244. {"\316\232", "\316\272", 2},
  1245. {"\316\233", "\316\273", 2},
  1246. {"\316\234", "\316\274", 2},
  1247. {"\316\235", "\316\275", 2},
  1248. {"\316\236", "\316\276", 2},
  1249. {"\316\237", "\316\277", 2},
  1250. {NULL, NULL, 0}
  1251. };
  1252. UpperLowerTbl_t Lower2UpperTbl29[] = {
  1253. {"\316\240", "\317\200", 2},
  1254. {"\316\241", "\317\201", 2},
  1255. {"\316\243", "\317\202", 2},
  1256. {"\316\243", "\317\203", 2},
  1257. {"\316\244", "\317\204", 2},
  1258. {"\316\245", "\317\205", 2},
  1259. {"\316\246", "\317\206", 2},
  1260. {"\316\247", "\317\207", 2},
  1261. {"\316\250", "\317\210", 2},
  1262. {"\316\251", "\317\211", 2},
  1263. {"\316\252", "\317\212", 2},
  1264. {"\316\253", "\317\213", 2},
  1265. {"\316\214", "\317\214", 2},
  1266. {"\316\216", "\317\215", 2},
  1267. {"\316\217", "\317\216", 2},
  1268. {"\316\222", "\317\220", 2},
  1269. {"\316\230", "\317\221", 2},
  1270. {"\316\246", "\317\225", 2},
  1271. {"\316\240", "\317\226", 2},
  1272. {"\317\242", "\317\243", 2},
  1273. {"\317\244", "\317\245", 2},
  1274. {"\317\246", "\317\247", 2},
  1275. {"\317\250", "\317\251", 2},
  1276. {"\317\252", "\317\253", 2},
  1277. {"\317\254", "\317\255", 2},
  1278. {"\317\256", "\317\257", 2},
  1279. {"\316\232", "\317\260", 2},
  1280. {"\316\241", "\317\261", 2},
  1281. {NULL, NULL, 0}
  1282. };
  1283. UpperLowerTbl_t Lower2UpperTbl2a[] = {
  1284. {"\320\220", "\320\260", 2},
  1285. {"\320\221", "\320\261", 2},
  1286. {"\320\222", "\320\262", 2},
  1287. {"\320\223", "\320\263", 2},
  1288. {"\320\224", "\320\264", 2},
  1289. {"\320\225", "\320\265", 2},
  1290. {"\320\226", "\320\266", 2},
  1291. {"\320\227", "\320\267", 2},
  1292. {"\320\230", "\320\270", 2},
  1293. {"\320\231", "\320\271", 2},
  1294. {"\320\232", "\320\272", 2},
  1295. {"\320\233", "\320\273", 2},
  1296. {"\320\234", "\320\274", 2},
  1297. {"\320\235", "\320\275", 2},
  1298. {"\320\236", "\320\276", 2},
  1299. {"\320\237", "\320\277", 2},
  1300. {NULL, NULL, 0}
  1301. };
  1302. UpperLowerTbl_t Lower2UpperTbl2b[] = {
  1303. {"\320\240", "\321\200", 2},
  1304. {"\320\241", "\321\201", 2},
  1305. {"\320\242", "\321\202", 2},
  1306. {"\320\243", "\321\203", 2},
  1307. {"\320\244", "\321\204", 2},
  1308. {"\320\245", "\321\205", 2},
  1309. {"\320\246", "\321\206", 2},
  1310. {"\320\247", "\321\207", 2},
  1311. {"\320\250", "\321\210", 2},
  1312. {"\320\251", "\321\211", 2},
  1313. {"\320\252", "\321\212", 2},
  1314. {"\320\253", "\321\213", 2},
  1315. {"\320\254", "\321\214", 2},
  1316. {"\320\255", "\321\215", 2},
  1317. {"\320\256", "\321\216", 2},
  1318. {"\320\257", "\321\217", 2},
  1319. {"\320\201", "\321\221", 2},
  1320. {"\320\202", "\321\222", 2},
  1321. {"\320\203", "\321\223", 2},
  1322. {"\320\204", "\321\224", 2},
  1323. {"\320\205", "\321\225", 2},
  1324. {"\320\206", "\321\226", 2},
  1325. {"\320\207", "\321\227", 2},
  1326. {"\320\210", "\321\230", 2},
  1327. {"\320\211", "\321\231", 2},
  1328. {"\320\212", "\321\232", 2},
  1329. {"\320\213", "\321\233", 2},
  1330. {"\320\214", "\321\234", 2},
  1331. {"\320\216", "\321\236", 2},
  1332. {"\320\217", "\321\237", 2},
  1333. {"\321\240", "\321\241", 2},
  1334. {"\321\242", "\321\243", 2},
  1335. {"\321\244", "\321\245", 2},
  1336. {"\321\246", "\321\247", 2},
  1337. {"\321\250", "\321\251", 2},
  1338. {"\321\252", "\321\253", 2},
  1339. {"\321\254", "\321\255", 2},
  1340. {"\321\256", "\321\257", 2},
  1341. {"\321\260", "\321\261", 2},
  1342. {"\321\262", "\321\263", 2},
  1343. {"\321\264", "\321\265", 2},
  1344. {"\321\266", "\321\267", 2},
  1345. {"\321\270", "\321\271", 2},
  1346. {"\321\272", "\321\273", 2},
  1347. {"\321\274", "\321\275", 2},
  1348. {"\321\276", "\321\277", 2},
  1349. {NULL, NULL, 0}
  1350. };
  1351. UpperLowerTbl_t Lower2UpperTbl2c[] = {
  1352. {"\322\200", "\322\201", 2},
  1353. {"\322\220", "\322\221", 2},
  1354. {"\322\222", "\322\223", 2},
  1355. {"\322\224", "\322\225", 2},
  1356. {"\322\226", "\322\227", 2},
  1357. {"\322\230", "\322\231", 2},
  1358. {"\322\232", "\322\233", 2},
  1359. {"\322\234", "\322\235", 2},
  1360. {"\322\236", "\322\237", 2},
  1361. {"\322\240", "\322\241", 2},
  1362. {"\322\242", "\322\243", 2},
  1363. {"\322\244", "\322\245", 2},
  1364. {"\322\246", "\322\247", 2},
  1365. {"\322\250", "\322\251", 2},
  1366. {"\322\252", "\322\253", 2},
  1367. {"\322\254", "\322\255", 2},
  1368. {"\322\256", "\322\257", 2},
  1369. {"\322\260", "\322\261", 2},
  1370. {"\322\262", "\322\263", 2},
  1371. {"\322\264", "\322\265", 2},
  1372. {"\322\266", "\322\267", 2},
  1373. {"\322\270", "\322\271", 2},
  1374. {"\322\272", "\322\273", 2},
  1375. {"\322\274", "\322\275", 2},
  1376. {"\322\276", "\322\277", 2},
  1377. {NULL, NULL, 0}
  1378. };
  1379. UpperLowerTbl_t Lower2UpperTbl2d[] = {
  1380. {"\323\201", "\323\202", 2},
  1381. {"\323\203", "\323\204", 2},
  1382. {"\323\207", "\323\210", 2},
  1383. {"\323\213", "\323\214", 2},
  1384. {"\323\220", "\323\221", 2},
  1385. {"\323\222", "\323\223", 2},
  1386. {"\323\224", "\323\225", 2},
  1387. {"\323\226", "\323\227", 2},
  1388. {"\323\230", "\323\231", 2},
  1389. {"\323\232", "\323\233", 2},
  1390. {"\323\234", "\323\235", 2},
  1391. {"\323\236", "\323\237", 2},
  1392. {"\323\240", "\323\241", 2},
  1393. {"\323\242", "\323\243", 2},
  1394. {"\323\244", "\323\245", 2},
  1395. {"\323\246", "\323\247", 2},
  1396. {"\323\250", "\323\251", 2},
  1397. {"\323\252", "\323\253", 2},
  1398. {"\323\256", "\323\257", 2},
  1399. {"\323\260", "\323\261", 2},
  1400. {"\323\262", "\323\263", 2},
  1401. {"\323\264", "\323\265", 2},
  1402. {"\323\270", "\323\271", 2},
  1403. {NULL, NULL, 0}
  1404. };
  1405. UpperLowerTbl_t Lower2UpperTbl2e[] = {
  1406. {"\324\261", "\325\241", 2},
  1407. {"\324\262", "\325\242", 2},
  1408. {"\324\263", "\325\243", 2},
  1409. {"\324\264", "\325\244", 2},
  1410. {"\324\265", "\325\245", 2},
  1411. {"\324\266", "\325\246", 2},
  1412. {"\324\267", "\325\247", 2},
  1413. {"\324\270", "\325\250", 2},
  1414. {"\324\271", "\325\251", 2},
  1415. {"\324\272", "\325\252", 2},
  1416. {"\324\273", "\325\253", 2},
  1417. {"\324\274", "\325\254", 2},
  1418. {"\324\275", "\325\255", 2},
  1419. {"\324\276", "\325\256", 2},
  1420. {"\324\277", "\325\257", 2},
  1421. {"\325\200", "\325\260", 2},
  1422. {"\325\201", "\325\261", 2},
  1423. {"\325\202", "\325\262", 2},
  1424. {"\325\203", "\325\263", 2},
  1425. {"\325\204", "\325\264", 2},
  1426. {"\325\205", "\325\265", 2},
  1427. {"\325\206", "\325\266", 2},
  1428. {"\325\207", "\325\267", 2},
  1429. {"\325\210", "\325\270", 2},
  1430. {"\325\211", "\325\271", 2},
  1431. {"\325\212", "\325\272", 2},
  1432. {"\325\213", "\325\273", 2},
  1433. {"\325\214", "\325\274", 2},
  1434. {"\325\215", "\325\275", 2},
  1435. {"\325\216", "\325\276", 2},
  1436. {"\325\217", "\325\277", 2},
  1437. {NULL, NULL, 0}
  1438. };
  1439. UpperLowerTbl_t Lower2UpperTbl2f[] = {
  1440. {"\325\220", "\326\200", 2},
  1441. {"\325\221", "\326\201", 2},
  1442. {"\325\222", "\326\202", 2},
  1443. {"\325\223", "\326\203", 2},
  1444. {"\325\224", "\326\204", 2},
  1445. {"\325\225", "\326\205", 2},
  1446. {"\325\226", "\326\206", 2},
  1447. {NULL, NULL, 0}
  1448. };
  1449. UpperLowerTbl_t Lower2UpperTbl30[] = {
  1450. {"\341\202\240", "\341\203\220", 3},
  1451. {"\341\202\241", "\341\203\221", 3},
  1452. {"\341\202\242", "\341\203\222", 3},
  1453. {"\341\202\243", "\341\203\223", 3},
  1454. {"\341\202\244", "\341\203\224", 3},
  1455. {"\341\202\245", "\341\203\225", 3},
  1456. {"\341\202\246", "\341\203\226", 3},
  1457. {"\341\202\247", "\341\203\227", 3},
  1458. {"\341\202\250", "\341\203\230", 3},
  1459. {"\341\202\251", "\341\203\231", 3},
  1460. {"\341\202\252", "\341\203\232", 3},
  1461. {"\341\202\253", "\341\203\233", 3},
  1462. {"\341\202\254", "\341\203\234", 3},
  1463. {"\341\202\255", "\341\203\235", 3},
  1464. {"\341\202\256", "\341\203\236", 3},
  1465. {"\341\202\257", "\341\203\237", 3},
  1466. {"\341\202\260", "\341\203\240", 3},
  1467. {"\341\202\261", "\341\203\241", 3},
  1468. {"\341\202\262", "\341\203\242", 3},
  1469. {"\341\202\263", "\341\203\243", 3},
  1470. {"\341\202\264", "\341\203\244", 3},
  1471. {"\341\202\265", "\341\203\245", 3},
  1472. {"\341\202\266", "\341\203\246", 3},
  1473. {"\341\202\267", "\341\203\247", 3},
  1474. {"\341\202\270", "\341\203\250", 3},
  1475. {"\341\202\271", "\341\203\251", 3},
  1476. {"\341\202\272", "\341\203\252", 3},
  1477. {"\341\202\273", "\341\203\253", 3},
  1478. {"\341\202\274", "\341\203\254", 3},
  1479. {"\341\202\275", "\341\203\255", 3},
  1480. {"\341\202\276", "\341\203\256", 3},
  1481. {"\341\202\277", "\341\203\257", 3},
  1482. {"\341\203\200", "\341\203\260", 3},
  1483. {"\341\203\201", "\341\203\261", 3},
  1484. {"\341\203\202", "\341\203\262", 3},
  1485. {"\341\203\203", "\341\203\263", 3},
  1486. {"\341\203\204", "\341\203\264", 3},
  1487. {"\341\203\205", "\341\203\265", 3},
  1488. {"\341\270\200", "\341\270\201", 3},
  1489. {"\341\270\202", "\341\270\203", 3},
  1490. {"\341\270\204", "\341\270\205", 3},
  1491. {"\341\270\206", "\341\270\207", 3},
  1492. {"\341\270\210", "\341\270\211", 3},
  1493. {"\341\270\212", "\341\270\213", 3},
  1494. {"\341\270\214", "\341\270\215", 3},
  1495. {"\341\270\216", "\341\270\217", 3},
  1496. {"\341\270\220", "\341\270\221", 3},
  1497. {"\341\270\222", "\341\270\223", 3},
  1498. {"\341\270\224", "\341\270\225", 3},
  1499. {"\341\270\226", "\341\270\227", 3},
  1500. {"\341\270\230", "\341\270\231", 3},
  1501. {"\341\270\232", "\341\270\233", 3},
  1502. {"\341\270\234", "\341\270\235", 3},
  1503. {"\341\270\236", "\341\270\237", 3},
  1504. {"\341\270\240", "\341\270\241", 3},
  1505. {"\341\270\242", "\341\270\243", 3},
  1506. {"\341\270\244", "\341\270\245", 3},
  1507. {"\341\270\246", "\341\270\247", 3},
  1508. {"\341\270\250", "\341\270\251", 3},
  1509. {"\341\270\252", "\341\270\253", 3},
  1510. {"\341\270\254", "\341\270\255", 3},
  1511. {"\341\270\256", "\341\270\257", 3},
  1512. {"\341\270\260", "\341\270\261", 3},
  1513. {"\341\270\262", "\341\270\263", 3},
  1514. {"\341\270\264", "\341\270\265", 3},
  1515. {"\341\270\266", "\341\270\267", 3},
  1516. {"\341\270\270", "\341\270\271", 3},
  1517. {"\341\270\272", "\341\270\273", 3},
  1518. {"\341\270\274", "\341\270\275", 3},
  1519. {"\341\270\276", "\341\270\277", 3},
  1520. {"\341\271\200", "\341\271\201", 3},
  1521. {"\341\271\202", "\341\271\203", 3},
  1522. {"\341\271\204", "\341\271\205", 3},
  1523. {"\341\271\206", "\341\271\207", 3},
  1524. {"\341\271\210", "\341\271\211", 3},
  1525. {"\341\271\212", "\341\271\213", 3},
  1526. {"\341\271\214", "\341\271\215", 3},
  1527. {"\341\271\216", "\341\271\217", 3},
  1528. {"\341\271\220", "\341\271\221", 3},
  1529. {"\341\271\222", "\341\271\223", 3},
  1530. {"\341\271\224", "\341\271\225", 3},
  1531. {"\341\271\226", "\341\271\227", 3},
  1532. {"\341\271\230", "\341\271\231", 3},
  1533. {"\341\271\232", "\341\271\233", 3},
  1534. {"\341\271\234", "\341\271\235", 3},
  1535. {"\341\271\236", "\341\271\237", 3},
  1536. {"\341\271\240", "\341\271\241", 3},
  1537. {"\341\271\242", "\341\271\243", 3},
  1538. {"\341\271\244", "\341\271\245", 3},
  1539. {"\341\271\246", "\341\271\247", 3},
  1540. {"\341\271\250", "\341\271\251", 3},
  1541. {"\341\271\252", "\341\271\253", 3},
  1542. {"\341\271\254", "\341\271\255", 3},
  1543. {"\341\271\256", "\341\271\257", 3},
  1544. {"\341\271\260", "\341\271\261", 3},
  1545. {"\341\271\262", "\341\271\263", 3},
  1546. {"\341\271\264", "\341\271\265", 3},
  1547. {"\341\271\266", "\341\271\267", 3},
  1548. {"\341\271\270", "\341\271\271", 3},
  1549. {"\341\271\272", "\341\271\273", 3},
  1550. {"\341\271\274", "\341\271\275", 3},
  1551. {"\341\271\276", "\341\271\277", 3},
  1552. {"\341\272\200", "\341\272\201", 3},
  1553. {"\341\272\202", "\341\272\203", 3},
  1554. {"\341\272\204", "\341\272\205", 3},
  1555. {"\341\272\206", "\341\272\207", 3},
  1556. {"\341\272\210", "\341\272\211", 3},
  1557. {"\341\272\212", "\341\272\213", 3},
  1558. {"\341\272\214", "\341\272\215", 3},
  1559. {"\341\272\216", "\341\272\217", 3},
  1560. {"\341\272\220", "\341\272\221", 3},
  1561. {"\341\272\222", "\341\272\223", 3},
  1562. {"\341\272\224", "\341\272\225", 3},
  1563. {"\341\272\240", "\341\272\241", 3},
  1564. {"\341\272\242", "\341\272\243", 3},
  1565. {"\341\272\244", "\341\272\245", 3},
  1566. {"\341\272\246", "\341\272\247", 3},
  1567. {"\341\272\250", "\341\272\251", 3},
  1568. {"\341\272\252", "\341\272\253", 3},
  1569. {"\341\272\254", "\341\272\255", 3},
  1570. {"\341\272\256", "\341\272\257", 3},
  1571. {"\341\272\260", "\341\272\261", 3},
  1572. {"\341\272\262", "\341\272\263", 3},
  1573. {"\341\272\264", "\341\272\265", 3},
  1574. {"\341\272\266", "\341\272\267", 3},
  1575. {"\341\272\270", "\341\272\271", 3},
  1576. {"\341\272\272", "\341\272\273", 3},
  1577. {"\341\272\274", "\341\272\275", 3},
  1578. {"\341\272\276", "\341\272\277", 3},
  1579. {"\341\273\200", "\341\273\201", 3},
  1580. {"\341\273\202", "\341\273\203", 3},
  1581. {"\341\273\204", "\341\273\205", 3},
  1582. {"\341\273\206", "\341\273\207", 3},
  1583. {"\341\273\210", "\341\273\211", 3},
  1584. {"\341\273\212", "\341\273\213", 3},
  1585. {"\341\273\214", "\341\273\215", 3},
  1586. {"\341\273\216", "\341\273\217", 3},
  1587. {"\341\273\220", "\341\273\221", 3},
  1588. {"\341\273\222", "\341\273\223", 3},
  1589. {"\341\273\224", "\341\273\225", 3},
  1590. {"\341\273\226", "\341\273\227", 3},
  1591. {"\341\273\230", "\341\273\231", 3},
  1592. {"\341\273\232", "\341\273\233", 3},
  1593. {"\341\273\234", "\341\273\235", 3},
  1594. {"\341\273\236", "\341\273\237", 3},
  1595. {"\341\273\240", "\341\273\241", 3},
  1596. {"\341\273\242", "\341\273\243", 3},
  1597. {"\341\273\244", "\341\273\245", 3},
  1598. {"\341\273\246", "\341\273\247", 3},
  1599. {"\341\273\250", "\341\273\251", 3},
  1600. {"\341\273\252", "\341\273\253", 3},
  1601. {"\341\273\254", "\341\273\255", 3},
  1602. {"\341\273\256", "\341\273\257", 3},
  1603. {"\341\273\260", "\341\273\261", 3},
  1604. {"\341\273\262", "\341\273\263", 3},
  1605. {"\341\273\264", "\341\273\265", 3},
  1606. {"\341\273\266", "\341\273\267", 3},
  1607. {"\341\273\270", "\341\273\271", 3},
  1608. {"\341\274\210", "\341\274\200", 3},
  1609. {"\341\274\211", "\341\274\201", 3},
  1610. {"\341\274\212", "\341\274\202", 3},
  1611. {"\341\274\213", "\341\274\203", 3},
  1612. {"\341\274\214", "\341\274\204", 3},
  1613. {"\341\274\215", "\341\274\205", 3},
  1614. {"\341\274\216", "\341\274\206", 3},
  1615. {"\341\274\217", "\341\274\207", 3},
  1616. {"\341\274\230", "\341\274\220", 3},
  1617. {"\341\274\231", "\341\274\221", 3},
  1618. {"\341\274\232", "\341\274\222", 3},
  1619. {"\341\274\233", "\341\274\223", 3},
  1620. {"\341\274\234", "\341\274\224", 3},
  1621. {"\341\274\235", "\341\274\225", 3},
  1622. {"\341\274\250", "\341\274\240", 3},
  1623. {"\341\274\251", "\341\274\241", 3},
  1624. {"\341\274\252", "\341\274\242", 3},
  1625. {"\341\274\253", "\341\274\243", 3},
  1626. {"\341\274\254", "\341\274\244", 3},
  1627. {"\341\274\255", "\341\274\245", 3},
  1628. {"\341\274\256", "\341\274\246", 3},
  1629. {"\341\274\257", "\341\274\247", 3},
  1630. {"\341\274\270", "\341\274\260", 3},
  1631. {"\341\274\271", "\341\274\261", 3},
  1632. {"\341\274\272", "\341\274\262", 3},
  1633. {"\341\274\273", "\341\274\263", 3},
  1634. {"\341\274\274", "\341\274\264", 3},
  1635. {"\341\274\275", "\341\274\265", 3},
  1636. {"\341\274\276", "\341\274\266", 3},
  1637. {"\341\274\277", "\341\274\267", 3},
  1638. {"\341\275\210", "\341\275\200", 3},
  1639. {"\341\275\211", "\341\275\201", 3},
  1640. {"\341\275\212", "\341\275\202", 3},
  1641. {"\341\275\213", "\341\275\203", 3},
  1642. {"\341\275\214", "\341\275\204", 3},
  1643. {"\341\275\215", "\341\275\205", 3},
  1644. {"\341\275\231", "\341\275\221", 3},
  1645. {"\341\275\233", "\341\275\223", 3},
  1646. {"\341\275\235", "\341\275\225", 3},
  1647. {"\341\275\237", "\341\275\227", 3},
  1648. {"\341\275\250", "\341\275\240", 3},
  1649. {"\341\275\251", "\341\275\241", 3},
  1650. {"\341\275\252", "\341\275\242", 3},
  1651. {"\341\275\253", "\341\275\243", 3},
  1652. {"\341\275\254", "\341\275\244", 3},
  1653. {"\341\275\255", "\341\275\245", 3},
  1654. {"\341\275\256", "\341\275\246", 3},
  1655. {"\341\275\257", "\341\275\247", 3},
  1656. {"\341\276\272", "\341\275\260", 3},
  1657. {"\341\276\273", "\341\275\261", 3},
  1658. {"\341\277\210", "\341\275\262", 3},
  1659. {"\341\277\211", "\341\275\263", 3},
  1660. {"\341\277\212", "\341\275\264", 3},
  1661. {"\341\277\213", "\341\275\265", 3},
  1662. {"\341\277\232", "\341\275\266", 3},
  1663. {"\341\277\233", "\341\275\267", 3},
  1664. {"\341\277\270", "\341\275\270", 3},
  1665. {"\341\277\271", "\341\275\271", 3},
  1666. {"\341\277\252", "\341\275\272", 3},
  1667. {"\341\277\253", "\341\275\273", 3},
  1668. {"\341\277\272", "\341\275\274", 3},
  1669. {"\341\277\273", "\341\275\275", 3},
  1670. {"\341\276\210", "\341\276\200", 3},
  1671. {"\341\276\211", "\341\276\201", 3},
  1672. {"\341\276\212", "\341\276\202", 3},
  1673. {"\341\276\213", "\341\276\203", 3},
  1674. {"\341\276\214", "\341\276\204", 3},
  1675. {"\341\276\215", "\341\276\205", 3},
  1676. {"\341\276\216", "\341\276\206", 3},
  1677. {"\341\276\217", "\341\276\207", 3},
  1678. {"\341\276\230", "\341\276\220", 3},
  1679. {"\341\276\231", "\341\276\221", 3},
  1680. {"\341\276\232", "\341\276\222", 3},
  1681. {"\341\276\233", "\341\276\223", 3},
  1682. {"\341\276\234", "\341\276\224", 3},
  1683. {"\341\276\235", "\341\276\225", 3},
  1684. {"\341\276\236", "\341\276\226", 3},
  1685. {"\341\276\237", "\341\276\227", 3},
  1686. {"\341\276\250", "\341\276\240", 3},
  1687. {"\341\276\251", "\341\276\241", 3},
  1688. {"\341\276\252", "\341\276\242", 3},
  1689. {"\341\276\253", "\341\276\243", 3},
  1690. {"\341\276\254", "\341\276\244", 3},
  1691. {"\341\276\255", "\341\276\245", 3},
  1692. {"\341\276\256", "\341\276\246", 3},
  1693. {"\341\276\257", "\341\276\247", 3},
  1694. {"\341\276\270", "\341\276\260", 3},
  1695. {"\341\276\271", "\341\276\261", 3},
  1696. {"\341\276\274", "\341\276\263", 3},
  1697. {"\341\277\214", "\341\277\203", 3},
  1698. {"\341\277\230", "\341\277\220", 3},
  1699. {"\341\277\231", "\341\277\221", 3},
  1700. {"\341\277\250", "\341\277\240", 3},
  1701. {"\341\277\251", "\341\277\241", 3},
  1702. {"\341\277\254", "\341\277\245", 3},
  1703. {"\341\277\274", "\341\277\263", 3},
  1704. {NULL, NULL, 0}
  1705. };
  /*
   * Lower2UpperTbl31: case pairs for the 3-byte UTF-8 characters whose lead
   * byte is \357.  The first string of each entry encodes U+FF21..U+FF3A
   * (fullwidth Latin capital letters A-Z) and the second string encodes
   * U+FF41..U+FF5A (the matching fullwidth small letters).
   * The third field is 3 for every entry; presumably it is the byte length
   * of the upper-case form (read as "tsz" by the lookup code) -- confirm
   * against the UpperLowerTbl_t declaration.
   */
  1706. UpperLowerTbl_t Lower2UpperTbl31[] = {
  1707. {"\357\274\241", "\357\275\201", 3},
  1708. {"\357\274\242", "\357\275\202", 3},
  1709. {"\357\274\243", "\357\275\203", 3},
  1710. {"\357\274\244", "\357\275\204", 3},
  1711. {"\357\274\245", "\357\275\205", 3},
  1712. {"\357\274\246", "\357\275\206", 3},
  1713. {"\357\274\247", "\357\275\207", 3},
  1714. {"\357\274\250", "\357\275\210", 3},
  1715. {"\357\274\251", "\357\275\211", 3},
  1716. {"\357\274\252", "\357\275\212", 3},
  1717. {"\357\274\253", "\357\275\213", 3},
  1718. {"\357\274\254", "\357\275\214", 3},
  1719. {"\357\274\255", "\357\275\215", 3},
  1720. {"\357\274\256", "\357\275\216", 3},
  1721. {"\357\274\257", "\357\275\217", 3},
  1722. {"\357\274\260", "\357\275\220", 3},
  1723. {"\357\274\261", "\357\275\221", 3},
  1724. {"\357\274\262", "\357\275\222", 3},
  1725. {"\357\274\263", "\357\275\223", 3},
  1726. {"\357\274\264", "\357\275\224", 3},
  1727. {"\357\274\265", "\357\275\225", 3},
  1728. {"\357\274\266", "\357\275\226", 3},
  1729. {"\357\274\267", "\357\275\227", 3},
  1730. {"\357\274\270", "\357\275\230", 3},
  1731. {"\357\274\271", "\357\275\231", 3},
  1732. {"\357\274\272", "\357\275\232", 3},
  1733. {NULL, NULL, 0}
  1734. /* the NULL entry above terminates the table; entry layout is: upper, lower, size */
  1735. };
  /*
   * Lower2UpperTbl2: per-lead-byte case tables for 2-byte UTF-8 characters.
   * The lookup code in this file indexes it with (lead byte - LU2S), so slot 0
   * belongs to lead byte \303 and the last slot to \326.  A NULL slot means
   * there is no case table for that lead byte; the lookup code then treats
   * the character as having no case mapping.
   */
  1736. UpperLowerTbl_t *Lower2UpperTbl2[] = {
  1737. Lower2UpperTbl20, /* \303 */
  1738. Lower2UpperTbl21, /* \304 */
  1739. Lower2UpperTbl22, /* \305 */
  1740. Lower2UpperTbl23, /* \306 */
  1741. Lower2UpperTbl24, /* \307 */
  1742. Lower2UpperTbl25, /* \310 */
  1743. Lower2UpperTbl26, /* \311 */
  1744. Lower2UpperTbl27, /* \312 */
  1745. NULL, /* \313 */
  1746. NULL, /* \314 */
  1747. NULL, /* \315 */
  1748. Lower2UpperTbl28, /* \316 */
  1749. Lower2UpperTbl29, /* \317 */
  1750. Lower2UpperTbl2a, /* \320 */
  1751. Lower2UpperTbl2b, /* \321 */
  1752. Lower2UpperTbl2c, /* \322 */
  1753. Lower2UpperTbl2d, /* \323 */
  1754. NULL, /* \324 */
  1755. Lower2UpperTbl2e, /* \325 */
  1756. Lower2UpperTbl2f /* \326 */
  1757. };
  /*
   * Lower2UpperTbl3: per-lead-byte case tables for 3-byte UTF-8 characters.
   * The lookup code in this file indexes it with (lead byte - LU3S), so slot 0
   * belongs to lead byte \341 and the last slot to \357.  Only those two lead
   * bytes have a table; every NULL slot in between means "no case mapping".
   */
  1758. UpperLowerTbl_t *Lower2UpperTbl3[] = {
  1759. Lower2UpperTbl30, /* \341 */
  1760. NULL, /* \342 */
  1761. NULL, /* \343 */
  1762. NULL, /* \344 */
  1763. NULL, /* \345 */
  1764. NULL, /* \346 */
  1765. NULL, /* \347 */
  1766. NULL, /* \350 */
  1767. NULL, /* \351 */
  1768. NULL, /* \352 */
  1769. NULL, /* \353 */
  1770. NULL, /* \354 */
  1771. NULL, /* \355 */
  1772. NULL, /* \356 */
  1773. Lower2UpperTbl31 /* \357 */
  1774. };
  /*
   * Lead-byte bounds of the lower-to-upper dispatch tables:
   *   LU2S..LU2E  first/last lead byte covered by Lower2UpperTbl2 (2-byte chars)
   *   LU3S..LU3E  first/last lead byte covered by Lower2UpperTbl3 (3-byte chars)
   * The cast makes the value positive even where plain char is signed.
   * Each expansion is fully parenthesized so it stays a single operand in any
   * expression (e.g. "sizeof LU2S" or "x - LU2S").
   */
  #define LU2S ((unsigned char)'\303')
  #define LU2E ((unsigned char)'\326')
  #define LU3S ((unsigned char)'\341')
  #define LU3E ((unsigned char)'\357')
  1779. /*
  1780. * slapi_utf8StrToUpper: translate lower-case string to upper-case
  1781. *
  1782. * input: a null terminated UTF-8 string
  1783. * output: a null terminated UTF-8 string which characters are
  1784. * converted to upper-case; characters which are not
  1785. * lower-case are copied as is. If it's not considered
  1786. * a UTF-8 string, NULL is returned.
  1787. *
  1788. * Notes: This function takes a string (made of multiple UTF-8 characters)
  1789. * for the input (not one character as in "toupper").
  1790. * Output string is allocated in this function, which needs to be
  1791. * released when it's not needed any more.
  1792. */
  1793. unsigned char *
  1794. slapi_UTF8STRTOUPPER(char *s)
  1795. {
  1796. return slapi_utf8StrToUpper((unsigned char *)s);
  1797. }
  1798. unsigned char *
  1799. slapi_utf8StrToUpper(unsigned char *s)
  1800. {
  1801. UpperLowerTbl_t *ultp;
  1802. unsigned char *p, *np, *tail;
  1803. unsigned char *up, *uphead;
  1804. int len, sz;
  1805. if (s == NULL || *s == '\0') {
  1806. return s;
  1807. }
  1808. len = strlen((char *)s);
  1809. tail = s + len;
  1810. uphead = up = (unsigned char *)slapi_ch_malloc(len + 1);
  1811. p = s;
  1812. while ((np = (unsigned char *)ldap_utf8next((char *)p)) <= tail) {
  1813. switch(sz = np - p) {
  1814. case 1: /* ASCII */
  1815. sprintf((char *)up, "%c", toupper(*p));
  1816. break;
  1817. case 2: /* 2 bytes */
  1818. if (*p < LU2S || *p > LU2E) { /* out of range */
  1819. memcpy(up, p, sz);
  1820. break;
  1821. }
  1822. for (ultp = Lower2UpperTbl2[*p - LU2S];
  1823. ultp && ultp->lower && memcmp(p, ultp->lower, sz);
  1824. ultp++)
  1825. ;
  1826. if (!ultp) { /* out of range */
  1827. memcpy(up, p, sz);
  1828. } else if (ultp->lower) { /* matched */
  1829. memcpy(up, ultp->upper, ultp->tsz);
  1830. sz = ultp->tsz;
  1831. } else {
  1832. memcpy(up, p, sz);
  1833. }
  1834. break;
  1835. case 3: /* 3 bytes */
  1836. if (*p != LU3S && *p != LU3E) { /* out of range */
  1837. memcpy(up, p, sz);
  1838. break;
  1839. }
  1840. for (ultp = Lower2UpperTbl3[*p - LU3S];
  1841. ultp && ultp->lower && memcmp(p, ultp->lower, sz);
  1842. ultp++)
  1843. ;
  1844. if (!ultp) { /* out of range */
  1845. memcpy(up, p, sz);
  1846. } else if (ultp->lower) { /* matched */
  1847. memcpy(up, ultp->upper, sz);
  1848. } else {
  1849. memcpy(up, p, sz);
  1850. }
  1851. break;
  1852. case 4:
  1853. memcpy(up, p, sz);
  1854. break;
  1855. default: /* not UTF-8 */
  1856. slapi_ch_free((void **)&uphead);
  1857. return NULL;
  1858. }
  1859. up += sz;
  1860. p = np;
  1861. if (p == tail) {
  1862. break;
  1863. }
  1864. }
  1865. *up = '\0';
  1866. return uphead;
  1867. }
  1868. /*
  1869. * slapi_utf8ToUpper: translate lower-case character to upper-case
  1870. *
  1871. * input: a UTF-8 character (s)
  1872. * output: a UTF-8 character which is converted to upper-case (d)
  1873. * length (in bytes) of input character (ssz) and
  1874. * output character (dsz)
  1875. *
  1876. * Notes: This function takes a UTF-8 character (could be multiple bytes)
  1877. * for the input. Memory for the output character is NOT allocated
  1878. * in this function, caller should have allocated it (d).
  1879. * "memmove" is used since (s) and (d) are overlapped.
  1880. */
  1881. void
  1882. slapi_UTF8TOUPPER(char *s, char *d, int *ssz, int *dsz)
  1883. {
  1884. slapi_utf8ToUpper((unsigned char *)s, (unsigned char *)d, ssz, dsz);
  1885. return;
  1886. }
  1887. void
  1888. slapi_utf8ToUpper(unsigned char *s, unsigned char *d, int *ssz, int *dsz)
  1889. {
  1890. UpperLowerTbl_t *ultp;
  1891. unsigned char *tail;
  1892. if (s == NULL || *s == '\0') {
  1893. *ssz = *dsz = 0;
  1894. return;
  1895. }
  1896. if (!(*s & 0x80)) { /* ASCII */
  1897. *dsz = *ssz = 1;
  1898. *d = toupper(*s);
  1899. return;
  1900. }
  1901. tail = (unsigned char *)ldap_utf8next((char *)s);
  1902. *dsz = *ssz = tail - s;
  1903. switch(*ssz) {
  1904. case 1: /* ASCII */
  1905. *d = toupper(*s);
  1906. break;
  1907. case 2: /* 2 bytes */
  1908. if (*s < LU2S || *s > LU2E) { /* out of range */
  1909. memmove(d, s, *ssz);
  1910. break;
  1911. }
  1912. for (ultp = Lower2UpperTbl2[*s - LU2S];
  1913. ultp && ultp->lower && memcmp(s, ultp->lower, *ssz);
  1914. ultp++)
  1915. ;
  1916. if (!ultp) { /* out of range */
  1917. memmove(d, s, *ssz);
  1918. } else if (ultp->lower) { /* matched */
  1919. memmove(d, ultp->upper, ultp->tsz);
  1920. *dsz = ultp->tsz;
  1921. } else {
  1922. memmove(d, s, *ssz);
  1923. }
  1924. break;
  1925. case 3: /* 3 bytes */
  1926. if (*s != LU3S && *s != LU3E) { /* out of range */
  1927. memmove(d, s, *ssz);
  1928. break;
  1929. }
  1930. for (ultp = Lower2UpperTbl3[*s - LU3S];
  1931. ultp && ultp->lower && memcmp(s, ultp->lower, *ssz);
  1932. ultp++)
  1933. ;
  1934. if (!ultp) { /* out of range */
  1935. memmove(d, s, *ssz);
  1936. } else if (ultp->lower) { /* matched */
  1937. memmove(d, ultp->upper, *ssz);
  1938. } else {
  1939. memmove(d, s, *ssz);
  1940. }
  1941. break;
  1942. }
  1943. return;
  1944. }
  1945. /*
  1946. * slapi_utf8isLower: tests for a character that is a lower-case letter in
  1947. * UTF-8
  1948. *
  1949. * input: a UTF-8 character (could be multi-byte)
  1950. * output: 1 if the character is a lower-case letter
  1951. * 0 if the character is not a lower-case letter
  1952. */
  1953. int
  1954. slapi_UTF8ISLOWER(char *s)
  1955. {
  1956. return slapi_utf8isLower((unsigned char *)s);
  1957. }
  1958. int
  1959. slapi_utf8isLower(unsigned char *s)
  1960. {
  1961. UpperLowerTbl_t *ultp;
  1962. unsigned char *next;
  1963. int sz;
  1964. if (s == NULL || *s == '\0') {
  1965. return 0;
  1966. }
  1967. if (!(*s & 0x80)) { /* ASCII */
  1968. return islower(*s);
  1969. }
  1970. next = (unsigned char *)ldap_utf8next((char *)s);
  1971. switch(sz = next - s) {
  1972. case 1: /* ASCII */
  1973. return islower(*s);
  1974. case 2:
  1975. if (*s < LU2S || *s > LU2E) { /* out of range */
  1976. return 0;
  1977. }
  1978. for (ultp = Lower2UpperTbl2[*s - LU2S];
  1979. ultp && ultp->lower && memcmp(s, ultp->lower, sz);
  1980. ultp++)
  1981. ;
  1982. if (!ultp) { /* out of range */
  1983. return 0;
  1984. } else if (ultp->lower) { /* matched */
  1985. return 1;
  1986. } else {
  1987. return 0;
  1988. }
  1989. case 3:
  1990. if (*s < LU3S || *s > LU3E) { /* out of range */
  1991. return 0;
  1992. }
  1993. for (ultp = Lower2UpperTbl3[*s - LU3S];
  1994. ultp && ultp->lower && memcmp(s, ultp->lower, sz);
  1995. ultp++)
  1996. ;
  1997. if (!ultp) { /* out of range */
  1998. return 0;
  1999. } else if (ultp->lower) { /* matched */
  2000. return 1;
  2001. } else {
  2002. return 0;
  2003. }
  2004. default:
  2005. return 0;
  2006. }
  2007. }
  /*
   * slapi_utf8casecmp: case-insensitive string compare for UTF-8 strings
   *
   * input:  two UTF-8 strings (s0, s1) to be compared
   * output: positive number, if s0 is after s1
   *         0, if the two strings are identical ignoring the case
   *         negative number, if s1 is after s0
   *
   * Rules: If both strings are NULL or 0-length, 0 is returned.
   *        If only one of the strings is NULL or 0-length, that string is
   *        the smaller one.
   *        If at least one of the strings contains no 8-bit characters,
   *        the system provided strcasecmp is used.
   *        If one or both of the strings are not UTF-8, strcasecmp is used
   *        as well.
   *        Otherwise both strings are converted to lower-case UTF-8 and
   *        compared character by character from the beginning:
   *          If the corresponding characters differ in length, the
   *          difference of the two lengths is returned.
   *          If they have the same length, their bytes are compared and
   *          the difference of the first two bytes that differ is returned.
   *        If one string ends before the other, the difference of what is
   *        left over is returned.
   *
   * Notes: Don't use this function for collation
   *        1) there's no notion of locale in this function.
   *        2) it's UTF-8 code order, which is different from the locale
   *           based collation.
   *
   * slapi_UTF8CASECMP is the same function taking plain "char *" strings.
   */
  int
  slapi_UTF8CASECMP(char *s0, char *s1)
  {
      return slapi_utf8casecmp((unsigned char *)s0, (unsigned char *)s1);
  }

  int
  slapi_utf8casecmp(unsigned char *s0, unsigned char *s1)
  {
      unsigned char *low0 = NULL; /* lower-case copy of s0 */
      unsigned char *low1 = NULL; /* lower-case copy of s1 */
      unsigned char *cur0, *cur1; /* current position in each copy */
      unsigned char *nxt0, *nxt1; /* start of the following character */
      unsigned char *end0, *end1; /* terminating NUL of each copy */
      int empty0 = (s0 == NULL || *s0 == '\0');
      int empty1 = (s1 == NULL || *s1 == '\0');
      int hi0, hi1;
      int result;

      if (empty0) {
          return empty1 ? 0 : -1; /* an empty string sorts first */
      }
      if (empty1) {
          return 1;
      }

      hi0 = slapi_has8thBit(s0);
      hi1 = slapi_has8thBit(s1);
      if (hi0 != hi1 || hi0 == 0) {
          /* at least one string has no 8-bit characters */
          return strcasecmp((char *)s0, (char *)s1);
      }

      low0 = slapi_utf8StrToLower(s0);
      low1 = slapi_utf8StrToLower(s1);
      if (low0 == NULL || low1 == NULL || *low0 == '\0' || *low1 == '\0') {
          /* either one is not a UTF-8 string */
          result = strcasecmp((char *)s0, (char *)s1);
          goto done;
      }

      cur0 = low0;
      cur1 = low1;
      end0 = low0 + strlen((char *)low0);
      end1 = low1 + strlen((char *)low1);
      for (;;) {
          nxt0 = (unsigned char *)ldap_utf8next((char *)cur0);
          nxt1 = (unsigned char *)ldap_utf8next((char *)cur1);
          if (nxt0 > end0 || nxt1 > end1) {
              /* the shared part is identical: compare what is left over */
              result = (int)(end0 - nxt0) - (int)(end1 - nxt1);
              break;
          }
          result = (int)(nxt0 - cur0) - (int)(nxt1 - cur1);
          if (result != 0) { /* characters of different length */
              break;
          }
          /* same length: walk both characters up to the first difference */
          while (cur0 < nxt0 && *cur0 == *cur1) {
              cur0++;
              cur1++;
          }
          if (cur0 < nxt0) {
              result = *cur0 - *cur1;
              break;
          }
          /* here cur0 == nxt0 and cur1 == nxt1: go on with the next pair */
      }

  done:
      if (low0) {
          slapi_ch_free((void **)&low0);
      }
      if (low1) {
          slapi_ch_free((void **)&low1);
      }
      return result;
  }
  /*
   * slapi_utf8ncasecmp: case-insensitive string compare (n chars) for UTF-8
   *                     strings
   *
   * input:  two UTF-8 strings (s0, s1) to be compared
   *         n - maximum number of characters to compare
   * output: positive number, if s0 is after s1
   *         0, if the two strings are identical ignoring the case within
   *            the first n characters (always 0 when n is 0)
   *         negative number, if s1 is after s0
   *
   * Rules: Same as slapi_utf8casecmp except for the limit of n characters:
   *        NULL or 0-length strings sort first; strncasecmp is used when at
   *        least one string has no 8-bit characters or is not UTF-8;
   *        otherwise the lower-case forms are compared character by
   *        character (length first, then bytes).
   *
   * Notes: Don't use this function for collation
   *        1) there's no notion of locale in this function.
   *        2) it's UTF-8 code order, which is different from the locale
   *           based collation.
   *        n counts characters, NOT bytes.  In the strncasecmp fallback n
   *        is passed on as a byte count.
   *
   * slapi_UTF8NCASECMP is the same function taking plain "char *" strings.
   */
  int
  slapi_UTF8NCASECMP(char *s0, char *s1, int n)
  {
      return slapi_utf8ncasecmp((unsigned char *)s0, (unsigned char *)s1, n);
  }

  int
  slapi_utf8ncasecmp(unsigned char *s0, unsigned char *s1, int n)
  {
      unsigned char *low0 = NULL; /* lower-case copy of s0 */
      unsigned char *low1 = NULL; /* lower-case copy of s1 */
      unsigned char *cur0, *cur1; /* current position in each copy */
      unsigned char *nxt0, *nxt1; /* start of the following character */
      unsigned char *end0, *end1; /* terminating NUL of each copy */
      int empty0, empty1;
      int hi0, hi1;
      int cnt;                    /* characters compared so far */
      int result;

      if (n == 0) {
          /* comparing no characters: equal, whatever the strings are */
          return 0;
      }

      empty0 = (s0 == NULL || *s0 == '\0');
      empty1 = (s1 == NULL || *s1 == '\0');
      if (empty0) {
          return empty1 ? 0 : -1; /* an empty string sorts first */
      }
      if (empty1) {
          return 1;
      }

      hi0 = slapi_has8thBit(s0);
      hi1 = slapi_has8thBit(s1);
      if (hi0 != hi1 || hi0 == 0) {
          /* at least one string has no 8-bit characters */
          return strncasecmp((char *)s0, (char *)s1, n);
      }

      low0 = slapi_utf8StrToLower(s0);
      low1 = slapi_utf8StrToLower(s1);
      if (low0 == NULL || low1 == NULL || *low0 == '\0' || *low1 == '\0') {
          /* either one is not a UTF-8 string */
          result = strncasecmp((char *)s0, (char *)s1, n);
          goto done;
      }

      cur0 = low0;
      cur1 = low1;
      end0 = low0 + strlen((char *)low0);
      end1 = low1 + strlen((char *)low1);
      for (cnt = 0;; cnt++) {
          if (cnt == n) {
              /* the first n characters are identical */
              result = 0;
              break;
          }
          nxt0 = (unsigned char *)ldap_utf8next((char *)cur0);
          nxt1 = (unsigned char *)ldap_utf8next((char *)cur1);
          if (nxt0 > end0 || nxt1 > end1) {
              /* the shared part is identical: compare what is left over */
              result = (int)(end0 - nxt0) - (int)(end1 - nxt1);
              break;
          }
          result = (int)(nxt0 - cur0) - (int)(nxt1 - cur1);
          if (result != 0) { /* characters of different length */
              break;
          }
          /* same length: walk both characters up to the first difference */
          while (cur0 < nxt0 && *cur0 == *cur1) {
              cur0++;
              cur1++;
          }
          if (cur0 < nxt0) {
              result = *cur0 - *cur1;
              break;
          }
          /* here cur0 == nxt0 and cur1 == nxt1: go on with the next pair */
      }

  done:
      if (low0) {
          slapi_ch_free((void **)&low0);
      }
      if (low1) {
          slapi_ch_free((void **)&low1);
      }
      return result;
  }