| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427 |
- /** --- BEGIN COPYRIGHT BLOCK ---
- * This Program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License as published by the Free Software
- * Foundation; version 2 of the License.
- *
- * This Program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this Program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * In addition, as a special exception, Red Hat, Inc. gives You the additional
- * right to link the code of this Program with code not covered under the GNU
- * General Public License ("Non-GPL Code") and to distribute linked combinations
- * including the two, subject to the limitations in this paragraph. Non-GPL Code
- * permitted under this exception must only link to the code of this Program
- * through those well defined interfaces identified in the file named EXCEPTION
- * found in the source code files (the "Approved Interfaces"). The files of
- * Non-GPL Code may instantiate templates or use macros or inline functions from
- * the Approved Interfaces without causing the resulting work to be covered by
- * the GNU General Public License. Only Red Hat, Inc. may make changes or
- * additions to the list of Approved Interfaces. You must obey the GNU General
- * Public License in all respects for all of the Program code and other code used
- * in conjunction with the Program except the Non-GPL Code covered by this
- * exception. If you modify this file, you may extend this exception to your
- * version of the file, but you are not obligated to do so. If you do not wish to
- * provide this exception without modification, you must delete this exception
- * statement from your version and license this file solely under the GPL without
- * exception.
- *
- *
- * Copyright (C) 2001 Sun Microsystems, Inc. Used by permission.
- * Copyright (C) 2005 Red Hat, Inc.
- * All rights reserved.
- --- END COPYRIGHT BLOCK --- */
- /*
- * collate.c -- routines to collate character strings
- */
- #include <stdio.h>
- #include "dsgw.h"
- #include <ldap.h> /* ldap_utf8* */
- #include <unicode/ucol.h> /* Collation */
- #include <unicode/ucnv.h> /* Conversion */
- #include <unicode/ustring.h> /* UTF8 conversion */
- #ifdef _WINDOWS
- #undef strcasecmp
- #define strcasecmp _strcmpi
- #endif
- /*
- Convert the given string s, encoded in UTF8, into a Unicode (UTF16 or 32, depending on sizeof(UChar))
- string for use with collation and key generation
- The given string U will be filled in if it's capacity (given by Ulen) is big enough,
- otherwise, it will be malloced (or realloced if already allocated)
- */
- static UErrorCode
- SetUnicodeStringFromUTF_8 (UChar** U, int32_t* Ulen, int *isAlloced, const char *s)
- /* Copy the UTF-8 string bv into the UnicodeString U,
- but remove leading and trailing whitespace, and
- convert consecutive whitespaces into a single space.
- Ulen is set to the number of UChars in the array (not necessarily the number of bytes!)
- */
- {
- int32_t len = 0; /* length of non-space string */
- int32_t needLen = 0; /* number of bytes needed for string */
- UErrorCode err = U_ZERO_ERROR;
- const char* begin; /* will point to beginning of non-space in s */
- /* first, set s to the first non-space char in bv->bv_val */
- while (s && *s && ldap_utf8isspace((char *)s)) { /* cast away const */
- const char *next = LDAP_UTF8NEXT((char *)s); /* cast away const */
- s = next;
- }
- begin = s;
- if (!s || !*s) {
- return U_INVALID_FORMAT_ERROR; /* don't know what else to use here */
- }
- /* next, find the length of the non-space string */
- while (s && *s && !ldap_utf8isspace((char *)s)) { /* cast away const */
- const char *next = LDAP_UTF8NEXT((char *)s); /* cast away const */
- len += (next - s); /* count bytes, not chars */
- needLen++; /* needLen counts chars */
- s = next;
- }
- if (needLen == 0) { /* bogus */
- return U_INVALID_FORMAT_ERROR; /* don't know what else to use here */
- }
- needLen++; /* +1 for trailing UChar space */
- if (needLen > *Ulen) { /* need more space */
- if (*isAlloced) { /* realloc space */
- *U = (UChar *)dsgw_ch_realloc((char *)*U, sizeof(UChar) * needLen);
- } else { /* must use malloc */
- *U = (UChar *)dsgw_ch_malloc(sizeof(UChar) * needLen);
- *isAlloced = 1; /* no longer using fixed buffer */
- }
- *Ulen = needLen;
- }
- u_strFromUTF8(*U, sizeof(UChar) * (*Ulen), NULL, begin, len, &err);
- return err;
- }
- static UCollator*
- get_collator (int flavor)
- {
- static UCollator* collator[2] = {NULL, NULL};
- /* dsgw_emitf("get_collator (%i)<br>\n", flavor); */
- if (collator[flavor] == NULL &&
- gc->gc_ClientLanguage && gc->gc_ClientLanguage[0]) {
- /* Try to create a Collation for the client's preferred language */
- ACCEPT_LANGUAGE_LIST langlist;
- size_t langs;
- /* dsgw_emitf ("ClientLanguage = \"%s\"<br>\n", gc->gc_ClientLanguage); */
- langs = AcceptLangList (gc->gc_ClientLanguage, langlist);
- if (langs <= 0) {
- dsgw_emitf ("AcceptLangList (%s) = %lu<br>\n",
- gc->gc_ClientLanguage, (unsigned long)langs);
- } else {
- UCollator* fallback_collator = NULL;
- UCollator* default_collator = NULL;
- UErrorCode err = U_ZERO_ERROR;
- size_t i;
- for (i = 0; i < langs; ++i) {
- /* Try to create a Collation for langs[i] */
- char* lang = langlist[i];
- collator[flavor] = ucol_open(lang, &err);
- if (err == U_ZERO_ERROR && collator[flavor]) {
- dsgw_emitf("<!-- New Collator (%s) == SUCCESS -->\n", lang);
- break;
- } else {
- if (err == U_USING_FALLBACK_WARNING) {
- if (fallback_collator == NULL) {
- fallback_collator = collator[flavor];
- dsgw_emitf("<!-- New Collator (%s) == USING_FALLBACK_LOCALE -->\n", lang);
- } else {
- ucol_close (collator[flavor]);
- }
- } else if (err == U_USING_DEFAULT_WARNING) {
- if (default_collator == NULL) {
- default_collator = collator[flavor];
- dsgw_emitf("<!-- New Collator (%s) == USING_DEFAULT_LOCALE -->\n", lang);
- } else {
- ucol_close (collator[flavor]);
- }
- } else {
- dsgw_emitf("New Collator error (%s) == %i<br>\n", lang, err);
- }
- collator[flavor] = NULL;
- }
- }
- if (collator[flavor] == NULL) {
- if (fallback_collator != NULL) {
- collator[flavor] = fallback_collator;
- fallback_collator = NULL;
- } else if (default_collator != NULL) {
- collator[flavor] = default_collator;
- default_collator = NULL;
- }
- }
- if (collator[flavor] != NULL) {
- switch (flavor) {
- case CASE_EXACT:
- dsgw_emits("<!-- CollationSetStrength (TERTIARY) -->\n");
- ucol_setAttribute (collator[flavor], UCOL_STRENGTH, UCOL_TERTIARY, &err);
- break;
- default: /* CASE_IGNORE */
- if (dsgw_scriptorder()->so_caseIgnoreAccents) {
- dsgw_emits("<!-- CollationSetStrength (PRIMARY) -->\n");
- ucol_setAttribute (collator[flavor], UCOL_STRENGTH, UCOL_PRIMARY, &err);
- } else {
- dsgw_emits("<!-- CollationSetStrength (SECONDARY) -->\n");
- ucol_setAttribute (collator[flavor], UCOL_STRENGTH, UCOL_SECONDARY, &err);
- }
- break;
- }
- }
- if (default_collator != NULL) {
- ucol_close (default_collator);
- default_collator = NULL;
- }
- if (fallback_collator != NULL) {
- ucol_close (fallback_collator);
- fallback_collator = NULL;
- }
- }
- }
- return collator[flavor];
- }
/*
 * Byte-wise, case-sensitive comparison of two string values passed by
 * address (the shape expected of a valcmp_t callback).
 */
static int
valcmp (const char** L, const char** R)
{
    const char *lhs = *L;
    const char *rhs = *R;

    return strcmp (lhs, rhs);
}
/*
 * Case-insensitive comparison (strcasecmp) of two string values passed by
 * address (the shape expected of a valcmp_t callback).
 */
static int
valcasecmp (const char** L, const char** R)
{
    const char *lhs = *L;
    const char *rhs = *R;

    return strcasecmp (lhs, rhs);
}
- static int
- strXcollate (int flavor, const char* L, const char* R)
- {
- UCollator* collator = get_collator (flavor);
- if (collator != NULL) {
- UChar LuBuffer[128];
- UChar* Lu = LuBuffer;
- int32_t LuLen = u_strlen(LuBuffer);
- int LuisAlloced = 0;
- if (SetUnicodeStringFromUTF_8 (&Lu, &LuLen, &LuisAlloced, L) == U_ZERO_ERROR) {
- UChar RuBuffer[128];
- UChar* Ru = RuBuffer;
- int32_t RuLen = u_strlen(RuBuffer);
- int RuisAlloced = 0;
- if (SetUnicodeStringFromUTF_8 (&Ru, &RuLen, &RuisAlloced, R) == U_ZERO_ERROR) {
- UCollationResult colres = ucol_strcoll(collator, Lu, LuLen, Ru, RuLen);
- int result = 0;
- switch (colres) {
- case UCOL_LESS:
- result = -1;
- break;
- case UCOL_GREATER:
- result = 1;
- break;
- default:
- break;
- }
- #ifdef DSGW_DEBUG
- {
- auto char* Le = dsgw_strdup_escaped (L);
- auto char* Re = dsgw_strdup_escaped (R);
- dsgw_log ("strXcollate:%s %s %s\n",
- Le, result < 0 ? "<" : (result == 0 ? "=" : ">"), Re);
- free (Le);
- free (Re);
- }
- #endif
- if (RuisAlloced) {
- free(Ru);
- Ru = NULL;
- }
- if (LuisAlloced) {
- free(Lu);
- Lu = NULL;
- }
- return result;
- }
- if (LuisAlloced) {
- free(Lu);
- Lu = NULL;
- }
- }
- }
- return flavor ? strcasecmp (L, R) : strcmp (L, R);
- }
- static int
- strcollate (const char* L, const char* R)
- {
- return strXcollate (CASE_EXACT, L, R);
- }
- static int
- strcasecollate (const char* L, const char* R)
- {
- return strXcollate (CASE_INSENSITIVE, L, R);
- }
- static int
- valcollate (const char** L, const char** R)
- {
- return strXcollate (CASE_EXACT, *L, *R);
- }
- static int
- valcasecollate (const char** L, const char** R)
- {
- return strXcollate (CASE_INSENSITIVE, *L, *R);
- }
- strcmp_t
- dsgw_strcmp (int flavor)
- {
- if (get_collator (flavor) != NULL) {
- return flavor ? strcasecollate : strcollate;
- }
- return flavor ? strcasecmp : strcmp;
- }
- valcmp_t
- dsgw_valcmp (int flavor)
- {
- if (get_collator (flavor) != NULL) {
- return flavor ? valcasecollate : valcollate;
- }
- return flavor ? valcasecmp : valcmp;
- }
- static size_t
- dsgw_scriptof (const char* s, scriptrange_t** ranges)
- {
- auto size_t result = 0;
- if (s && ranges) {
- auto unsigned long u;
- while ((u = LDAP_UTF8GETCC (s)) != 0) {
- auto size_t ss;
- auto scriptrange_t* sr;
- for (ss = 0; (sr = ranges[ss]) != NULL; ++ss) {
- do {
- if (sr->sr_min <= u && u <= sr->sr_max) {
- break;
- }
- } while ((sr = sr->sr_next) != NULL);
- if (sr) {
- if (result < ss) result = ss;
- break;
- }
- }
- if (!sr) {
- result = ss;
- break;
- }
- }
- }
- #ifdef DSGW_DEBUG
- dsgw_log ("script %lu\n", (unsigned long)result);
- #endif
- return result;
- }
- static struct berval key_first = {0, 0};
- static struct berval key_last = {0, 0};
- struct berval* dsgw_key_first = &key_first;
- struct berval* dsgw_key_last = &key_last;
- void LDAP_C LDAP_CALLBACK
- dsgw_keyfree( void *arg, const struct berval* key )
- {
- if (key->bv_val) free (key->bv_val);
- else if (key == dsgw_key_first || key == dsgw_key_last) return;
- free ((void*)key);
- }
- int LDAP_C LDAP_CALLBACK
- dsgw_keycmp( void *arg, const struct berval *L, const struct berval *R )
- {
- int result = 0;
- if (L == R) {
- } else if (L->bv_val == NULL) { /* L is either first or last */
- result = (L == dsgw_key_last) ? 1 : -1;
- } else if (R->bv_val == NULL) { /* R is either first or last */
- result = (R == dsgw_key_last) ? -1 : 1;
- } else
- /* copied from slapi_berval_cmp(), in ../../servers/slapd/plugin.c: */
- if (L->bv_len < R->bv_len) {
- result = memcmp (L->bv_val, R->bv_val, L->bv_len);
- if (result == 0)
- result = -1;
- } else {
- result = memcmp (L->bv_val, R->bv_val, R->bv_len);
- if (result == 0 && (L->bv_len > R->bv_len))
- result = 1;
- }
- return result;
- }
- struct berval*
- dsgw_strkeygen (int flavor, const char* s)
- {
- auto struct berval* v = (struct berval*)dsgw_ch_malloc (sizeof (struct berval));
- auto UCollator* collator = get_collator (flavor);
- v->bv_val = NULL;
- if (collator != NULL) {
- UChar uBuffer[128];
- UChar* u = uBuffer;
- int32_t uLen = u_strlen(uBuffer);
- int uisAlloced = 0;
- if (SetUnicodeStringFromUTF_8 (&u, &uLen, &uisAlloced, s) == U_ZERO_ERROR) {
- char keyBuffer[128]; /* try to use static space buffer to avoid malloc */
- int32_t keyLen = sizeof(keyBuffer);
- char* key = keyBuffer; /* but key can grow if necessary */
- int32_t realLen = ucol_getSortKey(collator, u, uLen, (uint8_t *)key, keyLen);
- if (realLen > keyLen) { /* need more space */
- key = (char*)dsgw_ch_malloc(sizeof(char) * realLen);
- keyLen = ucol_getSortKey(collator, u, uLen, (uint8_t *)key, realLen);
- }
- v->bv_len = realLen + 2;
- v->bv_val = dsgw_ch_malloc (v->bv_len);
- memcpy(v->bv_val+1, key, realLen);
- if (uisAlloced) {
- free(u);
- u = NULL;
- }
- if (key != keyBuffer) {
- free(key);
- key = NULL;
- }
- }
- }
- if (v->bv_val == NULL) {
- v->bv_len = (s ? strlen (s) : 0) + 2;
- v->bv_val = dsgw_ch_malloc (v->bv_len);
- if (v->bv_len > 2) memcpy (v->bv_val+1, s, v->bv_len-2);
- if (flavor) {
- register char* t;
- for (t = v->bv_val+1; *t; ++t) {
- if (isascii (*t)) *t = tolower (*t);
- }
- }
- }
- v->bv_val[0] = (char) dsgw_scriptof (s, dsgw_scriptorder()->so_sort);
- v->bv_val[v->bv_len-1] = '\0';
- return v;
- }
|