filtercmp.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. /** BEGIN COPYRIGHT BLOCK
  2. * This Program is free software; you can redistribute it and/or modify it under
  3. * the terms of the GNU General Public License as published by the Free Software
  4. * Foundation; version 2 of the License.
  5. *
  6. * This Program is distributed in the hope that it will be useful, but WITHOUT
  7. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  8. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  9. *
  10. * You should have received a copy of the GNU General Public License along with
  11. * this Program; if not, write to the Free Software Foundation, Inc., 59 Temple
  12. * Place, Suite 330, Boston, MA 02111-1307 USA.
  13. *
  14. * In addition, as a special exception, Red Hat, Inc. gives You the additional
  15. * right to link the code of this Program with code not covered under the GNU
  16. * General Public License ("Non-GPL Code") and to distribute linked combinations
  17. * including the two, subject to the limitations in this paragraph. Non-GPL Code
  18. * permitted under this exception must only link to the code of this Program
  19. * through those well defined interfaces identified in the file named EXCEPTION
  20. * found in the source code files (the "Approved Interfaces"). The files of
  21. * Non-GPL Code may instantiate templates or use macros or inline functions from
  22. * the Approved Interfaces without causing the resulting work to be covered by
  23. * the GNU General Public License. Only Red Hat, Inc. may make changes or
  24. * additions to the list of Approved Interfaces. You must obey the GNU General
  25. * Public License in all respects for all of the Program code and other code used
  26. * in conjunction with the Program except the Non-GPL Code covered by this
  27. * exception. If you modify this file, you may extend this exception to your
  28. * version of the file, but you are not obligated to do so. If you do not wish to
  29. * provide this exception without modification, you must delete this exception
  30. * statement from your version and license this file solely under the GPL without
  31. * exception.
  32. *
  33. *
  34. * Copyright (C) 2001 Sun Microsystems, Inc. Used by permission.
  35. * Copyright (C) 2005 Red Hat, Inc.
  36. * All rights reserved.
  37. * END COPYRIGHT BLOCK **/
  38. /* filtercmp.c - routines for comparing filters */
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include <sys/types.h>
  42. #include "slap.h"
  43. /* very simple hash function */
  44. static PRUint32 addhash(PRUint32 hash, unsigned char *data, int size)
  45. {
  46. int i;
  47. if (!data || !size)
  48. return hash;
  49. for (i = 0; i < size; i++)
  50. hash = (hash << 5) + (hash >> 27) + data[i];
  51. return hash;
  52. }
  53. #define addhash_long(h, l) addhash((h), (unsigned char *)&(l), sizeof(long))
  54. #define addhash_str(h, str) addhash((h), (unsigned char *)(str), strlen(str))
  55. #define addhash_bv(h, bv) addhash((h), (unsigned char *)(bv).bv_val, \
  56. (bv).bv_len)
  57. static PRUint32 addhash_casestr(PRUint32 hash, char *data)
  58. {
  59. unsigned char *normstr;
  60. normstr = slapi_utf8StrToLower((unsigned char *)data);
  61. hash = addhash(hash, normstr, strlen((char *)normstr));
  62. if ((char *)normstr != data)
  63. slapi_ch_free((void **)&normstr);
  64. return hash;
  65. }
  66. static PRUint32 stir(PRUint32 hash, PRUint32 x)
  67. {
  68. hash = (hash << 5) + (hash >> 27);
  69. hash = hash ^ (x << 16);
  70. hash = hash ^ (x >> 16);
  71. return hash;
  72. }
  73. #define STIR(h) (h) = stir((h), 0x2EC6DEAD);
  74. static Slapi_Value **get_normalized_value(struct ava *ava)
  75. {
  76. void *plugin;
  77. Slapi_Value *svlist[2], **keylist, sv;
  78. slapi_attr_type2plugin(ava->ava_type, &plugin);
  79. sv.bv = ava->ava_value;
  80. sv.v_csnset = NULL;
  81. svlist[0] = &sv;
  82. svlist[1] = NULL;
  83. if ((slapi_call_syntax_values2keys_sv(plugin, svlist, &keylist,
  84. LDAP_FILTER_EQUALITY) != 0) ||
  85. !keylist || !keylist[0])
  86. return NULL;
  87. return keylist;
  88. }
  89. /* this is not pretty. matching rules seem to be pretty elaborate to use,
  90. * so comparing these kind of filters may be undesirably slow just because
  91. * of the overhead of normalizing the values. most of this code is stolen
  92. * from the backend vlv code (matchrule.c)
  93. */
  94. static Slapi_PBlock *get_mr_normval(char *oid, char *type,
  95. struct berval **inval,
  96. struct berval ***outval)
  97. {
  98. Slapi_PBlock *pb = slapi_pblock_new();
  99. unsigned int sort_indicator = SLAPI_PLUGIN_MR_USAGE_SORT;
  100. IFP mrIndex = NULL;
  101. if (!pb)
  102. return NULL;
  103. slapi_pblock_set(pb, SLAPI_PLUGIN_MR_OID, oid);
  104. slapi_pblock_set(pb, SLAPI_PLUGIN_MR_TYPE, type);
  105. slapi_pblock_set(pb, SLAPI_PLUGIN_MR_USAGE, (void *)&sort_indicator);
  106. if (slapi_mr_indexer_create(pb) != 0) {
  107. slapi_pblock_destroy(pb);
  108. return NULL;
  109. }
  110. if ((slapi_pblock_get(pb, SLAPI_PLUGIN_MR_INDEX_FN, &mrIndex) != 0) ||
  111. !mrIndex) {
  112. /* shouldn't ever happen */
  113. slapi_pblock_destroy(pb);
  114. return NULL;
  115. }
  116. /* now, call the indexer */
  117. slapi_pblock_set(pb, SLAPI_PLUGIN_MR_VALUES, inval);
  118. (*mrIndex)(pb);
  119. slapi_pblock_get(pb, SLAPI_PLUGIN_MR_KEYS, outval);
  120. return pb;
  121. }
  122. /* the opposite of above: shut down the matching rule pblock and free
  123. * the memory.
  124. */
  125. static void done_mr_normval(Slapi_PBlock *pb)
  126. {
  127. IFP mrDestroy = NULL;
  128. if (slapi_pblock_get(pb, SLAPI_PLUGIN_DESTROY_FN, &mrDestroy) == 0) {
  129. if (mrDestroy)
  130. (*mrDestroy)(pb);
  131. }
  132. slapi_pblock_destroy(pb);
  133. }
  134. static int hash_filters = 0;
  135. void set_hash_filters(int i) { hash_filters = i; }
  136. /* calculate the hash value of a node in a filter (assumes that any sub-nodes
  137. * of the filter have already had their hash value calculated).
  138. * -- the annoying part of this is normalizing any values in the filter.
  139. */
  140. void filter_compute_hash(struct slapi_filter *f)
  141. {
  142. PRUint32 h;
  143. char **a;
  144. struct slapi_filter *fx;
  145. Slapi_Value **keylist;
  146. Slapi_PBlock *pb;
  147. struct berval *inval[2], **outval;
  148. if (! hash_filters)
  149. return;
  150. h = addhash_long(0, f->f_choice);
  151. switch (f->f_choice) {
  152. case LDAP_FILTER_EQUALITY:
  153. case LDAP_FILTER_GE:
  154. case LDAP_FILTER_LE:
  155. case LDAP_FILTER_APPROX:
  156. keylist = get_normalized_value(&f->f_ava);
  157. if (keylist) {
  158. h = addhash_str(h, f->f_avtype);
  159. STIR(h);
  160. h = addhash_bv(h, *(slapi_value_get_berval(keylist[0])));
  161. valuearray_free(&keylist);
  162. }
  163. break;
  164. case LDAP_FILTER_SUBSTRINGS:
  165. h = addhash_str(h, f->f_sub_type);
  166. STIR(h);
  167. if (f->f_sub_initial)
  168. h = addhash_casestr(h, f->f_sub_initial);
  169. if (f->f_sub_any) {
  170. for (a = f->f_sub_any; *a; a++) {
  171. STIR(h);
  172. h = addhash_casestr(h, *a);
  173. }
  174. }
  175. STIR(h);
  176. if (f->f_sub_final)
  177. h = addhash_casestr(h, f->f_sub_final);
  178. break;
  179. case LDAP_FILTER_PRESENT:
  180. h = addhash_str(h, f->f_type);
  181. break;
  182. case LDAP_FILTER_AND:
  183. case LDAP_FILTER_OR:
  184. case LDAP_FILTER_NOT:
  185. /* should be able to just mix in the hashes from lower levels */
  186. for (fx = f->f_list; fx; fx = fx->f_next)
  187. h = h ^ fx->f_hash;
  188. break;
  189. case LDAP_FILTER_EXTENDED:
  190. if (f->f_mr_oid)
  191. h = addhash_str(h, f->f_mr_oid);
  192. STIR(h);
  193. if (f->f_mr_type)
  194. h = addhash_str(h, f->f_mr_type);
  195. inval[0] = &f->f_mr_value;
  196. inval[1] = NULL;
  197. /* get the normalized value (according to the matching rule) */
  198. pb = get_mr_normval(f->f_mr_oid, f->f_mr_type, inval, &outval);
  199. if (pb && outval && outval[0]) {
  200. STIR(h);
  201. h = addhash_bv(h, *(outval[0]));
  202. }
  203. done_mr_normval(pb);
  204. if (f->f_mr_dnAttrs)
  205. STIR(h);
  206. break;
  207. default:
  208. LDAPDebug(LDAP_DEBUG_ANY, "$$$ can't handle filter type %d !\n",
  209. f->f_choice, 0, 0);
  210. }
  211. f->f_hash = h;
  212. }
  213. /* match compare: given two arrays of size N, determine if each item in
  214. * the first array matches with each item in the second array, with a
  215. * one-to-one correspondence. this will be DOG SLOW for large values of N
  216. * (it scales as N^2) but we generally expect N < 5.
  217. */
  218. static int filter_compare_substrings(struct slapi_filter *f1,
  219. struct slapi_filter *f2)
  220. {
  221. int buf[20], *tally;
  222. char **a1, **a2;
  223. int count1 = 0, count2 = 0, ret, i, j, ok;
  224. /* ok to pass NULL to utf8casecmp */
  225. if ((slapi_UTF8CASECMP(f1->f_sub_initial, f2->f_sub_initial) != 0) ||
  226. (slapi_UTF8CASECMP(f1->f_sub_final, f2->f_sub_final) != 0))
  227. return 1;
  228. /* match compare (would be expensive for large numbers of 'any'
  229. * substrings, which we don't expect to see)
  230. */
  231. for (a1 = f1->f_sub_any; a1 && *a1; a1++, count1++);
  232. for (a2 = f2->f_sub_any; a2 && *a2; a2++, count2++);
  233. if (count1 != count2)
  234. return 1;
  235. ret = 1; /* assume failure until done comparing */
  236. if (count1 > 20)
  237. tally = (int *)malloc(count1);
  238. else
  239. tally = buf;
  240. if (!tally)
  241. goto done; /* this is bad; out of memory */
  242. for (i = 0; i < count1; i++)
  243. tally[i] = 0;
  244. /* ok. the theory is we tally up all the matched pairs we find,
  245. * stopping if we can't find a match that hasn't already been paired.
  246. */
  247. a1 = f1->f_sub_any;
  248. for (i = 0; i < count1; i++, a1++) {
  249. a2 = f2->f_sub_any;
  250. ok = 0;
  251. for (j = 0; j < count1; j++, a2++) {
  252. if (!tally[j] && (slapi_UTF8CASECMP(*a1, *a2) == 0)) {
  253. tally[j] = ok = 1;
  254. break;
  255. }
  256. }
  257. if (!ok)
  258. goto done; /* didn't find a match for that one */
  259. }
  260. /* done! matched */
  261. ret = 0;
  262. done:
  263. if ((count1 > 20) && tally)
  264. free(tally);
  265. return ret;
  266. }
  267. /* same as above, but this time for lists of filter nodes */
  268. static int filter_compare_lists(struct slapi_filter *f1,
  269. struct slapi_filter *f2)
  270. {
  271. int buf[20], *tally;
  272. struct slapi_filter *fx1, *fx2;
  273. int count1 = 0, count2 = 0, ret, i, j, ok;
  274. for (fx1 = f1->f_list; fx1; fx1 = fx1->f_next, count1++);
  275. for (fx2 = f2->f_list; fx2; fx2 = fx2->f_next, count2++);
  276. if (count1 != count2)
  277. return 1;
  278. ret = 1;
  279. if (count1 > 20)
  280. tally = (int *)malloc(count1);
  281. else
  282. tally = buf;
  283. if (!tally)
  284. goto done; /* very bad */
  285. for (i = 0; i < count1; i++)
  286. tally[i] = 0;
  287. /* brute-force match compare now */
  288. fx1 = f1->f_list;
  289. for (i = 0; i < count1; i++, fx1 = fx1->f_next) {
  290. fx2 = f2->f_list;
  291. ok = 0;
  292. for (j = 0; j < count1; j++, fx2 = fx2->f_next) {
  293. if (!tally[j] && (slapi_filter_compare(fx1, fx2) == 0)) {
  294. tally[j] = ok = 1;
  295. break;
  296. }
  297. }
  298. if (!ok)
  299. goto done; /* no match */
  300. }
  301. /* done! all matched */
  302. ret = 0;
  303. done:
  304. if ((count1 > 20) && tally)
  305. free(tally);
  306. return ret;
  307. }
  308. /* returns 0 if two filters are "identical"
  309. * (items under AND/OR are allowed to be in different order)
  310. */
  311. int slapi_filter_compare(struct slapi_filter *f1, struct slapi_filter *f2)
  312. {
  313. Slapi_Value **key1, **key2;
  314. Slapi_PBlock *pb1, *pb2;
  315. struct berval *inval1[2], *inval2[2], **outval1, **outval2;
  316. int ret;
  317. LDAPDebug(LDAP_DEBUG_TRACE, "=> filter compare\n", 0, 0, 0);
  318. /* allow for the possibility that one of the filters hasn't had a hash
  319. * computed (and is therefore 0). this means that a filter node whose
  320. * hash is computed as 0 will always get compared the expensive way,
  321. * but this should happen VERY rarely (if ever).
  322. */
  323. if ((f1->f_hash != f2->f_hash) && (f1->f_hash) && (f2->f_hash)) {
  324. ret = 1;
  325. goto done;
  326. }
  327. /* brute-force comparison now */
  328. if (f1->f_choice != f2->f_choice) {
  329. ret = 1;
  330. goto done;
  331. }
  332. switch (f1->f_choice) {
  333. case LDAP_FILTER_EQUALITY:
  334. case LDAP_FILTER_GE:
  335. case LDAP_FILTER_LE:
  336. case LDAP_FILTER_APPROX:
  337. if (slapi_UTF8CASECMP(f1->f_avtype, f2->f_avtype) != 0) {
  338. ret = 1;
  339. break;
  340. }
  341. key1 = get_normalized_value(&f1->f_ava);
  342. if (key1) {
  343. key2 = get_normalized_value(&f2->f_ava);
  344. if (key2) {
  345. ret = memcmp(slapi_value_get_string(key1[0]),
  346. slapi_value_get_string(key2[0]),
  347. slapi_value_get_length(key1[0]));
  348. valuearray_free(&key1);
  349. valuearray_free(&key2);
  350. break;
  351. }
  352. valuearray_free(&key1);
  353. }
  354. ret = 1;
  355. break;
  356. case LDAP_FILTER_PRESENT:
  357. ret = (slapi_UTF8CASECMP(f1->f_type, f2->f_type));
  358. break;
  359. case LDAP_FILTER_SUBSTRINGS:
  360. ret = filter_compare_substrings(f1, f2);
  361. break;
  362. case LDAP_FILTER_AND:
  363. case LDAP_FILTER_OR:
  364. case LDAP_FILTER_NOT:
  365. ret = filter_compare_lists(f1, f2);
  366. break;
  367. case LDAP_FILTER_EXTENDED:
  368. if ((slapi_UTF8CASECMP(f1->f_mr_oid, f2->f_mr_oid) != 0) ||
  369. (slapi_UTF8CASECMP(f1->f_mr_type, f2->f_mr_type) != 0) ||
  370. (f1->f_mr_dnAttrs != f2->f_mr_dnAttrs)) {
  371. ret = 1;
  372. break;
  373. }
  374. /* painstakingly compare the values (using the matching rule) */
  375. inval1[0] = &f1->f_mr_value;
  376. inval2[0] = &f2->f_mr_value;
  377. inval1[1] = inval2[1] = NULL;
  378. pb1 = get_mr_normval(f1->f_mr_oid, f1->f_mr_type, inval1, &outval1);
  379. pb2 = get_mr_normval(f2->f_mr_oid, f2->f_mr_type, inval2, &outval2);
  380. if (!pb1 || !pb2 || !outval1 || !outval2 || !outval1[0] ||
  381. !outval2[0] || (outval1[0]->bv_len != outval2[0]->bv_len) ||
  382. (memcmp(outval1[0]->bv_val, outval2[0]->bv_val,
  383. outval1[0]->bv_len) != 0)) {
  384. ret = 1;
  385. } else {
  386. ret = 0;
  387. }
  388. if (pb1)
  389. done_mr_normval(pb1);
  390. if (pb2)
  391. done_mr_normval(pb2);
  392. break;
  393. default:
  394. LDAPDebug(LDAP_DEBUG_ANY, "ERR can't handle filter %d\n", f1->f_choice,
  395. 0, 0);
  396. ret = 1;
  397. }
  398. done:
  399. LDAPDebug(LDAP_DEBUG_TRACE, "<= filter compare: %d\n", ret, 0, 0);
  400. return ret;
  401. }