filtercmp.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
  1. /** BEGIN COPYRIGHT BLOCK
  2. * This Program is free software; you can redistribute it and/or modify it under
  3. * the terms of the GNU General Public License as published by the Free Software
  4. * Foundation; version 2 of the License.
  5. *
  6. * This Program is distributed in the hope that it will be useful, but WITHOUT
  7. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  8. * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  9. *
  10. * You should have received a copy of the GNU General Public License along with
  11. * this Program; if not, write to the Free Software Foundation, Inc., 59 Temple
  12. * Place, Suite 330, Boston, MA 02111-1307 USA.
  13. *
  14. * In addition, as a special exception, Red Hat, Inc. gives You the additional
  15. * right to link the code of this Program with code not covered under the GNU
  16. * General Public License ("Non-GPL Code") and to distribute linked combinations
  17. * including the two, subject to the limitations in this paragraph. Non-GPL Code
  18. * permitted under this exception must only link to the code of this Program
  19. * through those well defined interfaces identified in the file named EXCEPTION
  20. * found in the source code files (the "Approved Interfaces"). The files of
  21. * Non-GPL Code may instantiate templates or use macros or inline functions from
  22. * the Approved Interfaces without causing the resulting work to be covered by
  23. * the GNU General Public License. Only Red Hat, Inc. may make changes or
  24. * additions to the list of Approved Interfaces. You must obey the GNU General
  25. * Public License in all respects for all of the Program code and other code used
  26. * in conjunction with the Program except the Non-GPL Code covered by this
  27. * exception. If you modify this file, you may extend this exception to your
  28. * version of the file, but you are not obligated to do so. If you do not wish to
  29. * provide this exception without modification, you must delete this exception
  30. * statement from your version and license this file solely under the GPL without
  31. * exception.
  32. *
  33. *
  34. * Copyright (C) 2001 Sun Microsystems, Inc. Used by permission.
  35. * Copyright (C) 2005 Red Hat, Inc.
  36. * All rights reserved.
  37. * END COPYRIGHT BLOCK **/
  38. #ifdef HAVE_CONFIG_H
  39. # include <config.h>
  40. #endif
  41. /* filtercmp.c - routines for comparing filters */
  42. #include <stdio.h>
  43. #include <string.h>
  44. #include <sys/types.h>
  45. #include "slap.h"
  46. /* very simple hash function */
  47. static PRUint32 addhash(PRUint32 hash, unsigned char *data, int size)
  48. {
  49. int i;
  50. if (!data || !size)
  51. return hash;
  52. for (i = 0; i < size; i++)
  53. hash = (hash << 5) + (hash >> 27) + data[i];
  54. return hash;
  55. }
  56. #define addhash_long(h, l) addhash((h), (unsigned char *)&(l), sizeof(long))
  57. #define addhash_str(h, str) addhash((h), (unsigned char *)(str), strlen(str))
  58. #define addhash_bv(h, bv) addhash((h), (unsigned char *)(bv).bv_val, \
  59. (bv).bv_len)
  60. static PRUint32 addhash_casestr(PRUint32 hash, char *data)
  61. {
  62. unsigned char *normstr;
  63. normstr = slapi_utf8StrToLower((unsigned char *)data);
  64. hash = addhash(hash, normstr,
  65. normstr ? strlen((char *)normstr) : 0);
  66. if ((char *)normstr != data)
  67. slapi_ch_free((void **)&normstr);
  68. return hash;
  69. }
  70. static PRUint32 stir(PRUint32 hash, PRUint32 x)
  71. {
  72. hash = (hash << 5) + (hash >> 27);
  73. hash = hash ^ (x << 16);
  74. hash = hash ^ (x >> 16);
  75. return hash;
  76. }
  77. #define STIR(h) (h) = stir((h), 0x2EC6DEAD);
  78. static Slapi_Value **get_normalized_value(struct ava *ava)
  79. {
  80. void *plugin;
  81. Slapi_Value *svlist[2], **keylist, sv;
  82. slapi_attr_type2plugin(ava->ava_type, &plugin);
  83. sv.bv = ava->ava_value;
  84. sv.v_csnset = NULL;
  85. svlist[0] = &sv;
  86. svlist[1] = NULL;
  87. if ((slapi_call_syntax_values2keys_sv(plugin, svlist, &keylist,
  88. LDAP_FILTER_EQUALITY) != 0) ||
  89. !keylist || !keylist[0])
  90. return NULL;
  91. return keylist;
  92. }
  93. /* this is not pretty. matching rules seem to be pretty elaborate to use,
  94. * so comparing these kind of filters may be undesirably slow just because
  95. * of the overhead of normalizing the values. most of this code is stolen
  96. * from the backend vlv code (matchrule.c)
  97. */
  98. static Slapi_PBlock *get_mr_normval(char *oid, char *type,
  99. struct berval **inval,
  100. struct berval ***outval)
  101. {
  102. Slapi_PBlock *pb = slapi_pblock_new();
  103. unsigned int sort_indicator = SLAPI_PLUGIN_MR_USAGE_SORT;
  104. IFP mrIndex = NULL;
  105. if (!pb)
  106. return NULL;
  107. slapi_pblock_set(pb, SLAPI_PLUGIN_MR_OID, oid);
  108. slapi_pblock_set(pb, SLAPI_PLUGIN_MR_TYPE, type);
  109. slapi_pblock_set(pb, SLAPI_PLUGIN_MR_USAGE, (void *)&sort_indicator);
  110. if (slapi_mr_indexer_create(pb) != 0) {
  111. slapi_pblock_destroy(pb);
  112. return NULL;
  113. }
  114. if ((slapi_pblock_get(pb, SLAPI_PLUGIN_MR_INDEX_FN, &mrIndex) != 0) ||
  115. !mrIndex) {
  116. /* shouldn't ever happen */
  117. slapi_pblock_destroy(pb);
  118. return NULL;
  119. }
  120. /* now, call the indexer */
  121. slapi_pblock_set(pb, SLAPI_PLUGIN_MR_VALUES, inval);
  122. (*mrIndex)(pb);
  123. slapi_pblock_get(pb, SLAPI_PLUGIN_MR_KEYS, outval);
  124. return pb;
  125. }
  126. /* the opposite of above: shut down the matching rule pblock and free
  127. * the memory.
  128. */
  129. static void done_mr_normval(Slapi_PBlock *pb)
  130. {
  131. IFP mrDestroy = NULL;
  132. if (slapi_pblock_get(pb, SLAPI_PLUGIN_DESTROY_FN, &mrDestroy) == 0) {
  133. if (mrDestroy)
  134. (*mrDestroy)(pb);
  135. }
  136. slapi_pblock_destroy(pb);
  137. }
  138. static int hash_filters = 0;
  139. void set_hash_filters(int i) { hash_filters = i; }
  140. /* calculate the hash value of a node in a filter (assumes that any sub-nodes
  141. * of the filter have already had their hash value calculated).
  142. * -- the annoying part of this is normalizing any values in the filter.
  143. */
  144. void filter_compute_hash(struct slapi_filter *f)
  145. {
  146. PRUint32 h;
  147. char **a;
  148. struct slapi_filter *fx;
  149. Slapi_Value **keylist;
  150. Slapi_PBlock *pb;
  151. struct berval *inval[2], **outval;
  152. if (! hash_filters)
  153. return;
  154. h = addhash_long(0, f->f_choice);
  155. switch (f->f_choice) {
  156. case LDAP_FILTER_EQUALITY:
  157. case LDAP_FILTER_GE:
  158. case LDAP_FILTER_LE:
  159. case LDAP_FILTER_APPROX:
  160. keylist = get_normalized_value(&f->f_ava);
  161. if (keylist) {
  162. h = addhash_str(h, f->f_avtype);
  163. STIR(h);
  164. h = addhash_bv(h, *(slapi_value_get_berval(keylist[0])));
  165. valuearray_free(&keylist);
  166. }
  167. break;
  168. case LDAP_FILTER_SUBSTRINGS:
  169. h = addhash_str(h, f->f_sub_type);
  170. STIR(h);
  171. if (f->f_sub_initial)
  172. h = addhash_casestr(h, f->f_sub_initial);
  173. if (f->f_sub_any) {
  174. for (a = f->f_sub_any; *a; a++) {
  175. STIR(h);
  176. h = addhash_casestr(h, *a);
  177. }
  178. }
  179. STIR(h);
  180. if (f->f_sub_final)
  181. h = addhash_casestr(h, f->f_sub_final);
  182. break;
  183. case LDAP_FILTER_PRESENT:
  184. h = addhash_str(h, f->f_type);
  185. break;
  186. case LDAP_FILTER_AND:
  187. case LDAP_FILTER_OR:
  188. case LDAP_FILTER_NOT:
  189. /* should be able to just mix in the hashes from lower levels */
  190. for (fx = f->f_list; fx; fx = fx->f_next)
  191. h = h ^ fx->f_hash;
  192. break;
  193. case LDAP_FILTER_EXTENDED:
  194. if (f->f_mr_oid)
  195. h = addhash_str(h, f->f_mr_oid);
  196. STIR(h);
  197. if (f->f_mr_type)
  198. h = addhash_str(h, f->f_mr_type);
  199. inval[0] = &f->f_mr_value;
  200. inval[1] = NULL;
  201. /* get the normalized value (according to the matching rule) */
  202. pb = get_mr_normval(f->f_mr_oid, f->f_mr_type, inval, &outval);
  203. if (pb && outval && outval[0]) {
  204. STIR(h);
  205. h = addhash_bv(h, *(outval[0]));
  206. }
  207. done_mr_normval(pb);
  208. if (f->f_mr_dnAttrs)
  209. STIR(h);
  210. break;
  211. default:
  212. LDAPDebug(LDAP_DEBUG_ANY, "$$$ can't handle filter type %d !\n",
  213. f->f_choice, 0, 0);
  214. }
  215. f->f_hash = h;
  216. }
  217. /* match compare: given two arrays of size N, determine if each item in
  218. * the first array matches with each item in the second array, with a
  219. * one-to-one correspondence. this will be DOG SLOW for large values of N
  220. * (it scales as N^2) but we generally expect N < 5.
  221. */
  222. static int filter_compare_substrings(struct slapi_filter *f1,
  223. struct slapi_filter *f2)
  224. {
  225. int buf[20], *tally;
  226. char **a1, **a2;
  227. int count1 = 0, count2 = 0, ret, i, j, ok;
  228. /* ok to pass NULL to utf8casecmp */
  229. if ((slapi_UTF8CASECMP(f1->f_sub_initial, f2->f_sub_initial) != 0) ||
  230. (slapi_UTF8CASECMP(f1->f_sub_final, f2->f_sub_final) != 0))
  231. return 1;
  232. /* match compare (would be expensive for large numbers of 'any'
  233. * substrings, which we don't expect to see)
  234. */
  235. for (a1 = f1->f_sub_any; a1 && *a1; a1++, count1++);
  236. for (a2 = f2->f_sub_any; a2 && *a2; a2++, count2++);
  237. if (count1 != count2)
  238. return 1;
  239. ret = 1; /* assume failure until done comparing */
  240. if (count1 > 20)
  241. tally = (int *)malloc(count1);
  242. else
  243. tally = buf;
  244. if (!tally)
  245. goto done; /* this is bad; out of memory */
  246. for (i = 0; i < count1; i++)
  247. tally[i] = 0;
  248. /* ok. the theory is we tally up all the matched pairs we find,
  249. * stopping if we can't find a match that hasn't already been paired.
  250. */
  251. a1 = f1->f_sub_any;
  252. for (i = 0; i < count1; i++, a1++) {
  253. a2 = f2->f_sub_any;
  254. ok = 0;
  255. for (j = 0; j < count1; j++, a2++) {
  256. if (!tally[j] && (slapi_UTF8CASECMP(*a1, *a2) == 0)) {
  257. tally[j] = ok = 1;
  258. break;
  259. }
  260. }
  261. if (!ok)
  262. goto done; /* didn't find a match for that one */
  263. }
  264. /* done! matched */
  265. ret = 0;
  266. done:
  267. if ((count1 > 20) && tally)
  268. free(tally);
  269. return ret;
  270. }
  271. /* same as above, but this time for lists of filter nodes */
  272. static int filter_compare_lists(struct slapi_filter *f1,
  273. struct slapi_filter *f2)
  274. {
  275. int buf[20], *tally;
  276. struct slapi_filter *fx1, *fx2;
  277. int count1 = 0, count2 = 0, ret, i, j, ok;
  278. for (fx1 = f1->f_list; fx1; fx1 = fx1->f_next, count1++);
  279. for (fx2 = f2->f_list; fx2; fx2 = fx2->f_next, count2++);
  280. if (count1 != count2)
  281. return 1;
  282. ret = 1;
  283. if (count1 > 20)
  284. tally = (int *)malloc(count1);
  285. else
  286. tally = buf;
  287. if (!tally)
  288. goto done; /* very bad */
  289. for (i = 0; i < count1; i++)
  290. tally[i] = 0;
  291. /* brute-force match compare now */
  292. fx1 = f1->f_list;
  293. for (i = 0; i < count1; i++, fx1 = fx1->f_next) {
  294. fx2 = f2->f_list;
  295. ok = 0;
  296. for (j = 0; j < count1; j++, fx2 = fx2->f_next) {
  297. if (!tally[j] && (slapi_filter_compare(fx1, fx2) == 0)) {
  298. tally[j] = ok = 1;
  299. break;
  300. }
  301. }
  302. if (!ok)
  303. goto done; /* no match */
  304. }
  305. /* done! all matched */
  306. ret = 0;
  307. done:
  308. if ((count1 > 20) && tally)
  309. free(tally);
  310. return ret;
  311. }
  312. /* returns 0 if two filters are "identical"
  313. * (items under AND/OR are allowed to be in different order)
  314. */
  315. int slapi_filter_compare(struct slapi_filter *f1, struct slapi_filter *f2)
  316. {
  317. Slapi_Value **key1, **key2;
  318. Slapi_PBlock *pb1, *pb2;
  319. struct berval *inval1[2], *inval2[2], **outval1, **outval2;
  320. int ret;
  321. LDAPDebug(LDAP_DEBUG_TRACE, "=> filter compare\n", 0, 0, 0);
  322. /* allow for the possibility that one of the filters hasn't had a hash
  323. * computed (and is therefore 0). this means that a filter node whose
  324. * hash is computed as 0 will always get compared the expensive way,
  325. * but this should happen VERY rarely (if ever).
  326. */
  327. if ((f1->f_hash != f2->f_hash) && (f1->f_hash) && (f2->f_hash)) {
  328. ret = 1;
  329. goto done;
  330. }
  331. /* brute-force comparison now */
  332. if (f1->f_choice != f2->f_choice) {
  333. ret = 1;
  334. goto done;
  335. }
  336. switch (f1->f_choice) {
  337. case LDAP_FILTER_EQUALITY:
  338. case LDAP_FILTER_GE:
  339. case LDAP_FILTER_LE:
  340. case LDAP_FILTER_APPROX:
  341. if (slapi_UTF8CASECMP(f1->f_avtype, f2->f_avtype) != 0) {
  342. ret = 1;
  343. break;
  344. }
  345. key1 = get_normalized_value(&f1->f_ava);
  346. if (key1) {
  347. key2 = get_normalized_value(&f2->f_ava);
  348. if (key2) {
  349. ret = memcmp(slapi_value_get_string(key1[0]),
  350. slapi_value_get_string(key2[0]),
  351. slapi_value_get_length(key1[0]));
  352. valuearray_free(&key1);
  353. valuearray_free(&key2);
  354. break;
  355. }
  356. valuearray_free(&key1);
  357. }
  358. ret = 1;
  359. break;
  360. case LDAP_FILTER_PRESENT:
  361. ret = (slapi_UTF8CASECMP(f1->f_type, f2->f_type));
  362. break;
  363. case LDAP_FILTER_SUBSTRINGS:
  364. ret = filter_compare_substrings(f1, f2);
  365. break;
  366. case LDAP_FILTER_AND:
  367. case LDAP_FILTER_OR:
  368. case LDAP_FILTER_NOT:
  369. ret = filter_compare_lists(f1, f2);
  370. break;
  371. case LDAP_FILTER_EXTENDED:
  372. if ((slapi_UTF8CASECMP(f1->f_mr_oid, f2->f_mr_oid) != 0) ||
  373. (slapi_UTF8CASECMP(f1->f_mr_type, f2->f_mr_type) != 0) ||
  374. (f1->f_mr_dnAttrs != f2->f_mr_dnAttrs)) {
  375. ret = 1;
  376. break;
  377. }
  378. /* painstakingly compare the values (using the matching rule) */
  379. inval1[0] = &f1->f_mr_value;
  380. inval2[0] = &f2->f_mr_value;
  381. inval1[1] = inval2[1] = NULL;
  382. pb1 = get_mr_normval(f1->f_mr_oid, f1->f_mr_type, inval1, &outval1);
  383. pb2 = get_mr_normval(f2->f_mr_oid, f2->f_mr_type, inval2, &outval2);
  384. if (!pb1 || !pb2 || !outval1 || !outval2 || !outval1[0] ||
  385. !outval2[0] || (outval1[0]->bv_len != outval2[0]->bv_len) ||
  386. (memcmp(outval1[0]->bv_val, outval2[0]->bv_val,
  387. outval1[0]->bv_len) != 0)) {
  388. ret = 1;
  389. } else {
  390. ret = 0;
  391. }
  392. if (pb1)
  393. done_mr_normval(pb1);
  394. if (pb2)
  395. done_mr_normval(pb2);
  396. break;
  397. default:
  398. LDAPDebug(LDAP_DEBUG_ANY, "ERR can't handle filter %d\n", f1->f_choice,
  399. 0, 0);
  400. ret = 1;
  401. }
  402. done:
  403. LDAPDebug(LDAP_DEBUG_TRACE, "<= filter compare: %d\n", ret, 0, 0);
  404. return ret;
  405. }