filtercmp.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. /** BEGIN COPYRIGHT BLOCK
  2. * Copyright (C) 2001 Sun Microsystems, Inc. Used by permission.
  3. * Copyright (C) 2005 Red Hat, Inc.
  4. * All rights reserved.
  5. *
  6. * License: GPL (version 3 or any later version).
  7. * See LICENSE for details.
  8. * END COPYRIGHT BLOCK **/
  9. #ifdef HAVE_CONFIG_H
  10. # include <config.h>
  11. #endif
  12. /* filtercmp.c - routines for comparing filters */
  13. #include <stdio.h>
  14. #include <string.h>
  15. #include <sys/types.h>
  16. #include "slap.h"
  17. /* very simple hash function */
  18. static PRUint32 addhash(PRUint32 hash, unsigned char *data, int size)
  19. {
  20. int i;
  21. if (!data || !size)
  22. return hash;
  23. for (i = 0; i < size; i++)
  24. hash = (hash << 5) + (hash >> 27) + data[i];
  25. return hash;
  26. }
  27. #define addhash_long(h, l) addhash((h), (unsigned char *)&(l), sizeof(long))
  28. #define addhash_str(h, str) addhash((h), (unsigned char *)(str), strlen(str))
  29. #define addhash_bv(h, bv) addhash((h), (unsigned char *)(bv).bv_val, \
  30. (bv).bv_len)
  31. static PRUint32 addhash_casestr(PRUint32 hash, char *data)
  32. {
  33. unsigned char *normstr;
  34. normstr = slapi_utf8StrToLower((unsigned char *)data);
  35. hash = addhash(hash, normstr,
  36. normstr ? strlen((char *)normstr) : 0);
  37. if ((char *)normstr != data)
  38. slapi_ch_free((void **)&normstr);
  39. return hash;
  40. }
  41. static PRUint32 stir(PRUint32 hash, PRUint32 x)
  42. {
  43. hash = (hash << 5) + (hash >> 27);
  44. hash = hash ^ (x << 16);
  45. hash = hash ^ (x >> 16);
  46. return hash;
  47. }
  48. #define STIR(h) (h) = stir((h), 0x2EC6DEAD);
  49. static Slapi_Value **get_normalized_value(const Slapi_Attr *sattr, struct ava *ava)
  50. {
  51. Slapi_Value *svlist[2], **keylist, sv;
  52. sv.bv = ava->ava_value;
  53. sv.v_csnset = NULL;
  54. sv.v_flags = 0;
  55. svlist[0] = &sv;
  56. svlist[1] = NULL;
  57. if ((slapi_attr_values2keys_sv(sattr, svlist, &keylist,
  58. LDAP_FILTER_EQUALITY) != 0) ||
  59. !keylist || !keylist[0])
  60. return NULL;
  61. return keylist;
  62. }
  63. /* this is not pretty. matching rules seem to be pretty elaborate to use,
  64. * so comparing these kind of filters may be undesirably slow just because
  65. * of the overhead of normalizing the values. most of this code is stolen
  66. * from the backend vlv code (matchrule.c)
  67. */
  68. static Slapi_PBlock *get_mr_normval(char *oid, char *type,
  69. struct berval **inval,
  70. struct berval ***outval)
  71. {
  72. Slapi_PBlock *pb = slapi_pblock_new();
  73. unsigned int sort_indicator = SLAPI_PLUGIN_MR_USAGE_SORT;
  74. IFP mrIndex = NULL;
  75. if (!pb)
  76. return NULL;
  77. slapi_pblock_set(pb, SLAPI_PLUGIN_MR_OID, oid);
  78. slapi_pblock_set(pb, SLAPI_PLUGIN_MR_TYPE, type);
  79. slapi_pblock_set(pb, SLAPI_PLUGIN_MR_USAGE, (void *)&sort_indicator);
  80. if (slapi_mr_indexer_create(pb) != 0) {
  81. slapi_pblock_destroy(pb);
  82. return NULL;
  83. }
  84. if ((slapi_pblock_get(pb, SLAPI_PLUGIN_MR_INDEX_FN, &mrIndex) != 0) ||
  85. !mrIndex) {
  86. /* shouldn't ever happen */
  87. slapi_pblock_destroy(pb);
  88. return NULL;
  89. }
  90. /* now, call the indexer */
  91. slapi_pblock_set(pb, SLAPI_PLUGIN_MR_VALUES, inval);
  92. (*mrIndex)(pb);
  93. slapi_pblock_get(pb, SLAPI_PLUGIN_MR_KEYS, outval);
  94. return pb;
  95. }
  96. /* the opposite of above: shut down the matching rule pblock and free
  97. * the memory.
  98. */
  99. static void done_mr_normval(Slapi_PBlock *pb)
  100. {
  101. IFP mrDestroy = NULL;
  102. if (slapi_pblock_get(pb, SLAPI_PLUGIN_DESTROY_FN, &mrDestroy) == 0) {
  103. if (mrDestroy)
  104. (*mrDestroy)(pb);
  105. }
  106. slapi_pblock_destroy(pb);
  107. }
  108. static int hash_filters = 0;
  109. void set_hash_filters(int i) { hash_filters = i; }
  110. /* calculate the hash value of a node in a filter (assumes that any sub-nodes
  111. * of the filter have already had their hash value calculated).
  112. * -- the annoying part of this is normalizing any values in the filter.
  113. */
  114. void filter_compute_hash(struct slapi_filter *f)
  115. {
  116. PRUint32 h;
  117. char **a;
  118. struct slapi_filter *fx;
  119. Slapi_Value **keylist;
  120. Slapi_PBlock *pb;
  121. struct berval *inval[2], **outval;
  122. Slapi_Attr sattr;
  123. if (! hash_filters)
  124. return;
  125. h = addhash_long(0, f->f_choice);
  126. switch (f->f_choice) {
  127. case LDAP_FILTER_EQUALITY:
  128. case LDAP_FILTER_GE:
  129. case LDAP_FILTER_LE:
  130. case LDAP_FILTER_APPROX:
  131. slapi_attr_init(&sattr, f->f_ava.ava_type);
  132. keylist = get_normalized_value(&sattr, &f->f_ava);
  133. attr_done(&sattr);
  134. if (keylist) {
  135. h = addhash_str(h, f->f_avtype);
  136. STIR(h);
  137. h = addhash_bv(h, *(slapi_value_get_berval(keylist[0])));
  138. valuearray_free(&keylist);
  139. }
  140. break;
  141. case LDAP_FILTER_SUBSTRINGS:
  142. h = addhash_str(h, f->f_sub_type);
  143. STIR(h);
  144. if (f->f_sub_initial)
  145. h = addhash_casestr(h, f->f_sub_initial);
  146. if (f->f_sub_any) {
  147. for (a = f->f_sub_any; *a; a++) {
  148. STIR(h);
  149. h = addhash_casestr(h, *a);
  150. }
  151. }
  152. STIR(h);
  153. if (f->f_sub_final)
  154. h = addhash_casestr(h, f->f_sub_final);
  155. break;
  156. case LDAP_FILTER_PRESENT:
  157. h = addhash_str(h, f->f_type);
  158. break;
  159. case LDAP_FILTER_AND:
  160. case LDAP_FILTER_OR:
  161. case LDAP_FILTER_NOT:
  162. /* should be able to just mix in the hashes from lower levels */
  163. for (fx = f->f_list; fx; fx = fx->f_next)
  164. h = h ^ fx->f_hash;
  165. break;
  166. case LDAP_FILTER_EXTENDED:
  167. if (f->f_mr_oid)
  168. h = addhash_str(h, f->f_mr_oid);
  169. STIR(h);
  170. if (f->f_mr_type)
  171. h = addhash_str(h, f->f_mr_type);
  172. inval[0] = &f->f_mr_value;
  173. inval[1] = NULL;
  174. /* get the normalized value (according to the matching rule) */
  175. pb = get_mr_normval(f->f_mr_oid, f->f_mr_type, inval, &outval);
  176. if (!pb) {
  177. LDAPDebug(LDAP_DEBUG_ANY, "$$$ out of memory !\n", 0, 0, 0);
  178. return;
  179. }
  180. if (outval && outval[0]) {
  181. STIR(h);
  182. h = addhash_bv(h, *(outval[0]));
  183. }
  184. done_mr_normval(pb);
  185. if (f->f_mr_dnAttrs)
  186. STIR(h);
  187. break;
  188. default:
  189. LDAPDebug(LDAP_DEBUG_ANY, "$$$ can't handle filter type %d !\n",
  190. f->f_choice, 0, 0);
  191. }
  192. f->f_hash = h;
  193. }
  194. /* match compare: given two arrays of size N, determine if each item in
  195. * the first array matches with each item in the second array, with a
  196. * one-to-one correspondence. this will be DOG SLOW for large values of N
  197. * (it scales as N^2) but we generally expect N < 5.
  198. */
  199. static int filter_compare_substrings(struct slapi_filter *f1,
  200. struct slapi_filter *f2)
  201. {
  202. int buf[20], *tally;
  203. char **a1, **a2;
  204. int count1 = 0, count2 = 0, ret, i, j, ok;
  205. /* ok to pass NULL to utf8casecmp */
  206. if ((slapi_UTF8CASECMP(f1->f_sub_initial, f2->f_sub_initial) != 0) ||
  207. (slapi_UTF8CASECMP(f1->f_sub_final, f2->f_sub_final) != 0))
  208. return 1;
  209. /* match compare (would be expensive for large numbers of 'any'
  210. * substrings, which we don't expect to see)
  211. */
  212. for (a1 = f1->f_sub_any; a1 && *a1; a1++, count1++);
  213. for (a2 = f2->f_sub_any; a2 && *a2; a2++, count2++);
  214. if (count1 != count2)
  215. return 1;
  216. ret = 1; /* assume failure until done comparing */
  217. if (count1 > 20)
  218. tally = (int *)slapi_ch_malloc(count1);
  219. else
  220. tally = buf;
  221. if (!tally)
  222. goto done; /* this is bad; out of memory */
  223. for (i = 0; i < count1; i++)
  224. tally[i] = 0;
  225. /* ok. the theory is we tally up all the matched pairs we find,
  226. * stopping if we can't find a match that hasn't already been paired.
  227. */
  228. a1 = f1->f_sub_any;
  229. for (i = 0; i < count1; i++, a1++) {
  230. a2 = f2->f_sub_any;
  231. ok = 0;
  232. for (j = 0; j < count1; j++, a2++) {
  233. if (!tally[j] && (slapi_UTF8CASECMP(*a1, *a2) == 0)) {
  234. tally[j] = ok = 1;
  235. break;
  236. }
  237. }
  238. if (!ok)
  239. goto done; /* didn't find a match for that one */
  240. }
  241. /* done! matched */
  242. ret = 0;
  243. done:
  244. if ((count1 > 20) && tally)
  245. slapi_ch_free((void **)&tally);
  246. return ret;
  247. }
  248. /* same as above, but this time for lists of filter nodes */
  249. static int filter_compare_lists(struct slapi_filter *f1,
  250. struct slapi_filter *f2)
  251. {
  252. int buf[20], *tally;
  253. struct slapi_filter *fx1, *fx2;
  254. int count1 = 0, count2 = 0, ret, i, j, ok;
  255. for (fx1 = f1->f_list; fx1; fx1 = fx1->f_next, count1++);
  256. for (fx2 = f2->f_list; fx2; fx2 = fx2->f_next, count2++);
  257. if (count1 != count2)
  258. return 1;
  259. ret = 1;
  260. if (count1 > 20)
  261. tally = (int *)slapi_ch_malloc(count1);
  262. else
  263. tally = buf;
  264. if (!tally)
  265. goto done; /* very bad */
  266. for (i = 0; i < count1; i++)
  267. tally[i] = 0;
  268. /* brute-force match compare now */
  269. fx1 = f1->f_list;
  270. for (i = 0; i < count1; i++, fx1 = fx1->f_next) {
  271. fx2 = f2->f_list;
  272. ok = 0;
  273. for (j = 0; j < count1; j++, fx2 = fx2->f_next) {
  274. if (!tally[j] && (slapi_filter_compare(fx1, fx2) == 0)) {
  275. tally[j] = ok = 1;
  276. break;
  277. }
  278. }
  279. if (!ok)
  280. goto done; /* no match */
  281. }
  282. /* done! all matched */
  283. ret = 0;
  284. done:
  285. if ((count1 > 20) && tally)
  286. slapi_ch_free((void **)&tally);
  287. return ret;
  288. }
  289. /* returns 0 if two filters are "identical"
  290. * (items under AND/OR are allowed to be in different order)
  291. */
  292. int slapi_filter_compare(struct slapi_filter *f1, struct slapi_filter *f2)
  293. {
  294. Slapi_Value **key1, **key2;
  295. Slapi_PBlock *pb1, *pb2;
  296. struct berval *inval1[2], *inval2[2], **outval1, **outval2;
  297. int ret;
  298. Slapi_Attr sattr;
  299. LDAPDebug(LDAP_DEBUG_TRACE, "=> filter compare\n", 0, 0, 0);
  300. /* allow for the possibility that one of the filters hasn't had a hash
  301. * computed (and is therefore 0). this means that a filter node whose
  302. * hash is computed as 0 will always get compared the expensive way,
  303. * but this should happen VERY rarely (if ever).
  304. */
  305. if ((f1->f_hash != f2->f_hash) && (f1->f_hash) && (f2->f_hash)) {
  306. ret = 1;
  307. goto done;
  308. }
  309. /* brute-force comparison now */
  310. if (f1->f_choice != f2->f_choice) {
  311. ret = 1;
  312. goto done;
  313. }
  314. switch (f1->f_choice) {
  315. case LDAP_FILTER_EQUALITY:
  316. case LDAP_FILTER_GE:
  317. case LDAP_FILTER_LE:
  318. case LDAP_FILTER_APPROX:
  319. if (slapi_UTF8CASECMP(f1->f_avtype, f2->f_avtype) != 0) {
  320. ret = 1;
  321. break;
  322. }
  323. slapi_attr_init(&sattr, f1->f_ava.ava_type);
  324. key1 = get_normalized_value(&sattr, &f1->f_ava);
  325. if (key1) {
  326. key2 = get_normalized_value(&sattr, &f2->f_ava);
  327. if (key2) {
  328. ret = memcmp(slapi_value_get_string(key1[0]),
  329. slapi_value_get_string(key2[0]),
  330. slapi_value_get_length(key1[0]));
  331. valuearray_free(&key1);
  332. valuearray_free(&key2);
  333. attr_done(&sattr);
  334. break;
  335. }
  336. valuearray_free(&key1);
  337. }
  338. attr_done(&sattr);
  339. ret = 1;
  340. break;
  341. case LDAP_FILTER_PRESENT:
  342. ret = (slapi_UTF8CASECMP(f1->f_type, f2->f_type));
  343. break;
  344. case LDAP_FILTER_SUBSTRINGS:
  345. ret = filter_compare_substrings(f1, f2);
  346. break;
  347. case LDAP_FILTER_AND:
  348. case LDAP_FILTER_OR:
  349. case LDAP_FILTER_NOT:
  350. ret = filter_compare_lists(f1, f2);
  351. break;
  352. case LDAP_FILTER_EXTENDED:
  353. if ((slapi_UTF8CASECMP(f1->f_mr_oid, f2->f_mr_oid) != 0) ||
  354. (slapi_UTF8CASECMP(f1->f_mr_type, f2->f_mr_type) != 0) ||
  355. (f1->f_mr_dnAttrs != f2->f_mr_dnAttrs)) {
  356. ret = 1;
  357. break;
  358. }
  359. /* painstakingly compare the values (using the matching rule) */
  360. inval1[0] = &f1->f_mr_value;
  361. inval2[0] = &f2->f_mr_value;
  362. inval1[1] = inval2[1] = NULL;
  363. pb1 = get_mr_normval(f1->f_mr_oid, f1->f_mr_type, inval1, &outval1);
  364. pb2 = get_mr_normval(f2->f_mr_oid, f2->f_mr_type, inval2, &outval2);
  365. if (!pb1 || !pb2 || !outval1 || !outval2 || !outval1[0] ||
  366. !outval2[0] || (outval1[0]->bv_len != outval2[0]->bv_len) ||
  367. (memcmp(outval1[0]->bv_val, outval2[0]->bv_val,
  368. outval1[0]->bv_len) != 0)) {
  369. ret = 1;
  370. } else {
  371. ret = 0;
  372. }
  373. if (pb1)
  374. done_mr_normval(pb1);
  375. if (pb2)
  376. done_mr_normval(pb2);
  377. break;
  378. default:
  379. LDAPDebug(LDAP_DEBUG_ANY, "ERR can't handle filter %d\n", f1->f_choice,
  380. 0, 0);
  381. ret = 1;
  382. }
  383. done:
  384. LDAPDebug(LDAP_DEBUG_TRACE, "<= filter compare: %d\n", ret, 0, 0);
  385. return ret;
  386. }