apr_fnmatch.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482
  1. /* Licensed to the Apache Software Foundation (ASF) under one or more
  2. * contributor license agreements. See the NOTICE file distributed with
  3. * this work for additional information regarding copyright ownership.
  4. * The ASF licenses this file to You under the Apache License, Version 2.0
  5. * (the "License"); you may not use this file except in compliance with
  6. * the License. You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
/* Derived from The Open Group Base Specifications Issue 7, IEEE Std 1003.1-2008
 * as described in:
 *   http://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html
 *
 * Filename pattern matches defined in section 2.13, "Pattern Matching Notation"
 * from chapter 2. "Shell Command Language"
 *   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13
 * where: 1. A bracket expression starting with an unquoted <circumflex> '^'
 * character CONTINUES to specify a non-matching list; 2. an explicit <period> '.'
 * in a bracket expression matching list, e.g. "[.abc]" does NOT match a leading
 * <period> in a filename; 3. a <left-square-bracket> '[' which does not introduce
 * a valid bracket expression is treated as an ordinary character; 4. a differing
 * number of consecutive slashes within pattern and string will NOT match;
 * 5. a trailing '\' in FNM_ESCAPE mode is treated as an ordinary '\' character.
 *
 * Bracket expansion defined in section 9.3.5, "RE Bracket Expression",
 * from chapter 9, "Regular Expressions"
 *   http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05
 * with no support for collating symbols, equivalence class expressions or
 * character class expressions.  A partial range expression with a leading
 * hyphen following a valid range expression will match only the ordinary
 * <hyphen> and the ending character (e.g. "[a-m-z]" will match characters
 * 'a' through 'm', a <hyphen> '-', or a 'z').
 *
 * NOTE: Only POSIX/C single byte locales are correctly supported at this time.
 * Notably, non-POSIX locales with FNM_CASEFOLD produce undefined results,
 * particularly in ranges of mixed case (e.g. "[A-z]") or spanning alpha and
 * nonalpha characters within a range.
 *
 * XXX comments below indicate porting required for multi-byte character sets
 * and non-POSIX locale collation orders; requires mbr* APIs to track shift
 * state of pattern and string (rewinding pattern and string repeatedly).
 *
 * Certain parts of the code assume 0x00-0x3F are unique with any MBCS (e.g.
 * UTF-8, SHIFT-JIS, etc).  Any implementation allowing '\' as an alternate
 * path delimiter must be aware that 0x5C is NOT unique within SHIFT-JIS.
 */
  53. #include "apr_file_info.h"
  54. #include "apr_fnmatch.h"
  55. #include "apr_tables.h"
  56. #include "apr_lib.h"
  57. #include "apr_strings.h"
  58. #include <string.h>
  59. #if APR_HAVE_CTYPE_H
  60. # include <ctype.h>
  61. #endif
  62. /* Most MBCS/collation/case issues handled here. Wildcard '*' is not handled.
  63. * EOS '\0' and the FNM_PATHNAME '/' delimiters are not advanced over,
  64. * however the "\/" sequence is advanced to '/'.
  65. *
  66. * Both pattern and string are **char to support pointer increment of arbitrary
  67. * multibyte characters for the given locale, in a later iteration of this code
  68. */
  69. static APR_INLINE int fnmatch_ch(const char **pattern, const char **string, int flags)
  70. {
  71. const char * const mismatch = *pattern;
  72. const int nocase = !!(flags & APR_FNM_CASE_BLIND);
  73. const int escape = !(flags & APR_FNM_NOESCAPE);
  74. const int slash = !!(flags & APR_FNM_PATHNAME);
  75. int result = APR_FNM_NOMATCH;
  76. const char *startch;
  77. int negate;
  78. if (**pattern == '[')
  79. {
  80. ++*pattern;
  81. /* Handle negation, either leading ! or ^ operators (never both) */
  82. negate = ((**pattern == '!') || (**pattern == '^'));
  83. if (negate)
  84. ++*pattern;
  85. /* ']' is an ordinary character at the start of the range pattern */
  86. if (**pattern == ']')
  87. goto leadingclosebrace;
  88. while (**pattern)
  89. {
  90. if (**pattern == ']') {
  91. ++*pattern;
  92. /* XXX: Fix for MBCS character width */
  93. ++*string;
  94. return (result ^ negate);
  95. }
  96. if (escape && (**pattern == '\\')) {
  97. ++*pattern;
  98. /* Patterns must be terminated with ']', not EOS */
  99. if (!**pattern)
  100. break;
  101. }
  102. /* Patterns must be terminated with ']' not '/' */
  103. if (slash && (**pattern == '/'))
  104. break;
  105. leadingclosebrace:
  106. /* Look at only well-formed range patterns;
  107. * "x-]" is not allowed unless escaped ("x-\]")
  108. * XXX: Fix for locale/MBCS character width
  109. */
  110. if (((*pattern)[1] == '-') && ((*pattern)[2] != ']'))
  111. {
  112. startch = *pattern;
  113. *pattern += (escape && ((*pattern)[2] == '\\')) ? 3 : 2;
  114. /* NOT a properly balanced [expr] pattern, EOS terminated
  115. * or ranges containing a slash in FNM_PATHNAME mode pattern
  116. * fall out to to the rewind and test '[' literal code path
  117. */
  118. if (!**pattern || (slash && (**pattern == '/')))
  119. break;
  120. /* XXX: handle locale/MBCS comparison, advance by MBCS char width */
  121. if ((**string >= *startch) && (**string <= **pattern))
  122. result = 0;
  123. else if (nocase && (isupper(**string) || isupper(*startch)
  124. || isupper(**pattern))
  125. && (tolower(**string) >= tolower(*startch))
  126. && (tolower(**string) <= tolower(**pattern)))
  127. result = 0;
  128. ++*pattern;
  129. continue;
  130. }
  131. /* XXX: handle locale/MBCS comparison, advance by MBCS char width */
  132. if ((**string == **pattern))
  133. result = 0;
  134. else if (nocase && (isupper(**string) || isupper(**pattern))
  135. && (tolower(**string) == tolower(**pattern)))
  136. result = 0;
  137. ++*pattern;
  138. }
  139. /* NOT a properly balanced [expr] pattern; Rewind
  140. * and reset result to test '[' literal
  141. */
  142. *pattern = mismatch;
  143. result = APR_FNM_NOMATCH;
  144. }
  145. else if (**pattern == '?') {
  146. /* Optimize '?' match before unescaping **pattern */
  147. if (!**string || (slash && (**string == '/')))
  148. return APR_FNM_NOMATCH;
  149. result = 0;
  150. goto fnmatch_ch_success;
  151. }
  152. else if (escape && (**pattern == '\\') && (*pattern)[1]) {
  153. ++*pattern;
  154. }
  155. /* XXX: handle locale/MBCS comparison, advance by the MBCS char width */
  156. if (**string == **pattern)
  157. result = 0;
  158. else if (nocase && (isupper(**string) || isupper(**pattern))
  159. && (tolower(**string) == tolower(**pattern)))
  160. result = 0;
  161. /* Refuse to advance over trailing slash or nulls
  162. */
  163. if (!**string || !**pattern || (slash && ((**string == '/') || (**pattern == '/'))))
  164. return result;
  165. fnmatch_ch_success:
  166. ++*pattern;
  167. ++*string;
  168. return result;
  169. }
  170. APR_DECLARE(int) apr_fnmatch(const char *pattern, const char *string, int flags)
  171. {
  172. static const char dummystring[2] = {' ', 0};
  173. const int escape = !(flags & APR_FNM_NOESCAPE);
  174. const int slash = !!(flags & APR_FNM_PATHNAME);
  175. const char *strendseg;
  176. const char *dummyptr;
  177. const char *matchptr;
  178. int wild;
  179. /* For '*' wild processing only; surpress 'used before initialization'
  180. * warnings with dummy initialization values;
  181. */
  182. const char *strstartseg = NULL;
  183. const char *mismatch = NULL;
  184. int matchlen = 0;
  185. if (*pattern == '*')
  186. goto firstsegment;
  187. while (*pattern && *string)
  188. {
  189. /* Pre-decode "\/" which has no special significance, and
  190. * match balanced slashes, starting a new segment pattern
  191. */
  192. if (slash && escape && (*pattern == '\\') && (pattern[1] == '/'))
  193. ++pattern;
  194. if (slash && (*pattern == '/') && (*string == '/')) {
  195. ++pattern;
  196. ++string;
  197. }
  198. firstsegment:
  199. /* At the beginning of each segment, validate leading period behavior.
  200. */
  201. if ((flags & APR_FNM_PERIOD) && (*string == '.'))
  202. {
  203. if (*pattern == '.')
  204. ++pattern;
  205. else if (escape && (*pattern == '\\') && (pattern[1] == '.'))
  206. pattern += 2;
  207. else
  208. return APR_FNM_NOMATCH;
  209. ++string;
  210. }
  211. /* Determine the end of string segment
  212. *
  213. * Presumes '/' character is unique, not composite in any MBCS encoding
  214. */
  215. if (slash) {
  216. strendseg = strchr(string, '/');
  217. if (!strendseg)
  218. strendseg = strchr(string, '\0');
  219. }
  220. else {
  221. strendseg = strchr(string, '\0');
  222. }
  223. /* Allow pattern '*' to be consumed even with no remaining string to match
  224. */
  225. while (*pattern)
  226. {
  227. if ((string > strendseg)
  228. || ((string == strendseg) && (*pattern != '*')))
  229. break;
  230. if (slash && ((*pattern == '/')
  231. || (escape && (*pattern == '\\')
  232. && (pattern[1] == '/'))))
  233. break;
  234. /* Reduce groups of '*' and '?' to n '?' matches
  235. * followed by one '*' test for simplicity
  236. */
  237. for (wild = 0; ((*pattern == '*') || (*pattern == '?')); ++pattern)
  238. {
  239. if (*pattern == '*') {
  240. wild = 1;
  241. }
  242. else if (string < strendseg) { /* && (*pattern == '?') */
  243. /* XXX: Advance 1 char for MBCS locale */
  244. ++string;
  245. }
  246. else { /* (string >= strendseg) && (*pattern == '?') */
  247. return APR_FNM_NOMATCH;
  248. }
  249. }
  250. if (wild)
  251. {
  252. strstartseg = string;
  253. mismatch = pattern;
  254. /* Count fixed (non '*') char matches remaining in pattern
  255. * excluding '/' (or "\/") and '*'
  256. */
  257. for (matchptr = pattern, matchlen = 0; 1; ++matchlen)
  258. {
  259. if ((*matchptr == '\0')
  260. || (slash && ((*matchptr == '/')
  261. || (escape && (*matchptr == '\\')
  262. && (matchptr[1] == '/')))))
  263. {
  264. /* Compare precisely this many trailing string chars,
  265. * the resulting match needs no wildcard loop
  266. */
  267. /* XXX: Adjust for MBCS */
  268. if (string + matchlen > strendseg)
  269. return APR_FNM_NOMATCH;
  270. string = strendseg - matchlen;
  271. wild = 0;
  272. break;
  273. }
  274. if (*matchptr == '*')
  275. {
  276. /* Ensure at least this many trailing string chars remain
  277. * for the first comparison
  278. */
  279. /* XXX: Adjust for MBCS */
  280. if (string + matchlen > strendseg)
  281. return APR_FNM_NOMATCH;
  282. /* Begin first wild comparison at the current position */
  283. break;
  284. }
  285. /* Skip forward in pattern by a single character match
  286. * Use a dummy fnmatch_ch() test to count one "[range]" escape
  287. */
  288. /* XXX: Adjust for MBCS */
  289. if (escape && (*matchptr == '\\') && matchptr[1]) {
  290. matchptr += 2;
  291. }
  292. else if (*matchptr == '[') {
  293. dummyptr = dummystring;
  294. fnmatch_ch(&matchptr, &dummyptr, flags);
  295. }
  296. else {
  297. ++matchptr;
  298. }
  299. }
  300. }
  301. /* Incrementally match string against the pattern
  302. */
  303. while (*pattern && (string < strendseg))
  304. {
  305. /* Success; begin a new wild pattern search
  306. */
  307. if (*pattern == '*')
  308. break;
  309. if (slash && ((*string == '/')
  310. || (*pattern == '/')
  311. || (escape && (*pattern == '\\')
  312. && (pattern[1] == '/'))))
  313. break;
  314. /* Compare ch's (the pattern is advanced over "\/" to the '/',
  315. * but slashes will mismatch, and are not consumed)
  316. */
  317. if (!fnmatch_ch(&pattern, &string, flags))
  318. continue;
  319. /* Failed to match, loop against next char offset of string segment
  320. * until not enough string chars remain to match the fixed pattern
  321. */
  322. if (wild) {
  323. /* XXX: Advance 1 char for MBCS locale */
  324. string = ++strstartseg;
  325. if (string + matchlen > strendseg)
  326. return APR_FNM_NOMATCH;
  327. pattern = mismatch;
  328. continue;
  329. }
  330. else
  331. return APR_FNM_NOMATCH;
  332. }
  333. }
  334. if (*string && !(slash && (*string == '/')))
  335. return APR_FNM_NOMATCH;
  336. if (*pattern && !(slash && ((*pattern == '/')
  337. || (escape && (*pattern == '\\')
  338. && (pattern[1] == '/')))))
  339. return APR_FNM_NOMATCH;
  340. }
  341. /* Where both pattern and string are at EOS, declare success
  342. */
  343. if (!*string && !*pattern)
  344. return 0;
  345. /* pattern didn't match to the end of string */
  346. return APR_FNM_NOMATCH;
  347. }
  348. /* This function is an Apache addition
  349. * return non-zero if pattern has any glob chars in it
  350. * @bug Function does not distinguish for FNM_PATHNAME mode, which renders
  351. * a false positive for test[/]this (which is not a range, but
  352. * seperate test[ and ]this segments and no glob.)
  353. * @bug Function does not distinguish for non-FNM_ESCAPE mode.
  354. * @bug Function does not parse []] correctly
  355. * Solution may be to use fnmatch_ch() to walk the patterns?
  356. */
  357. APR_DECLARE(int) apr_fnmatch_test(const char *pattern)
  358. {
  359. int nesting;
  360. nesting = 0;
  361. while (*pattern) {
  362. switch (*pattern) {
  363. case '?':
  364. case '*':
  365. return 1;
  366. case '\\':
  367. if (*++pattern == '\0') {
  368. return 0;
  369. }
  370. break;
  371. case '[': /* '[' is only a glob if it has a matching ']' */
  372. ++nesting;
  373. break;
  374. case ']':
  375. if (nesting) {
  376. return 1;
  377. }
  378. break;
  379. }
  380. ++pattern; }
  381. return 0;
  382. }
  383. /* Find all files matching the specified pattern */
  384. APR_DECLARE(apr_status_t) apr_match_glob(const char *pattern,
  385. apr_array_header_t **result,
  386. apr_pool_t *p)
  387. {
  388. apr_dir_t *dir;
  389. apr_finfo_t finfo;
  390. apr_status_t rv;
  391. char *path;
  392. /* XXX So, this is kind of bogus. Basically, I need to strip any leading
  393. * directories off the pattern, but there is no portable way to do that.
  394. * So, for now we just find the last occurance of '/' and if that doesn't
  395. * return anything, then we look for '\'. This means that we could
  396. * screw up on unix if the pattern is something like "foo\.*" That '\'
  397. * isn't a directory delimiter, it is a part of the filename. To fix this,
  398. * we really need apr_filepath_basename, which will be coming as soon as
  399. * I get to it. rbb
  400. */
  401. char *idx = strrchr(pattern, '/');
  402. if (idx == NULL) {
  403. idx = strrchr(pattern, '\\');
  404. }
  405. if (idx == NULL) {
  406. path = ".";
  407. }
  408. else {
  409. path = apr_pstrndup(p, pattern, idx - pattern);
  410. pattern = idx + 1;
  411. }
  412. *result = apr_array_make(p, 0, sizeof(char *));
  413. rv = apr_dir_open(&dir, path, p);
  414. if (rv != APR_SUCCESS) {
  415. return rv;
  416. }
  417. while (apr_dir_read(&finfo, APR_FINFO_NAME, dir) == APR_SUCCESS) {
  418. if (apr_fnmatch(pattern, finfo.name, 0) == APR_SUCCESS) {
  419. *(const char **)apr_array_push(*result) = apr_pstrdup(p, finfo.name);
  420. }
  421. }
  422. apr_dir_close(dir);
  423. return APR_SUCCESS;
  424. }