curl_fnmatch.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. /***************************************************************************
  2. * _ _ ____ _
  3. * Project ___| | | | _ \| |
  4. * / __| | | | |_) | |
  5. * | (__| |_| | _ <| |___
  6. * \___|\___/|_| \_\_____|
  7. *
  8. * Copyright (C) 1998 - 2016, Daniel Stenberg, <[email protected]>, et al.
  9. *
  10. * This software is licensed as described in the file COPYING, which
  11. * you should have received as part of this distribution. The terms
  12. * are also available at https://curl.haxx.se/docs/copyright.html.
  13. *
  14. * You may opt to use, copy, modify, merge, publish, distribute and/or sell
  15. * copies of the Software, and permit persons to whom the Software is
  16. * furnished to do so, under the terms of the COPYING file.
  17. *
  18. * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  19. * KIND, either express or implied.
  20. *
  21. ***************************************************************************/
  22. #include "curl_setup.h"
  23. #include <curl/curl.h>
  24. #include "curl_fnmatch.h"
  25. #include "curl_memory.h"
  26. /* The last #include file should be: */
  27. #include "memdebug.h"
  /*
   * Layout of the "charset" work buffer that describes one bracket
   * expression:
   *
   *   [0 .. CURLFNM_CHARSET_LEN - 1]   one flag per literal byte value
   *   [CURLFNM_NEGATE]                 set when the expression is negated
   *   [CURLFNM_ALNUM .. CURLFNM_UPPER] one flag per [:class:] keyword
   *
   * CURLFNM_CHSET_SIZE is the size of the whole buffer. It reserves 15 slots
   * after the per-byte flags; the indexes defined below use the first 11.
   */
  #define CURLFNM_CHARSET_LEN (sizeof(char) * 256)
  #define CURLFNM_CHSET_SIZE (CURLFNM_CHARSET_LEN + 15)

  #define CURLFNM_NEGATE  CURLFNM_CHARSET_LEN

  #define CURLFNM_ALNUM   (CURLFNM_CHARSET_LEN + 1)
  #define CURLFNM_DIGIT   (CURLFNM_CHARSET_LEN + 2)
  #define CURLFNM_XDIGIT  (CURLFNM_CHARSET_LEN + 3)
  #define CURLFNM_ALPHA   (CURLFNM_CHARSET_LEN + 4)
  #define CURLFNM_PRINT   (CURLFNM_CHARSET_LEN + 5)
  #define CURLFNM_BLANK   (CURLFNM_CHARSET_LEN + 6)
  #define CURLFNM_LOWER   (CURLFNM_CHARSET_LEN + 7)
  #define CURLFNM_GRAPH   (CURLFNM_CHARSET_LEN + 8)
  #define CURLFNM_SPACE   (CURLFNM_CHARSET_LEN + 9)
  #define CURLFNM_UPPER   (CURLFNM_CHARSET_LEN + 10)
  /* States of the top-level pattern walker, loop(). */
  typedef enum {
    CURLFNM_LOOP_DEFAULT = 0, /* ordinary pattern character expected */
    CURLFNM_LOOP_BACKSLASH    /* previous pattern character was a backslash */
  } loop_state;

  /* States of the bracket expression parser, setcharset(). */
  typedef enum {
    CURLFNM_SCHS_DEFAULT = 0,  /* expecting any member of the set */
    CURLFNM_SCHS_MAYRANGE,     /* just stored a character that may start a
                                  range */
    CURLFNM_SCHS_MAYRANGE2,    /* saw the '-' after a possible range start */
    CURLFNM_SCHS_RIGHTBR,      /* ']' was the very first member of the set */
    CURLFNM_SCHS_RIGHTBRLEFTBR /* '[' directly followed that leading ']' */
  } setcharset_state;

  /* States of the [:class:] keyword parser, parsekeyword(). */
  typedef enum {
    CURLFNM_PKW_INIT = 0, /* collecting the letters of the class name */
    CURLFNM_PKW_DDOT      /* saw the ':' that ends the name, ']' must follow */
  } parsekey_state;
  /* Result codes shared by setcharset() and parsekeyword(). FAIL is zero so
     that callers can test the result as a boolean. */
  #define SETCHARSET_OK     1
  #define SETCHARSET_FAIL   0
  58. static int parsekeyword(unsigned char **pattern, unsigned char *charset)
  59. {
  60. parsekey_state state = CURLFNM_PKW_INIT;
  61. #define KEYLEN 10
  62. char keyword[KEYLEN] = { 0 };
  63. int found = FALSE;
  64. int i;
  65. unsigned char *p = *pattern;
  66. for(i = 0; !found; i++) {
  67. char c = *p++;
  68. if(i >= KEYLEN)
  69. return SETCHARSET_FAIL;
  70. switch(state) {
  71. case CURLFNM_PKW_INIT:
  72. if(ISALPHA(c) && ISLOWER(c))
  73. keyword[i] = c;
  74. else if(c == ':')
  75. state = CURLFNM_PKW_DDOT;
  76. else
  77. return 0;
  78. break;
  79. case CURLFNM_PKW_DDOT:
  80. if(c == ']')
  81. found = TRUE;
  82. else
  83. return SETCHARSET_FAIL;
  84. }
  85. }
  86. #undef KEYLEN
  87. *pattern = p; /* move caller's pattern pointer */
  88. if(strcmp(keyword, "digit") == 0)
  89. charset[CURLFNM_DIGIT] = 1;
  90. else if(strcmp(keyword, "alnum") == 0)
  91. charset[CURLFNM_ALNUM] = 1;
  92. else if(strcmp(keyword, "alpha") == 0)
  93. charset[CURLFNM_ALPHA] = 1;
  94. else if(strcmp(keyword, "xdigit") == 0)
  95. charset[CURLFNM_XDIGIT] = 1;
  96. else if(strcmp(keyword, "print") == 0)
  97. charset[CURLFNM_PRINT] = 1;
  98. else if(strcmp(keyword, "graph") == 0)
  99. charset[CURLFNM_GRAPH] = 1;
  100. else if(strcmp(keyword, "space") == 0)
  101. charset[CURLFNM_SPACE] = 1;
  102. else if(strcmp(keyword, "blank") == 0)
  103. charset[CURLFNM_BLANK] = 1;
  104. else if(strcmp(keyword, "upper") == 0)
  105. charset[CURLFNM_UPPER] = 1;
  106. else if(strcmp(keyword, "lower") == 0)
  107. charset[CURLFNM_LOWER] = 1;
  108. else
  109. return SETCHARSET_FAIL;
  110. return SETCHARSET_OK;
  111. }
  112. /* returns 1 (true) if pattern is OK, 0 if is bad ("p" is pattern pointer) */
  113. static int setcharset(unsigned char **p, unsigned char *charset)
  114. {
  115. setcharset_state state = CURLFNM_SCHS_DEFAULT;
  116. unsigned char rangestart = 0;
  117. unsigned char lastchar = 0;
  118. bool something_found = FALSE;
  119. unsigned char c;
  120. for(;;) {
  121. c = **p;
  122. switch(state) {
  123. case CURLFNM_SCHS_DEFAULT:
  124. if(ISALNUM(c)) { /* ASCII value */
  125. rangestart = c;
  126. charset[c] = 1;
  127. (*p)++;
  128. state = CURLFNM_SCHS_MAYRANGE;
  129. something_found = TRUE;
  130. }
  131. else if(c == ']') {
  132. if(something_found)
  133. return SETCHARSET_OK;
  134. else
  135. something_found = TRUE;
  136. state = CURLFNM_SCHS_RIGHTBR;
  137. charset[c] = 1;
  138. (*p)++;
  139. }
  140. else if(c == '[') {
  141. char c2 = *((*p)+1);
  142. if(c2 == ':') { /* there has to be a keyword */
  143. (*p) += 2;
  144. if(parsekeyword(p, charset)) {
  145. state = CURLFNM_SCHS_DEFAULT;
  146. }
  147. else
  148. return SETCHARSET_FAIL;
  149. }
  150. else {
  151. charset[c] = 1;
  152. (*p)++;
  153. }
  154. something_found = TRUE;
  155. }
  156. else if(c == '?' || c == '*') {
  157. something_found = TRUE;
  158. charset[c] = 1;
  159. (*p)++;
  160. }
  161. else if(c == '^' || c == '!') {
  162. if(!something_found) {
  163. if(charset[CURLFNM_NEGATE]) {
  164. charset[c] = 1;
  165. something_found = TRUE;
  166. }
  167. else
  168. charset[CURLFNM_NEGATE] = 1; /* negate charset */
  169. }
  170. else
  171. charset[c] = 1;
  172. (*p)++;
  173. }
  174. else if(c == '\\') {
  175. c = *(++(*p));
  176. if(ISPRINT((c))) {
  177. something_found = TRUE;
  178. state = CURLFNM_SCHS_MAYRANGE;
  179. charset[c] = 1;
  180. rangestart = c;
  181. (*p)++;
  182. }
  183. else
  184. return SETCHARSET_FAIL;
  185. }
  186. else if(c == '\0') {
  187. return SETCHARSET_FAIL;
  188. }
  189. else {
  190. charset[c] = 1;
  191. (*p)++;
  192. something_found = TRUE;
  193. }
  194. break;
  195. case CURLFNM_SCHS_MAYRANGE:
  196. if(c == '-') {
  197. charset[c] = 1;
  198. (*p)++;
  199. lastchar = '-';
  200. state = CURLFNM_SCHS_MAYRANGE2;
  201. }
  202. else if(c == '[') {
  203. state = CURLFNM_SCHS_DEFAULT;
  204. }
  205. else if(ISALNUM(c)) {
  206. charset[c] = 1;
  207. (*p)++;
  208. }
  209. else if(c == '\\') {
  210. c = *(++(*p));
  211. if(ISPRINT(c)) {
  212. charset[c] = 1;
  213. (*p)++;
  214. }
  215. else
  216. return SETCHARSET_FAIL;
  217. }
  218. else if(c == ']') {
  219. return SETCHARSET_OK;
  220. }
  221. else
  222. return SETCHARSET_FAIL;
  223. break;
  224. case CURLFNM_SCHS_MAYRANGE2:
  225. if(c == '\\') {
  226. c = *(++(*p));
  227. if(!ISPRINT(c))
  228. return SETCHARSET_FAIL;
  229. }
  230. if(c == ']') {
  231. return SETCHARSET_OK;
  232. }
  233. else if(c == '\\') {
  234. c = *(++(*p));
  235. if(ISPRINT(c)) {
  236. charset[c] = 1;
  237. state = CURLFNM_SCHS_DEFAULT;
  238. (*p)++;
  239. }
  240. else
  241. return SETCHARSET_FAIL;
  242. }
  243. if(c >= rangestart) {
  244. if((ISLOWER(c) && ISLOWER(rangestart)) ||
  245. (ISDIGIT(c) && ISDIGIT(rangestart)) ||
  246. (ISUPPER(c) && ISUPPER(rangestart))) {
  247. charset[lastchar] = 0;
  248. rangestart++;
  249. while(rangestart++ <= c)
  250. charset[rangestart-1] = 1;
  251. (*p)++;
  252. state = CURLFNM_SCHS_DEFAULT;
  253. }
  254. else
  255. return SETCHARSET_FAIL;
  256. }
  257. break;
  258. case CURLFNM_SCHS_RIGHTBR:
  259. if(c == '[') {
  260. state = CURLFNM_SCHS_RIGHTBRLEFTBR;
  261. charset[c] = 1;
  262. (*p)++;
  263. }
  264. else if(c == ']') {
  265. return SETCHARSET_OK;
  266. }
  267. else if(c == '\0') {
  268. return SETCHARSET_FAIL;
  269. }
  270. else if(ISPRINT(c)) {
  271. charset[c] = 1;
  272. (*p)++;
  273. state = CURLFNM_SCHS_DEFAULT;
  274. }
  275. else
  276. /* used 'goto fail' instead of 'return SETCHARSET_FAIL' to avoid a
  277. * nonsense warning 'statement not reached' at end of the fnc when
  278. * compiling on Solaris */
  279. goto fail;
  280. break;
  281. case CURLFNM_SCHS_RIGHTBRLEFTBR:
  282. if(c == ']') {
  283. return SETCHARSET_OK;
  284. }
  285. else {
  286. state = CURLFNM_SCHS_DEFAULT;
  287. charset[c] = 1;
  288. (*p)++;
  289. }
  290. break;
  291. }
  292. }
  293. fail:
  294. return SETCHARSET_FAIL;
  295. }
  296. static int loop(const unsigned char *pattern, const unsigned char *string)
  297. {
  298. loop_state state = CURLFNM_LOOP_DEFAULT;
  299. unsigned char *p = (unsigned char *)pattern;
  300. unsigned char *s = (unsigned char *)string;
  301. unsigned char charset[CURLFNM_CHSET_SIZE] = { 0 };
  302. int rc = 0;
  303. for(;;) {
  304. switch(state) {
  305. case CURLFNM_LOOP_DEFAULT:
  306. if(*p == '*') {
  307. while(*(p+1) == '*') /* eliminate multiple stars */
  308. p++;
  309. if(*s == '\0' && *(p+1) == '\0')
  310. return CURL_FNMATCH_MATCH;
  311. rc = loop(p + 1, s); /* *.txt matches .txt <=> .txt matches .txt */
  312. if(rc == CURL_FNMATCH_MATCH)
  313. return CURL_FNMATCH_MATCH;
  314. if(*s) /* let the star eat up one character */
  315. s++;
  316. else
  317. return CURL_FNMATCH_NOMATCH;
  318. }
  319. else if(*p == '?') {
  320. if(ISPRINT(*s)) {
  321. s++;
  322. p++;
  323. }
  324. else if(*s == '\0')
  325. return CURL_FNMATCH_NOMATCH;
  326. else
  327. return CURL_FNMATCH_FAIL; /* cannot deal with other character */
  328. }
  329. else if(*p == '\0') {
  330. if(*s == '\0')
  331. return CURL_FNMATCH_MATCH;
  332. else
  333. return CURL_FNMATCH_NOMATCH;
  334. }
  335. else if(*p == '\\') {
  336. state = CURLFNM_LOOP_BACKSLASH;
  337. p++;
  338. }
  339. else if(*p == '[') {
  340. unsigned char *pp = p+1; /* cannot handle with pointer to register */
  341. if(setcharset(&pp, charset)) {
  342. int found = FALSE;
  343. if(charset[(unsigned int)*s])
  344. found = TRUE;
  345. else if(charset[CURLFNM_ALNUM])
  346. found = ISALNUM(*s);
  347. else if(charset[CURLFNM_ALPHA])
  348. found = ISALPHA(*s);
  349. else if(charset[CURLFNM_DIGIT])
  350. found = ISDIGIT(*s);
  351. else if(charset[CURLFNM_XDIGIT])
  352. found = ISXDIGIT(*s);
  353. else if(charset[CURLFNM_PRINT])
  354. found = ISPRINT(*s);
  355. else if(charset[CURLFNM_SPACE])
  356. found = ISSPACE(*s);
  357. else if(charset[CURLFNM_UPPER])
  358. found = ISUPPER(*s);
  359. else if(charset[CURLFNM_LOWER])
  360. found = ISLOWER(*s);
  361. else if(charset[CURLFNM_BLANK])
  362. found = ISBLANK(*s);
  363. else if(charset[CURLFNM_GRAPH])
  364. found = ISGRAPH(*s);
  365. if(charset[CURLFNM_NEGATE])
  366. found = !found;
  367. if(found) {
  368. p = pp+1;
  369. s++;
  370. memset(charset, 0, CURLFNM_CHSET_SIZE);
  371. }
  372. else
  373. return CURL_FNMATCH_NOMATCH;
  374. }
  375. else
  376. return CURL_FNMATCH_FAIL;
  377. }
  378. else {
  379. if(*p++ != *s++)
  380. return CURL_FNMATCH_NOMATCH;
  381. }
  382. break;
  383. case CURLFNM_LOOP_BACKSLASH:
  384. if(ISPRINT(*p)) {
  385. if(*p++ == *s++)
  386. state = CURLFNM_LOOP_DEFAULT;
  387. else
  388. return CURL_FNMATCH_NOMATCH;
  389. }
  390. else
  391. return CURL_FNMATCH_FAIL;
  392. break;
  393. }
  394. }
  395. }
  396. /*
  397. * @unittest: 1307
  398. */
  399. int Curl_fnmatch(void *ptr, const char *pattern, const char *string)
  400. {
  401. (void)ptr; /* the argument is specified by the curl_fnmatch_callback
  402. prototype, but not used by Curl_fnmatch() */
  403. if(!pattern || !string) {
  404. return CURL_FNMATCH_FAIL;
  405. }
  406. return loop((unsigned char *)pattern, (unsigned char *)string);
  407. }