misc_tests.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
  1. /* Tests in the "miscellaneous" test case for the Expat test suite
  2. __ __ _
  3. ___\ \/ /_ __ __ _| |_
  4. / _ \\ /| '_ \ / _` | __|
  5. | __// \| |_) | (_| | |_
  6. \___/_/\_\ .__/ \__,_|\__|
  7. |_| XML parser
  8. Copyright (c) 2001-2006 Fred L. Drake, Jr. <[email protected]>
  9. Copyright (c) 2003 Greg Stein <[email protected]>
  10. Copyright (c) 2005-2007 Steven Solie <[email protected]>
  11. Copyright (c) 2005-2012 Karl Waclawek <[email protected]>
  12. Copyright (c) 2016-2024 Sebastian Pipping <[email protected]>
  13. Copyright (c) 2017-2022 Rhodri James <[email protected]>
  14. Copyright (c) 2017 Joe Orton <[email protected]>
  15. Copyright (c) 2017 José Gutiérrez de la Concha <[email protected]>
  16. Copyright (c) 2018 Marco Maggi <[email protected]>
  17. Copyright (c) 2019 David Loffredo <[email protected]>
  18. Copyright (c) 2020 Tim Gates <[email protected]>
  19. Copyright (c) 2021 Donghee Na <[email protected]>
  20. Copyright (c) 2023 Sony Corporation / Snild Dolkow <[email protected]>
  21. Licensed under the MIT license:
  22. Permission is hereby granted, free of charge, to any person obtaining
  23. a copy of this software and associated documentation files (the
  24. "Software"), to deal in the Software without restriction, including
  25. without limitation the rights to use, copy, modify, merge, publish,
  26. distribute, sublicense, and/or sell copies of the Software, and to permit
  27. persons to whom the Software is furnished to do so, subject to the
  28. following conditions:
  29. The above copyright notice and this permission notice shall be included
  30. in all copies or substantial portions of the Software.
  31. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  32. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  33. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
  34. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
  35. DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  36. OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  37. USE OR OTHER DEALINGS IN THE SOFTWARE.
  38. */
  39. #if defined(NDEBUG)
  40. # undef NDEBUG /* because test suite relies on assert(...) at the moment */
  41. #endif
  42. #include <assert.h>
  43. #include <string.h>
  44. #include "expat_config.h"
  45. #include "expat.h"
  46. #include "internal.h"
  47. #include "minicheck.h"
  48. #include "memcheck.h"
  49. #include "common.h"
  50. #include "ascii.h" /* for ASCII_xxx */
  51. #include "handlers.h"
  52. #include "misc_tests.h"
  53. /* Test that a failure to allocate the parser structure fails gracefully */
  54. START_TEST(test_misc_alloc_create_parser) {
  55. XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
  56. unsigned int i;
  57. const unsigned int max_alloc_count = 10;
  58. /* Something this simple shouldn't need more than 10 allocations */
  59. for (i = 0; i < max_alloc_count; i++) {
  60. g_allocation_count = i;
  61. g_parser = XML_ParserCreate_MM(NULL, &memsuite, NULL);
  62. if (g_parser != NULL)
  63. break;
  64. }
  65. if (i == 0)
  66. fail("Parser unexpectedly ignored failing allocator");
  67. else if (i == max_alloc_count)
  68. fail("Parser not created with max allocation count");
  69. }
  70. END_TEST
  71. /* Test memory allocation failures for a parser with an encoding */
  72. START_TEST(test_misc_alloc_create_parser_with_encoding) {
  73. XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
  74. unsigned int i;
  75. const unsigned int max_alloc_count = 10;
  76. /* Try several levels of allocation */
  77. for (i = 0; i < max_alloc_count; i++) {
  78. g_allocation_count = i;
  79. g_parser = XML_ParserCreate_MM(XCS("us-ascii"), &memsuite, NULL);
  80. if (g_parser != NULL)
  81. break;
  82. }
  83. if (i == 0)
  84. fail("Parser ignored failing allocator");
  85. else if (i == max_alloc_count)
  86. fail("Parser not created with max allocation count");
  87. }
  88. END_TEST
  89. /* Test that freeing a NULL parser doesn't cause an explosion.
  90. * (Not actually tested anywhere else)
  91. */
  92. START_TEST(test_misc_null_parser) {
  93. XML_ParserFree(NULL);
  94. }
  95. END_TEST
  96. #if defined(__has_feature)
  97. # if __has_feature(undefined_behavior_sanitizer)
  98. # define EXPAT_TESTS_UBSAN 1
  99. # else
  100. # define EXPAT_TESTS_UBSAN 0
  101. # endif
  102. #else
  103. # define EXPAT_TESTS_UBSAN 0
  104. #endif
  105. /* Test that XML_ErrorString rejects out-of-range codes */
  106. START_TEST(test_misc_error_string) {
  107. #if ! EXPAT_TESTS_UBSAN // because this would trigger UBSan
  108. union {
  109. enum XML_Error xml_error;
  110. int integer;
  111. } trickery;
  112. assert_true(sizeof(enum XML_Error) == sizeof(int)); // self-test
  113. trickery.integer = -1;
  114. if (XML_ErrorString(trickery.xml_error) != NULL)
  115. fail("Negative error code not rejected");
  116. trickery.integer = 100;
  117. if (XML_ErrorString(trickery.xml_error) != NULL)
  118. fail("Large error code not rejected");
  119. #endif
  120. }
  121. END_TEST
  122. /* Test the version information is consistent */
  123. /* Since we are working in XML_LChars (potentially 16-bits), we
  124. * can't use the standard C library functions for character
  125. * manipulation and have to roll our own.
  126. */
  127. static int
  128. parse_version(const XML_LChar *version_text,
  129. XML_Expat_Version *version_struct) {
  130. if (! version_text)
  131. return XML_FALSE;
  132. while (*version_text != 0x00) {
  133. if (*version_text >= ASCII_0 && *version_text <= ASCII_9)
  134. break;
  135. version_text++;
  136. }
  137. if (*version_text == 0x00)
  138. return XML_FALSE;
  139. /* version_struct->major = strtoul(version_text, 10, &version_text) */
  140. version_struct->major = 0;
  141. while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
  142. version_struct->major
  143. = 10 * version_struct->major + (*version_text++ - ASCII_0);
  144. }
  145. if (*version_text++ != ASCII_PERIOD)
  146. return XML_FALSE;
  147. /* Now for the minor version number */
  148. version_struct->minor = 0;
  149. while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
  150. version_struct->minor
  151. = 10 * version_struct->minor + (*version_text++ - ASCII_0);
  152. }
  153. if (*version_text++ != ASCII_PERIOD)
  154. return XML_FALSE;
  155. /* Finally the micro version number */
  156. version_struct->micro = 0;
  157. while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
  158. version_struct->micro
  159. = 10 * version_struct->micro + (*version_text++ - ASCII_0);
  160. }
  161. if (*version_text != 0x00)
  162. return XML_FALSE;
  163. return XML_TRUE;
  164. }
  165. static int
  166. versions_equal(const XML_Expat_Version *first,
  167. const XML_Expat_Version *second) {
  168. return (first->major == second->major && first->minor == second->minor
  169. && first->micro == second->micro);
  170. }
  171. START_TEST(test_misc_version) {
  172. XML_Expat_Version read_version = XML_ExpatVersionInfo();
  173. /* Silence compiler warning with the following assignment */
  174. XML_Expat_Version parsed_version = {0, 0, 0};
  175. const XML_LChar *version_text = XML_ExpatVersion();
  176. if (version_text == NULL)
  177. fail("Could not obtain version text");
  178. assert(version_text != NULL);
  179. if (! parse_version(version_text, &parsed_version))
  180. fail("Unable to parse version text");
  181. if (! versions_equal(&read_version, &parsed_version))
  182. fail("Version mismatch");
  183. if (xcstrcmp(version_text, XCS("expat_2.6.1"))) /* needs bump on releases */
  184. fail("XML_*_VERSION in expat.h out of sync?\n");
  185. }
  186. END_TEST
  187. /* Test feature information */
  188. START_TEST(test_misc_features) {
  189. const XML_Feature *features = XML_GetFeatureList();
  190. /* Prevent problems with double-freeing parsers */
  191. g_parser = NULL;
  192. if (features == NULL) {
  193. fail("Failed to get feature information");
  194. } else {
  195. /* Loop through the features checking what we can */
  196. while (features->feature != XML_FEATURE_END) {
  197. switch (features->feature) {
  198. case XML_FEATURE_SIZEOF_XML_CHAR:
  199. if (features->value != sizeof(XML_Char))
  200. fail("Incorrect size of XML_Char");
  201. break;
  202. case XML_FEATURE_SIZEOF_XML_LCHAR:
  203. if (features->value != sizeof(XML_LChar))
  204. fail("Incorrect size of XML_LChar");
  205. break;
  206. default:
  207. break;
  208. }
  209. features++;
  210. }
  211. }
  212. }
  213. END_TEST
  214. /* Regression test for GitHub Issue #17: memory leak parsing attribute
  215. * values with mixed bound and unbound namespaces.
  216. */
  217. START_TEST(test_misc_attribute_leak) {
  218. const char *text = "<D xmlns:L=\"D\" l:a='' L:a=''/>";
  219. XML_Memory_Handling_Suite memsuite
  220. = {tracking_malloc, tracking_realloc, tracking_free};
  221. g_parser = XML_ParserCreate_MM(XCS("UTF-8"), &memsuite, XCS("\n"));
  222. expect_failure(text, XML_ERROR_UNBOUND_PREFIX, "Unbound prefixes not found");
  223. XML_ParserFree(g_parser);
  224. /* Prevent the teardown trying to double free */
  225. g_parser = NULL;
  226. if (! tracking_report())
  227. fail("Memory leak found");
  228. }
  229. END_TEST
  230. /* Test parser created for UTF-16LE is successful */
  231. START_TEST(test_misc_utf16le) {
  232. const char text[] =
  233. /* <?xml version='1.0'?><q>Hi</q> */
  234. "<\0?\0x\0m\0l\0 \0"
  235. "v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0?\0>\0"
  236. "<\0q\0>\0H\0i\0<\0/\0q\0>\0";
  237. const XML_Char *expected = XCS("Hi");
  238. CharData storage;
  239. g_parser = XML_ParserCreate(XCS("UTF-16LE"));
  240. if (g_parser == NULL)
  241. fail("Parser not created");
  242. CharData_Init(&storage);
  243. XML_SetUserData(g_parser, &storage);
  244. XML_SetCharacterDataHandler(g_parser, accumulate_characters);
  245. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  246. == XML_STATUS_ERROR)
  247. xml_failure(g_parser);
  248. CharData_CheckXMLChars(&storage, expected);
  249. }
  250. END_TEST
  251. START_TEST(test_misc_stop_during_end_handler_issue_240_1) {
  252. XML_Parser parser;
  253. DataIssue240 *mydata;
  254. enum XML_Status result;
  255. const char *const doc1 = "<doc><e1/><e><foo/></e></doc>";
  256. parser = XML_ParserCreate(NULL);
  257. XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
  258. mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
  259. mydata->parser = parser;
  260. mydata->deep = 0;
  261. XML_SetUserData(parser, mydata);
  262. result = _XML_Parse_SINGLE_BYTES(parser, doc1, (int)strlen(doc1), 1);
  263. XML_ParserFree(parser);
  264. free(mydata);
  265. if (result != XML_STATUS_ERROR)
  266. fail("Stopping the parser did not work as expected");
  267. }
  268. END_TEST
  269. START_TEST(test_misc_stop_during_end_handler_issue_240_2) {
  270. XML_Parser parser;
  271. DataIssue240 *mydata;
  272. enum XML_Status result;
  273. const char *const doc2 = "<doc><elem/></doc>";
  274. parser = XML_ParserCreate(NULL);
  275. XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
  276. mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
  277. mydata->parser = parser;
  278. mydata->deep = 0;
  279. XML_SetUserData(parser, mydata);
  280. result = _XML_Parse_SINGLE_BYTES(parser, doc2, (int)strlen(doc2), 1);
  281. XML_ParserFree(parser);
  282. free(mydata);
  283. if (result != XML_STATUS_ERROR)
  284. fail("Stopping the parser did not work as expected");
  285. }
  286. END_TEST
  287. START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317) {
  288. const char *const inputOne = "<!DOCTYPE d [\n"
  289. "<!ENTITY % e ']><d/>'>\n"
  290. "\n"
  291. "%e;";
  292. const char *const inputTwo = "<!DOCTYPE d [\n"
  293. "<!ENTITY % e1 ']><d/>'><!ENTITY % e2 '&e1;'>\n"
  294. "\n"
  295. "%e2;";
  296. const char *const inputThree = "<!DOCTYPE d [\n"
  297. "<!ENTITY % e ']><d'>\n"
  298. "\n"
  299. "%e;";
  300. const char *const inputIssue317 = "<!DOCTYPE doc [\n"
  301. "<!ENTITY % foo ']>\n"
  302. "<doc>Hell<oc (#PCDATA)*>'>\n"
  303. "%foo;\n"
  304. "]>\n"
  305. "<doc>Hello, world</dVc>";
  306. const char *const inputs[] = {inputOne, inputTwo, inputThree, inputIssue317};
  307. size_t inputIndex = 0;
  308. for (; inputIndex < sizeof(inputs) / sizeof(inputs[0]); inputIndex++) {
  309. set_subtest("%s", inputs[inputIndex]);
  310. XML_Parser parser;
  311. enum XML_Status parseResult;
  312. int setParamEntityResult;
  313. XML_Size lineNumber;
  314. XML_Size columnNumber;
  315. const char *const input = inputs[inputIndex];
  316. parser = XML_ParserCreate(NULL);
  317. setParamEntityResult
  318. = XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  319. if (setParamEntityResult != 1)
  320. fail("Failed to set XML_PARAM_ENTITY_PARSING_ALWAYS.");
  321. parseResult = _XML_Parse_SINGLE_BYTES(parser, input, (int)strlen(input), 0);
  322. if (parseResult != XML_STATUS_ERROR) {
  323. parseResult = _XML_Parse_SINGLE_BYTES(parser, "", 0, 1);
  324. if (parseResult != XML_STATUS_ERROR) {
  325. fail("Parsing was expected to fail but succeeded.");
  326. }
  327. }
  328. if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
  329. fail("Error code does not match XML_ERROR_INVALID_TOKEN");
  330. lineNumber = XML_GetCurrentLineNumber(parser);
  331. if (lineNumber != 4)
  332. fail("XML_GetCurrentLineNumber does not work as expected.");
  333. columnNumber = XML_GetCurrentColumnNumber(parser);
  334. if (columnNumber != 0)
  335. fail("XML_GetCurrentColumnNumber does not work as expected.");
  336. XML_ParserFree(parser);
  337. }
  338. }
  339. END_TEST
  340. START_TEST(test_misc_tag_mismatch_reset_leak) {
  341. #ifdef XML_NS
  342. const char *const text = "<open xmlns='https://namespace1.test'></close>";
  343. XML_Parser parser = XML_ParserCreateNS(NULL, XCS('\n'));
  344. if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
  345. != XML_STATUS_ERROR)
  346. fail("Call to parse was expected to fail");
  347. if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH)
  348. fail("Call to parse was expected to fail from a closing tag mismatch");
  349. XML_ParserReset(parser, NULL);
  350. if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
  351. != XML_STATUS_ERROR)
  352. fail("Call to parse was expected to fail");
  353. if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH)
  354. fail("Call to parse was expected to fail from a closing tag mismatch");
  355. XML_ParserFree(parser);
  356. #endif
  357. }
  358. END_TEST
  359. START_TEST(test_misc_create_external_entity_parser_with_null_context) {
  360. // With XML_DTD undefined, the only supported case of external entities
  361. // is pattern "<!ENTITY entity123 SYSTEM 'filename123'>". A NULL context
  362. // was causing a segfault through a null pointer dereference in function
  363. // setContext, previously.
  364. XML_Parser parser = XML_ParserCreate(NULL);
  365. XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
  366. #ifdef XML_DTD
  367. assert_true(ext_parser != NULL);
  368. XML_ParserFree(ext_parser);
  369. #else
  370. assert_true(ext_parser == NULL);
  371. #endif /* XML_DTD */
  372. XML_ParserFree(parser);
  373. }
  374. END_TEST
  375. START_TEST(test_misc_general_entities_support) {
  376. const char *const doc
  377. = "<!DOCTYPE r [\n"
  378. "<!ENTITY e1 'v1'>\n"
  379. "<!ENTITY e2 SYSTEM 'v2'>\n"
  380. "]>\n"
  381. "<r a1='[&e1;]'>[&e1;][&e2;][&amp;&apos;&gt;&lt;&quot;]</r>";
  382. CharData storage;
  383. CharData_Init(&storage);
  384. XML_Parser parser = XML_ParserCreate(NULL);
  385. XML_SetUserData(parser, &storage);
  386. XML_SetStartElementHandler(parser, accumulate_start_element);
  387. XML_SetExternalEntityRefHandler(parser,
  388. external_entity_failer__if_not_xml_ge);
  389. XML_SetEntityDeclHandler(parser, accumulate_entity_decl);
  390. XML_SetCharacterDataHandler(parser, accumulate_char_data);
  391. if (_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), XML_TRUE)
  392. != XML_STATUS_OK) {
  393. xml_failure(parser);
  394. }
  395. XML_ParserFree(parser);
  396. CharData_CheckXMLChars(&storage,
  397. /* clang-format off */
  398. #if XML_GE == 1
  399. XCS("e1=v1\n")
  400. XCS("e2=(null)\n")
  401. XCS("(r(a1=[v1]))\n")
  402. XCS("[v1][][&'><\"]")
  403. #else
  404. XCS("e1=&amp;e1;\n")
  405. XCS("e2=(null)\n")
  406. XCS("(r(a1=[&e1;]))\n")
  407. XCS("[&e1;][&e2;][&'><\"]")
  408. #endif
  409. );
  410. /* clang-format on */
  411. }
  412. END_TEST
  413. static void XMLCALL
  414. resumable_stopping_character_handler(void *userData, const XML_Char *s,
  415. int len) {
  416. UNUSED_P(s);
  417. UNUSED_P(len);
  418. XML_Parser parser = (XML_Parser)userData;
  419. XML_StopParser(parser, XML_TRUE);
  420. }
  421. // NOTE: This test needs active LeakSanitizer to be of actual use
  422. START_TEST(test_misc_char_handler_stop_without_leak) {
  423. const char *const data
  424. = "<!DOCTYPE t1[<!ENTITY e1 'angle<'><!ENTITY e2 '&e1;'>]><t1>&e2;";
  425. XML_Parser parser = XML_ParserCreate(NULL);
  426. assert_true(parser != NULL);
  427. XML_SetUserData(parser, parser);
  428. XML_SetCharacterDataHandler(parser, resumable_stopping_character_handler);
  429. _XML_Parse_SINGLE_BYTES(parser, data, (int)strlen(data), XML_FALSE);
  430. XML_ParserFree(parser);
  431. }
  432. END_TEST
  433. void
  434. make_miscellaneous_test_case(Suite *s) {
  435. TCase *tc_misc = tcase_create("miscellaneous tests");
  436. suite_add_tcase(s, tc_misc);
  437. tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);
  438. tcase_add_test(tc_misc, test_misc_alloc_create_parser);
  439. tcase_add_test(tc_misc, test_misc_alloc_create_parser_with_encoding);
  440. tcase_add_test(tc_misc, test_misc_null_parser);
  441. tcase_add_test(tc_misc, test_misc_error_string);
  442. tcase_add_test(tc_misc, test_misc_version);
  443. tcase_add_test(tc_misc, test_misc_features);
  444. tcase_add_test(tc_misc, test_misc_attribute_leak);
  445. tcase_add_test(tc_misc, test_misc_utf16le);
  446. tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_1);
  447. tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_2);
  448. tcase_add_test__ifdef_xml_dtd(
  449. tc_misc, test_misc_deny_internal_entity_closing_doctype_issue_317);
  450. tcase_add_test(tc_misc, test_misc_tag_mismatch_reset_leak);
  451. tcase_add_test(tc_misc,
  452. test_misc_create_external_entity_parser_with_null_context);
  453. tcase_add_test(tc_misc, test_misc_general_entities_support);
  454. tcase_add_test(tc_misc, test_misc_char_handler_stop_without_leak);
  455. }