basic_tests.c 244 KB


  1. /* Tests in the "basic" test case for the Expat test suite
  2. __ __ _
  3. ___\ \/ /_ __ __ _| |_
  4. / _ \\ /| '_ \ / _` | __|
  5. | __// \| |_) | (_| | |_
  6. \___/_/\_\ .__/ \__,_|\__|
  7. |_| XML parser
  8. Copyright (c) 2001-2006 Fred L. Drake, Jr. <[email protected]>
  9. Copyright (c) 2003 Greg Stein <[email protected]>
  10. Copyright (c) 2005-2007 Steven Solie <[email protected]>
  11. Copyright (c) 2005-2012 Karl Waclawek <[email protected]>
  12. Copyright (c) 2016-2025 Sebastian Pipping <[email protected]>
  13. Copyright (c) 2017-2022 Rhodri James <[email protected]>
  14. Copyright (c) 2017 Joe Orton <[email protected]>
  15. Copyright (c) 2017 José Gutiérrez de la Concha <[email protected]>
  16. Copyright (c) 2018 Marco Maggi <[email protected]>
  17. Copyright (c) 2019 David Loffredo <[email protected]>
  18. Copyright (c) 2020 Tim Gates <[email protected]>
  19. Copyright (c) 2021 Donghee Na <[email protected]>
  20. Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <[email protected]>
  21. Copyright (c) 2024-2025 Berkay Eren Ürün <[email protected]>
  22. Licensed under the MIT license:
  23. Permission is hereby granted, free of charge, to any person obtaining
  24. a copy of this software and associated documentation files (the
  25. "Software"), to deal in the Software without restriction, including
  26. without limitation the rights to use, copy, modify, merge, publish,
  27. distribute, sublicense, and/or sell copies of the Software, and to permit
  28. persons to whom the Software is furnished to do so, subject to the
  29. following conditions:
  30. The above copyright notice and this permission notice shall be included
  31. in all copies or substantial portions of the Software.
  32. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  33. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  34. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
  35. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
  36. DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  37. OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  38. USE OR OTHER DEALINGS IN THE SOFTWARE.
  39. */
  40. #if defined(NDEBUG)
  41. # undef NDEBUG /* because test suite relies on assert(...) at the moment */
  42. #endif
  43. #include <assert.h>
  44. #include <stdio.h>
  45. #include <string.h>
  46. #include <time.h>
  47. #if ! defined(__cplusplus)
  48. # include <stdbool.h>
  49. #endif
  50. #include "expat_config.h"
  51. #include "expat.h"
  52. #include "internal.h"
  53. #include "minicheck.h"
  54. #include "structdata.h"
  55. #include "common.h"
  56. #include "dummy.h"
  57. #include "handlers.h"
  58. #include "siphash.h"
  59. #include "basic_tests.h"
  60. static void
  61. basic_setup(void) {
  62. g_parser = XML_ParserCreate(NULL);
  63. if (g_parser == NULL)
  64. fail("Parser not created.");
  65. }
  66. /*
  67. * Character & encoding tests.
  68. */
  69. START_TEST(test_nul_byte) {
  70. char text[] = "<doc>\0</doc>";
  71. /* test that a NUL byte (in US-ASCII data) is an error */
  72. if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
  73. == XML_STATUS_OK)
  74. fail("Parser did not report error on NUL-byte.");
  75. if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
  76. xml_failure(g_parser);
  77. }
  78. END_TEST
  79. START_TEST(test_u0000_char) {
  80. /* test that a NUL byte (in US-ASCII data) is an error */
  81. expect_failure("<doc>&#0;</doc>", XML_ERROR_BAD_CHAR_REF,
  82. "Parser did not report error on NUL-byte.");
  83. }
  84. END_TEST
  85. START_TEST(test_siphash_self) {
  86. if (! sip24_valid())
  87. fail("SipHash self-test failed");
  88. }
  89. END_TEST
  90. START_TEST(test_siphash_spec) {
  91. /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
  92. const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
  93. "\x0a\x0b\x0c\x0d\x0e";
  94. const size_t len = sizeof(message) - 1;
  95. const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
  96. struct siphash state;
  97. struct sipkey key;
  98. sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
  99. "\x0a\x0b\x0c\x0d\x0e\x0f");
  100. sip24_init(&state, &key);
  101. /* Cover spread across calls */
  102. sip24_update(&state, message, 4);
  103. sip24_update(&state, message + 4, len - 4);
  104. /* Cover null length */
  105. sip24_update(&state, message, 0);
  106. if (sip24_final(&state) != expected)
  107. fail("sip24_final failed spec test\n");
  108. /* Cover wrapper */
  109. if (siphash24(message, len, &key) != expected)
  110. fail("siphash24 failed spec test\n");
  111. }
  112. END_TEST
  113. START_TEST(test_bom_utf8) {
  114. /* This test is really just making sure we don't core on a UTF-8 BOM. */
  115. const char *text = "\357\273\277<e/>";
  116. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  117. == XML_STATUS_ERROR)
  118. xml_failure(g_parser);
  119. }
  120. END_TEST
  121. START_TEST(test_bom_utf16_be) {
  122. char text[] = "\376\377\0<\0e\0/\0>";
  123. if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
  124. == XML_STATUS_ERROR)
  125. xml_failure(g_parser);
  126. }
  127. END_TEST
  128. START_TEST(test_bom_utf16_le) {
  129. char text[] = "\377\376<\0e\0/\0>\0";
  130. if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
  131. == XML_STATUS_ERROR)
  132. xml_failure(g_parser);
  133. }
  134. END_TEST
  135. START_TEST(test_nobom_utf16_le) {
  136. char text[] = " \0<\0e\0/\0>\0";
  137. if (g_chunkSize == 1) {
  138. // TODO: with just the first byte, we can't tell the difference between
  139. // UTF-16-LE and UTF-8. Avoid the failure for now.
  140. return;
  141. }
  142. if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
  143. == XML_STATUS_ERROR)
  144. xml_failure(g_parser);
  145. }
  146. END_TEST
  147. START_TEST(test_hash_collision) {
  148. /* For full coverage of the lookup routine, we need to ensure a
  149. * hash collision even though we can only tell that we have one
  150. * through breakpoint debugging or coverage statistics. The
  151. * following will cause a hash collision on machines with a 64-bit
  152. * long type; others will have to experiment. The full coverage
  153. * tests invoked from qa.sh usually provide a hash collision, but
  154. * not always. This is an attempt to provide insurance.
  155. */
  156. #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
  157. const char *text
  158. = "<doc>\n"
  159. "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
  160. "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
  161. "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
  162. "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
  163. "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
  164. "<d8>This triggers the table growth and collides with b2</d8>\n"
  165. "</doc>\n";
  166. XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
  167. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  168. == XML_STATUS_ERROR)
  169. xml_failure(g_parser);
  170. }
  171. END_TEST
  172. #undef COLLIDING_HASH_SALT
  173. /* Regression test for SF bug #491986. */
  174. START_TEST(test_danish_latin1) {
  175. const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
  176. "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
  177. #ifdef XML_UNICODE
  178. const XML_Char *expected
  179. = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
  180. #else
  181. const XML_Char *expected
  182. = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
  183. #endif
  184. run_character_check(text, expected);
  185. }
  186. END_TEST
  187. /* Regression test for SF bug #514281. */
  188. START_TEST(test_french_charref_hexidecimal) {
  189. const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
  190. "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
  191. #ifdef XML_UNICODE
  192. const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
  193. #else
  194. const XML_Char *expected
  195. = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
  196. #endif
  197. run_character_check(text, expected);
  198. }
  199. END_TEST
  200. START_TEST(test_french_charref_decimal) {
  201. const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
  202. "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
  203. #ifdef XML_UNICODE
  204. const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
  205. #else
  206. const XML_Char *expected
  207. = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
  208. #endif
  209. run_character_check(text, expected);
  210. }
  211. END_TEST
  212. START_TEST(test_french_latin1) {
  213. const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
  214. "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
  215. #ifdef XML_UNICODE
  216. const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
  217. #else
  218. const XML_Char *expected
  219. = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
  220. #endif
  221. run_character_check(text, expected);
  222. }
  223. END_TEST
  224. START_TEST(test_french_utf8) {
  225. const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
  226. "<doc>\xC3\xA9</doc>";
  227. #ifdef XML_UNICODE
  228. const XML_Char *expected = XCS("\x00e9");
  229. #else
  230. const XML_Char *expected = XCS("\xC3\xA9");
  231. #endif
  232. run_character_check(text, expected);
  233. }
  234. END_TEST
  235. /* Regression test for SF bug #600479.
  236. XXX There should be a test that exercises all legal XML Unicode
  237. characters as PCDATA and attribute value content, and XML Name
  238. characters as part of element and attribute names.
  239. */
  240. START_TEST(test_utf8_false_rejection) {
  241. const char *text = "<doc>\xEF\xBA\xBF</doc>";
  242. #ifdef XML_UNICODE
  243. const XML_Char *expected = XCS("\xfebf");
  244. #else
  245. const XML_Char *expected = XCS("\xEF\xBA\xBF");
  246. #endif
  247. run_character_check(text, expected);
  248. }
  249. END_TEST
  250. /* Regression test for SF bug #477667.
  251. This test assures that any 8-bit character followed by a 7-bit
  252. character will not be mistakenly interpreted as a valid UTF-8
  253. sequence.
  254. */
  255. START_TEST(test_illegal_utf8) {
  256. char text[100];
  257. int i;
  258. for (i = 128; i <= 255; ++i) {
  259. snprintf(text, sizeof(text), "<e>%ccd</e>", i);
  260. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  261. == XML_STATUS_OK) {
  262. snprintf(text, sizeof(text),
  263. "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
  264. i);
  265. fail(text);
  266. } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
  267. xml_failure(g_parser);
  268. /* Reset the parser since we use the same parser repeatedly. */
  269. XML_ParserReset(g_parser, NULL);
  270. }
  271. }
  272. END_TEST
  273. /* Examples, not masks: */
  274. #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
  275. #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
  276. #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
  277. #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
  278. #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
  279. START_TEST(test_utf8_auto_align) {
  280. struct TestCase {
  281. ptrdiff_t expectedMovementInChars;
  282. const char *input;
  283. };
  284. struct TestCase cases[] = {
  285. {00, ""},
  286. {00, UTF8_LEAD_1},
  287. {-1, UTF8_LEAD_2},
  288. {00, UTF8_LEAD_2 UTF8_FOLLOW},
  289. {-1, UTF8_LEAD_3},
  290. {-2, UTF8_LEAD_3 UTF8_FOLLOW},
  291. {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
  292. {-1, UTF8_LEAD_4},
  293. {-2, UTF8_LEAD_4 UTF8_FOLLOW},
  294. {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
  295. {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
  296. };
  297. size_t i = 0;
  298. bool success = true;
  299. for (; i < sizeof(cases) / sizeof(*cases); i++) {
  300. const char *fromLim = cases[i].input + strlen(cases[i].input);
  301. const char *const fromLimInitially = fromLim;
  302. ptrdiff_t actualMovementInChars;
  303. _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
  304. actualMovementInChars = (fromLim - fromLimInitially);
  305. if (actualMovementInChars != cases[i].expectedMovementInChars) {
  306. size_t j = 0;
  307. success = false;
  308. printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
  309. ", actually moved by %2d chars: \"",
  310. (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
  311. (int)actualMovementInChars);
  312. for (; j < strlen(cases[i].input); j++) {
  313. printf("\\x%02x", (unsigned char)cases[i].input[j]);
  314. }
  315. printf("\"\n");
  316. }
  317. }
  318. if (! success) {
  319. fail("UTF-8 auto-alignment is not bullet-proof\n");
  320. }
  321. }
  322. END_TEST
  323. START_TEST(test_utf16) {
  324. /* <?xml version="1.0" encoding="UTF-16"?>
  325. * <doc a='123'>some {A} text</doc>
  326. *
  327. * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
  328. */
  329. char text[]
  330. = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
  331. "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
  332. "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
  333. "\000'\000?\000>\000\n"
  334. "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
  335. "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
  336. "<\000/\000d\000o\000c\000>";
  337. #ifdef XML_UNICODE
  338. const XML_Char *expected = XCS("some \xff21 text");
  339. #else
  340. const XML_Char *expected = XCS("some \357\274\241 text");
  341. #endif
  342. CharData storage;
  343. CharData_Init(&storage);
  344. XML_SetUserData(g_parser, &storage);
  345. XML_SetCharacterDataHandler(g_parser, accumulate_characters);
  346. if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
  347. == XML_STATUS_ERROR)
  348. xml_failure(g_parser);
  349. CharData_CheckXMLChars(&storage, expected);
  350. }
  351. END_TEST
  352. START_TEST(test_utf16_le_epilog_newline) {
  353. unsigned int first_chunk_bytes = 17;
  354. char text[] = "\xFF\xFE" /* BOM */
  355. "<\000e\000/\000>\000" /* document element */
  356. "\r\000\n\000\r\000\n\000"; /* epilog */
  357. if (first_chunk_bytes >= sizeof(text) - 1)
  358. fail("bad value of first_chunk_bytes");
  359. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)first_chunk_bytes, XML_FALSE)
  360. == XML_STATUS_ERROR)
  361. xml_failure(g_parser);
  362. else {
  363. enum XML_Status rc;
  364. rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
  365. (int)(sizeof(text) - first_chunk_bytes - 1),
  366. XML_TRUE);
  367. if (rc == XML_STATUS_ERROR)
  368. xml_failure(g_parser);
  369. }
  370. }
  371. END_TEST
  372. /* Test that an outright lie in the encoding is faulted */
  373. START_TEST(test_not_utf16) {
  374. const char *text = "<?xml version='1.0' encoding='utf-16'?>"
  375. "<doc>Hi</doc>";
  376. /* Use a handler to provoke the appropriate code paths */
  377. XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
  378. expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
  379. "UTF-16 declared in UTF-8 not faulted");
  380. }
  381. END_TEST
  382. /* Test that an unknown encoding is rejected */
  383. START_TEST(test_bad_encoding) {
  384. const char *text = "<doc>Hi</doc>";
  385. if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
  386. fail("XML_SetEncoding failed");
  387. expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
  388. "Unknown encoding not faulted");
  389. }
  390. END_TEST
  391. /* Regression test for SF bug #481609, #774028. */
  392. START_TEST(test_latin1_umlauts) {
  393. const char *text
  394. = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
  395. "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
  396. " >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
  397. #ifdef XML_UNICODE
  398. /* Expected results in UTF-16 */
  399. const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
  400. XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
  401. #else
  402. /* Expected results in UTF-8 */
  403. const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
  404. XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
  405. #endif
  406. run_character_check(text, expected);
  407. XML_ParserReset(g_parser, NULL);
  408. run_attribute_check(text, expected);
  409. /* Repeat with a default handler */
  410. XML_ParserReset(g_parser, NULL);
  411. XML_SetDefaultHandler(g_parser, dummy_default_handler);
  412. run_character_check(text, expected);
  413. XML_ParserReset(g_parser, NULL);
  414. XML_SetDefaultHandler(g_parser, dummy_default_handler);
  415. run_attribute_check(text, expected);
  416. }
  417. END_TEST
  418. /* Test that an element name with a 4-byte UTF-8 character is rejected */
  419. START_TEST(test_long_utf8_character) {
  420. const char *text
  421. = "<?xml version='1.0' encoding='utf-8'?>\n"
  422. /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
  423. "<do\xf0\x90\x80\x80/>";
  424. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  425. "4-byte UTF-8 character in element name not faulted");
  426. }
  427. END_TEST
  428. /* Test that a long latin-1 attribute (too long to convert in one go)
  429. * is correctly converted
  430. */
  431. START_TEST(test_long_latin1_attribute) {
  432. const char *text
  433. = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
  434. "<doc att='"
  435. /* 64 characters per line */
  436. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  437. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  438. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  439. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  440. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  441. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  442. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  443. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  444. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  445. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  446. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  447. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  448. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  449. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  450. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  451. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
  452. /* Last character splits across a buffer boundary */
  453. "\xe4'>\n</doc>";
  454. const XML_Char *expected =
  455. /* 64 characters per line */
  456. /* clang-format off */
  457. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  458. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  459. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  460. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  461. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  462. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  463. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  464. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  465. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  466. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  467. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  468. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  469. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  470. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  471. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  472. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
  473. /* clang-format on */
  474. #ifdef XML_UNICODE
  475. XCS("\x00e4");
  476. #else
  477. XCS("\xc3\xa4");
  478. #endif
  479. run_attribute_check(text, expected);
  480. }
  481. END_TEST
  482. /* Test that a long ASCII attribute (too long to convert in one go)
  483. * is correctly converted
  484. */
  485. START_TEST(test_long_ascii_attribute) {
  486. const char *text
  487. = "<?xml version='1.0' encoding='us-ascii'?>\n"
  488. "<doc att='"
  489. /* 64 characters per line */
  490. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  491. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  492. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  493. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  494. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  495. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  496. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  497. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  498. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  499. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  500. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  501. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  502. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  503. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  504. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  505. "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
  506. "01234'>\n</doc>";
  507. const XML_Char *expected =
  508. /* 64 characters per line */
  509. /* clang-format off */
  510. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  511. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  512. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  513. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  514. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  515. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  516. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  517. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  518. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  519. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  520. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  521. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  522. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  523. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  524. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  525. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  526. XCS("01234");
  527. /* clang-format on */
  528. run_attribute_check(text, expected);
  529. }
  530. END_TEST
  531. /* Regression test #1 for SF bug #653180. */
  532. START_TEST(test_line_number_after_parse) {
  533. const char *text = "<tag>\n"
  534. "\n"
  535. "\n</tag>";
  536. XML_Size lineno;
  537. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  538. == XML_STATUS_ERROR)
  539. xml_failure(g_parser);
  540. lineno = XML_GetCurrentLineNumber(g_parser);
  541. if (lineno != 4) {
  542. char buffer[100];
  543. snprintf(buffer, sizeof(buffer),
  544. "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
  545. fail(buffer);
  546. }
  547. }
  548. END_TEST
  549. /* Regression test #2 for SF bug #653180. */
  550. START_TEST(test_column_number_after_parse) {
  551. const char *text = "<tag></tag>";
  552. XML_Size colno;
  553. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  554. == XML_STATUS_ERROR)
  555. xml_failure(g_parser);
  556. colno = XML_GetCurrentColumnNumber(g_parser);
  557. if (colno != 11) {
  558. char buffer[100];
  559. snprintf(buffer, sizeof(buffer),
  560. "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
  561. fail(buffer);
  562. }
  563. }
  564. END_TEST
  565. /* Regression test #3 for SF bug #653180. */
  566. START_TEST(test_line_and_column_numbers_inside_handlers) {
  567. const char *text = "<a>\n" /* Unix end-of-line */
  568. " <b>\r\n" /* Windows end-of-line */
  569. " <c/>\r" /* Mac OS end-of-line */
  570. " </b>\n"
  571. " <d>\n"
  572. " <f/>\n"
  573. " </d>\n"
  574. "</a>";
  575. const StructDataEntry expected[]
  576. = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
  577. {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
  578. {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG},
  579. {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
  580. {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}};
  581. const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
  582. StructData storage;
  583. StructData_Init(&storage);
  584. XML_SetUserData(g_parser, &storage);
  585. XML_SetStartElementHandler(g_parser, start_element_event_handler2);
  586. XML_SetEndElementHandler(g_parser, end_element_event_handler2);
  587. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  588. == XML_STATUS_ERROR)
  589. xml_failure(g_parser);
  590. StructData_CheckItems(&storage, expected, expected_count);
  591. StructData_Dispose(&storage);
  592. }
  593. END_TEST
  594. /* Regression test #4 for SF bug #653180. */
  595. START_TEST(test_line_number_after_error) {
  596. const char *text = "<a>\n"
  597. " <b>\n"
  598. " </a>"; /* missing </b> */
  599. XML_Size lineno;
  600. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  601. != XML_STATUS_ERROR)
  602. fail("Expected a parse error");
  603. lineno = XML_GetCurrentLineNumber(g_parser);
  604. if (lineno != 3) {
  605. char buffer[100];
  606. snprintf(buffer, sizeof(buffer),
  607. "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
  608. fail(buffer);
  609. }
  610. }
  611. END_TEST
  612. /* Regression test #5 for SF bug #653180. */
  613. START_TEST(test_column_number_after_error) {
  614. const char *text = "<a>\n"
  615. " <b>\n"
  616. " </a>"; /* missing </b> */
  617. XML_Size colno;
  618. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  619. != XML_STATUS_ERROR)
  620. fail("Expected a parse error");
  621. colno = XML_GetCurrentColumnNumber(g_parser);
  622. if (colno != 4) {
  623. char buffer[100];
  624. snprintf(buffer, sizeof(buffer),
  625. "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
  626. fail(buffer);
  627. }
  628. }
  629. END_TEST
  630. /* Regression test for SF bug #478332. */
  631. START_TEST(test_really_long_lines) {
  632. /* This parses an input line longer than INIT_DATA_BUF_SIZE
  633. characters long (defined to be 1024 in xmlparse.c). We take a
  634. really cheesy approach to building the input buffer, because
  635. this avoids writing bugs in buffer-filling code.
  636. */
  637. const char *text
  638. = "<e>"
  639. /* 64 chars */
  640. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  641. /* until we have at least 1024 characters on the line: */
  642. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  643. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  644. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  645. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  646. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  647. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  648. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  649. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  650. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  651. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  652. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  653. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  654. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  655. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  656. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  657. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  658. "</e>";
  659. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  660. == XML_STATUS_ERROR)
  661. xml_failure(g_parser);
  662. }
  663. END_TEST
  664. /* Test cdata processing across a buffer boundary */
  665. START_TEST(test_really_long_encoded_lines) {
  666. /* As above, except that we want to provoke an output buffer
  667. * overflow with a non-trivial encoding. For this we need to pass
  668. * the whole cdata in one go, not byte-by-byte.
  669. */
  670. void *buffer;
  671. const char *text
  672. = "<?xml version='1.0' encoding='iso-8859-1'?>"
  673. "<e>"
  674. /* 64 chars */
  675. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  676. /* until we have at least 1024 characters on the line: */
  677. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  678. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  679. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  680. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  681. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  682. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  683. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  684. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  685. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  686. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  687. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  688. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  689. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  690. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  691. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  692. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
  693. "</e>";
  694. int parse_len = (int)strlen(text);
  695. /* Need a cdata handler to provoke the code path we want to test */
  696. XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
  697. buffer = XML_GetBuffer(g_parser, parse_len);
  698. if (buffer == NULL)
  699. fail("Could not allocate parse buffer");
  700. assert(buffer != NULL);
  701. memcpy(buffer, text, parse_len);
  702. if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
  703. xml_failure(g_parser);
  704. }
  705. END_TEST
  706. /*
  707. * Element event tests.
  708. */
  709. START_TEST(test_end_element_events) {
  710. const char *text = "<a><b><c/></b><d><f/></d></a>";
  711. const XML_Char *expected = XCS("/c/b/f/d/a");
  712. CharData storage;
  713. CharData_Init(&storage);
  714. XML_SetUserData(g_parser, &storage);
  715. XML_SetEndElementHandler(g_parser, end_element_event_handler);
  716. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  717. == XML_STATUS_ERROR)
  718. xml_failure(g_parser);
  719. CharData_CheckXMLChars(&storage, expected);
  720. }
  721. END_TEST
  722. /*
  723. * Attribute tests.
  724. */
  725. /* Helper used by the following tests; this checks any "attr" and "refs"
  726. attributes to make sure whitespace has been normalized.
  727. Return true if whitespace has been normalized in a string, using
  728. the rules for attribute value normalization. The 'is_cdata' flag
  729. is needed since CDATA attributes don't need to have multiple
  730. whitespace characters collapsed to a single space, while other
  731. attribute data types do. (Section 3.3.3 of the recommendation.)
  732. */
  733. static int
  734. is_whitespace_normalized(const XML_Char *s, int is_cdata) {
  735. int blanks = 0;
  736. int at_start = 1;
  737. while (*s) {
  738. if (*s == XCS(' '))
  739. ++blanks;
  740. else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
  741. return 0;
  742. else {
  743. if (at_start) {
  744. at_start = 0;
  745. if (blanks && ! is_cdata)
  746. /* illegal leading blanks */
  747. return 0;
  748. } else if (blanks > 1 && ! is_cdata)
  749. return 0;
  750. blanks = 0;
  751. }
  752. ++s;
  753. }
  754. if (blanks && ! is_cdata)
  755. return 0;
  756. return 1;
  757. }
  758. /* Check the attribute whitespace checker: */
  759. START_TEST(test_helper_is_whitespace_normalized) {
  760. assert(is_whitespace_normalized(XCS("abc"), 0));
  761. assert(is_whitespace_normalized(XCS("abc"), 1));
  762. assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
  763. assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
  764. assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
  765. assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
  766. assert(! is_whitespace_normalized(XCS("abc def ghi"), 0));
  767. assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
  768. assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
  769. assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
  770. assert(! is_whitespace_normalized(XCS(" "), 0));
  771. assert(is_whitespace_normalized(XCS(" "), 1));
  772. assert(! is_whitespace_normalized(XCS("\t"), 0));
  773. assert(! is_whitespace_normalized(XCS("\t"), 1));
  774. assert(! is_whitespace_normalized(XCS("\n"), 0));
  775. assert(! is_whitespace_normalized(XCS("\n"), 1));
  776. assert(! is_whitespace_normalized(XCS("\r"), 0));
  777. assert(! is_whitespace_normalized(XCS("\r"), 1));
  778. assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
  779. }
  780. END_TEST
  781. static void XMLCALL
  782. check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
  783. const XML_Char **atts) {
  784. int i;
  785. UNUSED_P(userData);
  786. UNUSED_P(name);
  787. for (i = 0; atts[i] != NULL; i += 2) {
  788. const XML_Char *attrname = atts[i];
  789. const XML_Char *value = atts[i + 1];
  790. if (xcstrcmp(XCS("attr"), attrname) == 0
  791. || xcstrcmp(XCS("ents"), attrname) == 0
  792. || xcstrcmp(XCS("refs"), attrname) == 0) {
  793. if (! is_whitespace_normalized(value, 0)) {
  794. char buffer[256];
  795. snprintf(buffer, sizeof(buffer),
  796. "attribute value not normalized: %" XML_FMT_STR
  797. "='%" XML_FMT_STR "'",
  798. attrname, value);
  799. fail(buffer);
  800. }
  801. }
  802. }
  803. }
  804. START_TEST(test_attr_whitespace_normalization) {
  805. const char *text
  806. = "<!DOCTYPE doc [\n"
  807. " <!ATTLIST doc\n"
  808. " attr NMTOKENS #REQUIRED\n"
  809. " ents ENTITIES #REQUIRED\n"
  810. " refs IDREFS #REQUIRED>\n"
  811. "]>\n"
  812. "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n"
  813. " ents=' ent-1 \t\r\n"
  814. " ent-2 ' >\n"
  815. " <e id='id-1'/>\n"
  816. " <e id='id-2'/>\n"
  817. "</doc>";
  818. XML_SetStartElementHandler(g_parser,
  819. check_attr_contains_normalized_whitespace);
  820. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  821. == XML_STATUS_ERROR)
  822. xml_failure(g_parser);
  823. }
  824. END_TEST
  825. /*
  826. * XML declaration tests.
  827. */
  828. START_TEST(test_xmldecl_misplaced) {
  829. expect_failure("\n"
  830. "<?xml version='1.0'?>\n"
  831. "<a/>",
  832. XML_ERROR_MISPLACED_XML_PI,
  833. "failed to report misplaced XML declaration");
  834. }
  835. END_TEST
  836. START_TEST(test_xmldecl_invalid) {
  837. expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
  838. "Failed to report invalid XML declaration");
  839. }
  840. END_TEST
  841. START_TEST(test_xmldecl_missing_attr) {
  842. expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
  843. "Failed to report missing XML declaration attribute");
  844. }
  845. END_TEST
  846. START_TEST(test_xmldecl_missing_value) {
  847. expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
  848. "<doc/>",
  849. XML_ERROR_XML_DECL,
  850. "Failed to report missing attribute value");
  851. }
  852. END_TEST
  853. /* Regression test for SF bug #584832. */
  854. START_TEST(test_unknown_encoding_internal_entity) {
  855. const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
  856. "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
  857. "<test a='&foo;'/>";
  858. XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
  859. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  860. == XML_STATUS_ERROR)
  861. xml_failure(g_parser);
  862. }
  863. END_TEST
  864. /* Test unrecognised encoding handler */
  865. START_TEST(test_unrecognised_encoding_internal_entity) {
  866. const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
  867. "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
  868. "<test a='&foo;'/>";
  869. XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
  870. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  871. != XML_STATUS_ERROR)
  872. fail("Unrecognised encoding not rejected");
  873. }
  874. END_TEST
  875. /* Regression test for SF bug #620106. */
  876. START_TEST(test_ext_entity_set_encoding) {
  877. const char *text = "<!DOCTYPE doc [\n"
  878. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  879. "]>\n"
  880. "<doc>&en;</doc>";
  881. ExtTest test_data
  882. = {/* This text says it's an unsupported encoding, but it's really
  883. UTF-8, which we tell Expat using XML_SetEncoding().
  884. */
  885. "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
  886. #ifdef XML_UNICODE
  887. const XML_Char *expected = XCS("\x00e9");
  888. #else
  889. const XML_Char *expected = XCS("\xc3\xa9");
  890. #endif
  891. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  892. run_ext_character_check(text, &test_data, expected);
  893. }
  894. END_TEST
  895. /* Test external entities with no handler */
  896. START_TEST(test_ext_entity_no_handler) {
  897. const char *text = "<!DOCTYPE doc [\n"
  898. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  899. "]>\n"
  900. "<doc>&en;</doc>";
  901. XML_SetDefaultHandler(g_parser, dummy_default_handler);
  902. run_character_check(text, XCS(""));
  903. }
  904. END_TEST
  905. /* Test UTF-8 BOM is accepted */
  906. START_TEST(test_ext_entity_set_bom) {
  907. const char *text = "<!DOCTYPE doc [\n"
  908. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  909. "]>\n"
  910. "<doc>&en;</doc>";
  911. ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
  912. "<?xml encoding='iso-8859-3'?>"
  913. "\xC3\xA9",
  914. XCS("utf-8"), NULL};
  915. #ifdef XML_UNICODE
  916. const XML_Char *expected = XCS("\x00e9");
  917. #else
  918. const XML_Char *expected = XCS("\xc3\xa9");
  919. #endif
  920. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  921. run_ext_character_check(text, &test_data, expected);
  922. }
  923. END_TEST
  924. /* Test that bad encodings are faulted */
  925. START_TEST(test_ext_entity_bad_encoding) {
  926. const char *text = "<!DOCTYPE doc [\n"
  927. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  928. "]>\n"
  929. "<doc>&en;</doc>";
  930. ExtFaults fault
  931. = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
  932. XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
  933. XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
  934. XML_SetUserData(g_parser, &fault);
  935. expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
  936. "Bad encoding should not have been accepted");
  937. }
  938. END_TEST
  939. /* Try handing an invalid encoding to an external entity parser */
  940. START_TEST(test_ext_entity_bad_encoding_2) {
  941. const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
  942. "<!DOCTYPE doc SYSTEM 'foo'>\n"
  943. "<doc>&entity;</doc>";
  944. ExtFaults fault
  945. = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
  946. XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
  947. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  948. XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
  949. XML_SetUserData(g_parser, &fault);
  950. expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
  951. "Bad encoding not faulted in external entity handler");
  952. }
  953. END_TEST
  954. /* Test that no error is reported for unknown entities if we don't
  955. read an external subset. This was fixed in Expat 1.95.5.
  956. */
  957. START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
  958. const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
  959. "<doc>&entity;</doc>";
  960. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  961. == XML_STATUS_ERROR)
  962. xml_failure(g_parser);
  963. }
  964. END_TEST
  965. /* Test that an error is reported for unknown entities if we don't
  966. have an external subset.
  967. */
  968. START_TEST(test_wfc_undeclared_entity_no_external_subset) {
  969. expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
  970. "Parser did not report undefined entity w/out a DTD.");
  971. }
  972. END_TEST
  973. /* Test that an error is reported for unknown entities if we don't
  974. read an external subset, but have been declared standalone.
  975. */
  976. START_TEST(test_wfc_undeclared_entity_standalone) {
  977. const char *text
  978. = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
  979. "<!DOCTYPE doc SYSTEM 'foo'>\n"
  980. "<doc>&entity;</doc>";
  981. expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
  982. "Parser did not report undefined entity (standalone).");
  983. }
  984. END_TEST
  985. /* Test that an error is reported for unknown entities if we have read
  986. an external subset, and standalone is true.
  987. */
  988. START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
  989. const char *text
  990. = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
  991. "<!DOCTYPE doc SYSTEM 'foo'>\n"
  992. "<doc>&entity;</doc>";
  993. ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
  994. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  995. XML_SetUserData(g_parser, &test_data);
  996. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  997. expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
  998. "Parser did not report undefined entity (external DTD).");
  999. }
  1000. END_TEST
  1001. /* Test that external entity handling is not done if the parsing flag
  1002. * is set to UNLESS_STANDALONE
  1003. */
  1004. START_TEST(test_entity_with_external_subset_unless_standalone) {
  1005. const char *text
  1006. = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
  1007. "<!DOCTYPE doc SYSTEM 'foo'>\n"
  1008. "<doc>&entity;</doc>";
  1009. ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
  1010. XML_SetParamEntityParsing(g_parser,
  1011. XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
  1012. XML_SetUserData(g_parser, &test_data);
  1013. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  1014. expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
  1015. "Parser did not report undefined entity");
  1016. }
  1017. END_TEST
  1018. /* Test that no error is reported for unknown entities if we have read
  1019. an external subset, and standalone is false.
  1020. */
  1021. START_TEST(test_wfc_undeclared_entity_with_external_subset) {
  1022. const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
  1023. "<!DOCTYPE doc SYSTEM 'foo'>\n"
  1024. "<doc>&entity;</doc>";
  1025. ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
  1026. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  1027. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  1028. run_ext_character_check(text, &test_data, XCS(""));
  1029. }
  1030. END_TEST
  1031. /* Test that an error is reported if our NotStandalone handler fails */
  1032. START_TEST(test_not_standalone_handler_reject) {
  1033. const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
  1034. "<!DOCTYPE doc SYSTEM 'foo'>\n"
  1035. "<doc>&entity;</doc>";
  1036. ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
  1037. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  1038. XML_SetUserData(g_parser, &test_data);
  1039. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  1040. XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
  1041. expect_failure(text, XML_ERROR_NOT_STANDALONE,
  1042. "NotStandalone handler failed to reject");
  1043. /* Try again but without external entity handling */
  1044. XML_ParserReset(g_parser, NULL);
  1045. XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
  1046. expect_failure(text, XML_ERROR_NOT_STANDALONE,
  1047. "NotStandalone handler failed to reject");
  1048. }
  1049. END_TEST
  1050. /* Test that no error is reported if our NotStandalone handler succeeds */
  1051. START_TEST(test_not_standalone_handler_accept) {
  1052. const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
  1053. "<!DOCTYPE doc SYSTEM 'foo'>\n"
  1054. "<doc>&entity;</doc>";
  1055. ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
  1056. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  1057. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  1058. XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
  1059. run_ext_character_check(text, &test_data, XCS(""));
  1060. /* Repeat without the external entity handler */
  1061. XML_ParserReset(g_parser, NULL);
  1062. XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
  1063. run_character_check(text, XCS(""));
  1064. }
  1065. END_TEST
  1066. START_TEST(test_entity_start_tag_level_greater_than_one) {
  1067. const char *const text = "<!DOCTYPE t1 [\n"
  1068. " <!ENTITY e1 'hello'>\n"
  1069. "]>\n"
  1070. "<t1>\n"
  1071. " <t2>&e1;</t2>\n"
  1072. "</t1>\n";
  1073. XML_Parser parser = XML_ParserCreate(NULL);
  1074. assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
  1075. /*isFinal*/ XML_TRUE)
  1076. == XML_STATUS_OK);
  1077. XML_ParserFree(parser);
  1078. }
  1079. END_TEST
  1080. START_TEST(test_wfc_no_recursive_entity_refs) {
  1081. const char *text = "<!DOCTYPE doc [\n"
  1082. " <!ENTITY entity '&#38;entity;'>\n"
  1083. "]>\n"
  1084. "<doc>&entity;</doc>";
  1085. expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
  1086. "Parser did not report recursive entity reference.");
  1087. }
  1088. END_TEST
  1089. START_TEST(test_no_indirectly_recursive_entity_refs) {
  1090. struct TestCase {
  1091. const char *doc;
  1092. bool usesParameterEntities;
  1093. };
  1094. const struct TestCase cases[] = {
  1095. // general entity + character data
  1096. {"<!DOCTYPE a [\n"
  1097. " <!ENTITY e1 '&e2;'>\n"
  1098. " <!ENTITY e2 '&e1;'>\n"
  1099. "]><a>&e2;</a>\n",
  1100. false},
  1101. // general entity + attribute value
  1102. {"<!DOCTYPE a [\n"
  1103. " <!ENTITY e1 '&e2;'>\n"
  1104. " <!ENTITY e2 '&e1;'>\n"
  1105. "]><a k1='&e2;' />\n",
  1106. false},
  1107. // parameter entity
  1108. {"<!DOCTYPE doc [\n"
  1109. " <!ENTITY % p1 '&#37;p2;'>\n"
  1110. " <!ENTITY % p2 '&#37;p1;'>\n"
  1111. " <!ENTITY % define_g \"<!ENTITY g '&#37;p2;'>\">\n"
  1112. " %define_g;\n"
  1113. "]>\n"
  1114. "<doc/>\n",
  1115. true},
  1116. };
  1117. const XML_Bool reset_or_not[] = {XML_TRUE, XML_FALSE};
  1118. for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
  1119. for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]);
  1120. j++) {
  1121. const XML_Bool reset_wanted = reset_or_not[j];
  1122. const char *const doc = cases[i].doc;
  1123. const bool usesParameterEntities = cases[i].usesParameterEntities;
  1124. set_subtest("[%i,reset=%i] %s", (int)i, (int)j, doc);
  1125. #ifdef XML_DTD // both GE and DTD
  1126. const bool rejection_expected = true;
  1127. #elif XML_GE == 1 // GE but not DTD
  1128. const bool rejection_expected = ! usesParameterEntities;
  1129. #else // neither DTD nor GE
  1130. const bool rejection_expected = false;
  1131. #endif
  1132. XML_Parser parser = XML_ParserCreate(NULL);
  1133. #ifdef XML_DTD
  1134. if (usesParameterEntities) {
  1135. assert_true(
  1136. XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS)
  1137. == 1);
  1138. }
  1139. #else
  1140. UNUSED_P(usesParameterEntities);
  1141. #endif // XML_DTD
  1142. const enum XML_Status status
  1143. = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
  1144. /*isFinal*/ XML_TRUE);
  1145. if (rejection_expected) {
  1146. assert_true(status == XML_STATUS_ERROR);
  1147. assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF);
  1148. } else {
  1149. assert_true(status == XML_STATUS_OK);
  1150. }
  1151. if (reset_wanted) {
  1152. // This covers free'ing of (eventually) all three open entity lists by
  1153. // XML_ParserReset.
  1154. XML_ParserReset(parser, NULL);
  1155. }
  1156. // This covers free'ing of (eventually) all three open entity lists by
  1157. // XML_ParserFree (unless XML_ParserReset has already done that above).
  1158. XML_ParserFree(parser);
  1159. }
  1160. }
  1161. }
  1162. END_TEST
  1163. START_TEST(test_recursive_external_parameter_entity_2) {
  1164. struct TestCase {
  1165. const char *doc;
  1166. enum XML_Status expectedStatus;
  1167. };
  1168. struct TestCase cases[] = {
  1169. {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
  1170. {"<!ENTITY % p1 '%p1;'>"
  1171. "<!ENTITY % p1 'first declaration wins'>",
  1172. XML_STATUS_ERROR},
  1173. {"<!ENTITY % p1 'first declaration wins'>"
  1174. "<!ENTITY % p1 '%p1;'>",
  1175. XML_STATUS_OK},
  1176. {"<!ENTITY % p1 '&#37;p1;'>", XML_STATUS_OK},
  1177. };
  1178. for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
  1179. const char *const doc = cases[i].doc;
  1180. const enum XML_Status expectedStatus = cases[i].expectedStatus;
  1181. set_subtest("%s", doc);
  1182. XML_Parser parser = XML_ParserCreate(NULL);
  1183. assert_true(parser != NULL);
  1184. XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
  1185. assert_true(ext_parser != NULL);
  1186. const enum XML_Status actualStatus
  1187. = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
  1188. assert_true(actualStatus == expectedStatus);
  1189. if (actualStatus != XML_STATUS_OK) {
  1190. assert_true(XML_GetErrorCode(ext_parser)
  1191. == XML_ERROR_RECURSIVE_ENTITY_REF);
  1192. }
  1193. XML_ParserFree(ext_parser);
  1194. XML_ParserFree(parser);
  1195. }
  1196. }
  1197. END_TEST
  1198. /* Test incomplete external entities are faulted */
  1199. START_TEST(test_ext_entity_invalid_parse) {
  1200. const char *text = "<!DOCTYPE doc [\n"
  1201. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  1202. "]>\n"
  1203. "<doc>&en;</doc>";
  1204. const ExtFaults faults[]
  1205. = {{"<", "Incomplete element declaration not faulted", NULL,
  1206. XML_ERROR_UNCLOSED_TOKEN},
  1207. {"<\xe2\x82", /* First two bytes of a three-byte char */
  1208. "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
  1209. {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
  1210. XML_ERROR_PARTIAL_CHAR},
  1211. {NULL, NULL, NULL, XML_ERROR_NONE}};
  1212. const ExtFaults *fault = faults;
  1213. for (; fault->parse_text != NULL; fault++) {
  1214. set_subtest("\"%s\"", fault->parse_text);
  1215. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  1216. XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
  1217. XML_SetUserData(g_parser, (void *)fault);
  1218. expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
  1219. "Parser did not report external entity error");
  1220. XML_ParserReset(g_parser, NULL);
  1221. }
  1222. }
  1223. END_TEST
  1224. /* Regression test for SF bug #483514. */
  1225. START_TEST(test_dtd_default_handling) {
  1226. const char *text = "<!DOCTYPE doc [\n"
  1227. "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
  1228. "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
  1229. "<!ELEMENT doc EMPTY>\n"
  1230. "<!ATTLIST doc a CDATA #IMPLIED>\n"
  1231. "<?pi in dtd?>\n"
  1232. "<!--comment in dtd-->\n"
  1233. "]><doc/>";
  1234. XML_SetDefaultHandler(g_parser, accumulate_characters);
  1235. XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
  1236. XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
  1237. XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
  1238. XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
  1239. XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
  1240. XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
  1241. XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
  1242. XML_SetCommentHandler(g_parser, dummy_comment_handler);
  1243. XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
  1244. XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
  1245. run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
  1246. }
  1247. END_TEST
  1248. /* Test handling of attribute declarations */
  1249. START_TEST(test_dtd_attr_handling) {
  1250. const char *prolog = "<!DOCTYPE doc [\n"
  1251. "<!ELEMENT doc EMPTY>\n";
  1252. AttTest attr_data[]
  1253. = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
  1254. "]>"
  1255. "<doc a='two'/>",
  1256. XCS("doc"), XCS("a"),
  1257. XCS("(one|two|three)"), /* Extraneous spaces will be removed */
  1258. NULL, XML_TRUE},
  1259. {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
  1260. "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
  1261. "]>"
  1262. "<doc/>",
  1263. XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
  1264. {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
  1265. "]>"
  1266. "<doc/>",
  1267. XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
  1268. {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
  1269. "]>"
  1270. "<doc/>",
  1271. XCS("doc"), XCS("a"), XCS("CDATA"),
  1272. #ifdef XML_UNICODE
  1273. XCS("\x06f2"),
  1274. #else
  1275. XCS("\xdb\xb2"),
  1276. #endif
  1277. XML_FALSE},
  1278. {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
  1279. AttTest *test;
  1280. for (test = attr_data; test->definition != NULL; test++) {
  1281. set_subtest("%s", test->definition);
  1282. XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
  1283. XML_SetUserData(g_parser, test);
  1284. if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
  1285. XML_FALSE)
  1286. == XML_STATUS_ERROR)
  1287. xml_failure(g_parser);
  1288. if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
  1289. (int)strlen(test->definition), XML_TRUE)
  1290. == XML_STATUS_ERROR)
  1291. xml_failure(g_parser);
  1292. XML_ParserReset(g_parser, NULL);
  1293. }
  1294. }
  1295. END_TEST
  1296. /* See related SF bug #673791.
  1297. When namespace processing is enabled, setting the namespace URI for
  1298. a prefix is not allowed; this test ensures that it *is* allowed
  1299. when namespace processing is not enabled.
  1300. (See Namespaces in XML, section 2.)
  1301. */
  1302. START_TEST(test_empty_ns_without_namespaces) {
  1303. const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
  1304. " <e xmlns:prefix=''/>\n"
  1305. "</doc>";
  1306. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  1307. == XML_STATUS_ERROR)
  1308. xml_failure(g_parser);
  1309. }
  1310. END_TEST
  1311. /* Regression test for SF bug #824420.
  1312. Checks that an xmlns:prefix attribute set in an attribute's default
  1313. value isn't misinterpreted.
  1314. */
  1315. START_TEST(test_ns_in_attribute_default_without_namespaces) {
  1316. const char *text = "<!DOCTYPE e:element [\n"
  1317. " <!ATTLIST e:element\n"
  1318. " xmlns:e CDATA 'http://example.org/'>\n"
  1319. " ]>\n"
  1320. "<e:element/>";
  1321. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  1322. == XML_STATUS_ERROR)
  1323. xml_failure(g_parser);
  1324. }
  1325. END_TEST
  1326. /* Regression test for SF bug #1515266: missing check of stopped
  1327. parser in doContext() 'for' loop. */
  1328. START_TEST(test_stop_parser_between_char_data_calls) {
  1329. /* The sample data must be big enough that there are two calls to
  1330. the character data handler from within the inner "for" loop of
  1331. the XML_TOK_DATA_CHARS case in doContent(), and the character
  1332. handler must stop the parser and clear the character data
  1333. handler.
  1334. */
  1335. const char *text = long_character_data_text;
  1336. XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
  1337. g_resumable = XML_FALSE;
  1338. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  1339. != XML_STATUS_ERROR)
  1340. xml_failure(g_parser);
  1341. if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
  1342. xml_failure(g_parser);
  1343. }
  1344. END_TEST
  1345. /* Regression test for SF bug #1515266: missing check of stopped
  1346. parser in doContext() 'for' loop. */
  1347. START_TEST(test_suspend_parser_between_char_data_calls) {
  1348. /* The sample data must be big enough that there are two calls to
  1349. the character data handler from within the inner "for" loop of
  1350. the XML_TOK_DATA_CHARS case in doContent(), and the character
  1351. handler must stop the parser and clear the character data
  1352. handler.
  1353. */
  1354. const char *text = long_character_data_text;
  1355. XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
  1356. g_resumable = XML_TRUE;
  1357. // can't use SINGLE_BYTES here, because it'll return early on suspension, and
  1358. // we won't know exactly how much input we actually managed to give Expat.
  1359. if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
  1360. != XML_STATUS_SUSPENDED)
  1361. xml_failure(g_parser);
  1362. if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
  1363. xml_failure(g_parser);
  1364. /* Try parsing directly */
  1365. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  1366. != XML_STATUS_ERROR)
  1367. fail("Attempt to continue parse while suspended not faulted");
  1368. if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
  1369. fail("Suspended parse not faulted with correct error");
  1370. }
  1371. END_TEST
  1372. /* Test repeated calls to XML_StopParser are handled correctly */
  1373. START_TEST(test_repeated_stop_parser_between_char_data_calls) {
  1374. const char *text = long_character_data_text;
  1375. XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
  1376. g_resumable = XML_FALSE;
  1377. g_abortable = XML_FALSE;
  1378. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  1379. != XML_STATUS_ERROR)
  1380. fail("Failed to double-stop parser");
  1381. XML_ParserReset(g_parser, NULL);
  1382. XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
  1383. g_resumable = XML_TRUE;
  1384. g_abortable = XML_FALSE;
  1385. // can't use SINGLE_BYTES here, because it'll return early on suspension, and
  1386. // we won't know exactly how much input we actually managed to give Expat.
  1387. if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
  1388. != XML_STATUS_SUSPENDED)
  1389. fail("Failed to double-suspend parser");
  1390. XML_ParserReset(g_parser, NULL);
  1391. XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
  1392. g_resumable = XML_TRUE;
  1393. g_abortable = XML_TRUE;
  1394. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  1395. != XML_STATUS_ERROR)
  1396. fail("Failed to suspend-abort parser");
  1397. }
  1398. END_TEST
  1399. START_TEST(test_good_cdata_ascii) {
  1400. const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
  1401. const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
  1402. CharData storage;
  1403. CharData_Init(&storage);
  1404. XML_SetUserData(g_parser, &storage);
  1405. XML_SetCharacterDataHandler(g_parser, accumulate_characters);
  1406. /* Add start and end handlers for coverage */
  1407. XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
  1408. XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
  1409. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  1410. == XML_STATUS_ERROR)
  1411. xml_failure(g_parser);
  1412. CharData_CheckXMLChars(&storage, expected);
  1413. /* Try again, this time with a default handler */
  1414. XML_ParserReset(g_parser, NULL);
  1415. CharData_Init(&storage);
  1416. XML_SetUserData(g_parser, &storage);
  1417. XML_SetCharacterDataHandler(g_parser, accumulate_characters);
  1418. XML_SetDefaultHandler(g_parser, dummy_default_handler);
  1419. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  1420. == XML_STATUS_ERROR)
  1421. xml_failure(g_parser);
  1422. CharData_CheckXMLChars(&storage, expected);
  1423. }
  1424. END_TEST
  1425. START_TEST(test_good_cdata_utf16) {
  1426. /* Test data is:
  1427. * <?xml version='1.0' encoding='utf-16'?>
  1428. * <a><![CDATA[hello]]></a>
  1429. */
  1430. const char text[]
  1431. = "\0<\0?\0x\0m\0l\0"
  1432. " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
  1433. " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
  1434. "1\0"
  1435. "6\0'"
  1436. "\0?\0>\0\n"
  1437. "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
  1438. const XML_Char *expected = XCS("hello");
  1439. CharData storage;
  1440. CharData_Init(&storage);
  1441. XML_SetUserData(g_parser, &storage);
  1442. XML_SetCharacterDataHandler(g_parser, accumulate_characters);
  1443. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  1444. == XML_STATUS_ERROR)
  1445. xml_failure(g_parser);
  1446. CharData_CheckXMLChars(&storage, expected);
  1447. }
  1448. END_TEST
  1449. START_TEST(test_good_cdata_utf16_le) {
  1450. /* Test data is:
  1451. * <?xml version='1.0' encoding='utf-16'?>
  1452. * <a><![CDATA[hello]]></a>
  1453. */
  1454. const char text[]
  1455. = "<\0?\0x\0m\0l\0"
  1456. " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
  1457. " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
  1458. "1\0"
  1459. "6\0'"
  1460. "\0?\0>\0\n"
  1461. "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
  1462. const XML_Char *expected = XCS("hello");
  1463. CharData storage;
  1464. CharData_Init(&storage);
  1465. XML_SetUserData(g_parser, &storage);
  1466. XML_SetCharacterDataHandler(g_parser, accumulate_characters);
  1467. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  1468. == XML_STATUS_ERROR)
  1469. xml_failure(g_parser);
  1470. CharData_CheckXMLChars(&storage, expected);
  1471. }
  1472. END_TEST
  1473. /* Test UTF16 conversion of a long cdata string */
  1474. /* 16 characters: handy macro to reduce visual clutter */
  1475. #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
  1476. START_TEST(test_long_cdata_utf16) {
  1477. /* Test data is:
  1478. * <?xlm version='1.0' encoding='utf-16'?>
  1479. * <a><![CDATA[
  1480. * ABCDEFGHIJKLMNOP
  1481. * ]]></a>
  1482. */
  1483. const char text[]
  1484. = "\0<\0?\0x\0m\0l\0 "
  1485. "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
  1486. "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
  1487. "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
  1488. /* 64 characters per line */
  1489. /* clang-format off */
  1490. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1491. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1492. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1493. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1494. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1495. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1496. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1497. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1498. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1499. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1500. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1501. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1502. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1503. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1504. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1505. A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
  1506. A_TO_P_IN_UTF16
  1507. /* clang-format on */
  1508. "\0]\0]\0>\0<\0/\0a\0>";
  1509. const XML_Char *expected =
  1510. /* clang-format off */
  1511. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1512. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1513. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1514. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1515. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1516. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1517. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1518. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1519. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1520. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1521. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1522. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1523. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1524. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1525. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1526. XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
  1527. XCS("ABCDEFGHIJKLMNOP");
  1528. /* clang-format on */
  1529. CharData storage;
  1530. void *buffer;
  1531. CharData_Init(&storage);
  1532. XML_SetUserData(g_parser, &storage);
  1533. XML_SetCharacterDataHandler(g_parser, accumulate_characters);
  1534. buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
  1535. if (buffer == NULL)
  1536. fail("Could not allocate parse buffer");
  1537. assert(buffer != NULL);
  1538. memcpy(buffer, text, sizeof(text) - 1);
  1539. if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
  1540. xml_failure(g_parser);
  1541. CharData_CheckXMLChars(&storage, expected);
  1542. }
  1543. END_TEST
  1544. /* Test handling of multiple unit UTF-16 characters */
  1545. START_TEST(test_multichar_cdata_utf16) {
  1546. /* Test data is:
  1547. * <?xml version='1.0' encoding='utf-16'?>
  1548. * <a><![CDATA[{MINIM}{CROTCHET}]]></a>
  1549. *
  1550. * where {MINIM} is U+1d15e (a minim or half-note)
  1551. * UTF-16: 0xd834 0xdd5e
  1552. * UTF-8: 0xf0 0x9d 0x85 0x9e
  1553. * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
  1554. * UTF-16: 0xd834 0xdd5f
  1555. * UTF-8: 0xf0 0x9d 0x85 0x9f
  1556. */
  1557. const char text[] = "\0<\0?\0x\0m\0l\0"
  1558. " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
  1559. " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
  1560. "1\0"
  1561. "6\0'"
  1562. "\0?\0>\0\n"
  1563. "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
  1564. "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
  1565. "\0]\0]\0>\0<\0/\0a\0>";
  1566. #ifdef XML_UNICODE
  1567. const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
  1568. #else
  1569. const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
  1570. #endif
  1571. CharData storage;
  1572. CharData_Init(&storage);
  1573. XML_SetUserData(g_parser, &storage);
  1574. XML_SetCharacterDataHandler(g_parser, accumulate_characters);
  1575. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  1576. == XML_STATUS_ERROR)
  1577. xml_failure(g_parser);
  1578. CharData_CheckXMLChars(&storage, expected);
  1579. }
  1580. END_TEST
  1581. /* Test that an element name with a UTF-16 surrogate pair is rejected */
  1582. START_TEST(test_utf16_bad_surrogate_pair) {
  1583. /* Test data is:
  1584. * <?xml version='1.0' encoding='utf-16'?>
  1585. * <a><![CDATA[{BADLINB}]]></a>
  1586. *
  1587. * where {BADLINB} is U+10000 (the first Linear B character)
  1588. * with the UTF-16 surrogate pair in the wrong order, i.e.
  1589. * 0xdc00 0xd800
  1590. */
  1591. const char text[] = "\0<\0?\0x\0m\0l\0"
  1592. " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
  1593. " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
  1594. "1\0"
  1595. "6\0'"
  1596. "\0?\0>\0\n"
  1597. "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
  1598. "\xdc\x00\xd8\x00"
  1599. "\0]\0]\0>\0<\0/\0a\0>";
  1600. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  1601. != XML_STATUS_ERROR)
  1602. fail("Reversed UTF-16 surrogate pair not faulted");
  1603. if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
  1604. xml_failure(g_parser);
  1605. }
  1606. END_TEST
  1607. START_TEST(test_bad_cdata) {
  1608. struct CaseData {
  1609. const char *text;
  1610. enum XML_Error expectedError;
  1611. };
  1612. struct CaseData cases[]
  1613. = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
  1614. {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
  1615. {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
  1616. {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
  1617. {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
  1618. {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
  1619. {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
  1620. {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
  1621. {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
  1622. {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
  1623. {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
  1624. {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
  1625. {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
  1626. {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
  1627. {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
  1628. {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
  1629. {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
  1630. {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
  1631. {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
  1632. {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
  1633. {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
  1634. size_t i = 0;
  1635. for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
  1636. set_subtest("%s", cases[i].text);
  1637. const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
  1638. g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
  1639. const enum XML_Error actualError = XML_GetErrorCode(g_parser);
  1640. assert(actualStatus == XML_STATUS_ERROR);
  1641. if (actualError != cases[i].expectedError) {
  1642. char message[100];
  1643. snprintf(message, sizeof(message),
  1644. "Expected error %d but got error %d for case %u: \"%s\"\n",
  1645. cases[i].expectedError, actualError, (unsigned int)i + 1,
  1646. cases[i].text);
  1647. fail(message);
  1648. }
  1649. XML_ParserReset(g_parser, NULL);
  1650. }
  1651. }
  1652. END_TEST
  1653. /* Test failures in UTF-16 CDATA */
  1654. START_TEST(test_bad_cdata_utf16) {
  1655. struct CaseData {
  1656. size_t text_bytes;
  1657. const char *text;
  1658. enum XML_Error expected_error;
  1659. };
  1660. const char prolog[] = "\0<\0?\0x\0m\0l\0"
  1661. " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
  1662. " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
  1663. "1\0"
  1664. "6\0'"
  1665. "\0?\0>\0\n"
  1666. "\0<\0a\0>";
  1667. struct CaseData cases[] = {
  1668. {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
  1669. {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
  1670. {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
  1671. {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
  1672. {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
  1673. {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
  1674. {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
  1675. {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
  1676. {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
  1677. {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
  1678. {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
  1679. {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
  1680. {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
  1681. {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
  1682. {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
  1683. {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
  1684. {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
  1685. {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
  1686. {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
  1687. {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
  1688. /* Now add a four-byte UTF-16 character */
  1689. {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
  1690. XML_ERROR_UNCLOSED_CDATA_SECTION},
  1691. {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
  1692. {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
  1693. XML_ERROR_PARTIAL_CHAR},
  1694. {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
  1695. XML_ERROR_UNCLOSED_CDATA_SECTION}};
  1696. size_t i;
  1697. for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
  1698. set_subtest("case %lu", (long unsigned)(i + 1));
  1699. enum XML_Status actual_status;
  1700. enum XML_Error actual_error;
  1701. if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
  1702. XML_FALSE)
  1703. == XML_STATUS_ERROR)
  1704. xml_failure(g_parser);
  1705. actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
  1706. (int)cases[i].text_bytes, XML_TRUE);
  1707. assert(actual_status == XML_STATUS_ERROR);
  1708. actual_error = XML_GetErrorCode(g_parser);
  1709. if (actual_error != cases[i].expected_error) {
  1710. char message[1024];
  1711. snprintf(message, sizeof(message),
  1712. "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
  1713. ") for case %lu\n",
  1714. cases[i].expected_error,
  1715. XML_ErrorString(cases[i].expected_error), actual_error,
  1716. XML_ErrorString(actual_error), (long unsigned)(i + 1));
  1717. fail(message);
  1718. }
  1719. XML_ParserReset(g_parser, NULL);
  1720. }
  1721. }
  1722. END_TEST
  1723. /* Test stopping the parser in cdata handler */
  1724. START_TEST(test_stop_parser_between_cdata_calls) {
  1725. const char *text = long_cdata_text;
  1726. XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
  1727. g_resumable = XML_FALSE;
  1728. expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
  1729. }
  1730. END_TEST
  1731. /* Test suspending the parser in cdata handler */
  1732. START_TEST(test_suspend_parser_between_cdata_calls) {
  1733. if (g_chunkSize != 0) {
  1734. // this test does not use SINGLE_BYTES, because of suspension
  1735. return;
  1736. }
  1737. const char *text = long_cdata_text;
  1738. enum XML_Status result;
  1739. XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
  1740. g_resumable = XML_TRUE;
  1741. // can't use SINGLE_BYTES here, because it'll return early on suspension, and
  1742. // we won't know exactly how much input we actually managed to give Expat.
  1743. result = XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE);
  1744. if (result != XML_STATUS_SUSPENDED) {
  1745. if (result == XML_STATUS_ERROR)
  1746. xml_failure(g_parser);
  1747. fail("Parse not suspended in CDATA handler");
  1748. }
  1749. if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
  1750. xml_failure(g_parser);
  1751. }
  1752. END_TEST
  1753. /* Test memory allocation functions */
  1754. START_TEST(test_memory_allocation) {
  1755. char *buffer = (char *)XML_MemMalloc(g_parser, 256);
  1756. char *p;
  1757. if (buffer == NULL) {
  1758. fail("Allocation failed");
  1759. } else {
  1760. /* Try writing to memory; some OSes try to cheat! */
  1761. buffer[0] = 'T';
  1762. buffer[1] = 'E';
  1763. buffer[2] = 'S';
  1764. buffer[3] = 'T';
  1765. buffer[4] = '\0';
  1766. if (strcmp(buffer, "TEST") != 0) {
  1767. fail("Memory not writable");
  1768. } else {
  1769. p = (char *)XML_MemRealloc(g_parser, buffer, 512);
  1770. if (p == NULL) {
  1771. fail("Reallocation failed");
  1772. } else {
  1773. /* Write again, just to be sure */
  1774. buffer = p;
  1775. buffer[0] = 'V';
  1776. if (strcmp(buffer, "VEST") != 0) {
  1777. fail("Reallocated memory not writable");
  1778. }
  1779. }
  1780. }
  1781. XML_MemFree(g_parser, buffer);
  1782. }
  1783. }
  1784. END_TEST
  1785. /* Test XML_DefaultCurrent() passes handling on correctly */
  1786. START_TEST(test_default_current) {
  1787. const char *text = "<doc>hell]</doc>";
  1788. const char *entity_text = "<!DOCTYPE doc [\n"
  1789. "<!ENTITY entity '&#37;'>\n"
  1790. "]>\n"
  1791. "<doc>&entity;</doc>";
  1792. set_subtest("with defaulting");
  1793. {
  1794. struct handler_record_list storage;
  1795. storage.count = 0;
  1796. XML_SetDefaultHandler(g_parser, record_default_handler);
  1797. XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
  1798. XML_SetUserData(g_parser, &storage);
  1799. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  1800. == XML_STATUS_ERROR)
  1801. xml_failure(g_parser);
  1802. int i = 0;
  1803. assert_record_handler_called(&storage, i++, "record_default_handler", 5);
  1804. // we should have gotten one or more cdata callbacks, totaling 5 chars
  1805. int cdata_len_remaining = 5;
  1806. while (cdata_len_remaining > 0) {
  1807. const struct handler_record_entry *c_entry
  1808. = handler_record_get(&storage, i++);
  1809. assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
  1810. assert_true(c_entry->arg > 0);
  1811. assert_true(c_entry->arg <= cdata_len_remaining);
  1812. cdata_len_remaining -= c_entry->arg;
  1813. // default handler must follow, with the exact same len argument.
  1814. assert_record_handler_called(&storage, i++, "record_default_handler",
  1815. c_entry->arg);
  1816. }
  1817. assert_record_handler_called(&storage, i++, "record_default_handler", 6);
  1818. assert_true(storage.count == i);
  1819. }
  1820. /* Again, without the defaulting */
  1821. set_subtest("no defaulting");
  1822. {
  1823. struct handler_record_list storage;
  1824. storage.count = 0;
  1825. XML_ParserReset(g_parser, NULL);
  1826. XML_SetDefaultHandler(g_parser, record_default_handler);
  1827. XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
  1828. XML_SetUserData(g_parser, &storage);
  1829. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  1830. == XML_STATUS_ERROR)
  1831. xml_failure(g_parser);
  1832. int i = 0;
  1833. assert_record_handler_called(&storage, i++, "record_default_handler", 5);
  1834. // we should have gotten one or more cdata callbacks, totaling 5 chars
  1835. int cdata_len_remaining = 5;
  1836. while (cdata_len_remaining > 0) {
  1837. const struct handler_record_entry *c_entry
  1838. = handler_record_get(&storage, i++);
  1839. assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
  1840. assert_true(c_entry->arg > 0);
  1841. assert_true(c_entry->arg <= cdata_len_remaining);
  1842. cdata_len_remaining -= c_entry->arg;
  1843. }
  1844. assert_record_handler_called(&storage, i++, "record_default_handler", 6);
  1845. assert_true(storage.count == i);
  1846. }
  1847. /* Now with an internal entity to complicate matters */
  1848. set_subtest("with internal entity");
  1849. {
  1850. struct handler_record_list storage;
  1851. storage.count = 0;
  1852. XML_ParserReset(g_parser, NULL);
  1853. XML_SetDefaultHandler(g_parser, record_default_handler);
  1854. XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
  1855. XML_SetUserData(g_parser, &storage);
  1856. if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
  1857. XML_TRUE)
  1858. == XML_STATUS_ERROR)
  1859. xml_failure(g_parser);
  1860. /* The default handler suppresses the entity */
  1861. assert_record_handler_called(&storage, 0, "record_default_handler", 9);
  1862. assert_record_handler_called(&storage, 1, "record_default_handler", 1);
  1863. assert_record_handler_called(&storage, 2, "record_default_handler", 3);
  1864. assert_record_handler_called(&storage, 3, "record_default_handler", 1);
  1865. assert_record_handler_called(&storage, 4, "record_default_handler", 1);
  1866. assert_record_handler_called(&storage, 5, "record_default_handler", 1);
  1867. assert_record_handler_called(&storage, 6, "record_default_handler", 8);
  1868. assert_record_handler_called(&storage, 7, "record_default_handler", 1);
  1869. assert_record_handler_called(&storage, 8, "record_default_handler", 6);
  1870. assert_record_handler_called(&storage, 9, "record_default_handler", 1);
  1871. assert_record_handler_called(&storage, 10, "record_default_handler", 7);
  1872. assert_record_handler_called(&storage, 11, "record_default_handler", 1);
  1873. assert_record_handler_called(&storage, 12, "record_default_handler", 1);
  1874. assert_record_handler_called(&storage, 13, "record_default_handler", 1);
  1875. assert_record_handler_called(&storage, 14, "record_default_handler", 1);
  1876. assert_record_handler_called(&storage, 15, "record_default_handler", 1);
  1877. assert_record_handler_called(&storage, 16, "record_default_handler", 5);
  1878. assert_record_handler_called(&storage, 17, "record_default_handler", 8);
  1879. assert_record_handler_called(&storage, 18, "record_default_handler", 6);
  1880. assert_true(storage.count == 19);
  1881. }
  1882. /* Again, with a skip handler */
  1883. set_subtest("with skip handler");
  1884. {
  1885. struct handler_record_list storage;
  1886. storage.count = 0;
  1887. XML_ParserReset(g_parser, NULL);
  1888. XML_SetDefaultHandler(g_parser, record_default_handler);
  1889. XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
  1890. XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
  1891. XML_SetUserData(g_parser, &storage);
  1892. if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
  1893. XML_TRUE)
  1894. == XML_STATUS_ERROR)
  1895. xml_failure(g_parser);
  1896. /* The default handler suppresses the entity */
  1897. assert_record_handler_called(&storage, 0, "record_default_handler", 9);
  1898. assert_record_handler_called(&storage, 1, "record_default_handler", 1);
  1899. assert_record_handler_called(&storage, 2, "record_default_handler", 3);
  1900. assert_record_handler_called(&storage, 3, "record_default_handler", 1);
  1901. assert_record_handler_called(&storage, 4, "record_default_handler", 1);
  1902. assert_record_handler_called(&storage, 5, "record_default_handler", 1);
  1903. assert_record_handler_called(&storage, 6, "record_default_handler", 8);
  1904. assert_record_handler_called(&storage, 7, "record_default_handler", 1);
  1905. assert_record_handler_called(&storage, 8, "record_default_handler", 6);
  1906. assert_record_handler_called(&storage, 9, "record_default_handler", 1);
  1907. assert_record_handler_called(&storage, 10, "record_default_handler", 7);
  1908. assert_record_handler_called(&storage, 11, "record_default_handler", 1);
  1909. assert_record_handler_called(&storage, 12, "record_default_handler", 1);
  1910. assert_record_handler_called(&storage, 13, "record_default_handler", 1);
  1911. assert_record_handler_called(&storage, 14, "record_default_handler", 1);
  1912. assert_record_handler_called(&storage, 15, "record_default_handler", 1);
  1913. assert_record_handler_called(&storage, 16, "record_default_handler", 5);
  1914. assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
  1915. assert_record_handler_called(&storage, 18, "record_default_handler", 6);
  1916. assert_true(storage.count == 19);
  1917. }
  1918. /* This time, allow the entity through */
  1919. set_subtest("allow entity");
  1920. {
  1921. struct handler_record_list storage;
  1922. storage.count = 0;
  1923. XML_ParserReset(g_parser, NULL);
  1924. XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
  1925. XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
  1926. XML_SetUserData(g_parser, &storage);
  1927. if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
  1928. XML_TRUE)
  1929. == XML_STATUS_ERROR)
  1930. xml_failure(g_parser);
  1931. assert_record_handler_called(&storage, 0, "record_default_handler", 9);
  1932. assert_record_handler_called(&storage, 1, "record_default_handler", 1);
  1933. assert_record_handler_called(&storage, 2, "record_default_handler", 3);
  1934. assert_record_handler_called(&storage, 3, "record_default_handler", 1);
  1935. assert_record_handler_called(&storage, 4, "record_default_handler", 1);
  1936. assert_record_handler_called(&storage, 5, "record_default_handler", 1);
  1937. assert_record_handler_called(&storage, 6, "record_default_handler", 8);
  1938. assert_record_handler_called(&storage, 7, "record_default_handler", 1);
  1939. assert_record_handler_called(&storage, 8, "record_default_handler", 6);
  1940. assert_record_handler_called(&storage, 9, "record_default_handler", 1);
  1941. assert_record_handler_called(&storage, 10, "record_default_handler", 7);
  1942. assert_record_handler_called(&storage, 11, "record_default_handler", 1);
  1943. assert_record_handler_called(&storage, 12, "record_default_handler", 1);
  1944. assert_record_handler_called(&storage, 13, "record_default_handler", 1);
  1945. assert_record_handler_called(&storage, 14, "record_default_handler", 1);
  1946. assert_record_handler_called(&storage, 15, "record_default_handler", 1);
  1947. assert_record_handler_called(&storage, 16, "record_default_handler", 5);
  1948. assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
  1949. assert_record_handler_called(&storage, 18, "record_default_handler", 1);
  1950. assert_record_handler_called(&storage, 19, "record_default_handler", 6);
  1951. assert_true(storage.count == 20);
  1952. }
  1953. /* Finally, without passing the cdata to the default handler */
  1954. set_subtest("not passing cdata");
  1955. {
  1956. struct handler_record_list storage;
  1957. storage.count = 0;
  1958. XML_ParserReset(g_parser, NULL);
  1959. XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
  1960. XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
  1961. XML_SetUserData(g_parser, &storage);
  1962. if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
  1963. XML_TRUE)
  1964. == XML_STATUS_ERROR)
  1965. xml_failure(g_parser);
  1966. assert_record_handler_called(&storage, 0, "record_default_handler", 9);
  1967. assert_record_handler_called(&storage, 1, "record_default_handler", 1);
  1968. assert_record_handler_called(&storage, 2, "record_default_handler", 3);
  1969. assert_record_handler_called(&storage, 3, "record_default_handler", 1);
  1970. assert_record_handler_called(&storage, 4, "record_default_handler", 1);
  1971. assert_record_handler_called(&storage, 5, "record_default_handler", 1);
  1972. assert_record_handler_called(&storage, 6, "record_default_handler", 8);
  1973. assert_record_handler_called(&storage, 7, "record_default_handler", 1);
  1974. assert_record_handler_called(&storage, 8, "record_default_handler", 6);
  1975. assert_record_handler_called(&storage, 9, "record_default_handler", 1);
  1976. assert_record_handler_called(&storage, 10, "record_default_handler", 7);
  1977. assert_record_handler_called(&storage, 11, "record_default_handler", 1);
  1978. assert_record_handler_called(&storage, 12, "record_default_handler", 1);
  1979. assert_record_handler_called(&storage, 13, "record_default_handler", 1);
  1980. assert_record_handler_called(&storage, 14, "record_default_handler", 1);
  1981. assert_record_handler_called(&storage, 15, "record_default_handler", 1);
  1982. assert_record_handler_called(&storage, 16, "record_default_handler", 5);
  1983. assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
  1984. 1);
  1985. assert_record_handler_called(&storage, 18, "record_default_handler", 6);
  1986. assert_true(storage.count == 19);
  1987. }
  1988. }
  1989. END_TEST
  1990. /* Test DTD element parsing code paths */
  1991. START_TEST(test_dtd_elements) {
  1992. const char *text = "<!DOCTYPE doc [\n"
  1993. "<!ELEMENT doc (chapter)>\n"
  1994. "<!ELEMENT chapter (#PCDATA)>\n"
  1995. "]>\n"
  1996. "<doc><chapter>Wombats are go</chapter></doc>";
  1997. XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
  1998. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  1999. == XML_STATUS_ERROR)
  2000. xml_failure(g_parser);
  2001. }
  2002. END_TEST
  2003. static void XMLCALL
  2004. element_decl_check_model(void *userData, const XML_Char *name,
  2005. XML_Content *model) {
  2006. UNUSED_P(userData);
  2007. uint32_t errorFlags = 0;
  2008. /* Expected model array structure is this:
  2009. * [0] (type 6, quant 0)
  2010. * [1] (type 5, quant 0)
  2011. * [3] (type 4, quant 0, name "bar")
  2012. * [4] (type 4, quant 0, name "foo")
  2013. * [5] (type 4, quant 3, name "xyz")
  2014. * [2] (type 4, quant 2, name "zebra")
  2015. */
  2016. errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
  2017. errorFlags |= ((model != NULL) ? 0 : (1u << 1));
  2018. if (model != NULL) {
  2019. errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
  2020. errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
  2021. errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
  2022. errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
  2023. errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
  2024. errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
  2025. errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
  2026. errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
  2027. errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
  2028. errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
  2029. errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
  2030. errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
  2031. errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
  2032. errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
  2033. errorFlags
  2034. |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
  2035. errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
  2036. errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
  2037. errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
  2038. errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
  2039. errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
  2040. errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
  2041. errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
  2042. errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
  2043. errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
  2044. errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
  2045. errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
  2046. errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
  2047. errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
  2048. errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
  2049. errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
  2050. }
  2051. XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
  2052. XML_FreeContentModel(g_parser, model);
  2053. }
  2054. START_TEST(test_dtd_elements_nesting) {
  2055. // Payload inspired by a test in Perl's XML::Parser
  2056. const char *text = "<!DOCTYPE foo [\n"
  2057. "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
  2058. "]>\n"
  2059. "<foo/>";
  2060. XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
  2061. XML_SetElementDeclHandler(g_parser, element_decl_check_model);
  2062. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2063. == XML_STATUS_ERROR)
  2064. xml_failure(g_parser);
  2065. if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
  2066. fail("Element declaration model regression detected");
  2067. }
  2068. END_TEST
  2069. /* Test foreign DTD handling */
  2070. START_TEST(test_set_foreign_dtd) {
  2071. const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
  2072. const char *text2 = "<doc>&entity;</doc>";
  2073. ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
  2074. /* Check hash salt is passed through too */
  2075. XML_SetHashSalt(g_parser, 0x12345678);
  2076. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2077. XML_SetUserData(g_parser, &test_data);
  2078. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  2079. /* Add a default handler to exercise more code paths */
  2080. XML_SetDefaultHandler(g_parser, dummy_default_handler);
  2081. if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
  2082. fail("Could not set foreign DTD");
  2083. if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
  2084. == XML_STATUS_ERROR)
  2085. xml_failure(g_parser);
  2086. /* Ensure that trying to set the DTD after parsing has started
  2087. * is faulted, even if it's the same setting.
  2088. */
  2089. if (XML_UseForeignDTD(g_parser, XML_TRUE)
  2090. != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
  2091. fail("Failed to reject late foreign DTD setting");
  2092. /* Ditto for the hash salt */
  2093. if (XML_SetHashSalt(g_parser, 0x23456789))
  2094. fail("Failed to reject late hash salt change");
  2095. /* Now finish the parse */
  2096. if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
  2097. == XML_STATUS_ERROR)
  2098. xml_failure(g_parser);
  2099. }
  2100. END_TEST
  2101. /* Test foreign DTD handling with a failing NotStandalone handler */
  2102. START_TEST(test_foreign_dtd_not_standalone) {
  2103. const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
  2104. "<doc>&entity;</doc>";
  2105. ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
  2106. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2107. XML_SetUserData(g_parser, &test_data);
  2108. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  2109. XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
  2110. if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
  2111. fail("Could not set foreign DTD");
  2112. expect_failure(text, XML_ERROR_NOT_STANDALONE,
  2113. "NotStandalonehandler failed to reject");
  2114. }
  2115. END_TEST
  2116. /* Test invalid character in a foreign DTD is faulted */
  2117. START_TEST(test_invalid_foreign_dtd) {
  2118. const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
  2119. "<doc>&entity;</doc>";
  2120. ExtFaults test_data
  2121. = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
  2122. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2123. XML_SetUserData(g_parser, &test_data);
  2124. XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
  2125. XML_UseForeignDTD(g_parser, XML_TRUE);
  2126. expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
  2127. "Bad DTD should not have been accepted");
  2128. }
  2129. END_TEST
  2130. /* Test foreign DTD use with a doctype */
  2131. START_TEST(test_foreign_dtd_with_doctype) {
  2132. const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
  2133. "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
  2134. const char *text2 = "<doc>&entity;</doc>";
  2135. ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
  2136. /* Check hash salt is passed through too */
  2137. XML_SetHashSalt(g_parser, 0x12345678);
  2138. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2139. XML_SetUserData(g_parser, &test_data);
  2140. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  2141. /* Add a default handler to exercise more code paths */
  2142. XML_SetDefaultHandler(g_parser, dummy_default_handler);
  2143. if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
  2144. fail("Could not set foreign DTD");
  2145. if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
  2146. == XML_STATUS_ERROR)
  2147. xml_failure(g_parser);
  2148. /* Ensure that trying to set the DTD after parsing has started
  2149. * is faulted, even if it's the same setting.
  2150. */
  2151. if (XML_UseForeignDTD(g_parser, XML_TRUE)
  2152. != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
  2153. fail("Failed to reject late foreign DTD setting");
  2154. /* Ditto for the hash salt */
  2155. if (XML_SetHashSalt(g_parser, 0x23456789))
  2156. fail("Failed to reject late hash salt change");
  2157. /* Now finish the parse */
  2158. if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
  2159. == XML_STATUS_ERROR)
  2160. xml_failure(g_parser);
  2161. }
  2162. END_TEST
  2163. /* Test XML_UseForeignDTD with no external subset present */
  2164. START_TEST(test_foreign_dtd_without_external_subset) {
  2165. const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
  2166. "<doc>&foo;</doc>";
  2167. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2168. XML_SetUserData(g_parser, NULL);
  2169. XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
  2170. XML_UseForeignDTD(g_parser, XML_TRUE);
  2171. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2172. == XML_STATUS_ERROR)
  2173. xml_failure(g_parser);
  2174. }
  2175. END_TEST
  2176. START_TEST(test_empty_foreign_dtd) {
  2177. const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
  2178. "<doc>&entity;</doc>";
  2179. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2180. XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
  2181. XML_UseForeignDTD(g_parser, XML_TRUE);
  2182. expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
  2183. "Undefined entity not faulted");
  2184. }
  2185. END_TEST
  2186. /* Test XML Base is set and unset appropriately */
  2187. START_TEST(test_set_base) {
  2188. const XML_Char *old_base;
  2189. const XML_Char *new_base = XCS("/local/file/name.xml");
  2190. old_base = XML_GetBase(g_parser);
  2191. if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
  2192. fail("Unable to set base");
  2193. if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
  2194. fail("Base setting not correct");
  2195. if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
  2196. fail("Unable to NULL base");
  2197. if (XML_GetBase(g_parser) != NULL)
  2198. fail("Base setting not nulled");
  2199. XML_SetBase(g_parser, old_base);
  2200. }
  2201. END_TEST
  2202. /* Test attribute counts, indexing, etc */
  2203. START_TEST(test_attributes) {
  2204. const char *text = "<!DOCTYPE doc [\n"
  2205. "<!ELEMENT doc (tag)>\n"
  2206. "<!ATTLIST doc id ID #REQUIRED>\n"
  2207. "]>"
  2208. "<doc a='1' id='one' b='2'>"
  2209. "<tag c='3'/>"
  2210. "</doc>";
  2211. AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
  2212. {XCS("b"), XCS("2")},
  2213. {XCS("id"), XCS("one")},
  2214. {NULL, NULL}};
  2215. AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
  2216. ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
  2217. {XCS("tag"), 1, NULL, NULL},
  2218. {NULL, 0, NULL, NULL}};
  2219. info[0].attributes = doc_info;
  2220. info[1].attributes = tag_info;
  2221. XML_Parser parser = XML_ParserCreate(NULL);
  2222. assert_true(parser != NULL);
  2223. ParserAndElementInfo parserAndElementInfos = {
  2224. parser,
  2225. info,
  2226. };
  2227. XML_SetStartElementHandler(parser, counting_start_element_handler);
  2228. XML_SetUserData(parser, &parserAndElementInfos);
  2229. if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
  2230. == XML_STATUS_ERROR)
  2231. xml_failure(parser);
  2232. XML_ParserFree(parser);
  2233. }
  2234. END_TEST
  2235. /* Test reset works correctly in the middle of processing an internal
  2236. * entity. Exercises some obscure code in XML_ParserReset().
  2237. */
  2238. START_TEST(test_reset_in_entity) {
  2239. if (g_chunkSize != 0) {
  2240. // this test does not use SINGLE_BYTES, because of suspension
  2241. return;
  2242. }
  2243. const char *text = "<!DOCTYPE doc [\n"
  2244. "<!ENTITY wombat 'wom'>\n"
  2245. "<!ENTITY entity 'hi &wom; there'>\n"
  2246. "]>\n"
  2247. "<doc>&entity;</doc>";
  2248. XML_ParsingStatus status;
  2249. g_resumable = XML_TRUE;
  2250. XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
  2251. // can't use SINGLE_BYTES here, because it'll return early on suspension, and
  2252. // we won't know exactly how much input we actually managed to give Expat.
  2253. if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
  2254. == XML_STATUS_ERROR)
  2255. xml_failure(g_parser);
  2256. XML_GetParsingStatus(g_parser, &status);
  2257. if (status.parsing != XML_SUSPENDED)
  2258. fail("Parsing status not SUSPENDED");
  2259. XML_ParserReset(g_parser, NULL);
  2260. XML_GetParsingStatus(g_parser, &status);
  2261. if (status.parsing != XML_INITIALIZED)
  2262. fail("Parsing status doesn't reset to INITIALIZED");
  2263. }
  2264. END_TEST
  2265. /* Test that resume correctly passes through parse errors */
  2266. START_TEST(test_resume_invalid_parse) {
  2267. const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
  2268. g_resumable = XML_TRUE;
  2269. XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
  2270. if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
  2271. == XML_STATUS_ERROR)
  2272. xml_failure(g_parser);
  2273. if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
  2274. fail("Resumed invalid parse not faulted");
  2275. if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
  2276. fail("Invalid parse not correctly faulted");
  2277. }
  2278. END_TEST
  2279. /* Test that re-suspended parses are correctly passed through */
  2280. START_TEST(test_resume_resuspended) {
  2281. const char *text = "<doc>Hello<meep/>world</doc>";
  2282. g_resumable = XML_TRUE;
  2283. XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
  2284. if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
  2285. == XML_STATUS_ERROR)
  2286. xml_failure(g_parser);
  2287. g_resumable = XML_TRUE;
  2288. XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
  2289. if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
  2290. fail("Resumption not suspended");
  2291. /* This one should succeed and finish up */
  2292. if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
  2293. xml_failure(g_parser);
  2294. }
  2295. END_TEST
  2296. /* Test that CDATA shows up correctly through a default handler */
  2297. START_TEST(test_cdata_default) {
  2298. const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
  2299. const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
  2300. CharData storage;
  2301. CharData_Init(&storage);
  2302. XML_SetUserData(g_parser, &storage);
  2303. XML_SetDefaultHandler(g_parser, accumulate_characters);
  2304. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2305. == XML_STATUS_ERROR)
  2306. xml_failure(g_parser);
  2307. CharData_CheckXMLChars(&storage, expected);
  2308. }
  2309. END_TEST
  2310. /* Test resetting a subordinate parser does exactly nothing */
  2311. START_TEST(test_subordinate_reset) {
  2312. const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
  2313. "<!DOCTYPE doc SYSTEM 'foo'>\n"
  2314. "<doc>&entity;</doc>";
  2315. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2316. XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
  2317. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2318. == XML_STATUS_ERROR)
  2319. xml_failure(g_parser);
  2320. }
  2321. END_TEST
  2322. /* Test suspending a subordinate parser */
  2323. START_TEST(test_subordinate_suspend) {
  2324. const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
  2325. "<!DOCTYPE doc SYSTEM 'foo'>\n"
  2326. "<doc>&entity;</doc>";
  2327. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2328. XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
  2329. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2330. == XML_STATUS_ERROR)
  2331. xml_failure(g_parser);
  2332. }
  2333. END_TEST
  2334. /* Test suspending a subordinate parser from an XML declaration */
  2335. /* Increases code coverage of the tests */
  2336. START_TEST(test_subordinate_xdecl_suspend) {
  2337. const char *text
  2338. = "<!DOCTYPE doc [\n"
  2339. " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
  2340. "]>\n"
  2341. "<doc>&entity;</doc>";
  2342. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2343. XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
  2344. g_resumable = XML_TRUE;
  2345. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2346. == XML_STATUS_ERROR)
  2347. xml_failure(g_parser);
  2348. }
  2349. END_TEST
  2350. START_TEST(test_subordinate_xdecl_abort) {
  2351. const char *text
  2352. = "<!DOCTYPE doc [\n"
  2353. " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
  2354. "]>\n"
  2355. "<doc>&entity;</doc>";
  2356. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2357. XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
  2358. g_resumable = XML_FALSE;
  2359. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2360. == XML_STATUS_ERROR)
  2361. xml_failure(g_parser);
  2362. }
  2363. END_TEST
  2364. /* Test external entity fault handling with suspension */
  2365. START_TEST(test_ext_entity_invalid_suspended_parse) {
  2366. const char *text = "<!DOCTYPE doc [\n"
  2367. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  2368. "]>\n"
  2369. "<doc>&en;</doc>";
  2370. ExtFaults faults[]
  2371. = {{"<?xml version='1.0' encoding='us-ascii'?><",
  2372. "Incomplete element declaration not faulted", NULL,
  2373. XML_ERROR_UNCLOSED_TOKEN},
  2374. {/* First two bytes of a three-byte char */
  2375. "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
  2376. "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
  2377. {NULL, NULL, NULL, XML_ERROR_NONE}};
  2378. ExtFaults *fault;
  2379. for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
  2380. set_subtest("%s", fault->parse_text);
  2381. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2382. XML_SetExternalEntityRefHandler(g_parser,
  2383. external_entity_suspending_faulter);
  2384. XML_SetUserData(g_parser, fault);
  2385. expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
  2386. "Parser did not report external entity error");
  2387. XML_ParserReset(g_parser, NULL);
  2388. }
  2389. }
  2390. END_TEST
  2391. /* Test setting an explicit encoding */
  2392. START_TEST(test_explicit_encoding) {
  2393. const char *text1 = "<doc>Hello ";
  2394. const char *text2 = " World</doc>";
  2395. /* Just check that we can set the encoding to NULL before starting */
  2396. if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
  2397. fail("Failed to initialise encoding to NULL");
  2398. /* Say we are UTF-8 */
  2399. if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
  2400. fail("Failed to set explicit encoding");
  2401. if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
  2402. == XML_STATUS_ERROR)
  2403. xml_failure(g_parser);
  2404. /* Try to switch encodings mid-parse */
  2405. if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
  2406. fail("Allowed encoding change");
  2407. if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
  2408. == XML_STATUS_ERROR)
  2409. xml_failure(g_parser);
  2410. /* Try now the parse is over */
  2411. if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
  2412. fail("Failed to unset encoding");
  2413. }
  2414. END_TEST
  2415. /* Test handling of trailing CR (rather than newline) */
  2416. START_TEST(test_trailing_cr) {
  2417. const char *text = "<doc>\r";
  2418. int found_cr;
  2419. /* Try with a character handler, for code coverage */
  2420. XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
  2421. XML_SetUserData(g_parser, &found_cr);
  2422. found_cr = 0;
  2423. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2424. == XML_STATUS_OK)
  2425. fail("Failed to fault unclosed doc");
  2426. if (found_cr == 0)
  2427. fail("Did not catch the carriage return");
  2428. XML_ParserReset(g_parser, NULL);
  2429. /* Now with a default handler instead */
  2430. XML_SetDefaultHandler(g_parser, cr_cdata_handler);
  2431. XML_SetUserData(g_parser, &found_cr);
  2432. found_cr = 0;
  2433. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2434. == XML_STATUS_OK)
  2435. fail("Failed to fault unclosed doc");
  2436. if (found_cr == 0)
  2437. fail("Did not catch default carriage return");
  2438. }
  2439. END_TEST
  2440. /* Test trailing CR in an external entity parse */
  2441. START_TEST(test_ext_entity_trailing_cr) {
  2442. const char *text = "<!DOCTYPE doc [\n"
  2443. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  2444. "]>\n"
  2445. "<doc>&en;</doc>";
  2446. int found_cr;
  2447. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2448. XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
  2449. XML_SetUserData(g_parser, &found_cr);
  2450. found_cr = 0;
  2451. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2452. != XML_STATUS_OK)
  2453. xml_failure(g_parser);
  2454. if (found_cr == 0)
  2455. fail("No carriage return found");
  2456. XML_ParserReset(g_parser, NULL);
  2457. /* Try again with a different trailing CR */
  2458. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2459. XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
  2460. XML_SetUserData(g_parser, &found_cr);
  2461. found_cr = 0;
  2462. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2463. != XML_STATUS_OK)
  2464. xml_failure(g_parser);
  2465. if (found_cr == 0)
  2466. fail("No carriage return found");
  2467. }
  2468. END_TEST
  2469. /* Test handling of trailing square bracket */
  2470. START_TEST(test_trailing_rsqb) {
  2471. const char *text8 = "<doc>]";
  2472. const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
  2473. int found_rsqb;
  2474. int text8_len = (int)strlen(text8);
  2475. XML_SetCharacterDataHandler(g_parser, rsqb_handler);
  2476. XML_SetUserData(g_parser, &found_rsqb);
  2477. found_rsqb = 0;
  2478. if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
  2479. == XML_STATUS_OK)
  2480. fail("Failed to fault unclosed doc");
  2481. if (found_rsqb == 0)
  2482. fail("Did not catch the right square bracket");
  2483. /* Try again with a different encoding */
  2484. XML_ParserReset(g_parser, NULL);
  2485. XML_SetCharacterDataHandler(g_parser, rsqb_handler);
  2486. XML_SetUserData(g_parser, &found_rsqb);
  2487. found_rsqb = 0;
  2488. if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
  2489. XML_TRUE)
  2490. == XML_STATUS_OK)
  2491. fail("Failed to fault unclosed doc");
  2492. if (found_rsqb == 0)
  2493. fail("Did not catch the right square bracket");
  2494. /* And finally with a default handler */
  2495. XML_ParserReset(g_parser, NULL);
  2496. XML_SetDefaultHandler(g_parser, rsqb_handler);
  2497. XML_SetUserData(g_parser, &found_rsqb);
  2498. found_rsqb = 0;
  2499. if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
  2500. XML_TRUE)
  2501. == XML_STATUS_OK)
  2502. fail("Failed to fault unclosed doc");
  2503. if (found_rsqb == 0)
  2504. fail("Did not catch the right square bracket");
  2505. }
  2506. END_TEST
  2507. /* Test trailing right square bracket in an external entity parse */
  2508. START_TEST(test_ext_entity_trailing_rsqb) {
  2509. const char *text = "<!DOCTYPE doc [\n"
  2510. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  2511. "]>\n"
  2512. "<doc>&en;</doc>";
  2513. int found_rsqb;
  2514. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2515. XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
  2516. XML_SetUserData(g_parser, &found_rsqb);
  2517. found_rsqb = 0;
  2518. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2519. != XML_STATUS_OK)
  2520. xml_failure(g_parser);
  2521. if (found_rsqb == 0)
  2522. fail("No right square bracket found");
  2523. }
  2524. END_TEST
  2525. /* Test CDATA handling in an external entity */
  2526. START_TEST(test_ext_entity_good_cdata) {
  2527. const char *text = "<!DOCTYPE doc [\n"
  2528. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  2529. "]>\n"
  2530. "<doc>&en;</doc>";
  2531. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2532. XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
  2533. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2534. != XML_STATUS_OK)
  2535. xml_failure(g_parser);
  2536. }
  2537. END_TEST
  2538. /* Test user parameter settings */
  2539. START_TEST(test_user_parameters) {
  2540. const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
  2541. "<!-- Primary parse -->\n"
  2542. "<!DOCTYPE doc SYSTEM 'foo'>\n"
  2543. "<doc>&entity;";
  2544. const char *epilog = "<!-- Back to primary parser -->\n"
  2545. "</doc>";
  2546. g_comment_count = 0;
  2547. g_skip_count = 0;
  2548. g_xdecl_count = 0;
  2549. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2550. XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
  2551. XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
  2552. XML_SetCommentHandler(g_parser, data_check_comment_handler);
  2553. XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
  2554. XML_UseParserAsHandlerArg(g_parser);
  2555. XML_SetUserData(g_parser, (void *)1);
  2556. g_handler_data = g_parser;
  2557. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
  2558. == XML_STATUS_ERROR)
  2559. xml_failure(g_parser);
  2560. /* Ensure we can't change policy mid-parse */
  2561. if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
  2562. fail("Changed param entity parsing policy while parsing");
  2563. if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
  2564. == XML_STATUS_ERROR)
  2565. xml_failure(g_parser);
  2566. if (g_comment_count != 3)
  2567. fail("Comment handler not invoked enough times");
  2568. if (g_skip_count != 1)
  2569. fail("Skip handler not invoked enough times");
  2570. if (g_xdecl_count != 1)
  2571. fail("XML declaration handler not invoked");
  2572. }
  2573. END_TEST
  2574. /* Test that an explicit external entity handler argument replaces
  2575. * the parser as the first argument.
  2576. *
  2577. * We do not call the first parameter to the external entity handler
  2578. * 'parser' for once, since the first time the handler is called it
  2579. * will actually be a text string. We need to be able to access the
  2580. * global 'parser' variable to create our external entity parser from,
  2581. * since there are code paths we need to ensure get executed.
  2582. */
  2583. START_TEST(test_ext_entity_ref_parameter) {
  2584. const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
  2585. "<!DOCTYPE doc SYSTEM 'foo'>\n"
  2586. "<doc>&entity;</doc>";
  2587. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2588. XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
  2589. /* Set a handler arg that is not NULL and not parser (which is
  2590. * what NULL would cause to be passed.
  2591. */
  2592. XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
  2593. g_handler_data = text;
  2594. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2595. == XML_STATUS_ERROR)
  2596. xml_failure(g_parser);
  2597. /* Now try again with unset args */
  2598. XML_ParserReset(g_parser, NULL);
  2599. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2600. XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
  2601. XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
  2602. g_handler_data = g_parser;
  2603. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2604. == XML_STATUS_ERROR)
  2605. xml_failure(g_parser);
  2606. }
  2607. END_TEST
  2608. /* Test the parsing of an empty string */
  2609. START_TEST(test_empty_parse) {
  2610. const char *text = "<doc></doc>";
  2611. const char *partial = "<doc>";
  2612. if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
  2613. fail("Parsing empty string faulted");
  2614. if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
  2615. fail("Parsing final empty string not faulted");
  2616. if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
  2617. fail("Parsing final empty string faulted for wrong reason");
  2618. /* Now try with valid text before the empty end */
  2619. XML_ParserReset(g_parser, NULL);
  2620. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
  2621. == XML_STATUS_ERROR)
  2622. xml_failure(g_parser);
  2623. if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
  2624. fail("Parsing final empty string faulted");
  2625. /* Now try with invalid text before the empty end */
  2626. XML_ParserReset(g_parser, NULL);
  2627. if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
  2628. XML_FALSE)
  2629. == XML_STATUS_ERROR)
  2630. xml_failure(g_parser);
  2631. if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
  2632. fail("Parsing final incomplete empty string not faulted");
  2633. }
  2634. END_TEST
  2635. /* Test XML_Parse for len < 0 */
  2636. START_TEST(test_negative_len_parse) {
  2637. const char *const doc = "<root/>";
  2638. for (int isFinal = 0; isFinal < 2; isFinal++) {
  2639. set_subtest("isFinal=%d", isFinal);
  2640. XML_Parser parser = XML_ParserCreate(NULL);
  2641. if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
  2642. fail("There was not supposed to be any initial parse error.");
  2643. const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
  2644. if (status != XML_STATUS_ERROR)
  2645. fail("Negative len was expected to fail the parse but did not.");
  2646. if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
  2647. fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
  2648. XML_ParserFree(parser);
  2649. }
  2650. }
  2651. END_TEST
  2652. /* Test XML_ParseBuffer for len < 0 */
  2653. START_TEST(test_negative_len_parse_buffer) {
  2654. const char *const doc = "<root/>";
  2655. for (int isFinal = 0; isFinal < 2; isFinal++) {
  2656. set_subtest("isFinal=%d", isFinal);
  2657. XML_Parser parser = XML_ParserCreate(NULL);
  2658. if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
  2659. fail("There was not supposed to be any initial parse error.");
  2660. void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
  2661. if (buffer == NULL)
  2662. fail("XML_GetBuffer failed.");
  2663. memcpy(buffer, doc, strlen(doc));
  2664. const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
  2665. if (status != XML_STATUS_ERROR)
  2666. fail("Negative len was expected to fail the parse but did not.");
  2667. if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
  2668. fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
  2669. XML_ParserFree(parser);
  2670. }
  2671. }
  2672. END_TEST
  2673. /* Test odd corners of the XML_GetBuffer interface */
  2674. static enum XML_Status
  2675. get_feature(enum XML_FeatureEnum feature_id, long *presult) {
  2676. const XML_Feature *feature = XML_GetFeatureList();
  2677. if (feature == NULL)
  2678. return XML_STATUS_ERROR;
  2679. for (; feature->feature != XML_FEATURE_END; feature++) {
  2680. if (feature->feature == feature_id) {
  2681. *presult = feature->value;
  2682. return XML_STATUS_OK;
  2683. }
  2684. }
  2685. return XML_STATUS_ERROR;
  2686. }
  2687. /* Test odd corners of the XML_GetBuffer interface */
  2688. START_TEST(test_get_buffer_1) {
  2689. const char *text = get_buffer_test_text;
  2690. void *buffer;
  2691. long context_bytes;
  2692. /* Attempt to allocate a negative length buffer */
  2693. if (XML_GetBuffer(g_parser, -12) != NULL)
  2694. fail("Negative length buffer not failed");
  2695. /* Now get a small buffer and extend it past valid length */
  2696. buffer = XML_GetBuffer(g_parser, 1536);
  2697. if (buffer == NULL)
  2698. fail("1.5K buffer failed");
  2699. assert(buffer != NULL);
  2700. memcpy(buffer, text, strlen(text));
  2701. if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
  2702. == XML_STATUS_ERROR)
  2703. xml_failure(g_parser);
  2704. if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
  2705. fail("INT_MAX buffer not failed");
  2706. /* Now try extending it a more reasonable but still too large
  2707. * amount. The allocator in XML_GetBuffer() doubles the buffer
  2708. * size until it exceeds the requested amount or INT_MAX. If it
  2709. * exceeds INT_MAX, it rejects the request, so we want a request
  2710. * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable,
  2711. * with an extra byte just to ensure that the request is off any
  2712. * boundary. The request will be inflated internally by
  2713. * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
  2714. * request.
  2715. */
  2716. if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
  2717. context_bytes = 0;
  2718. if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
  2719. fail("INT_MAX- buffer not failed");
  2720. /* Now try extending it a carefully crafted amount */
  2721. if (XML_GetBuffer(g_parser, 1000) == NULL)
  2722. fail("1000 buffer failed");
  2723. }
  2724. END_TEST
  2725. /* Test more corners of the XML_GetBuffer interface */
  2726. START_TEST(test_get_buffer_2) {
  2727. const char *text = get_buffer_test_text;
  2728. void *buffer;
  2729. /* Now get a decent buffer */
  2730. buffer = XML_GetBuffer(g_parser, 1536);
  2731. if (buffer == NULL)
  2732. fail("1.5K buffer failed");
  2733. assert(buffer != NULL);
  2734. memcpy(buffer, text, strlen(text));
  2735. if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
  2736. == XML_STATUS_ERROR)
  2737. xml_failure(g_parser);
  2738. /* Extend it, to catch a different code path */
  2739. if (XML_GetBuffer(g_parser, 1024) == NULL)
  2740. fail("1024 buffer failed");
  2741. }
  2742. END_TEST
  /* Test for signed integer overflow CVE-2022-23852.
   * Only built when XML_CONTEXT_BYTES is greater than zero.
   */
  #if XML_CONTEXT_BYTES > 0
  START_TEST(test_get_buffer_3_overflow) {
    XML_Parser parser = XML_ParserCreate(NULL);
    assert(parser != NULL);

    const char *const doc = "\n";
    const int doc_len = (int)strlen(doc);
    const int expectedKeepValue = doc_len;

    // After this call, variable "keep" in XML_GetBuffer will
    // have value expectedKeepValue
    const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
        parser, doc, doc_len, XML_FALSE /* isFinal */);
    if (status == XML_STATUS_ERROR) {
      xml_failure(parser);
    }

    assert(expectedKeepValue > 0);
    if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL) {
      fail("enlarging buffer not failed");
    }

    XML_ParserFree(parser);
  }
  END_TEST
  #endif // XML_CONTEXT_BYTES > 0
  2763. START_TEST(test_buffer_can_grow_to_max) {
  2764. const char *const prefixes[] = {
  2765. "",
  2766. "<",
  2767. "<x a='",
  2768. "<doc><x a='",
  2769. "<document><x a='",
  2770. "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
  2771. "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
  2772. "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
  2773. "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
  2774. "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
  2775. const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
  2776. int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
  2777. #if defined(__MINGW32__) && ! defined(__MINGW64__)
  2778. // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
  2779. // Can we make a big allocation?
  2780. void *big = malloc(maxbuf);
  2781. if (! big) {
  2782. // The big allocation failed. Let's be a little lenient.
  2783. maxbuf = maxbuf / 2;
  2784. }
  2785. free(big);
  2786. #endif
  2787. for (int i = 0; i < num_prefixes; ++i) {
  2788. set_subtest("\"%s\"", prefixes[i]);
  2789. XML_Parser parser = XML_ParserCreate(NULL);
  2790. #if XML_GE == 1
  2791. assert_true(XML_SetAllocTrackerActivationThreshold(parser, (size_t)-1)
  2792. == XML_TRUE); // i.e. deactivate
  2793. #endif
  2794. const int prefix_len = (int)strlen(prefixes[i]);
  2795. const enum XML_Status s
  2796. = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
  2797. if (s != XML_STATUS_OK)
  2798. xml_failure(parser);
  2799. // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
  2800. // subtracting the whole prefix is easiest, and close enough.
  2801. assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
  2802. // The limit should be consistent; no prefix should allow us to
  2803. // reach above the max buffer size.
  2804. assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
  2805. XML_ParserFree(parser);
  2806. }
  2807. }
  2808. END_TEST
  2809. START_TEST(test_getbuffer_allocates_on_zero_len) {
  2810. for (int first_len = 1; first_len >= 0; first_len--) {
  2811. set_subtest("with len=%d first", first_len);
  2812. XML_Parser parser = XML_ParserCreate(NULL);
  2813. assert_true(parser != NULL);
  2814. assert_true(XML_GetBuffer(parser, first_len) != NULL);
  2815. assert_true(XML_GetBuffer(parser, 0) != NULL);
  2816. if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
  2817. xml_failure(parser);
  2818. XML_ParserFree(parser);
  2819. }
  2820. }
  2821. END_TEST
  2822. /* Test position information macros */
  2823. START_TEST(test_byte_info_at_end) {
  2824. const char *text = "<doc></doc>";
  2825. if (XML_GetCurrentByteIndex(g_parser) != -1
  2826. || XML_GetCurrentByteCount(g_parser) != 0)
  2827. fail("Byte index/count incorrect at start of parse");
  2828. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2829. == XML_STATUS_ERROR)
  2830. xml_failure(g_parser);
  2831. /* At end, the count will be zero and the index the end of string */
  2832. if (XML_GetCurrentByteCount(g_parser) != 0)
  2833. fail("Terminal byte count incorrect");
  2834. if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
  2835. fail("Terminal byte index incorrect");
  2836. }
  2837. END_TEST
  2838. /* Test position information from errors */
  2839. #define PRE_ERROR_STR "<doc></"
  2840. #define POST_ERROR_STR "wombat></doc>"
  2841. START_TEST(test_byte_info_at_error) {
  2842. const char *text = PRE_ERROR_STR POST_ERROR_STR;
  2843. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2844. == XML_STATUS_OK)
  2845. fail("Syntax error not faulted");
  2846. if (XML_GetCurrentByteCount(g_parser) != 0)
  2847. fail("Error byte count incorrect");
  2848. if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
  2849. fail("Error byte index incorrect");
  2850. }
  2851. END_TEST
  2852. #undef PRE_ERROR_STR
  2853. #undef POST_ERROR_STR
  2854. /* Test position information in handler */
  2855. #define START_ELEMENT "<e>"
  2856. #define CDATA_TEXT "Hello"
  2857. #define END_ELEMENT "</e>"
  2858. START_TEST(test_byte_info_at_cdata) {
  2859. const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
  2860. int offset, size;
  2861. ByteTestData data;
  2862. /* Check initial context is empty */
  2863. if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
  2864. fail("Unexpected context at start of parse");
  2865. data.start_element_len = (int)strlen(START_ELEMENT);
  2866. data.cdata_len = (int)strlen(CDATA_TEXT);
  2867. data.total_string_len = (int)strlen(text);
  2868. XML_SetCharacterDataHandler(g_parser, byte_character_handler);
  2869. XML_SetUserData(g_parser, &data);
  2870. if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
  2871. xml_failure(g_parser);
  2872. }
  2873. END_TEST
  2874. #undef START_ELEMENT
  2875. #undef CDATA_TEXT
  2876. #undef END_ELEMENT
  2877. /* Test predefined entities are correctly recognised */
  2878. START_TEST(test_predefined_entities) {
  2879. const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
  2880. const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
  2881. const XML_Char *result = XCS("<>&\"'");
  2882. CharData storage;
  2883. XML_SetDefaultHandler(g_parser, accumulate_characters);
  2884. /* run_character_check uses XML_SetCharacterDataHandler(), which
  2885. * unfortunately heads off a code path that we need to exercise.
  2886. */
  2887. CharData_Init(&storage);
  2888. XML_SetUserData(g_parser, &storage);
  2889. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2890. == XML_STATUS_ERROR)
  2891. xml_failure(g_parser);
  2892. /* The default handler doesn't translate the entities */
  2893. CharData_CheckXMLChars(&storage, expected);
  2894. /* Now try again and check the translation */
  2895. XML_ParserReset(g_parser, NULL);
  2896. run_character_check(text, result);
  2897. }
  2898. END_TEST
  2899. /* Regression test that an invalid tag in an external parameter
  2900. * reference in an external DTD is correctly faulted.
  2901. *
  2902. * Only a few specific tags are legal in DTDs ignoring comments and
  2903. * processing instructions, all of which begin with an exclamation
  2904. * mark. "<el/>" is not one of them, so the parser should raise an
  2905. * error on encountering it.
  2906. */
  2907. START_TEST(test_invalid_tag_in_dtd) {
  2908. const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
  2909. "<doc></doc>\n";
  2910. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2911. XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
  2912. expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
  2913. "Invalid tag IN DTD external param not rejected");
  2914. }
  2915. END_TEST
  2916. /* Test entities not quite the predefined ones are not mis-recognised */
  2917. START_TEST(test_not_predefined_entities) {
  2918. const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
  2919. "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
  2920. int i = 0;
  2921. while (text[i] != NULL) {
  2922. expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
  2923. "Undefined entity not rejected");
  2924. XML_ParserReset(g_parser, NULL);
  2925. i++;
  2926. }
  2927. }
  2928. END_TEST
  2929. /* Test conditional inclusion (IGNORE) */
  2930. START_TEST(test_ignore_section) {
  2931. const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
  2932. "<doc><e>&entity;</e></doc>";
  2933. const XML_Char *expected
  2934. = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
  2935. CharData storage;
  2936. CharData_Init(&storage);
  2937. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2938. XML_SetUserData(g_parser, &storage);
  2939. XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
  2940. XML_SetDefaultHandler(g_parser, accumulate_characters);
  2941. XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
  2942. XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
  2943. XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
  2944. XML_SetStartElementHandler(g_parser, dummy_start_element);
  2945. XML_SetEndElementHandler(g_parser, dummy_end_element);
  2946. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  2947. == XML_STATUS_ERROR)
  2948. xml_failure(g_parser);
  2949. CharData_CheckXMLChars(&storage, expected);
  2950. }
  2951. END_TEST
  2952. START_TEST(test_ignore_section_utf16) {
  2953. const char text[] =
  2954. /* <!DOCTYPE d SYSTEM 's'> */
  2955. "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
  2956. "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
  2957. /* <d><e>&en;</e></d> */
  2958. "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
  2959. const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
  2960. CharData storage;
  2961. CharData_Init(&storage);
  2962. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2963. XML_SetUserData(g_parser, &storage);
  2964. XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
  2965. XML_SetDefaultHandler(g_parser, accumulate_characters);
  2966. XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
  2967. XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
  2968. XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
  2969. XML_SetStartElementHandler(g_parser, dummy_start_element);
  2970. XML_SetEndElementHandler(g_parser, dummy_end_element);
  2971. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  2972. == XML_STATUS_ERROR)
  2973. xml_failure(g_parser);
  2974. CharData_CheckXMLChars(&storage, expected);
  2975. }
  2976. END_TEST
  2977. START_TEST(test_ignore_section_utf16_be) {
  2978. const char text[] =
  2979. /* <!DOCTYPE d SYSTEM 's'> */
  2980. "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
  2981. "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
  2982. /* <d><e>&en;</e></d> */
  2983. "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
  2984. const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
  2985. CharData storage;
  2986. CharData_Init(&storage);
  2987. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  2988. XML_SetUserData(g_parser, &storage);
  2989. XML_SetExternalEntityRefHandler(g_parser,
  2990. external_entity_load_ignore_utf16_be);
  2991. XML_SetDefaultHandler(g_parser, accumulate_characters);
  2992. XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
  2993. XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
  2994. XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
  2995. XML_SetStartElementHandler(g_parser, dummy_start_element);
  2996. XML_SetEndElementHandler(g_parser, dummy_end_element);
  2997. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  2998. == XML_STATUS_ERROR)
  2999. xml_failure(g_parser);
  3000. CharData_CheckXMLChars(&storage, expected);
  3001. }
  3002. END_TEST
  3003. /* Test mis-formatted conditional exclusion */
  3004. START_TEST(test_bad_ignore_section) {
  3005. const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
  3006. "<doc><e>&entity;</e></doc>";
  3007. ExtFaults faults[]
  3008. = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
  3009. XML_ERROR_SYNTAX},
  3010. {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
  3011. XML_ERROR_INVALID_TOKEN},
  3012. {/* FIrst two bytes of a three-byte char */
  3013. "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
  3014. XML_ERROR_PARTIAL_CHAR},
  3015. {NULL, NULL, NULL, XML_ERROR_NONE}};
  3016. ExtFaults *fault;
  3017. for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
  3018. set_subtest("%s", fault->parse_text);
  3019. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3020. XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
  3021. XML_SetUserData(g_parser, fault);
  3022. expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
  3023. "Incomplete IGNORE section not failed");
  3024. XML_ParserReset(g_parser, NULL);
  3025. }
  3026. }
  3027. END_TEST
  3028. struct bom_testdata {
  3029. const char *external;
  3030. int split;
  3031. XML_Bool nested_callback_happened;
  3032. };
  3033. static int XMLCALL
  3034. external_bom_checker(XML_Parser parser, const XML_Char *context,
  3035. const XML_Char *base, const XML_Char *systemId,
  3036. const XML_Char *publicId) {
  3037. const char *text;
  3038. UNUSED_P(base);
  3039. UNUSED_P(systemId);
  3040. UNUSED_P(publicId);
  3041. XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
  3042. if (ext_parser == NULL)
  3043. fail("Could not create external entity parser");
  3044. if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
  3045. struct bom_testdata *const testdata
  3046. = (struct bom_testdata *)XML_GetUserData(parser);
  3047. const char *const external = testdata->external;
  3048. const int split = testdata->split;
  3049. testdata->nested_callback_happened = XML_TRUE;
  3050. if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
  3051. != XML_STATUS_OK) {
  3052. xml_failure(ext_parser);
  3053. }
  3054. text = external + split; // the parse below will continue where we left off.
  3055. } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
  3056. text = "<!ELEMENT doc EMPTY>\n"
  3057. "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
  3058. "<!ENTITY % e2 '%e1;'>\n";
  3059. } else {
  3060. fail("unknown systemId");
  3061. }
  3062. if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
  3063. != XML_STATUS_OK)
  3064. xml_failure(ext_parser);
  3065. XML_ParserFree(ext_parser);
  3066. return XML_STATUS_OK;
  3067. }
  3068. /* regression test: BOM should be consumed when followed by a partial token. */
  3069. START_TEST(test_external_bom_consumed) {
  3070. const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
  3071. "<doc></doc>\n";
  3072. const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
  3073. const int len = (int)strlen(external);
  3074. for (int split = 0; split <= len; ++split) {
  3075. set_subtest("split at byte %d", split);
  3076. struct bom_testdata testdata;
  3077. testdata.external = external;
  3078. testdata.split = split;
  3079. testdata.nested_callback_happened = XML_FALSE;
  3080. XML_Parser parser = XML_ParserCreate(NULL);
  3081. if (parser == NULL) {
  3082. fail("Couldn't create parser");
  3083. }
  3084. XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3085. XML_SetExternalEntityRefHandler(parser, external_bom_checker);
  3086. XML_SetUserData(parser, &testdata);
  3087. if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
  3088. == XML_STATUS_ERROR)
  3089. xml_failure(parser);
  3090. if (! testdata.nested_callback_happened) {
  3091. fail("ref handler not called");
  3092. }
  3093. XML_ParserFree(parser);
  3094. }
  3095. }
  3096. END_TEST
  3097. /* Test recursive parsing */
  3098. START_TEST(test_external_entity_values) {
  3099. const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
  3100. "<doc></doc>\n";
  3101. ExtFaults data_004_2[] = {
  3102. {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
  3103. {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
  3104. XML_ERROR_INVALID_TOKEN},
  3105. {"'wombat", "Unterminated string not faulted", NULL,
  3106. XML_ERROR_UNCLOSED_TOKEN},
  3107. {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
  3108. XML_ERROR_PARTIAL_CHAR},
  3109. {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
  3110. {"<?xml?>", "Malformed XML declaration not faulted", NULL,
  3111. XML_ERROR_XML_DECL},
  3112. {/* UTF-8 BOM */
  3113. "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
  3114. XML_ERROR_NONE},
  3115. {"<?xml version='1.0' encoding='utf-8'?>\n$",
  3116. "Invalid token after text declaration not faulted", NULL,
  3117. XML_ERROR_INVALID_TOKEN},
  3118. {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
  3119. "Unterminated string after text decl not faulted", NULL,
  3120. XML_ERROR_UNCLOSED_TOKEN},
  3121. {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
  3122. "Partial UTF-8 character after text decl not faulted", NULL,
  3123. XML_ERROR_PARTIAL_CHAR},
  3124. {"%e1;", "Recursive parameter entity not faulted", NULL,
  3125. XML_ERROR_RECURSIVE_ENTITY_REF},
  3126. {NULL, NULL, NULL, XML_ERROR_NONE}};
  3127. int i;
  3128. for (i = 0; data_004_2[i].parse_text != NULL; i++) {
  3129. set_subtest("%s", data_004_2[i].parse_text);
  3130. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3131. XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
  3132. XML_SetUserData(g_parser, &data_004_2[i]);
  3133. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3134. == XML_STATUS_ERROR)
  3135. xml_failure(g_parser);
  3136. XML_ParserReset(g_parser, NULL);
  3137. }
  3138. }
  3139. END_TEST
  3140. /* Test the recursive parse interacts with a not standalone handler */
  3141. START_TEST(test_ext_entity_not_standalone) {
  3142. const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
  3143. "<doc></doc>";
  3144. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3145. XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
  3146. expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
  3147. "Standalone rejection not caught");
  3148. }
  3149. END_TEST
  3150. START_TEST(test_ext_entity_value_abort) {
  3151. const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
  3152. "<doc></doc>\n";
  3153. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3154. XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
  3155. g_resumable = XML_FALSE;
  3156. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3157. == XML_STATUS_ERROR)
  3158. xml_failure(g_parser);
  3159. }
  3160. END_TEST
  3161. START_TEST(test_bad_public_doctype) {
  3162. const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
  3163. "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
  3164. "<doc></doc>";
  3165. /* Setting a handler provokes a particular code path */
  3166. XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
  3167. dummy_end_doctype_handler);
  3168. expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
  3169. }
  3170. END_TEST
  3171. /* Test based on ibm/valid/P32/ibm32v04.xml */
  3172. START_TEST(test_attribute_enum_value) {
  3173. const char *text = "<?xml version='1.0' standalone='no'?>\n"
  3174. "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
  3175. "<animal>This is a \n <a/> \n\nyellow tiger</animal>";
  3176. ExtTest dtd_data
  3177. = {"<!ELEMENT animal (#PCDATA|a)*>\n"
  3178. "<!ELEMENT a EMPTY>\n"
  3179. "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
  3180. NULL, NULL};
  3181. const XML_Char *expected = XCS("This is a \n \n\nyellow tiger");
  3182. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  3183. XML_SetUserData(g_parser, &dtd_data);
  3184. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3185. /* An attribute list handler provokes a different code path */
  3186. XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
  3187. run_ext_character_check(text, &dtd_data, expected);
  3188. }
  3189. END_TEST
  3190. /* Slightly bizarrely, the library seems to silently ignore entity
  3191. * definitions for predefined entities, even when they are wrong. The
  3192. * language of the XML 1.0 spec is somewhat unhelpful as to what ought
  3193. * to happen, so this is currently treated as acceptable.
  3194. */
  3195. START_TEST(test_predefined_entity_redefinition) {
  3196. const char *text = "<!DOCTYPE doc [\n"
  3197. "<!ENTITY apos 'foo'>\n"
  3198. "]>\n"
  3199. "<doc>&apos;</doc>";
  3200. run_character_check(text, XCS("'"));
  3201. }
  3202. END_TEST
  3203. /* Test that the parser stops processing the DTD after an unresolved
  3204. * parameter entity is encountered.
  3205. */
  3206. START_TEST(test_dtd_stop_processing) {
  3207. const char *text = "<!DOCTYPE doc [\n"
  3208. "%foo;\n"
  3209. "<!ENTITY bar 'bas'>\n"
  3210. "]><doc/>";
  3211. XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
  3212. init_dummy_handlers();
  3213. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3214. == XML_STATUS_ERROR)
  3215. xml_failure(g_parser);
  3216. if (get_dummy_handler_flags() != 0)
  3217. fail("DTD processing still going after undefined PE");
  3218. }
  3219. END_TEST
  3220. /* Test public notations with no system ID */
  3221. START_TEST(test_public_notation_no_sysid) {
  3222. const char *text = "<!DOCTYPE doc [\n"
  3223. "<!NOTATION note PUBLIC 'foo'>\n"
  3224. "<!ELEMENT doc EMPTY>\n"
  3225. "]>\n<doc/>";
  3226. init_dummy_handlers();
  3227. XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
  3228. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3229. == XML_STATUS_ERROR)
  3230. xml_failure(g_parser);
  3231. if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
  3232. fail("Notation declaration handler not called");
  3233. }
  3234. END_TEST
  3235. START_TEST(test_nested_groups) {
  3236. const char *text
  3237. = "<!DOCTYPE doc [\n"
  3238. "<!ELEMENT doc "
  3239. /* Sixteen elements per line */
  3240. "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
  3241. "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
  3242. "))))))))))))))))))))))))))))))))>\n"
  3243. "<!ELEMENT e EMPTY>"
  3244. "]>\n"
  3245. "<doc><e/></doc>";
  3246. CharData storage;
  3247. CharData_Init(&storage);
  3248. XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
  3249. XML_SetStartElementHandler(g_parser, record_element_start_handler);
  3250. XML_SetUserData(g_parser, &storage);
  3251. init_dummy_handlers();
  3252. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3253. == XML_STATUS_ERROR)
  3254. xml_failure(g_parser);
  3255. CharData_CheckXMLChars(&storage, XCS("doce"));
  3256. if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
  3257. fail("Element handler not fired");
  3258. }
  3259. END_TEST
  3260. START_TEST(test_group_choice) {
  3261. const char *text = "<!DOCTYPE doc [\n"
  3262. "<!ELEMENT doc (a|b|c)+>\n"
  3263. "<!ELEMENT a EMPTY>\n"
  3264. "<!ELEMENT b (#PCDATA)>\n"
  3265. "<!ELEMENT c ANY>\n"
  3266. "]>\n"
  3267. "<doc>\n"
  3268. "<a/>\n"
  3269. "<b attr='foo'>This is a foo</b>\n"
  3270. "<c></c>\n"
  3271. "</doc>\n";
  3272. XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
  3273. init_dummy_handlers();
  3274. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3275. == XML_STATUS_ERROR)
  3276. xml_failure(g_parser);
  3277. if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
  3278. fail("Element handler flag not raised");
  3279. }
  3280. END_TEST
  3281. START_TEST(test_standalone_parameter_entity) {
  3282. const char *text = "<?xml version='1.0' standalone='yes'?>\n"
  3283. "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
  3284. "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
  3285. "%entity;\n"
  3286. "]>\n"
  3287. "<doc></doc>";
  3288. char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
  3289. XML_SetUserData(g_parser, dtd_data);
  3290. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3291. XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
  3292. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3293. == XML_STATUS_ERROR)
  3294. xml_failure(g_parser);
  3295. }
  3296. END_TEST
  3297. /* Test skipping of parameter entity in an external DTD */
  3298. /* Derived from ibm/invalid/P69/ibm69i01.xml */
  3299. START_TEST(test_skipped_parameter_entity) {
  3300. const char *text = "<?xml version='1.0'?>\n"
  3301. "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
  3302. "<!ELEMENT root (#PCDATA|a)* >\n"
  3303. "]>\n"
  3304. "<root></root>";
  3305. ExtTest dtd_data = {"%pe2;", NULL, NULL};
  3306. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  3307. XML_SetUserData(g_parser, &dtd_data);
  3308. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3309. XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
  3310. init_dummy_handlers();
  3311. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3312. == XML_STATUS_ERROR)
  3313. xml_failure(g_parser);
  3314. if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
  3315. fail("Skip handler not executed");
  3316. }
  3317. END_TEST
  3318. /* Test recursive parameter entity definition rejected in external DTD */
  3319. START_TEST(test_recursive_external_parameter_entity) {
  3320. const char *text = "<?xml version='1.0'?>\n"
  3321. "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
  3322. "<!ELEMENT root (#PCDATA|a)* >\n"
  3323. "]>\n"
  3324. "<root></root>";
  3325. ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
  3326. "Recursive external parameter entity not faulted", NULL,
  3327. XML_ERROR_RECURSIVE_ENTITY_REF};
  3328. XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
  3329. XML_SetUserData(g_parser, &dtd_data);
  3330. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3331. expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
  3332. "Recursive external parameter not spotted");
  3333. }
  3334. END_TEST
  3335. /* Test undefined parameter entity in external entity handler */
  3336. START_TEST(test_undefined_ext_entity_in_external_dtd) {
  3337. const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
  3338. "<doc></doc>\n";
  3339. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3340. XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
  3341. XML_SetUserData(g_parser, NULL);
  3342. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3343. == XML_STATUS_ERROR)
  3344. xml_failure(g_parser);
  3345. /* Now repeat without the external entity ref handler invoking
  3346. * another copy of itself.
  3347. */
  3348. XML_ParserReset(g_parser, NULL);
  3349. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3350. XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
  3351. XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
  3352. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3353. == XML_STATUS_ERROR)
  3354. xml_failure(g_parser);
  3355. }
  3356. END_TEST
  3357. /* Test suspending the parse on receiving an XML declaration works */
  3358. START_TEST(test_suspend_xdecl) {
  3359. const char *text = long_character_data_text;
  3360. XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
  3361. XML_SetUserData(g_parser, g_parser);
  3362. g_resumable = XML_TRUE;
  3363. // can't use SINGLE_BYTES here, because it'll return early on suspension, and
  3364. // we won't know exactly how much input we actually managed to give Expat.
  3365. if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
  3366. != XML_STATUS_SUSPENDED)
  3367. xml_failure(g_parser);
  3368. if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
  3369. xml_failure(g_parser);
  3370. /* Attempt to start a new parse while suspended */
  3371. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3372. != XML_STATUS_ERROR)
  3373. fail("Attempt to parse while suspended not faulted");
  3374. if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
  3375. fail("Suspended parse not faulted with correct error");
  3376. }
  3377. END_TEST
  3378. /* Test aborting the parse in an epilog works */
  3379. START_TEST(test_abort_epilog) {
  3380. const char *text = "<doc></doc>\n\r\n";
  3381. XML_Char trigger_char = XCS('\r');
  3382. XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
  3383. XML_SetUserData(g_parser, &trigger_char);
  3384. g_resumable = XML_FALSE;
  3385. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3386. != XML_STATUS_ERROR)
  3387. fail("Abort not triggered");
  3388. if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
  3389. xml_failure(g_parser);
  3390. }
  3391. END_TEST
  3392. /* Test a different code path for abort in the epilog */
  3393. START_TEST(test_abort_epilog_2) {
  3394. const char *text = "<doc></doc>\n";
  3395. XML_Char trigger_char = XCS('\n');
  3396. XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
  3397. XML_SetUserData(g_parser, &trigger_char);
  3398. g_resumable = XML_FALSE;
  3399. expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
  3400. }
  3401. END_TEST
  3402. /* Test suspension from the epilog */
  3403. START_TEST(test_suspend_epilog) {
  3404. const char *text = "<doc></doc>\n";
  3405. XML_Char trigger_char = XCS('\n');
  3406. XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
  3407. XML_SetUserData(g_parser, &trigger_char);
  3408. g_resumable = XML_TRUE;
  3409. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3410. != XML_STATUS_SUSPENDED)
  3411. xml_failure(g_parser);
  3412. }
  3413. END_TEST
  3414. START_TEST(test_suspend_in_sole_empty_tag) {
  3415. const char *text = "<doc/>";
  3416. enum XML_Status rc;
  3417. XML_SetEndElementHandler(g_parser, suspending_end_handler);
  3418. XML_SetUserData(g_parser, g_parser);
  3419. rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
  3420. if (rc == XML_STATUS_ERROR)
  3421. xml_failure(g_parser);
  3422. else if (rc != XML_STATUS_SUSPENDED)
  3423. fail("Suspend not triggered");
  3424. rc = XML_ResumeParser(g_parser);
  3425. if (rc == XML_STATUS_ERROR)
  3426. xml_failure(g_parser);
  3427. else if (rc != XML_STATUS_OK)
  3428. fail("Resume failed");
  3429. }
  3430. END_TEST
  3431. START_TEST(test_unfinished_epilog) {
  3432. const char *text = "<doc></doc><";
  3433. expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
  3434. "Incomplete epilog entry not faulted");
  3435. }
  3436. END_TEST
  3437. START_TEST(test_partial_char_in_epilog) {
  3438. const char *text = "<doc></doc>\xe2\x82";
  3439. /* First check that no fault is raised if the parse is not finished */
  3440. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
  3441. == XML_STATUS_ERROR)
  3442. xml_failure(g_parser);
  3443. /* Now check that it is faulted once we finish */
  3444. if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
  3445. fail("Partial character in epilog not faulted");
  3446. if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
  3447. xml_failure(g_parser);
  3448. }
  3449. END_TEST
  3450. /* Test resuming a parse suspended in entity substitution */
  3451. START_TEST(test_suspend_resume_internal_entity) {
  3452. const char *text
  3453. = "<!DOCTYPE doc [\n"
  3454. "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
  3455. "]>\n"
  3456. "<doc>&foo;</doc>\n";
  3457. const XML_Char *expected1 = XCS("Hi");
  3458. const XML_Char *expected2 = XCS("HiHo");
  3459. CharData storage;
  3460. CharData_Init(&storage);
  3461. XML_SetStartElementHandler(g_parser, start_element_suspender);
  3462. XML_SetCharacterDataHandler(g_parser, accumulate_characters);
  3463. XML_SetUserData(g_parser, &storage);
  3464. // can't use SINGLE_BYTES here, because it'll return early on suspension, and
  3465. // we won't know exactly how much input we actually managed to give Expat.
  3466. if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
  3467. != XML_STATUS_SUSPENDED)
  3468. xml_failure(g_parser);
  3469. CharData_CheckXMLChars(&storage, XCS(""));
  3470. if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
  3471. xml_failure(g_parser);
  3472. CharData_CheckXMLChars(&storage, expected1);
  3473. if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
  3474. xml_failure(g_parser);
  3475. CharData_CheckXMLChars(&storage, expected2);
  3476. }
  3477. END_TEST
  3478. START_TEST(test_suspend_resume_internal_entity_issue_629) {
  3479. const char *const text
  3480. = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
  3481. "<"
  3482. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3483. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3484. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3485. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3486. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3487. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3488. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3489. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3490. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3491. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3492. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3493. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3494. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3495. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3496. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3497. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3498. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3499. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3500. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3501. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3502. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3503. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3504. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3505. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3506. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3507. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3508. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3509. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3510. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3511. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3512. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3513. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3514. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3515. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3516. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3517. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3518. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3519. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3520. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3521. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  3522. "/>"
  3523. "</b></a>";
  3524. const size_t firstChunkSizeBytes = 54;
  3525. XML_Parser parser = XML_ParserCreate(NULL);
  3526. XML_SetUserData(parser, parser);
  3527. XML_SetCommentHandler(parser, suspending_comment_handler);
  3528. if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
  3529. != XML_STATUS_SUSPENDED)
  3530. xml_failure(parser);
  3531. if (XML_ResumeParser(parser) != XML_STATUS_OK)
  3532. xml_failure(parser);
  3533. if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
  3534. (int)(strlen(text) - firstChunkSizeBytes),
  3535. XML_TRUE)
  3536. != XML_STATUS_OK)
  3537. xml_failure(parser);
  3538. XML_ParserFree(parser);
  3539. }
  3540. END_TEST
  3541. /* Test syntax error is caught at parse resumption */
  3542. START_TEST(test_resume_entity_with_syntax_error) {
  3543. if (g_chunkSize != 0) {
  3544. // this test does not use SINGLE_BYTES, because of suspension
  3545. return;
  3546. }
  3547. const char *text = "<!DOCTYPE doc [\n"
  3548. "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
  3549. "]>\n"
  3550. "<doc>&foo;</doc>\n";
  3551. XML_SetStartElementHandler(g_parser, start_element_suspender);
  3552. // can't use SINGLE_BYTES here, because it'll return early on suspension, and
  3553. // we won't know exactly how much input we actually managed to give Expat.
  3554. if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
  3555. != XML_STATUS_SUSPENDED)
  3556. xml_failure(g_parser);
  3557. if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
  3558. fail("Syntax error in entity not faulted");
  3559. if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
  3560. xml_failure(g_parser);
  3561. }
  3562. END_TEST
  3563. /* Test suspending and resuming in a parameter entity substitution */
  3564. START_TEST(test_suspend_resume_parameter_entity) {
  3565. const char *text = "<!DOCTYPE doc [\n"
  3566. "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
  3567. "%foo;\n"
  3568. "]>\n"
  3569. "<doc>Hello, world</doc>";
  3570. const XML_Char *expected = XCS("Hello, world");
  3571. CharData storage;
  3572. CharData_Init(&storage);
  3573. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3574. XML_SetElementDeclHandler(g_parser, element_decl_suspender);
  3575. XML_SetCharacterDataHandler(g_parser, accumulate_characters);
  3576. XML_SetUserData(g_parser, &storage);
  3577. if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
  3578. != XML_STATUS_SUSPENDED)
  3579. xml_failure(g_parser);
  3580. CharData_CheckXMLChars(&storage, XCS(""));
  3581. if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
  3582. xml_failure(g_parser);
  3583. CharData_CheckXMLChars(&storage, expected);
  3584. }
  3585. END_TEST
  3586. /* Test attempting to use parser after an error is faulted */
  3587. START_TEST(test_restart_on_error) {
  3588. const char *text = "<$doc><doc></doc>";
  3589. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3590. != XML_STATUS_ERROR)
  3591. fail("Invalid tag name not faulted");
  3592. if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
  3593. xml_failure(g_parser);
  3594. if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
  3595. fail("Restarting invalid parse not faulted");
  3596. if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
  3597. xml_failure(g_parser);
  3598. }
  3599. END_TEST
  3600. /* Test that angle brackets in an attribute default value are faulted */
  3601. START_TEST(test_reject_lt_in_attribute_value) {
  3602. const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
  3603. "<doc></doc>";
  3604. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  3605. "Bad attribute default not faulted");
  3606. }
  3607. END_TEST
  3608. START_TEST(test_reject_unfinished_param_in_att_value) {
  3609. const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
  3610. "<doc></doc>";
  3611. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  3612. "Bad attribute default not faulted");
  3613. }
  3614. END_TEST
  3615. START_TEST(test_trailing_cr_in_att_value) {
  3616. const char *text = "<doc a='value\r'/>";
  3617. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3618. == XML_STATUS_ERROR)
  3619. xml_failure(g_parser);
  3620. }
  3621. END_TEST
  3622. /* Try parsing a general entity within a parameter entity in a
  3623. * standalone internal DTD. Covers a corner case in the parser.
  3624. */
  3625. START_TEST(test_standalone_internal_entity) {
  3626. const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
  3627. "<!DOCTYPE doc [\n"
  3628. " <!ELEMENT doc (#PCDATA)>\n"
  3629. " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
  3630. " <!ENTITY ge 'AttDefaultValue'>\n"
  3631. " %pe;\n"
  3632. "]>\n"
  3633. "<doc att2='any'/>";
  3634. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3635. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3636. == XML_STATUS_ERROR)
  3637. xml_failure(g_parser);
  3638. }
  3639. END_TEST
  3640. /* Test that a reference to an unknown external entity is skipped */
  3641. START_TEST(test_skipped_external_entity) {
  3642. const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
  3643. "<doc></doc>\n";
  3644. ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
  3645. "<!ENTITY % e2 '%e1;'>\n",
  3646. NULL, NULL};
  3647. XML_SetUserData(g_parser, &test_data);
  3648. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3649. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  3650. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3651. == XML_STATUS_ERROR)
  3652. xml_failure(g_parser);
  3653. }
  3654. END_TEST
  3655. /* Test a different form of unknown external entity */
  3656. START_TEST(test_skipped_null_loaded_ext_entity) {
  3657. const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
  3658. "<doc />";
  3659. ExtHdlrData test_data
  3660. = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
  3661. "<!ENTITY % pe2 '%pe1;'>\n"
  3662. "%pe2;\n",
  3663. external_entity_null_loader, NULL};
  3664. XML_SetUserData(g_parser, &test_data);
  3665. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3666. XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
  3667. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3668. == XML_STATUS_ERROR)
  3669. xml_failure(g_parser);
  3670. }
  3671. END_TEST
  3672. START_TEST(test_skipped_unloaded_ext_entity) {
  3673. const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
  3674. "<doc />";
  3675. ExtHdlrData test_data
  3676. = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
  3677. "<!ENTITY % pe2 '%pe1;'>\n"
  3678. "%pe2;\n",
  3679. NULL, NULL};
  3680. XML_SetUserData(g_parser, &test_data);
  3681. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3682. XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
  3683. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3684. == XML_STATUS_ERROR)
  3685. xml_failure(g_parser);
  3686. }
  3687. END_TEST
  3688. /* Test that a parameter entity value ending with a carriage return
  3689. * has it translated internally into a newline.
  3690. */
  3691. START_TEST(test_param_entity_with_trailing_cr) {
  3692. #define PARAM_ENTITY_NAME "pe"
  3693. #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
  3694. const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
  3695. "<doc/>";
  3696. ExtTest test_data
  3697. = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
  3698. "%" PARAM_ENTITY_NAME ";\n",
  3699. NULL, NULL};
  3700. XML_SetUserData(g_parser, &test_data);
  3701. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  3702. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
  3703. XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
  3704. param_entity_match_init(XCS(PARAM_ENTITY_NAME),
  3705. XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
  3706. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3707. == XML_STATUS_ERROR)
  3708. xml_failure(g_parser);
  3709. int entity_match_flag = get_param_entity_match_flag();
  3710. if (entity_match_flag == ENTITY_MATCH_FAIL)
  3711. fail("Parameter entity CR->NEWLINE conversion failed");
  3712. else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
  3713. fail("Parameter entity not parsed");
  3714. }
  3715. #undef PARAM_ENTITY_NAME
  3716. #undef PARAM_ENTITY_CORE_VALUE
  3717. END_TEST
  3718. START_TEST(test_invalid_character_entity) {
  3719. const char *text = "<!DOCTYPE doc [\n"
  3720. " <!ENTITY entity '&#x110000;'>\n"
  3721. "]>\n"
  3722. "<doc>&entity;</doc>";
  3723. expect_failure(text, XML_ERROR_BAD_CHAR_REF,
  3724. "Out of range character reference not faulted");
  3725. }
  3726. END_TEST
  3727. START_TEST(test_invalid_character_entity_2) {
  3728. const char *text = "<!DOCTYPE doc [\n"
  3729. " <!ENTITY entity '&#xg0;'>\n"
  3730. "]>\n"
  3731. "<doc>&entity;</doc>";
  3732. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  3733. "Out of range character reference not faulted");
  3734. }
  3735. END_TEST
  3736. START_TEST(test_invalid_character_entity_3) {
  3737. const char text[] =
  3738. /* <!DOCTYPE doc [\n */
  3739. "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
  3740. /* U+0E04 = KHO KHWAI
  3741. * U+0E08 = CHO CHAN */
  3742. /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
  3743. "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
  3744. "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
  3745. /* ]>\n */
  3746. "\0]\0>\0\n"
  3747. /* <doc>&entity;</doc> */
  3748. "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
  3749. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  3750. != XML_STATUS_ERROR)
  3751. fail("Invalid start of entity name not faulted");
  3752. if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
  3753. xml_failure(g_parser);
  3754. }
  3755. END_TEST
  3756. START_TEST(test_invalid_character_entity_4) {
  3757. const char *text = "<!DOCTYPE doc [\n"
  3758. " <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
  3759. "]>\n"
  3760. "<doc>&entity;</doc>";
  3761. expect_failure(text, XML_ERROR_BAD_CHAR_REF,
  3762. "Out of range character reference not faulted");
  3763. }
  3764. END_TEST
  3765. /* Test that processing instructions are picked up by a default handler */
  3766. START_TEST(test_pi_handled_in_default) {
  3767. const char *text = "<?test processing instruction?>\n<doc/>";
  3768. const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
  3769. CharData storage;
  3770. CharData_Init(&storage);
  3771. XML_SetDefaultHandler(g_parser, accumulate_characters);
  3772. XML_SetUserData(g_parser, &storage);
  3773. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3774. == XML_STATUS_ERROR)
  3775. xml_failure(g_parser);
  3776. CharData_CheckXMLChars(&storage, expected);
  3777. }
  3778. END_TEST
  3779. /* Test that comments are picked up by a default handler */
  3780. START_TEST(test_comment_handled_in_default) {
  3781. const char *text = "<!-- This is a comment -->\n<doc/>";
  3782. const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
  3783. CharData storage;
  3784. CharData_Init(&storage);
  3785. XML_SetDefaultHandler(g_parser, accumulate_characters);
  3786. XML_SetUserData(g_parser, &storage);
  3787. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3788. == XML_STATUS_ERROR)
  3789. xml_failure(g_parser);
  3790. CharData_CheckXMLChars(&storage, expected);
  3791. }
  3792. END_TEST
  3793. /* Test PIs that look almost but not quite like XML declarations */
  3794. START_TEST(test_pi_yml) {
  3795. const char *text = "<?yml something like data?><doc/>";
  3796. const XML_Char *expected = XCS("yml: something like data\n");
  3797. CharData storage;
  3798. CharData_Init(&storage);
  3799. XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
  3800. XML_SetUserData(g_parser, &storage);
  3801. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3802. == XML_STATUS_ERROR)
  3803. xml_failure(g_parser);
  3804. CharData_CheckXMLChars(&storage, expected);
  3805. }
  3806. END_TEST
  3807. START_TEST(test_pi_xnl) {
  3808. const char *text = "<?xnl nothing like data?><doc/>";
  3809. const XML_Char *expected = XCS("xnl: nothing like data\n");
  3810. CharData storage;
  3811. CharData_Init(&storage);
  3812. XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
  3813. XML_SetUserData(g_parser, &storage);
  3814. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3815. == XML_STATUS_ERROR)
  3816. xml_failure(g_parser);
  3817. CharData_CheckXMLChars(&storage, expected);
  3818. }
  3819. END_TEST
  3820. START_TEST(test_pi_xmm) {
  3821. const char *text = "<?xmm everything like data?><doc/>";
  3822. const XML_Char *expected = XCS("xmm: everything like data\n");
  3823. CharData storage;
  3824. CharData_Init(&storage);
  3825. XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
  3826. XML_SetUserData(g_parser, &storage);
  3827. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3828. == XML_STATUS_ERROR)
  3829. xml_failure(g_parser);
  3830. CharData_CheckXMLChars(&storage, expected);
  3831. }
  3832. END_TEST
  3833. START_TEST(test_utf16_pi) {
  3834. const char text[] =
  3835. /* <?{KHO KHWAI}{CHO CHAN}?>
  3836. * where {KHO KHWAI} = U+0E04
  3837. * and {CHO CHAN} = U+0E08
  3838. */
  3839. "<\0?\0\x04\x0e\x08\x0e?\0>\0"
  3840. /* <q/> */
  3841. "<\0q\0/\0>\0";
  3842. #ifdef XML_UNICODE
  3843. const XML_Char *expected = XCS("\x0e04\x0e08: \n");
  3844. #else
  3845. const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
  3846. #endif
  3847. CharData storage;
  3848. CharData_Init(&storage);
  3849. XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
  3850. XML_SetUserData(g_parser, &storage);
  3851. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  3852. == XML_STATUS_ERROR)
  3853. xml_failure(g_parser);
  3854. CharData_CheckXMLChars(&storage, expected);
  3855. }
  3856. END_TEST
  3857. START_TEST(test_utf16_be_pi) {
  3858. const char text[] =
  3859. /* <?{KHO KHWAI}{CHO CHAN}?>
  3860. * where {KHO KHWAI} = U+0E04
  3861. * and {CHO CHAN} = U+0E08
  3862. */
  3863. "\0<\0?\x0e\x04\x0e\x08\0?\0>"
  3864. /* <q/> */
  3865. "\0<\0q\0/\0>";
  3866. #ifdef XML_UNICODE
  3867. const XML_Char *expected = XCS("\x0e04\x0e08: \n");
  3868. #else
  3869. const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
  3870. #endif
  3871. CharData storage;
  3872. CharData_Init(&storage);
  3873. XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
  3874. XML_SetUserData(g_parser, &storage);
  3875. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  3876. == XML_STATUS_ERROR)
  3877. xml_failure(g_parser);
  3878. CharData_CheckXMLChars(&storage, expected);
  3879. }
  3880. END_TEST
  3881. /* Test that comments can be picked up and translated */
  3882. START_TEST(test_utf16_be_comment) {
  3883. const char text[] =
  3884. /* <!-- Comment A --> */
  3885. "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
  3886. /* <doc/> */
  3887. "\0<\0d\0o\0c\0/\0>";
  3888. const XML_Char *expected = XCS(" Comment A ");
  3889. CharData storage;
  3890. CharData_Init(&storage);
  3891. XML_SetCommentHandler(g_parser, accumulate_comment);
  3892. XML_SetUserData(g_parser, &storage);
  3893. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  3894. == XML_STATUS_ERROR)
  3895. xml_failure(g_parser);
  3896. CharData_CheckXMLChars(&storage, expected);
  3897. }
  3898. END_TEST
  3899. START_TEST(test_utf16_le_comment) {
  3900. const char text[] =
  3901. /* <!-- Comment B --> */
  3902. "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
  3903. /* <doc/> */
  3904. "<\0d\0o\0c\0/\0>\0";
  3905. const XML_Char *expected = XCS(" Comment B ");
  3906. CharData storage;
  3907. CharData_Init(&storage);
  3908. XML_SetCommentHandler(g_parser, accumulate_comment);
  3909. XML_SetUserData(g_parser, &storage);
  3910. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  3911. == XML_STATUS_ERROR)
  3912. xml_failure(g_parser);
  3913. CharData_CheckXMLChars(&storage, expected);
  3914. }
  3915. END_TEST
  3916. /* Test that the unknown encoding handler with map entries that expect
  3917. * conversion but no conversion function is faulted
  3918. */
  3919. START_TEST(test_missing_encoding_conversion_fn) {
  3920. const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
  3921. "<doc>\x81</doc>";
  3922. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  3923. /* MiscEncodingHandler sets up an encoding with every top-bit-set
  3924. * character introducing a two-byte sequence. For this, it
  3925. * requires a convert function. The above function call doesn't
  3926. * pass one through, so when BadEncodingHandler actually gets
  3927. * called it should supply an invalid encoding.
  3928. */
  3929. expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
  3930. "Encoding with missing convert() not faulted");
  3931. }
  3932. END_TEST
  3933. START_TEST(test_failing_encoding_conversion_fn) {
  3934. const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
  3935. "<doc>\x81</doc>";
  3936. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  3937. /* BadEncodingHandler sets up an encoding with every top-bit-set
  3938. * character introducing a two-byte sequence. For this, it
  3939. * requires a convert function. The above function call passes
  3940. * one that insists all possible sequences are invalid anyway.
  3941. */
  3942. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  3943. "Encoding with failing convert() not faulted");
  3944. }
  3945. END_TEST
  3946. /* Test unknown encoding conversions */
  3947. START_TEST(test_unknown_encoding_success) {
  3948. const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
  3949. /* Equivalent to <eoc>Hello, world</eoc> */
  3950. "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
  3951. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  3952. run_character_check(text, XCS("Hello, world"));
  3953. }
  3954. END_TEST
  3955. /* Test bad name character in unknown encoding */
  3956. START_TEST(test_unknown_encoding_bad_name) {
  3957. const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
  3958. "<\xff\x64oc>Hello, world</\xff\x64oc>";
  3959. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  3960. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  3961. "Bad name start in unknown encoding not faulted");
  3962. }
  3963. END_TEST
  3964. /* Test bad mid-name character in unknown encoding */
  3965. START_TEST(test_unknown_encoding_bad_name_2) {
  3966. const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
  3967. "<d\xffoc>Hello, world</d\xffoc>";
  3968. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  3969. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  3970. "Bad name in unknown encoding not faulted");
  3971. }
  3972. END_TEST
  3973. /* Test element name that is long enough to fill the conversion buffer
  3974. * in an unknown encoding, finishing with an encoded character.
  3975. */
  3976. START_TEST(test_unknown_encoding_long_name_1) {
  3977. const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
  3978. "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
  3979. "Hi"
  3980. "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
  3981. const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
  3982. CharData storage;
  3983. CharData_Init(&storage);
  3984. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  3985. XML_SetStartElementHandler(g_parser, record_element_start_handler);
  3986. XML_SetUserData(g_parser, &storage);
  3987. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  3988. == XML_STATUS_ERROR)
  3989. xml_failure(g_parser);
  3990. CharData_CheckXMLChars(&storage, expected);
  3991. }
  3992. END_TEST
  3993. /* Test element name that is long enough to fill the conversion buffer
  3994. * in an unknown encoding, finishing with an simple character.
  3995. */
  3996. START_TEST(test_unknown_encoding_long_name_2) {
  3997. const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
  3998. "<abcdefghabcdefghabcdefghijklmnop>"
  3999. "Hi"
  4000. "</abcdefghabcdefghabcdefghijklmnop>";
  4001. const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
  4002. CharData storage;
  4003. CharData_Init(&storage);
  4004. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  4005. XML_SetStartElementHandler(g_parser, record_element_start_handler);
  4006. XML_SetUserData(g_parser, &storage);
  4007. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  4008. == XML_STATUS_ERROR)
  4009. xml_failure(g_parser);
  4010. CharData_CheckXMLChars(&storage, expected);
  4011. }
  4012. END_TEST
  4013. START_TEST(test_invalid_unknown_encoding) {
  4014. const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
  4015. "<doc>Hello world</doc>";
  4016. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  4017. expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
  4018. "Invalid unknown encoding not faulted");
  4019. }
  4020. END_TEST
  4021. START_TEST(test_unknown_ascii_encoding_ok) {
  4022. const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
  4023. "<doc>Hello, world</doc>";
  4024. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  4025. run_character_check(text, XCS("Hello, world"));
  4026. }
  4027. END_TEST
  4028. START_TEST(test_unknown_ascii_encoding_fail) {
  4029. const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
  4030. "<doc>Hello, \x80 world</doc>";
  4031. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  4032. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  4033. "Invalid character not faulted");
  4034. }
  4035. END_TEST
  4036. START_TEST(test_unknown_encoding_invalid_length) {
  4037. const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
  4038. "<doc>Hello, world</doc>";
  4039. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  4040. expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
  4041. "Invalid unknown encoding not faulted");
  4042. }
  4043. END_TEST
  4044. START_TEST(test_unknown_encoding_invalid_topbit) {
  4045. const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
  4046. "<doc>Hello, world</doc>";
  4047. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  4048. expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
  4049. "Invalid unknown encoding not faulted");
  4050. }
  4051. END_TEST
  4052. START_TEST(test_unknown_encoding_invalid_surrogate) {
  4053. const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
  4054. "<doc>Hello, \x82 world</doc>";
  4055. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  4056. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  4057. "Invalid unknown encoding not faulted");
  4058. }
  4059. END_TEST
  4060. START_TEST(test_unknown_encoding_invalid_high) {
  4061. const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
  4062. "<doc>Hello, world</doc>";
  4063. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  4064. expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
  4065. "Invalid unknown encoding not faulted");
  4066. }
  4067. END_TEST
  4068. START_TEST(test_unknown_encoding_invalid_attr_value) {
  4069. const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
  4070. "<doc attr='\xff\x30'/>";
  4071. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  4072. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  4073. "Invalid attribute valid not faulted");
  4074. }
  4075. END_TEST
  4076. /* Test an external entity parser set to use latin-1 detects UTF-16
  4077. * BOMs correctly.
  4078. */
  4079. /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
  4080. START_TEST(test_ext_entity_latin1_utf16le_bom) {
  4081. const char *text = "<!DOCTYPE doc [\n"
  4082. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  4083. "]>\n"
  4084. "<doc>&en;</doc>";
  4085. ExtTest2 test_data
  4086. = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
  4087. /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
  4088. * 0x4c = L and 0x20 is a space
  4089. */
  4090. "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
  4091. #ifdef XML_UNICODE
  4092. const XML_Char *expected = XCS("\x00ff\x00feL ");
  4093. #else
  4094. /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
  4095. const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
  4096. #endif
  4097. CharData storage;
  4098. CharData_Init(&storage);
  4099. test_data.storage = &storage;
  4100. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
  4101. XML_SetUserData(g_parser, &test_data);
  4102. XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
  4103. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  4104. == XML_STATUS_ERROR)
  4105. xml_failure(g_parser);
  4106. CharData_CheckXMLChars(&storage, expected);
  4107. }
  4108. END_TEST
  4109. START_TEST(test_ext_entity_latin1_utf16be_bom) {
  4110. const char *text = "<!DOCTYPE doc [\n"
  4111. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  4112. "]>\n"
  4113. "<doc>&en;</doc>";
  4114. ExtTest2 test_data
  4115. = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
  4116. /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
  4117. * 0x4c = L and 0x20 is a space
  4118. */
  4119. "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
  4120. #ifdef XML_UNICODE
  4121. const XML_Char *expected = XCS("\x00fe\x00ff L");
  4122. #else
  4123. /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
  4124. const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
  4125. #endif
  4126. CharData storage;
  4127. CharData_Init(&storage);
  4128. test_data.storage = &storage;
  4129. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
  4130. XML_SetUserData(g_parser, &test_data);
  4131. XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
  4132. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  4133. == XML_STATUS_ERROR)
  4134. xml_failure(g_parser);
  4135. CharData_CheckXMLChars(&storage, expected);
  4136. }
  4137. END_TEST
  4138. /* Parsing the full buffer rather than a byte at a time makes a
  4139. * difference to the encoding scanning code, so repeat the above tests
  4140. * without breaking them down by byte.
  4141. */
  4142. START_TEST(test_ext_entity_latin1_utf16le_bom2) {
  4143. const char *text = "<!DOCTYPE doc [\n"
  4144. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  4145. "]>\n"
  4146. "<doc>&en;</doc>";
  4147. ExtTest2 test_data
  4148. = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
  4149. /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
  4150. * 0x4c = L and 0x20 is a space
  4151. */
  4152. "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
  4153. #ifdef XML_UNICODE
  4154. const XML_Char *expected = XCS("\x00ff\x00feL ");
  4155. #else
  4156. /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
  4157. const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
  4158. #endif
  4159. CharData storage;
  4160. CharData_Init(&storage);
  4161. test_data.storage = &storage;
  4162. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
  4163. XML_SetUserData(g_parser, &test_data);
  4164. XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
  4165. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  4166. == XML_STATUS_ERROR)
  4167. xml_failure(g_parser);
  4168. CharData_CheckXMLChars(&storage, expected);
  4169. }
  4170. END_TEST
  4171. START_TEST(test_ext_entity_latin1_utf16be_bom2) {
  4172. const char *text = "<!DOCTYPE doc [\n"
  4173. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  4174. "]>\n"
  4175. "<doc>&en;</doc>";
  4176. ExtTest2 test_data
  4177. = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
  4178. /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
  4179. * 0x4c = L and 0x20 is a space
  4180. */
  4181. "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
  4182. #ifdef XML_UNICODE
  4183. const XML_Char *expected = XCS("\x00fe\x00ff L");
  4184. #else
  4185. /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
  4186. const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
  4187. #endif
  4188. CharData storage;
  4189. CharData_Init(&storage);
  4190. test_data.storage = &storage;
  4191. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
  4192. XML_SetUserData(g_parser, &test_data);
  4193. XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
  4194. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  4195. == XML_STATUS_ERROR)
  4196. xml_failure(g_parser);
  4197. CharData_CheckXMLChars(&storage, expected);
  4198. }
  4199. END_TEST
  4200. /* Test little-endian UTF-16 given an explicit big-endian encoding */
  4201. START_TEST(test_ext_entity_utf16_be) {
  4202. const char *text = "<!DOCTYPE doc [\n"
  4203. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  4204. "]>\n"
  4205. "<doc>&en;</doc>";
  4206. ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
  4207. #ifdef XML_UNICODE
  4208. const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
  4209. #else
  4210. const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
  4211. "\xe6\x94\x80" /* U+6500 */
  4212. "\xe2\xbc\x80" /* U+2F00 */
  4213. "\xe3\xb8\x80"); /* U+3E00 */
  4214. #endif
  4215. CharData storage;
  4216. CharData_Init(&storage);
  4217. test_data.storage = &storage;
  4218. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
  4219. XML_SetUserData(g_parser, &test_data);
  4220. XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
  4221. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  4222. == XML_STATUS_ERROR)
  4223. xml_failure(g_parser);
  4224. CharData_CheckXMLChars(&storage, expected);
  4225. }
  4226. END_TEST
  4227. /* Test big-endian UTF-16 given an explicit little-endian encoding */
  4228. START_TEST(test_ext_entity_utf16_le) {
  4229. const char *text = "<!DOCTYPE doc [\n"
  4230. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  4231. "]>\n"
  4232. "<doc>&en;</doc>";
  4233. ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
  4234. #ifdef XML_UNICODE
  4235. const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
  4236. #else
  4237. const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
  4238. "\xe6\x94\x80" /* U+6500 */
  4239. "\xe2\xbc\x80" /* U+2F00 */
  4240. "\xe3\xb8\x80"); /* U+3E00 */
  4241. #endif
  4242. CharData storage;
  4243. CharData_Init(&storage);
  4244. test_data.storage = &storage;
  4245. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
  4246. XML_SetUserData(g_parser, &test_data);
  4247. XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
  4248. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  4249. == XML_STATUS_ERROR)
  4250. xml_failure(g_parser);
  4251. CharData_CheckXMLChars(&storage, expected);
  4252. }
  4253. END_TEST
  4254. /* Test little-endian UTF-16 given no explicit encoding.
  4255. * The existing default encoding (UTF-8) is assumed to hold without a
  4256. * BOM to contradict it, so the entity value will in fact provoke an
  4257. * error because 0x00 is not a valid XML character. We parse the
  4258. * whole buffer in one go rather than feeding it in byte by byte to
  4259. * exercise different code paths in the initial scanning routines.
  4260. */
  4261. START_TEST(test_ext_entity_utf16_unknown) {
  4262. const char *text = "<!DOCTYPE doc [\n"
  4263. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  4264. "]>\n"
  4265. "<doc>&en;</doc>";
  4266. ExtFaults2 test_data
  4267. = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
  4268. XML_ERROR_INVALID_TOKEN};
  4269. XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
  4270. XML_SetUserData(g_parser, &test_data);
  4271. expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
  4272. "Invalid character should not have been accepted");
  4273. }
  4274. END_TEST
  4275. /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
  4276. START_TEST(test_ext_entity_utf8_non_bom) {
  4277. const char *text = "<!DOCTYPE doc [\n"
  4278. " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
  4279. "]>\n"
  4280. "<doc>&en;</doc>";
  4281. ExtTest2 test_data
  4282. = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
  4283. 3, NULL, NULL};
  4284. #ifdef XML_UNICODE
  4285. const XML_Char *expected = XCS("\xfec0");
  4286. #else
  4287. const XML_Char *expected = XCS("\xef\xbb\x80");
  4288. #endif
  4289. CharData storage;
  4290. CharData_Init(&storage);
  4291. test_data.storage = &storage;
  4292. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
  4293. XML_SetUserData(g_parser, &test_data);
  4294. XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
  4295. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  4296. == XML_STATUS_ERROR)
  4297. xml_failure(g_parser);
  4298. CharData_CheckXMLChars(&storage, expected);
  4299. }
  4300. END_TEST
  4301. /* Test that UTF-8 in a CDATA section is correctly passed through */
  4302. START_TEST(test_utf8_in_cdata_section) {
  4303. const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
  4304. #ifdef XML_UNICODE
  4305. const XML_Char *expected = XCS("one \x00e9 two");
  4306. #else
  4307. const XML_Char *expected = XCS("one \xc3\xa9 two");
  4308. #endif
  4309. run_character_check(text, expected);
  4310. }
  4311. END_TEST
  4312. /* Test that little-endian UTF-16 in a CDATA section is handled */
  4313. START_TEST(test_utf8_in_cdata_section_2) {
  4314. const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
  4315. #ifdef XML_UNICODE
  4316. const XML_Char *expected = XCS("\x00e9]\x00e9two");
  4317. #else
  4318. const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
  4319. #endif
  4320. run_character_check(text, expected);
  4321. }
  4322. END_TEST
  4323. START_TEST(test_utf8_in_start_tags) {
  4324. struct test_case {
  4325. bool goodName;
  4326. bool goodNameStart;
  4327. const char *tagName;
  4328. };
  4329. // The idea with the tests below is this:
  4330. // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
  4331. // go to isNever and are hence not a concern.
  4332. //
  4333. // We start with a character that is a valid name character
  4334. // (or even name-start character, see XML 1.0r4 spec) and then we flip
  4335. // single bits at places where (1) the result leaves the UTF-8 encoding space
  4336. // and (2) we stay in the same n-byte sequence family.
  4337. //
  4338. // The flipped bits are highlighted in angle brackets in comments,
  4339. // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
  4340. // the most significant bit to 1 to leave UTF-8 encoding space.
  4341. struct test_case cases[] = {
  4342. // 1-byte UTF-8: [0xxx xxxx]
  4343. {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
  4344. {false, false, "\xBA"}, // [<1>011 1010]
  4345. {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
  4346. {false, false, "\xB9"}, // [<1>011 1001]
  4347. // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
  4348. {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
  4349. // Arabic small waw U+06E5
  4350. {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
  4351. {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
  4352. {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
  4353. {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
  4354. // combining char U+0301
  4355. {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
  4356. {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
  4357. {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
  4358. // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
  4359. {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
  4360. // Devanagari Letter A U+0905
  4361. {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
  4362. {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
  4363. {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
  4364. {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
  4365. {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
  4366. {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
  4367. // combining char U+0901
  4368. {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
  4369. {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
  4370. {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
  4371. {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
  4372. {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
  4373. };
  4374. const bool atNameStart[] = {true, false};
  4375. size_t i = 0;
  4376. char doc[1024];
  4377. size_t failCount = 0;
  4378. // we need all the bytes to be parsed, but we don't want the errors that can
  4379. // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
  4380. if (g_reparseDeferralEnabledDefault) {
  4381. return;
  4382. }
  4383. for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
  4384. size_t j = 0;
  4385. for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
  4386. const bool expectedSuccess
  4387. = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
  4388. snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
  4389. cases[i].tagName);
  4390. XML_Parser parser = XML_ParserCreate(NULL);
  4391. const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
  4392. parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
  4393. bool success = true;
  4394. if ((status == XML_STATUS_OK) != expectedSuccess) {
  4395. success = false;
  4396. }
  4397. if ((status == XML_STATUS_ERROR)
  4398. && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
  4399. success = false;
  4400. }
  4401. if (! success) {
  4402. fprintf(
  4403. stderr,
  4404. "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
  4405. (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
  4406. (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
  4407. failCount++;
  4408. }
  4409. XML_ParserFree(parser);
  4410. }
  4411. }
  4412. if (failCount > 0) {
  4413. fail("UTF-8 regression detected");
  4414. }
  4415. }
  4416. END_TEST
  4417. /* Test trailing spaces in elements are accepted */
  4418. START_TEST(test_trailing_spaces_in_elements) {
  4419. const char *text = "<doc >Hi</doc >";
  4420. const XML_Char *expected = XCS("doc/doc");
  4421. CharData storage;
  4422. CharData_Init(&storage);
  4423. XML_SetElementHandler(g_parser, record_element_start_handler,
  4424. record_element_end_handler);
  4425. XML_SetUserData(g_parser, &storage);
  4426. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  4427. == XML_STATUS_ERROR)
  4428. xml_failure(g_parser);
  4429. CharData_CheckXMLChars(&storage, expected);
  4430. }
  4431. END_TEST
  4432. START_TEST(test_utf16_attribute) {
  4433. const char text[] =
  4434. /* <d {KHO KHWAI}{CHO CHAN}='a'/>
  4435. * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
  4436. * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
  4437. */
  4438. "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
  4439. const XML_Char *expected = XCS("a");
  4440. CharData storage;
  4441. CharData_Init(&storage);
  4442. XML_SetStartElementHandler(g_parser, accumulate_attribute);
  4443. XML_SetUserData(g_parser, &storage);
  4444. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  4445. == XML_STATUS_ERROR)
  4446. xml_failure(g_parser);
  4447. CharData_CheckXMLChars(&storage, expected);
  4448. }
  4449. END_TEST
  4450. START_TEST(test_utf16_second_attr) {
  4451. /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
  4452. * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
  4453. * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
  4454. */
  4455. const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
  4456. "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
  4457. const XML_Char *expected = XCS("1");
  4458. CharData storage;
  4459. CharData_Init(&storage);
  4460. XML_SetStartElementHandler(g_parser, accumulate_attribute);
  4461. XML_SetUserData(g_parser, &storage);
  4462. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  4463. == XML_STATUS_ERROR)
  4464. xml_failure(g_parser);
  4465. CharData_CheckXMLChars(&storage, expected);
  4466. }
  4467. END_TEST
  4468. START_TEST(test_attr_after_solidus) {
  4469. const char *text = "<doc attr1='a' / attr2='b'>";
  4470. expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
  4471. }
  4472. END_TEST
  4473. START_TEST(test_utf16_pe) {
  4474. /* <!DOCTYPE doc [
  4475. * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
  4476. * %{KHO KHWAI}{CHO CHAN};
  4477. * ]>
  4478. * <doc></doc>
  4479. *
  4480. * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
  4481. * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
  4482. */
  4483. const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
  4484. "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
  4485. "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
  4486. "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
  4487. "\0%\x0e\x04\x0e\x08\0;\0\n"
  4488. "\0]\0>\0\n"
  4489. "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
  4490. #ifdef XML_UNICODE
  4491. const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
  4492. #else
  4493. const XML_Char *expected
  4494. = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
  4495. #endif
  4496. CharData storage;
  4497. CharData_Init(&storage);
  4498. XML_SetUserData(g_parser, &storage);
  4499. XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
  4500. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  4501. == XML_STATUS_ERROR)
  4502. xml_failure(g_parser);
  4503. CharData_CheckXMLChars(&storage, expected);
  4504. }
  4505. END_TEST
  4506. /* Test that duff attribute description keywords are rejected */
  4507. START_TEST(test_bad_attr_desc_keyword) {
  4508. const char *text = "<!DOCTYPE doc [\n"
  4509. " <!ATTLIST doc attr CDATA #!IMPLIED>\n"
  4510. "]>\n"
  4511. "<doc />";
  4512. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  4513. "Bad keyword !IMPLIED not faulted");
  4514. }
  4515. END_TEST
  4516. /* Test that an invalid attribute description keyword consisting of
  4517. * UTF-16 characters with their top bytes non-zero are correctly
  4518. * faulted
  4519. */
  4520. START_TEST(test_bad_attr_desc_keyword_utf16) {
  4521. /* <!DOCTYPE d [
  4522. * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
  4523. * ]><d/>
  4524. *
  4525. * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
  4526. * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
  4527. */
  4528. const char text[]
  4529. = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
  4530. "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
  4531. "\0#\x0e\x04\x0e\x08\0>\0\n"
  4532. "\0]\0>\0<\0d\0/\0>";
  4533. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  4534. != XML_STATUS_ERROR)
  4535. fail("Invalid UTF16 attribute keyword not faulted");
  4536. if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
  4537. xml_failure(g_parser);
  4538. }
  4539. END_TEST
  4540. /* Test that invalid syntax in a <!DOCTYPE> is rejected. Do this
  4541. * using prefix-encoding (see above) to trigger specific code paths
  4542. */
  4543. START_TEST(test_bad_doctype) {
  4544. const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
  4545. "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
  4546. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  4547. expect_failure(text, XML_ERROR_SYNTAX,
  4548. "Invalid bytes in DOCTYPE not faulted");
  4549. }
  4550. END_TEST
  4551. START_TEST(test_bad_doctype_utf8) {
  4552. const char *text = "<!DOCTYPE \xDB\x25"
  4553. "doc><doc/>"; // [1101 1011] [<0>010 0101]
  4554. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  4555. "Invalid UTF-8 in DOCTYPE not faulted");
  4556. }
  4557. END_TEST
  4558. START_TEST(test_bad_doctype_utf16) {
  4559. const char text[] =
  4560. /* <!DOCTYPE doc [ \x06f2 ]><doc/>
  4561. *
  4562. * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
  4563. * (name character) but not a valid letter (name start character)
  4564. */
  4565. "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
  4566. "\x06\xf2"
  4567. "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
  4568. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  4569. != XML_STATUS_ERROR)
  4570. fail("Invalid bytes in DOCTYPE not faulted");
  4571. if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
  4572. xml_failure(g_parser);
  4573. }
  4574. END_TEST
  4575. START_TEST(test_bad_doctype_plus) {
  4576. const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
  4577. "<1+>&foo;</1+>";
  4578. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  4579. "'+' in document name not faulted");
  4580. }
  4581. END_TEST
  4582. START_TEST(test_bad_doctype_star) {
  4583. const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
  4584. "<1*>&foo;</1*>";
  4585. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  4586. "'*' in document name not faulted");
  4587. }
  4588. END_TEST
  4589. START_TEST(test_bad_doctype_query) {
  4590. const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
  4591. "<1?>&foo;</1?>";
  4592. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  4593. "'?' in document name not faulted");
  4594. }
  4595. END_TEST
  4596. START_TEST(test_unknown_encoding_bad_ignore) {
  4597. const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
  4598. "<!DOCTYPE doc SYSTEM 'foo'>"
  4599. "<doc><e>&entity;</e></doc>";
  4600. ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
  4601. "Invalid character not faulted", XCS("prefix-conv"),
  4602. XML_ERROR_INVALID_TOKEN};
  4603. XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  4604. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  4605. XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
  4606. XML_SetUserData(g_parser, &fault);
  4607. expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
  4608. "Bad IGNORE section with unknown encoding not failed");
  4609. }
  4610. END_TEST
  4611. START_TEST(test_entity_in_utf16_be_attr) {
  4612. const char text[] =
  4613. /* <e a='&#228; &#x00E4;'></e> */
  4614. "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
  4615. "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
  4616. #ifdef XML_UNICODE
  4617. const XML_Char *expected = XCS("\x00e4 \x00e4");
  4618. #else
  4619. const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
  4620. #endif
  4621. CharData storage;
  4622. CharData_Init(&storage);
  4623. XML_SetUserData(g_parser, &storage);
  4624. XML_SetStartElementHandler(g_parser, accumulate_attribute);
  4625. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  4626. == XML_STATUS_ERROR)
  4627. xml_failure(g_parser);
  4628. CharData_CheckXMLChars(&storage, expected);
  4629. }
  4630. END_TEST
  4631. START_TEST(test_entity_in_utf16_le_attr) {
  4632. const char text[] =
  4633. /* <e a='&#228; &#x00E4;'></e> */
  4634. "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
  4635. "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
  4636. #ifdef XML_UNICODE
  4637. const XML_Char *expected = XCS("\x00e4 \x00e4");
  4638. #else
  4639. const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
  4640. #endif
  4641. CharData storage;
  4642. CharData_Init(&storage);
  4643. XML_SetUserData(g_parser, &storage);
  4644. XML_SetStartElementHandler(g_parser, accumulate_attribute);
  4645. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  4646. == XML_STATUS_ERROR)
  4647. xml_failure(g_parser);
  4648. CharData_CheckXMLChars(&storage, expected);
  4649. }
  4650. END_TEST
  4651. START_TEST(test_entity_public_utf16_be) {
  4652. const char text[] =
  4653. /* <!DOCTYPE d [ */
  4654. "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
  4655. /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
  4656. "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
  4657. "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
  4658. /* %e; */
  4659. "\0%\0e\0;\0\n"
  4660. /* ]> */
  4661. "\0]\0>\0\n"
  4662. /* <d>&j;</d> */
  4663. "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
  4664. ExtTest2 test_data
  4665. = {/* <!ENTITY j 'baz'> */
  4666. "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
  4667. const XML_Char *expected = XCS("baz");
  4668. CharData storage;
  4669. CharData_Init(&storage);
  4670. test_data.storage = &storage;
  4671. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  4672. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
  4673. XML_SetUserData(g_parser, &test_data);
  4674. XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
  4675. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  4676. == XML_STATUS_ERROR)
  4677. xml_failure(g_parser);
  4678. CharData_CheckXMLChars(&storage, expected);
  4679. }
  4680. END_TEST
  4681. START_TEST(test_entity_public_utf16_le) {
  4682. const char text[] =
  4683. /* <!DOCTYPE d [ */
  4684. "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
  4685. /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
  4686. "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
  4687. "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
  4688. /* %e; */
  4689. "%\0e\0;\0\n\0"
  4690. /* ]> */
  4691. "]\0>\0\n\0"
  4692. /* <d>&j;</d> */
  4693. "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
  4694. ExtTest2 test_data
  4695. = {/* <!ENTITY j 'baz'> */
  4696. "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
  4697. const XML_Char *expected = XCS("baz");
  4698. CharData storage;
  4699. CharData_Init(&storage);
  4700. test_data.storage = &storage;
  4701. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  4702. XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
  4703. XML_SetUserData(g_parser, &test_data);
  4704. XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
  4705. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
  4706. == XML_STATUS_ERROR)
  4707. xml_failure(g_parser);
  4708. CharData_CheckXMLChars(&storage, expected);
  4709. }
  4710. END_TEST
  4711. /* Test that a doctype with neither an internal nor external subset is
  4712. * faulted
  4713. */
  4714. START_TEST(test_short_doctype) {
  4715. const char *text = "<!DOCTYPE doc></doc>";
  4716. expect_failure(text, XML_ERROR_INVALID_TOKEN,
  4717. "DOCTYPE without subset not rejected");
  4718. }
  4719. END_TEST
  4720. START_TEST(test_short_doctype_2) {
  4721. const char *text = "<!DOCTYPE doc PUBLIC></doc>";
  4722. expect_failure(text, XML_ERROR_SYNTAX,
  4723. "DOCTYPE without Public ID not rejected");
  4724. }
  4725. END_TEST
  4726. START_TEST(test_short_doctype_3) {
  4727. const char *text = "<!DOCTYPE doc SYSTEM></doc>";
  4728. expect_failure(text, XML_ERROR_SYNTAX,
  4729. "DOCTYPE without System ID not rejected");
  4730. }
  4731. END_TEST
  4732. START_TEST(test_long_doctype) {
  4733. const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
  4734. expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
  4735. }
  4736. END_TEST
  4737. START_TEST(test_bad_entity) {
  4738. const char *text = "<!DOCTYPE doc [\n"
  4739. " <!ENTITY foo PUBLIC>\n"
  4740. "]>\n"
  4741. "<doc/>";
  4742. expect_failure(text, XML_ERROR_SYNTAX,
  4743. "ENTITY without Public ID is not rejected");
  4744. }
  4745. END_TEST
  4746. /* Test unquoted value is faulted */
  4747. START_TEST(test_bad_entity_2) {
  4748. const char *text = "<!DOCTYPE doc [\n"
  4749. " <!ENTITY % foo bar>\n"
  4750. "]>\n"
  4751. "<doc/>";
  4752. expect_failure(text, XML_ERROR_SYNTAX,
  4753. "ENTITY without Public ID is not rejected");
  4754. }
  4755. END_TEST
  4756. START_TEST(test_bad_entity_3) {
  4757. const char *text = "<!DOCTYPE doc [\n"
  4758. " <!ENTITY % foo PUBLIC>\n"
  4759. "]>\n"
  4760. "<doc/>";
  4761. expect_failure(text, XML_ERROR_SYNTAX,
  4762. "Parameter ENTITY without Public ID is not rejected");
  4763. }
  4764. END_TEST
  4765. START_TEST(test_bad_entity_4) {
  4766. const char *text = "<!DOCTYPE doc [\n"
  4767. " <!ENTITY % foo SYSTEM>\n"
  4768. "]>\n"
  4769. "<doc/>";
  4770. expect_failure(text, XML_ERROR_SYNTAX,
  4771. "Parameter ENTITY without Public ID is not rejected");
  4772. }
  4773. END_TEST
  4774. START_TEST(test_bad_notation) {
  4775. const char *text = "<!DOCTYPE doc [\n"
  4776. " <!NOTATION n SYSTEM>\n"
  4777. "]>\n"
  4778. "<doc/>";
  4779. expect_failure(text, XML_ERROR_SYNTAX,
  4780. "Notation without System ID is not rejected");
  4781. }
  4782. END_TEST
  4783. /* Test for issue #11, wrongly suppressed default handler */
  4784. START_TEST(test_default_doctype_handler) {
  4785. const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
  4786. " <!ENTITY foo 'bar'>\n"
  4787. "]>\n"
  4788. "<doc>&foo;</doc>";
  4789. DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
  4790. {XCS("'test.dtd'"), 10, XML_FALSE},
  4791. {NULL, 0, XML_FALSE}};
  4792. int i;
  4793. XML_SetUserData(g_parser, &test_data);
  4794. XML_SetDefaultHandler(g_parser, checking_default_handler);
  4795. XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
  4796. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  4797. == XML_STATUS_ERROR)
  4798. xml_failure(g_parser);
  4799. for (i = 0; test_data[i].expected != NULL; i++)
  4800. if (! test_data[i].seen)
  4801. fail("Default handler not run for public !DOCTYPE");
  4802. }
  4803. END_TEST
  4804. START_TEST(test_empty_element_abort) {
  4805. const char *text = "<abort/>";
  4806. XML_SetStartElementHandler(g_parser, start_element_suspender);
  4807. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  4808. != XML_STATUS_ERROR)
  4809. fail("Expected to error on abort");
  4810. }
  4811. END_TEST
  4812. /* Regression test for GH issue #612: unfinished m_declAttributeType
  4813. * allocation in ->m_tempPool can corrupt following allocation.
  4814. */
  4815. START_TEST(test_pool_integrity_with_unfinished_attr) {
  4816. const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
  4817. "<!DOCTYPE foo [\n"
  4818. "<!ELEMENT foo ANY>\n"
  4819. "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
  4820. "%entp;\n"
  4821. "]>\n"
  4822. "<a></a>\n";
  4823. const XML_Char *expected = XCS("COMMENT");
  4824. CharData storage;
  4825. CharData_Init(&storage);
  4826. XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  4827. XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
  4828. XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
  4829. XML_SetCommentHandler(g_parser, accumulate_comment);
  4830. XML_SetUserData(g_parser, &storage);
  4831. if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
  4832. == XML_STATUS_ERROR)
  4833. xml_failure(g_parser);
  4834. CharData_CheckXMLChars(&storage, expected);
  4835. }
  4836. END_TEST
  4837. /* Test a possible early return location in internalEntityProcessor */
  4838. START_TEST(test_entity_ref_no_elements) {
  4839. const char *const text = "<!DOCTYPE foo [\n"
  4840. "<!ENTITY e1 \"test\">\n"
  4841. "]> <foo>&e1;"; // intentionally missing newline
  4842. XML_Parser parser = XML_ParserCreate(NULL);
  4843. assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
  4844. == XML_STATUS_ERROR);
  4845. assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS);
  4846. XML_ParserFree(parser);
  4847. }
  4848. END_TEST
  4849. /* Tests if chained entity references lead to unbounded recursion */
  4850. START_TEST(test_deep_nested_entity) {
  4851. const size_t N_LINES = 60000;
  4852. const size_t SIZE_PER_LINE = 50;
  4853. char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
  4854. if (text == NULL) {
  4855. fail("malloc failed");
  4856. }
  4857. char *textPtr = text;
  4858. // Create the XML
  4859. textPtr += snprintf(textPtr, SIZE_PER_LINE,
  4860. "<!DOCTYPE foo [\n"
  4861. " <!ENTITY s0 'deepText'>\n");
  4862. for (size_t i = 1; i < N_LINES; ++i) {
  4863. textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n",
  4864. (long unsigned)i, (long unsigned)(i - 1));
  4865. }
  4866. snprintf(textPtr, SIZE_PER_LINE, "]> <foo>&s%lu;</foo>\n",
  4867. (long unsigned)(N_LINES - 1));
  4868. const XML_Char *const expected = XCS("deepText");
  4869. CharData storage;
  4870. CharData_Init(&storage);
  4871. XML_Parser parser = XML_ParserCreate(NULL);
  4872. XML_SetCharacterDataHandler(parser, accumulate_characters);
  4873. XML_SetUserData(parser, &storage);
  4874. if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
  4875. == XML_STATUS_ERROR)
  4876. xml_failure(parser);
  4877. CharData_CheckXMLChars(&storage, expected);
  4878. XML_ParserFree(parser);
  4879. free(text);
  4880. }
  4881. END_TEST
  4882. /* Tests if chained entity references in attributes
  4883. lead to unbounded recursion */
  4884. START_TEST(test_deep_nested_attribute_entity) {
  4885. const size_t N_LINES = 60000;
  4886. const size_t SIZE_PER_LINE = 100;
  4887. char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
  4888. if (text == NULL) {
  4889. fail("malloc failed");
  4890. }
  4891. char *textPtr = text;
  4892. // Create the XML
  4893. textPtr += snprintf(textPtr, SIZE_PER_LINE,
  4894. "<!DOCTYPE foo [\n"
  4895. " <!ENTITY s0 'deepText'>\n");
  4896. for (size_t i = 1; i < N_LINES; ++i) {
  4897. textPtr += snprintf(textPtr, SIZE_PER_LINE, " <!ENTITY s%lu '&s%lu;'>\n",
  4898. (long unsigned)i, (long unsigned)(i - 1));
  4899. }
  4900. snprintf(textPtr, SIZE_PER_LINE, "]> <foo name='&s%lu;'>mainText</foo>\n",
  4901. (long unsigned)(N_LINES - 1));
  4902. AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}};
  4903. ElementInfo info[] = {{XCS("foo"), 1, NULL, NULL}, {NULL, 0, NULL, NULL}};
  4904. info[0].attributes = doc_info;
  4905. XML_Parser parser = XML_ParserCreate(NULL);
  4906. ParserAndElementInfo parserPlusElemenInfo = {parser, info};
  4907. XML_SetStartElementHandler(parser, counting_start_element_handler);
  4908. XML_SetUserData(parser, &parserPlusElemenInfo);
  4909. if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
  4910. == XML_STATUS_ERROR)
  4911. xml_failure(parser);
  4912. XML_ParserFree(parser);
  4913. free(text);
  4914. }
  4915. END_TEST
  4916. START_TEST(test_deep_nested_entity_delayed_interpretation) {
  4917. const size_t N_LINES = 70000;
  4918. const size_t SIZE_PER_LINE = 100;
  4919. char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
  4920. if (text == NULL) {
  4921. fail("malloc failed");
  4922. }
  4923. char *textPtr = text;
  4924. // Create the XML
  4925. textPtr += snprintf(textPtr, SIZE_PER_LINE,
  4926. "<!DOCTYPE foo [\n"
  4927. " <!ENTITY %% s0 'deepText'>\n");
  4928. for (size_t i = 1; i < N_LINES; ++i) {
  4929. textPtr += snprintf(textPtr, SIZE_PER_LINE,
  4930. " <!ENTITY %% s%lu '&#37;s%lu;'>\n", (long unsigned)i,
  4931. (long unsigned)(i - 1));
  4932. }
  4933. snprintf(textPtr, SIZE_PER_LINE,
  4934. " <!ENTITY %% define_g \"<!ENTITY g '&#37;s%lu;'>\">\n"
  4935. " %%define_g;\n"
  4936. "]>\n"
  4937. "<foo/>\n",
  4938. (long unsigned)(N_LINES - 1));
  4939. XML_Parser parser = XML_ParserCreate(NULL);
  4940. XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  4941. if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
  4942. == XML_STATUS_ERROR)
  4943. xml_failure(parser);
  4944. XML_ParserFree(parser);
  4945. free(text);
  4946. }
  4947. END_TEST
  4948. START_TEST(test_nested_entity_suspend) {
  4949. const char *const text = "<!DOCTYPE a [\n"
  4950. " <!ENTITY e1 '<!--e1-->'>\n"
  4951. " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
  4952. " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
  4953. "]>\n"
  4954. "<a><!--start-->&e3;<!--end--></a>";
  4955. const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
  4956. XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
  4957. CharData storage;
  4958. CharData_Init(&storage);
  4959. XML_Parser parser = XML_ParserCreate(NULL);
  4960. ParserPlusStorage parserPlusStorage = {parser, &storage};
  4961. XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  4962. XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
  4963. XML_SetUserData(parser, &parserPlusStorage);
  4964. enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
  4965. while (status == XML_STATUS_SUSPENDED) {
  4966. status = XML_ResumeParser(parser);
  4967. }
  4968. if (status != XML_STATUS_OK)
  4969. xml_failure(parser);
  4970. CharData_CheckXMLChars(&storage, expected);
  4971. XML_ParserFree(parser);
  4972. }
  4973. END_TEST
  4974. START_TEST(test_nested_entity_suspend_2) {
  4975. const char *const text = "<!DOCTYPE doc [\n"
  4976. " <!ENTITY ge1 'head1Ztail1'>\n"
  4977. " <!ENTITY ge2 'head2&ge1;tail2'>\n"
  4978. " <!ENTITY ge3 'head3&ge2;tail3'>\n"
  4979. "]>\n"
  4980. "<doc>&ge3;</doc>";
  4981. const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1")
  4982. XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3");
  4983. CharData storage;
  4984. CharData_Init(&storage);
  4985. XML_Parser parser = XML_ParserCreate(NULL);
  4986. ParserPlusStorage parserPlusStorage = {parser, &storage};
  4987. XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend);
  4988. XML_SetUserData(parser, &parserPlusStorage);
  4989. enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
  4990. while (status == XML_STATUS_SUSPENDED) {
  4991. status = XML_ResumeParser(parser);
  4992. }
  4993. if (status != XML_STATUS_OK)
  4994. xml_failure(parser);
  4995. CharData_CheckXMLChars(&storage, expected);
  4996. XML_ParserFree(parser);
  4997. }
  4998. END_TEST
  4999. /* Regression test for quadratic parsing on large tokens */
  5000. START_TEST(test_big_tokens_scale_linearly) {
  5001. const struct {
  5002. const char *pre;
  5003. const char *post;
  5004. } text[] = {
  5005. {"<a>", "</a>"}, // assumed good, used as baseline
  5006. {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
  5007. {"<c attr='", "'></c>"}, // big attribute, used to be O(N²)
  5008. {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²)
  5009. {"<e><", "/></e>"}, // big elem name, used to be O(N²)
  5010. };
  5011. const int num_cases = sizeof(text) / sizeof(text[0]);
  5012. char aaaaaa[4096];
  5013. const int fillsize = (int)sizeof(aaaaaa);
  5014. const int fillcount = 100;
  5015. const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
  5016. const unsigned max_factor = 4;
  5017. const unsigned max_scanned = max_factor * approx_bytes;
  5018. memset(aaaaaa, 'a', fillsize);
  5019. if (! g_reparseDeferralEnabledDefault) {
  5020. return; // heuristic is disabled; we would get O(n^2) and fail.
  5021. }
  5022. for (int i = 0; i < num_cases; ++i) {
  5023. XML_Parser parser = XML_ParserCreate(NULL);
  5024. assert_true(parser != NULL);
  5025. enum XML_Status status;
  5026. set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
  5027. // parse the start text
  5028. g_bytesScanned = 0;
  5029. status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
  5030. (int)strlen(text[i].pre), XML_FALSE);
  5031. if (status != XML_STATUS_OK) {
  5032. xml_failure(parser);
  5033. }
  5034. // parse lots of 'a', failing the test early if it takes too long
  5035. unsigned past_max_count = 0;
  5036. for (int f = 0; f < fillcount; ++f) {
  5037. status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
  5038. if (status != XML_STATUS_OK) {
  5039. xml_failure(parser);
  5040. }
  5041. if (g_bytesScanned > max_scanned) {
  5042. // We're not done, and have already passed the limit -- the test will
  5043. // definitely fail. This block allows us to save time by failing early.
  5044. const unsigned pushed
  5045. = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
  5046. fprintf(
  5047. stderr,
  5048. "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
  5049. f + 1, fillcount, pushed, g_bytesScanned,
  5050. g_bytesScanned / (double)pushed, max_scanned, max_factor);
  5051. past_max_count++;
  5052. // We are failing, but allow a few log prints first. If we don't reach
  5053. // a count of five, the test will fail after the loop instead.
  5054. assert_true(past_max_count < 5);
  5055. }
  5056. }
  5057. // parse the end text
  5058. status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
  5059. (int)strlen(text[i].post), XML_TRUE);
  5060. if (status != XML_STATUS_OK) {
  5061. xml_failure(parser);
  5062. }
  5063. assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
  5064. if (g_bytesScanned > max_scanned) {
  5065. fprintf(
  5066. stderr,
  5067. "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
  5068. g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
  5069. max_factor);
  5070. fail("scanned too many bytes");
  5071. }
  5072. XML_ParserFree(parser);
  5073. }
  5074. }
  5075. END_TEST
  5076. START_TEST(test_set_reparse_deferral) {
  5077. const char *const pre = "<d>";
  5078. const char *const start = "<x attr='";
  5079. const char *const end = "'></x>";
  5080. char eeeeee[100];
  5081. const int fillsize = (int)sizeof(eeeeee);
  5082. memset(eeeeee, 'e', fillsize);
  5083. for (int enabled = 0; enabled <= 1; enabled += 1) {
  5084. set_subtest("deferral=%d", enabled);
  5085. XML_Parser parser = XML_ParserCreate(NULL);
  5086. assert_true(parser != NULL);
  5087. assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
  5088. // pre-grow the buffer to avoid reparsing due to almost-fullness
  5089. assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
  5090. CharData storage;
  5091. CharData_Init(&storage);
  5092. XML_SetUserData(parser, &storage);
  5093. XML_SetStartElementHandler(parser, start_element_event_handler);
  5094. enum XML_Status status;
  5095. // parse the start text
  5096. status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
  5097. if (status != XML_STATUS_OK) {
  5098. xml_failure(parser);
  5099. }
  5100. CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
  5101. // ..and the start of the token
  5102. status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
  5103. if (status != XML_STATUS_OK) {
  5104. xml_failure(parser);
  5105. }
  5106. CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
  5107. // try to parse lots of 'e', but the token isn't finished
  5108. for (int c = 0; c < 100; ++c) {
  5109. status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
  5110. if (status != XML_STATUS_OK) {
  5111. xml_failure(parser);
  5112. }
  5113. }
  5114. CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
  5115. // end the <x> token.
  5116. status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
  5117. if (status != XML_STATUS_OK) {
  5118. xml_failure(parser);
  5119. }
  5120. if (enabled) {
  5121. // In general, we may need to push more data to trigger a reparse attempt,
  5122. // but in this test, the data is constructed to always require it.
  5123. CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
  5124. // 2x the token length should suffice; the +1 covers the start and end.
  5125. for (int c = 0; c < 101; ++c) {
  5126. status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
  5127. if (status != XML_STATUS_OK) {
  5128. xml_failure(parser);
  5129. }
  5130. }
  5131. }
  5132. CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
  5133. XML_ParserFree(parser);
  5134. }
  5135. }
  5136. END_TEST
  5137. struct element_decl_data {
  5138. XML_Parser parser;
  5139. int count;
  5140. };
  5141. static void
  5142. element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
  5143. UNUSED_P(name);
  5144. struct element_decl_data *testdata = (struct element_decl_data *)userData;
  5145. testdata->count += 1;
  5146. XML_FreeContentModel(testdata->parser, model);
  5147. }
  5148. static int
  5149. external_inherited_parser(XML_Parser p, const XML_Char *context,
  5150. const XML_Char *base, const XML_Char *systemId,
  5151. const XML_Char *publicId) {
  5152. UNUSED_P(base);
  5153. UNUSED_P(systemId);
  5154. UNUSED_P(publicId);
  5155. const char *const pre = "<!ELEMENT document ANY>\n";
  5156. const char *const start = "<!ELEMENT ";
  5157. const char *const end = " ANY>\n";
  5158. const char *const post = "<!ELEMENT xyz ANY>\n";
  5159. const int enabled = *(int *)XML_GetUserData(p);
  5160. char eeeeee[100];
  5161. char spaces[100];
  5162. const int fillsize = (int)sizeof(eeeeee);
  5163. assert_true(fillsize == (int)sizeof(spaces));
  5164. memset(eeeeee, 'e', fillsize);
  5165. memset(spaces, ' ', fillsize);
  5166. XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
  5167. assert_true(parser != NULL);
  5168. // pre-grow the buffer to avoid reparsing due to almost-fullness
  5169. assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
  5170. struct element_decl_data testdata;
  5171. testdata.parser = parser;
  5172. testdata.count = 0;
  5173. XML_SetUserData(parser, &testdata);
  5174. XML_SetElementDeclHandler(parser, element_decl_counter);
  5175. enum XML_Status status;
  5176. // parse the initial text
  5177. status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
  5178. if (status != XML_STATUS_OK) {
  5179. xml_failure(parser);
  5180. }
  5181. assert_true(testdata.count == 1); // first element should be done
  5182. // ..and the start of the big token
  5183. status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
  5184. if (status != XML_STATUS_OK) {
  5185. xml_failure(parser);
  5186. }
  5187. assert_true(testdata.count == 1); // still just the first one
  5188. // try to parse lots of 'e', but the token isn't finished
  5189. for (int c = 0; c < 100; ++c) {
  5190. status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
  5191. if (status != XML_STATUS_OK) {
  5192. xml_failure(parser);
  5193. }
  5194. }
  5195. assert_true(testdata.count == 1); // *still* just the first one
  5196. // end the big token.
  5197. status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
  5198. if (status != XML_STATUS_OK) {
  5199. xml_failure(parser);
  5200. }
  5201. if (enabled) {
  5202. // In general, we may need to push more data to trigger a reparse attempt,
  5203. // but in this test, the data is constructed to always require it.
  5204. assert_true(testdata.count == 1); // or the test is incorrect
  5205. // 2x the token length should suffice; the +1 covers the start and end.
  5206. for (int c = 0; c < 101; ++c) {
  5207. status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
  5208. if (status != XML_STATUS_OK) {
  5209. xml_failure(parser);
  5210. }
  5211. }
  5212. }
  5213. assert_true(testdata.count == 2); // the big token should be done
  5214. // parse the final text
  5215. status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
  5216. if (status != XML_STATUS_OK) {
  5217. xml_failure(parser);
  5218. }
  5219. assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
  5220. XML_ParserFree(parser);
  5221. return XML_STATUS_OK;
  5222. }
  5223. START_TEST(test_reparse_deferral_is_inherited) {
  5224. const char *const text
  5225. = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
  5226. for (int enabled = 0; enabled <= 1; ++enabled) {
  5227. set_subtest("deferral=%d", enabled);
  5228. XML_Parser parser = XML_ParserCreate(NULL);
  5229. assert_true(parser != NULL);
  5230. XML_SetUserData(parser, (void *)&enabled);
  5231. XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  5232. // this handler creates a sub-parser and checks that its deferral behavior
  5233. // is what we expected, based on the value of `enabled` (in userdata).
  5234. XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
  5235. assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
  5236. if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
  5237. xml_failure(parser);
  5238. XML_ParserFree(parser);
  5239. }
  5240. }
  5241. END_TEST
  5242. START_TEST(test_set_reparse_deferral_on_null_parser) {
  5243. assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
  5244. assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
  5245. assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
  5246. assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
  5247. assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
  5248. == XML_FALSE);
  5249. assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
  5250. == XML_FALSE);
  5251. }
  5252. END_TEST
  5253. START_TEST(test_set_reparse_deferral_on_the_fly) {
  5254. const char *const pre = "<d><x attr='";
  5255. const char *const end = "'></x>";
  5256. char iiiiii[100];
  5257. const int fillsize = (int)sizeof(iiiiii);
  5258. memset(iiiiii, 'i', fillsize);
  5259. XML_Parser parser = XML_ParserCreate(NULL);
  5260. assert_true(parser != NULL);
  5261. assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
  5262. CharData storage;
  5263. CharData_Init(&storage);
  5264. XML_SetUserData(parser, &storage);
  5265. XML_SetStartElementHandler(parser, start_element_event_handler);
  5266. enum XML_Status status;
  5267. // parse the start text
  5268. status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
  5269. if (status != XML_STATUS_OK) {
  5270. xml_failure(parser);
  5271. }
  5272. CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
  5273. // try to parse some 'i', but the token isn't finished
  5274. status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
  5275. if (status != XML_STATUS_OK) {
  5276. xml_failure(parser);
  5277. }
  5278. CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
  5279. // end the <x> token.
  5280. status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
  5281. if (status != XML_STATUS_OK) {
  5282. xml_failure(parser);
  5283. }
  5284. CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
  5285. // now change the heuristic setting and add *no* data
  5286. assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
  5287. // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
  5288. status = XML_Parse(parser, "", 0, XML_FALSE);
  5289. if (status != XML_STATUS_OK) {
  5290. xml_failure(parser);
  5291. }
  5292. CharData_CheckXMLChars(&storage, XCS("dx"));
  5293. XML_ParserFree(parser);
  5294. }
  5295. END_TEST
  5296. START_TEST(test_set_bad_reparse_option) {
  5297. XML_Parser parser = XML_ParserCreate(NULL);
  5298. assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
  5299. assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
  5300. assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
  5301. assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
  5302. assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
  5303. assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
  5304. assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
  5305. assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
  5306. assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
  5307. XML_ParserFree(parser);
  5308. }
  5309. END_TEST
  /* Allocation statistics gathered by counting_realloc(): the sum of all
     requested sizes, and the largest single request seen. */
  static size_t g_totalAlloc = 0;
  static size_t g_biggestAlloc = 0;

  /* realloc() wrapper that records the requested size in g_totalAlloc and
     g_biggestAlloc before forwarding the call.  The size is recorded
     whether or not the underlying realloc() succeeds. */
  static void *
  counting_realloc(void *ptr, size_t size) {
    g_biggestAlloc = (size > g_biggestAlloc) ? size : g_biggestAlloc;
    g_totalAlloc += size;
    return realloc(ptr, size);
  }

  /* malloc() counterpart: a fresh allocation is a counted realloc of NULL. */
  static void *
  counting_malloc(size_t size) {
    void *const fresh = counting_realloc(NULL, size);
    return fresh;
  }
  5324. START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
  5325. if (g_chunkSize != 0) {
  5326. // this test does not use SINGLE_BYTES, because it depends on very precise
  5327. // buffer fills.
  5328. return;
  5329. }
  5330. if (! g_reparseDeferralEnabledDefault) {
  5331. return; // this test is irrelevant when the deferral heuristic is disabled.
  5332. }
  5333. const int document_length = 65536;
  5334. char *const document = (char *)malloc(document_length);
  5335. const XML_Memory_Handling_Suite memfuncs = {
  5336. counting_malloc,
  5337. counting_realloc,
  5338. free,
  5339. };
  5340. const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
  5341. const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
  5342. const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
  5343. for (const int *leading = leading_list; *leading >= 0; leading++) {
  5344. for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
  5345. for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
  5346. set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
  5347. *fillsize);
  5348. // start by checking that the test looks reasonably valid
  5349. assert_true(*leading + *bigtoken <= document_length);
  5350. // put 'x' everywhere; some will be overwritten by elements.
  5351. memset(document, 'x', document_length);
  5352. // maybe add an initial tag
  5353. if (*leading) {
  5354. assert_true(*leading >= 3); // or the test case is invalid
  5355. memcpy(document, "<a>", 3);
  5356. }
  5357. // add the large token
  5358. document[*leading + 0] = '<';
  5359. document[*leading + 1] = 'b';
  5360. memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
  5361. document[*leading + *bigtoken - 1] = '>';
  5362. // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
  5363. const int expected_elem_total = 1 + (*leading ? 1 : 0);
  5364. XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
  5365. assert_true(parser != NULL);
  5366. CharData storage;
  5367. CharData_Init(&storage);
  5368. XML_SetUserData(parser, &storage);
  5369. XML_SetStartElementHandler(parser, start_element_event_handler);
  5370. g_biggestAlloc = 0;
  5371. g_totalAlloc = 0;
  5372. int offset = 0;
  5373. // fill data until the big token is covered (but not necessarily parsed)
  5374. while (offset < *leading + *bigtoken) {
  5375. assert_true(offset + *fillsize <= document_length);
  5376. const enum XML_Status status
  5377. = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
  5378. if (status != XML_STATUS_OK) {
  5379. xml_failure(parser);
  5380. }
  5381. offset += *fillsize;
  5382. }
  5383. // Now, check that we've had a buffer allocation that could fit the
  5384. // context bytes and our big token. In order to detect a special case,
  5385. // we need to know how many bytes of our big token were included in the
  5386. // first push that contained _any_ bytes of the big token:
  5387. const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
  5388. if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
  5389. // Special case: we aren't saving any context, and the whole big token
  5390. // was covered by a single fill, so Expat may have parsed directly
  5391. // from our input pointer, without allocating an internal buffer.
  5392. } else if (*leading < XML_CONTEXT_BYTES) {
  5393. assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
  5394. } else {
  5395. assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
  5396. }
  5397. // fill data until the big token is actually parsed
  5398. while (storage.count < expected_elem_total) {
  5399. const size_t alloc_before = g_totalAlloc;
  5400. assert_true(offset + *fillsize <= document_length);
  5401. const enum XML_Status status
  5402. = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
  5403. if (status != XML_STATUS_OK) {
  5404. xml_failure(parser);
  5405. }
  5406. offset += *fillsize;
  5407. // since all the bytes of the big token are already in the buffer,
  5408. // the bufsize ceiling should make us finish its parsing without any
  5409. // further buffer allocations. We assume that there will be no other
  5410. // large allocations in this test.
  5411. assert_true(g_totalAlloc - alloc_before < 4096);
  5412. }
  5413. // test-the-test: was our alloc even called?
  5414. assert_true(g_totalAlloc > 0);
  5415. // test-the-test: there shouldn't be any extra start elements
  5416. assert_true(storage.count == expected_elem_total);
  5417. XML_ParserFree(parser);
  5418. }
  5419. }
  5420. }
  5421. free(document);
  5422. }
  5423. END_TEST
  5424. START_TEST(test_varying_buffer_fills) {
  5425. const int KiB = 1024;
  5426. const int MiB = 1024 * KiB;
  5427. const int document_length = 16 * MiB;
  5428. const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
  5429. if (g_chunkSize != 0) {
  5430. return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
  5431. }
  5432. char *const document = (char *)malloc(document_length);
  5433. assert_true(document != NULL);
  5434. memset(document, 'x', document_length);
  5435. document[0] = '<';
  5436. document[1] = 't';
  5437. memset(&document[2], ' ', big - 2); // a very spacy token
  5438. document[big - 1] = '>';
  5439. // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
  5440. // When reparse deferral is enabled, the final (negated) value is the expected
  5441. // maximum number of bytes scanned in parse attempts.
  5442. const int testcases[][30] = {
  5443. {8 * MiB, -8 * MiB},
  5444. {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
  5445. // zero-size fills shouldn't trigger the bypass
  5446. {4 * MiB, 0, 4 * MiB, -12 * MiB},
  5447. {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
  5448. {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
  5449. // try to hit the buffer ceiling only once (at the end)
  5450. {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
  5451. // try to hit the same buffer ceiling multiple times
  5452. {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
  5453. // try to hit every ceiling, by always landing 1K shy of the buffer size
  5454. {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
  5455. 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
  5456. // try to avoid every ceiling, by always landing 1B past the buffer size
  5457. // the normal 2x heuristic threshold still forces parse attempts.
  5458. {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
  5459. 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
  5460. 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
  5461. 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
  5462. 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
  5463. 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
  5464. 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7
  5465. -(10 * MiB + 682 * KiB + 7)},
  5466. // try to avoid every ceiling again, except on our last fill.
  5467. {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
  5468. 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
  5469. 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
  5470. 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
  5471. 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
  5472. 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
  5473. 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
  5474. -(10 * MiB + 682 * KiB + 6)},
  5475. // try to hit ceilings on the way multiple times
  5476. {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
  5477. 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
  5478. 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer
  5479. 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer
  5480. // we'll make a parse attempt at every parse call
  5481. -(45 * MiB + 12)},
  5482. };
  5483. const int testcount = sizeof(testcases) / sizeof(testcases[0]);
  5484. for (int test_i = 0; test_i < testcount; test_i++) {
  5485. const int *fillsize = testcases[test_i];
  5486. set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
  5487. fillsize[2], fillsize[3]);
  5488. XML_Parser parser = XML_ParserCreate(NULL);
  5489. assert_true(parser != NULL);
  5490. CharData storage;
  5491. CharData_Init(&storage);
  5492. XML_SetUserData(parser, &storage);
  5493. XML_SetStartElementHandler(parser, start_element_event_handler);
  5494. g_bytesScanned = 0;
  5495. int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
  5496. int offset = 0;
  5497. while (*fillsize >= 0) {
  5498. assert_true(offset + *fillsize <= document_length); // or test is invalid
  5499. const enum XML_Status status
  5500. = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
  5501. if (status != XML_STATUS_OK) {
  5502. xml_failure(parser);
  5503. }
  5504. offset += *fillsize;
  5505. fillsize++;
  5506. assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
  5507. worstcase_bytes += offset; // we might've tried to parse all pending bytes
  5508. }
  5509. assert_true(storage.count == 1); // the big token should've been parsed
  5510. assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
  5511. if (g_reparseDeferralEnabledDefault) {
  5512. // heuristic is enabled; some XML_Parse calls may have deferred reparsing
  5513. const unsigned max_bytes_scanned = -*fillsize;
  5514. if (g_bytesScanned > max_bytes_scanned) {
  5515. fprintf(stderr,
  5516. "bytes scanned in parse attempts: actual=%u limit=%u \n",
  5517. g_bytesScanned, max_bytes_scanned);
  5518. fail("too many bytes scanned in parse attempts");
  5519. }
  5520. }
  5521. assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
  5522. XML_ParserFree(parser);
  5523. }
  5524. free(document);
  5525. }
  5526. END_TEST
  5527. void
  5528. make_basic_test_case(Suite *s) {
  5529. TCase *tc_basic = tcase_create("basic tests");
  5530. suite_add_tcase(s, tc_basic);
  5531. tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
  5532. tcase_add_test(tc_basic, test_nul_byte);
  5533. tcase_add_test(tc_basic, test_u0000_char);
  5534. tcase_add_test(tc_basic, test_siphash_self);
  5535. tcase_add_test(tc_basic, test_siphash_spec);
  5536. tcase_add_test(tc_basic, test_bom_utf8);
  5537. tcase_add_test(tc_basic, test_bom_utf16_be);
  5538. tcase_add_test(tc_basic, test_bom_utf16_le);
  5539. tcase_add_test(tc_basic, test_nobom_utf16_le);
  5540. tcase_add_test(tc_basic, test_hash_collision);
  5541. tcase_add_test(tc_basic, test_illegal_utf8);
  5542. tcase_add_test(tc_basic, test_utf8_auto_align);
  5543. tcase_add_test(tc_basic, test_utf16);
  5544. tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
  5545. tcase_add_test(tc_basic, test_not_utf16);
  5546. tcase_add_test(tc_basic, test_bad_encoding);
  5547. tcase_add_test(tc_basic, test_latin1_umlauts);
  5548. tcase_add_test(tc_basic, test_long_utf8_character);
  5549. tcase_add_test(tc_basic, test_long_latin1_attribute);
  5550. tcase_add_test(tc_basic, test_long_ascii_attribute);
  5551. /* Regression test for SF bug #491986. */
  5552. tcase_add_test(tc_basic, test_danish_latin1);
  5553. /* Regression test for SF bug #514281. */
  5554. tcase_add_test(tc_basic, test_french_charref_hexidecimal);
  5555. tcase_add_test(tc_basic, test_french_charref_decimal);
  5556. tcase_add_test(tc_basic, test_french_latin1);
  5557. tcase_add_test(tc_basic, test_french_utf8);
  5558. tcase_add_test(tc_basic, test_utf8_false_rejection);
  5559. tcase_add_test(tc_basic, test_line_number_after_parse);
  5560. tcase_add_test(tc_basic, test_column_number_after_parse);
  5561. tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
  5562. tcase_add_test(tc_basic, test_line_number_after_error);
  5563. tcase_add_test(tc_basic, test_column_number_after_error);
  5564. tcase_add_test(tc_basic, test_really_long_lines);
  5565. tcase_add_test(tc_basic, test_really_long_encoded_lines);
  5566. tcase_add_test(tc_basic, test_end_element_events);
  5567. tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
  5568. tcase_add_test(tc_basic, test_attr_whitespace_normalization);
  5569. tcase_add_test(tc_basic, test_xmldecl_misplaced);
  5570. tcase_add_test(tc_basic, test_xmldecl_invalid);
  5571. tcase_add_test(tc_basic, test_xmldecl_missing_attr);
  5572. tcase_add_test(tc_basic, test_xmldecl_missing_value);
  5573. tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
  5574. tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
  5575. tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
  5576. tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
  5577. tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
  5578. tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
  5579. tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
  5580. tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
  5581. tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
  5582. tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
  5583. tcase_add_test(tc_basic,
  5584. test_wfc_undeclared_entity_with_external_subset_standalone);
  5585. tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
  5586. tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
  5587. tcase_add_test(tc_basic, test_not_standalone_handler_reject);
  5588. tcase_add_test(tc_basic, test_not_standalone_handler_accept);
  5589. tcase_add_test(tc_basic, test_entity_start_tag_level_greater_than_one);
  5590. tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
  5591. tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs);
  5592. tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
  5593. tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
  5594. tcase_add_test(tc_basic, test_dtd_attr_handling);
  5595. tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
  5596. tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
  5597. tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
  5598. tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
  5599. tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
  5600. tcase_add_test(tc_basic, test_good_cdata_ascii);
  5601. tcase_add_test(tc_basic, test_good_cdata_utf16);
  5602. tcase_add_test(tc_basic, test_good_cdata_utf16_le);
  5603. tcase_add_test(tc_basic, test_long_cdata_utf16);
  5604. tcase_add_test(tc_basic, test_multichar_cdata_utf16);
  5605. tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
  5606. tcase_add_test(tc_basic, test_bad_cdata);
  5607. tcase_add_test(tc_basic, test_bad_cdata_utf16);
  5608. tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
  5609. tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
  5610. tcase_add_test(tc_basic, test_memory_allocation);
  5611. tcase_add_test__if_xml_ge(tc_basic, test_default_current);
  5612. tcase_add_test(tc_basic, test_dtd_elements);
  5613. tcase_add_test(tc_basic, test_dtd_elements_nesting);
  5614. tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
  5615. tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
  5616. tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
  5617. tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
  5618. tcase_add_test__ifdef_xml_dtd(tc_basic,
  5619. test_foreign_dtd_without_external_subset);
  5620. tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
  5621. tcase_add_test(tc_basic, test_set_base);
  5622. tcase_add_test(tc_basic, test_attributes);
  5623. tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
  5624. tcase_add_test(tc_basic, test_resume_invalid_parse);
  5625. tcase_add_test(tc_basic, test_resume_resuspended);
  5626. tcase_add_test(tc_basic, test_cdata_default);
  5627. tcase_add_test(tc_basic, test_subordinate_reset);
  5628. tcase_add_test(tc_basic, test_subordinate_suspend);
  5629. tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
  5630. tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
  5631. tcase_add_test__ifdef_xml_dtd(tc_basic,
  5632. test_ext_entity_invalid_suspended_parse);
  5633. tcase_add_test(tc_basic, test_explicit_encoding);
  5634. tcase_add_test(tc_basic, test_trailing_cr);
  5635. tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
  5636. tcase_add_test(tc_basic, test_trailing_rsqb);
  5637. tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
  5638. tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
  5639. tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
  5640. tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
  5641. tcase_add_test(tc_basic, test_empty_parse);
  5642. tcase_add_test(tc_basic, test_negative_len_parse);
  5643. tcase_add_test(tc_basic, test_negative_len_parse_buffer);
  5644. tcase_add_test(tc_basic, test_get_buffer_1);
  5645. tcase_add_test(tc_basic, test_get_buffer_2);
  5646. #if XML_CONTEXT_BYTES > 0
  5647. tcase_add_test(tc_basic, test_get_buffer_3_overflow);
  5648. #endif
  5649. tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
  5650. tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
  5651. tcase_add_test(tc_basic, test_byte_info_at_end);
  5652. tcase_add_test(tc_basic, test_byte_info_at_error);
  5653. tcase_add_test(tc_basic, test_byte_info_at_cdata);
  5654. tcase_add_test(tc_basic, test_predefined_entities);
  5655. tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
  5656. tcase_add_test(tc_basic, test_not_predefined_entities);
  5657. tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
  5658. tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
  5659. tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
  5660. tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
  5661. tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
  5662. tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
  5663. tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
  5664. tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
  5665. tcase_add_test(tc_basic, test_bad_public_doctype);
  5666. tcase_add_test(tc_basic, test_attribute_enum_value);
  5667. tcase_add_test(tc_basic, test_predefined_entity_redefinition);
  5668. tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
  5669. tcase_add_test(tc_basic, test_public_notation_no_sysid);
  5670. tcase_add_test(tc_basic, test_nested_groups);
  5671. tcase_add_test(tc_basic, test_group_choice);
  5672. tcase_add_test(tc_basic, test_standalone_parameter_entity);
  5673. tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
  5674. tcase_add_test__ifdef_xml_dtd(tc_basic,
  5675. test_recursive_external_parameter_entity);
  5676. tcase_add_test__ifdef_xml_dtd(tc_basic,
  5677. test_recursive_external_parameter_entity_2);
  5678. tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
  5679. tcase_add_test(tc_basic, test_suspend_xdecl);
  5680. tcase_add_test(tc_basic, test_abort_epilog);
  5681. tcase_add_test(tc_basic, test_abort_epilog_2);
  5682. tcase_add_test(tc_basic, test_suspend_epilog);
  5683. tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
  5684. tcase_add_test(tc_basic, test_unfinished_epilog);
  5685. tcase_add_test(tc_basic, test_partial_char_in_epilog);
  5686. tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
  5687. tcase_add_test__ifdef_xml_dtd(tc_basic,
  5688. test_suspend_resume_internal_entity_issue_629);
  5689. tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
  5690. tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
  5691. tcase_add_test(tc_basic, test_restart_on_error);
  5692. tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
  5693. tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
  5694. tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
  5695. tcase_add_test(tc_basic, test_standalone_internal_entity);
  5696. tcase_add_test(tc_basic, test_skipped_external_entity);
  5697. tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
  5698. tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
  5699. tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
  5700. tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
  5701. tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
  5702. tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
  5703. tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
  5704. tcase_add_test(tc_basic, test_pi_handled_in_default);
  5705. tcase_add_test(tc_basic, test_comment_handled_in_default);
  5706. tcase_add_test(tc_basic, test_pi_yml);
  5707. tcase_add_test(tc_basic, test_pi_xnl);
  5708. tcase_add_test(tc_basic, test_pi_xmm);
  5709. tcase_add_test(tc_basic, test_utf16_pi);
  5710. tcase_add_test(tc_basic, test_utf16_be_pi);
  5711. tcase_add_test(tc_basic, test_utf16_be_comment);
  5712. tcase_add_test(tc_basic, test_utf16_le_comment);
  5713. tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
  5714. tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
  5715. tcase_add_test(tc_basic, test_unknown_encoding_success);
  5716. tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
  5717. tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
  5718. tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
  5719. tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
  5720. tcase_add_test(tc_basic, test_invalid_unknown_encoding);
  5721. tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
  5722. tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
  5723. tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
  5724. tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
  5725. tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
  5726. tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
  5727. tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
  5728. tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
  5729. tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
  5730. tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
  5731. tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
  5732. tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
  5733. tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
  5734. tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
  5735. tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
  5736. tcase_add_test(tc_basic, test_utf8_in_cdata_section);
  5737. tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
  5738. tcase_add_test(tc_basic, test_utf8_in_start_tags);
  5739. tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
  5740. tcase_add_test(tc_basic, test_utf16_attribute);
  5741. tcase_add_test(tc_basic, test_utf16_second_attr);
  5742. tcase_add_test(tc_basic, test_attr_after_solidus);
  5743. tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
  5744. tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
  5745. tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
  5746. tcase_add_test(tc_basic, test_bad_doctype);
  5747. tcase_add_test(tc_basic, test_bad_doctype_utf8);
  5748. tcase_add_test(tc_basic, test_bad_doctype_utf16);
  5749. tcase_add_test(tc_basic, test_bad_doctype_plus);
  5750. tcase_add_test(tc_basic, test_bad_doctype_star);
  5751. tcase_add_test(tc_basic, test_bad_doctype_query);
  5752. tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
  5753. tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
  5754. tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
  5755. tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
  5756. tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
  5757. tcase_add_test(tc_basic, test_short_doctype);
  5758. tcase_add_test(tc_basic, test_short_doctype_2);
  5759. tcase_add_test(tc_basic, test_short_doctype_3);
  5760. tcase_add_test(tc_basic, test_long_doctype);
  5761. tcase_add_test(tc_basic, test_bad_entity);
  5762. tcase_add_test(tc_basic, test_bad_entity_2);
  5763. tcase_add_test(tc_basic, test_bad_entity_3);
  5764. tcase_add_test(tc_basic, test_bad_entity_4);
  5765. tcase_add_test(tc_basic, test_bad_notation);
  5766. tcase_add_test(tc_basic, test_default_doctype_handler);
  5767. tcase_add_test(tc_basic, test_empty_element_abort);
  5768. tcase_add_test__ifdef_xml_dtd(tc_basic,
  5769. test_pool_integrity_with_unfinished_attr);
  5770. tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements);
  5771. tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity);
  5772. tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity);
  5773. tcase_add_test__if_xml_ge(tc_basic,
  5774. test_deep_nested_entity_delayed_interpretation);
  5775. tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
  5776. tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2);
  5777. tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
  5778. tcase_add_test(tc_basic, test_set_reparse_deferral);
  5779. tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
  5780. tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
  5781. tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
  5782. tcase_add_test(tc_basic, test_set_bad_reparse_option);
  5783. tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
  5784. tcase_add_test(tc_basic, test_varying_buffer_fills);
  5785. }