|
@@ -8,7 +8,7 @@
|
|
|
|
|
|
Copyright (c) 2001-2006 Fred L. Drake, Jr. <[email protected]>
|
|
|
Copyright (c) 2003 Greg Stein <[email protected]>
|
|
|
- Copyright (c) 2005-2007 Steven Solie <s[email protected]>
|
|
|
+ Copyright (c) 2005-2007 Steven Solie <s[email protected]>
|
|
|
Copyright (c) 2005-2012 Karl Waclawek <[email protected]>
|
|
|
Copyright (c) 2016-2022 Sebastian Pipping <[email protected]>
|
|
|
Copyright (c) 2017-2018 Rhodri James <[email protected]>
|
|
@@ -2664,6 +2664,82 @@ START_TEST(test_dtd_elements) {
|
|
|
}
|
|
|
END_TEST
|
|
|
|
|
|
+static void XMLCALL
|
|
|
+element_decl_check_model(void *userData, const XML_Char *name,
|
|
|
+ XML_Content *model) {
|
|
|
+ UNUSED_P(userData);
|
|
|
+ uint32_t errorFlags = 0;
|
|
|
+
|
|
|
+ /* Expected model array structure is this:
|
|
|
+ * [0] (type 6, quant 0)
|
|
|
+ * [1] (type 5, quant 0)
|
|
|
+ * [3] (type 4, quant 0, name "bar")
|
|
|
+ * [4] (type 4, quant 0, name "foo")
|
|
|
+ * [5] (type 4, quant 3, name "xyz")
|
|
|
+ * [2] (type 4, quant 2, name "zebra")
|
|
|
+ */
|
|
|
+ errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
|
|
|
+ errorFlags |= ((model != NULL) ? 0 : (1u << 1));
|
|
|
+
|
|
|
+ errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
|
|
|
+ errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
|
|
|
+ errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
|
|
|
+ errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
|
|
|
+ errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
|
|
|
+
|
|
|
+ errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
|
|
|
+ errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
|
|
|
+ errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
|
|
|
+ errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
|
|
|
+ errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
|
|
|
+
|
|
|
+ errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
|
|
|
+ errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
|
|
|
+ errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
|
|
|
+ errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
|
|
|
+ errorFlags |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
|
|
|
+
|
|
|
+ errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
|
|
|
+ errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
|
|
|
+ errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
|
|
|
+ errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
|
|
|
+ errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
|
|
|
+
|
|
|
+ errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
|
|
|
+ errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
|
|
|
+ errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
|
|
|
+ errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
|
|
|
+ errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
|
|
|
+
|
|
|
+ errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
|
|
|
+ errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
|
|
|
+ errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
|
|
|
+ errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
|
|
|
+ errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
|
|
|
+
|
|
|
+ XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
|
|
|
+ XML_FreeContentModel(g_parser, model);
|
|
|
+}
|
|
|
+
|
|
|
+START_TEST(test_dtd_elements_nesting) {
|
|
|
+ // Payload inspired by a test in Perl's XML::Parser
|
|
|
+ const char *text = "<!DOCTYPE foo [\n"
|
|
|
+ "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
|
|
|
+ "]>\n"
|
|
|
+ "<foo/>";
|
|
|
+
|
|
|
+ XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
|
|
|
+
|
|
|
+ XML_SetElementDeclHandler(g_parser, element_decl_check_model);
|
|
|
+ if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
|
|
|
+ == XML_STATUS_ERROR)
|
|
|
+ xml_failure(g_parser);
|
|
|
+
|
|
|
+ if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
|
|
|
+ fail("Element declaration model regression detected");
|
|
|
+}
|
|
|
+END_TEST
|
|
|
+
|
|
|
/* Test foreign DTD handling */
|
|
|
START_TEST(test_set_foreign_dtd) {
|
|
|
const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
|
|
@@ -5998,6 +6074,105 @@ START_TEST(test_utf8_in_cdata_section_2) {
|
|
|
}
|
|
|
END_TEST
|
|
|
|
|
|
+START_TEST(test_utf8_in_start_tags) {
|
|
|
+ struct test_case {
|
|
|
+ bool goodName;
|
|
|
+ bool goodNameStart;
|
|
|
+ const char *tagName;
|
|
|
+ };
|
|
|
+
|
|
|
+ // The idea with the tests below is this:
|
|
|
+ // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
|
|
|
+ // go to isNever and are hence not a concern.
|
|
|
+ //
|
|
|
+ // We start with a character that is a valid name character
|
|
|
+ // (or even name-start character, see XML 1.0r4 spec) and then we flip
|
|
|
+ // single bits at places where (1) the result leaves the UTF-8 encoding space
|
|
|
+ // and (2) we stay in the same n-byte sequence family.
|
|
|
+ //
|
|
|
+ // The flipped bits are highlighted in angle brackets in comments,
|
|
|
+ // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
|
|
|
+ // the most significant bit to 1 to leave UTF-8 encoding space.
|
|
|
+ struct test_case cases[] = {
|
|
|
+ // 1-byte UTF-8: [0xxx xxxx]
|
|
|
+ {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
|
|
|
+ {false, false, "\xBA"}, // [<1>011 1010]
|
|
|
+ {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
|
|
|
+ {false, false, "\xB9"}, // [<1>011 1001]
|
|
|
+
|
|
|
+ // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
|
|
|
+ {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
|
|
|
+ // Arabic small waw U+06E5
|
|
|
+ {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
|
|
|
+ {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
|
|
|
+ {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
|
|
|
+ {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
|
|
|
+ // combining char U+0301
|
|
|
+ {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
|
|
|
+ {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
|
|
|
+ {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
|
|
|
+
|
|
|
+ // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
|
|
|
+ {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
|
|
|
+ // Devanagari Letter A U+0905
|
|
|
+ {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
|
|
|
+ {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
|
|
|
+ {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
|
|
|
+ {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
|
|
|
+ {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
|
|
|
+ {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
|
|
|
+ // combining char U+0901
|
|
|
+ {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
|
|
|
+ {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
|
|
|
+ {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
|
|
|
+ {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
|
|
|
+ {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
|
|
|
+ };
|
|
|
+ const bool atNameStart[] = {true, false};
|
|
|
+
|
|
|
+ size_t i = 0;
|
|
|
+ char doc[1024];
|
|
|
+ size_t failCount = 0;
|
|
|
+
|
|
|
+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
|
|
|
+ size_t j = 0;
|
|
|
+ for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
|
|
|
+ const bool expectedSuccess
|
|
|
+ = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
|
|
|
+ sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName);
|
|
|
+ XML_Parser parser = XML_ParserCreate(NULL);
|
|
|
+
|
|
|
+ const enum XML_Status status
|
|
|
+ = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
|
|
|
+
|
|
|
+ bool success = true;
|
|
|
+ if ((status == XML_STATUS_OK) != expectedSuccess) {
|
|
|
+ success = false;
|
|
|
+ }
|
|
|
+ if ((status == XML_STATUS_ERROR)
|
|
|
+ && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
|
|
|
+ success = false;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (! success) {
|
|
|
+ fprintf(
|
|
|
+ stderr,
|
|
|
+ "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
|
|
|
+ (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
|
|
|
+ (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
|
|
|
+ failCount++;
|
|
|
+ }
|
|
|
+
|
|
|
+ XML_ParserFree(parser);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (failCount > 0) {
|
|
|
+ fail("UTF-8 regression detected");
|
|
|
+ }
|
|
|
+}
|
|
|
+END_TEST
|
|
|
+
|
|
|
/* Test trailing spaces in elements are accepted */
|
|
|
static void XMLCALL
|
|
|
record_element_end_handler(void *userData, const XML_Char *name) {
|
|
@@ -6175,6 +6350,14 @@ START_TEST(test_bad_doctype) {
|
|
|
}
|
|
|
END_TEST
|
|
|
|
|
|
+START_TEST(test_bad_doctype_utf8) {
|
|
|
+ const char *text = "<!DOCTYPE \xDB\x25"
|
|
|
+ "doc><doc/>"; // [1101 1011] [<0>010 0101]
|
|
|
+ expect_failure(text, XML_ERROR_INVALID_TOKEN,
|
|
|
+ "Invalid UTF-8 in DOCTYPE not faulted");
|
|
|
+}
|
|
|
+END_TEST
|
|
|
+
|
|
|
START_TEST(test_bad_doctype_utf16) {
|
|
|
const char text[] =
|
|
|
/* <!DOCTYPE doc [ \x06f2 ]><doc/>
|
|
@@ -7220,6 +7403,35 @@ START_TEST(test_ns_double_colon_doctype) {
|
|
|
}
|
|
|
END_TEST
|
|
|
|
|
|
+START_TEST(test_ns_separator_in_uri) {
|
|
|
+ struct test_case {
|
|
|
+ enum XML_Status expectedStatus;
|
|
|
+ const char *doc;
|
|
|
+ };
|
|
|
+ struct test_case cases[] = {
|
|
|
+ {XML_STATUS_OK, "<doc xmlns='one_two' />"},
|
|
|
+ {XML_STATUS_ERROR, "<doc xmlns='one
two' />"},
|
|
|
+ };
|
|
|
+
|
|
|
+ size_t i = 0;
|
|
|
+ size_t failCount = 0;
|
|
|
+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
|
|
|
+ XML_Parser parser = XML_ParserCreateNS(NULL, '\n');
|
|
|
+ XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
|
|
|
+ if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
|
|
|
+ /*isFinal*/ XML_TRUE)
|
|
|
+ != cases[i].expectedStatus) {
|
|
|
+ failCount++;
|
|
|
+ }
|
|
|
+ XML_ParserFree(parser);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (failCount) {
|
|
|
+ fail("Namespace separator handling is broken");
|
|
|
+ }
|
|
|
+}
|
|
|
+END_TEST
|
|
|
+
|
|
|
/* Control variable; the number of times duff_allocator() will successfully
|
|
|
* allocate */
|
|
|
#define ALLOC_ALWAYS_SUCCEED (-1)
|
|
@@ -7376,7 +7588,7 @@ START_TEST(test_misc_version) {
|
|
|
fail("Version mismatch");
|
|
|
|
|
|
#if ! defined(XML_UNICODE) || defined(XML_UNICODE_WCHAR_T)
|
|
|
- if (xcstrcmp(version_text, XCS("expat_2.4.4"))) /* needs bump on releases */
|
|
|
+ if (xcstrcmp(version_text, XCS("expat_2.4.6"))) /* needs bump on releases */
|
|
|
fail("XML_*_VERSION in expat.h out of sync?\n");
|
|
|
#else
|
|
|
/* If we have XML_UNICODE defined but not XML_UNICODE_WCHAR_T
|
|
@@ -11727,6 +11939,7 @@ make_suite(void) {
|
|
|
tcase_add_test(tc_basic, test_memory_allocation);
|
|
|
tcase_add_test(tc_basic, test_default_current);
|
|
|
tcase_add_test(tc_basic, test_dtd_elements);
|
|
|
+ tcase_add_test(tc_basic, test_dtd_elements_nesting);
|
|
|
tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
|
|
|
tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
|
|
|
tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
|
|
@@ -11841,6 +12054,7 @@ make_suite(void) {
|
|
|
tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom);
|
|
|
tcase_add_test(tc_basic, test_utf8_in_cdata_section);
|
|
|
tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
|
|
|
+ tcase_add_test(tc_basic, test_utf8_in_start_tags);
|
|
|
tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
|
|
|
tcase_add_test(tc_basic, test_utf16_attribute);
|
|
|
tcase_add_test(tc_basic, test_utf16_second_attr);
|
|
@@ -11849,6 +12063,7 @@ make_suite(void) {
|
|
|
tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
|
|
|
tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
|
|
|
tcase_add_test(tc_basic, test_bad_doctype);
|
|
|
+ tcase_add_test(tc_basic, test_bad_doctype_utf8);
|
|
|
tcase_add_test(tc_basic, test_bad_doctype_utf16);
|
|
|
tcase_add_test(tc_basic, test_bad_doctype_plus);
|
|
|
tcase_add_test(tc_basic, test_bad_doctype_star);
|
|
@@ -11905,6 +12120,7 @@ make_suite(void) {
|
|
|
tcase_add_test(tc_namespace, test_ns_utf16_doctype);
|
|
|
tcase_add_test(tc_namespace, test_ns_invalid_doctype);
|
|
|
tcase_add_test(tc_namespace, test_ns_double_colon_doctype);
|
|
|
+ tcase_add_test(tc_namespace, test_ns_separator_in_uri);
|
|
|
|
|
|
suite_add_tcase(s, tc_misc);
|
|
|
tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);
|