xml_lpm_fuzzer.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. /*
  2. __ __ _
  3. ___\ \/ /_ __ __ _| |_
  4. / _ \\ /| '_ \ / _` | __|
  5. | __// \| |_) | (_| | |_
  6. \___/_/\_\ .__/ \__,_|\__|
  7. |_| XML parser
  8. Copyright (c) 2022 Mark Brand <[email protected]>
  9. Copyright (c) 2025 Sebastian Pipping <[email protected]>
  10. Licensed under the MIT license:
  11. Permission is hereby granted, free of charge, to any person obtaining
  12. a copy of this software and associated documentation files (the
  13. "Software"), to deal in the Software without restriction, including
  14. without limitation the rights to use, copy, modify, merge, publish,
  15. distribute, sublicense, and/or sell copies of the Software, and to permit
  16. persons to whom the Software is furnished to do so, subject to the
  17. following conditions:
  18. The above copyright notice and this permission notice shall be included
  19. in all copies or substantial portions of the Software.
  20. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
  23. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
  24. DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  25. OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  26. USE OR OTHER DEALINGS IN THE SOFTWARE.
  27. */
  28. #if defined(NDEBUG)
  29. # undef NDEBUG // because checks below rely on assert(...)
  30. #endif
  31. #include <assert.h>
  32. #include <stdint.h>
  33. #include <vector>
  34. #include "expat.h"
  35. #include "xml_lpm_fuzzer.pb.h"
  36. #include "src/libfuzzer/libfuzzer_macro.h"
  // Encoding name passed to expat whenever a parser is created or reset.
  // Written by SetEncoding(); a null pointer means no encoding name is passed.
  static const char *g_encoding{nullptr};
  // Document body that ExternalEntityRefHandler parses for every external
  // entity reference; while it is null the handler reports an error instead.
  static const char *g_external_entity{nullptr};
  // Length in bytes of g_external_entity; only read while that pointer is set.
  static size_t g_external_entity_size{0};
  40. void
  41. SetEncoding(const xml_lpm_fuzzer::Encoding &e) {
  42. switch (e) {
  43. case xml_lpm_fuzzer::Encoding::UTF8:
  44. g_encoding = "UTF-8";
  45. break;
  46. case xml_lpm_fuzzer::Encoding::UTF16:
  47. g_encoding = "UTF-16";
  48. break;
  49. case xml_lpm_fuzzer::Encoding::ISO88591:
  50. g_encoding = "ISO-8859-1";
  51. break;
  52. case xml_lpm_fuzzer::Encoding::ASCII:
  53. g_encoding = "US-ASCII";
  54. break;
  55. case xml_lpm_fuzzer::Encoding::NONE:
  56. g_encoding = NULL;
  57. break;
  58. default:
  59. g_encoding = "UNKNOWN";
  60. break;
  61. }
  62. }
  // Number of allocation requests (malloc and realloc combined) observed so
  // far; the fuzzer entry point resets it for every testcase.
  static int g_allocation_count = 0;
  // Ordinals of the allocation requests that must fail. The first request
  // after a reset has ordinal 1.
  static std::vector<int> g_fail_allocations = {};

  // Accounts for one more allocation request and reports whether its ordinal
  // is listed in g_fail_allocations.
  static bool
  NextAllocationMustFail() {
    const int ordinal = ++g_allocation_count;
    for (const int doomed : g_fail_allocations) {
      if (doomed == ordinal) {
        return true;
      }
    }
    return false;
  }

  // malloc replacement handed to expat: fails on the scheduled requests and
  // forwards every other request to malloc().
  void *
  MallocHook(size_t size) {
    if (NextAllocationMustFail()) {
      return NULL;
    }
    return malloc(size);
  }

  // realloc replacement handed to expat: fails on the scheduled requests
  // (leaving `ptr` untouched) and forwards every other request to realloc().
  void *
  ReallocHook(void *ptr, size_t size) {
    if (NextAllocationMustFail()) {
      return NULL;
    }
    return realloc(ptr, size);
  }

  // free replacement handed to expat; releases are neither counted nor failed.
  void
  FreeHook(void *ptr) {
    free(ptr);
  }
  89. XML_Memory_Handling_Suite memory_handling_suite
  90. = {MallocHook, ReallocHook, FreeHook};
  91. void InitializeParser(XML_Parser parser);
  92. // We want a parse function that supports resumption, so that we can cover the
  93. // suspend/resume code.
  94. enum XML_Status
  95. Parse(XML_Parser parser, const char *input, int input_len, int is_final) {
  96. enum XML_Status status = XML_Parse(parser, input, input_len, is_final);
  97. while (status == XML_STATUS_SUSPENDED) {
  98. status = XML_ResumeParser(parser);
  99. }
  100. return status;
  101. }
  102. // When the fuzzer is compiled with instrumentation such as ASan, then the
  103. // accesses in TouchString will fault if they access invalid memory (ie. detect
  104. // either a use-after-free or buffer-overflow). By calling TouchString in each
  105. // of the callbacks, we can check that the arguments meet the API specifications
  106. // in terms of length/null-termination. no_optimize is used to ensure that the
  107. // compiler has to emit actual memory reads, instead of removing them.
  108. static volatile size_t no_optimize = 0;
  109. static void
  110. TouchString(const XML_Char *ptr, int len = -1) {
  111. if (! ptr) {
  112. return;
  113. }
  114. if (len == -1) {
  115. for (XML_Char value = *ptr++; value; value = *ptr++) {
  116. no_optimize += value;
  117. }
  118. } else {
  119. for (int i = 0; i < len; ++i) {
  120. no_optimize += ptr[i];
  121. }
  122. }
  123. }
  124. static void
  125. TouchNodeAndRecurse(XML_Content *content) {
  126. switch (content->type) {
  127. case XML_CTYPE_EMPTY:
  128. case XML_CTYPE_ANY:
  129. assert(content->quant == XML_CQUANT_NONE);
  130. assert(content->name == NULL);
  131. assert(content->numchildren == 0);
  132. assert(content->children == NULL);
  133. break;
  134. case XML_CTYPE_MIXED:
  135. assert(content->quant == XML_CQUANT_NONE
  136. || content->quant == XML_CQUANT_REP);
  137. assert(content->name == NULL);
  138. for (unsigned int i = 0; i < content->numchildren; ++i) {
  139. assert(content->children[i].type == XML_CTYPE_NAME);
  140. assert(content->children[i].quant == XML_CQUANT_NONE);
  141. assert(content->children[i].numchildren == 0);
  142. assert(content->children[i].children == NULL);
  143. TouchString(content->children[i].name);
  144. }
  145. break;
  146. case XML_CTYPE_NAME:
  147. assert((content->quant == XML_CQUANT_NONE)
  148. || (content->quant == XML_CQUANT_OPT)
  149. || (content->quant == XML_CQUANT_REP)
  150. || (content->quant == XML_CQUANT_PLUS));
  151. assert(content->numchildren == 0);
  152. assert(content->children == NULL);
  153. TouchString(content->name);
  154. break;
  155. case XML_CTYPE_CHOICE:
  156. case XML_CTYPE_SEQ:
  157. assert((content->quant == XML_CQUANT_NONE)
  158. || (content->quant == XML_CQUANT_OPT)
  159. || (content->quant == XML_CQUANT_REP)
  160. || (content->quant == XML_CQUANT_PLUS));
  161. assert(content->name == NULL);
  162. for (unsigned int i = 0; i < content->numchildren; ++i) {
  163. TouchNodeAndRecurse(&content->children[i]);
  164. }
  165. break;
  166. default:
  167. assert(false);
  168. }
  169. }
  170. static void XMLCALL
  171. ElementDeclHandler(void *userData, const XML_Char *name, XML_Content *model) {
  172. TouchString(name);
  173. TouchNodeAndRecurse(model);
  174. XML_FreeContentModel((XML_Parser)userData, model);
  175. }
  176. static void XMLCALL
  177. AttlistDeclHandler(void *userData, const XML_Char *elname,
  178. const XML_Char *attname, const XML_Char *atttype,
  179. const XML_Char *dflt, int isrequired) {
  180. (void)userData;
  181. TouchString(elname);
  182. TouchString(attname);
  183. TouchString(atttype);
  184. TouchString(dflt);
  185. (void)isrequired;
  186. }
  187. static void XMLCALL
  188. XmlDeclHandler(void *userData, const XML_Char *version,
  189. const XML_Char *encoding, int standalone) {
  190. (void)userData;
  191. TouchString(version);
  192. TouchString(encoding);
  193. (void)standalone;
  194. }
  195. static void XMLCALL
  196. StartElementHandler(void *userData, const XML_Char *name,
  197. const XML_Char **atts) {
  198. (void)userData;
  199. TouchString(name);
  200. for (size_t i = 0; atts[i] != NULL; ++i) {
  201. TouchString(atts[i]);
  202. }
  203. }
  204. static void XMLCALL
  205. EndElementHandler(void *userData, const XML_Char *name) {
  206. (void)userData;
  207. TouchString(name);
  208. }
  209. static void XMLCALL
  210. CharacterDataHandler(void *userData, const XML_Char *s, int len) {
  211. (void)userData;
  212. TouchString(s, len);
  213. }
  214. static void XMLCALL
  215. ProcessingInstructionHandler(void *userData, const XML_Char *target,
  216. const XML_Char *data) {
  217. (void)userData;
  218. TouchString(target);
  219. TouchString(data);
  220. }
  221. static void XMLCALL
  222. CommentHandler(void *userData, const XML_Char *data) {
  223. TouchString(data);
  224. // Use the comment handler to trigger parser suspend, so that we can get
  225. // coverage of that code.
  226. XML_StopParser((XML_Parser)userData, XML_TRUE);
  227. }
  228. static void XMLCALL
  229. StartCdataSectionHandler(void *userData) {
  230. (void)userData;
  231. }
  232. static void XMLCALL
  233. EndCdataSectionHandler(void *userData) {
  234. (void)userData;
  235. }
  236. static void XMLCALL
  237. DefaultHandler(void *userData, const XML_Char *s, int len) {
  238. (void)userData;
  239. TouchString(s, len);
  240. }
  241. static void XMLCALL
  242. StartDoctypeDeclHandler(void *userData, const XML_Char *doctypeName,
  243. const XML_Char *sysid, const XML_Char *pubid,
  244. int has_internal_subset) {
  245. (void)userData;
  246. TouchString(doctypeName);
  247. TouchString(sysid);
  248. TouchString(pubid);
  249. (void)has_internal_subset;
  250. }
  251. static void XMLCALL
  252. EndDoctypeDeclHandler(void *userData) {
  253. (void)userData;
  254. }
  255. static void XMLCALL
  256. EntityDeclHandler(void *userData, const XML_Char *entityName,
  257. int is_parameter_entity, const XML_Char *value,
  258. int value_length, const XML_Char *base,
  259. const XML_Char *systemId, const XML_Char *publicId,
  260. const XML_Char *notationName) {
  261. (void)userData;
  262. TouchString(entityName);
  263. (void)is_parameter_entity;
  264. TouchString(value, value_length);
  265. TouchString(base);
  266. TouchString(systemId);
  267. TouchString(publicId);
  268. TouchString(notationName);
  269. }
  270. static void XMLCALL
  271. NotationDeclHandler(void *userData, const XML_Char *notationName,
  272. const XML_Char *base, const XML_Char *systemId,
  273. const XML_Char *publicId) {
  274. (void)userData;
  275. TouchString(notationName);
  276. TouchString(base);
  277. TouchString(systemId);
  278. TouchString(publicId);
  279. }
  280. static void XMLCALL
  281. StartNamespaceDeclHandler(void *userData, const XML_Char *prefix,
  282. const XML_Char *uri) {
  283. (void)userData;
  284. TouchString(prefix);
  285. TouchString(uri);
  286. }
  287. static void XMLCALL
  288. EndNamespaceDeclHandler(void *userData, const XML_Char *prefix) {
  289. (void)userData;
  290. TouchString(prefix);
  291. }
  292. static int XMLCALL
  293. NotStandaloneHandler(void *userData) {
  294. (void)userData;
  295. return XML_STATUS_OK;
  296. }
  297. static int XMLCALL
  298. ExternalEntityRefHandler(XML_Parser parser, const XML_Char *context,
  299. const XML_Char *base, const XML_Char *systemId,
  300. const XML_Char *publicId) {
  301. int rc = XML_STATUS_ERROR;
  302. TouchString(context);
  303. TouchString(base);
  304. TouchString(systemId);
  305. TouchString(publicId);
  306. if (g_external_entity) {
  307. XML_Parser ext_parser
  308. = XML_ExternalEntityParserCreate(parser, context, g_encoding);
  309. rc = Parse(ext_parser, g_external_entity, g_external_entity_size, 1);
  310. XML_ParserFree(ext_parser);
  311. }
  312. return rc;
  313. }
  314. static void XMLCALL
  315. SkippedEntityHandler(void *userData, const XML_Char *entityName,
  316. int is_parameter_entity) {
  317. (void)userData;
  318. TouchString(entityName);
  319. (void)is_parameter_entity;
  320. }
  321. static int XMLCALL
  322. UnknownEncodingHandler(void *encodingHandlerData, const XML_Char *name,
  323. XML_Encoding *info) {
  324. (void)encodingHandlerData;
  325. TouchString(name);
  326. (void)info;
  327. return XML_STATUS_ERROR;
  328. }
  329. void
  330. InitializeParser(XML_Parser parser) {
  331. XML_SetUserData(parser, (void *)parser);
  332. XML_SetHashSalt(parser, 0x41414141);
  333. XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  334. XML_SetElementDeclHandler(parser, ElementDeclHandler);
  335. XML_SetAttlistDeclHandler(parser, AttlistDeclHandler);
  336. XML_SetXmlDeclHandler(parser, XmlDeclHandler);
  337. XML_SetElementHandler(parser, StartElementHandler, EndElementHandler);
  338. XML_SetCharacterDataHandler(parser, CharacterDataHandler);
  339. XML_SetProcessingInstructionHandler(parser, ProcessingInstructionHandler);
  340. XML_SetCommentHandler(parser, CommentHandler);
  341. XML_SetCdataSectionHandler(parser, StartCdataSectionHandler,
  342. EndCdataSectionHandler);
  343. // XML_SetDefaultHandler disables entity expansion
  344. XML_SetDefaultHandlerExpand(parser, DefaultHandler);
  345. XML_SetDoctypeDeclHandler(parser, StartDoctypeDeclHandler,
  346. EndDoctypeDeclHandler);
  347. // Note: This is mutually exclusive with XML_SetUnparsedEntityDeclHandler,
  348. // and there isn't any significant code change between the two.
  349. XML_SetEntityDeclHandler(parser, EntityDeclHandler);
  350. XML_SetNotationDeclHandler(parser, NotationDeclHandler);
  351. XML_SetNamespaceDeclHandler(parser, StartNamespaceDeclHandler,
  352. EndNamespaceDeclHandler);
  353. XML_SetNotStandaloneHandler(parser, NotStandaloneHandler);
  354. XML_SetExternalEntityRefHandler(parser, ExternalEntityRefHandler);
  355. XML_SetSkippedEntityHandler(parser, SkippedEntityHandler);
  356. XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, (void *)parser);
  357. }
  358. DEFINE_TEXT_PROTO_FUZZER(const xml_lpm_fuzzer::Testcase &testcase) {
  359. g_external_entity = nullptr;
  360. if (! testcase.actions_size()) {
  361. return;
  362. }
  363. g_allocation_count = 0;
  364. g_fail_allocations.clear();
  365. for (int i = 0; i < testcase.fail_allocations_size(); ++i) {
  366. g_fail_allocations.push_back(testcase.fail_allocations(i));
  367. }
  368. SetEncoding(testcase.encoding());
  369. XML_Parser parser
  370. = XML_ParserCreate_MM(g_encoding, &memory_handling_suite, "|");
  371. InitializeParser(parser);
  372. for (int i = 0; i < testcase.actions_size(); ++i) {
  373. const auto &action = testcase.actions(i);
  374. switch (action.action_case()) {
  375. case xml_lpm_fuzzer::Action::kChunk:
  376. if (XML_STATUS_ERROR
  377. == Parse(parser, action.chunk().data(), action.chunk().size(), 0)) {
  378. // Force a reset after parse error.
  379. XML_ParserReset(parser, g_encoding);
  380. InitializeParser(parser);
  381. }
  382. break;
  383. case xml_lpm_fuzzer::Action::kLastChunk:
  384. Parse(parser, action.last_chunk().data(), action.last_chunk().size(), 1);
  385. XML_ParserReset(parser, g_encoding);
  386. InitializeParser(parser);
  387. break;
  388. case xml_lpm_fuzzer::Action::kReset:
  389. XML_ParserReset(parser, g_encoding);
  390. InitializeParser(parser);
  391. break;
  392. case xml_lpm_fuzzer::Action::kExternalEntity:
  393. g_external_entity = action.external_entity().data();
  394. g_external_entity_size = action.external_entity().size();
  395. break;
  396. default:
  397. break;
  398. }
  399. }
  400. XML_ParserFree(parser);
  401. }