xmlwf.c 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327
  1. /*
  2. __ __ _
  3. ___\ \/ /_ __ __ _| |_
  4. / _ \\ /| '_ \ / _` | __|
  5. | __// \| |_) | (_| | |_
  6. \___/_/\_\ .__/ \__,_|\__|
  7. |_| XML parser
  8. Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
  9. Copyright (c) 2000 Clark Cooper <[email protected]>
  10. Copyright (c) 2001-2003 Fred L. Drake, Jr. <[email protected]>
  11. Copyright (c) 2004-2009 Karl Waclawek <[email protected]>
  12. Copyright (c) 2005-2007 Steven Solie <[email protected]>
  13. Copyright (c) 2016-2026 Sebastian Pipping <[email protected]>
  14. Copyright (c) 2017 Rhodri James <[email protected]>
  15. Copyright (c) 2019 David Loffredo <[email protected]>
  16. Copyright (c) 2020 Joe Orton <[email protected]>
  17. Copyright (c) 2020 Kleber Tarcísio <[email protected]>
  18. Copyright (c) 2021 Tim Bray <[email protected]>
  19. Copyright (c) 2022 Martin Ettl <[email protected]>
  20. Copyright (c) 2022 Sean McBride <[email protected]>
  21. Copyright (c) 2025 Alfonso Gregory <[email protected]>
  22. Licensed under the MIT license:
  23. Permission is hereby granted, free of charge, to any person obtaining
  24. a copy of this software and associated documentation files (the
  25. "Software"), to deal in the Software without restriction, including
  26. without limitation the rights to use, copy, modify, merge, publish,
  27. distribute, sublicense, and/or sell copies of the Software, and to permit
  28. persons to whom the Software is furnished to do so, subject to the
  29. following conditions:
  30. The above copyright notice and this permission notice shall be included
  31. in all copies or substantial portions of the Software.
  32. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  33. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  34. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
  35. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
  36. DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  37. OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  38. USE OR OTHER DEALINGS IN THE SOFTWARE.
  39. */
  40. #include "expat_config.h"
  41. #include <assert.h>
  42. #include <stdio.h>
  43. #include <stdlib.h>
  44. #include <stddef.h>
  45. #include <string.h>
  46. #include <math.h> /* for isnan */
  47. #include <errno.h>
  48. #include "expat.h"
  49. #include "codepage.h"
  50. #include "internal.h" /* for UNUSED_P only */
  51. #include "xmlfile.h"
  52. #include "xmltchar.h"
  53. #ifdef _MSC_VER
  54. # include <crtdbg.h>
  55. #endif
  56. #ifdef XML_UNICODE
  57. # include <wchar.h>
  58. #endif
  /* Process exit statuses used by xmlwf.  The numeric values are part of the
     command line interface, so they are spelled out explicitly. */
  enum ExitCode {
    /* Input well-formed and any requested output written successfully. */
    XMLWF_EXIT_SUCCESS = 0,
    /* Data structures could not be allocated. */
    XMLWF_EXIT_INTERNAL_ERROR = 1,
    XMLWF_EXIT_NOT_WELLFORMED = 2,
    XMLWF_EXIT_OUTPUT_ERROR = 3,
    XMLWF_EXIT_USAGE_ERROR = 4,
  };
  66. /* Structures for handler user data */
  67. typedef struct NotationList {
  68. struct NotationList *next;
  69. const XML_Char *notationName;
  70. const XML_Char *systemId;
  71. const XML_Char *publicId;
  72. } NotationList;
  73. typedef struct xmlwfUserData {
  74. FILE *fp;
  75. NotationList *notationListHead;
  76. const XML_Char *currentDoctypeName;
  77. } XmlwfUserData;
  /* Separator the namespace-aware handlers look for between the namespace
     part and the local part of a name.  This value ensures proper sorting. */
  #define NSSEP T('\001')
  80. static void XMLCALL
  81. characterData(void *userData, const XML_Char *s, int len) {
  82. FILE *fp = ((XmlwfUserData *)userData)->fp;
  83. for (; len > 0; --len, ++s) {
  84. switch (*s) {
  85. case T('&'):
  86. fputts(T("&amp;"), fp);
  87. break;
  88. case T('<'):
  89. fputts(T("&lt;"), fp);
  90. break;
  91. case T('>'):
  92. fputts(T("&gt;"), fp);
  93. break;
  94. #ifdef W3C14N
  95. case 13:
  96. fputts(T("&#xD;"), fp);
  97. break;
  98. #else
  99. case T('"'):
  100. fputts(T("&quot;"), fp);
  101. break;
  102. case 9:
  103. case 10:
  104. case 13:
  105. ftprintf(fp, T("&#%d;"), *s);
  106. break;
  107. #endif
  108. default:
  109. puttc(*s, fp);
  110. break;
  111. }
  112. }
  113. }
  114. static void
  115. attributeValue(FILE *fp, const XML_Char *s) {
  116. puttc(T('='), fp);
  117. puttc(T('"'), fp);
  118. assert(s);
  119. for (;;) {
  120. switch (*s) {
  121. case 0:
  122. case NSSEP:
  123. puttc(T('"'), fp);
  124. return;
  125. case T('&'):
  126. fputts(T("&amp;"), fp);
  127. break;
  128. case T('<'):
  129. fputts(T("&lt;"), fp);
  130. break;
  131. case T('"'):
  132. fputts(T("&quot;"), fp);
  133. break;
  134. #ifdef W3C14N
  135. case 9:
  136. fputts(T("&#x9;"), fp);
  137. break;
  138. case 10:
  139. fputts(T("&#xA;"), fp);
  140. break;
  141. case 13:
  142. fputts(T("&#xD;"), fp);
  143. break;
  144. #else
  145. case T('>'):
  146. fputts(T("&gt;"), fp);
  147. break;
  148. case 9:
  149. case 10:
  150. case 13:
  151. ftprintf(fp, T("&#%d;"), *s);
  152. break;
  153. #endif
  154. default:
  155. puttc(*s, fp);
  156. break;
  157. }
  158. s++;
  159. }
  160. }
  161. /* Lexicographically comparing UTF-8 encoded attribute values,
  162. is equivalent to lexicographically comparing based on the character number. */
  163. static int
  164. attcmp(const void *att1, const void *att2) {
  165. return tcscmp(*(const XML_Char *const *)att1, *(const XML_Char *const *)att2);
  166. }
  167. static void XMLCALL
  168. startElement(void *userData, const XML_Char *name, const XML_Char **atts) {
  169. int nAtts;
  170. const XML_Char **p;
  171. FILE *fp = ((XmlwfUserData *)userData)->fp;
  172. puttc(T('<'), fp);
  173. fputts(name, fp);
  174. p = atts;
  175. while (*p)
  176. ++p;
  177. nAtts = (int)((p - atts) >> 1);
  178. if (nAtts > 1)
  179. qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
  180. while (*atts) {
  181. puttc(T(' '), fp);
  182. fputts(*atts++, fp);
  183. attributeValue(fp, *atts);
  184. atts++;
  185. }
  186. puttc(T('>'), fp);
  187. }
  188. static void XMLCALL
  189. endElement(void *userData, const XML_Char *name) {
  190. FILE *fp = ((XmlwfUserData *)userData)->fp;
  191. puttc(T('<'), fp);
  192. puttc(T('/'), fp);
  193. fputts(name, fp);
  194. puttc(T('>'), fp);
  195. }
  196. static int
  197. nsattcmp(const void *p1, const void *p2) {
  198. const XML_Char *att1 = *(const XML_Char *const *)p1;
  199. const XML_Char *att2 = *(const XML_Char *const *)p2;
  200. int sep1 = (tcsrchr(att1, NSSEP) != 0);
  201. int sep2 = (tcsrchr(att2, NSSEP) != 0);
  202. if (sep1 != sep2)
  203. return sep1 - sep2;
  204. return tcscmp(att1, att2);
  205. }
  206. static void XMLCALL
  207. startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) {
  208. int nAtts;
  209. int nsi;
  210. const XML_Char **p;
  211. FILE *fp = ((XmlwfUserData *)userData)->fp;
  212. const XML_Char *sep;
  213. puttc(T('<'), fp);
  214. sep = tcsrchr(name, NSSEP);
  215. if (sep) {
  216. fputts(T("n1:"), fp);
  217. fputts(sep + 1, fp);
  218. fputts(T(" xmlns:n1"), fp);
  219. attributeValue(fp, name);
  220. nsi = 2;
  221. } else {
  222. fputts(name, fp);
  223. nsi = 1;
  224. }
  225. p = atts;
  226. while (*p)
  227. ++p;
  228. nAtts = (int)((p - atts) >> 1);
  229. if (nAtts > 1)
  230. qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp);
  231. while (*atts) {
  232. name = *atts++;
  233. sep = tcsrchr(name, NSSEP);
  234. puttc(T(' '), fp);
  235. if (sep) {
  236. ftprintf(fp, T("n%d:"), nsi);
  237. fputts(sep + 1, fp);
  238. } else
  239. fputts(name, fp);
  240. attributeValue(fp, *atts);
  241. if (sep) {
  242. ftprintf(fp, T(" xmlns:n%d"), nsi++);
  243. attributeValue(fp, name);
  244. }
  245. atts++;
  246. }
  247. puttc(T('>'), fp);
  248. }
  249. static void XMLCALL
  250. endElementNS(void *userData, const XML_Char *name) {
  251. FILE *fp = ((XmlwfUserData *)userData)->fp;
  252. const XML_Char *sep;
  253. puttc(T('<'), fp);
  254. puttc(T('/'), fp);
  255. sep = tcsrchr(name, NSSEP);
  256. if (sep) {
  257. fputts(T("n1:"), fp);
  258. fputts(sep + 1, fp);
  259. } else
  260. fputts(name, fp);
  261. puttc(T('>'), fp);
  262. }
  263. #ifndef W3C14N
  264. static void XMLCALL
  265. processingInstruction(void *userData, const XML_Char *target,
  266. const XML_Char *data) {
  267. FILE *fp = ((XmlwfUserData *)userData)->fp;
  268. puttc(T('<'), fp);
  269. puttc(T('?'), fp);
  270. fputts(target, fp);
  271. puttc(T(' '), fp);
  272. fputts(data, fp);
  273. puttc(T('?'), fp);
  274. puttc(T('>'), fp);
  275. }
  276. static XML_Char *
  277. xcsdup(const XML_Char *s) {
  278. XML_Char *result;
  279. int count = 0;
  280. size_t numBytes;
  281. /* Get the length of the string, including terminator */
  282. while (s[count++] != 0) {
  283. /* Do nothing */
  284. }
  285. numBytes = count * sizeof(XML_Char);
  286. result = malloc(numBytes);
  287. if (result == NULL)
  288. return NULL;
  289. memcpy(result, s, numBytes);
  290. return result;
  291. }
  292. static void XMLCALL
  293. startDoctypeDecl(void *userData, const XML_Char *doctypeName,
  294. const XML_Char *sysid, const XML_Char *publid,
  295. int has_internal_subset) {
  296. XmlwfUserData *data = (XmlwfUserData *)userData;
  297. UNUSED_P(sysid);
  298. UNUSED_P(publid);
  299. UNUSED_P(has_internal_subset);
  300. data->currentDoctypeName = xcsdup(doctypeName);
  301. }
  302. static void
  303. freeNotations(XmlwfUserData *data) {
  304. NotationList *notationListHead = data->notationListHead;
  305. while (notationListHead != NULL) {
  306. NotationList *next = notationListHead->next;
  307. free((void *)notationListHead->notationName);
  308. free((void *)notationListHead->systemId);
  309. free((void *)notationListHead->publicId);
  310. free(notationListHead);
  311. notationListHead = next;
  312. }
  313. data->notationListHead = NULL;
  314. }
  315. static void
  316. cleanupUserData(XmlwfUserData *userData) {
  317. free((void *)userData->currentDoctypeName);
  318. userData->currentDoctypeName = NULL;
  319. freeNotations(userData);
  320. }
  321. static int
  322. xcscmp(const XML_Char *xs, const XML_Char *xt) {
  323. while (*xs != 0 && *xt != 0) {
  324. if (*xs < *xt)
  325. return -1;
  326. if (*xs > *xt)
  327. return 1;
  328. xs++;
  329. xt++;
  330. }
  331. if (*xs < *xt)
  332. return -1;
  333. if (*xs > *xt)
  334. return 1;
  335. return 0;
  336. }
  337. static int
  338. notationCmp(const void *a, const void *b) {
  339. const NotationList *const n1 = *(const NotationList *const *)a;
  340. const NotationList *const n2 = *(const NotationList *const *)b;
  341. return xcscmp(n1->notationName, n2->notationName);
  342. }
  343. static void XMLCALL
  344. endDoctypeDecl(void *userData) {
  345. XmlwfUserData *data = (XmlwfUserData *)userData;
  346. NotationList **notations;
  347. int notationCount = 0;
  348. NotationList *p;
  349. int i;
  350. /* How many notations do we have? */
  351. for (p = data->notationListHead; p != NULL; p = p->next)
  352. notationCount++;
  353. if (notationCount == 0) {
  354. /* Nothing to report */
  355. goto cleanUp;
  356. }
  357. notations = malloc(notationCount * sizeof(NotationList *));
  358. if (notations == NULL) {
  359. fprintf(stderr, "Unable to sort notations");
  360. goto cleanUp;
  361. }
  362. for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) {
  363. notations[i] = p;
  364. }
  365. qsort(notations, notationCount, sizeof(NotationList *), notationCmp);
  366. /* Output the DOCTYPE header */
  367. fputts(T("<!DOCTYPE "), data->fp);
  368. fputts(data->currentDoctypeName, data->fp);
  369. fputts(T(" [\n"), data->fp);
  370. /* Now the NOTATIONs */
  371. for (i = 0; i < notationCount; i++) {
  372. fputts(T("<!NOTATION "), data->fp);
  373. fputts(notations[i]->notationName, data->fp);
  374. if (notations[i]->publicId != NULL) {
  375. fputts(T(" PUBLIC '"), data->fp);
  376. fputts(notations[i]->publicId, data->fp);
  377. puttc(T('\''), data->fp);
  378. if (notations[i]->systemId != NULL) {
  379. puttc(T(' '), data->fp);
  380. puttc(T('\''), data->fp);
  381. fputts(notations[i]->systemId, data->fp);
  382. puttc(T('\''), data->fp);
  383. }
  384. } else if (notations[i]->systemId != NULL) {
  385. fputts(T(" SYSTEM '"), data->fp);
  386. fputts(notations[i]->systemId, data->fp);
  387. puttc(T('\''), data->fp);
  388. }
  389. puttc(T('>'), data->fp);
  390. puttc(T('\n'), data->fp);
  391. }
  392. /* Finally end the DOCTYPE */
  393. fputts(T("]>\n"), data->fp);
  394. free(notations);
  395. cleanUp:
  396. freeNotations(data);
  397. free((void *)data->currentDoctypeName);
  398. data->currentDoctypeName = NULL;
  399. }
  400. static void XMLCALL
  401. notationDecl(void *userData, const XML_Char *notationName, const XML_Char *base,
  402. const XML_Char *systemId, const XML_Char *publicId) {
  403. XmlwfUserData *data = (XmlwfUserData *)userData;
  404. NotationList *entry = malloc(sizeof(NotationList));
  405. const char *errorMessage = "Unable to store NOTATION for output\n";
  406. UNUSED_P(base);
  407. if (entry == NULL) {
  408. fputs(errorMessage, stderr);
  409. return; /* Nothing we can really do about this */
  410. }
  411. entry->notationName = xcsdup(notationName);
  412. if (entry->notationName == NULL) {
  413. fputs(errorMessage, stderr);
  414. free(entry);
  415. return;
  416. }
  417. if (systemId != NULL) {
  418. entry->systemId = xcsdup(systemId);
  419. if (entry->systemId == NULL) {
  420. fputs(errorMessage, stderr);
  421. free((void *)entry->notationName);
  422. free(entry);
  423. return;
  424. }
  425. } else {
  426. entry->systemId = NULL;
  427. }
  428. if (publicId != NULL) {
  429. entry->publicId = xcsdup(publicId);
  430. if (entry->publicId == NULL) {
  431. fputs(errorMessage, stderr);
  432. free((void *)entry->systemId); /* Safe if it's NULL */
  433. free((void *)entry->notationName);
  434. free(entry);
  435. return;
  436. }
  437. } else {
  438. entry->publicId = NULL;
  439. }
  440. entry->next = data->notationListHead;
  441. data->notationListHead = entry;
  442. }
  443. #endif /* not W3C14N */
  444. static void XMLCALL
  445. defaultCharacterData(void *userData, const XML_Char *s, int len) {
  446. UNUSED_P(s);
  447. UNUSED_P(len);
  448. XML_DefaultCurrent((XML_Parser)userData);
  449. }
  450. static void XMLCALL
  451. defaultStartElement(void *userData, const XML_Char *name,
  452. const XML_Char **atts) {
  453. UNUSED_P(name);
  454. UNUSED_P(atts);
  455. XML_DefaultCurrent((XML_Parser)userData);
  456. }
  457. static void XMLCALL
  458. defaultEndElement(void *userData, const XML_Char *name) {
  459. UNUSED_P(name);
  460. XML_DefaultCurrent((XML_Parser)userData);
  461. }
  462. static void XMLCALL
  463. defaultProcessingInstruction(void *userData, const XML_Char *target,
  464. const XML_Char *data) {
  465. UNUSED_P(target);
  466. UNUSED_P(data);
  467. XML_DefaultCurrent((XML_Parser)userData);
  468. }
  469. static void XMLCALL
  470. nopCharacterData(void *userData, const XML_Char *s, int len) {
  471. UNUSED_P(userData);
  472. UNUSED_P(s);
  473. UNUSED_P(len);
  474. }
  475. static void XMLCALL
  476. nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
  477. UNUSED_P(userData);
  478. UNUSED_P(name);
  479. UNUSED_P(atts);
  480. }
  481. static void XMLCALL
  482. nopEndElement(void *userData, const XML_Char *name) {
  483. UNUSED_P(userData);
  484. UNUSED_P(name);
  485. }
  486. static void XMLCALL
  487. nopProcessingInstruction(void *userData, const XML_Char *target,
  488. const XML_Char *data) {
  489. UNUSED_P(userData);
  490. UNUSED_P(target);
  491. UNUSED_P(data);
  492. }
  493. static void XMLCALL
  494. markup(void *userData, const XML_Char *s, int len) {
  495. FILE *fp = ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp;
  496. for (; len > 0; --len, ++s)
  497. puttc(*s, fp);
  498. }
  499. static void
  500. metaLocation(XML_Parser parser) {
  501. const XML_Char *uri = XML_GetBase(parser);
  502. FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp;
  503. if (uri)
  504. ftprintf(fp, T(" uri=\"%s\""), uri);
  505. ftprintf(fp,
  506. T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"") T(" nbytes=\"%d\"")
  507. T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"") T(" col=\"%")
  508. T(XML_FMT_INT_MOD) T("u\""),
  509. XML_GetCurrentByteIndex(parser), XML_GetCurrentByteCount(parser),
  510. XML_GetCurrentLineNumber(parser),
  511. XML_GetCurrentColumnNumber(parser));
  512. }
  513. static void
  514. metaStartDocument(void *userData) {
  515. fputts(T("<document>\n"),
  516. ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
  517. }
  518. static void
  519. metaEndDocument(void *userData) {
  520. fputts(T("</document>\n"),
  521. ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
  522. }
  523. static void XMLCALL
  524. metaStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
  525. XML_Parser parser = (XML_Parser)userData;
  526. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  527. FILE *fp = data->fp;
  528. const XML_Char **specifiedAttsEnd
  529. = atts + XML_GetSpecifiedAttributeCount(parser);
  530. const XML_Char **idAttPtr;
  531. int idAttIndex = XML_GetIdAttributeIndex(parser);
  532. if (idAttIndex < 0)
  533. idAttPtr = 0;
  534. else
  535. idAttPtr = atts + idAttIndex;
  536. ftprintf(fp, T("<starttag name=\"%s\""), name);
  537. metaLocation(parser);
  538. if (*atts) {
  539. fputts(T(">\n"), fp);
  540. do {
  541. ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
  542. characterData(data, atts[1], (int)tcslen(atts[1]));
  543. if (atts >= specifiedAttsEnd)
  544. fputts(T("\" defaulted=\"yes\"/>\n"), fp);
  545. else if (atts == idAttPtr)
  546. fputts(T("\" id=\"yes\"/>\n"), fp);
  547. else
  548. fputts(T("\"/>\n"), fp);
  549. } while (*(atts += 2));
  550. fputts(T("</starttag>\n"), fp);
  551. } else
  552. fputts(T("/>\n"), fp);
  553. }
  554. static void XMLCALL
  555. metaEndElement(void *userData, const XML_Char *name) {
  556. XML_Parser parser = (XML_Parser)userData;
  557. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  558. FILE *fp = data->fp;
  559. ftprintf(fp, T("<endtag name=\"%s\""), name);
  560. metaLocation(parser);
  561. fputts(T("/>\n"), fp);
  562. }
  563. static void XMLCALL
  564. metaProcessingInstruction(void *userData, const XML_Char *target,
  565. const XML_Char *data) {
  566. XML_Parser parser = (XML_Parser)userData;
  567. XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
  568. FILE *fp = usrData->fp;
  569. ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
  570. characterData(usrData, data, (int)tcslen(data));
  571. puttc(T('"'), fp);
  572. metaLocation(parser);
  573. fputts(T("/>\n"), fp);
  574. }
  575. static void XMLCALL
  576. metaComment(void *userData, const XML_Char *data) {
  577. XML_Parser parser = (XML_Parser)userData;
  578. XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
  579. FILE *fp = usrData->fp;
  580. fputts(T("<comment data=\""), fp);
  581. characterData(usrData, data, (int)tcslen(data));
  582. puttc(T('"'), fp);
  583. metaLocation(parser);
  584. fputts(T("/>\n"), fp);
  585. }
  586. static void XMLCALL
  587. metaStartCdataSection(void *userData) {
  588. XML_Parser parser = (XML_Parser)userData;
  589. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  590. FILE *fp = data->fp;
  591. fputts(T("<startcdata"), fp);
  592. metaLocation(parser);
  593. fputts(T("/>\n"), fp);
  594. }
  595. static void XMLCALL
  596. metaEndCdataSection(void *userData) {
  597. XML_Parser parser = (XML_Parser)userData;
  598. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  599. FILE *fp = data->fp;
  600. fputts(T("<endcdata"), fp);
  601. metaLocation(parser);
  602. fputts(T("/>\n"), fp);
  603. }
  604. static void XMLCALL
  605. metaCharacterData(void *userData, const XML_Char *s, int len) {
  606. XML_Parser parser = (XML_Parser)userData;
  607. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  608. FILE *fp = data->fp;
  609. fputts(T("<chars str=\""), fp);
  610. characterData(data, s, len);
  611. puttc(T('"'), fp);
  612. metaLocation(parser);
  613. fputts(T("/>\n"), fp);
  614. }
  615. static void XMLCALL
  616. metaStartDoctypeDecl(void *userData, const XML_Char *doctypeName,
  617. const XML_Char *sysid, const XML_Char *pubid,
  618. int has_internal_subset) {
  619. XML_Parser parser = (XML_Parser)userData;
  620. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  621. FILE *fp = data->fp;
  622. UNUSED_P(sysid);
  623. UNUSED_P(pubid);
  624. UNUSED_P(has_internal_subset);
  625. ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName);
  626. metaLocation(parser);
  627. fputts(T("/>\n"), fp);
  628. }
  629. static void XMLCALL
  630. metaEndDoctypeDecl(void *userData) {
  631. XML_Parser parser = (XML_Parser)userData;
  632. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  633. FILE *fp = data->fp;
  634. fputts(T("<enddoctype"), fp);
  635. metaLocation(parser);
  636. fputts(T("/>\n"), fp);
  637. }
  638. static void XMLCALL
  639. metaNotationDecl(void *userData, const XML_Char *notationName,
  640. const XML_Char *base, const XML_Char *systemId,
  641. const XML_Char *publicId) {
  642. XML_Parser parser = (XML_Parser)userData;
  643. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  644. FILE *fp = data->fp;
  645. UNUSED_P(base);
  646. ftprintf(fp, T("<notation name=\"%s\""), notationName);
  647. if (publicId)
  648. ftprintf(fp, T(" public=\"%s\""), publicId);
  649. if (systemId) {
  650. fputts(T(" system=\""), fp);
  651. characterData(data, systemId, (int)tcslen(systemId));
  652. puttc(T('"'), fp);
  653. }
  654. metaLocation(parser);
  655. fputts(T("/>\n"), fp);
  656. }
  657. static void XMLCALL
  658. metaEntityDecl(void *userData, const XML_Char *entityName, int is_param,
  659. const XML_Char *value, int value_length, const XML_Char *base,
  660. const XML_Char *systemId, const XML_Char *publicId,
  661. const XML_Char *notationName) {
  662. XML_Parser parser = (XML_Parser)userData;
  663. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  664. FILE *fp = data->fp;
  665. UNUSED_P(is_param);
  666. UNUSED_P(base);
  667. if (value) {
  668. ftprintf(fp, T("<entity name=\"%s\""), entityName);
  669. metaLocation(parser);
  670. puttc(T('>'), fp);
  671. characterData(data, value, value_length);
  672. fputts(T("</entity/>\n"), fp);
  673. } else if (notationName) {
  674. ftprintf(fp, T("<entity name=\"%s\""), entityName);
  675. if (publicId)
  676. ftprintf(fp, T(" public=\"%s\""), publicId);
  677. fputts(T(" system=\""), fp);
  678. characterData(data, systemId, (int)tcslen(systemId));
  679. puttc(T('"'), fp);
  680. ftprintf(fp, T(" notation=\"%s\""), notationName);
  681. metaLocation(parser);
  682. fputts(T("/>\n"), fp);
  683. } else {
  684. ftprintf(fp, T("<entity name=\"%s\""), entityName);
  685. if (publicId)
  686. ftprintf(fp, T(" public=\"%s\""), publicId);
  687. fputts(T(" system=\""), fp);
  688. characterData(data, systemId, (int)tcslen(systemId));
  689. puttc(T('"'), fp);
  690. metaLocation(parser);
  691. fputts(T("/>\n"), fp);
  692. }
  693. }
  694. static void XMLCALL
  695. metaStartNamespaceDecl(void *userData, const XML_Char *prefix,
  696. const XML_Char *uri) {
  697. XML_Parser parser = (XML_Parser)userData;
  698. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  699. FILE *fp = data->fp;
  700. fputts(T("<startns"), fp);
  701. if (prefix)
  702. ftprintf(fp, T(" prefix=\"%s\""), prefix);
  703. if (uri) {
  704. fputts(T(" ns=\""), fp);
  705. characterData(data, uri, (int)tcslen(uri));
  706. fputts(T("\"/>\n"), fp);
  707. } else
  708. fputts(T("/>\n"), fp);
  709. }
  710. static void XMLCALL
  711. metaEndNamespaceDecl(void *userData, const XML_Char *prefix) {
  712. XML_Parser parser = (XML_Parser)userData;
  713. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  714. FILE *fp = data->fp;
  715. if (! prefix)
  716. fputts(T("<endns/>\n"), fp);
  717. else
  718. ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
  719. }
  720. static int XMLCALL
  721. unknownEncodingConvert(void *data, const char *p) {
  722. return codepageConvert(*(int *)data, p);
  723. }
  724. static int XMLCALL
  725. unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info) {
  726. int cp;
  727. static const XML_Char prefixL[] = T("windows-");
  728. static const XML_Char prefixU[] = T("WINDOWS-");
  729. int i;
  730. UNUSED_P(userData);
  731. for (i = 0; prefixU[i]; i++)
  732. if (name[i] != prefixU[i] && name[i] != prefixL[i])
  733. return 0;
  734. cp = 0;
  735. for (; name[i]; i++) {
  736. static const XML_Char digits[] = T("0123456789");
  737. const XML_Char *s = tcschr(digits, name[i]);
  738. if (! s)
  739. return 0;
  740. cp *= 10;
  741. cp += (int)(s - digits);
  742. if (cp >= 0x10000)
  743. return 0;
  744. }
  745. if (! codepageMap(cp, info->map))
  746. return 0;
  747. info->convert = unknownEncodingConvert;
  748. /* We could just cast the code page integer to a void *,
  749. and avoid the use of release. */
  750. info->release = free;
  751. info->data = malloc(sizeof(int));
  752. if (! info->data)
  753. return 0;
  754. *(int *)info->data = cp;
  755. return 1;
  756. }
  757. static int XMLCALL
  758. notStandalone(void *userData) {
  759. UNUSED_P(userData);
  760. return 0;
  761. }
  762. static void
  763. showVersion(XML_Char *prog) {
  764. XML_Char *s = prog;
  765. XML_Char ch;
  766. const XML_Feature *features = XML_GetFeatureList();
  767. while ((ch = *s) != 0) {
  768. if (ch == '/'
  769. #if defined(_WIN32)
  770. || ch == '\\'
  771. #endif
  772. )
  773. prog = s + 1;
  774. ++s;
  775. }
  776. ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion());
  777. if (features != NULL && features[0].feature != XML_FEATURE_END) {
  778. int i = 1;
  779. ftprintf(stdout, T("%s"), features[0].name);
  780. if (features[0].value)
  781. ftprintf(stdout, T("=%ld"), features[0].value);
  782. while (features[i].feature != XML_FEATURE_END) {
  783. ftprintf(stdout, T(", %s"), features[i].name);
  784. if (features[i].value)
  785. ftprintf(stdout, T("=%ld"), features[i].value);
  786. ++i;
  787. }
  788. ftprintf(stdout, T("\n"));
  789. }
  790. }
  791. #if defined(__GNUC__)
  792. __attribute__((noreturn))
  793. #endif
  794. static void
  795. usage(const XML_Char *prog, int rc) {
  796. ftprintf(
  797. stderr,
  798. /* Generated with:
  799. * $ xmlwf/xmlwf_helpgen.sh
  800. * To update, change xmlwf/xmlwf_helpgen.py, then paste the output of
  801. * xmlwf/xmlwf_helpgen.sh in here.
  802. */
  803. /* clang-format off */
  804. T("usage:\n")
  805. T(" %s [OPTIONS] [FILE ...]\n")
  806. T(" %s -h|--help\n")
  807. T(" %s -v|--version\n")
  808. T("\n")
  809. T("xmlwf - Determines if an XML document is well-formed\n")
  810. T("\n")
  811. T("positional arguments:\n")
  812. T(" FILE file to process (default: STDIN)\n")
  813. T("\n")
  814. T("input control arguments:\n")
  815. T(" -s print an error if the document is not [s]tandalone\n")
  816. T(" -n enable [n]amespace processing\n")
  817. T(" -p enable processing of external DTDs and [p]arameter entities\n")
  818. T(" -x enable processing of e[x]ternal entities\n")
  819. T(" (CAREFUL! This makes xmlwf vulnerable to external entity attacks (XXE).)\n")
  820. T(" -e ENCODING override any in-document [e]ncoding declaration\n")
  821. T(" -w enable support for [W]indows code pages\n")
  822. T(" -r disable memory-mapping and use [r]ead calls instead\n")
  823. T(" -g BYTES buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)\n")
  824. T(" -k when processing multiple files, [k]eep processing after first file with error\n")
  825. T("\n")
  826. T("output control arguments:\n")
  827. T(" -d DIRECTORY output [d]estination directory\n")
  828. T(" -c write a [c]opy of input XML, not canonical XML\n")
  829. T(" -m write [m]eta XML, not canonical XML\n")
  830. T(" -t write no XML output for [t]iming of plain parsing\n")
  831. T(" -N enable adding doctype and [n]otation declarations\n")
  832. T("\n")
  833. T("amplification attack protection (e.g. billion laughs):\n")
  834. T(" NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n")
  835. T("\n")
  836. T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n")
  837. T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)\n")
  838. T("\n")
  839. T("reparse deferral:\n")
  840. T(" -q disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n")
  841. T("\n")
  842. T("info arguments:\n")
  843. T(" -h, --help show this [h]elp message and exit\n")
  844. T(" -v, --version show program's [v]ersion number and exit\n")
  845. T("\n")
  846. T("environment variables:\n")
  847. T(" EXPAT_ACCOUNTING_DEBUG=(0|1|2|3)\n")
  848. T(" Control verbosity of accounting debugging (default: 0)\n")
  849. T(" EXPAT_ENTITY_DEBUG=(0|1)\n")
  850. T(" Control verbosity of entity debugging (default: 0)\n")
  851. T(" EXPAT_ENTROPY_DEBUG=(0|1)\n")
  852. T(" Control verbosity of entropy debugging (default: 0)\n")
  853. T(" EXPAT_MALLOC_DEBUG=(0|1|2)\n")
  854. T(" Control verbosity of allocation tracker (default: 0)\n")
  855. T("\n")
  856. T("exit status:\n")
  857. T(" 0 the input files are well-formed and the output (if requested) was written successfully\n")
  858. T(" 1 could not allocate data structures, signals a serious problem with execution environment\n")
  859. T(" 2 one or more input files were not well-formed\n")
  860. T(" 3 could not create an output file\n")
  861. T(" 4 command-line argument error\n")
  862. T("\n")
  863. T("xmlwf of libexpat is software libre, licensed under the MIT license.\n")
  864. T("Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you!\n")
  865. , /* clang-format on */
  866. prog, prog, prog);
  867. exit(rc);
  868. }
  869. #if defined(__MINGW32__) && defined(XML_UNICODE)
  870. /* Silence warning about missing prototype */
  871. int wmain(int argc, XML_Char **argv);
  872. #endif
  873. #define XMLWF_SHIFT_ARG_INTO(constCharStarTarget, argc, argv, i, j) \
  874. { \
  875. if (argv[i][j + 1] == T('\0')) { \
  876. if (++i == argc) { \
  877. usage(argv[0], XMLWF_EXIT_USAGE_ERROR); \
  878. /* usage called exit(..), never gets here */ \
  879. } \
  880. constCharStarTarget = argv[i]; \
  881. } else { \
  882. constCharStarTarget = argv[i] + j + 1; \
  883. } \
  884. i++; \
  885. j = 0; \
  886. }
  887. int
  888. tmain(int argc, XML_Char **argv) {
  889. int i, j;
  890. const XML_Char *outputDir = NULL;
  891. const XML_Char *encoding = NULL;
  892. unsigned processFlags = XML_MAP_FILE;
  893. int windowsCodePages = 0;
  894. int outputType = 0;
  895. int useNamespaces = 0;
  896. int requireStandalone = 0;
  897. int requiresNotations = 0;
  898. int continueOnError = 0;
  899. float attackMaximumAmplification = -1.0f; /* signaling "not set" */
  900. unsigned long long attackThresholdBytes = 0;
  901. XML_Bool attackThresholdGiven = XML_FALSE;
  902. XML_Bool disableDeferral = XML_FALSE;
  903. int exitCode = XMLWF_EXIT_SUCCESS;
  904. enum XML_ParamEntityParsing paramEntityParsing
  905. = XML_PARAM_ENTITY_PARSING_NEVER;
  906. int useStdin = 0;
  907. XmlwfUserData userData = {NULL, NULL, NULL};
  908. #ifdef _MSC_VER
  909. _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
  910. #endif
  911. i = 1;
  912. j = 0;
  913. while (i < argc) {
  914. if (j == 0) {
  915. if (argv[i][0] != T('-'))
  916. break;
  917. if (argv[i][1] == T('-')) {
  918. if (argv[i][2] == T('\0')) {
  919. i++;
  920. break;
  921. } else if (tcscmp(argv[i] + 2, T("help")) == 0) {
  922. usage(argv[0], XMLWF_EXIT_SUCCESS);
  923. // usage called exit(..), never gets here
  924. } else if (tcscmp(argv[i] + 2, T("version")) == 0) {
  925. showVersion(argv[0]);
  926. return XMLWF_EXIT_SUCCESS;
  927. }
  928. }
  929. j++;
  930. }
  931. switch (argv[i][j]) {
  932. case T('r'):
  933. processFlags &= ~XML_MAP_FILE;
  934. j++;
  935. break;
  936. case T('s'):
  937. requireStandalone = 1;
  938. j++;
  939. break;
  940. case T('n'):
  941. useNamespaces = 1;
  942. j++;
  943. break;
  944. case T('p'):
  945. paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS;
  946. /* fall through */
  947. case T('x'):
  948. processFlags |= XML_EXTERNAL_ENTITIES;
  949. j++;
  950. break;
  951. case T('w'):
  952. windowsCodePages = 1;
  953. j++;
  954. break;
  955. case T('m'):
  956. outputType = 'm';
  957. j++;
  958. break;
  959. case T('c'):
  960. outputType = 'c';
  961. useNamespaces = 0;
  962. j++;
  963. break;
  964. case T('t'):
  965. outputType = 't';
  966. j++;
  967. break;
  968. case T('N'):
  969. requiresNotations = 1;
  970. j++;
  971. break;
  972. case T('d'):
  973. XMLWF_SHIFT_ARG_INTO(outputDir, argc, argv, i, j);
  974. break;
  975. case T('e'):
  976. XMLWF_SHIFT_ARG_INTO(encoding, argc, argv, i, j);
  977. break;
  978. case T('h'):
  979. usage(argv[0], XMLWF_EXIT_SUCCESS);
  980. // usage called exit(..), never gets here
  981. case T('v'):
  982. showVersion(argv[0]);
  983. return XMLWF_EXIT_SUCCESS;
  984. case T('g'): {
  985. const XML_Char *valueText = NULL;
  986. XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
  987. errno = 0;
  988. XML_Char *afterValueText = (XML_Char *)valueText;
  989. const long long read_size_bytes_candidate
  990. = tcstoull(valueText, &afterValueText, 10);
  991. if ((errno != 0) || (afterValueText[0] != T('\0'))
  992. || (read_size_bytes_candidate < 1)
  993. || (read_size_bytes_candidate > (INT_MAX / 2 + 1))) {
  994. // This prevents tperror(..) from reporting misleading "[..]: Success"
  995. errno = ERANGE;
  996. tperror(T("invalid buffer size") T(
  997. " (needs an integer from 1 to INT_MAX/2+1 i.e. 1,073,741,824 on most platforms)"));
  998. exit(XMLWF_EXIT_USAGE_ERROR);
  999. }
  1000. g_read_size_bytes = (int)read_size_bytes_candidate;
  1001. break;
  1002. }
  1003. case T('k'):
  1004. continueOnError = 1;
  1005. j++;
  1006. break;
  1007. case T('a'): {
  1008. const XML_Char *valueText = NULL;
  1009. XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
  1010. errno = 0;
  1011. XML_Char *afterValueText = NULL;
  1012. attackMaximumAmplification = tcstof(valueText, &afterValueText);
  1013. if ((errno != 0) || (afterValueText[0] != T('\0'))
  1014. || isnan(attackMaximumAmplification)
  1015. || (attackMaximumAmplification < 1.0f)) {
  1016. // This prevents tperror(..) from reporting misleading "[..]: Success"
  1017. errno = ERANGE;
  1018. tperror(T("invalid amplification limit") T(
  1019. " (needs a floating point number greater or equal than 1.0)"));
  1020. exit(XMLWF_EXIT_USAGE_ERROR);
  1021. }
  1022. #if XML_GE == 0
  1023. ftprintf(stderr,
  1024. T("Warning: Given amplification limit ignored")
  1025. T(", xmlwf has been compiled without DTD/GE support.\n"));
  1026. #endif
  1027. break;
  1028. }
  1029. case T('b'): {
  1030. const XML_Char *valueText = NULL;
  1031. XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
  1032. errno = 0;
  1033. XML_Char *afterValueText = (XML_Char *)valueText;
  1034. attackThresholdBytes = tcstoull(valueText, &afterValueText, 10);
  1035. if ((errno != 0) || (afterValueText[0] != T('\0'))) {
  1036. // This prevents tperror(..) from reporting misleading "[..]: Success"
  1037. errno = ERANGE;
  1038. tperror(T("invalid ignore threshold")
  1039. T(" (needs an integer from 0 to 2^64-1)"));
  1040. exit(XMLWF_EXIT_USAGE_ERROR);
  1041. }
  1042. attackThresholdGiven = XML_TRUE;
  1043. #if XML_GE == 0
  1044. ftprintf(stderr,
  1045. T("Warning: Given attack threshold ignored")
  1046. T(", xmlwf has been compiled without DTD/GE support.\n"));
  1047. #endif
  1048. break;
  1049. }
  1050. case T('q'): {
  1051. disableDeferral = XML_TRUE;
  1052. j++;
  1053. break;
  1054. }
  1055. case T('\0'):
  1056. if (j > 1) {
  1057. i++;
  1058. j = 0;
  1059. break;
  1060. }
  1061. /* fall through */
  1062. default:
  1063. usage(argv[0], XMLWF_EXIT_USAGE_ERROR);
  1064. // usage called exit(..), never gets here
  1065. }
  1066. }
  1067. if (i == argc) {
  1068. useStdin = 1;
  1069. processFlags &= ~XML_MAP_FILE;
  1070. i--;
  1071. }
  1072. for (; i < argc; i++) {
  1073. XML_Char *outName = 0;
  1074. int result;
  1075. XML_Parser parser;
  1076. if (useNamespaces)
  1077. parser = XML_ParserCreateNS(encoding, NSSEP);
  1078. else
  1079. parser = XML_ParserCreate(encoding);
  1080. if (! parser) {
  1081. tperror(T("Could not instantiate parser"));
  1082. exit(XMLWF_EXIT_INTERNAL_ERROR);
  1083. }
  1084. if (attackMaximumAmplification != -1.0f) {
  1085. #if XML_GE == 1
  1086. XML_SetBillionLaughsAttackProtectionMaximumAmplification(
  1087. parser, attackMaximumAmplification);
  1088. XML_SetAllocTrackerMaximumAmplification(parser,
  1089. attackMaximumAmplification);
  1090. #endif
  1091. }
  1092. if (attackThresholdGiven) {
  1093. #if XML_GE == 1
  1094. XML_SetBillionLaughsAttackProtectionActivationThreshold(
  1095. parser, attackThresholdBytes);
  1096. XML_SetAllocTrackerActivationThreshold(parser, attackThresholdBytes);
  1097. #else
  1098. (void)attackThresholdBytes; // silence -Wunused-but-set-variable
  1099. #endif
  1100. }
  1101. if (disableDeferral) {
  1102. const XML_Bool success = XML_SetReparseDeferralEnabled(parser, XML_FALSE);
  1103. if (! success) {
  1104. // This prevents tperror(..) from reporting misleading "[..]: Success"
  1105. errno = EINVAL;
  1106. tperror(T("Failed to disable reparse deferral"));
  1107. exit(XMLWF_EXIT_INTERNAL_ERROR);
  1108. }
  1109. }
  1110. if (requireStandalone)
  1111. XML_SetNotStandaloneHandler(parser, notStandalone);
  1112. XML_SetParamEntityParsing(parser, paramEntityParsing);
  1113. if (outputType == 't') {
  1114. /* This is for doing timings; this gives a more realistic estimate of
  1115. the parsing time. */
  1116. outputDir = 0;
  1117. XML_SetElementHandler(parser, nopStartElement, nopEndElement);
  1118. XML_SetCharacterDataHandler(parser, nopCharacterData);
  1119. XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
  1120. } else if (outputDir) {
  1121. const XML_Char *delim = T("/");
  1122. const XML_Char *file = useStdin ? T("STDIN") : argv[i];
  1123. if (! useStdin) {
  1124. /* Jump after last (back)slash */
  1125. const XML_Char *lastDelim = tcsrchr(file, delim[0]);
  1126. if (lastDelim)
  1127. file = lastDelim + 1;
  1128. #if defined(_WIN32)
  1129. else {
  1130. const XML_Char *winDelim = T("\\");
  1131. lastDelim = tcsrchr(file, winDelim[0]);
  1132. if (lastDelim) {
  1133. file = lastDelim + 1;
  1134. delim = winDelim;
  1135. }
  1136. }
  1137. #endif
  1138. }
  1139. outName = (XML_Char *)malloc((tcslen(outputDir) + tcslen(file) + 2)
  1140. * sizeof(XML_Char));
  1141. if (! outName) {
  1142. tperror(T("Could not allocate memory"));
  1143. exit(XMLWF_EXIT_INTERNAL_ERROR);
  1144. }
  1145. tcscpy(outName, outputDir);
  1146. tcscat(outName, delim);
  1147. tcscat(outName, file);
  1148. userData.fp = tfopen(outName, T("wb"));
  1149. if (! userData.fp) {
  1150. tperror(outName);
  1151. exitCode = XMLWF_EXIT_OUTPUT_ERROR;
  1152. free(outName);
  1153. XML_ParserFree(parser);
  1154. if (continueOnError) {
  1155. continue;
  1156. } else {
  1157. break;
  1158. }
  1159. }
  1160. setvbuf(userData.fp, NULL, _IOFBF, 16384);
  1161. #ifdef XML_UNICODE
  1162. puttc(0xFEFF, userData.fp);
  1163. #endif
  1164. XML_SetUserData(parser, &userData);
  1165. switch (outputType) {
  1166. case 'm':
  1167. XML_UseParserAsHandlerArg(parser);
  1168. XML_SetElementHandler(parser, metaStartElement, metaEndElement);
  1169. XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
  1170. XML_SetCommentHandler(parser, metaComment);
  1171. XML_SetCdataSectionHandler(parser, metaStartCdataSection,
  1172. metaEndCdataSection);
  1173. XML_SetCharacterDataHandler(parser, metaCharacterData);
  1174. XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl,
  1175. metaEndDoctypeDecl);
  1176. XML_SetEntityDeclHandler(parser, metaEntityDecl);
  1177. XML_SetNotationDeclHandler(parser, metaNotationDecl);
  1178. XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl,
  1179. metaEndNamespaceDecl);
  1180. metaStartDocument(parser);
  1181. break;
  1182. case 'c':
  1183. XML_UseParserAsHandlerArg(parser);
  1184. XML_SetDefaultHandler(parser, markup);
  1185. XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
  1186. XML_SetCharacterDataHandler(parser, defaultCharacterData);
  1187. XML_SetProcessingInstructionHandler(parser,
  1188. defaultProcessingInstruction);
  1189. break;
  1190. default:
  1191. if (useNamespaces)
  1192. XML_SetElementHandler(parser, startElementNS, endElementNS);
  1193. else
  1194. XML_SetElementHandler(parser, startElement, endElement);
  1195. XML_SetCharacterDataHandler(parser, characterData);
  1196. #ifndef W3C14N
  1197. XML_SetProcessingInstructionHandler(parser, processingInstruction);
  1198. if (requiresNotations) {
  1199. XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl);
  1200. XML_SetNotationDeclHandler(parser, notationDecl);
  1201. }
  1202. #endif /* not W3C14N */
  1203. break;
  1204. }
  1205. }
  1206. if (windowsCodePages)
  1207. XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
  1208. result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags);
  1209. if (outputDir) {
  1210. if (outputType == 'm')
  1211. metaEndDocument(parser);
  1212. fclose(userData.fp);
  1213. if (! result) {
  1214. tremove(outName);
  1215. }
  1216. free(outName);
  1217. }
  1218. XML_ParserFree(parser);
  1219. if (! result) {
  1220. exitCode = XMLWF_EXIT_NOT_WELLFORMED;
  1221. cleanupUserData(&userData);
  1222. if (! continueOnError) {
  1223. break;
  1224. }
  1225. }
  1226. }
  1227. return exitCode;
  1228. }