xmlwf.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138
  1. /*
  2. __ __ _
  3. ___\ \/ /_ __ __ _| |_
  4. / _ \\ /| '_ \ / _` | __|
  5. | __// \| |_) | (_| | |_
  6. \___/_/\_\ .__/ \__,_|\__|
  7. |_| XML parser
  8. Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
  9. Copyright (c) 2000-2017 Expat development team
  10. Licensed under the MIT license:
  11. Permission is hereby granted, free of charge, to any person obtaining
  12. a copy of this software and associated documentation files (the
  13. "Software"), to deal in the Software without restriction, including
  14. without limitation the rights to use, copy, modify, merge, publish,
  15. distribute, sublicense, and/or sell copies of the Software, and to permit
  16. persons to whom the Software is furnished to do so, subject to the
  17. following conditions:
  18. The above copyright notice and this permission notice shall be included
  19. in all copies or substantial portions of the Software.
  20. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
  23. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
  24. DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  25. OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  26. USE OR OTHER DEALINGS IN THE SOFTWARE.
  27. */
  28. #include <assert.h>
  29. #include <stdio.h>
  30. #include <stdlib.h>
  31. #include <stddef.h>
  32. #include <string.h>
  33. #include "expat.h"
  34. #include "codepage.h"
  35. #include "internal.h" /* for UNUSED_P only */
  36. #include "xmlfile.h"
  37. #include "xmltchar.h"
  38. #ifdef _MSC_VER
  39. # include <crtdbg.h>
  40. #endif
  41. #ifdef XML_UNICODE
  42. # include <wchar.h>
  43. #endif
  44. /* Structures for handler user data */
/* One node of a singly linked list of NOTATION declarations.  The three
   strings are heap copies made in notationDecl() and released in
   freeNotations(). */
typedef struct NotationList {
  struct NotationList *next;      /* following node, or NULL at the tail */
  const XML_Char *notationName;   /* name of the notation; never NULL once linked */
  const XML_Char *systemId;       /* system identifier, or NULL if none was given */
  const XML_Char *publicId;       /* public identifier, or NULL if none was given */
} NotationList;
/* State shared by the output handlers in this file. */
typedef struct xmlwfUserData {
  FILE *fp;                           /* stream the handlers write their output to */
  NotationList *notationListHead;     /* NOTATIONs collected so far; NULL when empty */
  const XML_Char *currentDoctypeName; /* heap copy of the DOCTYPE name set by
                                         startDoctypeDecl(); NULL outside a DOCTYPE */
} XmlwfUserData;
/* Separator between namespace URI and local name in the names the
   namespace-aware handlers receive (it is the value handed to
   XML_ParserCreateNS).  Using this low control character ensures proper
   sorting -- presumably because it compares below every character that can
   appear in a name; confirm before changing it. */
#define NSSEP T('\001')
  58. static void XMLCALL
  59. characterData(void *userData, const XML_Char *s, int len)
  60. {
  61. FILE *fp = ((XmlwfUserData *)userData)->fp;
  62. for (; len > 0; --len, ++s) {
  63. switch (*s) {
  64. case T('&'):
  65. fputts(T("&amp;"), fp);
  66. break;
  67. case T('<'):
  68. fputts(T("&lt;"), fp);
  69. break;
  70. case T('>'):
  71. fputts(T("&gt;"), fp);
  72. break;
  73. #ifdef W3C14N
  74. case 13:
  75. fputts(T("&#xD;"), fp);
  76. break;
  77. #else
  78. case T('"'):
  79. fputts(T("&quot;"), fp);
  80. break;
  81. case 9:
  82. case 10:
  83. case 13:
  84. ftprintf(fp, T("&#%d;"), *s);
  85. break;
  86. #endif
  87. default:
  88. puttc(*s, fp);
  89. break;
  90. }
  91. }
  92. }
  93. static void
  94. attributeValue(FILE *fp, const XML_Char *s)
  95. {
  96. puttc(T('='), fp);
  97. puttc(T('"'), fp);
  98. assert(s);
  99. for (;;) {
  100. switch (*s) {
  101. case 0:
  102. case NSSEP:
  103. puttc(T('"'), fp);
  104. return;
  105. case T('&'):
  106. fputts(T("&amp;"), fp);
  107. break;
  108. case T('<'):
  109. fputts(T("&lt;"), fp);
  110. break;
  111. case T('"'):
  112. fputts(T("&quot;"), fp);
  113. break;
  114. #ifdef W3C14N
  115. case 9:
  116. fputts(T("&#x9;"), fp);
  117. break;
  118. case 10:
  119. fputts(T("&#xA;"), fp);
  120. break;
  121. case 13:
  122. fputts(T("&#xD;"), fp);
  123. break;
  124. #else
  125. case T('>'):
  126. fputts(T("&gt;"), fp);
  127. break;
  128. case 9:
  129. case 10:
  130. case 13:
  131. ftprintf(fp, T("&#%d;"), *s);
  132. break;
  133. #endif
  134. default:
  135. puttc(*s, fp);
  136. break;
  137. }
  138. s++;
  139. }
  140. }
  141. /* Lexicographically comparing UTF-8 encoded attribute values,
  142. is equivalent to lexicographically comparing based on the character number. */
  143. static int
  144. attcmp(const void *att1, const void *att2)
  145. {
  146. return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2);
  147. }
  148. static void XMLCALL
  149. startElement(void *userData, const XML_Char *name, const XML_Char **atts)
  150. {
  151. int nAtts;
  152. const XML_Char **p;
  153. FILE *fp = ((XmlwfUserData *)userData)->fp;
  154. puttc(T('<'), fp);
  155. fputts(name, fp);
  156. p = atts;
  157. while (*p)
  158. ++p;
  159. nAtts = (int)((p - atts) >> 1);
  160. if (nAtts > 1)
  161. qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
  162. while (*atts) {
  163. puttc(T(' '), fp);
  164. fputts(*atts++, fp);
  165. attributeValue(fp, *atts);
  166. atts++;
  167. }
  168. puttc(T('>'), fp);
  169. }
  170. static void XMLCALL
  171. endElement(void *userData, const XML_Char *name)
  172. {
  173. FILE *fp = ((XmlwfUserData *)userData)->fp;
  174. puttc(T('<'), fp);
  175. puttc(T('/'), fp);
  176. fputts(name, fp);
  177. puttc(T('>'), fp);
  178. }
  179. static int
  180. nsattcmp(const void *p1, const void *p2)
  181. {
  182. const XML_Char *att1 = *(const XML_Char **)p1;
  183. const XML_Char *att2 = *(const XML_Char **)p2;
  184. int sep1 = (tcsrchr(att1, NSSEP) != 0);
  185. int sep2 = (tcsrchr(att1, NSSEP) != 0);
  186. if (sep1 != sep2)
  187. return sep1 - sep2;
  188. return tcscmp(att1, att2);
  189. }
  190. static void XMLCALL
  191. startElementNS(void *userData, const XML_Char *name, const XML_Char **atts)
  192. {
  193. int nAtts;
  194. int nsi;
  195. const XML_Char **p;
  196. FILE *fp = ((XmlwfUserData *)userData)->fp;
  197. const XML_Char *sep;
  198. puttc(T('<'), fp);
  199. sep = tcsrchr(name, NSSEP);
  200. if (sep) {
  201. fputts(T("n1:"), fp);
  202. fputts(sep + 1, fp);
  203. fputts(T(" xmlns:n1"), fp);
  204. attributeValue(fp, name);
  205. nsi = 2;
  206. }
  207. else {
  208. fputts(name, fp);
  209. nsi = 1;
  210. }
  211. p = atts;
  212. while (*p)
  213. ++p;
  214. nAtts = (int)((p - atts) >> 1);
  215. if (nAtts > 1)
  216. qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp);
  217. while (*atts) {
  218. name = *atts++;
  219. sep = tcsrchr(name, NSSEP);
  220. puttc(T(' '), fp);
  221. if (sep) {
  222. ftprintf(fp, T("n%d:"), nsi);
  223. fputts(sep + 1, fp);
  224. }
  225. else
  226. fputts(name, fp);
  227. attributeValue(fp, *atts);
  228. if (sep) {
  229. ftprintf(fp, T(" xmlns:n%d"), nsi++);
  230. attributeValue(fp, name);
  231. }
  232. atts++;
  233. }
  234. puttc(T('>'), fp);
  235. }
  236. static void XMLCALL
  237. endElementNS(void *userData, const XML_Char *name)
  238. {
  239. FILE *fp = ((XmlwfUserData *)userData)->fp;
  240. const XML_Char *sep;
  241. puttc(T('<'), fp);
  242. puttc(T('/'), fp);
  243. sep = tcsrchr(name, NSSEP);
  244. if (sep) {
  245. fputts(T("n1:"), fp);
  246. fputts(sep + 1, fp);
  247. }
  248. else
  249. fputts(name, fp);
  250. puttc(T('>'), fp);
  251. }
  252. #ifndef W3C14N
  253. static void XMLCALL
  254. processingInstruction(void *userData, const XML_Char *target,
  255. const XML_Char *data)
  256. {
  257. FILE *fp = ((XmlwfUserData *)userData)->fp;
  258. puttc(T('<'), fp);
  259. puttc(T('?'), fp);
  260. fputts(target, fp);
  261. puttc(T(' '), fp);
  262. fputts(data, fp);
  263. puttc(T('?'), fp);
  264. puttc(T('>'), fp);
  265. }
  266. static XML_Char *xcsdup(const XML_Char *s)
  267. {
  268. XML_Char *result;
  269. int count = 0;
  270. int numBytes;
  271. /* Get the length of the string, including terminator */
  272. while (s[count++] != 0) {
  273. /* Do nothing */
  274. }
  275. numBytes = count * sizeof(XML_Char);
  276. result = malloc(numBytes);
  277. if (result == NULL)
  278. return NULL;
  279. memcpy(result, s, numBytes);
  280. return result;
  281. }
  282. static void XMLCALL
  283. startDoctypeDecl(void *userData,
  284. const XML_Char *doctypeName,
  285. const XML_Char *UNUSED_P(sysid),
  286. const XML_Char *UNUSED_P(publid),
  287. int UNUSED_P(has_internal_subset))
  288. {
  289. XmlwfUserData *data = (XmlwfUserData *)userData;
  290. data->currentDoctypeName = xcsdup(doctypeName);
  291. }
  292. static void
  293. freeNotations(XmlwfUserData *data)
  294. {
  295. NotationList *notationListHead = data->notationListHead;
  296. while (notationListHead != NULL) {
  297. NotationList *next = notationListHead->next;
  298. free((void *)notationListHead->notationName);
  299. free((void *)notationListHead->systemId);
  300. free((void *)notationListHead->publicId);
  301. free(notationListHead);
  302. notationListHead = next;
  303. }
  304. data->notationListHead = NULL;
  305. }
  306. static int xcscmp(const XML_Char *xs, const XML_Char *xt)
  307. {
  308. while (*xs != 0 && *xt != 0) {
  309. if (*xs < *xt)
  310. return -1;
  311. if (*xs > *xt)
  312. return 1;
  313. xs++;
  314. xt++;
  315. }
  316. if (*xs < *xt)
  317. return -1;
  318. if (*xs > *xt)
  319. return 1;
  320. return 0;
  321. }
  322. static int
  323. notationCmp(const void *a, const void *b)
  324. {
  325. const NotationList * const n1 = *(NotationList **)a;
  326. const NotationList * const n2 = *(NotationList **)b;
  327. return xcscmp(n1->notationName, n2->notationName);
  328. }
  329. static void XMLCALL
  330. endDoctypeDecl(void *userData)
  331. {
  332. XmlwfUserData *data = (XmlwfUserData *)userData;
  333. NotationList **notations;
  334. int notationCount = 0;
  335. NotationList *p;
  336. int i;
  337. /* How many notations do we have? */
  338. for (p = data->notationListHead; p != NULL; p = p->next)
  339. notationCount++;
  340. if (notationCount == 0) {
  341. /* Nothing to report */
  342. free((void *)data->currentDoctypeName);
  343. data->currentDoctypeName = NULL;
  344. return;
  345. }
  346. notations = malloc(notationCount * sizeof(NotationList *));
  347. if (notations == NULL) {
  348. fprintf(stderr, "Unable to sort notations");
  349. freeNotations(data);
  350. return;
  351. }
  352. for (p = data->notationListHead, i = 0;
  353. i < notationCount;
  354. p = p->next, i++) {
  355. notations[i] = p;
  356. }
  357. qsort(notations, notationCount, sizeof(NotationList *), notationCmp);
  358. /* Output the DOCTYPE header */
  359. fputts(T("<!DOCTYPE "), data->fp);
  360. fputts(data->currentDoctypeName, data->fp);
  361. fputts(T(" [\n"), data->fp);
  362. /* Now the NOTATIONs */
  363. for (i = 0; i < notationCount; i++) {
  364. fputts(T("<!NOTATION "), data->fp);
  365. fputts(notations[i]->notationName, data->fp);
  366. if (notations[i]->publicId != NULL) {
  367. fputts(T(" PUBLIC '"), data->fp);
  368. fputts(notations[i]->publicId, data->fp);
  369. puttc(T('\''), data->fp);
  370. if (notations[i]->systemId != NULL) {
  371. puttc(T(' '), data->fp);
  372. puttc(T('\''), data->fp);
  373. fputts(notations[i]->systemId, data->fp);
  374. puttc(T('\''), data->fp);
  375. }
  376. }
  377. else if (notations[i]->systemId != NULL) {
  378. fputts(T(" SYSTEM '"), data->fp);
  379. fputts(notations[i]->systemId, data->fp);
  380. puttc(T('\''), data->fp);
  381. }
  382. puttc(T('>'), data->fp);
  383. puttc(T('\n'), data->fp);
  384. }
  385. /* Finally end the DOCTYPE */
  386. fputts(T("]>\n"), data->fp);
  387. free(notations);
  388. freeNotations(data);
  389. free((void *)data->currentDoctypeName);
  390. data->currentDoctypeName = NULL;
  391. }
  392. static void XMLCALL
  393. notationDecl(void *userData,
  394. const XML_Char *notationName,
  395. const XML_Char *UNUSED_P(base),
  396. const XML_Char *systemId,
  397. const XML_Char *publicId)
  398. {
  399. XmlwfUserData *data = (XmlwfUserData *)userData;
  400. NotationList *entry = malloc(sizeof(NotationList));
  401. const char *errorMessage = "Unable to store NOTATION for output\n";
  402. if (entry == NULL) {
  403. fputs(errorMessage, stderr);
  404. return; /* Nothing we can really do about this */
  405. }
  406. entry->notationName = xcsdup(notationName);
  407. if (entry->notationName == NULL) {
  408. fputs(errorMessage, stderr);
  409. free(entry);
  410. return;
  411. }
  412. if (systemId != NULL) {
  413. entry->systemId = xcsdup(systemId);
  414. if (entry->systemId == NULL) {
  415. fputs(errorMessage, stderr);
  416. free((void *)entry->notationName);
  417. free(entry);
  418. return;
  419. }
  420. }
  421. else {
  422. entry->systemId = NULL;
  423. }
  424. if (publicId != NULL) {
  425. entry->publicId = xcsdup(publicId);
  426. if (entry->publicId == NULL) {
  427. fputs(errorMessage, stderr);
  428. free((void *)entry->systemId); /* Safe if it's NULL */
  429. free((void *)entry->notationName);
  430. free(entry);
  431. return;
  432. }
  433. }
  434. else {
  435. entry->publicId = NULL;
  436. }
  437. entry->next = data->notationListHead;
  438. data->notationListHead = entry;
  439. }
  440. #endif /* not W3C14N */
  441. static void XMLCALL
  442. defaultCharacterData(void *userData, const XML_Char *UNUSED_P(s), int UNUSED_P(len))
  443. {
  444. XML_DefaultCurrent((XML_Parser) userData);
  445. }
  446. static void XMLCALL
  447. defaultStartElement(void *userData, const XML_Char *UNUSED_P(name),
  448. const XML_Char **UNUSED_P(atts))
  449. {
  450. XML_DefaultCurrent((XML_Parser) userData);
  451. }
  452. static void XMLCALL
  453. defaultEndElement(void *userData, const XML_Char *UNUSED_P(name))
  454. {
  455. XML_DefaultCurrent((XML_Parser) userData);
  456. }
  457. static void XMLCALL
  458. defaultProcessingInstruction(void *userData, const XML_Char *UNUSED_P(target),
  459. const XML_Char *UNUSED_P(data))
  460. {
  461. XML_DefaultCurrent((XML_Parser) userData);
  462. }
  463. static void XMLCALL
  464. nopCharacterData(void *UNUSED_P(userData), const XML_Char *UNUSED_P(s), int UNUSED_P(len))
  465. {
  466. }
  467. static void XMLCALL
  468. nopStartElement(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts))
  469. {
  470. }
  471. static void XMLCALL
  472. nopEndElement(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name))
  473. {
  474. }
  475. static void XMLCALL
  476. nopProcessingInstruction(void *UNUSED_P(userData), const XML_Char *UNUSED_P(target),
  477. const XML_Char *UNUSED_P(data))
  478. {
  479. }
  480. static void XMLCALL
  481. markup(void *userData, const XML_Char *s, int len)
  482. {
  483. FILE *fp = ((XmlwfUserData *)XML_GetUserData((XML_Parser) userData))->fp;
  484. for (; len > 0; --len, ++s)
  485. puttc(*s, fp);
  486. }
  487. static void
  488. metaLocation(XML_Parser parser)
  489. {
  490. const XML_Char *uri = XML_GetBase(parser);
  491. FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp;
  492. if (uri)
  493. ftprintf(fp, T(" uri=\"%s\""), uri);
  494. ftprintf(fp,
  495. T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"")
  496. T(" nbytes=\"%d\"")
  497. T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"")
  498. T(" col=\"%") T(XML_FMT_INT_MOD) T("u\""),
  499. XML_GetCurrentByteIndex(parser),
  500. XML_GetCurrentByteCount(parser),
  501. XML_GetCurrentLineNumber(parser),
  502. XML_GetCurrentColumnNumber(parser));
  503. }
  504. static void
  505. metaStartDocument(void *userData)
  506. {
  507. fputts(T("<document>\n"),
  508. ((XmlwfUserData *)XML_GetUserData((XML_Parser) userData))->fp);
  509. }
  510. static void
  511. metaEndDocument(void *userData)
  512. {
  513. fputts(T("</document>\n"),
  514. ((XmlwfUserData *)XML_GetUserData((XML_Parser) userData))->fp);
  515. }
  516. static void XMLCALL
  517. metaStartElement(void *userData, const XML_Char *name,
  518. const XML_Char **atts)
  519. {
  520. XML_Parser parser = (XML_Parser) userData;
  521. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  522. FILE *fp = data->fp;
  523. const XML_Char **specifiedAttsEnd
  524. = atts + XML_GetSpecifiedAttributeCount(parser);
  525. const XML_Char **idAttPtr;
  526. int idAttIndex = XML_GetIdAttributeIndex(parser);
  527. if (idAttIndex < 0)
  528. idAttPtr = 0;
  529. else
  530. idAttPtr = atts + idAttIndex;
  531. ftprintf(fp, T("<starttag name=\"%s\""), name);
  532. metaLocation(parser);
  533. if (*atts) {
  534. fputts(T(">\n"), fp);
  535. do {
  536. ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
  537. characterData(data, atts[1], (int)tcslen(atts[1]));
  538. if (atts >= specifiedAttsEnd)
  539. fputts(T("\" defaulted=\"yes\"/>\n"), fp);
  540. else if (atts == idAttPtr)
  541. fputts(T("\" id=\"yes\"/>\n"), fp);
  542. else
  543. fputts(T("\"/>\n"), fp);
  544. } while (*(atts += 2));
  545. fputts(T("</starttag>\n"), fp);
  546. }
  547. else
  548. fputts(T("/>\n"), fp);
  549. }
  550. static void XMLCALL
  551. metaEndElement(void *userData, const XML_Char *name)
  552. {
  553. XML_Parser parser = (XML_Parser) userData;
  554. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  555. FILE *fp = data->fp;
  556. ftprintf(fp, T("<endtag name=\"%s\""), name);
  557. metaLocation(parser);
  558. fputts(T("/>\n"), fp);
  559. }
  560. static void XMLCALL
  561. metaProcessingInstruction(void *userData, const XML_Char *target,
  562. const XML_Char *data)
  563. {
  564. XML_Parser parser = (XML_Parser) userData;
  565. XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
  566. FILE *fp = usrData->fp;
  567. ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
  568. characterData(usrData, data, (int)tcslen(data));
  569. puttc(T('"'), fp);
  570. metaLocation(parser);
  571. fputts(T("/>\n"), fp);
  572. }
  573. static void XMLCALL
  574. metaComment(void *userData, const XML_Char *data)
  575. {
  576. XML_Parser parser = (XML_Parser) userData;
  577. XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
  578. FILE *fp = usrData->fp;
  579. fputts(T("<comment data=\""), fp);
  580. characterData(usrData, data, (int)tcslen(data));
  581. puttc(T('"'), fp);
  582. metaLocation(parser);
  583. fputts(T("/>\n"), fp);
  584. }
  585. static void XMLCALL
  586. metaStartCdataSection(void *userData)
  587. {
  588. XML_Parser parser = (XML_Parser) userData;
  589. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  590. FILE *fp = data->fp;
  591. fputts(T("<startcdata"), fp);
  592. metaLocation(parser);
  593. fputts(T("/>\n"), fp);
  594. }
  595. static void XMLCALL
  596. metaEndCdataSection(void *userData)
  597. {
  598. XML_Parser parser = (XML_Parser) userData;
  599. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  600. FILE *fp = data->fp;
  601. fputts(T("<endcdata"), fp);
  602. metaLocation(parser);
  603. fputts(T("/>\n"), fp);
  604. }
  605. static void XMLCALL
  606. metaCharacterData(void *userData, const XML_Char *s, int len)
  607. {
  608. XML_Parser parser = (XML_Parser) userData;
  609. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  610. FILE *fp = data->fp;
  611. fputts(T("<chars str=\""), fp);
  612. characterData(data, s, len);
  613. puttc(T('"'), fp);
  614. metaLocation(parser);
  615. fputts(T("/>\n"), fp);
  616. }
  617. static void XMLCALL
  618. metaStartDoctypeDecl(void *userData,
  619. const XML_Char *doctypeName,
  620. const XML_Char *UNUSED_P(sysid),
  621. const XML_Char *UNUSED_P(pubid),
  622. int UNUSED_P(has_internal_subset))
  623. {
  624. XML_Parser parser = (XML_Parser) userData;
  625. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  626. FILE *fp = data->fp;
  627. ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName);
  628. metaLocation(parser);
  629. fputts(T("/>\n"), fp);
  630. }
  631. static void XMLCALL
  632. metaEndDoctypeDecl(void *userData)
  633. {
  634. XML_Parser parser = (XML_Parser) userData;
  635. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  636. FILE *fp = data->fp;
  637. fputts(T("<enddoctype"), fp);
  638. metaLocation(parser);
  639. fputts(T("/>\n"), fp);
  640. }
  641. static void XMLCALL
  642. metaNotationDecl(void *userData,
  643. const XML_Char *notationName,
  644. const XML_Char *UNUSED_P(base),
  645. const XML_Char *systemId,
  646. const XML_Char *publicId)
  647. {
  648. XML_Parser parser = (XML_Parser) userData;
  649. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  650. FILE *fp = data->fp;
  651. ftprintf(fp, T("<notation name=\"%s\""), notationName);
  652. if (publicId)
  653. ftprintf(fp, T(" public=\"%s\""), publicId);
  654. if (systemId) {
  655. fputts(T(" system=\""), fp);
  656. characterData(data, systemId, (int)tcslen(systemId));
  657. puttc(T('"'), fp);
  658. }
  659. metaLocation(parser);
  660. fputts(T("/>\n"), fp);
  661. }
  662. static void XMLCALL
  663. metaEntityDecl(void *userData,
  664. const XML_Char *entityName,
  665. int UNUSED_P(is_param),
  666. const XML_Char *value,
  667. int value_length,
  668. const XML_Char *UNUSED_P(base),
  669. const XML_Char *systemId,
  670. const XML_Char *publicId,
  671. const XML_Char *notationName)
  672. {
  673. XML_Parser parser = (XML_Parser) userData;
  674. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  675. FILE *fp = data->fp;
  676. if (value) {
  677. ftprintf(fp, T("<entity name=\"%s\""), entityName);
  678. metaLocation(parser);
  679. puttc(T('>'), fp);
  680. characterData(data, value, value_length);
  681. fputts(T("</entity/>\n"), fp);
  682. }
  683. else if (notationName) {
  684. ftprintf(fp, T("<entity name=\"%s\""), entityName);
  685. if (publicId)
  686. ftprintf(fp, T(" public=\"%s\""), publicId);
  687. fputts(T(" system=\""), fp);
  688. characterData(data, systemId, (int)tcslen(systemId));
  689. puttc(T('"'), fp);
  690. ftprintf(fp, T(" notation=\"%s\""), notationName);
  691. metaLocation(parser);
  692. fputts(T("/>\n"), fp);
  693. }
  694. else {
  695. ftprintf(fp, T("<entity name=\"%s\""), entityName);
  696. if (publicId)
  697. ftprintf(fp, T(" public=\"%s\""), publicId);
  698. fputts(T(" system=\""), fp);
  699. characterData(data, systemId, (int)tcslen(systemId));
  700. puttc(T('"'), fp);
  701. metaLocation(parser);
  702. fputts(T("/>\n"), fp);
  703. }
  704. }
  705. static void XMLCALL
  706. metaStartNamespaceDecl(void *userData,
  707. const XML_Char *prefix,
  708. const XML_Char *uri)
  709. {
  710. XML_Parser parser = (XML_Parser) userData;
  711. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  712. FILE *fp = data->fp;
  713. fputts(T("<startns"), fp);
  714. if (prefix)
  715. ftprintf(fp, T(" prefix=\"%s\""), prefix);
  716. if (uri) {
  717. fputts(T(" ns=\""), fp);
  718. characterData(data, uri, (int)tcslen(uri));
  719. fputts(T("\"/>\n"), fp);
  720. }
  721. else
  722. fputts(T("/>\n"), fp);
  723. }
  724. static void XMLCALL
  725. metaEndNamespaceDecl(void *userData, const XML_Char *prefix)
  726. {
  727. XML_Parser parser = (XML_Parser) userData;
  728. XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
  729. FILE *fp = data->fp;
  730. if (!prefix)
  731. fputts(T("<endns/>\n"), fp);
  732. else
  733. ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
  734. }
  735. static int XMLCALL
  736. unknownEncodingConvert(void *data, const char *p)
  737. {
  738. return codepageConvert(*(int *)data, p);
  739. }
  740. static int XMLCALL
  741. unknownEncoding(void *UNUSED_P(userData), const XML_Char *name, XML_Encoding *info)
  742. {
  743. int cp;
  744. static const XML_Char prefixL[] = T("windows-");
  745. static const XML_Char prefixU[] = T("WINDOWS-");
  746. int i;
  747. for (i = 0; prefixU[i]; i++)
  748. if (name[i] != prefixU[i] && name[i] != prefixL[i])
  749. return 0;
  750. cp = 0;
  751. for (; name[i]; i++) {
  752. static const XML_Char digits[] = T("0123456789");
  753. const XML_Char *s = tcschr(digits, name[i]);
  754. if (!s)
  755. return 0;
  756. cp *= 10;
  757. cp += (int)(s - digits);
  758. if (cp >= 0x10000)
  759. return 0;
  760. }
  761. if (!codepageMap(cp, info->map))
  762. return 0;
  763. info->convert = unknownEncodingConvert;
  764. /* We could just cast the code page integer to a void *,
  765. and avoid the use of release. */
  766. info->release = free;
  767. info->data = malloc(sizeof(int));
  768. if (!info->data)
  769. return 0;
  770. *(int *)info->data = cp;
  771. return 1;
  772. }
  773. static int XMLCALL
  774. notStandalone(void *UNUSED_P(userData))
  775. {
  776. return 0;
  777. }
  778. static void
  779. showVersion(XML_Char *prog)
  780. {
  781. XML_Char *s = prog;
  782. XML_Char ch;
  783. const XML_Feature *features = XML_GetFeatureList();
  784. while ((ch = *s) != 0) {
  785. if (ch == '/'
  786. #if defined(_WIN32)
  787. || ch == '\\'
  788. #endif
  789. )
  790. prog = s + 1;
  791. ++s;
  792. }
  793. ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion());
  794. if (features != NULL && features[0].feature != XML_FEATURE_END) {
  795. int i = 1;
  796. ftprintf(stdout, T("%s"), features[0].name);
  797. if (features[0].value)
  798. ftprintf(stdout, T("=%ld"), features[0].value);
  799. while (features[i].feature != XML_FEATURE_END) {
  800. ftprintf(stdout, T(", %s"), features[i].name);
  801. if (features[i].value)
  802. ftprintf(stdout, T("=%ld"), features[i].value);
  803. ++i;
  804. }
  805. ftprintf(stdout, T("\n"));
  806. }
  807. }
  808. static void
  809. usage(const XML_Char *prog, int rc)
  810. {
  811. ftprintf(stderr,
  812. T("usage: %s [-s] [-n] [-p] [-x] [-e encoding] [-w] [-d output-dir] [-c] [-m] [-r] [-t] [-N] [file ...]\n"), prog);
  813. exit(rc);
  814. }
  815. #if defined(__MINGW32__) && defined(XML_UNICODE)
  816. /* Silence warning about missing prototype */
  817. int wmain(int argc, XML_Char **argv);
  818. #endif
  819. int
  820. tmain(int argc, XML_Char **argv)
  821. {
  822. int i, j;
  823. const XML_Char *outputDir = NULL;
  824. const XML_Char *encoding = NULL;
  825. unsigned processFlags = XML_MAP_FILE;
  826. int windowsCodePages = 0;
  827. int outputType = 0;
  828. int useNamespaces = 0;
  829. int requireStandalone = 0;
  830. int requiresNotations = 0;
  831. enum XML_ParamEntityParsing paramEntityParsing =
  832. XML_PARAM_ENTITY_PARSING_NEVER;
  833. int useStdin = 0;
  834. XmlwfUserData userData = { NULL, NULL, NULL };
  835. #ifdef _MSC_VER
  836. _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF);
  837. #endif
  838. i = 1;
  839. j = 0;
  840. while (i < argc) {
  841. if (j == 0) {
  842. if (argv[i][0] != T('-'))
  843. break;
  844. if (argv[i][1] == T('-') && argv[i][2] == T('\0')) {
  845. i++;
  846. break;
  847. }
  848. j++;
  849. }
  850. switch (argv[i][j]) {
  851. case T('r'):
  852. processFlags &= ~XML_MAP_FILE;
  853. j++;
  854. break;
  855. case T('s'):
  856. requireStandalone = 1;
  857. j++;
  858. break;
  859. case T('n'):
  860. useNamespaces = 1;
  861. j++;
  862. break;
  863. case T('p'):
  864. paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS;
  865. /* fall through */
  866. case T('x'):
  867. processFlags |= XML_EXTERNAL_ENTITIES;
  868. j++;
  869. break;
  870. case T('w'):
  871. windowsCodePages = 1;
  872. j++;
  873. break;
  874. case T('m'):
  875. outputType = 'm';
  876. j++;
  877. break;
  878. case T('c'):
  879. outputType = 'c';
  880. useNamespaces = 0;
  881. j++;
  882. break;
  883. case T('t'):
  884. outputType = 't';
  885. j++;
  886. break;
  887. case T('N'):
  888. requiresNotations = 1;
  889. j++;
  890. break;
  891. case T('d'):
  892. if (argv[i][j + 1] == T('\0')) {
  893. if (++i == argc)
  894. usage(argv[0], 2);
  895. outputDir = argv[i];
  896. }
  897. else
  898. outputDir = argv[i] + j + 1;
  899. i++;
  900. j = 0;
  901. break;
  902. case T('e'):
  903. if (argv[i][j + 1] == T('\0')) {
  904. if (++i == argc)
  905. usage(argv[0], 2);
  906. encoding = argv[i];
  907. }
  908. else
  909. encoding = argv[i] + j + 1;
  910. i++;
  911. j = 0;
  912. break;
  913. case T('h'):
  914. usage(argv[0], 0);
  915. return 0;
  916. case T('v'):
  917. showVersion(argv[0]);
  918. return 0;
  919. case T('\0'):
  920. if (j > 1) {
  921. i++;
  922. j = 0;
  923. break;
  924. }
  925. /* fall through */
  926. default:
  927. usage(argv[0], 2);
  928. }
  929. }
  930. if (i == argc) {
  931. useStdin = 1;
  932. processFlags &= ~XML_MAP_FILE;
  933. i--;
  934. }
  935. for (; i < argc; i++) {
  936. XML_Char *outName = 0;
  937. int result;
  938. XML_Parser parser;
  939. if (useNamespaces)
  940. parser = XML_ParserCreateNS(encoding, NSSEP);
  941. else
  942. parser = XML_ParserCreate(encoding);
  943. if (! parser) {
  944. tperror(T("Could not instantiate parser"));
  945. exit(1);
  946. }
  947. if (requireStandalone)
  948. XML_SetNotStandaloneHandler(parser, notStandalone);
  949. XML_SetParamEntityParsing(parser, paramEntityParsing);
  950. if (outputType == 't') {
  951. /* This is for doing timings; this gives a more realistic estimate of
  952. the parsing time. */
  953. outputDir = 0;
  954. XML_SetElementHandler(parser, nopStartElement, nopEndElement);
  955. XML_SetCharacterDataHandler(parser, nopCharacterData);
  956. XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
  957. }
  958. else if (outputDir) {
  959. const XML_Char * delim = T("/");
  960. const XML_Char *file = useStdin ? T("STDIN") : argv[i];
  961. if (!useStdin) {
  962. /* Jump after last (back)slash */
  963. const XML_Char * lastDelim = tcsrchr(file, delim[0]);
  964. if (lastDelim)
  965. file = lastDelim + 1;
  966. #if defined(_WIN32)
  967. else {
  968. const XML_Char * winDelim = T("\\");
  969. lastDelim = tcsrchr(file, winDelim[0]);
  970. if (lastDelim) {
  971. file = lastDelim + 1;
  972. delim = winDelim;
  973. }
  974. }
  975. #endif
  976. }
  977. outName = (XML_Char *)malloc((tcslen(outputDir) + tcslen(file) + 2)
  978. * sizeof(XML_Char));
  979. tcscpy(outName, outputDir);
  980. tcscat(outName, delim);
  981. tcscat(outName, file);
  982. userData.fp = tfopen(outName, T("wb"));
  983. if (!userData.fp) {
  984. tperror(outName);
  985. exit(1);
  986. }
  987. setvbuf(userData.fp, NULL, _IOFBF, 16384);
  988. #ifdef XML_UNICODE
  989. puttc(0xFEFF, userData.fp);
  990. #endif
  991. XML_SetUserData(parser, &userData);
  992. switch (outputType) {
  993. case 'm':
  994. XML_UseParserAsHandlerArg(parser);
  995. XML_SetElementHandler(parser, metaStartElement, metaEndElement);
  996. XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
  997. XML_SetCommentHandler(parser, metaComment);
  998. XML_SetCdataSectionHandler(parser, metaStartCdataSection,
  999. metaEndCdataSection);
  1000. XML_SetCharacterDataHandler(parser, metaCharacterData);
  1001. XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl,
  1002. metaEndDoctypeDecl);
  1003. XML_SetEntityDeclHandler(parser, metaEntityDecl);
  1004. XML_SetNotationDeclHandler(parser, metaNotationDecl);
  1005. XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl,
  1006. metaEndNamespaceDecl);
  1007. metaStartDocument(parser);
  1008. break;
  1009. case 'c':
  1010. XML_UseParserAsHandlerArg(parser);
  1011. XML_SetDefaultHandler(parser, markup);
  1012. XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
  1013. XML_SetCharacterDataHandler(parser, defaultCharacterData);
  1014. XML_SetProcessingInstructionHandler(parser,
  1015. defaultProcessingInstruction);
  1016. break;
  1017. default:
  1018. if (useNamespaces)
  1019. XML_SetElementHandler(parser, startElementNS, endElementNS);
  1020. else
  1021. XML_SetElementHandler(parser, startElement, endElement);
  1022. XML_SetCharacterDataHandler(parser, characterData);
  1023. #ifndef W3C14N
  1024. XML_SetProcessingInstructionHandler(parser, processingInstruction);
  1025. if (requiresNotations) {
  1026. XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl);
  1027. XML_SetNotationDeclHandler(parser, notationDecl);
  1028. }
  1029. #endif /* not W3C14N */
  1030. break;
  1031. }
  1032. }
  1033. if (windowsCodePages)
  1034. XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
  1035. result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags);
  1036. if (outputDir) {
  1037. if (outputType == 'm')
  1038. metaEndDocument(parser);
  1039. fclose(userData.fp);
  1040. if (!result) {
  1041. tremove(outName);
  1042. exit(2);
  1043. }
  1044. free(outName);
  1045. }
  1046. XML_ParserFree(parser);
  1047. }
  1048. return 0;
  1049. }