123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174 |
- /*
- __ __ _
- ___\ \/ /_ __ __ _| |_
- / _ \\ /| '_ \ / _` | __|
- | __// \| |_) | (_| | |_
- \___/_/\_\ .__/ \__,_|\__|
- |_| XML parser
- Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2002 Fred L. Drake, Jr. <[email protected]>
- Copyright (c) 2016-2017 Sebastian Pipping <[email protected]>
- Licensed under the MIT license:
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to permit
- persons to whom the Software is furnished to do so, subject to the
- following conditions:
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
- NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
- DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
/* Size in bytes, terminating NUL included, of the buffer getXMLCharset() writes the charset name into. */
- #define CHARSET_MAX 41
/*
 * Scan the next token of a header field body, starting at *pp.
 *
 * Tokens are:
 *   - an atom: a run of characters ended by whitespace, '(', ';', '/', '=',
 *     '"' or the end of the input; *pp is left ON the terminating character;
 *   - one of the single-character delimiters ';', '/', '='; *pp is left just
 *     past it;
 *   - a quoted string: the returned pointer addresses the opening '"' and
 *     *pp is left just past the closing '"'.
 * Whitespace and parenthesised (possibly nested) comments between tokens are
 * skipped.  A backslash consumes itself and the following character without
 * changing the scanner state.
 *
 * Returns a pointer to the first character of the token, or 0 when the input
 * ends without a complete token (including an unterminated string, comment
 * or backslash escape) or when a ')' appears outside a comment or string.
 */
static const char *
getTok(const char **pp) {
  /* The order matters: every value above `init` is a comment nesting depth
     (inComment is one level deep, inComment + 1 two levels, and so on). */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* The end of the input terminates an atom just like whitespace does;
         *pp stays on the NUL so the next call reports "no more tokens". */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* Enter a comment, or go one nesting level deeper. */
      if (state != inString)
        state++;
      break;
    case ')':
      /* Leave one comment level; the outermost ')' lands back on init. */
      if (state > init)
        --state;
      else if (state != inString)
        return 0;
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      /* A delimiter is a token of its own: return it and step past it. */
      if (state == init)
        return (*pp)++;
      break;
    case '\\':
      /* Skip the backslash here; the escaped character is skipped by the
         increment at the bottom of the loop. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp;
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
/*
 * Compare the token [start, end) with the NUL-terminated string key,
 * ignoring the case of ASCII letters in the token.
 *
 * key must be lowercase ASCII.
 *
 * Returns 1 when the token and key have the same length and every character
 * matches, 0 otherwise.  A null start (no token) never matches.
 */
static int
matchkey(const char *start, const char *end, const char *key) {
  if (! start)
    return 0;
  for (; start != end; start++, key++) {
    /* Token is longer than key.  Checked first so that key is never
       advanced, and later read, beyond its terminator. */
    if (*key == '\0')
      return 0;
    if (*start == *key)
      continue;
    /* Only letters have an uppercase form that may stand in for them. */
    if (*key < 'a' || *key > 'z' || *start != 'A' + (*key - 'a'))
      return 0;
  }
  /* Token exhausted: it matches only if key is exhausted as well. */
  return *key == '\0';
}
- void
- getXMLCharset(const char *buf, char *charset) {
- const char *next, *p;
- charset[0] = '\0';
- next = buf;
- p = getTok(&next);
- if (matchkey(p, next, "text"))
- strcpy(charset, "us-ascii");
- else if (! matchkey(p, next, "application"))
- return;
- p = getTok(&next);
- if (! p || *p != '/')
- return;
- p = getTok(&next);
- if (matchkey(p, next, "xml"))
- isXml = 1;
- p = getTok(&next);
- while (p) {
- if (*p == ';') {
- p = getTok(&next);
- if (matchkey(p, next, "charset")) {
- p = getTok(&next);
- if (p && *p == '=') {
- p = getTok(&next);
- if (p) {
- char *s = charset;
- if (*p == '"') {
- while (++p != next - 1) {
- if (*p == '\\')
- ++p;
- if (s == charset + CHARSET_MAX - 1) {
- charset[0] = '\0';
- break;
- }
- *s++ = *p;
- }
- *s++ = '\0';
- } else {
- if (next - p > CHARSET_MAX - 1)
- break;
- while (p != next)
- *s++ = *p++;
- *s = 0;
- break;
- }
- }
- }
- }
- } else
- p = getTok(&next);
- }
- }
- int
- main(int argc, char **argv) {
- char buf[CHARSET_MAX];
- getXMLCharset(argv[1], buf);
- printf("charset = \"%s\"\n", buf);
- return 0;
- }
|