| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800 |
- //
- // Parser.cpp
- //
- // $Id$
- //
- // Library: JSON
- // Package: JSON
- // Module: Parser
- //
- // Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
- // and Contributors.
- //
- // Permission is hereby granted, free of charge, to any person or organization
- // obtaining a copy of the software and accompanying documentation covered by
- // this license (the "Software") to use, reproduce, display, distribute,
- // execute, and transmit the Software, and to prepare derivative works of the
- // Software, and to permit third-parties to whom the Software is furnished to
- // do so, all subject to the following:
- //
- // The copyright notices in the Software and this entire statement, including
- // the above license grant, this restriction and the following disclaimer,
- // must be included in all copies of the Software, in whole or in part, and
- // all derivative works of the Software, unless such copies or derivative
- // works are solely in the form of machine-executable object code generated by
- // a source language processor.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
- // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
- // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
- // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- // DEALINGS IN THE SOFTWARE.
- //
- #include "Poco/JSON/Parser.h"
- #include "Poco/JSON/JSONException.h"
- #include "Poco/Ascii.h"
- #include "Poco/Token.h"
- #include "Poco/UTF8Encoding.h"
- #undef min
- #undef max
- #include <limits>
- namespace Poco {
- namespace JSON {
- class SeparatorToken: public Token
- {
- public:
- SeparatorToken()
- {
- }
- virtual ~SeparatorToken()
- {
- }
- Class tokenClass() const
- {
- return Token::SEPARATOR_TOKEN;
- }
- bool start(char c, std::istream& istr)
- {
- if (c == '{'
- || c == '}'
- || c == ']'
- || c == '['
- || c == ','
- || c == ':')
- {
- _value = c;
- return true;
- }
- if ( c == '\'' )
- {
- throw JSONException("Invalid quote found");
- }
- else return false;
- }
- void finish(std::istream& istr)
- {
- }
- };
- class StringToken: public Token
- {
- public:
- StringToken()
- {
- }
- virtual ~StringToken()
- {
- }
- Class tokenClass() const
- {
- return Token::STRING_LITERAL_TOKEN;
- }
- bool start(char c, std::istream& istr)
- {
- if (c == '"')
- {
- _value = ""; // We don't need the quote!
- return true;
- }
- else return false;
- }
- void finish(std::istream& istr)
- {
- int c = 0;
- while ((c = istr.get()) != -1)
- {
- if (c == 0)
- {
- throw JSONException("Null byte not allowed");
- }
- if ( 0 < c && c <= 0x1F )
- {
- throw JSONException(format("Control character 0x%x not allowed", (unsigned int) c));
- }
- if (c == '"')
- break;
-
- if(0x80 <= c && c <= 0xFF)
- {
- int count = utf8_check_first(c);
- if (!count)
- {
- throw JSONException(format("Unable to decode byte 0x%x", (unsigned int) c));
- }
- char buffer[5];
- buffer[0] = c;
- for(int i = 1; i < count; ++i)
- {
- buffer[i] = istr.get();
- }
-
- if ( !UTF8Encoding::isLegal((unsigned char*) buffer, count) )
- {
- throw JSONException("No legal UTF8 found");
- }
- buffer[count] = '\0';
- _value += buffer;
- continue;
- }
- if (c == '\\') // Escaped String
- {
- c = istr.get();
- switch(c)
- {
- case '"' : c = '"'; break;
- case '\\' : c = '\\'; break;
- case '/' : c = '/'; break;
- case 'b' : c = '\b'; break;
- case 'f' : c = '\f'; break;
- case 'n' : c = '\n'; break;
- case 'r' : c = '\r'; break;
- case 't' : c = '\t'; break;
- case 'u' : // Unicode
- {
- Poco::Int32 unicode = decodeUnicode(istr);
- if ( unicode == 0 )
- {
- throw JSONException("\\u0000 is not allowed");
- }
- if ( unicode >= 0xD800 && unicode <= 0xDBFF )
- {
- c = istr.get();
- if ( c != '\\' )
- {
- throw JSONException("Invalid unicode surrogate pair");
- }
- c = istr.get();
- if ( c != 'u' )
- {
- throw JSONException("Invalid unicode surrogate pair");
- }
- Poco::Int32 surrogatePair = decodeUnicode(istr);
- if ( 0xDC00 <= surrogatePair && surrogatePair <= 0xDFFF )
- {
- unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
- }
- else
- {
- throw JSONException("Invalid unicode surrogate pair");
- }
- }
- else if ( 0xDC00 <= unicode && unicode <= 0xDFFF )
- {
- throw JSONException("Invalid unicode");
- }
-
- Poco::UTF8Encoding utf8encoding;
- int length = utf8encoding.convert(unicode, NULL, 0);
- std::vector<unsigned char> convert(length);
- utf8encoding.convert(unicode, &convert[0], length);
- for(int i = 0; i < length; ++i)
- {
- _value += (char) convert[i];
- }
- continue;
- }
- default:
- {
- throw JSONException(format("Invalid escape '%c' character used", (char) c));
- }
- }
- }
- _value += c;
- }
- if ( c == -1 )
- {
- throw JSONException("Unterminated string found");
- }
- }
- Poco::Int32 decodeUnicode(std::istream& istr)
- {
- Poco::Int32 value = 0;
- for(int i = 0; i < 4; i++)
- {
- value <<= 4;
- int nc = istr.peek();
- if ( nc == -1 )
- {
- throw JSONException("Invalid unicode sequence");
- }
- istr.get(); // No EOF, so read the character
- if (nc >= '0' && nc <= '9')
- value += nc - '0';
- else if (nc >= 'A' && nc <= 'F')
- value += 10 + nc - 'A';
- else if (nc >= 'a' && nc <= 'f')
- value += 10 + nc - 'a';
- else
- throw JSONException("Invalid unicode sequence. Hexadecimal digit expected");
- }
- return value;
- }
-
- private:
- int utf8_check_first(char byte)
- {
- unsigned char u = (unsigned char) byte;
- if(u < 0x80)
- return 1;
- if (0x80 <= u && u <= 0xBF)
- {
- /* second, third or fourth byte of a multi-byte
- sequence, i.e. a "continuation byte" */
- return 0;
- }
- else if(u == 0xC0 || u == 0xC1)
- {
- /* overlong encoding of an ASCII byte */
- return 0;
- }
- else if(0xC2 <= u && u <= 0xDF)
- {
- /* 2-byte sequence */
- return 2;
- }
- else if(0xE0 <= u && u <= 0xEF)
- {
- /* 3-byte sequence */
- return 3;
- }
- else if(0xF0 <= u && u <= 0xF4)
- {
- /* 4-byte sequence */
- return 4;
- }
- else
- {
- /* u >= 0xF5 */
- /* Restricted (start of 4-, 5- or 6-byte sequence) or invalid
- UTF-8 */
- return 0;
- }
- }
- };
- class KeywordToken : public Token
- {
- public:
- KeywordToken()
- {
- }
- virtual ~KeywordToken()
- {
- }
- Class tokenClass() const
- {
- return Token::KEYWORD_TOKEN;
- }
- bool start(char c, std::istream& istr)
- {
- if ( Ascii::isAlpha(c) )
- {
- _value = c;
- return true;
- }
- return false;
- }
- void finish(std::istream& istr)
- {
- int c = istr.peek();
- while (c != -1 && Ascii::isAlpha(c) )
- {
- istr.get();
- _value += c;
- c = istr.peek();
- }
- }
- };
- class NumberToken: public Token
- {
- public:
- NumberToken() : _activeClass(INTEGER_LITERAL_TOKEN)
- {
- }
- virtual ~NumberToken()
- {
- }
- Class tokenClass() const
- {
- return _activeClass;
- }
- bool start(char c, std::istream& istr)
- {
- // Reset the active class to integer
- _activeClass = INTEGER_LITERAL_TOKEN;
- if ( c == -1 )
- return false;
- if (Ascii::isDigit(c))
- {
- if ( c == '0' )
- {
- int nc = istr.peek();
- if ( Ascii::isDigit(nc) ) // A digit after a zero is not allowed
- {
- throw JSONException("Number can't start with a zero");
- }
- }
- _value = c;
- return true;
- }
- if (c == '-')
- {
- _value = c;
- int nc = istr.peek();
- if (Ascii::isDigit(nc))
- {
- if (nc == '0')
- {
- _value += '0';
- istr.get();
- nc = istr.peek();
- if ( Ascii::isDigit(nc) ) // A digit after -0 is not allowed
- {
- throw JSONException("Number can't start with a zero");
- }
- }
- return true;
- }
- }
- return false;
- }
- void finish(std::istream& istr)
- {
- int c;
- while( (c = istr.peek()) != -1)
- {
- if (Ascii::isDigit(c))
- {
- _value += c;
- istr.get();
- }
- else
- {
- switch(c)
- {
- case '.': // Float
- {
- if (_activeClass == Token::FLOAT_LITERAL_TOKEN)
- {
- throw JSONException("Invalid float value");
- }
- _activeClass = Token::FLOAT_LITERAL_TOKEN;
- _value += c;
- istr.get();
- // After a . we need a digit
- c = istr.peek();
- if ( ! Ascii::isDigit(c) )
- {
- throw JSONException("Invalid float value");
- }
- break;
- }
- case 'E':
- case 'e':
- {
- if (_activeClass == Token::DOUBLE_LITERAL_TOKEN)
- {
- throw JSONException("Invalid double value");
- }
- _activeClass = Token::DOUBLE_LITERAL_TOKEN;
- // Add the e or E
- _value += c;
- istr.get();
- // When the next char is - or + then read the next char
- c = istr.peek();
- if (c == '-' || c == '+')
- {
- _value += c;
- istr.get();
- c = istr.peek();
- }
- if (! Ascii::isDigit(c))
- {
- throw JSONException("Invalid double value");
- }
- break;
- }
- default:
- return; // End of number token
- }
- istr.get(); // If we get here we have a valid character for a number
- _value += c;
- }
- }
- }
- private:
- Class _activeClass;
- };
- Parser::Parser() : _tokenizer(), _handler(NULL)
- {
- _tokenizer.addToken(new WhitespaceToken());
- _tokenizer.addToken(new InvalidToken());
- _tokenizer.addToken(new SeparatorToken());
- _tokenizer.addToken(new StringToken());
- _tokenizer.addToken(new NumberToken());
- _tokenizer.addToken(new KeywordToken());
- }
- Parser::~Parser()
- {
- }
- const Token* Parser::nextToken()
- {
- const Token* token = _tokenizer.next();
- if (token->is(Token::EOF_TOKEN))
- {
- throw JSONException("Unexpected EOF found");
- }
- return token;
- }
- void Parser::parse(std::istream& in)
- {
- _tokenizer.attachToStream(in);
- const Token* token = nextToken();
- if (token->is(Token::SEPARATOR_TOKEN))
- {
- // This must be a { or a [
- if (token->asChar() == '{')
- {
- readObject();
- }
- else if (token->asChar() == '[')
- {
- readArray();
- }
- else
- {
- throw JSONException(format("Invalid separator '%c' found. Expecting { or [", token->asChar()));
- }
- token = _tokenizer.next();
- if (! token->is(Token::EOF_TOKEN))
- {
- throw JSONException(format("EOF expected but found '%s'", token->asString()));
- }
- }
- else
- {
- throw JSONException(format("Invalid token '%s' found. Expecting { or [", token->asString()));
- }
- }
- void Parser::readObject()
- {
- if (_handler != NULL)
- {
- _handler->startObject();
- }
- if ( readRow(true) ) // First call is special: check for empty object
- {
- while(readRow());
- }
- if (_handler != NULL)
- {
- _handler->endObject();
- }
- }
- bool Parser::readRow(bool firstCall)
- {
- const Token* token = nextToken();
- if (firstCall && token->tokenClass() == Token::SEPARATOR_TOKEN && token->asChar() == '}')
- {
- return false; // End of object is possible for an empty object
- }
- if (token->tokenClass() == Token::STRING_LITERAL_TOKEN)
- {
- std::string propertyName = token->tokenString();
- if ( _handler != NULL )
- {
- _handler->key(propertyName);
- }
- token = nextToken();
- if (token->is(Token::SEPARATOR_TOKEN)
- && token->asChar() == ':')
- {
- readValue(nextToken());
- token = nextToken();
- if (token->is(Token::SEPARATOR_TOKEN))
- {
- if (token->asChar() == ',')
- {
- if (_handler != NULL)
- {
- _handler->comma();
- }
- return true; // Read next row
- }
- else if (token->asChar() == '}')
- {
- return false; // End of object
- }
- else
- {
- throw JSONException(format("Invalid separator '%c' found. Expecting , or }", token->asChar()));
- }
- }
- else
- {
- throw JSONException(format("Invalid token '%s' found. Expecting , or }", token->asString()));
- }
- }
- else
- {
- throw JSONException(format("Invalid token '%s' found. Expecting :", token->asString()));
- }
- }
- else
- {
- throw JSONException(format("Invalid token '%s' found. Expecting key", token->asString()));
- }
- }
- void Parser::readValue(const Token* token)
- {
- switch(token->tokenClass())
- {
- default:
- case Token::IDENTIFIER_TOKEN:
- case Token::OPERATOR_TOKEN:
- case Token::CHAR_LITERAL_TOKEN:
- break;
-
- case Token::INTEGER_LITERAL_TOKEN:
- if (_handler != NULL)
- {
- #if defined(POCO_HAVE_INT64)
- try
- {
- Int64 value = token->asInteger64();
- // if number is 32-bit, then handle as such
- if ( value > std::numeric_limits<int>::max()
- || value < std::numeric_limits<int>::min() )
- {
- _handler->value(value);
- }
- else
- {
- _handler->value(static_cast<int>(value));
- }
- }
- // try to handle error as unsigned in case of overflow
- catch ( const SyntaxException& )
- {
- UInt64 value = token->asUnsignedInteger64();
- // if number is 32-bit, then handle as such
- if ( value > std::numeric_limits<unsigned>::max() )
- {
- _handler->value(value);
- }
- else
- {
- _handler->value(static_cast<unsigned>(value));
- }
- }
- #else
- try
- {
- int value = token->asInteger();
- _handle->value(value);
- }
- // try to handle error as unsigned in case of overflow
- catch ( const SyntaxException& )
- {
- unsigned value = token->asUnsignedInteger();
- _handle->value(value);
- }
- #endif
- }
- break;
- case Token::KEYWORD_TOKEN:
- {
- if (token->tokenString().compare("null") == 0)
- {
- if (_handler != NULL)
- {
- _handler->null();
- }
- }
- else if (token->tokenString().compare("true") == 0)
- {
- if (_handler != NULL)
- {
- _handler->value(true);
- }
- }
- else if (token->tokenString().compare("false") == 0)
- {
- if (_handler != NULL)
- {
- _handler->value(false);
- }
- }
- else
- {
- throw JSONException(format("Invalid keyword '%s' found", token->asString()));
- }
- break;
- }
- case Token::FLOAT_LITERAL_TOKEN:
- // Fall through
- case Token::DOUBLE_LITERAL_TOKEN:
- if (_handler != NULL)
- {
- _handler->value(token->asFloat());
- }
- break;
- case Token::STRING_LITERAL_TOKEN:
- if (_handler != NULL)
- {
- _handler->value(token->tokenString());
- }
- break;
- case Token::SEPARATOR_TOKEN:
- {
- if (token->asChar() == '{')
- {
- readObject();
- }
- else if (token->asChar() == '[')
- {
- readArray();
- }
- break;
- }
- case Token::INVALID_TOKEN:
- throw JSONException(format("Invalid token '%s' found", token->asString()));
- }
- }
- void Parser::readArray()
- {
- if (_handler != NULL)
- {
- _handler->startArray();
- }
- if (readElements(true)) // First call is special: check for empty array
- {
- while(readElements());
- }
- if (_handler != NULL)
- {
- _handler->endArray();
- }
- }
- bool Parser::readElements(bool firstCall)
- {
- const Token* token = nextToken();
- if (firstCall && token->is(Token::SEPARATOR_TOKEN) && token->asChar() == ']')
- {
- // End of array is possible for an empty array
- return false;
- }
- readValue(token);
- token = nextToken();
- if (token->is(Token::SEPARATOR_TOKEN))
- {
- if (token->asChar() == ']')
- return false; // End of array
- if (token->asChar() == ',')
- {
- if (_handler != NULL)
- {
- _handler->comma();
- }
- return true;
- }
- throw JSONException(format("Invalid separator '%c' found. Expecting , or ]", token->asChar()));
- }
- throw JSONException(format("Invalid token '%s' found.", token->asString()));
- }
- } } // namespace Poco::JSON
|