/**********************************************

License: BSD
Project Webpage: http://cajun-jsonapi.sourceforge.net/
Author: Terry Caton

***********************************************/

#include "libaegisub/cajun/reader.h"

#ifndef LAGI_PRE
#include <cassert>
#include <set>
#include <sstream>
#endif

/*

TODO:
* better documentation
* unicode character decoding

*/

namespace json {

/// Stream-extraction operator: reads one document from istr into elementRoot
/// by delegating to Reader::Read, then returns the stream.
std::istream& operator >> (std::istream& istr, UnknownElement& elementRoot) {
    Reader::Read(elementRoot, istr);
    return istr;
}

/// A default-constructed location has all three offsets set to zero.
Reader::Location::Location()
: m_nLine(0)
, m_nLineOffset(0)
, m_nDocOffset(0)
{}

//////////////////////
// Reader::InputStream

// wrapper around istream to keep track of document/line offsets
class Reader::InputStream {
    std::istream& m_iStr;
    Location m_Location;

public:
    explicit InputStream(std::istream& iStr) : m_iStr(iStr) { }

    /// Consumes and returns the next character, updating the tracked location.
    /// A '\n' advances the line counter and resets the line offset; any other
    /// character advances the line offset. Callers must check EOS() first.
    int Get() {
        assert(!m_iStr.eof());
        int c = m_iStr.get();
        ++m_Location.m_nDocOffset;
        if (c == '\n') {
            ++m_Location.m_nLine;
            m_Location.m_nLineOffset = 0;
        }
        else {
            ++m_Location.m_nLineOffset;
        }

        return c;
    }

    /// Returns the next character without consuming it.
    int Peek() {
        assert(!m_iStr.eof());
        return m_iStr.peek();
    }

    /// True once no more characters can be read from the stream.
    bool EOS() {
        m_iStr.peek(); // apparently eof flag isn't set until a character read is attempted. whatever.
        return m_iStr.eof();
    }

    /// Location of the next character to be read.
    const Location& GetLocation() const { return m_Location; }
};

//////////////////////
// Reader::TokenStream

// forward-only cursor over the token list produced by Reader::Scan
class Reader::TokenStream {
    const Tokens& m_Tokens;
    Tokens::const_iterator m_itCurrent;

public:
    explicit TokenStream(const Tokens& tokens)
    : m_Tokens(tokens)
    , m_itCurrent(tokens.begin())
    { }

    /// Returns the current token without advancing. Requires !EOS().
    const Token& Peek() {
        assert(!EOS());
        return *m_itCurrent;
    }

    /// Returns the current token and advances past it. Requires !EOS().
    const Token& Get() {
        assert(!EOS());
        return *m_itCurrent++;
    }

    /// True when every token has been consumed.
    bool EOS() const {
        return m_itCurrent == m_Tokens.end();
    }
};

///////////////////
// Reader (finally)

// Public entry points: one overload per element type, all forwarding to Read_i.
void Reader::Read(Object& object, std::istream& istr)          { Read_i(object, istr); }
void Reader::Read(Array& array, std::istream& istr)            { Read_i(array, istr); }
void Reader::Read(String& string, std::istream& istr)          { Read_i(string, istr); }
void Reader::Read(Number& number, std::istream& istr)          { Read_i(number, istr); }
void Reader::Read(Boolean& boolean, std::istream& istr)        { Read_i(boolean, istr); }
void Reader::Read(Null& null, std::istream& istr)              { Read_i(null, istr); }
void Reader::Read(UnknownElement& unknown, std::istream& istr) { Read_i(unknown, istr); }

/// Scans istr into tokens, parses a single element from them and assigns it
/// to element.
/// @throws ParseException if any tokens remain after the element was parsed.
template <typename ElementTypeT>
void Reader::Read_i(ElementTypeT& element, std::istream& istr) {
    Reader reader;

    Tokens tokens;
    InputStream inputStream(istr);
    reader.Scan(tokens, inputStream);

    TokenStream tokenStream(tokens);
    element = reader.Parse(tokenStream);

    if (!tokenStream.EOS()) {
        const Token& token = tokenStream.Peek();
        throw ParseException("Expected End of token stream; found " + token.sValue, token.locBegin, token.locEnd);
    }
}

/// Splits the whole input stream into tokens, appending each to `tokens`.
/// The first non-whitespace character of each token selects its type.
/// @throws ScanException on a character that cannot start any token, or when
///         a token's text does not match what its first character promised.
void Reader::Scan(Tokens& tokens, InputStream& inputStream) {
    // the comma operator skips leading whitespace before every EOS test
    while (EatWhiteSpace(inputStream), !inputStream.EOS()) {
        // if all goes well, we'll create a token each pass
        Token token;
        token.locBegin = inputStream.GetLocation();

        // gives us null-terminated string
        std::string sChar;
        sChar.push_back(inputStream.Peek());

        switch (sChar[0]) {
            case '{':
                token.sValue = sChar[0];
                MatchExpectedString(sChar, inputStream);
                token.nType = Token::TOKEN_OBJECT_BEGIN;
                break;

            case '}':
                token.sValue = sChar[0];
                MatchExpectedString(sChar, inputStream);
                token.nType = Token::TOKEN_OBJECT_END;
                break;

            case '[':
                token.sValue = sChar[0];
                MatchExpectedString(sChar, inputStream);
                token.nType = Token::TOKEN_ARRAY_BEGIN;
                break;

            case ']':
                token.sValue = sChar[0];
                MatchExpectedString(sChar, inputStream);
                token.nType = Token::TOKEN_ARRAY_END;
                break;

            case ',':
                token.sValue = sChar[0];
                MatchExpectedString(sChar, inputStream);
                token.nType = Token::TOKEN_NEXT_ELEMENT;
                break;

            case ':':
                token.sValue = sChar[0];
                MatchExpectedString(sChar, inputStream);
                token.nType = Token::TOKEN_MEMBER_ASSIGN;
                break;

            case '"':
                MatchString(token.sValue, inputStream);
                token.nType = Token::TOKEN_STRING;
                break;

            case '-':
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                MatchNumber(token.sValue, inputStream);
                token.nType = Token::TOKEN_NUMBER;
                break;

            case 't':
                token.sValue = "true";
                MatchExpectedString(token.sValue, inputStream);
                token.nType = Token::TOKEN_BOOLEAN;
                break;

            case 'f':
                token.sValue = "false";
                MatchExpectedString(token.sValue, inputStream);
                token.nType = Token::TOKEN_BOOLEAN;
                break;

            case 'n':
                token.sValue = "null";
                MatchExpectedString(token.sValue, inputStream);
                token.nType = Token::TOKEN_NULL;
                break;

            default:
                throw ScanException("Unexpected character in stream: " + sChar, inputStream.GetLocation());
        }

        token.locEnd = inputStream.GetLocation();
        tokens.push_back(token);
    }
}

/// Consumes characters for as long as ::isspace reports whitespace.
void Reader::EatWhiteSpace(InputStream& inputStream) {
    while (!inputStream.EOS() && ::isspace(inputStream.Peek()))
        inputStream.Get();
}

/// Consumes exactly the characters of sExpected from the stream.
/// @throws ScanException if the stream ends early or a character differs.
void Reader::MatchExpectedString(const std::string& sExpected, InputStream& inputStream) {
    std::string::const_iterator it(sExpected.begin()), itEnd(sExpected.end());
    for ( ; it != itEnd; ++it) {
        if (inputStream.EOS() ||      // did we reach the end before finding what we're looking for...
            inputStream.Get() != *it) // ...or did we find something different?
        {
            throw ScanException("Expected string: " + sExpected, inputStream.GetLocation());
        }
    }

    // all's well if we made it here, return quietly
}

/// Reads a double-quoted string literal, appending its unescaped content
/// (without the surrounding quotes) to `string`.
/// Recognised escapes: \/ \" \\ \b \f \n \r \t. "\u" is not supported yet.
/// @throws ScanException on an unrecognised escape or a missing closing quote.
void Reader::MatchString(std::string& string, InputStream& inputStream) {
    MatchExpectedString("\"", inputStream);

    while (inputStream.EOS() == false && inputStream.Peek() != '"') {
        char c = inputStream.Get();

        // escape?
        if (c == '\\' && inputStream.EOS() == false) // shouldn't have reached the end yet
        {
            c = inputStream.Get();
            switch (c) {
                case '/':  string.push_back('/');  break;
                case '"':  string.push_back('"');  break;
                case '\\': string.push_back('\\'); break;
                case 'b':  string.push_back('\b'); break;
                case 'f':  string.push_back('\f'); break;
                case 'n':  string.push_back('\n'); break;
                case 'r':  string.push_back('\r'); break;
                case 't':  string.push_back('\t'); break;
                case 'u':  // TODO: what do we do with this?
                default:
                    // Build a std::string first: adding a char directly to the
                    // literal would be pointer arithmetic, not concatenation.
                    throw ScanException(std::string("Unrecognized escape sequence found in string: \\") + c, inputStream.GetLocation());
            }
        }
        else {
            string.push_back(c);
        }
    }

    // eat the last '"' that we just peeked
    MatchExpectedString("\"", inputStream);
}

/// Appends to sNumber the longest run of characters that may appear in a
/// number. No structural validation happens here; ParseNumber rejects
/// malformed runs later.
void Reader::MatchNumber(std::string& sNumber, InputStream& inputStream) {
    // std::string content excludes the terminating NUL, so a '\0' byte in the
    // input is never mistaken for a numeric character.
    const std::string sNumericChars("0123456789.eE-+");

    while (inputStream.EOS() == false &&
           sNumericChars.find(static_cast<char>(inputStream.Peek())) != std::string::npos)
    {
        sNumber.push_back(inputStream.Get());
    }
}

/// Parses one element of whatever type the next token introduces.
/// @throws ParseException if the stream is exhausted or the next token cannot
///         start an element.
UnknownElement Reader::Parse(Reader::TokenStream& tokenStream) {
    if (tokenStream.EOS())
        throw ParseException("Unexpected end of token stream", Location(), Location()); // nowhere to point to

    Token const& token = tokenStream.Peek();
    switch (token.nType) {
        case Token::TOKEN_OBJECT_BEGIN: return ParseObject(tokenStream);
        case Token::TOKEN_ARRAY_BEGIN:  return ParseArray(tokenStream);
        case Token::TOKEN_STRING:       return ParseString(tokenStream);
        case Token::TOKEN_NUMBER:       return ParseNumber(tokenStream);
        case Token::TOKEN_BOOLEAN:      return ParseBoolean(tokenStream);
        case Token::TOKEN_NULL:         return ParseNull(tokenStream);
        default:
            throw ParseException("Unexpected token: " + token.sValue, token.locBegin, token.locEnd);
    }
}

/// Parses `{ "name" : value, ... }`.
/// @throws ParseException on a duplicate member name or an unexpected token.
Object Reader::ParseObject(Reader::TokenStream& tokenStream) {
    MatchExpectedToken(Token::TOKEN_OBJECT_BEGIN, tokenStream);

    Object object;

    while (!tokenStream.EOS() && tokenStream.Peek().nType != Token::TOKEN_OBJECT_END) {
        // first the member name. save the token in case we have to throw an exception
        const Token& tokenName = tokenStream.Peek();
        std::string const& name = MatchExpectedToken(Token::TOKEN_STRING, tokenStream);

        if (object.count(name))
            throw ParseException("Duplicate object member token: " + name, tokenName.locBegin, tokenName.locEnd);

        // ...then the key/value separator...
        MatchExpectedToken(Token::TOKEN_MEMBER_ASSIGN, tokenStream);

        // ...then the value itself (can be anything).
        object[name] = Parse(tokenStream);

        if (!tokenStream.EOS() && tokenStream.Peek().nType != Token::TOKEN_OBJECT_END)
            MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream);
    }

    MatchExpectedToken(Token::TOKEN_OBJECT_END, tokenStream);

    return object;
}

/// Parses `[ value, ... ]`.
/// @throws ParseException on an unexpected token.
Array Reader::ParseArray(Reader::TokenStream& tokenStream) {
    MatchExpectedToken(Token::TOKEN_ARRAY_BEGIN, tokenStream);

    Array array;

    while (!tokenStream.EOS() && tokenStream.Peek().nType != Token::TOKEN_ARRAY_END) {
        array.push_back(Parse(tokenStream));

        if (!tokenStream.EOS() && tokenStream.Peek().nType != Token::TOKEN_ARRAY_END)
            MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream);
    }

    MatchExpectedToken(Token::TOKEN_ARRAY_END, tokenStream);

    return array;
}

/// Consumes a STRING token and returns its text.
String Reader::ParseString(Reader::TokenStream& tokenStream) {
    return MatchExpectedToken(Token::TOKEN_STRING, tokenStream);
}

/// Consumes a NUMBER token and converts its text to a double.
/// @throws ParseException if the text is not a number at all (for example a
///         lone "-") or if characters remain after the number.
Number Reader::ParseNumber(Reader::TokenStream& tokenStream) {
    const Token& currentToken = tokenStream.Peek(); // might need this later for throwing exception
    const std::string& sValue = MatchExpectedToken(Token::TOKEN_NUMBER, tokenStream);

    std::istringstream iStr(sValue);
    double dValue = 0.0;
    iStr >> dValue;

    // A failed extraction can still leave eofbit set (e.g. the token "-"), so
    // the eof test alone would silently accept it.
    if (iStr.fail())
        throw ParseException("Invalid NUMBER token: " + sValue, currentToken.locBegin, currentToken.locEnd);

    // did we consume all characters in the token?
    if (!iStr.eof()) {
        // Append the offending character to a std::string; adding the int
        // from peek() to the literal would be pointer arithmetic.
        std::string sMessage("Unexpected character in NUMBER token: ");
        sMessage.push_back(static_cast<char>(iStr.peek()));
        throw ParseException(sMessage, currentToken.locBegin, currentToken.locEnd);
    }

    return dValue;
}

/// Consumes a BOOLEAN token; the result is true only for the text "true".
Boolean Reader::ParseBoolean(Reader::TokenStream& tokenStream) {
    return MatchExpectedToken(Token::TOKEN_BOOLEAN, tokenStream) == "true";
}

/// Consumes a NULL token.
Null Reader::ParseNull(Reader::TokenStream& tokenStream) {
    MatchExpectedToken(Token::TOKEN_NULL, tokenStream);
    return Null();
}

/// Consumes the next token, requiring it to be of type nExpected, and returns
/// a reference to its text (owned by the token list).
/// @throws ParseException if the stream is exhausted or the type differs.
const std::string& Reader::MatchExpectedToken(Token::Type nExpected, Reader::TokenStream& tokenStream) {
    if (tokenStream.EOS()) {
        throw ParseException("Unexpected End of token stream", Location(), Location()); // nowhere to point to
    }

    const Token& token = tokenStream.Get();
    if (token.nType != nExpected) {
        throw ParseException("Unexpected token: " + token.sValue, token.locBegin, token.locEnd);
    }

    return token.sValue;
}

} // End namespace