3097dc634e
Originally committed to SVN as r6005.
423 lines
12 KiB
C++
423 lines
12 KiB
C++
/**********************************************
|
|
|
|
License: BSD
|
|
Project Webpage: http://cajun-jsonapi.sourceforge.net/
|
|
Author: Terry Caton
|
|
|
|
***********************************************/
|
|
|
|
#include "libaegisub/cajun/reader.h"
|
|
|
|
#ifndef LAGI_PRE
|
|
#include <cassert>
|
|
#include <set>
|
|
#include <sstream>
|
|
#endif
|
|
|
|
/*
|
|
|
|
TODO:
|
|
* better documentation
|
|
* unicode character decoding
|
|
|
|
*/
|
|
|
|
namespace json
|
|
{
|
|
|
|
// Stream-extraction form of Reader::Read: fills elementRoot from istr and
// returns the stream so extractions can be chained.
std::istream& operator >> (std::istream& istr, UnknownElement& elementRoot)
{
	Reader::Read(elementRoot, istr);

	return istr;
}
|
|
|
|
// A default-constructed Location has line, line offset and document offset
// all set to zero.
Reader::Location::Location()
	: m_nLine(0)
	, m_nLineOffset(0)
	, m_nDocOffset(0)
{
}
|
|
|
|
|
|
//////////////////////
|
|
// Reader::InputStream
|
|
|
|
// wrapper around istream to keep track of document/line offsets
|
|
class Reader::InputStream
|
|
{
|
|
std::istream& m_iStr;
|
|
Location m_Location;
|
|
public:
|
|
InputStream(std::istream& iStr) : m_iStr(iStr) { }
|
|
|
|
int Get() {
|
|
assert(!m_iStr.eof());
|
|
int c = m_iStr.get();
|
|
|
|
++m_Location.m_nDocOffset;
|
|
if (c == '\n') {
|
|
++m_Location.m_nLine;
|
|
m_Location.m_nLineOffset = 0;
|
|
}
|
|
else {
|
|
++m_Location.m_nLineOffset;
|
|
}
|
|
|
|
return c;
|
|
}
|
|
int Peek() {
|
|
assert(!m_iStr.eof());
|
|
return m_iStr.peek();
|
|
}
|
|
|
|
bool EOS() {
|
|
m_iStr.peek(); // apparently eof flag isn't set until a character read is attempted. whatever.
|
|
return m_iStr.eof();
|
|
}
|
|
|
|
const Location& GetLocation() const { return m_Location; }
|
|
};
|
|
|
|
//////////////////////
|
|
// Reader::TokenStream
|
|
|
|
class Reader::TokenStream
|
|
{
|
|
const Tokens& m_Tokens;
|
|
Tokens::const_iterator m_itCurrent;
|
|
|
|
public:
|
|
TokenStream(const Tokens& tokens)
|
|
: m_Tokens(tokens), m_itCurrent(tokens.begin())
|
|
{ }
|
|
|
|
const Token& Peek() {
|
|
assert(!EOS());
|
|
return *m_itCurrent;
|
|
}
|
|
const Token& Get() {
|
|
assert(!EOS());
|
|
return *m_itCurrent++;
|
|
}
|
|
|
|
bool EOS() const {
|
|
return m_itCurrent == m_Tokens.end();
|
|
}
|
|
};
|
|
|
|
///////////////////
|
|
// Reader (finally)
|
|
void Reader::Read(Object& object, std::istream& istr) { Read_i(object, istr); }
|
|
void Reader::Read(Array& array, std::istream& istr) { Read_i(array, istr); }
|
|
void Reader::Read(String& string, std::istream& istr) { Read_i(string, istr); }
|
|
void Reader::Read(Number& number, std::istream& istr) { Read_i(number, istr); }
|
|
void Reader::Read(Boolean& boolean, std::istream& istr) { Read_i(boolean, istr); }
|
|
void Reader::Read(Null& null, std::istream& istr) { Read_i(null, istr); }
|
|
void Reader::Read(UnknownElement& unknown, std::istream& istr) { Read_i(unknown, istr); }
|
|
|
|
template <typename ElementTypeT>
|
|
void Reader::Read_i(ElementTypeT& element, std::istream& istr)
|
|
{
|
|
Reader reader;
|
|
|
|
Tokens tokens;
|
|
InputStream inputStream(istr);
|
|
reader.Scan(tokens, inputStream);
|
|
|
|
TokenStream tokenStream(tokens);
|
|
element = reader.Parse(tokenStream);
|
|
|
|
if (!tokenStream.EOS())
|
|
{
|
|
const Token& token = tokenStream.Peek();
|
|
throw ParseException("Expected End of token stream; found " + token.sValue, token.locBegin, token.locEnd);
|
|
}
|
|
}
|
|
|
|
// Tokenizes the whole input stream, appending one Token per lexeme to
// `tokens`. The first non-whitespace character decides the token kind.
// Throws ScanException on a character that cannot start any token.
void Reader::Scan(Tokens& tokens, InputStream& inputStream)
{
	for (;;)
	{
		EatWhiteSpace(inputStream);
		if (inputStream.EOS())
			break;

		// one token is produced per iteration unless something throws
		Token token;
		token.locBegin = inputStream.GetLocation();

		// keep the lookahead character as a one-character string so it can
		// be used directly as a token value and in error messages
		const std::string sLead(1, static_cast<char>(inputStream.Peek()));

		switch (sLead[0])
		{
			// single-character structural tokens
			case '{':
				token.nType = Token::TOKEN_OBJECT_BEGIN;
				token.sValue = sLead;
				MatchExpectedString(sLead, inputStream);
				break;

			case '}':
				token.nType = Token::TOKEN_OBJECT_END;
				token.sValue = sLead;
				MatchExpectedString(sLead, inputStream);
				break;

			case '[':
				token.nType = Token::TOKEN_ARRAY_BEGIN;
				token.sValue = sLead;
				MatchExpectedString(sLead, inputStream);
				break;

			case ']':
				token.nType = Token::TOKEN_ARRAY_END;
				token.sValue = sLead;
				MatchExpectedString(sLead, inputStream);
				break;

			case ',':
				token.nType = Token::TOKEN_NEXT_ELEMENT;
				token.sValue = sLead;
				MatchExpectedString(sLead, inputStream);
				break;

			case ':':
				token.nType = Token::TOKEN_MEMBER_ASSIGN;
				token.sValue = sLead;
				MatchExpectedString(sLead, inputStream);
				break;

			// quoted string
			case '"':
				token.nType = Token::TOKEN_STRING;
				MatchString(token.sValue, inputStream);
				break;

			// number: leading minus sign or digit
			case '-':
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				token.nType = Token::TOKEN_NUMBER;
				MatchNumber(token.sValue, inputStream);
				break;

			// keyword literals: the stream must spell out the whole word
			case 't':
				token.nType = Token::TOKEN_BOOLEAN;
				token.sValue = "true";
				MatchExpectedString(token.sValue, inputStream);
				break;

			case 'f':
				token.nType = Token::TOKEN_BOOLEAN;
				token.sValue = "false";
				MatchExpectedString(token.sValue, inputStream);
				break;

			case 'n':
				token.nType = Token::TOKEN_NULL;
				token.sValue = "null";
				MatchExpectedString(token.sValue, inputStream);
				break;

			default:
				throw ScanException("Unexpected character in stream: " + sLead, inputStream.GetLocation());
		}

		token.locEnd = inputStream.GetLocation();
		tokens.push_back(token);
	}
}
|
|
|
|
|
|
// Consumes characters from the stream until the next one is not whitespace
// (as judged by ::isspace) or the stream is exhausted.
void Reader::EatWhiteSpace(InputStream& inputStream)
{
	for (;;)
	{
		if (inputStream.EOS())
			return;
		if (!::isspace(inputStream.Peek()))
			return;

		inputStream.Get();
	}
}
|
|
|
|
void Reader::MatchExpectedString(const std::string& sExpected, InputStream& inputStream)
|
|
{
|
|
std::string::const_iterator it(sExpected.begin()),
|
|
itEnd(sExpected.end());
|
|
for ( ; it != itEnd; ++it) {
|
|
if (inputStream.EOS() || // did we reach the end before finding what we're looking for...
|
|
inputStream.Get() != *it) // ...or did we find something different?
|
|
{
|
|
throw ScanException("Expected string: " + sExpected, inputStream.GetLocation());
|
|
}
|
|
}
|
|
|
|
// all's well if we made it here, return quietly
|
|
}
|
|
|
|
|
|
void Reader::MatchString(std::string& string, InputStream& inputStream)
|
|
{
|
|
MatchExpectedString("\"", inputStream);
|
|
|
|
while (inputStream.EOS() == false &&
|
|
inputStream.Peek() != '"')
|
|
{
|
|
char c = inputStream.Get();
|
|
|
|
// escape?
|
|
if (c == '\\' &&
|
|
inputStream.EOS() == false) // shouldn't have reached the end yet
|
|
{
|
|
c = inputStream.Get();
|
|
switch (c) {
|
|
case '/': string.push_back('/'); break;
|
|
case '"': string.push_back('"'); break;
|
|
case '\\': string.push_back('\\'); break;
|
|
case 'b': string.push_back('\b'); break;
|
|
case 'f': string.push_back('\f'); break;
|
|
case 'n': string.push_back('\n'); break;
|
|
case 'r': string.push_back('\r'); break;
|
|
case 't': string.push_back('\t'); break;
|
|
case 'u': // TODO: what do we do with this?
|
|
default:
|
|
throw ScanException("Unrecognized escape sequence found in string: \\" + c, inputStream.GetLocation());
|
|
}
|
|
}
|
|
else {
|
|
string.push_back(c);
|
|
}
|
|
}
|
|
|
|
// eat the last '"' that we just peeked
|
|
MatchExpectedString("\"", inputStream);
|
|
}
|
|
|
|
|
|
void Reader::MatchNumber(std::string& sNumber, InputStream& inputStream)
|
|
{
|
|
const char sNumericChars[] = "0123456789.eE-+";
|
|
std::set<char> numericChars;
|
|
numericChars.insert(sNumericChars, sNumericChars + sizeof(sNumericChars));
|
|
|
|
while (inputStream.EOS() == false &&
|
|
numericChars.find(inputStream.Peek()) != numericChars.end())
|
|
{
|
|
sNumber.push_back(inputStream.Get());
|
|
}
|
|
}
|
|
|
|
|
|
// Parses one element of any kind, dispatching on the type of the next
// token. Throws ParseException if the token stream is empty or the next
// token cannot begin an element.
UnknownElement Reader::Parse(Reader::TokenStream& tokenStream)
{
	if (tokenStream.EOS())
	{
		// there is no token whose location could be reported
		throw ParseException("Unexpected end of token stream", Location(), Location());
	}

	Token const& next = tokenStream.Peek();
	switch (next.nType)
	{
		case Token::TOKEN_OBJECT_BEGIN:
			return ParseObject(tokenStream);

		case Token::TOKEN_ARRAY_BEGIN:
			return ParseArray(tokenStream);

		case Token::TOKEN_STRING:
			return ParseString(tokenStream);

		case Token::TOKEN_NUMBER:
			return ParseNumber(tokenStream);

		case Token::TOKEN_BOOLEAN:
			return ParseBoolean(tokenStream);

		case Token::TOKEN_NULL:
			return ParseNull(tokenStream);

		default:
			break;
	}

	throw ParseException("Unexpected token: " + next.sValue, next.locBegin, next.locEnd);
}
|
|
|
|
// Parses an object: an object-begin token, zero or more
// name / member-assign / value groups separated by next-element tokens,
// then an object-end token. Throws ParseException on a repeated member
// name or on any token that does not fit this shape.
Object Reader::ParseObject(Reader::TokenStream& tokenStream)
{
	MatchExpectedToken(Token::TOKEN_OBJECT_BEGIN, tokenStream);

	Object object;

	for (;;)
	{
		if (tokenStream.EOS() || tokenStream.Peek().nType == Token::TOKEN_OBJECT_END)
			break;

		// remember the name token so a duplicate can be reported at its location
		const Token& nameToken = tokenStream.Peek();
		std::string const& name = MatchExpectedToken(Token::TOKEN_STRING, tokenStream);

		if (object.count(name) != 0)
		{
			throw ParseException("Duplicate object member token: " + name, nameToken.locBegin, nameToken.locEnd);
		}

		// the separator between name and value
		MatchExpectedToken(Token::TOKEN_MEMBER_ASSIGN, tokenStream);

		// the value, which may be any kind of element
		object[name] = Parse(tokenStream);

		// anything other than the end of the object must be a separator
		const bool bAtObjectEnd = tokenStream.EOS() || tokenStream.Peek().nType == Token::TOKEN_OBJECT_END;
		if (!bAtObjectEnd)
		{
			MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream);
		}
	}

	MatchExpectedToken(Token::TOKEN_OBJECT_END, tokenStream);

	return object;
}
|
|
|
|
// Parses an array: an array-begin token, zero or more elements separated by
// next-element tokens, then an array-end token.
Array Reader::ParseArray(Reader::TokenStream& tokenStream)
{
	MatchExpectedToken(Token::TOKEN_ARRAY_BEGIN, tokenStream);

	Array array;

	for (;;)
	{
		if (tokenStream.EOS() || tokenStream.Peek().nType == Token::TOKEN_ARRAY_END)
			break;

		array.push_back(Parse(tokenStream));

		// anything other than the end of the array must be a separator
		const bool bAtArrayEnd = tokenStream.EOS() || tokenStream.Peek().nType == Token::TOKEN_ARRAY_END;
		if (!bAtArrayEnd)
		{
			MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream);
		}
	}

	MatchExpectedToken(Token::TOKEN_ARRAY_END, tokenStream);

	return array;
}
|
|
|
|
// Consumes a string token and returns its value.
String Reader::ParseString(Reader::TokenStream& tokenStream)
{
	const std::string& sValue = MatchExpectedToken(Token::TOKEN_STRING, tokenStream);
	return sValue;
}
|
|
|
|
// Consumes a number token and converts its text to a double.
//
// Throws ParseException (located at the token) when the conversion stops
// before the end of the token text, i.e. the token has trailing characters
// that are not part of a number.
Number Reader::ParseNumber(Reader::TokenStream& tokenStream)
{
	const Token& currentToken = tokenStream.Peek(); // might need this later for throwing exception
	const std::string& sValue = MatchExpectedToken(Token::TOKEN_NUMBER, tokenStream);

	std::istringstream iStr(sValue);
	double dValue = 0.0; // defined value even if extraction fails
	iStr >> dValue;

	// did we consume all characters in the token?
	if (!iStr.eof())
	{
		// Build the message as a std::string: adding an int to a string
		// literal is pointer arithmetic, not concatenation.
		std::string sMessage("Unexpected character in NUMBER token: ");

		// peek() yields eof() instead of a character when the stream is in
		// a failed state; only append a real character
		const std::istringstream::int_type nNext = iStr.peek();
		if (nNext != std::istringstream::traits_type::eof())
			sMessage.push_back(static_cast<char>(nNext));

		throw ParseException(sMessage, currentToken.locBegin, currentToken.locEnd);
	}

	return dValue;
}
|
|
|
|
// Consumes a boolean token; the result is true exactly when the token text
// is "true".
Boolean Reader::ParseBoolean(Reader::TokenStream& tokenStream)
{
	const std::string& sValue = MatchExpectedToken(Token::TOKEN_BOOLEAN, tokenStream);
	return sValue == "true";
}
|
|
|
|
// Consumes a null token and returns a default-constructed Null.
Null Reader::ParseNull(Reader::TokenStream& tokenStream)
{
	// only the presence of the token matters; its text is not used
	MatchExpectedToken(Token::TOKEN_NULL, tokenStream);

	return Null();
}
|
|
|
|
|
|
// Consumes the next token and returns a reference to its text, provided the
// token has type nExpected. Throws ParseException when the stream is already
// exhausted or the token has a different type (the token is consumed either
// way).
const std::string& Reader::MatchExpectedToken(Token::Type nExpected, Reader::TokenStream& tokenStream)
{
	if (tokenStream.EOS())
		throw ParseException("Unexpected End of token stream", Location(), Location()); // no token to take a location from

	const Token& consumed = tokenStream.Get();
	if (consumed.nType == nExpected)
		return consumed.sValue;

	throw ParseException("Unexpected token: " + consumed.sValue, consumed.locBegin, consumed.locEnd);
}
|
|
|
|
} // End namespace
|