07da6f6f1b
Trying to decide whether an option should be an int or double after discarding the differences between "1.0" and "1" simply isn't possible, and even if an option was initialized correctly, if it was changed to a round number it could get written as an int and break later. Also convert cajun to tabs because three spaces to indent is terrible. Originally committed to SVN as r6018.
378 lines
11 KiB
C++
378 lines
11 KiB
C++
/**********************************************
|
|
|
|
License: BSD
|
|
Project Webpage: http://cajun-jsonapi.sourceforge.net/
|
|
Author: Terry Caton
|
|
|
|
***********************************************/
|
|
|
|
#include "libaegisub/cajun/reader.h"
|
|
|
|
#ifndef LAGI_PRE
|
|
#include <cassert>
|
|
#include <set>
|
|
#include <sstream>
|
|
#endif
|
|
|
|
/*
|
|
|
|
TODO:
|
|
* better documentation
|
|
* unicode character decoding
|
|
|
|
*/
|
|
|
|
namespace json {
|
|
|
|
/// @brief Stream extraction for a JSON element of unknown type
/// @param istr        Stream the element is read from
/// @param elementRoot Element that receives whatever Reader::Read produces
/// @return istr, so extractions can be chained
///
/// Forwards to Reader::Read; any exception it throws propagates to the caller.
std::istream& operator >> (std::istream& istr, UnknownElement& elementRoot)
{
	Reader::Read(elementRoot, istr);
	return istr;
}
|
|
|
|
/// Wrapper around istream to keep track of document/line offsets
|
|
class Reader::InputStream {
|
|
std::istream& m_iStr;
|
|
Location m_Location;
|
|
public:
|
|
InputStream(std::istream& iStr) : m_iStr(iStr) { }
|
|
|
|
int Get() {
|
|
assert(!m_iStr.eof());
|
|
int c = m_iStr.get();
|
|
|
|
++m_Location.m_nDocOffset;
|
|
if (c == '\n') {
|
|
++m_Location.m_nLine;
|
|
m_Location.m_nLineOffset = 0;
|
|
}
|
|
else {
|
|
++m_Location.m_nLineOffset;
|
|
}
|
|
|
|
return c;
|
|
}
|
|
|
|
int Peek() {
|
|
assert(!m_iStr.eof());
|
|
return m_iStr.peek();
|
|
}
|
|
|
|
bool EOS() {
|
|
m_iStr.peek(); // apparently eof flag isn't set until a character read is attempted. whatever.
|
|
return m_iStr.eof();
|
|
}
|
|
|
|
Location const& GetLocation() const { return m_Location; }
|
|
};
|
|
|
|
/// Forward-only cursor over an already scanned token list.
/// Only a reference to the list is kept, so the list has to outlive the cursor.
class Reader::TokenStream {
	Tokens const& m_Tokens;             ///< Token list being walked
	Tokens::const_iterator m_itCurrent; ///< Next token to hand out

public:
	TokenStream(Tokens const& tokens)
	: m_Tokens(tokens)
	, m_itCurrent(tokens.begin())
	{ }

	/// Have all tokens been handed out?
	bool EOS() const { return m_itCurrent == m_Tokens.end(); }

	/// Next token, without advancing. Must not be called at the end of the stream.
	Token const& Peek() {
		assert(!EOS());
		return *m_itCurrent;
	}

	/// Next token, advancing past it. Must not be called at the end of the stream.
	Token const& Get() {
		assert(!EOS());
		Token const& current = *m_itCurrent;
		++m_itCurrent;
		return current;
	}
};
|
|
|
|
void Reader::Read(Object& object, std::istream& istr) { Read_i(object, istr); }
|
|
void Reader::Read(Array& array, std::istream& istr) { Read_i(array, istr); }
|
|
void Reader::Read(String& string, std::istream& istr) { Read_i(string, istr); }
|
|
void Reader::Read(Integer& number, std::istream& istr) { Read_i(number, istr); }
|
|
void Reader::Read(Double& number, std::istream& istr) { Read_i(number, istr); }
|
|
void Reader::Read(Boolean& boolean, std::istream& istr) { Read_i(boolean, istr); }
|
|
void Reader::Read(Null& null, std::istream& istr) { Read_i(null, istr); }
|
|
void Reader::Read(UnknownElement& unknown, std::istream& istr) { Read_i(unknown, istr); }
|
|
|
|
template <typename ElementTypeT>
|
|
void Reader::Read_i(ElementTypeT& element, std::istream& istr) {
|
|
Reader reader;
|
|
|
|
Tokens tokens;
|
|
InputStream inputStream(istr);
|
|
reader.Scan(tokens, inputStream);
|
|
|
|
TokenStream tokenStream(tokens);
|
|
element = reader.Parse(tokenStream);
|
|
|
|
if (!tokenStream.EOS()) {
|
|
Token const& token = tokenStream.Peek();
|
|
throw ParseException("Expected End of token stream; found " + token.sValue, token.locBegin, token.locEnd);
|
|
}
|
|
}
|
|
|
|
/// @brief Split the input into a flat list of tokens
/// @param[out] tokens  Receives one entry per token, appended in input order
/// @param inputStream  Character source; read until it is exhausted
///
/// Throws ScanException for a character that cannot start any token, and
/// lets through whatever the Match* helpers throw for a malformed token.
void Reader::Scan(Tokens& tokens, InputStream& inputStream) {
	for (;;) {
		EatWhiteSpace(inputStream);
		if (inputStream.EOS())
			break;

		// Every pass that does not throw yields exactly one token
		Token token;
		token.locBegin = inputStream.GetLocation();

		// The upcoming character alone decides the token kind. It is kept as
		// a one-character string so it can be passed to MatchExpectedString
		// and spliced into the error message.
		const std::string sLead(1, static_cast<char>(inputStream.Peek()));

		switch (sLead[0]) {
			// Single-character structural tokens
			case '{':
				token.nType = Token::TOKEN_OBJECT_BEGIN;
				token.sValue = sLead;
				MatchExpectedString(sLead, inputStream);
				break;

			case '}':
				token.nType = Token::TOKEN_OBJECT_END;
				token.sValue = sLead;
				MatchExpectedString(sLead, inputStream);
				break;

			case '[':
				token.nType = Token::TOKEN_ARRAY_BEGIN;
				token.sValue = sLead;
				MatchExpectedString(sLead, inputStream);
				break;

			case ']':
				token.nType = Token::TOKEN_ARRAY_END;
				token.sValue = sLead;
				MatchExpectedString(sLead, inputStream);
				break;

			case ',':
				token.nType = Token::TOKEN_NEXT_ELEMENT;
				token.sValue = sLead;
				MatchExpectedString(sLead, inputStream);
				break;

			case ':':
				token.nType = Token::TOKEN_MEMBER_ASSIGN;
				token.sValue = sLead;
				MatchExpectedString(sLead, inputStream);
				break;

			// Quoted string
			case '"':
				token.nType = Token::TOKEN_STRING;
				MatchString(token.sValue, inputStream);
				break;

			// Number: optional leading minus or a digit
			case '-':
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				token.nType = Token::TOKEN_NUMBER;
				MatchNumber(token.sValue, inputStream);
				break;

			// Keyword literals: the first letter selects which word must follow
			case 't':
				token.nType = Token::TOKEN_BOOLEAN;
				token.sValue = "true";
				MatchExpectedString(token.sValue, inputStream);
				break;

			case 'f':
				token.nType = Token::TOKEN_BOOLEAN;
				token.sValue = "false";
				MatchExpectedString(token.sValue, inputStream);
				break;

			case 'n':
				token.nType = Token::TOKEN_NULL;
				token.sValue = "null";
				MatchExpectedString(token.sValue, inputStream);
				break;

			default:
				throw ScanException("Unexpected character in stream: " + sLead, inputStream.GetLocation());
		}

		token.locEnd = inputStream.GetLocation();
		tokens.push_back(token);
	}
}
|
|
|
|
|
|
/// @brief Discard characters up to the next non-whitespace one
/// @param inputStream Stream to advance; left at the first character for
///                    which isspace is false, or at its end
void Reader::EatWhiteSpace(InputStream& inputStream) {
	for (;;) {
		if (inputStream.EOS())
			return;
		if (!::isspace(inputStream.Peek()))
			return;
		inputStream.Get();
	}
}
|
|
|
|
void Reader::MatchExpectedString(std::string const& sExpected, InputStream& inputStream) {
|
|
std::string::const_iterator it(sExpected.begin()), itEnd(sExpected.end());
|
|
for ( ; it != itEnd; ++it) {
|
|
if (inputStream.EOS() || // did we reach the end before finding what we're looking for...
|
|
inputStream.Get() != *it) // ...or did we find something different?
|
|
{
|
|
throw ScanException("Expected string: " + sExpected, inputStream.GetLocation());
|
|
}
|
|
}
|
|
}
|
|
|
|
void Reader::MatchString(std::string& string, InputStream& inputStream) {
|
|
MatchExpectedString("\"", inputStream);
|
|
|
|
while (!inputStream.EOS() && inputStream.Peek() != '"') {
|
|
char c = inputStream.Get();
|
|
|
|
// escape?
|
|
if (c == '\\' && !inputStream.EOS()) { // shouldn't have reached the end yet
|
|
c = inputStream.Get();
|
|
switch (c) {
|
|
case '/': string.push_back('/'); break;
|
|
case '"': string.push_back('"'); break;
|
|
case '\\': string.push_back('\\'); break;
|
|
case 'b': string.push_back('\b'); break;
|
|
case 'f': string.push_back('\f'); break;
|
|
case 'n': string.push_back('\n'); break;
|
|
case 'r': string.push_back('\r'); break;
|
|
case 't': string.push_back('\t'); break;
|
|
case 'u': // TODO: what do we do with this?
|
|
default:
|
|
throw ScanException("Unrecognized escape sequence found in string: \\" + c, inputStream.GetLocation());
|
|
}
|
|
}
|
|
else {
|
|
string.push_back(c);
|
|
}
|
|
}
|
|
|
|
// eat the last '"' that we hopefully just peeked
|
|
MatchExpectedString("\"", inputStream);
|
|
}
|
|
|
|
void Reader::MatchNumber(std::string& sNumber, InputStream& inputStream) {
|
|
const char sNumericChars[] = "0123456789.eE-+";
|
|
std::set<char> numericChars;
|
|
numericChars.insert(sNumericChars, sNumericChars + sizeof(sNumericChars));
|
|
|
|
while (!inputStream.EOS()&& numericChars.count(inputStream.Peek()))
|
|
sNumber.push_back(inputStream.Get());
|
|
}
|
|
|
|
/// @brief Parse the next complete element, whatever its type
/// @param tokenStream Token cursor, positioned at the element's first token
/// @return The parsed element
///
/// Dispatches on the type of the next token. Throws ParseException if the
/// stream is already exhausted or the token cannot begin an element.
UnknownElement Reader::Parse(Reader::TokenStream& tokenStream) {
	if (tokenStream.EOS())
		throw ParseException("Unexpected end of token stream", Location(), Location()); // nowhere to point to

	// Only peeked: the type-specific parser consumes the token itself
	Token const& lookahead = tokenStream.Peek();

	if (lookahead.nType == Token::TOKEN_OBJECT_BEGIN)
		return ParseObject(tokenStream);
	if (lookahead.nType == Token::TOKEN_ARRAY_BEGIN)
		return ParseArray(tokenStream);
	if (lookahead.nType == Token::TOKEN_STRING)
		return ParseString(tokenStream);
	if (lookahead.nType == Token::TOKEN_NUMBER)
		return ParseNumber(tokenStream);
	if (lookahead.nType == Token::TOKEN_BOOLEAN)
		return ParseBoolean(tokenStream);
	if (lookahead.nType == Token::TOKEN_NULL)
		return ParseNull(tokenStream);

	throw ParseException("Unexpected token: " + lookahead.sValue, lookahead.locBegin, lookahead.locEnd);
}
|
|
|
|
/// @brief Parse an object: '{' followed by name/value members and '}'
/// @param tokenStream Token cursor, positioned at the opening brace
/// @return The parsed object
///
/// Throws ParseException for a repeated member name, and via
/// MatchExpectedToken for any token which is out of place.
UnknownElement Reader::ParseObject(Reader::TokenStream& tokenStream) {
	MatchExpectedToken(Token::TOKEN_OBJECT_BEGIN, tokenStream);

	Object object;

	while (!tokenStream.EOS()) {
		if (tokenStream.Peek().nType == Token::TOKEN_OBJECT_END)
			break;

		// Member name; hold on to its token so a duplicate can be reported
		// with the right location
		Token const& nameToken = tokenStream.Peek();
		std::string const& name = MatchExpectedToken(Token::TOKEN_STRING, tokenStream);

		if (object.count(name))
			throw ParseException("Duplicate object member token: " + name, nameToken.locBegin, nameToken.locEnd);

		// Separator between name and value
		MatchExpectedToken(Token::TOKEN_MEMBER_ASSIGN, tokenStream);

		// The value may be an element of any type
		object[name] = Parse(tokenStream);

		// A separator is required unless the object (or the input) ends here;
		// in that case the loop condition takes care of leaving
		if (tokenStream.EOS() || tokenStream.Peek().nType == Token::TOKEN_OBJECT_END)
			continue;

		MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream);
	}

	MatchExpectedToken(Token::TOKEN_OBJECT_END, tokenStream);

	return object;
}
|
|
|
|
/// @brief Parse an array: '[' followed by elements and ']'
/// @param tokenStream Token cursor, positioned at the opening bracket
/// @return The parsed array
///
/// Throws ParseException via MatchExpectedToken / Parse for any token which
/// is out of place.
UnknownElement Reader::ParseArray(Reader::TokenStream& tokenStream) {
	MatchExpectedToken(Token::TOKEN_ARRAY_BEGIN, tokenStream);

	Array array;

	while (!tokenStream.EOS()) {
		if (tokenStream.Peek().nType == Token::TOKEN_ARRAY_END)
			break;

		array.push_back(Parse(tokenStream));

		// A separator is required unless the array (or the input) ends here;
		// in that case the loop condition takes care of leaving
		if (tokenStream.EOS() || tokenStream.Peek().nType == Token::TOKEN_ARRAY_END)
			continue;

		MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream);
	}

	MatchExpectedToken(Token::TOKEN_ARRAY_END, tokenStream);

	return array;
}
|
|
|
|
/// @brief Parse a string element
/// @param tokenStream Token cursor, positioned at a string token
/// @return Element built from the token's text
UnknownElement Reader::ParseString(Reader::TokenStream& tokenStream) {
	std::string const& sValue = MatchExpectedToken(Token::TOKEN_STRING, tokenStream);
	return sValue;
}
|
|
|
|
/// @brief Parse a number element as either an integer or a double
/// @param tokenStream Token cursor, positioned at a number token
/// @return An integer element if the whole token reads as an int64_t,
///         otherwise a double element
///
/// Throws ParseException if the token reads as neither. A token is only
/// accepted when the extraction succeeded and consumed every character.
UnknownElement Reader::ParseNumber(Reader::TokenStream& tokenStream) {
	Token const& currentToken = tokenStream.Peek(); // might need this later for throwing exception
	std::string const& sValue = MatchExpectedToken(Token::TOKEN_NUMBER, tokenStream);

	// First try to parse it as an int
	std::istringstream iStr(sValue);
	int64_t iValue = 0;
	iStr >> iValue;

	// Reaching the end is not enough on its own: a lone "-" or an integer
	// too large for int64_t also ends at eof, but with the extraction failed
	if (!iStr.fail() && iStr.eof())
		return iValue;

	// Try again as a double. The state flags have to be reset first, since
	// seekg does nothing on a stream which is in a failed state.
	iStr.clear();
	iStr.seekg(0, std::ios::beg);
	double dValue = 0.0;
	iStr >> dValue;

	// A failed extraction or leftover characters both mean the token is malformed
	if (iStr.fail() || !iStr.eof()) {
		iStr.clear();
		const std::istringstream::int_type next = iStr.peek();

		// Report the offending character where there is one, and otherwise
		// (the token ended too early) the token itself. The message is built
		// as a std::string because adding an int to a string literal is
		// pointer arithmetic, not concatenation.
		std::string message("Unexpected character in NUMBER token: ");
		if (next != std::istringstream::traits_type::eof())
			message += std::istringstream::traits_type::to_char_type(next);
		else
			message += sValue;

		throw ParseException(message, currentToken.locBegin, currentToken.locEnd);
	}

	return dValue;
}
|
|
|
|
/// @brief Parse a boolean element
/// @param tokenStream Token cursor, positioned at a boolean token
/// @return Element holding true if the token's text is "true", false otherwise
UnknownElement Reader::ParseBoolean(Reader::TokenStream& tokenStream) {
	std::string const& sValue = MatchExpectedToken(Token::TOKEN_BOOLEAN, tokenStream);
	const bool bValue = (sValue == "true");
	return bValue;
}
|
|
|
|
/// @brief Parse a null element
/// @param tokenStream Token cursor, positioned at a null token
/// @return Element holding a default-constructed Null
UnknownElement Reader::ParseNull(Reader::TokenStream& tokenStream)
{
	// The token carries no payload; it only has to be present and consumed
	MatchExpectedToken(Token::TOKEN_NULL, tokenStream);

	return Null();
}
|
|
|
|
/// @brief Consume the next token, insisting that it has a particular type
/// @param nExpected   Type the next token must have
/// @param tokenStream Token cursor to consume from
/// @return Text of the consumed token; a reference into the token itself
///
/// Throws ParseException if the stream is exhausted or the token has a
/// different type. In the latter case the token has already been consumed.
std::string const& Reader::MatchExpectedToken(Token::Type nExpected, Reader::TokenStream& tokenStream) {
	if (tokenStream.EOS())
		throw ParseException("Unexpected End of token stream", Location(), Location()); // nowhere to point to

	Token const& next = tokenStream.Get();
	if (next.nType == nExpected)
		return next.sValue;

	throw ParseException("Unexpected token: " + next.sValue, next.locBegin, next.locEnd);
}
|
|
|
|
}
|