Aegisub/aegisub/libaegisub/common/cajun/reader.cpp
Thomas Goyne 07da6f6f1b Split json::Number into json::Double and json::Integer
Trying to decide whether an option should be an int or double after
discarding the differences between "1.0" and "1" simply isn't possible,
and even if an option was initialized correctly, if it was changed to a
round number it could get written as an int and break later.

Also convert cajun to tabs because three spaces to indent is terrible.

Originally committed to SVN as r6018.
2011-12-22 21:12:25 +00:00

379 lines
11 KiB
C++

/**********************************************
License: BSD
Project Webpage: http://cajun-jsonapi.sourceforge.net/
Author: Terry Caton
***********************************************/
#include "libaegisub/cajun/reader.h"
#ifndef LAGI_PRE
#include <cassert>
#include <set>
#include <sstream>
#endif
/*
TODO:
* better documentation
* unicode character decoding
*/
namespace json {
/// Stream extraction for JSON elements.
///
/// Hands the stream to Reader::Read, which fills in elementRoot, and then
/// returns the same stream so that extractions can be chained.
std::istream& operator >> (std::istream& istr, UnknownElement& elementRoot)
{
	Reader::Read(elementRoot, istr);
	return istr;
}
/// Thin adapter over a std::istream which records the document offset,
/// line number and line offset of the read position as characters are consumed
class Reader::InputStream {
	std::istream& m_iStr;  ///< stream being read from (not owned)
	Location m_Location;   ///< location updated by every call to Get()

public:
	InputStream(std::istream& iStr)
	: m_iStr(iStr)
	{
	}

	/// Consume one character and advance the tracked location.
	/// Must not be called once the stream has reported end-of-file.
	int Get() {
		assert(!m_iStr.eof());

		int const ch = m_iStr.get();
		++m_Location.m_nDocOffset;

		if (ch != '\n') {
			++m_Location.m_nLineOffset;
			return ch;
		}

		// A newline starts a fresh line: bump the line counter, rewind the column
		++m_Location.m_nLine;
		m_Location.m_nLineOffset = 0;
		return ch;
	}

	/// Look at the next character without consuming it.
	/// Must not be called once the stream has reported end-of-file.
	int Peek() {
		assert(!m_iStr.eof());
		return m_iStr.peek();
	}

	/// Has the end of the stream been reached?
	bool EOS() {
		// The eof flag is only raised after a read has been attempted,
		// so probe with peek() before asking
		m_iStr.peek();
		return m_iStr.eof();
	}

	/// Location as of the most recent Get()
	Location const& GetLocation() const {
		return m_Location;
	}
};
/// Forward-only cursor over an already scanned token sequence.
/// Only a reference to the sequence is kept, so it must outlive the cursor.
class Reader::TokenStream {
	Tokens const& m_Tokens;              ///< sequence being walked (not owned)
	Tokens::const_iterator m_itCurrent;  ///< next token to be handed out

public:
	TokenStream(Tokens const& tokens)
	: m_Tokens(tokens)
	, m_itCurrent(tokens.begin())
	{
	}

	/// True once every token has been consumed
	bool EOS() const {
		return m_Tokens.end() == m_itCurrent;
	}

	/// Return the next token without consuming it; requires !EOS()
	Token const& Peek() {
		assert(!EOS());
		return *m_itCurrent;
	}

	/// Return the next token and step past it; requires !EOS()
	Token const& Get() {
		assert(!EOS());
		Token const& current = *m_itCurrent;
		++m_itCurrent;
		return current;
	}
};
void Reader::Read(Object& object, std::istream& istr) { Read_i(object, istr); }
void Reader::Read(Array& array, std::istream& istr) { Read_i(array, istr); }
void Reader::Read(String& string, std::istream& istr) { Read_i(string, istr); }
void Reader::Read(Integer& number, std::istream& istr) { Read_i(number, istr); }
void Reader::Read(Double& number, std::istream& istr) { Read_i(number, istr); }
void Reader::Read(Boolean& boolean, std::istream& istr) { Read_i(boolean, istr); }
void Reader::Read(Null& null, std::istream& istr) { Read_i(null, istr); }
void Reader::Read(UnknownElement& unknown, std::istream& istr) { Read_i(unknown, istr); }
template <typename ElementTypeT>
void Reader::Read_i(ElementTypeT& element, std::istream& istr) {
Reader reader;
Tokens tokens;
InputStream inputStream(istr);
reader.Scan(tokens, inputStream);
TokenStream tokenStream(tokens);
element = reader.Parse(tokenStream);
if (!tokenStream.EOS()) {
Token const& token = tokenStream.Peek();
throw ParseException("Expected End of token stream; found " + token.sValue, token.locBegin, token.locEnd);
}
}
/// Split the input into tokens, appending each one to tokens.
///
/// Whitespace between tokens is skipped. The kind of a token is decided by its
/// first character alone; the matching helper then consumes the token text.
/// @throws ScanException if a token starts with an unexpected character, or if
///         one of the matching helpers rejects the input
void Reader::Scan(Tokens& tokens, InputStream& inputStream) {
	for (;;) {
		EatWhiteSpace(inputStream);
		if (inputStream.EOS())
			break;

		// If all goes well, one token is produced per pass
		Token token;
		token.locBegin = inputStream.GetLocation();

		// Look at (but do not consume) the first character of the token
		std::string const sChar(1, static_cast<char>(inputStream.Peek()));
		char const lead = sChar[0];

		// Step one: classify the token from its leading character
		switch (lead) {
			case '{': token.nType = Token::TOKEN_OBJECT_BEGIN;  break;
			case '}': token.nType = Token::TOKEN_OBJECT_END;    break;
			case '[': token.nType = Token::TOKEN_ARRAY_BEGIN;   break;
			case ']': token.nType = Token::TOKEN_ARRAY_END;     break;
			case ',': token.nType = Token::TOKEN_NEXT_ELEMENT;  break;
			case ':': token.nType = Token::TOKEN_MEMBER_ASSIGN; break;
			case '"': token.nType = Token::TOKEN_STRING;        break;

			case '-':
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				token.nType = Token::TOKEN_NUMBER;
				break;

			case 't':
			case 'f':
				token.nType = Token::TOKEN_BOOLEAN;
				break;

			case 'n':
				token.nType = Token::TOKEN_NULL;
				break;

			default:
				throw ScanException("Unexpected character in stream: " + sChar, inputStream.GetLocation());
		}

		// Step two: consume the token text from the stream
		switch (token.nType) {
			case Token::TOKEN_STRING:
				MatchString(token.sValue, inputStream);
				break;

			case Token::TOKEN_NUMBER:
				MatchNumber(token.sValue, inputStream);
				break;

			case Token::TOKEN_BOOLEAN:
				token.sValue = (lead == 't') ? "true" : "false";
				MatchExpectedString(token.sValue, inputStream);
				break;

			case Token::TOKEN_NULL:
				token.sValue = "null";
				MatchExpectedString(token.sValue, inputStream);
				break;

			default:
				// Structural tokens consist of the leading character only
				token.sValue = sChar;
				MatchExpectedString(token.sValue, inputStream);
				break;
		}

		token.locEnd = inputStream.GetLocation();
		tokens.push_back(token);
	}
}
/// Consume characters from the stream until the next one is not whitespace
/// (as judged by isspace) or the end of the stream is reached.
void Reader::EatWhiteSpace(InputStream& inputStream) {
	for (;;) {
		if (inputStream.EOS())
			return;
		if (!::isspace(inputStream.Peek()))
			return;
		inputStream.Get();
	}
}
/// Consume sExpected from the stream, one character at a time.
/// @throws ScanException if the stream ends early or yields a different
///         character; the characters read so far (including the mismatching
///         one) stay consumed
void Reader::MatchExpectedString(std::string const& sExpected, InputStream& inputStream) {
	for (std::string::size_type i = 0; i < sExpected.size(); ++i) {
		// Running out of input counts as a mismatch as well
		bool matched = false;
		if (!inputStream.EOS())
			matched = inputStream.Get() == sExpected[i];

		if (!matched)
			throw ScanException("Expected string: " + sExpected, inputStream.GetLocation());
	}
}
/// Consume a double-quoted string literal and append its unescaped contents
/// to string.
///
/// The single-character escapes \/ \" \\ \b \f \n \r \t are translated.
/// \uXXXX escapes are not decoded yet and are rejected like any other
/// unknown escape.
/// @throws ScanException on an unrecognized escape sequence, or if the opening
///         or closing quote is missing
void Reader::MatchString(std::string& string, InputStream& inputStream) {
	MatchExpectedString("\"", inputStream);

	while (!inputStream.EOS() && inputStream.Peek() != '"') {
		char c = inputStream.Get();

		// escape?
		if (c == '\\' && !inputStream.EOS()) { // shouldn't have reached the end yet
			c = inputStream.Get();
			switch (c) {
				case '/':  string.push_back('/');  break;
				case '"':  string.push_back('"');  break;
				case '\\': string.push_back('\\'); break;
				case 'b':  string.push_back('\b'); break;
				case 'f':  string.push_back('\f'); break;
				case 'n':  string.push_back('\n'); break;
				case 'r':  string.push_back('\r'); break;
				case 't':  string.push_back('\t'); break;
				case 'u': // TODO: what do we do with this?
				default:
					// The message has to be built as a std::string: adding a char to a
					// string literal is pointer arithmetic, not concatenation
					throw ScanException(std::string("Unrecognized escape sequence found in string: \\") + c, inputStream.GetLocation());
			}
		}
		else {
			string.push_back(c);
		}
	}

	// eat the last '"' that we hopefully just peeked
	MatchExpectedString("\"", inputStream);
}
/// Consume the longest run of characters which may appear in a number
/// (digits, '.', 'e', 'E', '-' and '+') and append it to sNumber.
///
/// Only the character set is checked here; whether the run actually forms a
/// valid number is decided later, when the token is parsed.
void Reader::MatchNumber(std::string& sNumber, InputStream& inputStream) {
	const char sNumericChars[] = "0123456789.eE-+";

	while (!inputStream.EOS()) {
		char const c = static_cast<char>(inputStream.Peek());

		// sizeof - 1 keeps the terminating NUL out of the accepted set, so a
		// stray '\0' in the input is not swallowed as part of the number
		if (!std::char_traits<char>::find(sNumericChars, sizeof(sNumericChars) - 1, c))
			break;

		sNumber.push_back(inputStream.Get());
	}
}
/// Parse one element of any type, dispatching on the type of the next token.
/// @throws ParseException if no tokens are left, or if the next token cannot
///         start an element
UnknownElement Reader::Parse(Reader::TokenStream& tokenStream) {
	if (tokenStream.EOS())
		throw ParseException("Unexpected end of token stream", Location(), Location()); // nowhere to point to

	// Only peek here: the specialised parsers consume the token themselves
	Token const& next = tokenStream.Peek();

	if (next.nType == Token::TOKEN_OBJECT_BEGIN)
		return ParseObject(tokenStream);
	if (next.nType == Token::TOKEN_ARRAY_BEGIN)
		return ParseArray(tokenStream);
	if (next.nType == Token::TOKEN_STRING)
		return ParseString(tokenStream);
	if (next.nType == Token::TOKEN_NUMBER)
		return ParseNumber(tokenStream);
	if (next.nType == Token::TOKEN_BOOLEAN)
		return ParseBoolean(tokenStream);
	if (next.nType == Token::TOKEN_NULL)
		return ParseNull(tokenStream);

	throw ParseException("Unexpected token: " + next.sValue, next.locBegin, next.locEnd);
}
/// Parse an object: '{', then name ':' value members separated by ',', then '}'.
/// @throws ParseException on a duplicate member name or an unexpected token
UnknownElement Reader::ParseObject(Reader::TokenStream& tokenStream) {
	MatchExpectedToken(Token::TOKEN_OBJECT_BEGIN, tokenStream);

	Object object;
	for (;;) {
		if (tokenStream.EOS() || tokenStream.Peek().nType == Token::TOKEN_OBJECT_END)
			break;

		// Member name first; hold on to its token so an error can point at it
		Token const& keyToken = tokenStream.Peek();
		std::string const& name = MatchExpectedToken(Token::TOKEN_STRING, tokenStream);
		if (object.count(name))
			throw ParseException("Duplicate object member token: " + name, keyToken.locBegin, keyToken.locEnd);

		// Then the separator between name and value...
		MatchExpectedToken(Token::TOKEN_MEMBER_ASSIGN, tokenStream);

		// ...and finally the value, which may be of any type
		object[name] = Parse(tokenStream);

		// Unless the object ends here, a separator has to follow
		bool const atEnd = tokenStream.EOS() || tokenStream.Peek().nType == Token::TOKEN_OBJECT_END;
		if (!atEnd)
			MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream);
	}

	MatchExpectedToken(Token::TOKEN_OBJECT_END, tokenStream);
	return object;
}
/// Parse an array: '[', then values separated by ',', then ']'.
/// @throws ParseException on an unexpected token or a premature end of tokens
UnknownElement Reader::ParseArray(Reader::TokenStream& tokenStream) {
	MatchExpectedToken(Token::TOKEN_ARRAY_BEGIN, tokenStream);

	Array array;
	for (;;) {
		if (tokenStream.EOS() || tokenStream.Peek().nType == Token::TOKEN_ARRAY_END)
			break;

		array.push_back(Parse(tokenStream));

		// Unless the array ends here, a separator has to follow
		bool const atEnd = tokenStream.EOS() || tokenStream.Peek().nType == Token::TOKEN_ARRAY_END;
		if (!atEnd)
			MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream);
	}

	MatchExpectedToken(Token::TOKEN_ARRAY_END, tokenStream);
	return array;
}
/// Consume a string token and return its text as an element.
/// @throws ParseException if the next token is not a string
UnknownElement Reader::ParseString(Reader::TokenStream& tokenStream) {
	std::string const& sValue = MatchExpectedToken(Token::TOKEN_STRING, tokenStream);
	return sValue;
}
/// Consume a number token and convert it to an element.
///
/// The text is first read as a 64-bit integer. If that does not succeed or
/// does not use up the whole token, it is read again as a double, so "1" is
/// returned as an integer while "1.0" and "1e3" are returned as doubles.
/// @throws ParseException if the next token is not a number, or if its text
///         cannot be converted in full by either attempt
UnknownElement Reader::ParseNumber(Reader::TokenStream& tokenStream) {
	Token const& currentToken = tokenStream.Peek(); // needed for the location if we have to throw
	std::string const& sValue = MatchExpectedToken(Token::TOKEN_NUMBER, tokenStream);

	// First try to parse it as an int
	std::istringstream iStr(sValue);
	int64_t iValue = 0;
	iStr >> iValue;

	// Only a successful extraction which consumed the entire token is an int.
	// eof alone is not enough: a failed extraction (e.g. of "-") hits it too.
	if (!iStr.fail() && iStr.eof())
		return iValue;

	// Try again as a double. The error state has to be reset first, or the
	// seek and the second extraction would do nothing on a failed stream.
	iStr.clear();
	iStr.seekg(0, std::ios::beg);
	double dValue = 0.0;
	iStr >> dValue;
	if (!iStr.fail() && iStr.eof())
		return dValue;

	// Either the extraction failed outright or there's still stuff left in the
	// token. Build the message as a std::string: adding an int to a string
	// literal is pointer arithmetic, not concatenation.
	std::string sMessage("Unexpected character in NUMBER token: ");
	if (iStr.fail())
		sMessage += sValue; // no single offending character to point at
	else
		sMessage += static_cast<char>(iStr.peek());
	throw ParseException(sMessage, currentToken.locBegin, currentToken.locEnd);
}
/// Consume a boolean token; the element is true exactly when the token text
/// is "true".
/// @throws ParseException if the next token is not a boolean
UnknownElement Reader::ParseBoolean(Reader::TokenStream& tokenStream) {
	std::string const& sValue = MatchExpectedToken(Token::TOKEN_BOOLEAN, tokenStream);
	return sValue == "true";
}
/// Consume a null token and return a Null element.
/// @throws ParseException if the next token is not a null token
UnknownElement Reader::ParseNull(Reader::TokenStream& tokenStream)
{
	// The token carries no information beyond its type, so its text is dropped
	MatchExpectedToken(Token::TOKEN_NULL, tokenStream);

	return Null();
}
/// Consume the next token, which has to be of type nExpected, and return a
/// reference to its text.
/// @throws ParseException if no tokens are left or the token is of another
///         type; in the latter case the token has already been consumed
std::string const& Reader::MatchExpectedToken(Token::Type nExpected, Reader::TokenStream& tokenStream) {
	if (tokenStream.EOS())
		throw ParseException("Unexpected End of token stream", Location(), Location()); // nowhere to point to

	Token const& token = tokenStream.Get();
	if (token.nType == nExpected)
		return token.sValue;

	throw ParseException("Unexpected token: " + token.sValue, token.locBegin, token.locEnd);
}
}