Aegisub/libaegisub/common/parser.cpp
Thomas Goyne 92b8b2851b Don't build the dialogue lexer on every use
Building the lexer takes much longer than actually lexing, and since the
lexer is stateless there's no reason not to just make it static.
2014-07-14 10:10:22 -07:00

243 lines
7.3 KiB
C++

// Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// Aegisub Project http://www.aegisub.org/
#include "parser.h"
#include "libaegisub/color.h"
#include "libaegisub/ass/dialogue_parser.h"
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
// We have to use the copy of phoenix within spirit if it exists, as the
// standalone copy has different header guards
#ifdef HAVE_BOOST_SPIRIT_HOME_PHOENIX_VERSION_HPP
#include <boost/spirit/home/phoenix/statement.hpp>
#else
#include <boost/phoenix/statement.hpp>
#endif
// Adapt agi::Color as a Boost.Fusion sequence of its r, g, b and a members
// (in that order). The color grammar in this file uses agi::Color as the
// attribute type of its rules.
BOOST_FUSION_ADAPT_STRUCT(
agi::Color,
(unsigned char, r)
(unsigned char, g)
(unsigned char, b)
(unsigned char, a)
)
namespace {
using namespace boost::spirit;
/// Functor that turns a packed abgr integer into an agi::Color.
///
/// The lowest byte of the integer becomes the first constructor argument of
/// agi::Color, followed by the next three bytes in ascending order. It is used
/// both as a boost::static_visitor and, in the color grammar, wrapped in a
/// boost::phoenix::function (the nested result<> template presumably serves
/// that wrapper's return type deduction).
struct unpack_colors : public boost::static_visitor<agi::Color> {
	template<typename T> struct result { using type = agi::Color; };

	/// Arguments that are neither int nor unsigned int are forwarded to
	/// boost::apply_visitor, which calls back into one of the overloads below
	template<class T> agi::Color operator()(T arg) const {
		return boost::apply_visitor(*this, arg);
	}

	/// Signed values are converted to unsigned and unpacked the same way
	agi::Color operator()(int abgr) const {
		return (*this)(static_cast<unsigned int>(abgr));
	}

	/// Split the value into its four bytes, lowest byte first
	agi::Color operator()(unsigned int abgr) const {
		unsigned int const byte0 = abgr & 0xFF;
		unsigned int const byte1 = (abgr >> 8) & 0xFF;
		unsigned int const byte2 = (abgr >> 16) & 0xFF;
		unsigned int const byte3 = (abgr >> 24) & 0xFF;
		return agi::Color(byte0, byte1, byte2, byte3);
	}
};
/// Spirit.Qi grammar that parses a color string into an agi::Color.
///
/// Accepted forms, as defined by the rules in the constructor:
///  - css_color: "rgb(r, g, b)" with decimal components, "#RRGGBB" or "#RGB"
///  - ass_color: a decimal integer, or 8 or 6 hex digits with an optional
///    leading '&', an optional 'H' or 'h' and an optional trailing '&'. The
///    resulting number is converted with unpack_colors.
///
/// Only ass_color requires end of input itself (qi::eoi); css_color does not.
template<typename Iterator>
struct color_grammar : qi::grammar<Iterator, agi::Color()> {
// Start rule: css_color, falling back to ass_color
qi::rule<Iterator, agi::Color()> color;
qi::rule<Iterator, agi::Color()> css_color;
qi::rule<Iterator, agi::Color()> ass_color;
// One decimal color component inside rgb(...)
qi::rule<Iterator, unsigned char()> rgb_component;
// NOTE(review): rgb_percent is declared but is neither given a definition
// nor referenced anywhere in this grammar
qi::rule<Iterator, unsigned char()> rgb_percent;
// Exactly two hex digits
qi::rule<Iterator, unsigned char()> hex_byte;
// Exactly one hex digit, expanded to a full byte
qi::rule<Iterator, unsigned char()> hex_char;
// A ',' surrounded by optional blanks
qi::rule<Iterator> comma;
// Zero or more blanks
qi::rule<Iterator> blank;
// Parser for exactly `len` hex digits producing an `unsigned type` value.
// NOTE(review): the macro is not #undef'd in the visible code.
#define HEX_PARSER(type, len) qi::uint_parser<unsigned type, 16, len, len>()
color_grammar() : color_grammar::base_type(color) {
color = css_color | ass_color;
// Lazy phoenix wrapper around unpack_colors for use in the semantic action
boost::phoenix::function<unpack_colors> unpack;
// Order is important here; int_ (for SSA) needs to come before the ASS
// option as decimal numbers of the appropriate length are also valid
// hex numbers
// ASS with alpha needs to come before ASS without alpha for the same
// reason
ass_color = (
int_
| -lit('&') >> -(lit('H') | lit('h')) >> (HEX_PARSER(int, 8) | HEX_PARSER(int, 6)) >> -lit('&')
)[_val = unpack(_1)] >> blank >> qi::eoi;
// The six-digit '#' form is tried before the three-digit form
css_color
= "rgb(" >> blank >> rgb_component >> comma >> rgb_component >> comma >> rgb_component >> blank >> ')'
| '#' >> hex_byte >> hex_byte >> hex_byte
| '#' >> hex_char >> hex_char >> hex_char
;
// A single digit d yields d * 16 + d, i.e. the digit repeated in both nibbles
hex_char = HEX_PARSER(int, 1)[_val = _1 * 16 + _1];
hex_byte = HEX_PARSER(int, 2);
// One to three decimal digits
rgb_component = qi::uint_parser<unsigned char, 10, 1, 3>();
comma = *qi::blank >> "," >> *qi::blank;
blank = *qi::blank;
}
};
/// Spirit.Lex lexer definition that splits the body of a dialogue line into
/// tokens whose ids come from agi::ass::DialogueTokenType.
///
/// Five lexer states are defined in init():
///  - the initial state: text outside of override blocks
///  - "OVR": inside a {...} block, outside of any tag
///  - "TAGSTART": directly after the backslash that starts a tag
///  - "TAGNAME": after the first character of a tag name
///  - "ARG": the arguments of a tag
template <typename Lexer>
struct dialogue_tokens final : lex::lexer<Lexer> {
/// Parenthesis nesting counter. It is written by the semantic actions below
/// while tokenizing (reset to 0 on the '{' that opens a block, incremented on
/// '(' and decremented on ')'), so it is state shared by every tokenizing run
/// that goes through the same lexer object.
int paren_depth;
/// Register the token definitions for every lexer state.
/// @param kara_templater Token definition(s) added as an alternative in each
///        state; what is passed is chosen by the constructor
template<typename KT>
void init(KT &&kara_templater) {
using lex::_state;
using lex::char_;
using lex::string;
using namespace boost::phoenix;
using namespace agi::ass::DialogueTokenType;
// Initial state: a backslash followed by n, N or h is a LINE_BREAK, '{'
// resets the paren counter and enters "OVR", anything else is TEXT one
// character at a time
this->self
= string("\\\\[nNh]", LINE_BREAK)
| char_('{', OVR_BEGIN)[ref(paren_depth) = 0, _state = "OVR"]
| kara_templater
| string(".", TEXT)
;
// Inside an override block: a nested '{' is an ERROR token, '}' returns to
// the initial state, a backslash starts a tag and any other
// non-whitespace character is a COMMENT
this->self("OVR")
= char_('{', ERROR)
| char_('}', OVR_END)[_state = "INITIAL"]
| char_('\\', TAG_START)[_state = "TAGSTART"]
| string("\\s+", WHITESPACE)
| kara_templater
| string(".", COMMENT)
;
// Tag arguments: parentheses adjust paren_depth, and the ')' that brings
// it back to 0 returns to "OVR". A backslash starts the next tag.
this->self("ARG")
= char_('{', ERROR)
| char_('}', OVR_END)[_state = "INITIAL"]
| char_('(', OPEN_PAREN)[++ref(paren_depth)]
| char_(')', CLOSE_PAREN)[--ref(paren_depth), if_(ref(paren_depth) == 0)[_state = "OVR"]]
| char_('\\', TAG_START)[_state = "TAGSTART"]
| char_(',', ARG_SEP)
| string("\\s+", WHITESPACE)
| string(".", ARG)
| kara_templater
;
// Directly after a backslash: "r" and "fn" are complete tag names and go
// straight to "ARG" (presumably because their arguments may begin with
// letters -- TODO confirm); any other [a-z0-9] character starts a tag name
// that continues in "TAGNAME". Other characters are a COMMENT and return
// to "OVR".
this->self("TAGSTART")
= string("\\s+", WHITESPACE)
| string("r|fn", TAG_NAME)[_state = "ARG"]
| char_('\\', TAG_START)
| char_('}', OVR_END)[_state = "INITIAL"]
| string("[a-z0-9]", TAG_NAME)[_state = "TAGNAME"]
| string(".", COMMENT)[_state = "OVR"]
| kara_templater
;
// Remainder of a tag name: a run of lower-case letters completes the name;
// any other character (apart from '}', ')' and a backslash, which are
// handled as in "ARG") is the start of the argument
this->self("TAGNAME")
= string("[a-z]+", TAG_NAME)[_state = "ARG"]
| char_('(', OPEN_PAREN)[++ref(paren_depth), _state = "ARG"]
| char_(')', CLOSE_PAREN)[--ref(paren_depth), if_(ref(paren_depth) == 0)[_state = "OVR"]]
| char_('}', OVR_END)[_state = "INITIAL"]
| char_('\\', TAG_START)[_state = "TAGSTART"]
| string(".", ARG)[_state = "ARG"]
| kara_templater
;
}
/// Build the lexer.
/// @param karaoke_templater When true, text between two '!' characters is
///        tokenized as KARAOKE_TEMPLATE and '$' followed by letters or
///        underscores as KARAOKE_VARIABLE. When false, init() is given a
///        token definition for the single character '\1' instead (presumably
///        a placeholder that is not expected in real text -- TODO confirm).
dialogue_tokens(bool karaoke_templater) : paren_depth(0) {
using lex::string;
using namespace agi::ass::DialogueTokenType;
if (karaoke_templater)
init(string("![^!]*!", KARAOKE_TEMPLATE) | string("\\$[A-Za-z_]+", KARAOKE_VARIABLE));
else
init(lex::char_('\1'));
}
};
/// Parse the whole of a string with a Spirit.Qi parser.
///
/// @param str    Text to parse
/// @param parser Qi parser whose attribute can be stored in a T
/// @param[out] out Receives the parsed value; written only on success
/// @return true if the parser matched and consumed every character of str
template<typename Parser, typename T>
bool do_try_parse(std::string const& str, Parser parser, T *out) {
	using namespace boost::spirit::qi;

	// Parse into a temporary so that *out is left alone on failure
	T value;
	char const* cursor = str.c_str();
	char const* const finish = cursor + str.size();

	if (!parse(cursor, finish, parser, value))
		return false;

	// Trailing unparsed characters count as a failure
	if (cursor != finish)
		return false;

	*out = value;
	return true;
}
}
namespace agi {
namespace parser {
/// Parse a color string using color_grammar.
///
/// @param[out] dst Receives the parsed color. The grammar writes into dst
///                 directly, so it may have been modified even when false is
///                 returned.
/// @param str      Text to parse
/// @return true if the grammar matched and consumed all of str
bool parse(Color &dst, std::string const& str) {
	using iterator_type = std::string::const_iterator;

	iterator_type cursor = str.begin();
	iterator_type const finish = str.end();
	color_grammar<iterator_type> const grammar;

	if (!parse(cursor, finish, grammar, dst))
		return false;
	return cursor == finish;
}
}
namespace ass {
/// Split the body of a dialogue line into a list of typed tokens.
///
/// @param str               Text to tokenize
/// @param karaoke_templater Selects the lexer that was built with the karaoke
///                          template tokens enabled
/// @return Tokens in input order. Consecutive tokens of the same type are
///         merged into a single entry whose length is the sum of their
///         lengths. Tokenizing stops at the first invalid token.
std::vector<DialogueToken> TokenizeDialogueBody(std::string const& str, bool karaoke_templater) {
	// Building a lexer takes much longer than running it, so both variants
	// are built once and reused.
	//
	// They are deliberately not declared const: the lexer's semantic actions
	// write to its paren_depth member while tokenizing, and modifying a
	// non-mutable member of a const object is undefined behavior.
	//
	// NOTE(review): that counter is shared by all calls and is not
	// synchronized, so this function does not look safe to call from several
	// threads at once -- confirm against callers.
	static dialogue_tokens<lex::lexertl::actor_lexer<>> kt(true);
	static dialogue_tokens<lex::lexertl::actor_lexer<>> not_kt(false);
	auto const& tokenizer = karaoke_templater ? kt : not_kt;

	char const *first = str.c_str();
	char const *last = first + str.size();
	std::vector<DialogueToken> data;

	auto it = tokenizer.begin(first, last), end = tokenizer.end();
	for (; it != end && token_is_valid(*it); ++it) {
		int id = it->id();
		ptrdiff_t len = it->value().end() - it->value().begin();
		assert(len > 0);

		// Extend the previous token when the type repeats, so that a run of
		// e.g. single-character TEXT tokens becomes one token
		if (data.empty() || data.back().type != id)
			data.push_back(DialogueToken{id, static_cast<size_t>(len)});
		else
			data.back().length += len;
	}

	return data;
}
}
namespace util {
// from util.h
bool try_parse(std::string const& str, double *out) {
return do_try_parse(str, boost::spirit::qi::double_, out);
}
bool try_parse(std::string const& str, int *out) {
return do_try_parse(str, boost::spirit::qi::int_, out);
}
}
}