Add a lexer for the body of dialogue lines to libaegisub
This commit is contained in:
parent
bd78692148
commit
47bafe4b9f
5 changed files with 393 additions and 1 deletions
|
@ -542,6 +542,14 @@
|
|||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="ASS"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\libaegisub\include\libaegisub\ass\dialogue_parser.h"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<File
|
||||
RelativePath="..\..\libaegisub\lagi_pre.h"
|
||||
>
|
||||
|
|
|
@ -17,12 +17,15 @@
|
|||
#include "parser.h"
|
||||
|
||||
#include "libaegisub/color.h"
|
||||
#include "libaegisub/ass/dialogue_parser.h"
|
||||
|
||||
#include <boost/spirit/include/qi.hpp>
|
||||
#include <boost/spirit/include/phoenix_core.hpp>
|
||||
#include <boost/spirit/include/phoenix_operator.hpp>
|
||||
#include <boost/spirit/include/phoenix_fusion.hpp>
|
||||
#include <boost/fusion/include/adapt_struct.hpp>
|
||||
#include <boost/spirit/include/lex_lexertl.hpp>
|
||||
#include <boost/spirit/home/phoenix/statement.hpp>
|
||||
|
||||
BOOST_FUSION_ADAPT_STRUCT(
|
||||
agi::Color,
|
||||
|
@ -96,13 +99,95 @@ struct color_grammar : qi::grammar<Iterator, agi::Color()> {
|
|||
}
|
||||
};
|
||||
|
||||
template <typename Lexer>
|
||||
struct dialogue_tokens : lex::lexer<Lexer> {
|
||||
int paren_depth;
|
||||
|
||||
dialogue_tokens() : paren_depth(0) {
|
||||
using lex::_state;
|
||||
using lex::char_;
|
||||
using lex::string;
|
||||
using namespace boost::phoenix;
|
||||
using namespace agi::ass::DialogueTokenType;
|
||||
|
||||
this->self
|
||||
= string("\\\\[nNh]", LINE_BREAK)
|
||||
| char_('{', OVR_BEGIN)[ref(paren_depth) = 0, _state = "OVR"]
|
||||
| string(".", TEXT)
|
||||
;
|
||||
|
||||
this->self("OVR")
|
||||
= char_('{', ERROR)
|
||||
| char_('}', OVR_END)[_state = "INITIAL"]
|
||||
| char_('\\', TAG_START)[_state = "TAGSTART"]
|
||||
| string("\\s+", WHITESPACE)
|
||||
| string(".", COMMENT)
|
||||
;
|
||||
|
||||
this->self("ARG")
|
||||
= char_('{', ERROR)
|
||||
| char_('}', OVR_END)[_state = "INITIAL"]
|
||||
| char_('(', OPEN_PAREN)[++ref(paren_depth)]
|
||||
| char_(')', CLOSE_PAREN)[--ref(paren_depth), if_(ref(paren_depth) == 0)[_state = "OVR"]]
|
||||
| char_('\\', TAG_START)[_state = "TAGSTART"]
|
||||
| char_(',', ARG_SEP)
|
||||
| string("\\s+", WHITESPACE)
|
||||
| string(".", ARG)
|
||||
;
|
||||
|
||||
this->self("TAGSTART")
|
||||
= string("\\s+", WHITESPACE)
|
||||
| string("r|fn", TAG_NAME)[_state = "ARG"]
|
||||
| char_('\\', TAG_START)
|
||||
| char_('}', OVR_END)[_state = "INITIAL"]
|
||||
| string("[a-z0-9]", TAG_NAME)[_state = "TAGNAME"]
|
||||
| string(".", COMMENT)[_state = "OVR"]
|
||||
;
|
||||
|
||||
this->self("TAGNAME")
|
||||
= string("[a-z]+", TAG_NAME)[_state = "ARG"]
|
||||
| char_('(', OPEN_PAREN)[++ref(paren_depth), _state = "ARG"]
|
||||
| char_(')', CLOSE_PAREN)[--ref(paren_depth), if_(ref(paren_depth) == 0)[_state = "OVR"]]
|
||||
| char_('}', OVR_END)[_state = "INITIAL"]
|
||||
| char_('\\', TAG_START)[_state = "TAGSTART"]
|
||||
| string(".", ARG)[_state = "ARG"]
|
||||
;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
namespace agi { namespace parser {
|
||||
namespace agi {
|
||||
namespace parser {
|
||||
/// Parse a color from a string.
/// @param[out] dst Receives the parsed color
/// @param str      String to parse
/// @return true only if the grammar matched and consumed the whole string
bool parse(Color &dst, std::string const& str) {
	std::string::const_iterator cursor = str.begin();
	std::string::const_iterator stop = str.end();

	if (!parse(cursor, stop, color_grammar<std::string::const_iterator>(), dst))
		return false;

	// Trailing unparsed input counts as a failure
	return cursor == stop;
}
|
||||
}
|
||||
|
||||
namespace ass {
|
||||
std::vector<DialogueToken> TokenizeDialogueBody(std::string const& str) {
|
||||
dialogue_tokens<lex::lexertl::actor_lexer<> > tokenizer;
|
||||
|
||||
char const* first = str.c_str();
|
||||
char const* last = first + str.size();
|
||||
std::vector<DialogueToken> data;
|
||||
dialogue_tokens<lex::lexertl::actor_lexer<> >::iterator_type
|
||||
it = tokenizer.begin(first, last),
|
||||
end = tokenizer.end();
|
||||
|
||||
for (; it != end && token_is_valid(*it); ++it) {
|
||||
int id = it->id();
|
||||
ptrdiff_t len = it->value().end() - it->value().begin();
|
||||
assert(len > 0);
|
||||
if (data.empty() || data.back().type != id)
|
||||
data.push_back(DialogueToken(id, len));
|
||||
else
|
||||
data.back().length += len;
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
47
aegisub/libaegisub/include/libaegisub/ass/dialogue_parser.h
Normal file
47
aegisub/libaegisub/include/libaegisub/ass/dialogue_parser.h
Normal file
|
@ -0,0 +1,47 @@
|
|||
// Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
|
||||
//
|
||||
// Permission to use, copy, modify, and distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice appear in all copies.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
#ifndef LAGI_PRE
#include <cstddef>
#include <string>
#include <vector>
#endif
|
||||
|
||||
namespace agi {
	namespace ass {
		/// Token ids produced by TokenizeDialogueBody
		namespace DialogueTokenType {
			enum {
				TEXT = 1000,  ///< Text outside of override blocks
				LINE_BREAK,   ///< \n, \N or \h outside of override blocks
				OVR_BEGIN,    ///< '{' opening an override block
				OVR_END,      ///< '}' closing an override block
				TAG_START,    ///< '\' beginning an override tag
				TAG_NAME,     ///< Name of an override tag
				OPEN_PAREN,   ///< '(' within a tag
				CLOSE_PAREN,  ///< ')' within a tag
				ARG_SEP,      ///< ',' between tag arguments
				ARG,          ///< Part of a tag argument
				ERROR,        ///< '{' inside of an override block
				COMMENT,      ///< Non-tag text inside of an override block
				WHITESPACE    ///< Whitespace inside of an override block
			};
		}

		/// A single token in the body of a dialogue line
		struct DialogueToken {
			/// One of the DialogueTokenType values
			int type;
			/// Number of chars of the tokenized string covered by this token
			size_t length;

			DialogueToken(int type, size_t length)
			: type(type)
			, length(length)
			{
			}
		};

		/// Tokenize the body of a dialogue line
		/// @param str Dialogue body to tokenize
		/// @return The tokens found, in input order
		std::vector<DialogueToken> TokenizeDialogueBody(std::string const& str);
	}
}
|
|
@ -20,6 +20,7 @@ SRC = \
|
|||
libaegisub_access.cpp \
|
||||
libaegisub_cajun.cpp \
|
||||
libaegisub_color.cpp \
|
||||
libaegisub_dialogue_lexer.cpp \
|
||||
libaegisub_hotkey.cpp \
|
||||
libaegisub_iconv.cpp \
|
||||
libaegisub_keyframe.cpp \
|
||||
|
|
251
aegisub/tests/libaegisub_dialogue_lexer.cpp
Normal file
251
aegisub/tests/libaegisub_dialogue_lexer.cpp
Normal file
|
@ -0,0 +1,251 @@
|
|||
// Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
|
||||
//
|
||||
// Permission to use, copy, modify, and distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice appear in all copies.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
#include <libaegisub/ass/dialogue_parser.h>
|
||||
|
||||
#include "main.h"
|
||||
#include "util.h"
|
||||
|
||||
/// Fixture for the dialogue lexer tests; adds nothing beyond libagi
class lagi_dialogue_lexer : public libagi { };
|
||||
|
||||
using namespace agi::ass;
|
||||
|
||||
// An empty body produces no tokens at all
TEST(lagi_dialogue_lexer, empty) {
	std::vector<DialogueToken> tokens = TokenizeDialogueBody("");
	ASSERT_TRUE(tokens.empty());
}
|
||||
|
||||
// Tokenize arg1 and run the expect_tok() statements passed as the remaining
// arguments against the result, then verify that every token was consumed.
// The locals `tok` and `token_index` are what expect_tok() operates on.
#define tok_str(arg1, ...) do {                                  \
	std::string str(arg1);                                       \
	std::vector<DialogueToken> tok(TokenizeDialogueBody(str));   \
	size_t token_index = 0;                                      \
	__VA_ARGS__                                                  \
	EXPECT_EQ(token_index, tok.size());                          \
} while (false)
|
||||
|
||||
// Check that the next unconsumed token has the given type and length, then
// advance to the following token. Only valid inside of tok_str(), which
// supplies `tok` and `token_index`. If there are no tokens left only the
// EXPECT_LT fails, so a short token list is reported once per missing token
// without indexing past the end of `tok`.
//
// Both expected values are cast to the type of the field they are compared
// with: the token type constants belong to an unnamed enum and the lengths are
// written as int literals, and handing either to EXPECT_EQ as-is makes it
// compare mismatched types (signed vs. unsigned for the length).
#define expect_tok(expected_type, expected_len) do { \
	EXPECT_LT(token_index, tok.size()); \
	if (token_index < tok.size()) { \
		EXPECT_EQ(static_cast<int>(DialogueTokenType::expected_type), tok[token_index].type); \
		EXPECT_EQ(static_cast<size_t>(expected_len), tok[token_index].length); \
		++token_index; \
	} \
} while(false)
|
||||
|
||||
// Lines without any override blocks
TEST(lagi_dialogue_lexer, plain_text) {
	// No escapes: everything is one TEXT token
	tok_str("hello there",
		expect_tok(TEXT, 11);
	);

	// A line break splits the text around it
	tok_str("hello\\Nthere",
		expect_tok(TEXT, 5);        // hello
		expect_tok(LINE_BREAK, 2);  // \N
		expect_tok(TEXT, 5);        // there
	);

	// Adjacent line breaks are merged; \k is not a line break and so is text
	tok_str("hello\\n\\h\\kthere",
		expect_tok(TEXT, 5);        // hello
		expect_tok(LINE_BREAK, 4);  // \n\h
		expect_tok(TEXT, 7);        // \kthere
	);
}
|
||||
|
||||
// Well-formed override blocks
TEST(lagi_dialogue_lexer, basic_override_tags) {
	// Single-letter tag with a one character argument
	tok_str("{\\b1}bold text{\\b0}",
		expect_tok(OVR_BEGIN, 1);    // {
		expect_tok(TAG_START, 1);    // '\'
		expect_tok(TAG_NAME, 1);     // b
		expect_tok(ARG, 1);          // 1
		expect_tok(OVR_END, 1);      // }
		expect_tok(TEXT, 9);         // bold text
		expect_tok(OVR_BEGIN, 1);    // {
		expect_tok(TAG_START, 1);    // '\'
		expect_tok(TAG_NAME, 1);     // b
		expect_tok(ARG, 1);          // 0
		expect_tok(OVR_END, 1);      // }
	);

	// Letters following \fn are the argument, not part of the tag name
	tok_str("{\\fnComic Sans MS}text",
		expect_tok(OVR_BEGIN, 1);    // {
		expect_tok(TAG_START, 1);    // '\'
		expect_tok(TAG_NAME, 2);     // fn
		expect_tok(ARG, 5);          // Comic
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG, 4);          // Sans
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG, 2);          // MS
		expect_tok(OVR_END, 1);      // }
		expect_tok(TEXT, 4);         // text
	);

	// Parenthesized argument list
	tok_str("{\\pos(0,0)}a",
		expect_tok(OVR_BEGIN, 1);    // {
		expect_tok(TAG_START, 1);    // '\'
		expect_tok(TAG_NAME, 3);     // pos
		expect_tok(OPEN_PAREN, 1);   // (
		expect_tok(ARG, 1);          // 0
		expect_tok(ARG_SEP, 1);      // ,
		expect_tok(ARG, 1);          // 0
		expect_tok(CLOSE_PAREN, 1);  // )
		expect_tok(OVR_END, 1);      // }
		expect_tok(TEXT, 1);         // a
	);

	// Same as above with whitespace around each argument
	tok_str("{\\pos( 0 , 0 )}a",
		expect_tok(OVR_BEGIN, 1);    // {
		expect_tok(TAG_START, 1);    // '\'
		expect_tok(TAG_NAME, 3);     // pos
		expect_tok(OPEN_PAREN, 1);   // (
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG, 1);          // 0
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG_SEP, 1);      // ,
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG, 1);          // 0
		expect_tok(WHITESPACE, 1);
		expect_tok(CLOSE_PAREN, 1);  // )
		expect_tok(OVR_END, 1);      // }
		expect_tok(TEXT, 1);         // a
	);

	// Several tags in one block, including names which begin with a digit
	tok_str("{\\c&HFFFFFF&\\2c&H0000FF&\\3c&H000000&}a",
		expect_tok(OVR_BEGIN, 1);    // {
		expect_tok(TAG_START, 1);    // '\'
		expect_tok(TAG_NAME, 1);     // c
		expect_tok(ARG, 9);          // &HFFFFFF&
		expect_tok(TAG_START, 1);    // '\'
		expect_tok(TAG_NAME, 2);     // 2c
		expect_tok(ARG, 9);          // &H0000FF&
		expect_tok(TAG_START, 1);    // '\'
		expect_tok(TAG_NAME, 2);     // 3c
		expect_tok(ARG, 9);          // &H000000&
		expect_tok(OVR_END, 1);      // }
		expect_tok(TEXT, 1);         // a
	);

	// A tag nested inside the argument list of another tag
	tok_str("{\\t(0,100,\\clip(1, m 0 0 l 10 10 10 20))}a",
		expect_tok(OVR_BEGIN, 1);    // {
		expect_tok(TAG_START, 1);    // '\'
		expect_tok(TAG_NAME, 1);     // t
		expect_tok(OPEN_PAREN, 1);   // (
		expect_tok(ARG, 1);          // 0
		expect_tok(ARG_SEP, 1);      // ,
		expect_tok(ARG, 3);          // 100
		expect_tok(ARG_SEP, 1);      // ,
		expect_tok(TAG_START, 1);    // '\'
		expect_tok(TAG_NAME, 4);     // clip
		expect_tok(OPEN_PAREN, 1);   // (
		expect_tok(ARG, 1);          // 1
		expect_tok(ARG_SEP, 1);      // ,
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG, 1);          // m
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG, 1);          // 0
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG, 1);          // 0
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG, 1);          // l
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG, 2);          // 10
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG, 2);          // 10
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG, 2);          // 10
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG, 2);          // 20
		expect_tok(CLOSE_PAREN, 2);  // )) merged into one token
		expect_tok(OVR_END, 1);      // }
		expect_tok(TEXT, 1);         // a
	);
}
|
||||
|
||||
// Tokens of the same type are only merged when they are directly adjacent
TEST(lagi_dialogue_lexer, merging) {
	tok_str("{\\b\\b",
		expect_tok(OVR_BEGIN, 1);  // {
		expect_tok(TAG_START, 1);  // '\'
		expect_tok(TAG_NAME, 1);   // b
		expect_tok(TAG_START, 1);  // '\'
		expect_tok(TAG_NAME, 1);   // b
	);
}
|
||||
|
||||
// Whitespace inside of an override block gets its own token type
TEST(lagi_dialogue_lexer, whitespace) {
	tok_str("{ \\ fn Comic Sans MS }asd",
		expect_tok(OVR_BEGIN, 1);   // {
		expect_tok(WHITESPACE, 1);
		expect_tok(TAG_START, 1);   // '\'
		expect_tok(WHITESPACE, 1);
		expect_tok(TAG_NAME, 2);    // fn
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG, 5);         // Comic
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG, 4);         // Sans
		expect_tok(WHITESPACE, 1);
		expect_tok(ARG, 2);         // MS
		expect_tok(WHITESPACE, 1);
		expect_tok(OVR_END, 1);     // }
		expect_tok(TEXT, 3);        // asd
	);
}
|
||||
|
||||
// Text inside of an override block which is not part of a tag
TEST(lagi_dialogue_lexer, comment) {
	// Block containing only a comment
	tok_str("{a}b",
		expect_tok(OVR_BEGIN, 1);  // {
		expect_tok(COMMENT, 1);    // a
		expect_tok(OVR_END, 1);    // }
		expect_tok(TEXT, 1);       // b
	);

	// Comment followed by a tag
	tok_str("{a\\b}c",
		expect_tok(OVR_BEGIN, 1);  // {
		expect_tok(COMMENT, 1);    // a
		expect_tok(TAG_START, 1);  // '\'
		expect_tok(TAG_NAME, 1);   // b
		expect_tok(OVR_END, 1);    // }
		expect_tok(TEXT, 1);       // c
	);
}
|
||||
|
||||
// Unbalanced braces and parentheses
TEST(lagi_dialogue_lexer, malformed) {
	// A '}' with no open override block is ordinary text
	tok_str("}",
		expect_tok(TEXT, 1);
	);

	// A '{' inside of an override block is an error
	tok_str("{{",
		expect_tok(OVR_BEGIN, 1);   // {
		expect_tok(ERROR, 1);       // {
	);

	// Missing ')': the block still ends at the '}'
	tok_str("{\\pos(0,0}a",
		expect_tok(OVR_BEGIN, 1);   // {
		expect_tok(TAG_START, 1);   // '\'
		expect_tok(TAG_NAME, 3);    // pos
		expect_tok(OPEN_PAREN, 1);  // (
		expect_tok(ARG, 1);         // 0
		expect_tok(ARG_SEP, 1);     // ,
		expect_tok(ARG, 1);         // 0
		expect_tok(OVR_END, 1);     // }
		expect_tok(TEXT, 1);        // a
	);

	// A backslash right before the '}' does not escape it
	tok_str("{\\b1\\}asdf",
		expect_tok(OVR_BEGIN, 1);   // {
		expect_tok(TAG_START, 1);   // '\'
		expect_tok(TAG_NAME, 1);    // b
		expect_tok(ARG, 1);         // 1
		expect_tok(TAG_START, 1);   // '\'
		expect_tok(OVR_END, 1);     // }
		expect_tok(TEXT, 4);        // asdf
	);
}
|
Loading…
Reference in a new issue