diff --git a/aegisub/build/libaegisub_vs2008/libaegisub_vs2008.vcproj b/aegisub/build/libaegisub_vs2008/libaegisub_vs2008.vcproj
index 08b5568c1..3c33d931b 100644
--- a/aegisub/build/libaegisub_vs2008/libaegisub_vs2008.vcproj
+++ b/aegisub/build/libaegisub_vs2008/libaegisub_vs2008.vcproj
@@ -542,6 +542,14 @@
>
+
+
+
+
diff --git a/aegisub/libaegisub/common/parser.cpp b/aegisub/libaegisub/common/parser.cpp
index 9d9359cf0..bd383a26c 100644
--- a/aegisub/libaegisub/common/parser.cpp
+++ b/aegisub/libaegisub/common/parser.cpp
@@ -17,12 +17,15 @@
#include "parser.h"
#include "libaegisub/color.h"
+#include "libaegisub/ass/dialogue_parser.h"
#include
#include
#include
#include
#include
+#include
+#include
BOOST_FUSION_ADAPT_STRUCT(
agi::Color,
@@ -96,13 +99,95 @@ struct color_grammar : qi::grammar {
}
};
+/// Lexer definition which splits the body of an ASS dialogue line into the
+/// token types declared in agi::ass::DialogueTokenType.
+///
+/// Most rules match only a short fragment (often a single character);
+/// TokenizeDialogueBody() merges adjacent tokens of the same type afterwards.
+///
+/// Lexer states used in addition to the initial one:
+///  - "OVR":      inside an override block but not inside a tag
+///  - "TAGSTART": directly after the backslash that starts a tag
+///  - "TAGNAME":  after the first character of a tag name
+///  - "ARG":      inside the argument(s) of a tag
+///
+/// The rules have semantic actions attached, so Lexer has to be a lexer type
+/// which supports them (e.g. lex::lexertl::actor_lexer).
+template<typename Lexer>
+struct dialogue_tokens : lex::lexer<Lexer> {
+	/// Parenthesis nesting depth within the current override block; reset to
+	/// zero whenever a new override block is opened
+	int paren_depth;
+
+	dialogue_tokens() : paren_depth(0) {
+		using lex::_state;
+		using lex::char_;
+		using lex::string;
+		using namespace boost::phoenix;
+		using namespace agi::ass::DialogueTokenType;
+
+		// Initial state: text outside of override blocks. In every state the
+		// catch-all "." rule is listed last so that the more specific rules
+		// take precedence over it.
+		this->self
+			= string("\\\\[nNh]", LINE_BREAK)
+			| char_('{', OVR_BEGIN)[ref(paren_depth) = 0, _state = "OVR"]
+			| string(".", TEXT)
+		;
+
+		// Inside an override block: anything which is neither a tag nor
+		// whitespace is reported as a comment
+		this->self("OVR")
+			= char_('{', ERROR)
+			| char_('}', OVR_END)[_state = "INITIAL"]
+			| char_('\\', TAG_START)[_state = "TAGSTART"]
+			| string("\\s+", WHITESPACE)
+			| string(".", COMMENT)
+		;
+
+		// Inside the argument(s) of a tag. Closing the outermost parenthesis
+		// ends the argument list and returns to the plain override state.
+		this->self("ARG")
+			= char_('{', ERROR)
+			| char_('}', OVR_END)[_state = "INITIAL"]
+			| char_('(', OPEN_PAREN)[++ref(paren_depth)]
+			| char_(')', CLOSE_PAREN)[--ref(paren_depth), if_(ref(paren_depth) == 0)[_state = "OVR"]]
+			| char_('\\', TAG_START)[_state = "TAGSTART"]
+			| char_(',', ARG_SEP)
+			| string("\\s+", WHITESPACE)
+			| string(".", ARG)
+		;
+
+		// Directly after a backslash. "r" and "fn" switch straight to ARG so
+		// that an argument beginning with letters is not consumed as part of
+		// the tag name by the [a-z]+ rule of TAGNAME.
+		this->self("TAGSTART")
+			= string("\\s+", WHITESPACE)
+			| string("r|fn", TAG_NAME)[_state = "ARG"]
+			| char_('\\', TAG_START)
+			| char_('}', OVR_END)[_state = "INITIAL"]
+			| string("[a-z0-9]", TAG_NAME)[_state = "TAGNAME"]
+			| string(".", COMMENT)[_state = "OVR"]
+		;
+
+		// After the first character of a tag name: further lowercase letters
+		// belong to the name, any other ordinary character starts the argument
+		this->self("TAGNAME")
+			= string("[a-z]+", TAG_NAME)[_state = "ARG"]
+			| char_('(', OPEN_PAREN)[++ref(paren_depth), _state = "ARG"]
+			| char_(')', CLOSE_PAREN)[--ref(paren_depth), if_(ref(paren_depth) == 0)[_state = "OVR"]]
+			| char_('}', OVR_END)[_state = "INITIAL"]
+			| char_('\\', TAG_START)[_state = "TAGSTART"]
+			| string(".", ARG)[_state = "ARG"]
+		;
+	}
+};
+
}
-namespace agi { namespace parser {
+namespace agi {
+namespace parser {
// Parse str as a color and store the result in dst.
// Returns true only if the grammar matched and the whole string was consumed.
bool parse(Color &dst, std::string const& str) {
std::string::const_iterator begin = str.begin();
bool parsed = parse(begin, str.end(), color_grammar(), dst); // presumably advances begin past the consumed input (it is compared to end() below) - confirm against the parse() overload in use
return parsed && begin == str.end(); // a match that leaves trailing input counts as failure
}
}
+
+namespace ass {
+	/// Split the body of a dialogue line into tokens.
+	///
+	/// @param str Text to tokenize
+	/// @return The tokens in order of appearance, with runs of adjacent
+	///         tokens of the same type merged into a single token. Lexing
+	///         stops at the first invalid token, in which case the returned
+	///         tokens only cover the part of str before it.
+	std::vector<DialogueToken> TokenizeDialogueBody(std::string const& str) {
+		// The token definitions use semantic actions, which requires an actor_lexer
+		dialogue_tokens<lex::lexertl::actor_lexer<> > tokenizer;
+
+		char const* first = str.c_str();
+		char const* last = first + str.size();
+		std::vector<DialogueToken> data;
+		dialogue_tokens<lex::lexertl::actor_lexer<> >::iterator_type
+			it = tokenizer.begin(first, last),
+			end = tokenizer.end();
+
+		for (; it != end && token_is_valid(*it); ++it) {
+			int id = it->id();
+			ptrdiff_t len = it->value().end() - it->value().begin();
+			assert(len > 0);
+			// Extend the previous token rather than adding a new one when
+			// the type did not change
+			if (data.empty() || data.back().type != id)
+				data.push_back(DialogueToken(id, len));
+			else
+				data.back().length += len;
+		}
+
+		return data;
+	}
+}
}
diff --git a/aegisub/libaegisub/include/libaegisub/ass/dialogue_parser.h b/aegisub/libaegisub/include/libaegisub/ass/dialogue_parser.h
new file mode 100644
index 000000000..5c2a10f06
--- /dev/null
+++ b/aegisub/libaegisub/include/libaegisub/ass/dialogue_parser.h
@@ -0,0 +1,47 @@
+// Copyright (c) 2012, Thomas Goyne
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+#pragma once
+
+#ifndef LAGI_PRE
+#include <string>
+#include <vector>
+#endif
+
+namespace agi {
+	namespace ass {
+		/// Types of the tokens produced by TokenizeDialogueBody()
+		namespace DialogueTokenType {
+			// NOTE(review): ERROR is also defined as a macro by the Windows
+			// GDI headers; confirm this header is never included after
+			// <windows.h> or rename/#undef accordingly.
+			enum {
+				TEXT = 1000,  ///< Text outside of override blocks
+				LINE_BREAK,   ///< \n, \N or \h outside of override blocks
+				OVR_BEGIN,    ///< '{' opening an override block
+				OVR_END,      ///< '}' closing an override block
+				TAG_START,    ///< Backslash introducing an override tag
+				TAG_NAME,     ///< Name of an override tag
+				OPEN_PAREN,   ///< '(' in a tag
+				CLOSE_PAREN,  ///< ')' in a tag
+				ARG_SEP,      ///< ',' between tag arguments
+				ARG,          ///< Argument text of a tag
+				ERROR,        ///< '{' inside an override block
+				COMMENT,      ///< Text in an override block which is not part of a tag
+				WHITESPACE    ///< Whitespace inside an override block
+			};
+		}
+
+		/// A run of consecutive characters of the same token type
+		struct DialogueToken {
+			/// One of the DialogueTokenType values
+			int type;
+			/// Number of chars of the tokenized string covered by this token
+			size_t length;
+			DialogueToken(int type, size_t length) : type(type), length(length) { }
+		};
+
+		/// Split the body of a dialogue line into tokens
+		/// @param str Text to tokenize
+		/// @return Tokens in order of appearance
+		std::vector<DialogueToken> TokenizeDialogueBody(std::string const& str);
+	}
+}
diff --git a/aegisub/tests/Makefile b/aegisub/tests/Makefile
index 383cb5056..7b8adfc7f 100644
--- a/aegisub/tests/Makefile
+++ b/aegisub/tests/Makefile
@@ -20,6 +20,7 @@ SRC = \
libaegisub_access.cpp \
libaegisub_cajun.cpp \
libaegisub_color.cpp \
+ libaegisub_dialogue_lexer.cpp \
libaegisub_hotkey.cpp \
libaegisub_iconv.cpp \
libaegisub_keyframe.cpp \
diff --git a/aegisub/tests/libaegisub_dialogue_lexer.cpp b/aegisub/tests/libaegisub_dialogue_lexer.cpp
new file mode 100644
index 000000000..1ced1771d
--- /dev/null
+++ b/aegisub/tests/libaegisub_dialogue_lexer.cpp
@@ -0,0 +1,251 @@
+// Copyright (c) 2012, Thomas Goyne
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+#include
+
+#include "main.h"
+#include "util.h"
+
+class lagi_dialogue_lexer : public libagi { // Fixture type for the dialogue lexer tests. NOTE(review): the tests in this file use TEST rather than TEST_F, so this class is not used as a fixture - confirm that is intended
+};
+
+using namespace agi::ass;
+
+TEST(lagi_dialogue_lexer, empty) { // An empty body must produce no tokens at all
+ ASSERT_TRUE(TokenizeDialogueBody("").empty());
+}
+
+// Tokenize arg1 and run the statements passed as the remaining arguments
+// (normally a sequence of expect_tok() checks) against the result, then
+// verify that every token which was produced has been checked.
+#define tok_str(arg1, ...) do { \
+	std::string str = arg1; \
+	std::vector<DialogueToken> tok = TokenizeDialogueBody(str); \
+	size_t token_index = 0; \
+	__VA_ARGS__ \
+	EXPECT_EQ(token_index, tok.size()); \
+} while(false)
+
+// Check that the next unchecked token has the given type and length, then
+// advance to the following token. Relies on the `tok` and `token_index`
+// variables declared by tok_str(), so it can only be used inside of it.
+// The expected length is converted to size_t so that the comparison with
+// DialogueToken::length is not a signed/unsigned comparison.
+#define expect_tok(expected_type, expected_len) do { \
+	EXPECT_LT(token_index, tok.size()); \
+	if (token_index < tok.size()) { \
+		EXPECT_EQ(DialogueTokenType::expected_type, tok[token_index].type); \
+		EXPECT_EQ(static_cast<size_t>(expected_len), tok[token_index].length); \
+		++token_index; \
+	} \
+} while(false)
+
+TEST(lagi_dialogue_lexer, plain_text) { // Input without override blocks: only TEXT and LINE_BREAK tokens are expected
+ tok_str("hello there", // all characters merge into a single TEXT token
+ expect_tok(TEXT, 11);
+ );
+
+ tok_str("hello\\Nthere", // \N is a line break escape
+ expect_tok(TEXT, 5);
+ expect_tok(LINE_BREAK, 2);
+ expect_tok(TEXT, 5);
+ );
+
+ tok_str("hello\\n\\h\\kthere", // \n and \h merge into one LINE_BREAK; \k is not an escape and stays part of the TEXT
+ expect_tok(TEXT, 5);
+ expect_tok(LINE_BREAK, 4);
+ expect_tok(TEXT, 7);
+ );
+}
+
+TEST(lagi_dialogue_lexer, basic_override_tags) { // Well-formed override blocks with common tag shapes
+ tok_str("{\\b1}bold text{\\b0}", // single-letter tag with a one-character argument, twice
+ expect_tok(OVR_BEGIN, 1);
+ expect_tok(TAG_START, 1);
+ expect_tok(TAG_NAME, 1);
+ expect_tok(ARG, 1);
+ expect_tok(OVR_END, 1);
+ expect_tok(TEXT, 9);
+ expect_tok(OVR_BEGIN, 1);
+ expect_tok(TAG_START, 1);
+ expect_tok(TAG_NAME, 1);
+ expect_tok(ARG, 1);
+ expect_tok(OVR_END, 1);
+ );
+
+ tok_str("{\\fnComic Sans MS}text", // the letters after "fn" are the argument, not part of the tag name
+ expect_tok(OVR_BEGIN, 1);
+ expect_tok(TAG_START, 1);
+ expect_tok(TAG_NAME, 2);
+ expect_tok(ARG, 5);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG, 4);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG, 2);
+ expect_tok(OVR_END, 1);
+ expect_tok(TEXT, 4);
+ );
+
+ tok_str("{\\pos(0,0)}a", // parenthesized argument list with a separator
+ expect_tok(OVR_BEGIN, 1);
+ expect_tok(TAG_START, 1);
+ expect_tok(TAG_NAME, 3);
+ expect_tok(OPEN_PAREN, 1);
+ expect_tok(ARG, 1);
+ expect_tok(ARG_SEP, 1);
+ expect_tok(ARG, 1);
+ expect_tok(CLOSE_PAREN, 1);
+ expect_tok(OVR_END, 1);
+ expect_tok(TEXT, 1);
+ );
+
+ tok_str("{\\pos( 0 , 0 )}a", // same as above with whitespace around each argument
+ expect_tok(OVR_BEGIN, 1);
+ expect_tok(TAG_START, 1);
+ expect_tok(TAG_NAME, 3);
+ expect_tok(OPEN_PAREN, 1);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG, 1);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG_SEP, 1);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG, 1);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(CLOSE_PAREN, 1);
+ expect_tok(OVR_END, 1);
+ expect_tok(TEXT, 1);
+ );
+
+ tok_str("{\\c&HFFFFFF&\\2c&H0000FF&\\3c&H000000&}a", // several tags in one block; tag names may begin with a digit
+ expect_tok(OVR_BEGIN, 1);
+ expect_tok(TAG_START, 1);
+ expect_tok(TAG_NAME, 1);
+ expect_tok(ARG, 9);
+ expect_tok(TAG_START, 1);
+ expect_tok(TAG_NAME, 2);
+ expect_tok(ARG, 9);
+ expect_tok(TAG_START, 1);
+ expect_tok(TAG_NAME, 2);
+ expect_tok(ARG, 9);
+ expect_tok(OVR_END, 1);
+ expect_tok(TEXT, 1);
+ );
+
+ tok_str("{\\t(0,100,\\clip(1, m 0 0 l 10 10 10 20))}a", // tag nested in the argument list of another tag; the two ')' merge into one token
+ expect_tok(OVR_BEGIN, 1);
+ expect_tok(TAG_START, 1);
+ expect_tok(TAG_NAME, 1);
+ expect_tok(OPEN_PAREN, 1);
+ expect_tok(ARG, 1);
+ expect_tok(ARG_SEP, 1);
+ expect_tok(ARG, 3);
+ expect_tok(ARG_SEP, 1);
+ expect_tok(TAG_START, 1);
+ expect_tok(TAG_NAME, 4);
+ expect_tok(OPEN_PAREN, 1);
+ expect_tok(ARG, 1);
+ expect_tok(ARG_SEP, 1);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG, 1);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG, 1);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG, 1);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG, 1);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG, 2);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG, 2);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG, 2);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG, 2);
+ expect_tok(CLOSE_PAREN, 2);
+ expect_tok(OVR_END, 1);
+ expect_tok(TEXT, 1);
+ );
+}
+
+TEST(lagi_dialogue_lexer, merging) { // Tokens of the same type are only merged when adjacent: the two tags stay separate
+ tok_str("{\\b\\b", // unterminated override block, so no OVR_END is expected
+ expect_tok(OVR_BEGIN, 1);
+ expect_tok(TAG_START, 1);
+ expect_tok(TAG_NAME, 1);
+ expect_tok(TAG_START, 1);
+ expect_tok(TAG_NAME, 1);
+ );
+}
+
+TEST(lagi_dialogue_lexer, whitespace) { // Whitespace inside an override block is reported as separate WHITESPACE tokens
+ tok_str("{ \\ fn Comic Sans MS }asd", // whitespace before the tag, between backslash and name, and around the argument
+ expect_tok(OVR_BEGIN, 1);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(TAG_START, 1);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(TAG_NAME, 2);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG, 5);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG, 4);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(ARG, 2);
+ expect_tok(WHITESPACE, 1);
+ expect_tok(OVR_END, 1);
+ expect_tok(TEXT, 3);
+ );
+}
+
+TEST(lagi_dialogue_lexer, comment) { // Text in an override block which does not belong to a tag is a COMMENT
+ tok_str("{a}b", // block containing only a comment
+ expect_tok(OVR_BEGIN, 1);
+ expect_tok(COMMENT, 1);
+ expect_tok(OVR_END, 1);
+ expect_tok(TEXT, 1);
+ );
+
+ tok_str("{a\\b}c", // comment followed by a tag in the same block
+ expect_tok(OVR_BEGIN, 1);
+ expect_tok(COMMENT, 1);
+ expect_tok(TAG_START, 1);
+ expect_tok(TAG_NAME, 1);
+ expect_tok(OVR_END, 1);
+ expect_tok(TEXT, 1);
+ );
+}
+
+TEST(lagi_dialogue_lexer, malformed) { // Malformed input must still be tokenized completely
+ tok_str("}", // a closing brace outside of an override block is plain text
+ expect_tok(TEXT, 1);
+ );
+
+ tok_str("{{", // an opening brace inside an override block is an ERROR token
+ expect_tok(OVR_BEGIN, 1);
+ expect_tok(ERROR, 1);
+ );
+
+ tok_str("{\\pos(0,0}a", // missing ')': the '}' still ends the override block
+ expect_tok(OVR_BEGIN, 1);
+ expect_tok(TAG_START, 1);
+ expect_tok(TAG_NAME, 3);
+ expect_tok(OPEN_PAREN, 1);
+ expect_tok(ARG, 1);
+ expect_tok(ARG_SEP, 1);
+ expect_tok(ARG, 1);
+ expect_tok(OVR_END, 1);
+ expect_tok(TEXT, 1);
+ );
+
+ tok_str("{\\b1\\}asdf", // backslash without a tag name directly before the end of the block
+ expect_tok(OVR_BEGIN, 1);
+ expect_tok(TAG_START, 1);
+ expect_tok(TAG_NAME, 1);
+ expect_tok(ARG, 1);
+ expect_tok(TAG_START, 1);
+ expect_tok(OVR_END, 1);
+ expect_tok(TEXT, 4);
+ );
+}