diff --git a/aegisub/build/Aegisub/Aegisub.vcxproj b/aegisub/build/Aegisub/Aegisub.vcxproj
index a698646fb..92a653fd8 100644
--- a/aegisub/build/Aegisub/Aegisub.vcxproj
+++ b/aegisub/build/Aegisub/Aegisub.vcxproj
@@ -198,7 +198,6 @@
-
@@ -393,7 +392,6 @@
-
NotUsing
diff --git a/aegisub/build/Aegisub/Aegisub.vcxproj.filters b/aegisub/build/Aegisub/Aegisub.vcxproj.filters
index 5a7274c26..cb309864e 100644
--- a/aegisub/build/Aegisub/Aegisub.vcxproj.filters
+++ b/aegisub/build/Aegisub/Aegisub.vcxproj.filters
@@ -339,9 +339,6 @@
Video\UI
-
- Features\Karaoke copier
-
Subtitle formats
@@ -992,9 +989,6 @@
Video\UI
-
- Features\Karaoke copier
-
Features\Spell checker
diff --git a/aegisub/build/libaegisub/libaegisub.vcxproj b/aegisub/build/libaegisub/libaegisub.vcxproj
index 1911fbde8..9242f69ea 100644
--- a/aegisub/build/libaegisub/libaegisub.vcxproj
+++ b/aegisub/build/libaegisub/libaegisub.vcxproj
@@ -4,7 +4,6 @@
{BB3FED86-DB7A-4DC7-964A-260FB86CDE61}
libaegisub
-
lib
@@ -13,7 +12,6 @@
-
@@ -33,7 +31,6 @@
lagi_pre.h
-
@@ -60,6 +57,8 @@
+
+
@@ -101,6 +100,8 @@
+
+
diff --git a/aegisub/build/libaegisub/libaegisub.vcxproj.filters b/aegisub/build/libaegisub/libaegisub.vcxproj.filters
index e7d275f76..1c487e2cc 100644
--- a/aegisub/build/libaegisub/libaegisub.vcxproj.filters
+++ b/aegisub/build/libaegisub/libaegisub.vcxproj.filters
@@ -155,6 +155,12 @@
ASS
+
+ Header Files
+
+
+ Header Files
+
@@ -256,6 +262,12 @@
ASS
+
+ Source Files\Common
+
+
+ Source Files\Common
+
diff --git a/aegisub/libaegisub/Makefile b/aegisub/libaegisub/Makefile
index 633219cd1..6812c9fd7 100644
--- a/aegisub/libaegisub/Makefile
+++ b/aegisub/libaegisub/Makefile
@@ -28,6 +28,8 @@ SRC += \
common/hotkey.cpp \
common/io.cpp \
common/json.cpp \
+ common/kana_table.cpp \
+ common/karaoke_matcher.cpp \
common/keyframe.cpp \
common/log.cpp \
common/mru.cpp \
diff --git a/aegisub/libaegisub/common/kana_table.cpp b/aegisub/libaegisub/common/kana_table.cpp
new file mode 100644
index 000000000..e265d285d
--- /dev/null
+++ b/aegisub/libaegisub/common/kana_table.cpp
@@ -0,0 +1,622 @@
+// Copyright (c) 2013, Thomas Goyne
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// Aegisub Project http://www.aegisub.org/
+
+#include "../config.h"
+
+#include "libaegisub/kana_table.h"
+
+#include
+
+namespace {
+// Kana -> romaji pairs, kept sorted by the UTF-8 bytes of the kana (strcmp
+// order) because agi::kana_to_romaji() binary-searches this table with
+// cmp_kana. A kana with several readings has one row per reading.
+agi::kana_pair kana_to_romaji[] = {
+ {"\xE3\x81\x81", "a"}, // ぁ
+ {"\xE3\x81\x82", "a"}, // あ
+ {"\xE3\x81\x83", "i"}, // ぃ
+ {"\xE3\x81\x84", "i"}, // い
+ {"\xE3\x81\x85", "u"}, // ぅ
+ {"\xE3\x81\x86", "u"}, // う
+ {"\xE3\x81\x87", "e"}, // ぇ
+ {"\xE3\x81\x88", "e"}, // え
+ {"\xE3\x81\x89", "o"}, // ぉ
+ {"\xE3\x81\x8A", "o"}, // お
+ {"\xE3\x81\x8B", "ka"}, // か
+ {"\xE3\x81\x8C", "ga"}, // が
+ {"\xE3\x81\x8D", "ki"}, // き
+ {"\xE3\x81\x8D\xE3\x82\x83", "kya"}, // きゃ
+ {"\xE3\x81\x8D\xE3\x82\x85", "kyu"}, // きゅ
+ {"\xE3\x81\x8D\xE3\x82\x87", "kyo"}, // きょ
+ {"\xE3\x81\x8E", "gi"}, // ぎ
+ {"\xE3\x81\x8E\xE3\x82\x83", "gya"}, // ぎゃ
+ {"\xE3\x81\x8E\xE3\x82\x85", "gyu"}, // ぎゅ
+ {"\xE3\x81\x8E\xE3\x82\x87", "gyo"}, // ぎょ
+ {"\xE3\x81\x8F", "ku"}, // く
+ {"\xE3\x81\x90", "gu"}, // ぐ
+ {"\xE3\x81\x91", "ke"}, // け
+ {"\xE3\x81\x92", "ge"}, // げ
+ {"\xE3\x81\x93", "ko"}, // こ
+ {"\xE3\x81\x94", "go"}, // ご
+ {"\xE3\x81\x95", "sa"}, // さ
+ {"\xE3\x81\x96", "za"}, // ざ
+ {"\xE3\x81\x97", "shi"}, // し
+ {"\xE3\x81\x97\xE3\x82\x83", "sha"}, // しゃ
+ {"\xE3\x81\x97\xE3\x82\x85", "shu"}, // しゅ
+ {"\xE3\x81\x97\xE3\x82\x87", "sho"}, // しょ
+ {"\xE3\x81\x98", "ji"}, // じ
+ {"\xE3\x81\x98\xE3\x82\x83", "ja"}, // じゃ
+ {"\xE3\x81\x98\xE3\x82\x85", "ju"}, // じゅ
+ {"\xE3\x81\x98\xE3\x82\x87", "jo"}, // じょ
+ {"\xE3\x81\x99", "su"}, // す
+ {"\xE3\x81\x9A", "zu"}, // ず
+ {"\xE3\x81\x9B", "se"}, // せ
+ {"\xE3\x81\x9C", "ze"}, // ぜ
+ {"\xE3\x81\x9D", "so"}, // そ
+ {"\xE3\x81\x9E", "zo"}, // ぞ
+ {"\xE3\x81\x9F", "ta"}, // た
+ {"\xE3\x81\xA0", "da"}, // だ
+ {"\xE3\x81\xA1", "chi"}, // ち
+ {"\xE3\x81\xA1\xE3\x82\x83", "cha"}, // ちゃ
+ {"\xE3\x81\xA1\xE3\x82\x85", "chu"}, // ちゅ
+ {"\xE3\x81\xA1\xE3\x82\x87", "cho"}, // ちょ
+ {"\xE3\x81\xA2", "ji"}, // ぢ
+ {"\xE3\x81\xA2\xE3\x82\x83", "ja"}, // ぢゃ
+ {"\xE3\x81\xA2\xE3\x82\x85", "ju"}, // ぢゅ
+ {"\xE3\x81\xA2\xE3\x82\x87", "jo"}, // ぢょ
+ {"\xE3\x81\xA3", "c"}, // っ
+ {"\xE3\x81\xA3", "k"}, // っ
+ {"\xE3\x81\xA3", "p"}, // っ
+ {"\xE3\x81\xA3", "s"}, // っ
+ {"\xE3\x81\xA3", "t"}, // っ
+ {"\xE3\x81\xA4", "tsu"}, // つ
+ {"\xE3\x81\xA5", "zu"}, // づ
+ {"\xE3\x81\xA6", "te"}, // て
+ {"\xE3\x81\xA7", "de"}, // で
+ {"\xE3\x81\xA8", "to"}, // と
+ {"\xE3\x81\xA9", "do"}, // ど
+ {"\xE3\x81\xAA", "na"}, // な
+ {"\xE3\x81\xAB", "ni"}, // に
+ {"\xE3\x81\xAB\xE3\x82\x83", "nya"}, // にゃ
+ {"\xE3\x81\xAB\xE3\x82\x85", "nyu"}, // にゅ
+ {"\xE3\x81\xAB\xE3\x82\x87", "nyo"}, // にょ
+ {"\xE3\x81\xAC", "nu"}, // ぬ
+ {"\xE3\x81\xAD", "ne"}, // ね
+ {"\xE3\x81\xAE", "no"}, // の
+ {"\xE3\x81\xAF", "ha"}, // は
+ {"\xE3\x81\xAF", "wa"}, // は
+ {"\xE3\x81\xB0", "ba"}, // ば
+ {"\xE3\x81\xB1", "pa"}, // ぱ
+ {"\xE3\x81\xB2", "hi"}, // ひ
+ {"\xE3\x81\xB2\xE3\x82\x83", "hya"}, // ひゃ
+ {"\xE3\x81\xB2\xE3\x82\x85", "hyu"}, // ひゅ
+ {"\xE3\x81\xB2\xE3\x82\x87", "hyo"}, // ひょ
+ {"\xE3\x81\xB3", "bi"}, // び
+ {"\xE3\x81\xB3\xE3\x82\x83", "bya"}, // びゃ
+ {"\xE3\x81\xB3\xE3\x82\x85", "byu"}, // びゅ
+ {"\xE3\x81\xB3\xE3\x82\x87", "byo"}, // びょ
+ {"\xE3\x81\xB4", "pi"}, // ぴ
+ {"\xE3\x81\xB4\xE3\x82\x83", "pya"}, // ぴゃ
+ {"\xE3\x81\xB4\xE3\x82\x85", "pyu"}, // ぴゅ
+ {"\xE3\x81\xB4\xE3\x82\x87", "pyo"}, // ぴょ
+ {"\xE3\x81\xB5", "fu"}, // ふ
+ {"\xE3\x81\xB6", "bu"}, // ぶ
+ {"\xE3\x81\xB7", "pu"}, // ぷ
+ {"\xE3\x81\xB8", "he"}, // へ
+ {"\xE3\x81\xB8", "e"}, // へ
+ {"\xE3\x81\xB9", "be"}, // べ
+ {"\xE3\x81\xBA", "pe"}, // ぺ
+ {"\xE3\x81\xBB", "ho"}, // ほ
+ {"\xE3\x81\xBC", "bo"}, // ぼ
+ {"\xE3\x81\xBD", "po"}, // ぽ
+ {"\xE3\x81\xBE", "ma"}, // ま
+ {"\xE3\x81\xBF", "mi"}, // み
+ {"\xE3\x81\xBF\xE3\x82\x83", "mya"}, // みゃ
+ {"\xE3\x81\xBF\xE3\x82\x85", "myu"}, // みゅ
+ {"\xE3\x81\xBF\xE3\x82\x87", "myo"}, // みょ
+ {"\xE3\x82\x80", "mu"}, // む
+ {"\xE3\x82\x81", "me"}, // め
+ {"\xE3\x82\x82", "mo"}, // も
+ {"\xE3\x82\x84", "ya"}, // や
+ {"\xE3\x82\x86", "yu"}, // ゆ
+ {"\xE3\x82\x88", "yo"}, // よ
+ {"\xE3\x82\x89", "ra"}, // ら
+ {"\xE3\x82\x8A", "ri"}, // り
+ {"\xE3\x82\x8A\xE3\x82\x83", "rya"}, // りゃ
+ {"\xE3\x82\x8A\xE3\x82\x85", "ryu"}, // りゅ
+ {"\xE3\x82\x8A\xE3\x82\x87", "ryo"}, // りょ
+ {"\xE3\x82\x8B", "ru"}, // る
+ {"\xE3\x82\x8C", "re"}, // れ
+ {"\xE3\x82\x8D", "ro"}, // ろ
+ {"\xE3\x82\x8F", "wa"}, // わ
+ {"\xE3\x82\x90", "wi"}, // ゐ
+ {"\xE3\x82\x91", "we"}, // ゑ
+ {"\xE3\x82\x92", "wo"}, // を
+ {"\xE3\x82\x93", "m"}, // ん
+ {"\xE3\x82\x93", "n"}, // ん
+ {"\xE3\x82\xA1", "a"}, // ァ
+ {"\xE3\x82\xA2", "a"}, // ア
+ {"\xE3\x82\xA3", "i"}, // ィ
+ {"\xE3\x82\xA4", "i"}, // イ
+ {"\xE3\x82\xA4\xE3\x82\xA7", "ye"}, // イェ
+ {"\xE3\x82\xA5", "u"}, // ゥ
+ {"\xE3\x82\xA6", "u"}, // ウ
+ {"\xE3\x82\xA6\xE3\x82\xA3", "wi"}, // ウィ
+ {"\xE3\x82\xA6\xE3\x82\xA7", "we"}, // ウェ
+ {"\xE3\x82\xA6\xE3\x82\xA9", "wo"}, // ウォ
+ {"\xE3\x82\xA7", "e"}, // ェ
+ {"\xE3\x82\xA8", "e"}, // エ
+ {"\xE3\x82\xA9", "o"}, // ォ
+ {"\xE3\x82\xAA", "o"}, // オ
+ {"\xE3\x82\xAB", "ka"}, // カ
+ {"\xE3\x82\xAC", "ga"}, // ガ
+ {"\xE3\x82\xAD", "ki"}, // キ
+ {"\xE3\x82\xAD\xE3\x83\xA3", "kya"}, // キャ
+ {"\xE3\x82\xAD\xE3\x83\xA5", "kyu"}, // キュ
+ {"\xE3\x82\xAD\xE3\x83\xA7", "kyo"}, // キョ
+ {"\xE3\x82\xAE", "gi"}, // ギ
+ {"\xE3\x82\xAE\xE3\x83\xA3", "gya"}, // ギャ
+ {"\xE3\x82\xAE\xE3\x83\xA5", "gyu"}, // ギュ
+ {"\xE3\x82\xAE\xE3\x83\xA7", "gyo"}, // ギョ
+ {"\xE3\x82\xAF", "ku"}, // ク
+ {"\xE3\x82\xB0", "gu"}, // グ
+ {"\xE3\x82\xB1", "ke"}, // ケ
+ {"\xE3\x82\xB2", "ge"}, // ゲ
+ {"\xE3\x82\xB3", "ko"}, // コ
+ {"\xE3\x82\xB4", "go"}, // ゴ
+ {"\xE3\x82\xB5", "sa"}, // サ
+ {"\xE3\x82\xB6", "za"}, // ザ
+ {"\xE3\x82\xB7", "shi"}, // シ
+ {"\xE3\x82\xB7\xE3\x82\xA7", "she"}, // シェ
+ {"\xE3\x82\xB7\xE3\x83\xA3", "sha"}, // シャ
+ {"\xE3\x82\xB7\xE3\x83\xA5", "shu"}, // シュ
+ {"\xE3\x82\xB7\xE3\x83\xA7", "sho"}, // ショ
+ {"\xE3\x82\xB8", "ji"}, // ジ
+ {"\xE3\x82\xB8\xE3\x82\xA7", "je"}, // ジェ
+ {"\xE3\x82\xB8\xE3\x83\xA3", "ja"}, // ジャ
+ {"\xE3\x82\xB8\xE3\x83\xA5", "ju"}, // ジュ
+ {"\xE3\x82\xB8\xE3\x83\xA7", "jo"}, // ジョ
+ {"\xE3\x82\xB9", "su"}, // ス
+ {"\xE3\x82\xBA", "zu"}, // ズ
+ {"\xE3\x82\xBB", "se"}, // セ
+ {"\xE3\x82\xBC", "ze"}, // ゼ
+ {"\xE3\x82\xBD", "so"}, // ソ
+ {"\xE3\x82\xBE", "zo"}, // ゾ
+ {"\xE3\x82\xBF", "ta"}, // タ
+ {"\xE3\x83\x80", "da"}, // ダ
+ {"\xE3\x83\x81", "chi"}, // チ
+ {"\xE3\x83\x81\xE3\x82\xA7", "che"}, // チェ
+ {"\xE3\x83\x81\xE3\x83\xA3", "cha"}, // チャ
+ {"\xE3\x83\x81\xE3\x83\xA5", "chu"}, // チュ
+ {"\xE3\x83\x81\xE3\x83\xA7", "cho"}, // チョ
+ {"\xE3\x83\x82", "ji"}, // ヂ
+ {"\xE3\x83\x82\xE3\x83\xA3", "ja"}, // ヂャ
+ {"\xE3\x83\x82\xE3\x83\xA5", "ju"}, // ヂュ
+ {"\xE3\x83\x82\xE3\x83\xA7", "jo"}, // ヂョ
+ {"\xE3\x83\x83", "c"}, // ッ
+ {"\xE3\x83\x83", "k"}, // ッ
+ {"\xE3\x83\x83", "p"}, // ッ
+ {"\xE3\x83\x83", "s"}, // ッ
+ {"\xE3\x83\x83", "t"}, // ッ
+ {"\xE3\x83\x84", "tsu"}, // ツ
+ {"\xE3\x83\x84\xE3\x82\xA1", "tsa"}, // ツァ
+ {"\xE3\x83\x84\xE3\x82\xA3", "tsi"}, // ツィ
+ {"\xE3\x83\x84\xE3\x82\xA7", "tse"}, // ツェ
+ {"\xE3\x83\x84\xE3\x82\xA9", "tso"}, // ツォ
+ {"\xE3\x83\x85", "zu"}, // ヅ
+ {"\xE3\x83\x86", "te"}, // テ
+ {"\xE3\x83\x86\xE3\x82\xA3", "ti"}, // ティ
+ {"\xE3\x83\x86\xE3\x82\xA5", "tu"}, // テゥ
+ {"\xE3\x83\x86\xE3\x83\xA5", "tyu"}, // テュ
+ {"\xE3\x83\x87", "de"}, // デ
+ {"\xE3\x83\x87\xE3\x82\xA3", "di"}, // ディ
+ {"\xE3\x83\x87\xE3\x82\xA5", "du"}, // デゥ
+ {"\xE3\x83\x87\xE3\x83\xA5", "dyu"}, // デュ
+ {"\xE3\x83\x88", "to"}, // ト
+ {"\xE3\x83\x89", "do"}, // ド
+ {"\xE3\x83\x8A", "na"}, // ナ
+ {"\xE3\x83\x8B", "ni"}, // ニ
+ {"\xE3\x83\x8B\xE3\x83\xA3", "nya"}, // ニャ
+ {"\xE3\x83\x8B\xE3\x83\xA5", "nyu"}, // ニュ
+ {"\xE3\x83\x8B\xE3\x83\xA7", "nyo"}, // ニョ
+ {"\xE3\x83\x8C", "nu"}, // ヌ
+ {"\xE3\x83\x8D", "ne"}, // ネ
+ {"\xE3\x83\x8E", "no"}, // ノ
+ {"\xE3\x83\x8F", "ha"}, // ハ
+ {"\xE3\x83\x90", "ba"}, // バ
+ {"\xE3\x83\x91", "pa"}, // パ
+ {"\xE3\x83\x92", "hi"}, // ヒ
+ {"\xE3\x83\x92\xE3\x83\xA3", "hya"}, // ヒャ
+ {"\xE3\x83\x92\xE3\x83\xA5", "hyu"}, // ヒュ
+ {"\xE3\x83\x92\xE3\x83\xA7", "hyo"}, // ヒョ
+ {"\xE3\x83\x93", "bi"}, // ビ
+ {"\xE3\x83\x93\xE3\x83\xA3", "bya"}, // ビャ
+ {"\xE3\x83\x93\xE3\x83\xA5", "byu"}, // ビュ
+ {"\xE3\x83\x93\xE3\x83\xA7", "byo"}, // ビョ
+ {"\xE3\x83\x94", "pi"}, // ピ
+ {"\xE3\x83\x94\xE3\x83\xA3", "pya"}, // ピャ
+ {"\xE3\x83\x94\xE3\x83\xA5", "pyu"}, // ピュ
+ {"\xE3\x83\x94\xE3\x83\xA7", "pyo"}, // ピョ
+ {"\xE3\x83\x95", "fu"}, // フ
+ {"\xE3\x83\x95\xE3\x82\xA1", "fa"}, // ファ
+ {"\xE3\x83\x95\xE3\x82\xA3", "fi"}, // フィ
+ {"\xE3\x83\x95\xE3\x82\xA7", "fe"}, // フェ
+ {"\xE3\x83\x95\xE3\x82\xA9", "fo"}, // フォ
+ {"\xE3\x83\x95\xE3\x83\xA5", "fyu"}, // フュ
+ {"\xE3\x83\x96", "bu"}, // ブ
+ {"\xE3\x83\x97", "pu"}, // プ
+ {"\xE3\x83\x98", "he"}, // ヘ
+ {"\xE3\x83\x99", "be"}, // ベ
+ {"\xE3\x83\x9A", "pe"}, // ペ
+ {"\xE3\x83\x9B", "ho"}, // ホ
+ {"\xE3\x83\x9C", "bo"}, // ボ
+ {"\xE3\x83\x9D", "po"}, // ポ
+ {"\xE3\x83\x9E", "ma"}, // マ
+ {"\xE3\x83\x9F", "mi"}, // ミ
+ {"\xE3\x83\x9F\xE3\x83\xA3", "mya"}, // ミャ
+ {"\xE3\x83\x9F\xE3\x83\xA5", "myu"}, // ミュ
+ {"\xE3\x83\x9F\xE3\x83\xA7", "myo"}, // ミョ
+ {"\xE3\x83\xA0", "mu"}, // ム
+ {"\xE3\x83\xA1", "me"}, // メ
+ {"\xE3\x83\xA2", "mo"}, // モ
+ {"\xE3\x83\xA4", "ya"}, // ヤ
+ {"\xE3\x83\xA6", "yu"}, // ユ
+ {"\xE3\x83\xA8", "yo"}, // ヨ
+ {"\xE3\x83\xA9", "ra"}, // ラ
+ {"\xE3\x83\xAA", "ri"}, // リ
+ {"\xE3\x83\xAA\xE3\x83\xA3", "rya"}, // リャ
+ {"\xE3\x83\xAA\xE3\x83\xA5", "ryu"}, // リュ
+ {"\xE3\x83\xAA\xE3\x83\xA7", "ryo"}, // リョ
+ {"\xE3\x83\xAB", "ru"}, // ル
+ {"\xE3\x83\xAC", "re"}, // レ
+ {"\xE3\x83\xAD", "ro"}, // ロ
+ {"\xE3\x83\xAF", "wa"}, // ワ
+ {"\xE3\x83\xB0", "wi"}, // ヰ
+ {"\xE3\x83\xB1", "we"}, // ヱ
+ {"\xE3\x83\xB2", "wo"}, // ヲ
+ {"\xE3\x83\xB3", "m"}, // ン
+ {"\xE3\x83\xB3", "n"}, // ン
+ {"\xE3\x83\xB4", "vu"}, // ヴ
+ {"\xE3\x83\xB4\xE3\x82\xA1", "va"}, // ヴァ
+ {"\xE3\x83\xB4\xE3\x82\xA3", "vi"}, // ヴィ
+ {"\xE3\x83\xB4\xE3\x82\xA7", "ve"}, // ヴェ
+ {"\xE3\x83\xB4\xE3\x82\xA9", "vo"}, // ヴォ
+ {"\xE3\x83\xB4\xE3\x83\xA3", "vya"}, // ヴャ
+ {"\xE3\x83\xB4\xE3\x83\xA5", "vyu"}, // ヴュ
+ {"\xE3\x83\xB4\xE3\x83\xA7", "vyo"}, // ヴョ
+ {"\xE3\x83\xBC", "a"}, // ー
+ {"\xE3\x83\xBC", "e"}, // ー
+ {"\xE3\x83\xBC", "i"}, // ー
+ {"\xE3\x83\xBC", "o"}, // ー
+ {"\xE3\x83\xBC", "u"}, // ー
+};
+
+// The same pairs re-sorted by romaji (strcmp order), because
+// agi::romaji_to_kana() runs equal_range over this table with cmp_romaji.
+// Rows sharing a romaji are the candidate kana for that reading.
+agi::kana_pair romaji_to_kana[] = {
+ {"\xE3\x81\x81", "a"}, // ぁ
+ {"\xE3\x81\x82", "a"}, // あ
+ {"\xE3\x82\xA1", "a"}, // ァ
+ {"\xE3\x82\xA2", "a"}, // ア
+ {"\xE3\x83\xBC", "a"}, // ー
+ {"\xE3\x81\xB0", "ba"}, // ば
+ {"\xE3\x83\x90", "ba"}, // バ
+ {"\xE3\x81\xB9", "be"}, // べ
+ {"\xE3\x83\x99", "be"}, // ベ
+ {"\xE3\x81\xB3", "bi"}, // び
+ {"\xE3\x83\x93", "bi"}, // ビ
+ {"\xE3\x81\xBC", "bo"}, // ぼ
+ {"\xE3\x83\x9C", "bo"}, // ボ
+ {"\xE3\x81\xB6", "bu"}, // ぶ
+ {"\xE3\x83\x96", "bu"}, // ブ
+ {"\xE3\x81\xB3\xE3\x82\x83", "bya"}, // びゃ
+ {"\xE3\x83\x93\xE3\x83\xA3", "bya"}, // ビャ
+ {"\xE3\x81\xB3\xE3\x82\x87", "byo"}, // びょ
+ {"\xE3\x83\x93\xE3\x83\xA7", "byo"}, // ビョ
+ {"\xE3\x81\xB3\xE3\x82\x85", "byu"}, // びゅ
+ {"\xE3\x83\x93\xE3\x83\xA5", "byu"}, // ビュ
+ {"\xE3\x81\xA3", "c"}, // っ
+ {"\xE3\x83\x83", "c"}, // ッ
+ {"\xE3\x81\xA1\xE3\x82\x83", "cha"}, // ちゃ
+ {"\xE3\x83\x81\xE3\x83\xA3", "cha"}, // チャ
+ {"\xE3\x83\x81\xE3\x82\xA7", "che"}, // チェ
+ {"\xE3\x81\xA1", "chi"}, // ち
+ {"\xE3\x83\x81", "chi"}, // チ
+ {"\xE3\x81\xA1\xE3\x82\x87", "cho"}, // ちょ
+ {"\xE3\x83\x81\xE3\x83\xA7", "cho"}, // チョ
+ {"\xE3\x81\xA1\xE3\x82\x85", "chu"}, // ちゅ
+ {"\xE3\x83\x81\xE3\x83\xA5", "chu"}, // チュ
+ {"\xE3\x81\xA0", "da"}, // だ
+ {"\xE3\x83\x80", "da"}, // ダ
+ {"\xE3\x81\xA7", "de"}, // で
+ {"\xE3\x83\x87", "de"}, // デ
+ {"\xE3\x83\x87\xE3\x82\xA3", "di"}, // ディ
+ {"\xE3\x81\xA9", "do"}, // ど
+ {"\xE3\x83\x89", "do"}, // ド
+ {"\xE3\x83\x87\xE3\x82\xA5", "du"}, // デゥ
+ {"\xE3\x83\x87\xE3\x83\xA5", "dyu"}, // デュ
+ {"\xE3\x81\x87", "e"}, // ぇ
+ {"\xE3\x81\x88", "e"}, // え
+ {"\xE3\x81\xB8", "e"}, // へ
+ {"\xE3\x82\xA7", "e"}, // ェ
+ {"\xE3\x82\xA8", "e"}, // エ
+ {"\xE3\x83\xBC", "e"}, // ー
+ {"\xE3\x83\x95\xE3\x82\xA1", "fa"}, // ファ
+ {"\xE3\x83\x95\xE3\x82\xA7", "fe"}, // フェ
+ {"\xE3\x83\x95\xE3\x82\xA3", "fi"}, // フィ
+ {"\xE3\x83\x95\xE3\x82\xA9", "fo"}, // フォ
+ {"\xE3\x81\xB5", "fu"}, // ふ
+ {"\xE3\x83\x95", "fu"}, // フ
+ {"\xE3\x83\x95\xE3\x83\xA5", "fyu"}, // フュ
+ {"\xE3\x81\x8C", "ga"}, // が
+ {"\xE3\x82\xAC", "ga"}, // ガ
+ {"\xE3\x81\x92", "ge"}, // げ
+ {"\xE3\x82\xB2", "ge"}, // ゲ
+ {"\xE3\x81\x8E", "gi"}, // ぎ
+ {"\xE3\x82\xAE", "gi"}, // ギ
+ {"\xE3\x81\x94", "go"}, // ご
+ {"\xE3\x82\xB4", "go"}, // ゴ
+ {"\xE3\x81\x90", "gu"}, // ぐ
+ {"\xE3\x82\xB0", "gu"}, // グ
+ {"\xE3\x81\x8E\xE3\x82\x83", "gya"}, // ぎゃ
+ {"\xE3\x82\xAE\xE3\x83\xA3", "gya"}, // ギャ
+ {"\xE3\x81\x8E\xE3\x82\x87", "gyo"}, // ぎょ
+ {"\xE3\x82\xAE\xE3\x83\xA7", "gyo"}, // ギョ
+ {"\xE3\x81\x8E\xE3\x82\x85", "gyu"}, // ぎゅ
+ {"\xE3\x82\xAE\xE3\x83\xA5", "gyu"}, // ギュ
+ {"\xE3\x81\xAF", "ha"}, // は
+ {"\xE3\x83\x8F", "ha"}, // ハ
+ {"\xE3\x81\xB8", "he"}, // へ
+ {"\xE3\x83\x98", "he"}, // ヘ
+ {"\xE3\x81\xB2", "hi"}, // ひ
+ {"\xE3\x83\x92", "hi"}, // ヒ
+ {"\xE3\x81\xBB", "ho"}, // ほ
+ {"\xE3\x83\x9B", "ho"}, // ホ
+ {"\xE3\x81\xB2\xE3\x82\x83", "hya"}, // ひゃ
+ {"\xE3\x83\x92\xE3\x83\xA3", "hya"}, // ヒャ
+ {"\xE3\x81\xB2\xE3\x82\x87", "hyo"}, // ひょ
+ {"\xE3\x83\x92\xE3\x83\xA7", "hyo"}, // ヒョ
+ {"\xE3\x81\xB2\xE3\x82\x85", "hyu"}, // ひゅ
+ {"\xE3\x83\x92\xE3\x83\xA5", "hyu"}, // ヒュ
+ {"\xE3\x81\x83", "i"}, // ぃ
+ {"\xE3\x81\x84", "i"}, // い
+ {"\xE3\x82\xA3", "i"}, // ィ
+ {"\xE3\x82\xA4", "i"}, // イ
+ {"\xE3\x83\xBC", "i"}, // ー
+ {"\xE3\x81\x98\xE3\x82\x83", "ja"}, // じゃ
+ {"\xE3\x81\xA2\xE3\x82\x83", "ja"}, // ぢゃ
+ {"\xE3\x82\xB8\xE3\x83\xA3", "ja"}, // ジャ
+ {"\xE3\x83\x82\xE3\x83\xA3", "ja"}, // ヂャ
+ {"\xE3\x82\xB8\xE3\x82\xA7", "je"}, // ジェ
+ {"\xE3\x81\x98", "ji"}, // じ
+ {"\xE3\x81\xA2", "ji"}, // ぢ
+ {"\xE3\x82\xB8", "ji"}, // ジ
+ {"\xE3\x83\x82", "ji"}, // ヂ
+ {"\xE3\x81\x98\xE3\x82\x87", "jo"}, // じょ
+ {"\xE3\x81\xA2\xE3\x82\x87", "jo"}, // ぢょ
+ {"\xE3\x82\xB8\xE3\x83\xA7", "jo"}, // ジョ
+ {"\xE3\x83\x82\xE3\x83\xA7", "jo"}, // ヂョ
+ {"\xE3\x81\x98\xE3\x82\x85", "ju"}, // じゅ
+ {"\xE3\x81\xA2\xE3\x82\x85", "ju"}, // ぢゅ
+ {"\xE3\x82\xB8\xE3\x83\xA5", "ju"}, // ジュ
+ {"\xE3\x83\x82\xE3\x83\xA5", "ju"}, // ヂュ
+ {"\xE3\x81\xA3", "k"}, // っ
+ {"\xE3\x83\x83", "k"}, // ッ
+ {"\xE3\x81\x8B", "ka"}, // か
+ {"\xE3\x82\xAB", "ka"}, // カ
+ {"\xE3\x81\x91", "ke"}, // け
+ {"\xE3\x82\xB1", "ke"}, // ケ
+ {"\xE3\x81\x8D", "ki"}, // き
+ {"\xE3\x82\xAD", "ki"}, // キ
+ {"\xE3\x81\x93", "ko"}, // こ
+ {"\xE3\x82\xB3", "ko"}, // コ
+ {"\xE3\x81\x8F", "ku"}, // く
+ {"\xE3\x82\xAF", "ku"}, // ク
+ {"\xE3\x81\x8D\xE3\x82\x83", "kya"}, // きゃ
+ {"\xE3\x82\xAD\xE3\x83\xA3", "kya"}, // キャ
+ {"\xE3\x81\x8D\xE3\x82\x87", "kyo"}, // きょ
+ {"\xE3\x82\xAD\xE3\x83\xA7", "kyo"}, // キョ
+ {"\xE3\x81\x8D\xE3\x82\x85", "kyu"}, // きゅ
+ {"\xE3\x82\xAD\xE3\x83\xA5", "kyu"}, // キュ
+ {"\xE3\x82\x93", "m"}, // ん
+ {"\xE3\x83\xB3", "m"}, // ン
+ {"\xE3\x81\xBE", "ma"}, // ま
+ {"\xE3\x83\x9E", "ma"}, // マ
+ {"\xE3\x82\x81", "me"}, // め
+ {"\xE3\x83\xA1", "me"}, // メ
+ {"\xE3\x81\xBF", "mi"}, // み
+ {"\xE3\x83\x9F", "mi"}, // ミ
+ {"\xE3\x82\x82", "mo"}, // も
+ {"\xE3\x83\xA2", "mo"}, // モ
+ {"\xE3\x82\x80", "mu"}, // む
+ {"\xE3\x83\xA0", "mu"}, // ム
+ {"\xE3\x81\xBF\xE3\x82\x83", "mya"}, // みゃ
+ {"\xE3\x83\x9F\xE3\x83\xA3", "mya"}, // ミャ
+ {"\xE3\x81\xBF\xE3\x82\x87", "myo"}, // みょ
+ {"\xE3\x83\x9F\xE3\x83\xA7", "myo"}, // ミョ
+ {"\xE3\x81\xBF\xE3\x82\x85", "myu"}, // みゅ
+ {"\xE3\x83\x9F\xE3\x83\xA5", "myu"}, // ミュ
+ {"\xE3\x82\x93", "n"}, // ん
+ {"\xE3\x83\xB3", "n"}, // ン
+ {"\xE3\x81\xAA", "na"}, // な
+ {"\xE3\x83\x8A", "na"}, // ナ
+ {"\xE3\x81\xAD", "ne"}, // ね
+ {"\xE3\x83\x8D", "ne"}, // ネ
+ {"\xE3\x81\xAB", "ni"}, // に
+ {"\xE3\x83\x8B", "ni"}, // ニ
+ {"\xE3\x81\xAE", "no"}, // の
+ {"\xE3\x83\x8E", "no"}, // ノ
+ {"\xE3\x81\xAC", "nu"}, // ぬ
+ {"\xE3\x83\x8C", "nu"}, // ヌ
+ {"\xE3\x81\xAB\xE3\x82\x83", "nya"}, // にゃ
+ {"\xE3\x83\x8B\xE3\x83\xA3", "nya"}, // ニャ
+ {"\xE3\x81\xAB\xE3\x82\x87", "nyo"}, // にょ
+ {"\xE3\x83\x8B\xE3\x83\xA7", "nyo"}, // ニョ
+ {"\xE3\x81\xAB\xE3\x82\x85", "nyu"}, // にゅ
+ {"\xE3\x83\x8B\xE3\x83\xA5", "nyu"}, // ニュ
+ {"\xE3\x81\x89", "o"}, // ぉ
+ {"\xE3\x81\x8A", "o"}, // お
+ {"\xE3\x82\xA9", "o"}, // ォ
+ {"\xE3\x82\xAA", "o"}, // オ
+ {"\xE3\x83\xBC", "o"}, // ー
+ {"\xE3\x81\xA3", "p"}, // っ
+ {"\xE3\x83\x83", "p"}, // ッ
+ {"\xE3\x81\xB1", "pa"}, // ぱ
+ {"\xE3\x83\x91", "pa"}, // パ
+ {"\xE3\x81\xBA", "pe"}, // ぺ
+ {"\xE3\x83\x9A", "pe"}, // ペ
+ {"\xE3\x81\xB4", "pi"}, // ぴ
+ {"\xE3\x83\x94", "pi"}, // ピ
+ {"\xE3\x81\xBD", "po"}, // ぽ
+ {"\xE3\x83\x9D", "po"}, // ポ
+ {"\xE3\x81\xB7", "pu"}, // ぷ
+ {"\xE3\x83\x97", "pu"}, // プ
+ {"\xE3\x81\xB4\xE3\x82\x83", "pya"}, // ぴゃ
+ {"\xE3\x83\x94\xE3\x83\xA3", "pya"}, // ピャ
+ {"\xE3\x81\xB4\xE3\x82\x87", "pyo"}, // ぴょ
+ {"\xE3\x83\x94\xE3\x83\xA7", "pyo"}, // ピョ
+ {"\xE3\x81\xB4\xE3\x82\x85", "pyu"}, // ぴゅ
+ {"\xE3\x83\x94\xE3\x83\xA5", "pyu"}, // ピュ
+ {"\xE3\x82\x89", "ra"}, // ら
+ {"\xE3\x83\xA9", "ra"}, // ラ
+ {"\xE3\x82\x8C", "re"}, // れ
+ {"\xE3\x83\xAC", "re"}, // レ
+ {"\xE3\x82\x8A", "ri"}, // り
+ {"\xE3\x83\xAA", "ri"}, // リ
+ {"\xE3\x82\x8D", "ro"}, // ろ
+ {"\xE3\x83\xAD", "ro"}, // ロ
+ {"\xE3\x82\x8B", "ru"}, // る
+ {"\xE3\x83\xAB", "ru"}, // ル
+ {"\xE3\x82\x8A\xE3\x82\x83", "rya"}, // りゃ
+ {"\xE3\x83\xAA\xE3\x83\xA3", "rya"}, // リャ
+ {"\xE3\x82\x8A\xE3\x82\x87", "ryo"}, // りょ
+ {"\xE3\x83\xAA\xE3\x83\xA7", "ryo"}, // リョ
+ {"\xE3\x82\x8A\xE3\x82\x85", "ryu"}, // りゅ
+ {"\xE3\x83\xAA\xE3\x83\xA5", "ryu"}, // リュ
+ {"\xE3\x81\xA3", "s"}, // っ
+ {"\xE3\x83\x83", "s"}, // ッ
+ {"\xE3\x81\x95", "sa"}, // さ
+ {"\xE3\x82\xB5", "sa"}, // サ
+ {"\xE3\x81\x9B", "se"}, // せ
+ {"\xE3\x82\xBB", "se"}, // セ
+ {"\xE3\x81\x97\xE3\x82\x83", "sha"}, // しゃ
+ {"\xE3\x82\xB7\xE3\x83\xA3", "sha"}, // シャ
+ {"\xE3\x82\xB7\xE3\x82\xA7", "she"}, // シェ
+ {"\xE3\x81\x97", "shi"}, // し
+ {"\xE3\x82\xB7", "shi"}, // シ
+ {"\xE3\x81\x97\xE3\x82\x87", "sho"}, // しょ
+ {"\xE3\x82\xB7\xE3\x83\xA7", "sho"}, // ショ
+ {"\xE3\x81\x97\xE3\x82\x85", "shu"}, // しゅ
+ {"\xE3\x82\xB7\xE3\x83\xA5", "shu"}, // シュ
+ {"\xE3\x81\x9D", "so"}, // そ
+ {"\xE3\x82\xBD", "so"}, // ソ
+ {"\xE3\x81\x99", "su"}, // す
+ {"\xE3\x82\xB9", "su"}, // ス
+ {"\xE3\x81\xA3", "t"}, // っ
+ {"\xE3\x83\x83", "t"}, // ッ
+ {"\xE3\x81\x9F", "ta"}, // た
+ {"\xE3\x82\xBF", "ta"}, // タ
+ {"\xE3\x81\xA6", "te"}, // て
+ {"\xE3\x83\x86", "te"}, // テ
+ {"\xE3\x83\x86\xE3\x82\xA3", "ti"}, // ティ
+ {"\xE3\x81\xA8", "to"}, // と
+ {"\xE3\x83\x88", "to"}, // ト
+ {"\xE3\x83\x84\xE3\x82\xA1", "tsa"}, // ツァ
+ {"\xE3\x83\x84\xE3\x82\xA7", "tse"}, // ツェ
+ {"\xE3\x83\x84\xE3\x82\xA3", "tsi"}, // ツィ
+ {"\xE3\x83\x84\xE3\x82\xA9", "tso"}, // ツォ
+ {"\xE3\x81\xA4", "tsu"}, // つ
+ {"\xE3\x83\x84", "tsu"}, // ツ
+ {"\xE3\x83\x86\xE3\x82\xA5", "tu"}, // テゥ
+ {"\xE3\x83\x86\xE3\x83\xA5", "tyu"}, // テュ
+ {"\xE3\x81\x85", "u"}, // ぅ
+ {"\xE3\x81\x86", "u"}, // う
+ {"\xE3\x82\xA5", "u"}, // ゥ
+ {"\xE3\x82\xA6", "u"}, // ウ
+ {"\xE3\x83\xBC", "u"}, // ー
+ {"\xE3\x83\xB4\xE3\x82\xA1", "va"}, // ヴァ
+ {"\xE3\x83\xB4\xE3\x82\xA7", "ve"}, // ヴェ
+ {"\xE3\x83\xB4\xE3\x82\xA3", "vi"}, // ヴィ
+ {"\xE3\x83\xB4\xE3\x82\xA9", "vo"}, // ヴォ
+ {"\xE3\x83\xB4", "vu"}, // ヴ
+ {"\xE3\x83\xB4\xE3\x83\xA3", "vya"}, // ヴャ
+ {"\xE3\x83\xB4\xE3\x83\xA7", "vyo"}, // ヴョ
+ {"\xE3\x83\xB4\xE3\x83\xA5", "vyu"}, // ヴュ
+ {"\xE3\x81\xAF", "wa"}, // は
+ {"\xE3\x82\x8F", "wa"}, // わ
+ {"\xE3\x83\xAF", "wa"}, // ワ
+ {"\xE3\x82\x91", "we"}, // ゑ
+ {"\xE3\x82\xA6\xE3\x82\xA7", "we"}, // ウェ
+ {"\xE3\x83\xB1", "we"}, // ヱ
+ {"\xE3\x82\x90", "wi"}, // ゐ
+ {"\xE3\x82\xA6\xE3\x82\xA3", "wi"}, // ウィ
+ {"\xE3\x83\xB0", "wi"}, // ヰ
+ {"\xE3\x82\x92", "wo"}, // を
+ {"\xE3\x82\xA6\xE3\x82\xA9", "wo"}, // ウォ
+ {"\xE3\x83\xB2", "wo"}, // ヲ
+ {"\xE3\x82\x84", "ya"}, // や
+ {"\xE3\x83\xA4", "ya"}, // ヤ
+ {"\xE3\x82\xA4\xE3\x82\xA7", "ye"}, // イェ
+ {"\xE3\x82\x88", "yo"}, // よ
+ {"\xE3\x83\xA8", "yo"}, // ヨ
+ {"\xE3\x82\x86", "yu"}, // ゆ
+ {"\xE3\x83\xA6", "yu"}, // ユ
+ {"\xE3\x81\x96", "za"}, // ざ
+ {"\xE3\x82\xB6", "za"}, // ザ
+ {"\xE3\x81\x9C", "ze"}, // ぜ
+ {"\xE3\x82\xBC", "ze"}, // ゼ
+ {"\xE3\x81\x9E", "zo"}, // ぞ
+ {"\xE3\x82\xBE", "zo"}, // ゾ
+ {"\xE3\x81\x9A", "zu"}, // ず
+ {"\xE3\x81\xA5", "zu"}, // づ
+ {"\xE3\x82\xBA", "zu"}, // ズ
+ {"\xE3\x83\x85", "zu"}, // ヅ
+};
+
+// Ordering predicate for binary searches over the kana-sorted table: true
+// when the entry's kana sorts strictly before the requested kana (byte-wise).
+bool cmp_kana(agi::kana_pair const& kp, std::string const& kana) {
+	const int order = strcmp(kp.kana, kana.c_str());
+	return order < 0;
+}
+
+// Heterogeneous comparator for searching the romaji-sorted table with a
+// std::string key. All overloads order byte-wise on the romaji field.
+struct cmp_romaji {
+	// Table entry sorts before the key
+	bool operator()(agi::kana_pair const& entry, std::string const& key) const {
+		return precedes(entry.romaji, key.c_str());
+	}
+	// Key sorts before the table entry
+	bool operator()(std::string const& key, agi::kana_pair const& entry) const {
+		return precedes(key.c_str(), entry.romaji);
+	}
+
+#ifdef _MSC_VER // debug iterator stuff needs this overload
+	bool operator()(agi::kana_pair const& lhs, agi::kana_pair const& rhs) const {
+		return precedes(lhs.romaji, rhs.romaji);
+	}
+#endif
+
+private:
+	// Strict byte-wise "less than" on two C strings
+	static bool precedes(const char *lhs, const char *rhs) {
+		return strcmp(lhs, rhs) < 0;
+	}
+};
+
+}
+
+namespace agi {
+/// Look up every romaji transliteration listed for a kana string.
+/// @param kana UTF-8 kana; must equal a table entry exactly (one or two kana)
+/// @return The romaji of every matching row, in table order; empty if the
+///         kana is not in the table. The pointers refer to static table data.
+std::vector<const char *> kana_to_romaji(std::string const& kana) {
+	std::vector<const char *> ret;
+	const char *key = kana.c_str();
+	// The table is sorted by kana, so all rows for this kana are adjacent
+	// and start at the lower bound.
+	for (auto pair = boost::lower_bound(::kana_to_romaji, kana, cmp_kana);
+		pair != std::end(::kana_to_romaji) && !strcmp(pair->kana, key);
+		++pair)
+		ret.push_back(pair->romaji);
+	return ret;
+}
+
+/// Find the kana whose romaji is the longest prefix of the given string.
+/// @param romaji Romaji text; only its first three bytes are examined, which
+///               is the longest romaji present in the table
+/// @return Range over the table rows sharing the longest matching romaji
+///         prefix, or an empty range if no prefix of length 1-3 matches
+boost::iterator_range<const kana_pair *> romaji_to_kana(std::string const& romaji) {
+	// Try the longest candidate first so that e.g. "shi" wins over "s"
+	for (size_t len = std::min<size_t>(3, romaji.size()); len > 0; --len) {
+		// Search with a std::string key so the comparator does not have to
+		// build a temporary string for every comparison
+		const std::string prefix = romaji.substr(0, len);
+		auto pair = boost::equal_range(::romaji_to_kana, prefix, cmp_romaji());
+		if (pair.first != pair.second)
+			return boost::make_iterator_range(pair.first, pair.second);
+	}
+	return boost::make_iterator_range(::romaji_to_kana, ::romaji_to_kana);
+}
+}
diff --git a/aegisub/libaegisub/common/karaoke_matcher.cpp b/aegisub/libaegisub/common/karaoke_matcher.cpp
new file mode 100644
index 000000000..68f2f3524
--- /dev/null
+++ b/aegisub/libaegisub/common/karaoke_matcher.cpp
@@ -0,0 +1,209 @@
+// Copyright (c) 2013, Thomas Goyne
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// Aegisub Project http://www.aegisub.org/
+
+#include "../config.h"
+
+#include "libaegisub/karaoke_matcher.h"
+
+#include "libaegisub/kana_table.h"
+#include "libaegisub/util.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace {
+// Decode the code point starting at str[*i] and advance *i past it.
+// Uses ICU's unchecked decoder, so no validation of the input is done here.
+int32_t next_codepoint(const char *str, size_t *i) {
+	UChar32 codepoint;
+	U8_NEXT_UNSAFE(str, *i, codepoint);
+	return codepoint;
+}
+
+// True if the code point has the Unicode White_Space property
+bool is_whitespace(int32_t c) {
+	return u_isUWhiteSpace(c) != 0;
+}
+
+// True if every code point before the first NUL is whitespace; an empty
+// string therefore counts as whitespace.
+bool is_whitespace(std::string const& str) {
+	const char *bytes = str.c_str();
+	size_t pos = 0;
+	for (;;) {
+		auto cp = next_codepoint(bytes, &pos);
+		if (!cp)
+			return true;
+		if (!u_isUWhiteSpace(cp))
+			return false;
+	}
+}
+
+// strcmp but ignoring case and accents: compares at the primary collation
+// level using the boost::locale collator facet of the current global locale.
+// std::use_facet throws std::bad_cast if that locale has no such facet.
+int compare(std::string const& a, std::string const& b) {
+	using namespace boost::locale;
+	return std::use_facet<collator<char>>(std::locale()).compare(collator_base::primary, a, b);
+}
+
+}
+
+namespace agi {
+
+/// Guess how much of the destination text belongs to the first source syllable.
+///
+/// The first source syllable is matched against the start of the destination
+/// character by character (directly, or via the romaji-to-kana table). If
+/// that stalls, the following destination characters are searched for in the
+/// later source syllables to estimate where the current group ends.
+///
+/// @param source_strings Remaining source syllables, current one first
+/// @param dest_string    Remaining destination text (UTF-8)
+/// @return Number of source syllables and destination characters to group
+///         together; {0, 0} if there are no source syllables
+karaoke_match_result auto_match_karaoke(std::vector<std::string> const& source_strings, std::string const& dest_string) {
+	karaoke_match_result result = { 0, 0 };
+	if (source_strings.empty()) return result;
+
+	using namespace boost::locale::boundary;
+	using boost::starts_with;
+
+	result.source_length = 1;
+	ssegment_index destination_characters(character, begin(dest_string), end(dest_string));
+	auto src = boost::to_lower_copy(source_strings[0]);
+	auto dst = destination_characters.begin();
+	auto dst_end = destination_characters.end();
+
+	// Eat all the whitespace at the beginning of the source and destination
+	// syllables and exit if either ran out.
+	auto eat_whitespace = [&]() -> bool {
+		size_t i = 0, first_non_whitespace = 0;
+		while (is_whitespace(next_codepoint(src.c_str(), &i)))
+			first_non_whitespace = i;
+		if (first_non_whitespace)
+			src = src.substr(first_non_whitespace);
+
+		while (dst != dst_end && is_whitespace(dst->str())) {
+			++dst;
+			++result.destination_length;
+		}
+
+		// If we ran out of dest then this needs to match the rest of the
+		// source syllables (this probably means the user did something wrong)
+		if (dst == dst_end) {
+			result.source_length = source_strings.size();
+			return true;
+		}
+
+		return src.empty();
+	};
+
+	if (eat_whitespace()) return result;
+
+	// We now have a non-whitespace character at the beginning of both source
+	// and destination. Check if the source starts with a romanized kana, and
+	// if it does then check if the destination also has the appropriate
+	// character. If it does, match them and repeat.
+	while (!src.empty()) {
+		// First check for a basic match of the first character of the source and dest
+		auto first_src_char = ssegment_index(character, begin(src), end(src)).begin()->str();
+		if (compare(first_src_char, dst->str()) == 0) {
+			++dst;
+			++result.destination_length;
+			src.erase(0, first_src_char.size());
+			if (eat_whitespace()) return result;
+			continue;
+		}
+
+		auto check = [&](kana_pair const& kp) -> bool {
+			if (!starts_with(&*dst->begin(), kp.kana)) return false;
+
+			src = src.substr(strlen(kp.romaji));
+			// Consume every destination character covered by the kana. A
+			// destination character may span more bytes than the kana has
+			// left, so bound the loop by the kana's length instead of
+			// indexing kp.kana past its terminator, and never step past
+			// the end of the destination.
+			const size_t kana_length = strlen(kp.kana);
+			for (size_t i = 0; i < kana_length && dst != dst_end; ) {
+				i += dst->length();
+				++result.destination_length;
+				++dst;
+			}
+			return true;
+		};
+
+		bool matched = false;
+		for (auto const& match : romaji_to_kana(src)) {
+			if (check(match)) {
+				if (eat_whitespace()) return result;
+				matched = true;
+				break;
+			}
+		}
+		if (!matched) break;
+	}
+
+	// Source and dest are now non-empty and start with non-whitespace.
+	// If there's only one character left in the dest, it obviously needs to
+	// match all of the source syllables left.
+	if (std::distance(dst, dst_end) == 1) {
+		result.source_length = source_strings.size();
+		++result.destination_length;
+		return result;
+	}
+
+	// We couldn't match the current character, but if we can match the *next*
+	// syllable then we know that everything in between must belong to the
+	// current syllable. Do this by looking up to dst_lookahead_max
+	// characters ahead in destination and seeing if we can match them against
+	// the beginning of a syllable after this syllable.
+	// If a match is found, make a guess at how much source and destination
+	// should be selected based on the distances it was found at.
+
+	// The longest kanji are 'uketamawa.ru' and 'kokorozashi', each with a
+	// reading consisting of five kana. This means each character from
+	// the destination can match at most five syllables from the source.
+	static const int max_character_length = 5;
+
+	// Arbitrarily chosen limit on the number of dest characters to try
+	// skipping. Higher numbers probably increase false-positives.
+	static const int dst_lookahead_max = 3;
+
+	for (size_t lookahead = 0; lookahead < dst_lookahead_max; ++lookahead) {
+		if (++dst == dst_end) break;
+
+		// Transliterate this character if it's a known hiragana or katakana character
+		std::vector<const char *> translit;
+		auto next = std::next(dst);
+		if (next != dst_end)
+			boost::copy(kana_to_romaji(dst->str() + next->str()), back_inserter(translit));
+		boost::copy(kana_to_romaji(dst->str()), back_inserter(translit));
+
+		// Search for it and the transliterated version in the source
+		int src_lookahead_max = (lookahead + 1) * max_character_length;
+		int src_lookahead_pos = 0;
+		for (auto const& syl : source_strings) {
+			// Don't count blank syllables in the max search distance
+			if (is_whitespace(syl)) continue;
+			// The first non-blank syllable is the current one; skip it
+			if (++src_lookahead_pos == 1) continue;
+			if (src_lookahead_pos > src_lookahead_max) break;
+
+			std::string lsyl = boost::to_lower_copy(syl);
+			if (!(starts_with(syl, dst->str()) || util::any_of(translit, [&](const char *str) { return starts_with(lsyl, str); })))
+				continue;
+
+			// The syllable immediately after the current one matched, so
+			// everything up to the match must go with the current syllable.
+			if (src_lookahead_pos == 2) {
+				result.destination_length += lookahead + 1;
+				return result;
+			}
+
+			// The match was multiple syllables ahead, so just divide the
+			// destination characters evenly between the source syllables
+			result.destination_length += 1;
+			result.source_length = static_cast<size_t>((src_lookahead_pos - 1.0) / (lookahead + 1.0) + .5);
+			return result;
+		}
+	}
+
+	// We wouldn't have gotten here if the dest was empty, so make sure at
+	// least one character is selected
+	if (result.destination_length < 1)
+		result.destination_length = 1;
+
+	return result;
+}
+}
diff --git a/aegisub/libaegisub/include/libaegisub/kana_table.h b/aegisub/libaegisub/include/libaegisub/kana_table.h
new file mode 100644
index 000000000..a826819be
--- /dev/null
+++ b/aegisub/libaegisub/include/libaegisub/kana_table.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2013, Thomas Goyne
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// Aegisub Project http://www.aegisub.org/
+
+#include
+#include
+
+namespace agi {
+ struct kana_pair { // one kana string together with one romaji string
+ const char *kana; ///< Kana text
+ const char *romaji; ///< Romaji text paired with kana
+ };
+
+ /// Transliterated romaji for the given kana; the result is empty if not applicable
+ std::vector kana_to_romaji(std::string const& kana);
+
+ boost::iterator_range romaji_to_kana(std::string const& romaji); // NOTE(review): presumably the table entries for this romaji - confirm in kana_table.cpp
+}
diff --git a/aegisub/libaegisub/include/libaegisub/karaoke_matcher.h b/aegisub/libaegisub/include/libaegisub/karaoke_matcher.h
new file mode 100644
index 000000000..527cac18f
--- /dev/null
+++ b/aegisub/libaegisub/include/libaegisub/karaoke_matcher.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2013, Thomas Goyne
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// Aegisub Project http://www.aegisub.org/
+
+#include
+#include
+
+namespace agi {
+ struct karaoke_match_result { // returned by auto_match_karaoke
+ /// The number of strings at the start of the source which were matched
+ size_t source_length;
+ /// The number of characters (text segments, not bytes) at the start of the destination string which were matched
+ size_t destination_length;
+ };
+
+ /// Try to automatically select the leading portion of dst which corresponds to the first string(s) in src
+ karaoke_match_result auto_match_karaoke(std::vector const& src, std::string const& dst);
+}
diff --git a/aegisub/libaegisub/include/libaegisub/util.h b/aegisub/libaegisub/include/libaegisub/util.h
index 26dd2970d..32b6a56ba 100644
--- a/aegisub/libaegisub/include/libaegisub/util.h
+++ b/aegisub/libaegisub/include/libaegisub/util.h
@@ -75,5 +75,11 @@ namespace agi {
/// elsewhere (because libstcc++ 4.7 is missing it).
void sleep_for(int ms);
+ // boost.range doesn't have wrappers for the C++11 algorithms, so wrap std::any_of to take a whole range
+ template
+ bool any_of(Range&& r, Predicate&& p) {
+ return std::any_of(std::begin(r), std::end(r), std::forward(p));
+ }
+
} // namespace util
} // namespace agi
diff --git a/aegisub/src/Makefile b/aegisub/src/Makefile
index 6782a83c4..19c8d128e 100644
--- a/aegisub/src/Makefile
+++ b/aegisub/src/Makefile
@@ -194,7 +194,6 @@ SRC += \
hotkey.cpp \
hotkey_data_view_model.cpp \
initial_line_state.cpp \
- kana_table.cpp \
lpeg.c \
main.cpp \
menu.cpp \
diff --git a/aegisub/src/dialog_kara_timing_copy.cpp b/aegisub/src/dialog_kara_timing_copy.cpp
index 71b53b8f2..877e6bc40 100644
--- a/aegisub/src/dialog_kara_timing_copy.cpp
+++ b/aegisub/src/dialog_kara_timing_copy.cpp
@@ -43,12 +43,17 @@
#include "compat.h"
#include "help_button.h"
#include "include/aegisub/context.h"
-#include "kana_table.h"
#include "libresrc/libresrc.h"
#include "options.h"
#include "selection_controller.h"
#include "utils.h"
+#include
+
+#include
+#include
+#include
+#include
#include
#include
@@ -76,12 +81,13 @@ class KaraokeLineMatchDisplay : public wxControl {
std::vector matched_groups;
std::deque unmatched_source;
- std::string unmatched_destination;
+ std::string destination_str;
+ boost::locale::boundary::ssegment_index destination;
+ boost::locale::boundary::ssegment_index::iterator match_begin, match_end;
int last_total_matchgroup_render_width;
size_t source_sel_length;
- size_t destination_sel_length;
void OnPaint(wxPaintEvent &event);
@@ -96,7 +102,7 @@ public:
/// Number of syllables not yet matched from source
size_t GetRemainingSource() const { return unmatched_source.size(); }
/// Number of characters not yet matched from destination
- size_t GetRemainingDestination() const { return unmatched_destination.size(); }
+ size_t GetRemainingDestination() const { return distance(match_end, destination.end()); }
// Adjust source and destination match lengths
void IncreaseSourceMatch();
@@ -147,7 +153,7 @@ wxSize KaraokeLineMatchDisplay::GetBestSize() const
return wxSize(min_width * 2, h_src + h_dst + 7);
}
-int DrawBoxedText(wxDC &dc, const std::string &txt, int x, int y)
+int DrawBoxedText(wxDC &dc, wxString const& txt, int x, int y)
{
int tw, th;
// Assume the pen, brush and font properties have already been set in the DC.
@@ -164,10 +170,9 @@ int DrawBoxedText(wxDC &dc, const std::string &txt, int x, int y)
}
else
{
- wxString wxtxt(to_wx(txt));
- dc.GetTextExtent(wxtxt, &tw, &th);
+ dc.GetTextExtent(txt, &tw, &th);
dc.DrawRectangle(x, y-2, tw+4, th+4);
- dc.DrawText(wxtxt, x+2, y);
+ dc.DrawText(txt, x+2, y);
return tw+3;
}
}
@@ -256,11 +261,11 @@ void KaraokeLineMatchDisplay::OnPaint(wxPaintEvent &)
// Matched source syllables
int syl_x = next_x;
for (auto const& syl : grp.src)
- syl_x += DrawBoxedText(dc, syl.text, syl_x, y_line1);
+ syl_x += DrawBoxedText(dc, to_wx(syl.text), syl_x, y_line1);
// Matched destination text
{
- const int adv = DrawBoxedText(dc, grp.dst, next_x, y_line2);
+ const int adv = DrawBoxedText(dc, to_wx(grp.dst), next_x, y_line2);
// Adjust next_x here while we have the text_w
next_x = syl_x > next_x + adv ? syl_x : next_x + adv;
@@ -292,24 +297,30 @@ void KaraokeLineMatchDisplay::OnPaint(wxPaintEvent &)
dc.SetBrush(wxBrush(inner_back));
}
- syl_x += DrawBoxedText(dc, unmatched_source[j].text, syl_x, y_line1);
+ syl_x += DrawBoxedText(dc, to_wx(unmatched_source[j].text), syl_x, y_line1);
}
// Remaining destination
- if (!unmatched_destination.empty())
+ if (match_begin != match_end)
{
dc.SetTextBackground(sel_back);
dc.SetTextForeground(sel_text);
dc.SetBrush(wxBrush(sel_back));
- next_x += DrawBoxedText(dc, unmatched_destination.substr(0, destination_sel_length), next_x, y_line2);
+ wxString str;
+ for (auto it = match_begin; it != match_end; ++it)
+ str += to_wx(it->str());
+ next_x += DrawBoxedText(dc, str, next_x, y_line2);
}
- if (destination_sel_length < unmatched_destination.size())
+ if (match_end != destination.end())
{
dc.SetTextBackground(inner_back);
dc.SetTextForeground(inner_text);
dc.SetBrush(wxBrush(inner_back));
- DrawBoxedText(dc, unmatched_destination.substr(destination_sel_length), next_x, y_line2);
+ wxString str;
+ for (auto it = match_end; it != destination.end(); ++it)
+ str += to_wx(it->str());
+ DrawBoxedText(dc, str, next_x, y_line2);
}
}
@@ -328,8 +339,12 @@ void KaraokeLineMatchDisplay::SetInputData(AssDialogue *src, AssDialogue *dst)
source_sel_length = 1;
}
- unmatched_destination = dst ? dst->GetStrippedText() : "";
- destination_sel_length = std::max(1, unmatched_destination.size());
+ destination_str = dst ? dst->GetStrippedText() : "";
+ using namespace boost::locale::boundary;
+ destination = ssegment_index(character, begin(destination_str), end(destination_str));
+ match_begin = match_end = destination.begin();
+ if (!destination_str.empty())
+ ++match_end;
Refresh(true);
}
@@ -363,182 +378,34 @@ void KaraokeLineMatchDisplay::DecreaseSourceMatch()
void KaraokeLineMatchDisplay::IncreseDestinationMatch()
{
- destination_sel_length = std::min(destination_sel_length + 1, GetRemainingDestination());
- Refresh(true);
+ if (match_end != destination.end()) { // grow the selection by one character unless it already reaches the end
+ ++match_end;
+ Refresh(true);
+ }
}
void KaraokeLineMatchDisplay::DecreaseDestinationMatch()
{
- destination_sel_length = std::max(destination_sel_length, 1) - 1;
- Refresh(true);
+ if (match_end != match_begin) { // shrink the selection by one character unless it is already empty
+ --match_end;
+ Refresh(true);
+ }
}
-/// Kana interpolation, in characters, unset to disable
-#define KANA_SEARCH_DISTANCE 3
-
void KaraokeLineMatchDisplay::AutoMatchJapanese()
{
- if (unmatched_source.size() < 1) return;
-
- // Quick escape: If there's no destination left, take all remaining source.
- // (Usually this means the user made a mistake.)
- if (unmatched_destination.empty())
- {
- source_sel_length = unmatched_source.size();
- destination_sel_length = 0;
- return;
- }
-
- // We'll first see if we can do something with the first unmatched source syllable
- wxString src(to_wx(unmatched_source[0].text).Lower());
- wxString dst(to_wx(unmatched_destination));
- source_sel_length = 1; // we're working on the first, assume it was matched
- destination_sel_length = 0;
-
- // Quick escape: If the source syllable is empty, return with first source syllable and empty destination
- if (src.empty()) return;
-
- // Try to match the next source syllable against the destination. Do it
- // "inverted": try all kana from the table and prefix-match them against
- // the destination, then if it matches a prefix, try to match the hepburn
- // for it agast the source; eat if it matches. Keep trying to match as
- // long as there's text left in the source syllable or matching fails.
- while (src.size() > 0)
- {
- wxString dst_hira_rest, dst_kata_rest, src_rest;
- bool matched = false;
- for (const KanaEntry *ke = KanaTable; ke->hiragana; ++ke)
- {
- if (src.StartsWith(ke->hepburn, &src_rest))
- {
- bool hira_matches = dst.StartsWith(ke->hiragana, &dst_hira_rest) && *ke->hiragana;
- bool kata_matches = dst.StartsWith(ke->katakana, &dst_kata_rest);
-
- if (hira_matches || kata_matches)
- {
- matched = true;
- src = src_rest;
- dst = hira_matches ? dst_hira_rest : dst_kata_rest;
- destination_sel_length += wcslen(hira_matches ? ke->hiragana : ke->katakana);
- break;
- }
- }
- }
- if (!matched) break;
- }
-
- // The source might be empty now: That's good!
- // That means we managed to match it all against destination text
- if (src.empty()) return;
- // destination_sel_length already has the appropriate value
- // and source_sel_length was already 1
-
- // Now the source syllable might consist of just whitespace.
- // Eat all whitespace at the start of the destination.
- if (StringEmptyOrWhitespace(src))
- {
- wxString str(to_wx(unmatched_destination.substr(destination_sel_length)));
- destination_sel_length += std::distance(str.begin(), std::find_if_not(str.begin(), str.end(), IsWhitespace));
- // Now we've eaten all spaces in the destination as well
- // so the selection lengths should be good
- return;
- }
-
- // If there's just one character left in the destination at this point,
- // (and the source doesn't begin with space syllables, see test above)
- // assume it's safe to take all remaining source to match the single
- // remaining destination.
- if (unmatched_destination.size() == 1)
- {
- source_sel_length = unmatched_source.size();
- destination_sel_length = 1;
- return;
- }
-
-#ifdef KANA_SEARCH_DISTANCE
- // Try to look up to KANA_SEARCH_DISTANCE characters ahead in destination,
- // see if any of those are recognised kana. If there are any within the
- // range, see if it matches a following syllable, at most 5 source
- // syllables per character in source we're ahead.
- // The number 5 comes from the kanji with the longest readings:
- // 'uketamawa.ru' and 'kokorozashi' which each have a reading consisting of
- // five kana.
- // Only match the found kana in destination against the beginning of source
- // syllables, not the middle of them.
- // If a match is found, make a guess at how much source and destination
- // should be selected based on the distances it was found at.
- dst = to_wx(unmatched_destination);
- for (size_t lookahead = 0; lookahead < KANA_SEARCH_DISTANCE; ++lookahead)
- {
- // Eat dst at the beginning, don't test for the first character being kana
- dst = dst.Mid(1);
- // Find a position where hiragana or katakana matches
- wxString matched_roma;
- wxString matched_kana;
- for (const KanaEntry *ke = KanaTable; ke->hiragana; ++ke)
- {
- if (*ke->hiragana && dst.StartsWith(ke->hiragana))
- {
- matched_roma = ke->hepburn;
- matched_kana = ke->hiragana;
- break;
- }
- if (*ke->katakana && dst.StartsWith(ke->katakana))
- {
- matched_roma = ke->hepburn;
- matched_kana = ke->katakana;
- break;
- }
- }
- // If we didn't match any kana against dst, move to next char in dst
- if (!matched_kana)
- continue;
- // Otherwise look for a match for the romaji
- // For the magic number 5, see big comment block above
- int src_lookahead_max = (lookahead+1)*5;
- int src_lookahead_pos = 0;
- for (auto const& syl : unmatched_source)
- {
- // Check if we've gone too far ahead in the source
- if (src_lookahead_pos++ >= src_lookahead_max) break;
- // Otherwise look for a match
- if (to_wx(syl.text).StartsWith(matched_roma))
- {
- // Yay! Time to interpolate.
- // Special case: If the last source syllable before the matching one is
- // empty or contains just whitespace, don't include that one.
- if (src_lookahead_pos > 1 && StringEmptyOrWhitespace(to_wx(unmatched_source[src_lookahead_pos-2].text)))
- src_lookahead_pos -= 1;
- // Special case: Just one source syllable matching, pick all destination found
- if (src_lookahead_pos == 2)
- {
- source_sel_length = 1;
- destination_sel_length = lookahead+1;
- return;
- }
- // Otherwise try to split the eaten source syllables evenly between the eaten
- // destination characters, and do a regular rounding.
- float src_per_dst = (float)(src_lookahead_pos-1)/(float)(lookahead+1);
- source_sel_length = (int)(src_per_dst + 0.5);
- destination_sel_length = 1;
- return;
- }
- }
- }
-#endif
-
- // Okay so we didn't match anything. Aww.
- // Just fail...
- // We know from earlier that we do have both some source and some destination.
- source_sel_length = 1;
- destination_sel_length = 1;
- return;
+ std::vector source; // text of each remaining source syllable, in order
+ for (auto const& syl : unmatched_source)
+ source.emplace_back(syl.text);
+ auto result = agi::auto_match_karaoke(source, match_begin == destination.end() ? "" : &*match_begin->begin()); // dst is the destination text from match_begin onwards, or empty once nothing is left
+ source_sel_length = result.source_length;
+ match_end = std::next(match_begin, result.destination_length); // select the matched characters, starting at match_begin
}
bool KaraokeLineMatchDisplay::AcceptMatch()
{
// Completely empty match
- if (source_sel_length == 0 && destination_sel_length == 0) return false;
+ if (source_sel_length == 0 && match_begin == match_end) return false;
MatchGroup match;
@@ -547,10 +414,8 @@ bool KaraokeLineMatchDisplay::AcceptMatch()
unmatched_source.erase(unmatched_source.begin(), unmatched_source.begin() + source_sel_length);
source_sel_length = 0;
- assert(destination_sel_length <= unmatched_destination.size());
- match.dst = unmatched_destination.substr(0, destination_sel_length);
- unmatched_destination.erase(0, destination_sel_length);
- destination_sel_length = 0;
+ match.dst = std::string(match_begin->begin(), match_end == destination.end() ? destination_str.end() : match_end->begin());
+ match_begin = match_end;
matched_groups.emplace_back(std::move(match));
@@ -569,12 +434,12 @@ bool KaraokeLineMatchDisplay::UndoMatch()
MatchGroup &group = matched_groups.back();
source_sel_length = group.src.size();
- destination_sel_length = group.dst.size();
-
copy(group.src.rbegin(), group.src.rend(), front_inserter(unmatched_source));
group.src.clear();
- unmatched_destination = group.dst + unmatched_destination;
+ match_end = match_begin;
+ for (size_t size = group.dst.size(); size > 0; size -= match_begin->length())
+ --match_begin;
matched_groups.pop_back();
diff --git a/aegisub/src/kana_table.cpp b/aegisub/src/kana_table.cpp
deleted file mode 100644
index a05df5a0c..000000000
--- a/aegisub/src/kana_table.cpp
+++ /dev/null
@@ -1,262 +0,0 @@
-// Copyright (c) 2006, Rodrigo Braz Monteiro
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-// * Neither the name of the Aegisub Group nor the names of its contributors
-// may be used to endorse or promote products derived from this software
-// without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-// Aegisub Project http://www.aegisub.org/
-
-/// @file kana_table.cpp
-/// @brief Data about the Japanese kana syllabary used by kanji karaoke timing copying
-/// @ingroup kara_timing_copy
-///
-
-
-#include "config.h"
-
-#include "kana_table.h"
-
-const KanaEntry KanaTable[] =
-{
- // Regular kana usage and combinations
- { L"\u3042", L"\u30a2", L"a" },
- { L"\u3044", L"\u30a4", L"i" },
- { L"\u3046", L"\u30a6", L"u" },
- { L"\u3048", L"\u30a8", L"e" },
- { L"\u304a", L"\u30aa", L"o" },
-
- { L"\u304b", L"\u30ab", L"ka" },
- { L"\u304d", L"\u30ad", L"ki" },
- { L"\u304f", L"\u30af", L"ku" },
- { L"\u3051", L"\u30b1", L"ke" },
- { L"\u3053", L"\u30b3", L"ko" },
-
- { L"\u3055", L"\u30b5", L"sa" },
- { L"\u3057", L"\u30b7", L"shi" },
- { L"\u3059", L"\u30b9", L"su" },
- { L"\u305b", L"\u30bb", L"se" },
- { L"\u305d", L"\u30bd", L"so" },
-
- { L"\u305f", L"\u30bf", L"ta" },
- { L"\u3061", L"\u30c1", L"chi" },
- { L"\u3064", L"\u30c4", L"tsu" },
- { L"\u3066", L"\u30c6", L"te" },
- { L"\u3068", L"\u30c8", L"to" },
-
- { L"\u306a", L"\u30ca", L"na" },
- { L"\u306b", L"\u30cb", L"ni" },
- { L"\u306c", L"\u30cc", L"nu" },
- { L"\u306d", L"\u30cd", L"ne" },
- { L"\u306e", L"\u30ce", L"no" },
-
- { L"\u306f", L"\u30cf", L"ha" },
- { L"\u3072", L"\u30d2", L"hi" },
- { L"\u3075", L"\u30d5", L"fu" },
- { L"\u3078", L"\u30d8", L"he" },
- { L"\u307b", L"\u30db", L"ho" },
-
- { L"\u307e", L"\u30de", L"ma" },
- { L"\u307f", L"\u30df", L"mi" },
- { L"\u3080", L"\u30e0", L"mu" },
- { L"\u3081", L"\u30e1", L"me" },
- { L"\u3082", L"\u30e2", L"mo" },
-
- { L"\u3084", L"\u30e4", L"ya" },
- { L"\u3086", L"\u30e6", L"yu" },
- { L"\u3088", L"\u30e8", L"yo" },
-
- { L"\u3089", L"\u30e9", L"ra" },
- { L"\u308a", L"\u30ea", L"ri" },
- { L"\u308b", L"\u30eb", L"ru" },
- { L"\u308c", L"\u30ec", L"re" },
- { L"\u308d", L"\u30ed", L"ro" },
-
- { L"\u308f", L"\u30ef", L"wa" },
- { L"\u3090", L"\u30f0", L"wi" },
- { L"\u3091", L"\u30f1", L"we" },
- { L"\u3092", L"\u30f2", L"wo" },
-
- { L"\u304c", L"\u30ac", L"ga" },
- { L"\u304e", L"\u30ae", L"gi" },
- { L"\u3050", L"\u30b0", L"gu" },
- { L"\u3052", L"\u30b2", L"ge" },
- { L"\u3054", L"\u30b4", L"go" },
-
- { L"\u3056", L"\u30b6", L"za" },
- { L"\u3058", L"\u30b8", L"ji" },
- { L"\u305a", L"\u30ba", L"zu" },
- { L"\u305c", L"\u30bc", L"ze" },
- { L"\u305e", L"\u30be", L"zo" },
-
- { L"\u3060", L"\u30c0", L"da" },
- { L"\u3062", L"\u30c2", L"ji" },
- { L"\u3065", L"\u30c5", L"zu" },
- { L"\u3067", L"\u30c7", L"de" },
- { L"\u3069", L"\u30c9", L"do" },
-
- { L"\u3070", L"\u30d0", L"ba" },
- { L"\u3073", L"\u30d3", L"bi" },
- { L"\u3076", L"\u30d6", L"bu" },
- { L"\u3079", L"\u30d9", L"be" },
- { L"\u307c", L"\u30dc", L"bo" },
-
- { L"\u3071", L"\u30d1", L"pa" },
- { L"\u3074", L"\u30d4", L"pi" },
- { L"\u3077", L"\u30d7", L"pu" },
- { L"\u307a", L"\u30da", L"pe" },
- { L"\u307d", L"\u30dd", L"po" },
-
- { L"\u304d\u3083", L"\u30ad\u30e3", L"kya" },
- { L"\u304d\u3085", L"\u30ad\u30e5", L"kyu" },
- { L"\u304d\u3087", L"\u30ad\u30e7", L"kyo" },
-
- { L"\u3057\u3083", L"\u30b7\u30e3", L"sha" },
- { L"\u3057\u3085", L"\u30b7\u30e5", L"shu" },
- { L"\u3057\u3087", L"\u30b7\u30e7", L"sho" },
-
- { L"\u3061\u3083", L"\u30c1\u30e3", L"cha" },
- { L"\u3061\u3085", L"\u30c1\u30e5", L"chu" },
- { L"\u3061\u3087", L"\u30c1\u30e7", L"cho" },
-
- { L"\u306b\u3083", L"\u30cb\u30e3", L"nya" },
- { L"\u306b\u3085", L"\u30cb\u30e5", L"nyu" },
- { L"\u306b\u3087", L"\u30cb\u30e7", L"nyo" },
-
- { L"\u3072\u3083", L"\u30d2\u30e3", L"hya" },
- { L"\u3072\u3085", L"\u30d2\u30e5", L"hyu" },
- { L"\u3072\u3087", L"\u30d2\u30e7", L"hyo" },
-
- { L"\u307f\u3083", L"\u30df\u30e3", L"mya" },
- { L"\u307f\u3085", L"\u30df\u30e5", L"myu" },
- { L"\u307f\u3087", L"\u30df\u30e7", L"myo" },
-
- { L"\u308a\u3083", L"\u30ea\u30e3", L"rya" },
- { L"\u308a\u3085", L"\u30ea\u30e5", L"ryu" },
- { L"\u308a\u3087", L"\u30ea\u30e7", L"ryo" },
-
- { L"\u304e\u3083", L"\u30ae\u30e3", L"gya" },
- { L"\u304e\u3085", L"\u30ae\u30e5", L"gyu" },
- { L"\u304e\u3087", L"\u30ae\u30e7", L"gyo" },
-
- { L"\u3058\u3083", L"\u30b8\u30e3", L"ja" },
- { L"\u3058\u3085", L"\u30b8\u30e5", L"ju" },
- { L"\u3058\u3087", L"\u30b8\u30e7", L"jo" },
-
- { L"\u3062\u3083", L"\u30c2\u30e3", L"ja" },
- { L"\u3062\u3085", L"\u30c2\u30e5", L"ju" },
- { L"\u3062\u3087", L"\u30c2\u30e7", L"jo" },
-
- { L"\u3073\u3083", L"\u30d3\u30e3", L"bya" },
- { L"\u3073\u3085", L"\u30d3\u30e5", L"byu" },
- { L"\u3073\u3087", L"\u30d3\u30e7", L"byo" },
-
- { L"\u3074\u3083", L"\u30d4\u30e3", L"pya" },
- { L"\u3074\u3085", L"\u30d4\u30e5", L"pyu" },
- { L"\u3074\u3087", L"\u30d4\u30e7", L"pyo" },
-
-
- // Specialty katakana usage for loan words
-
- // Katakana fu + small vowel
- { L"", L"\u30d5\u30a1", L"fa" },
- { L"", L"\u30d5\u30a3", L"fi" },
- { L"", L"\u30d5\u30a7", L"fe" },
- { L"", L"\u30d5\u30a9", L"fo" },
-
- // Katakana vu + small vowel
- { L"", L"\u30f4\u30a1", L"va" },
- { L"", L"\u30f4\u30a3", L"vi" },
- { L"", L"\u30f4", L"vu" },
- { L"", L"\u30f4\u30a7", L"ve" },
- { L"", L"\u30f4\u30a9", L"vo" },
-
- // Katakana fu + small yu
- { L"", L"\u30d5\u30e5", L"fyu" },
-
- // Katakana i + little e
- { L"", L"\u30a4\u30a7", L"ye" },
-
- // Katakana u + little vowels
- { L"", L"\u30a6\u30a3", L"wi" },
- { L"", L"\u30a6\u30a7", L"we" },
- { L"", L"\u30a6\u30a9", L"wo" },
-
- // Katakana vu + small ya-yu-yo
- { L"", L"\u30f4\u30e3", L"vya" },
- { L"", L"\u30f4\u30e5", L"vyu" },
- { L"", L"\u30f4\u30e7", L"vyo" },
-
- // Katakana shi-ji-chi + small e
- { L"", L"\u30b7\u30a7", L"she" },
- { L"", L"\u30b8\u30a7", L"je" },
- { L"", L"\u30c1\u30a7", L"che" },
-
- // Katakana de + small i-u-yu
- { L"", L"\u30c6\u30a3", L"ti" },
- { L"", L"\u30c6\u30a5", L"tu" },
- { L"", L"\u30c6\u30e5", L"tyu" },
-
- // Katakana de + small i-u-yu
- { L"", L"\u30c7\u30a3", L"di" },
- { L"", L"\u30c7\u30a5", L"du" },
- { L"", L"\u30c7\u30a5", L"dyu" },
-
- // Katakana tsu + small vowels
- { L"", L"\u30c4\u30a1", L"tsa" },
- { L"", L"\u30c4\u30a3", L"tsi" },
- { L"", L"\u30c4\u30a7", L"tse" },
- { L"", L"\u30c4\u30a9", L"tso" },
-
-
- // Syllablic consonants
-
- // Small tsu
- { L"\u3063", L"\u30c3", L"t" },
- { L"\u3063", L"\u30c3", L"c" },
- { L"\u3063", L"\u30c3", L"s" },
- { L"\u3063", L"\u30c3", L"k" },
- { L"\u3063", L"\u30c3", L"p" },
-
- // Syllabic n
- { L"\u3093", L"\u30f3", L"n" },
- { L"\u3093", L"\u30f3", L"m" },
-
-
- // Other special usage
-
- // Small vowels
- { L"\u3041", L"\u30a1", L"a" },
- { L"\u3043", L"\u30a3", L"i" },
- { L"\u3045", L"\u30a5", L"u" },
- { L"\u3047", L"\u30a7", L"e" },
- { L"\u3049", L"\u30a9", L"o" },
-
- // Long vowel mark (dash)
- { L"", L"\u30fc", L"a" },
- { L"", L"\u30fc", L"i" },
- { L"", L"\u30fc", L"u" },
- { L"", L"\u30fc", L"e" },
- { L"", L"\u30fc", L"o" },
- { 0, 0, 0 }
-};
diff --git a/aegisub/src/kana_table.h b/aegisub/src/kana_table.h
deleted file mode 100644
index 20a3b3483..000000000
--- a/aegisub/src/kana_table.h
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright (c) 2006, Rodrigo Braz Monteiro
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-// * Neither the name of the Aegisub Group nor the names of its contributors
-// may be used to endorse or promote products derived from this software
-// without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-// Aegisub Project http://www.aegisub.org/
-
-/// @file kana_table.h
-/// @see kana_table.cpp
-/// @ingroup kara_timing_copy
-///
-
-#include
-
-#include
-
-/// @class KanaEntry
-/// @brief Base class for Kana + Romaji tuples.
-struct KanaEntry {
- /// Hiragana
- const wchar_t *hiragana;
-
- /// Katakana
- const wchar_t *katakana;
-
- /// Hepburn romaji.
- const wchar_t *hepburn;
-};
-
-/// Table of Hiragana, Katakana and Hepburn romaji tuples.
-extern const KanaEntry KanaTable[];
diff --git a/aegisub/src/utils.cpp b/aegisub/src/utils.cpp
index f72593d57..90e4a312d 100644
--- a/aegisub/src/utils.cpp
+++ b/aegisub/src/utils.cpp
@@ -54,7 +54,6 @@
#include
#include
#include