Extract word-splitting logic from the syntax highlighter

This commit is contained in:
Thomas Goyne 2012-11-06 16:26:00 -08:00
parent 24c21dd425
commit 88fdee726b
13 changed files with 482 additions and 230 deletions

View file

@ -26,49 +26,76 @@
namespace { namespace {
typedef std::vector<agi::ass::DialogueToken> TokenVec; typedef std::vector<agi::ass::DialogueToken> TokenVec;
namespace dt = agi::ass::DialogueTokenType; using namespace agi::ass;
namespace ss = agi::ass::SyntaxStyle; namespace dt = DialogueTokenType;
namespace ss = SyntaxStyle;
class SyntaxHighlighter { class SyntaxHighlighter {
TokenVec ranges; TokenVec ranges;
std::string const& text; std::string const& text;
agi::SpellChecker *spellchecker; agi::SpellChecker *spellchecker;
agi::scoped_holder<iconv_t, int(&)(iconv_t)> utf8_to_utf32;
void SetStyling(int len, int type) { void SetStyling(int len, int type) {
if (ranges.size() && ranges.back().type == type) if (ranges.size() && ranges.back().type == type)
ranges.back().length += len; ranges.back().length += len;
else else
ranges.push_back(agi::ass::DialogueToken(type, len)); ranges.push_back(DialogueToken(type, len));
} }
void CheckWord(int start, int end) { public:
int len = end - start; SyntaxHighlighter(std::string const& text, agi::SpellChecker *spellchecker)
if (!len) return; : text(text)
, spellchecker(spellchecker)
{ }
if (!spellchecker->CheckWord(text.substr(start, len))) TokenVec Highlight(TokenVec const& tokens, bool template_line) {
if (tokens.empty()) return ranges;
size_t pos = 0;
for (size_t i = 0; i < tokens.size(); ++i) {
size_t len = tokens[i].length;
switch (tokens[i].type) {
case dt::LINE_BREAK: SetStyling(len, ss::LINE_BREAK); break;
case dt::ERROR: SetStyling(len, ss::ERROR); break;
case dt::ARG: SetStyling(len, ss::PARAMETER); break;
case dt::COMMENT: SetStyling(len, ss::COMMENT); break;
case dt::WHITESPACE: SetStyling(len, ss::NORMAL); break;
case dt::DRAWING: SetStyling(len, ss::DRAWING); break;
case dt::TEXT: SetStyling(len, ss::NORMAL); break;
case dt::TAG_NAME: SetStyling(len, ss::TAG); break;
case dt::OPEN_PAREN: case dt::CLOSE_PAREN: case dt::ARG_SEP: case dt::TAG_START:
SetStyling(len, ss::PUNCTUATION);
break;
case dt::OVR_BEGIN: case dt::OVR_END:
SetStyling(len, ss::OVERRIDE);
break;
case dt::WORD:
if (spellchecker && !spellchecker->CheckWord(text.substr(pos, len)))
SetStyling(len, ss::SPELLING); SetStyling(len, ss::SPELLING);
else else
SetStyling(len, ss::NORMAL); SetStyling(len, ss::NORMAL);
break;
} }
int NextChar(int pos, int len, int& char_len) { pos += len;
int chr = 0; // karaoke templater
char *inptr = const_cast<char *>(&text[pos]);
size_t inlen = len;
char *outptr = (char *)&chr;
size_t outlen = sizeof chr;
iconv(utf8_to_utf32, &inptr, &inlen, &outptr, &outlen);
if (outlen != 0)
return 0;
char_len = len - inlen;
return chr;
} }
void StyleSpellCheck(int pos, int len) { return ranges;
const int delims[] = { }
};
class WordSplitter {
std::string const& text;
std::vector<DialogueToken> &tokens;
agi::scoped_holder<iconv_t, int(&)(iconv_t)> utf8_to_utf32;
size_t last_ovr_end;
size_t pos;
bool in_drawing;
bool IsWordSep(int chr) {
static const int delims[] = {
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0028, 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0028,
0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x003a, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x003a,
0x003b, 0x003d, 0x003f, 0x0040, 0x005b, 0x005c, 0x005d, 0x005e, 0x003b, 0x003d, 0x003f, 0x0040, 0x005b, 0x005c, 0x005d, 0x005e,
@ -86,38 +113,72 @@ class SyntaxHighlighter {
0xff5b, 0xff5d, 0xff5e 0xff5b, 0xff5d, 0xff5e
}; };
return std::binary_search(std::begin(delims), std::end(delims), chr);
}
/// Decode the next character of text
/// @param pos Byte offset into text at which to start decoding
/// @param len Number of bytes available starting at pos
/// @param[out] char_len Number of input bytes consumed; only written on success
/// @return The decoded character, or 0 if a full character could not be produced
int NextChar(int pos, int len, int& char_len) {
	int decoded = 0;

	char *src = const_cast<char *>(&text[pos]);
	size_t src_left = len;

	// The output buffer holds exactly one UTF-32 unit, so at most one
	// character is converted per call
	char *dst = reinterpret_cast<char *>(&decoded);
	size_t dst_left = sizeof decoded;

	iconv(utf8_to_utf32, &src, &src_left, &dst, &dst_left);

	// Unfilled output means nothing (or only part of a character) was converted
	if (dst_left == 0) {
		char_len = len - src_left;
		return decoded;
	}
	return 0;
}
/// Give the final len bytes of token i the passed type
/// @param i Index of the token being processed; moved to the new token if one is inserted
/// @param type Token type wanted for the tail of token i
/// @param len Length of the tail of token i which should have that type
void SwitchTo(size_t &i, int type, int len) {
	int const current_type = tokens[i].type;
	if (current_type == type) return;

	// The tail is the entire token, so relabelling it is enough
	if (tokens[i].length == (size_t)len) {
		tokens[i].type = type;
		return;
	}

	// Split the tail off into its own token directly after this one and
	// continue processing from the new token
	tokens.insert(tokens.begin() + i + 1, DialogueToken(type, len));
	tokens[i].length -= len;
	++i;
	// A token was inserted, so keep the last-override marker in step with i
	++last_ovr_end;
}
void SplitText(size_t &i) {
if (in_drawing) {
tokens[i].type = dt::DRAWING;
return;
}
int chrlen = 0; int chrlen = 0;
int start = pos; int len = tokens[i].length;
for (; len > 0; pos += chrlen, len -= chrlen) { int tpos = pos;
int chr = NextChar(pos, len, chrlen); for (; len > 0; tpos += chrlen, len -= chrlen) {
int chr = NextChar(tpos, len, chrlen);
if (!chr) return; if (!chr) return;
if (std::binary_search(std::begin(delims), std::end(delims), chr)) { if (IsWordSep(chr))
CheckWord(start, pos); SwitchTo(i, dt::TEXT, len);
SetStyling(1, ss::NORMAL); else
start = pos + 1; SwitchTo(i, dt::WORD, len);
} }
} }
CheckWord(start, pos);
}
public: public:
SyntaxHighlighter(std::string const& text, agi::SpellChecker *spellchecker) WordSplitter(std::string const& text, std::vector<DialogueToken> &tokens)
: text(text) : text(text)
, spellchecker(spellchecker) , tokens(tokens)
, utf8_to_utf32(iconv_open("utf-32le", "utf-8"), iconv_close) , utf8_to_utf32(iconv_open("utf-32le", "utf-8"), iconv_close)
, last_ovr_end(0)
, pos(0)
, in_drawing(false)
{ } { }
TokenVec Highlight(TokenVec const& tokens, bool template_line) { void SplitWords() {
if (tokens.empty()) return ranges; if (tokens.empty()) return;
bool in_drawing = false;
size_t pos = 0;
// VSFilter treats unclosed override blocks as plain text, so pretend // VSFilter treats unclosed override blocks as plain text, so pretend
// all tokens after the last override block are TEXT // all tokens after the last override block are TEXT
size_t last_ovr_end = 0;
for (size_t i = tokens.size(); i > 0; --i) { for (size_t i = tokens.size(); i > 0; --i) {
if (tokens[i - 1].type == dt::OVR_END) { if (tokens[i - 1].type == dt::OVR_END) {
last_ovr_end = i - 1; last_ovr_end = i - 1;
@ -127,30 +188,14 @@ public:
for (size_t i = 0; i < tokens.size(); ++i) { for (size_t i = 0; i < tokens.size(); ++i) {
size_t len = tokens[i].length; size_t len = tokens[i].length;
switch (i > last_ovr_end ? dt::TEXT : tokens[i].type) { switch (tokens[i].type) {
case dt::LINE_BREAK: SetStyling(len, ss::LINE_BREAK); break; case dt::LINE_BREAK: break;
case dt::ERROR: SetStyling(len, ss::ERROR); break; case dt::TEXT: SplitText(i); break;
case dt::ARG: SetStyling(len, ss::PARAMETER); break;
case dt::COMMENT: SetStyling(len, ss::COMMENT); break;
case dt::WHITESPACE: SetStyling(len, ss::NORMAL); break;
case dt::OPEN_PAREN: case dt::CLOSE_PAREN: case dt::ARG_SEP: case dt::TAG_START:
SetStyling(len, ss::PUNCTUATION);
break;
case dt::OVR_BEGIN: case dt::OVR_END:
SetStyling(len, ss::OVERRIDE);
break;
case dt::TEXT:
if (in_drawing)
SetStyling(len, ss::DRAWING);
else if (spellchecker)
StyleSpellCheck(pos, len);
else
SetStyling(len, ss::NORMAL);
break;
case dt::TAG_NAME: case dt::TAG_NAME:
SetStyling(len, ss::TAG); if (i > last_ovr_end) {
SplitText(i);
break;
}
if (len != 1 || i + 1 >= tokens.size() || text[pos] != 'p') if (len != 1 || i + 1 >= tokens.size() || text[pos] != 'p')
break; break;
@ -170,13 +215,14 @@ public:
break; break;
} }
break; break;
default:
if (i > last_ovr_end)
SplitText(i);
break;
} }
pos += len; pos += len;
// karaoke templater
} }
return ranges;
} }
}; };
} }
@ -188,5 +234,9 @@ std::vector<DialogueToken> SyntaxHighlight(std::string const& text, std::vector<
return SyntaxHighlighter(text, spellchecker).Highlight(tokens, template_line); return SyntaxHighlighter(text, spellchecker).Highlight(tokens, template_line);
} }
void SplitWords(std::string const& str, std::vector<DialogueToken> &tokens) {
WordSplitter(str, tokens).SplitWords();
}
} }
} }

View file

@ -27,6 +27,7 @@ namespace agi {
namespace DialogueTokenType { namespace DialogueTokenType {
enum { enum {
TEXT = 1000, TEXT = 1000,
WORD,
LINE_BREAK, LINE_BREAK,
OVR_BEGIN, OVR_BEGIN,
OVR_END, OVR_END,
@ -38,7 +39,8 @@ namespace agi {
ARG, ARG,
ERROR, ERROR,
COMMENT, COMMENT,
WHITESPACE WHITESPACE,
DRAWING
}; };
} }
@ -66,8 +68,13 @@ namespace agi {
DialogueToken(int type, size_t length) : type(type), length(length) { } DialogueToken(int type, size_t length) : type(type), length(length) { }
}; };
/// Tokenize the passed string as the body of a dialogue line
std::vector<DialogueToken> TokenizeDialogueBody(std::string const& str); std::vector<DialogueToken> TokenizeDialogueBody(std::string const& str);
/// Split the words in the TEXT tokens of the lexed line into their
/// own tokens and convert the body of drawings to DRAWING tokens
void SplitWords(std::string const& str, std::vector<DialogueToken> &tokens);
std::vector<DialogueToken> SyntaxHighlight(std::string const& text, std::vector<DialogueToken> const& tokens, bool template_line, SpellChecker *spellchecker); std::vector<DialogueToken> SyntaxHighlight(std::string const& text, std::vector<DialogueToken> const& tokens, bool template_line, SpellChecker *spellchecker);
} }
} }

View file

@ -47,6 +47,7 @@
#include "subs_edit_ctrl.h" #include "subs_edit_ctrl.h"
#include "utils.h" #include "utils.h"
#include <libaegisub/ass/dialogue_parser.h>
#include <libaegisub/exception.h> #include <libaegisub/exception.h>
#include <libaegisub/spellchecker.h> #include <libaegisub/spellchecker.h>
@ -168,7 +169,7 @@ void DialogSpellChecker::OnReplace(wxCommandEvent&) {
} }
void DialogSpellChecker::OnReplaceAll(wxCommandEvent&) { void DialogSpellChecker::OnReplaceAll(wxCommandEvent&) {
auto_replace[orig_word->GetValue()] = replace_word->GetValue(); auto_replace[from_wx(orig_word->GetValue())] = from_wx(replace_word->GetValue());
Replace(); Replace();
FindNext(); FindNext();
@ -179,7 +180,7 @@ void DialogSpellChecker::OnIgnore(wxCommandEvent&) {
} }
void DialogSpellChecker::OnIgnoreAll(wxCommandEvent&) { void DialogSpellChecker::OnIgnoreAll(wxCommandEvent&) {
auto_ignore.insert(orig_word->GetValue()); auto_ignore.emplace(from_wx(orig_word->GetValue()));
FindNext(); FindNext();
} }
@ -247,19 +248,22 @@ bool DialogSpellChecker::FindNext() {
bool DialogSpellChecker::CheckLine(AssDialogue *active_line, int start_pos, int *commit_id) { bool DialogSpellChecker::CheckLine(AssDialogue *active_line, int start_pos, int *commit_id) {
if (active_line->Comment && skip_comments->GetValue()) return false; if (active_line->Comment && skip_comments->GetValue()) return false;
IntPairVector results; std::string text = from_wx(active_line->Text);
GetWordBoundaries(active_line->Text, results); auto tokens = agi::ass::TokenizeDialogueBody(text);
agi::ass::SplitWords(text, tokens);
int shift = 0; word_start = 0;
for (auto const& result : results) { for (auto const& tok : tokens) {
word_start = result.first + shift; word_start += tok.length;
if (tok.type != agi::ass::DialogueTokenType::WORD) continue;
if (word_start < start_pos) continue; if (word_start < start_pos) continue;
word_end = result.second + shift;
wxString word = active_line->Text.Mid(word_start, word_end - word_start);
if (auto_ignore.count(word) || spellchecker->CheckWord(from_wx(word))) continue; word_len = tok.length;
std::string word = text.substr(word_start, word_len);
std::map<wxString, wxString>::const_iterator auto_rep = auto_replace.find(word); if (auto_ignore.count(word) || spellchecker->CheckWord(word)) continue;
auto auto_rep = auto_replace.find(word);
if (auto_rep == auto_replace.end()) { if (auto_rep == auto_replace.end()) {
#ifdef __WXGTK__ #ifdef __WXGTK__
// http://trac.wxwidgets.org/ticket/14369 // http://trac.wxwidgets.org/ticket/14369
@ -274,9 +278,10 @@ bool DialogSpellChecker::CheckLine(AssDialogue *active_line, int start_pos, int
return true; return true;
} }
active_line->Text = active_line->Text.Left(word_start) + auto_rep->second + active_line->Text.Mid(word_end); text.replace(word_start, word_len, auto_rep->second);
active_line->Text = from_wx(text);
*commit_id = context->ass->Commit(_("spell check replace"), AssFile::COMMIT_DIAG_TEXT, *commit_id); *commit_id = context->ass->Commit(_("spell check replace"), AssFile::COMMIT_DIAG_TEXT, *commit_id);
shift += auto_rep->second.size() - auto_rep->first.size(); word_start += auto_rep->second.size() - auto_rep->first.size();
} }
return false; return false;
} }
@ -285,23 +290,23 @@ void DialogSpellChecker::Replace() {
AssDialogue *active_line = context->selectionController->GetActiveLine(); AssDialogue *active_line = context->selectionController->GetActiveLine();
// Only replace if the user hasn't changed the selection to something else // Only replace if the user hasn't changed the selection to something else
if (active_line->Text.Mid(word_start, word_end - word_start) == orig_word->GetValue()) { if (active_line->Text.Mid(word_start, word_len) == orig_word->GetValue()) {
active_line->Text = active_line->Text.Left(word_start) + replace_word->GetValue() + active_line->Text.Mid(word_end); active_line->Text.replace(word_start, word_len, replace_word->GetValue());
context->ass->Commit(_("spell check replace"), AssFile::COMMIT_DIAG_TEXT); context->ass->Commit(_("spell check replace"), AssFile::COMMIT_DIAG_TEXT);
context->textSelectionController->SetInsertionPoint(word_start + replace_word->GetValue().size()); context->textSelectionController->SetInsertionPoint(word_start + replace_word->GetValue().size());
} }
} }
void DialogSpellChecker::SetWord(wxString const& word) { void DialogSpellChecker::SetWord(std::string const& word) {
orig_word->SetValue(word); orig_word->SetValue(to_wx(word));
wxArrayString suggestions = to_wx(spellchecker->GetSuggestions(from_wx(word))); wxArrayString suggestions = to_wx(spellchecker->GetSuggestions(word));
replace_word->SetValue(suggestions.size() ? suggestions[0] : word); replace_word->SetValue(suggestions.size() ? suggestions[0] : to_wx(word));
suggest_list->Clear(); suggest_list->Clear();
suggest_list->Append(suggestions); suggest_list->Append(suggestions);
context->textSelectionController->SetSelection(word_start, word_end); context->textSelectionController->SetSelection(word_start, word_start + word_len);
context->textSelectionController->SetInsertionPoint(word_end); context->textSelectionController->SetInsertionPoint(word_start + word_len);
add_button->Enable(spellchecker->CanAddWord(from_wx(word))); add_button->Enable(spellchecker->CanAddWord(word));
} }

View file

@ -48,16 +48,16 @@ class DialogSpellChecker : public wxDialog {
agi::scoped_ptr<agi::SpellChecker> spellchecker; ///< The spellchecking engine agi::scoped_ptr<agi::SpellChecker> spellchecker; ///< The spellchecking engine
/// Words which the user has indicated should always be corrected /// Words which the user has indicated should always be corrected
std::map<wxString,wxString> auto_replace; std::map<std::string, std::string> auto_replace;
/// Words which the user has temporarily added to the dictionary /// Words which the user has temporarily added to the dictionary
std::set<wxString> auto_ignore; std::set<std::string> auto_ignore;
/// Dictionaries available /// Dictionaries available
wxArrayString dictionary_lang_codes; wxArrayString dictionary_lang_codes;
int word_start; ///< Start index of the current misspelled word int word_start; ///< Start index of the current misspelled word
int word_end; ///< End index of the current misspelled word int word_len; ///< Length of the current misspelled word
wxTextCtrl *orig_word; ///< The word being corrected wxTextCtrl *orig_word; ///< The word being corrected
wxTextCtrl *replace_word; ///< The replacement that will be used if "Replace" is clicked wxTextCtrl *replace_word; ///< The replacement that will be used if "Replace" is clicked
@ -83,7 +83,7 @@ class DialogSpellChecker : public wxDialog {
bool CheckLine(AssDialogue *active_line, int start_pos, int *commit_id); bool CheckLine(AssDialogue *active_line, int start_pos, int *commit_id);
/// Set the current word to be corrected /// Set the current word to be corrected
void SetWord(wxString const& word); void SetWord(std::string const& word);
/// Correct the currently selected word /// Correct the currently selected word
void Replace(); void Replace();

View file

@ -82,32 +82,6 @@ void ScintillaTextCtrl::SetUnicodeStyling(int start,int length,int style) {
SetStyling(len,style); SetStyling(len,style);
} }
/// @brief Get boundaries of word at position
void ScintillaTextCtrl::GetBoundsOfWordAtPosition(int pos,int &start,int &end) {
IntPairVector results;
GetWordBoundaries(GetText(), results);
// Get boundaries
for (auto const& result : results) {
if (result.first <= pos && result.second >= pos) {
start = result.first;
end = result.second;
return;
}
}
// Word not found
start = 0;
end = 0;
}
/// @brief Get word at specified position
/// @param pos Position to look up
/// @return The text between the bounds reported by GetBoundsOfWordAtPosition
wxString ScintillaTextCtrl::GetWordAtPosition(int pos) {
	int word_begin, word_end;
	GetBoundsOfWordAtPosition(pos, word_begin, word_end);

	int const word_len = word_end - word_begin;
	return GetText().Mid(word_begin, word_len);
}
/// @brief Set selection, unicode-aware /// @brief Set selection, unicode-aware
void ScintillaTextCtrl::SetSelectionU(int start, int end) { void ScintillaTextCtrl::SetSelectionU(int start, int end) {
SetSelection(GetUnicodePosition(start),GetUnicodePosition(end)); SetSelection(GetUnicodePosition(start),GetUnicodePosition(end));

View file

@ -34,6 +34,8 @@
#ifndef AGI_PRE #ifndef AGI_PRE
#include <wx/stc/stc.h> #include <wx/stc/stc.h>
#include <string>
#endif #endif
/// DOCME /// DOCME
@ -46,8 +48,6 @@ class ScintillaTextCtrl : public wxStyledTextCtrl {
void OnMouseWheel(wxMouseEvent& evt); void OnMouseWheel(wxMouseEvent& evt);
public: public:
wxString GetWordAtPosition(int pos);
void GetBoundsOfWordAtPosition(int pos,int &start,int &end);
int GetUnicodePosition(int pos); int GetUnicodePosition(int pos);
int GetReverseUnicodePosition(int pos); int GetReverseUnicodePosition(int pos);

View file

@ -213,6 +213,7 @@ void SubsTextEditCtrl::UpdateStyle() {
line_text = move(text); line_text = move(text);
} }
tokenized_line = agi::ass::TokenizeDialogueBody(line_text); tokenized_line = agi::ass::TokenizeDialogueBody(line_text);
agi::ass::SplitWords(line_text, tokenized_line);
cursor_pos = -1; cursor_pos = -1;
UpdateCallTip(); UpdateCallTip();
@ -298,15 +299,13 @@ void SubsTextEditCtrl::Paste() {
void SubsTextEditCtrl::OnContextMenu(wxContextMenuEvent &event) { void SubsTextEditCtrl::OnContextMenu(wxContextMenuEvent &event) {
wxPoint pos = event.GetPosition(); wxPoint pos = event.GetPosition();
int activePos; int activePos;
if (pos == wxDefaultPosition) { if (pos == wxDefaultPosition)
activePos = GetCurrentPos(); activePos = GetCurrentPos();
} else
else {
activePos = PositionFromPoint(ScreenToClient(pos)); activePos = PositionFromPoint(ScreenToClient(pos));
}
currentWordPos = GetReverseUnicodePosition(activePos); currentWordPos = GetBoundsOfWordAtPosition(activePos);
currentWord = from_wx(GetWordAtPosition(currentWordPos)); currentWord = line_text.substr(currentWordPos.first, currentWordPos.second);
wxMenu menu; wxMenu menu;
if (!currentWord.empty()) { if (!currentWord.empty()) {
@ -431,27 +430,22 @@ void SubsTextEditCtrl::OnAddToDictionary(wxCommandEvent &) {
void SubsTextEditCtrl::OnUseSuggestion(wxCommandEvent &event) { void SubsTextEditCtrl::OnUseSuggestion(wxCommandEvent &event) {
std::string suggestion; std::string suggestion;
int sugIdx = event.GetId() - EDIT_MENU_THESAURUS_SUGS; int sugIdx = event.GetId() - EDIT_MENU_THESAURUS_SUGS;
if (sugIdx >= 0) { if (sugIdx >= 0)
suggestion = lagi_wxString(thesSugs[sugIdx]); suggestion = from_wx(thesSugs[sugIdx]);
} else
else {
suggestion = sugs[event.GetId() - EDIT_MENU_SUGGESTIONS]; suggestion = sugs[event.GetId() - EDIT_MENU_SUGGESTIONS];
}
// Strip suggestion of parenthesis // Strip suggestion of parenthesis
size_t pos = suggestion.find("("); size_t pos = suggestion.find("(");
if (pos != suggestion.npos) if (pos != suggestion.npos)
suggestion.resize(pos - 1); suggestion.resize(pos - 1);
// Get boundaries of text being replaced // line_text needs to get cleared before SetTextRaw to ensure it gets reparsed
int start, end; std::string new_text;
GetBoundsOfWordAtPosition(currentWordPos, start, end); swap(line_text, new_text);
SetTextRaw(new_text.replace(currentWordPos.first, currentWordPos.second, suggestion).c_str());
wxString text = GetText(); SetSelection(currentWordPos.first, currentWordPos.first + suggestion.size());
SetText(text.Left(std::max(0, start)) + to_wx(suggestion) + text.Mid(end));
// Set selection
SetSelectionU(start, start+suggestion.size());
SetFocus(); SetFocus();
} }
@ -480,3 +474,17 @@ void SubsTextEditCtrl::OnSetThesLanguage(wxCommandEvent &event) {
UpdateStyle(); UpdateStyle();
} }
/// @brief Find the WORD token of the current line which contains a position
/// @param pos Offset into the line, in the same units as the token lengths
/// @return (start offset, length) of the word, or (0, 0) if the token at pos
///         is not a WORD or pos is past the end of the tokenized line
std::pair<int, int> SubsTextEditCtrl::GetBoundsOfWordAtPosition(int pos) {
	int tok_start = 0;
	for (auto const& tok : tokenized_line) {
		// Token lengths are relative, so the end of this token has to be
		// computed from the running offset before comparing it to pos
		int const tok_end = tok_start + (int)tok.length;
		if (tok_end > pos) {
			if (tok.type == agi::ass::DialogueTokenType::WORD)
				return std::make_pair(tok_start, (int)tok.length);
			return std::make_pair(0, 0);
		}
		tok_start = tok_end;
	}

	return std::make_pair(0, 0);
}

View file

@ -68,7 +68,7 @@ class SubsTextEditCtrl : public ScintillaTextCtrl {
std::string currentWord; std::string currentWord;
/// The beginning of the word right-clicked on, for spellchecker replacing /// The beginning of the word right-clicked on, for spellchecker replacing
int currentWordPos; std::pair<int, int> currentWordPos;
/// Spellchecker suggestions for the last right-clicked word /// Spellchecker suggestions for the last right-clicked word
std::vector<std::string> sugs; std::vector<std::string> sugs;
@ -129,5 +129,7 @@ public:
void SetTextTo(wxString const& text); void SetTextTo(wxString const& text);
void Paste(); void Paste();
std::pair<int, int> GetBoundsOfWordAtPosition(int pos);
DECLARE_EVENT_TABLE() DECLARE_EVENT_TABLE()
}; };

View file

@ -118,75 +118,6 @@ int SmallestPowerOf2(int x) {
return x; return x;
} }
/// Find the words in the portion of text between start and end
/// @param text Text to scan
/// @param[out] results Receives one (start, end) index pair per word found,
///                     where end is the index of the character which ended the word
/// @param start First index to examine; also reused as the start of the word in progress
/// @param end Index to stop at, or a negative value for the end of text
///
/// Text inside {...} blocks and text following a \p tag with a nonzero
/// argument is not reported as words.
void GetWordBoundaries(wxString const& text, IntPairVector &results, int start, int end) {
// Current nesting level of '{'
int depth = 0;
// Set while the most recent \p tag seen had a nonzero numeric argument
bool in_draw_mode = false;
if (end < 0) end = text.size();
// Characters which end a word. Looked up with std::binary_search below, so
// the table has to stay in ascending order.
const wxUniChar delims[] = {
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0028,
0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x003a,
0x003b, 0x003d, 0x003f, 0x0040, 0x005b, 0x005c, 0x005d, 0x005e,
0x005f, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 0x00a1, 0x00a2,
0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00aa, 0x00ab,
0x00b0, 0x00b6, 0x00b7, 0x00ba, 0x00bb, 0x00bf, 0x02dc, 0x0e3f,
0x2010, 0x2013, 0x2014, 0x2015, 0x2018, 0x2019, 0x201c, 0x201d,
0x2020, 0x2021, 0x2022, 0x2025, 0x2026, 0x2026, 0x2030, 0x2031,
0x2032, 0x203b, 0x203b, 0x203d, 0x2042, 0x2044, 0x20a6, 0x20a9,
0x20aa, 0x20ac, 0x20ad, 0x2116, 0x2234, 0x2235, 0x2420, 0x2422,
0x2423, 0x2506, 0x25ca, 0x2605, 0x261e, 0x2e2e, 0x3000, 0x3001,
0x3002, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 0x300d, 0x300e,
0x300f, 0x3010, 0x3011, 0x3014, 0x3015, 0x3016, 0x3017, 0x3018,
0x3019, 0x301a, 0x301b, 0x301c, 0x3030, 0x303d, 0x30fb, 0xff0a,
0xff5b, 0xff5d, 0xff5e
};
// The loop runs one step past end so that a word touching the end of the
// range still gets terminated
for (int i = start; i < end + 1; ++i) {
// Current character; the extra final iteration sees '.', which is in delims
wxUniChar cur = i < end ? text[i] : wxUniChar('.');
// Increase depth
if (cur == '{') {
depth++;
// Entering an outermost block ends any word in progress
if (depth == 1 && start != i && !in_draw_mode)
results.push_back(std::make_pair(start, i));
}
// Decrease depth
else if (cur == '}') {
depth--;
// The next word can start no earlier than just after the '}'
start = i + 1;
}
else if (depth > 0) {
// Check for draw mode
// NOTE(review): only the 'p' after the backslash is tested, so any tag
// starting with \p (e.g. \pos) also takes this branch and will clear
// in_draw_mode - confirm whether that is intended
if (cur == '\\' && i + 1 < end && text[i + 1] == 'p') {
// Skip over the "\p" itself
i += 2;
// Eat leading zeros
while (i < end && text[i] == '0') ++i;
// A digit remaining after the zeros means a nonzero argument
in_draw_mode = i < end && text[i] >= '0' && text[i] <= '9';
// Step back so the loop's ++i lands on the character just examined
if (!in_draw_mode) --i;
}
}
else if (!in_draw_mode) {
// Check if it is \n or \N
// (\h is matched here as well)
if (cur == '\\' && i < end-1 && (text[i+1] == 'N' || text[i+1] == 'n' || text[i+1] == 'h')) {
if (start != i)
results.push_back(std::make_pair(start, i));
// Resume after the two-character escape
start = i + 2;
i++;
}
// Check for standard delimiters
else if (std::binary_search(delims, delims + sizeof(delims) / sizeof(delims[0]), cur)) {
// Consecutive delimiters produce no empty words
if (start != i)
results.push_back(std::make_pair(start, i));
start = i + 1;
}
}
}
}
}
bool IsWhitespace(wchar_t c) bool IsWhitespace(wchar_t c)
{ {
const wchar_t whitespaces[] = { const wchar_t whitespaces[] = {

View file

@ -49,8 +49,6 @@
class wxMouseEvent; class wxMouseEvent;
class wxWindow; class wxWindow;
typedef std::vector<std::pair<int,int> > IntPairVector;
/// @brief Make a path relative to reference /// @brief Make a path relative to reference
wxString MakeRelativePath(wxString path,wxString reference); wxString MakeRelativePath(wxString path,wxString reference);
/// @brief Extract original path from relative /// @brief Extract original path from relative
@ -64,16 +62,6 @@ wxString PrettySize(int bytes);
/// Algorithm from http://bob.allegronetwork.com/prog/tricks.html /// Algorithm from http://bob.allegronetwork.com/prog/tricks.html
int SmallestPowerOf2(int x); int SmallestPowerOf2(int x);
/// Get the index ranges in text which are words
/// @param text Text to split into words
/// @param[out] results Vector of (start, end) index pairs, one per word found
/// @param start First index in text to check
/// @param end Last index in text to check, or -1 for end
///
/// This is ASS-specific and not a general purpose word boundary finder; words
/// within override blocks or drawing blocks are ignored
void GetWordBoundaries(wxString const& text, IntPairVector &results, int start=0, int end=-1);
/// Check if wchar 'c' is a whitespace character /// Check if wchar 'c' is a whitespace character
bool IsWhitespace(wchar_t c); bool IsWhitespace(wchar_t c);

View file

@ -25,13 +25,14 @@ SRC = \
libaegisub_iconv.cpp \ libaegisub_iconv.cpp \
libaegisub_keyframe.cpp \ libaegisub_keyframe.cpp \
libaegisub_line_iterator.cpp \ libaegisub_line_iterator.cpp \
libaegisub_line_wrap.cpp \
libaegisub_option.cpp \ libaegisub_option.cpp \
libaegisub_mru.cpp \ libaegisub_mru.cpp \
libaegisub_signals.cpp \ libaegisub_signals.cpp \
libaegisub_thesaurus.cpp \ libaegisub_thesaurus.cpp \
libaegisub_util.cpp \ libaegisub_util.cpp \
libaegisub_vfr.cpp \ libaegisub_vfr.cpp \
libaegisub_line_wrap.cpp libaegisub_word_split.cpp
HEADER = \ HEADER = \
*.h *.h

View file

@ -0,0 +1,151 @@
// Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <libaegisub/ass/dialogue_parser.h>
#include <libaegisub/spellchecker.h>
#include "main.h"
class MockSpellChecker : public agi::SpellChecker {
void AddWord(std::string const&) { }
bool CanAddWord(std::string const&) { return false; }
std::vector<std::string> GetSuggestions(std::string const&) { return std::vector<std::string>(); }
std::vector<std::string> GetLanguageList() { return std::vector<std::string>(); }
bool CheckWord(std::string const& word) { return word != "incorrect"; }
};
using namespace agi::ass;
namespace dt = DialogueTokenType;
namespace ss = SyntaxStyle;
class lagi_syntax : public libagi { };
// Highlighting with no spell checker: an empty token list gives no styles,
// and a single zero-length TEXT token gives a single NORMAL style
TEST(lagi_syntax, empty) {
	std::string text;
	std::vector<DialogueToken> tokens;

	EXPECT_TRUE(SyntaxHighlight(text, tokens, false, 0).empty());

	tokens.emplace_back(dt::TEXT, 0);
	auto syntax = SyntaxHighlight(text, tokens, false, 0);
	// Fatal assertion: syntax[0] below must not be read if the size is wrong
	ASSERT_EQ(1u, syntax.size());
	EXPECT_EQ(ss::NORMAL, syntax[0].type);
}
// Tokenize arg1, split it into words and syntax highlight it (as a
// non-template line, using MockSpellChecker), then run the statements passed
// as the remaining arguments. Those statements are expected to be
// expect_style() calls, which consume the resulting styles in order via
// token_index. Finally checks that every style produced was consumed.
#define tok_str(arg1, ...) do { \
MockSpellChecker spellchecker; \
std::string str = arg1; \
std::vector<DialogueToken> tok = TokenizeDialogueBody(str); \
SplitWords(str, tok); \
std::vector<DialogueToken> styles = SyntaxHighlight(str, tok, false, &spellchecker); \
size_t token_index = 0; \
__VA_ARGS__ \
EXPECT_EQ(token_index, styles.size()); \
} while(false)
// Check that the next unconsumed entry of styles has the expected type and
// length, then advance token_index. Only usable inside tok_str(), which
// declares styles and token_index. The bounds check keeps a too-short result
// from being indexed out of range after the EXPECT_LT has already failed.
#define expect_style(expected_type, expected_len) do { \
EXPECT_LT(token_index, styles.size()); \
if (token_index < styles.size()) { \
EXPECT_EQ(expected_type, styles[token_index].type); \
EXPECT_EQ(expected_len, styles[token_index].length); \
++token_index; \
} \
} while(false)
// Only the word rejected by the mock spell checker gets the SPELLING style;
// each accepted word is merged with its adjacent space into one NORMAL run
TEST(lagi_syntax, spellcheck) {
tok_str("correct incorrect correct",
expect_style(ss::NORMAL, 8u);
expect_style(ss::SPELLING, 9u);
expect_style(ss::NORMAL, 8u);
);
}
// The text between {\p1} and {\p} is styled as one DRAWING run rather than
// being spell checked, while the text before the first block still is
TEST(lagi_syntax, drawing) {
tok_str("incorrect{\\p1}m 10 10{\\p}correct",
expect_style(ss::SPELLING, 9u);
expect_style(ss::OVERRIDE, 1u);
expect_style(ss::PUNCTUATION, 1u);
expect_style(ss::TAG, 1u);
expect_style(ss::PARAMETER, 1u);
expect_style(ss::OVERRIDE, 1u);
expect_style(ss::DRAWING, 7u);
expect_style(ss::OVERRIDE, 1u);
expect_style(ss::PUNCTUATION, 1u);
expect_style(ss::TAG, 1u);
expect_style(ss::OVERRIDE, 1u);
expect_style(ss::NORMAL, 7u);
);
}
// A \clip tag nested in the argument list of \t gets its own TAG,
// PUNCTUATION and PARAMETER styles; the spaces after the commas are NORMAL
TEST(lagi_syntax, transform) {
tok_str("{\\t(0, 0, \\clip(0,0,10,10)}clipped text",
expect_style(ss::OVERRIDE, 1u);
expect_style(ss::PUNCTUATION, 1u);
expect_style(ss::TAG, 1u);
expect_style(ss::PUNCTUATION, 1u);
expect_style(ss::PARAMETER, 1u);
expect_style(ss::PUNCTUATION, 1u);
expect_style(ss::NORMAL, 1u);
expect_style(ss::PARAMETER, 1u);
expect_style(ss::PUNCTUATION, 1u);
expect_style(ss::NORMAL, 1u);
expect_style(ss::PUNCTUATION, 1u);
expect_style(ss::TAG, 4u);
expect_style(ss::PUNCTUATION, 1u);
expect_style(ss::PARAMETER, 1u);
expect_style(ss::PUNCTUATION, 1u);
expect_style(ss::PARAMETER, 1u);
expect_style(ss::PUNCTUATION, 1u);
expect_style(ss::PARAMETER, 2u);
expect_style(ss::PUNCTUATION, 1u);
expect_style(ss::PARAMETER, 2u);
expect_style(ss::PUNCTUATION, 1u);
expect_style(ss::OVERRIDE, 1u);
expect_style(ss::NORMAL, 12u);
);
}
// Everything after the last closed override block is handled as plain text:
// the same characters are a TAG inside the closed block but are spell
// checked (and rejected by the mock) in the unclosed one
TEST(lagi_syntax, unclosed) {
tok_str("{\\incorrect}{\\incorrect",
expect_style(ss::OVERRIDE, 1u);
expect_style(ss::PUNCTUATION, 1u);
expect_style(ss::TAG, 9u);
expect_style(ss::OVERRIDE, 1u);
expect_style(ss::NORMAL, 2u);
expect_style(ss::SPELLING, 9u);
);
}
// The content of an override block which holds no tag is styled as COMMENT
TEST(lagi_syntax, comment) {
tok_str("abc{def}ghi",
expect_style(ss::NORMAL, 3u);
expect_style(ss::OVERRIDE, 1u);
expect_style(ss::COMMENT, 3u);
expect_style(ss::OVERRIDE, 1u);
expect_style(ss::NORMAL, 3u);
);
}
// \N, \n and \h are each styled as a two character LINE_BREAK, and the two
// adjacent \N at the end are merged into a single run of length four
TEST(lagi_syntax, linebreak) {
tok_str("a\\Nb\\nc\\hd\\N\\N",
expect_style(ss::NORMAL, 1u);
expect_style(ss::LINE_BREAK, 2u);
expect_style(ss::NORMAL, 1u);
expect_style(ss::LINE_BREAK, 2u);
expect_style(ss::NORMAL, 1u);
expect_style(ss::LINE_BREAK, 2u);
expect_style(ss::NORMAL, 1u);
expect_style(ss::LINE_BREAK, 4u);
);
}

View file

@ -0,0 +1,135 @@
// Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include "main.h"
#include <libaegisub/ass/dialogue_parser.h>
class lagi_word_split : public libagi { };
using namespace agi::ass;
namespace dt = DialogueTokenType;
// Splitting must not add or drop tokens when there is no text to split
TEST(lagi_word_split, empty) {
	std::string line;
	std::vector<DialogueToken> toks;

	// No tokens in, no tokens out
	SplitWords(line, toks);
	EXPECT_TRUE(toks.empty());

	// A lone zero-length token is left as a single token
	toks.push_back(DialogueToken(0, 0));
	SplitWords(line, toks);
	EXPECT_EQ(1u, toks.size());
}
// A TEXT token containing no separators is relabelled WORD without being split
TEST(lagi_word_split, one_word) {
	std::string const line("abc");
	std::vector<DialogueToken> toks(1, DialogueToken(dt::TEXT, 3));

	SplitWords(line, toks);

	ASSERT_EQ(1u, toks.size());
	EXPECT_EQ(dt::WORD, toks[0].type);
}
// A space inside a TEXT token splits it into WORD, TEXT (the space), WORD
TEST(lagi_word_split, two_words_space) {
	std::string const line("abc def");
	std::vector<DialogueToken> toks(1, DialogueToken(dt::TEXT, 7));

	SplitWords(line, toks);
	ASSERT_EQ(3u, toks.size());

	// "abc"
	EXPECT_EQ(dt::WORD, toks[0].type);
	EXPECT_EQ(3u, toks[0].length);

	// " "
	EXPECT_EQ(dt::TEXT, toks[1].type);
	EXPECT_EQ(1u, toks[1].length);

	// "def"
	EXPECT_EQ(dt::WORD, toks[2].type);
	EXPECT_EQ(3u, toks[2].length);
}
// TEXT tokens on either side of a LINE_BREAK each become a WORD, and the
// LINE_BREAK token itself is left alone
TEST(lagi_word_split, two_words_newline) {
	std::string const line("abc\\Ndef");

	std::vector<DialogueToken> toks;
	toks.push_back(DialogueToken(dt::TEXT, 3));
	toks.push_back(DialogueToken(dt::LINE_BREAK, 2));
	toks.push_back(DialogueToken(dt::TEXT, 3));

	SplitWords(line, toks);
	ASSERT_EQ(3u, toks.size());

	// "abc"
	EXPECT_EQ(dt::WORD, toks[0].type);
	EXPECT_EQ(3u, toks[0].length);

	// "\N"
	EXPECT_EQ(dt::LINE_BREAK, toks[1].type);
	EXPECT_EQ(2u, toks[1].length);

	// "def"
	EXPECT_EQ(dt::WORD, toks[2].type);
	EXPECT_EQ(3u, toks[2].length);
}
// A multi-byte separator (U+300A, three bytes of UTF-8) splits words just as
// a space does, and the separator token's length is its length in bytes
TEST(lagi_word_split, two_words_unicode) {
	std::string const line(u8"abc\u300adef");
	std::vector<DialogueToken> toks(1, DialogueToken(dt::TEXT, 9));

	SplitWords(line, toks);
	ASSERT_EQ(3u, toks.size());

	// "abc"
	EXPECT_EQ(dt::WORD, toks[0].type);
	EXPECT_EQ(3u, toks[0].length);

	// U+300A
	EXPECT_EQ(dt::TEXT, toks[1].type);
	EXPECT_EQ(3u, toks[1].length);

	// "def"
	EXPECT_EQ(dt::WORD, toks[2].type);
	EXPECT_EQ(3u, toks[2].length);
}
// Text following {\p1} becomes a single DRAWING token rather than being
// split into words, while text outside the drawing is split as usual
TEST(lagi_word_split, drawing) {
	std::string const line("a b{\\p1}m 10{\\p0}c");

	std::vector<DialogueToken> toks;
	// "a b"
	toks.push_back(DialogueToken(dt::TEXT, 3));
	// "{\p1}"
	toks.push_back(DialogueToken(dt::OVR_BEGIN, 1));
	toks.push_back(DialogueToken(dt::TAG_START, 1));
	toks.push_back(DialogueToken(dt::TAG_NAME, 1));
	toks.push_back(DialogueToken(dt::ARG, 1));
	toks.push_back(DialogueToken(dt::OVR_END, 1));
	// "m 10"
	toks.push_back(DialogueToken(dt::TEXT, 4));
	// "{\p0}"
	toks.push_back(DialogueToken(dt::OVR_BEGIN, 1));
	toks.push_back(DialogueToken(dt::TAG_START, 1));
	toks.push_back(DialogueToken(dt::TAG_NAME, 1));
	toks.push_back(DialogueToken(dt::ARG, 1));
	toks.push_back(DialogueToken(dt::OVR_END, 1));
	// "c"
	toks.push_back(DialogueToken(dt::TEXT, 1));

	SplitWords(line, toks);

	// Only the leading "a b" is split, adding two tokens to the original 13
	ASSERT_EQ(15u, toks.size());
	EXPECT_EQ(dt::WORD, toks[0].type);
	EXPECT_EQ(dt::WORD, toks[2].type);
	EXPECT_EQ(dt::WORD, toks[14].type);
	EXPECT_EQ(dt::DRAWING, toks[8].type);
}
// Tokens of an override block which is never closed are treated as text:
// the punctuation becomes TEXT and the tag name becomes a WORD
TEST(lagi_word_split, unclosed_ovr) {
	std::string const line("a{\\b");

	std::vector<DialogueToken> toks;
	toks.push_back(DialogueToken(dt::TEXT, 1));
	toks.push_back(DialogueToken(dt::OVR_BEGIN, 1));
	toks.push_back(DialogueToken(dt::TAG_START, 1));
	toks.push_back(DialogueToken(dt::TAG_NAME, 1));

	SplitWords(line, toks);

	// Nothing is split, only relabelled
	ASSERT_EQ(4u, toks.size());
	EXPECT_EQ(dt::WORD, toks[0].type);
	EXPECT_EQ(dt::TEXT, toks[1].type);
	EXPECT_EQ(dt::TEXT, toks[2].type);
	EXPECT_EQ(dt::WORD, toks[3].type);
}