forked from mia/Aegisub
Rewrite HunspellSpellChecker
Add support for loading dictionaries from both the user's dictionary path and the application's install path. Fix some issues with loading and saving the user's customized dictionary. Originally committed to SVN as r4845.
This commit is contained in:
parent
1214290e90
commit
1bb8d16a45
2 changed files with 159 additions and 182 deletions
|
@ -1,4 +1,4 @@
|
||||||
// Copyright (c) 2006, Rodrigo Braz Monteiro
|
// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
|
||||||
// All rights reserved.
|
// All rights reserved.
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -39,13 +39,16 @@
|
||||||
#ifdef WITH_HUNSPELL
|
#ifdef WITH_HUNSPELL
|
||||||
|
|
||||||
#ifndef AGI_PRE
|
#ifndef AGI_PRE
|
||||||
|
#include <algorithm>
|
||||||
|
#include <iterator>
|
||||||
|
#include <list>
|
||||||
|
|
||||||
#include <wx/dir.h>
|
#include <wx/dir.h>
|
||||||
#include <wx/filename.h>
|
#include <wx/filename.h>
|
||||||
#include <wx/log.h>
|
|
||||||
#include <wx/txtstrm.h>
|
|
||||||
#include <wx/wfstream.h>
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <libaegisub/io.h>
|
||||||
|
#include <libaegisub/line_iterator.h>
|
||||||
#include <libaegisub/log.h>
|
#include <libaegisub/log.h>
|
||||||
|
|
||||||
#include "charset_conv.h"
|
#include "charset_conv.h"
|
||||||
|
@ -53,41 +56,16 @@
|
||||||
#include "main.h"
|
#include "main.h"
|
||||||
#include "spellchecker_hunspell.h"
|
#include "spellchecker_hunspell.h"
|
||||||
#include "standard_paths.h"
|
#include "standard_paths.h"
|
||||||
#include "text_file_reader.h"
|
|
||||||
#include "text_file_writer.h"
|
|
||||||
#include "utils.h"
|
|
||||||
|
|
||||||
/// @brief Constructor
|
|
||||||
HunspellSpellChecker::HunspellSpellChecker() {
|
HunspellSpellChecker::HunspellSpellChecker() {
|
||||||
hunspell = NULL;
|
|
||||||
conv = NULL;
|
|
||||||
rconv = NULL;
|
|
||||||
SetLanguage(lagi_wxString(OPT_GET("Tool/Spell Checker/Language")->GetString()));
|
SetLanguage(lagi_wxString(OPT_GET("Tool/Spell Checker/Language")->GetString()));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// @brief Destructor
|
|
||||||
HunspellSpellChecker::~HunspellSpellChecker() {
|
HunspellSpellChecker::~HunspellSpellChecker() {
|
||||||
Reset();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// @brief Reset spelling library
|
|
||||||
void HunspellSpellChecker::Reset() {
|
|
||||||
delete hunspell;
|
|
||||||
hunspell = NULL;
|
|
||||||
delete conv;
|
|
||||||
conv = NULL;
|
|
||||||
delete rconv;
|
|
||||||
rconv = NULL;
|
|
||||||
affpath.Clear();
|
|
||||||
dicpath.Clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// @brief Can add to dictionary?
|
|
||||||
/// @param word Word to check.
|
|
||||||
/// @return Whether word can be added or not.
|
|
||||||
///
|
|
||||||
bool HunspellSpellChecker::CanAddWord(wxString word) {
|
bool HunspellSpellChecker::CanAddWord(wxString word) {
|
||||||
if (!hunspell) return false;
|
if (!hunspell.get()) return false;
|
||||||
try {
|
try {
|
||||||
conv->Convert(STD_STR(word));
|
conv->Convert(STD_STR(word));
|
||||||
return true;
|
return true;
|
||||||
|
@ -97,72 +75,52 @@ bool HunspellSpellChecker::CanAddWord(wxString word) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// @brief Add word to dictionary
|
|
||||||
/// @param word Word to add.
|
|
||||||
///
|
|
||||||
void HunspellSpellChecker::AddWord(wxString word) {
|
void HunspellSpellChecker::AddWord(wxString word) {
|
||||||
// Dictionary OK?
|
if (!hunspell.get()) return;
|
||||||
if (!hunspell) return;
|
|
||||||
|
|
||||||
// Add to currently loaded file
|
std::string sword = STD_STR(word);
|
||||||
|
|
||||||
|
// Add it to the in-memory dictionary
|
||||||
#ifdef WITH_OLD_HUNSPELL
|
#ifdef WITH_OLD_HUNSPELL
|
||||||
hunspell->put_word(conv->Convert(STD_STR(word)).c_str());
|
hunspell->put_word(conv->Convert(sword).c_str());
|
||||||
#else
|
#else
|
||||||
hunspell->add(conv->Convert(STD_STR(word)).c_str());
|
hunspell->add(conv->Convert(sword).c_str());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
std::list<std::string> words;
|
||||||
|
|
||||||
// Ensure that the path exists
|
// Ensure that the path exists
|
||||||
wxFileName fn(usrdicpath);
|
wxFileName fn(userDicPath);
|
||||||
if (!fn.DirExists()) {
|
if (!fn.DirExists()) {
|
||||||
wxFileName::Mkdir(fn.GetPath());
|
wxFileName::Mkdir(fn.GetPath());
|
||||||
}
|
}
|
||||||
|
// Read the old contents of the user's dictionary
|
||||||
// Load dictionary
|
else {
|
||||||
wxArrayString dic;
|
std::auto_ptr<std::istream> stream(agi::io::Open(STD_STR(userDicPath)));
|
||||||
bool added = false;
|
std::remove_copy_if(
|
||||||
if (fn.FileExists()) { // Even if you ever want to remove this "if", keep the braces, so the stream closes at the end
|
++agi::line_iterator<std::string>(*stream.get()),
|
||||||
bool first = true;
|
agi::line_iterator<std::string>(),
|
||||||
TextFileReader reader(usrdicpath, L"UTF-8");
|
std::back_inserter(words),
|
||||||
while (reader.HasMoreLines()) {
|
std::mem_fun_ref(&std::string::empty));
|
||||||
wxString curLine = reader.ReadLineFromFile();
|
|
||||||
if (curLine.IsEmpty()) continue;
|
|
||||||
|
|
||||||
if (first) {
|
|
||||||
first = false;
|
|
||||||
if (curLine.IsNumber()) continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// See if word to be added goes here
|
// Add the word
|
||||||
if (!added && curLine.Lower() > word.Lower()) {
|
words.push_back(sword);
|
||||||
dic.Add(word);
|
words.sort();
|
||||||
added = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add to memory dictionary
|
// Write the new dictionary
|
||||||
dic.Add(curLine);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Not added yet
|
|
||||||
if (!added) dic.Add(word);
|
|
||||||
|
|
||||||
// Write back to disk
|
|
||||||
try {
|
try {
|
||||||
TextFileWriter writer(usrdicpath, L"UTF-8");
|
agi::io::Save writer(STD_STR(userDicPath));
|
||||||
writer.WriteLineToFile(wxString::Format(L"%i", dic.Count()));
|
writer.Get() << words.size() << "\n";
|
||||||
for (unsigned int i=0;i<dic.Count();i++) writer.WriteLineToFile(dic[i]);
|
std::copy(words.begin(), words.end(), std::ostream_iterator<std::string>(writer.Get(), "\n"));
|
||||||
}
|
}
|
||||||
catch (const agi::Exception&) {
|
catch (const agi::Exception&) {
|
||||||
// Failed to open file
|
// Failed to open file
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// @brief Check if the word is valid.
|
|
||||||
/// @param word Word to check
|
|
||||||
/// @return Whether word is valid or not.
|
|
||||||
///
|
|
||||||
bool HunspellSpellChecker::CheckWord(wxString word) {
|
bool HunspellSpellChecker::CheckWord(wxString word) {
|
||||||
if (!hunspell) return true;
|
if (!hunspell.get()) return true;
|
||||||
try {
|
try {
|
||||||
return hunspell->spell(conv->Convert(STD_STR(word)).c_str()) == 1;
|
return hunspell->spell(conv->Convert(STD_STR(word)).c_str()) == 1;
|
||||||
}
|
}
|
||||||
|
@ -171,112 +129,129 @@ bool HunspellSpellChecker::CheckWord(wxString word) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// @brief Get suggestions for word.
|
|
||||||
/// @param word Word to get suggestions for
|
|
||||||
/// @return List of suggestions
|
|
||||||
///
|
|
||||||
wxArrayString HunspellSpellChecker::GetSuggestions(wxString word) {
|
wxArrayString HunspellSpellChecker::GetSuggestions(wxString word) {
|
||||||
wxArrayString suggestions;
|
wxArrayString suggestions;
|
||||||
if (!hunspell) return suggestions;
|
if (!hunspell.get()) return suggestions;
|
||||||
|
|
||||||
try {
|
|
||||||
// Grab raw from Hunspell
|
// Grab raw from Hunspell
|
||||||
char **results;
|
char **results;
|
||||||
int n = hunspell->suggest(&results,conv->Convert(STD_STR(word)).c_str());
|
int n = hunspell->suggest(&results,conv->Convert(STD_STR(word)).c_str());
|
||||||
|
|
||||||
|
suggestions.reserve(n);
|
||||||
// Convert each
|
// Convert each
|
||||||
for (int i=0;i<n;i++) {
|
for (int i = 0; i < n; ++i) {
|
||||||
suggestions.Add(rconv->Convert(results[i]));
|
try {
|
||||||
|
suggestions.Add(lagi_wxString(rconv->Convert(results[i])));
|
||||||
|
}
|
||||||
|
catch (agi::charset::ConvError const&) {
|
||||||
|
// Shouldn't ever actually happen...
|
||||||
|
}
|
||||||
delete results[i];
|
delete results[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
delete results;
|
delete results;
|
||||||
}
|
|
||||||
catch (agi::charset::ConvError const&) {
|
|
||||||
return suggestions;
|
|
||||||
}
|
|
||||||
|
|
||||||
return suggestions;
|
return suggestions;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// @brief Get list of available dictionaries.
|
|
||||||
/// @return List of available dictionaries
|
|
||||||
///
|
|
||||||
wxArrayString HunspellSpellChecker::GetLanguageList() {
|
wxArrayString HunspellSpellChecker::GetLanguageList() {
|
||||||
// Get dir name
|
wxArrayString dic, aff;
|
||||||
wxString path = StandardPaths::DecodePathMaybeRelative(lagi_wxString(OPT_GET("Path/Dictionary")->GetString()), _T("?data")) + _T("/");
|
|
||||||
wxArrayString list;
|
|
||||||
wxFileName folder(path);
|
|
||||||
if (!folder.DirExists()) return list;
|
|
||||||
|
|
||||||
// Get file lists
|
// Get list of dictionaries
|
||||||
wxArrayString dic;
|
wxString path = StandardPaths::DecodePath("?data/dictionaries/");
|
||||||
wxDir::GetAllFiles(path,&dic,_T("*.dic"),wxDIR_FILES);
|
if (wxFileName::DirExists(path)) {
|
||||||
wxArrayString aff;
|
wxDir::GetAllFiles(path, &dic, "*.dic", wxDIR_FILES);
|
||||||
wxDir::GetAllFiles(path,&aff,_T("*.aff"),wxDIR_FILES);
|
wxDir::GetAllFiles(path, &aff, "*.aff", wxDIR_FILES);
|
||||||
|
}
|
||||||
|
path = StandardPaths::DecodePath(lagi_wxString(OPT_GET("Path/Dictionary")->GetString()) + "/");
|
||||||
|
if (wxFileName::DirExists(path)) {
|
||||||
|
wxDir::GetAllFiles(path, &dic, "*.dic", wxDIR_FILES);
|
||||||
|
wxDir::GetAllFiles(path, &aff, "*.aff", wxDIR_FILES);
|
||||||
|
}
|
||||||
|
if (aff.empty()) return wxArrayString();
|
||||||
|
|
||||||
// For each dictionary match, see if it can find the corresponding .aff
|
dic.Sort();
|
||||||
for (unsigned int i=0;i<dic.Count();i++) {
|
aff.Sort();
|
||||||
wxString curAff = dic[i].Left(dic[i].Length()-4) + _T(".aff");
|
|
||||||
for (unsigned int j=0;j<aff.Count();j++) {
|
// Drop extensions
|
||||||
// Found match
|
for (size_t i = 0; i < dic.size(); ++i) dic[i].resize(dic[i].size() - 4);
|
||||||
if (curAff == aff[j]) {
|
for (size_t i = 0; i < aff.size(); ++i) aff[i].resize(aff[i].size() - 4);
|
||||||
wxFileName fname(curAff);
|
|
||||||
list.Add(fname.GetName());
|
// Verify that each aff has a dic
|
||||||
break;
|
wxArrayString ret;
|
||||||
|
for (size_t i = 0, j = 0; i < dic.size() && j < aff.size(); ) {
|
||||||
|
int cmp = dic[i].Cmp(aff[j]);
|
||||||
|
if (cmp < 0) ++i;
|
||||||
|
else if (cmp > 0) ++j;
|
||||||
|
else {
|
||||||
|
// Don't insert a language twice if it's in both the user dir and
|
||||||
|
// the app's dir
|
||||||
|
wxString name = wxFileName(aff[j]).GetName();
|
||||||
|
if (ret.empty() || name != ret.back())
|
||||||
|
ret.push_back(name);
|
||||||
|
++i;
|
||||||
|
++j;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
return ret;
|
||||||
|
|
||||||
// Return list
|
|
||||||
return list;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// @brief Set language.
|
|
||||||
/// @param language Language to set
|
|
||||||
///
|
|
||||||
void HunspellSpellChecker::SetLanguage(wxString language) {
|
void HunspellSpellChecker::SetLanguage(wxString language) {
|
||||||
// Unload
|
if (language.empty()) return;
|
||||||
Reset();
|
|
||||||
if (language.IsEmpty()) return;
|
|
||||||
|
|
||||||
// Get dir name
|
wxString userDicRoot = StandardPaths::DecodePath(lagi_wxString(OPT_GET("Path/Dictionary")->GetString()));
|
||||||
//FIXME: this should use ?user instead of ?data; however, since it apparently works already on win32, I'm not gonna mess with it right now :p
|
wxString dataDicRoot = StandardPaths::DecodePath("?data/dictionaries");
|
||||||
wxString path = StandardPaths::DecodePathMaybeRelative(lagi_wxString(OPT_GET("Path/Dictionary")->GetString()), _T("?data")) + _T("/");
|
|
||||||
wxString userPath = StandardPaths::DecodePath(_T("?user/dictionaries/user_"));
|
|
||||||
|
|
||||||
// Get affix and dictionary paths
|
// If the user has a dic/aff pair in their dictionary path for this language
|
||||||
affpath = wxString::Format("%s%s.aff", path, language);
|
// use that; otherwise use the one from Aegisub's install dir, adding words
|
||||||
dicpath = wxString::Format("%s%s.dic", path, language);
|
// from the dic in the user's dictionary path if it exists
|
||||||
usrdicpath = wxString::Format("%s%s.dic", userPath, language);
|
wxString affPath = wxString::Format("%s/%s.aff", userDicRoot, language);
|
||||||
|
wxString dicPath = wxString::Format("%s/%s.dic", userDicRoot, language);
|
||||||
|
userDicPath = wxString::Format("%s/user_%s.dic", userDicRoot, language);
|
||||||
|
if (!wxFileExists(affPath) || !wxFileExists(dicPath)) {
|
||||||
|
affPath = wxString::Format("%s/%s.aff", dataDicRoot, language);
|
||||||
|
dicPath = wxString::Format("%s/%s.dic", dataDicRoot, language);
|
||||||
|
}
|
||||||
|
|
||||||
LOG_I("dictionary/file") << dicpath;
|
LOG_I("dictionary/file") << dicPath;
|
||||||
|
|
||||||
// Check if language is available
|
if (!wxFileExists(affPath) || !wxFileExists(dicPath)) {
|
||||||
if (!wxFileExists(affpath) || !wxFileExists(dicpath)) return;
|
LOG_D("dictionary/file") << "Dictionary not found";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
hunspell.reset(new Hunspell(affPath.mb_str(csConvLocal), dicPath.mb_str(csConvLocal)));
|
||||||
|
if (!hunspell.get()) return;
|
||||||
|
|
||||||
|
conv.reset(new agi::charset::IconvWrapper("utf-8", hunspell->get_dic_encoding()));
|
||||||
|
rconv.reset(new agi::charset::IconvWrapper(hunspell->get_dic_encoding(), "utf-8"));
|
||||||
|
|
||||||
|
if (userDicPath == dicPath || !wxFileExists(userDicPath)) return;
|
||||||
|
|
||||||
// Load
|
|
||||||
hunspell = new Hunspell(affpath.mb_str(csConvLocal),dicpath.mb_str(csConvLocal));
|
|
||||||
conv = NULL;
|
|
||||||
if (hunspell) {
|
|
||||||
conv = new agi::charset::IconvWrapper("wchar_t", hunspell->get_dic_encoding());
|
|
||||||
rconv = new agi::charset::IconvWrapper(hunspell->get_dic_encoding(), "wchar_t");
|
|
||||||
try {
|
try {
|
||||||
TextFileReader reader(usrdicpath, L"UTF-8");
|
std::auto_ptr<std::istream> stream(agi::io::Open(STD_STR(userDicPath)));
|
||||||
while (reader.HasMoreLines()) {
|
agi::line_iterator<std::string> userDic(*stream.get());
|
||||||
wxString curLine = reader.ReadLineFromFile();
|
agi::line_iterator<std::string> end;
|
||||||
if (curLine.IsEmpty() || curLine.IsNumber()) continue;
|
++userDic; // skip entry count line
|
||||||
|
for (; userDic != end; ++userDic) {
|
||||||
|
if ((*userDic).empty()) continue;
|
||||||
|
try {
|
||||||
#ifdef WITH_OLD_HUNSPELL
|
#ifdef WITH_OLD_HUNSPELL
|
||||||
hunspell->put_word(conv->Convert(STD_STR(curLine)).c_str());
|
hunspell->put_word(conv->Convert(*userDic).c_str());
|
||||||
#else
|
#else
|
||||||
hunspell->add(conv->Convert(STD_STR(curLine)).c_str());
|
hunspell->add(conv->Convert(*userDic).c_str());
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
catch (agi::charset::ConvError const&) {
|
||||||
|
// Normally this shouldn't happen, but some versions of Aegisub
|
||||||
|
// wrote words in the wrong charset
|
||||||
}
|
}
|
||||||
catch (const wchar_t *) {
|
|
||||||
// file not found
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
catch (agi::Exception const&) {
|
||||||
|
// File ceased to exist between when we checked and when we tried to
|
||||||
|
// open it or we don't have permission to read it for whatever reason
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // WITH_HUNSPELL
|
#endif // WITH_HUNSPELL
|
||||||
|
|
|
@ -34,10 +34,6 @@
|
||||||
/// @ingroup spelling
|
/// @ingroup spelling
|
||||||
///
|
///
|
||||||
|
|
||||||
|
|
||||||
///////////
|
|
||||||
// Headers
|
|
||||||
|
|
||||||
#ifdef WITH_HUNSPELL
|
#ifdef WITH_HUNSPELL
|
||||||
|
|
||||||
#include <hunspell/hunspell.hxx>
|
#include <hunspell/hunspell.hxx>
|
||||||
|
@ -53,37 +49,43 @@ namespace agi {
|
||||||
/// @brief Hunspell spell checker
|
/// @brief Hunspell spell checker
|
||||||
///
|
///
|
||||||
class HunspellSpellChecker : public SpellChecker {
|
class HunspellSpellChecker : public SpellChecker {
|
||||||
private:
|
|
||||||
|
|
||||||
/// Hunspell instance
|
/// Hunspell instance
|
||||||
Hunspell *hunspell;
|
std::auto_ptr<Hunspell> hunspell;
|
||||||
|
|
||||||
/// Conversion buffer
|
/// Conversion buffer
|
||||||
agi::charset::IconvWrapper *conv;
|
std::auto_ptr<agi::charset::IconvWrapper> conv;
|
||||||
agi::charset::IconvWrapper *rconv;
|
std::auto_ptr<agi::charset::IconvWrapper> rconv;
|
||||||
|
|
||||||
/// Path to .aff file
|
|
||||||
wxString affpath;
|
|
||||||
|
|
||||||
/// Path to .dic file
|
|
||||||
wxString dicpath;
|
|
||||||
|
|
||||||
/// Path to user-local dictionary.
|
/// Path to user-local dictionary.
|
||||||
wxString usrdicpath;
|
wxString userDicPath;
|
||||||
|
|
||||||
void Reset();
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
HunspellSpellChecker();
|
HunspellSpellChecker();
|
||||||
~HunspellSpellChecker();
|
~HunspellSpellChecker();
|
||||||
|
|
||||||
|
/// @brief Add word to dictionary
|
||||||
|
/// @param word Word to add.
|
||||||
void AddWord(wxString word);
|
void AddWord(wxString word);
|
||||||
|
|
||||||
|
/// @brief Can add to dictionary?
|
||||||
|
/// @param word Word to check.
|
||||||
|
/// @return Whether word can be added or not.
|
||||||
bool CanAddWord(wxString word);
|
bool CanAddWord(wxString word);
|
||||||
|
|
||||||
|
/// @brief Check if the word is valid.
|
||||||
|
/// @param word Word to check
|
||||||
|
/// @return Whether word is valid or not.
|
||||||
bool CheckWord(wxString word);
|
bool CheckWord(wxString word);
|
||||||
|
|
||||||
|
/// @brief Get suggestions for word.
|
||||||
|
/// @param word Word to get suggestions for
|
||||||
|
/// @return List of suggestions
|
||||||
wxArrayString GetSuggestions(wxString word);
|
wxArrayString GetSuggestions(wxString word);
|
||||||
|
|
||||||
|
/// @brief Get a list of languages which dictionaries are present for
|
||||||
wxArrayString GetLanguageList();
|
wxArrayString GetLanguageList();
|
||||||
|
/// @brief Set the spellchecker's language
|
||||||
|
/// @param language Language code
|
||||||
void SetLanguage(wxString language);
|
void SetLanguage(wxString language);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue