forked from mia/Aegisub
Replace MyThes thesaurus implementation with a custom one
Fixes a pile of unicode-related issues, such as dictionaries in a path which does not fit into the system's local charset, and significantly cuts down on the amount of code. Originally committed to SVN as r6250.
This commit is contained in:
parent
518f93f18f
commit
3c62a38c7a
18 changed files with 462 additions and 845 deletions
|
@ -855,14 +855,6 @@
|
|||
RelativePath="..\..\src\md5.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\src\mythes.cxx"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\src\mythes.hxx"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\src\pen.cpp"
|
||||
>
|
||||
|
@ -1423,14 +1415,6 @@
|
|||
RelativePath="..\..\src\thesaurus.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\src\thesaurus_myspell.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\src\thesaurus_myspell.h"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Subtitle Formats"
|
||||
|
|
|
@ -311,6 +311,10 @@
|
|||
RelativePath="..\..\libaegisub\common\path.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\libaegisub\common\thesaurus.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\libaegisub\common\util.cpp"
|
||||
>
|
||||
|
@ -477,6 +481,10 @@
|
|||
RelativePath="..\..\libaegisub\include\libaegisub\signal.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\libaegisub\include\libaegisub\thesaurus.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\libaegisub\include\libaegisub\types.h"
|
||||
>
|
||||
|
|
|
@ -334,6 +334,10 @@
|
|||
RelativePath="..\..\tests\libaegisub_signals.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\tests\libaegisub_thesaurus.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\tests\libaegisub_util.cpp"
|
||||
>
|
||||
|
|
|
@ -35,6 +35,7 @@ SRC = \
|
|||
common/keyframe.cpp \
|
||||
common/util.cpp \
|
||||
common/log.cpp \
|
||||
common/thesaurus.cpp \
|
||||
common/validator.cpp \
|
||||
common/vfr.cpp \
|
||||
unix/util.cpp \
|
||||
|
|
97
aegisub/libaegisub/common/thesaurus.cpp
Normal file
97
aegisub/libaegisub/common/thesaurus.cpp
Normal file
|
@ -0,0 +1,97 @@
|
|||
// Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
|
||||
//
|
||||
// Permission to use, copy, modify, and distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice appear in all copies.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
//
|
||||
// $Id$
|
||||
|
||||
/// @file thesaurus.cpp
|
||||
/// @brief MyThes-compatible thesaurus implementation
|
||||
/// @ingroup libaegisub thesaurus
|
||||
|
||||
#include "libaegisub/thesaurus.h"
|
||||
|
||||
#include "libaegisub/charset_conv.h"
|
||||
#include "libaegisub/io.h"
|
||||
#include "libaegisub/line_iterator.h"
|
||||
|
||||
/// Split a string on a separator character, appending every non-empty field
/// to the output container
/// @param str String to split
/// @param sep Separator to split on
/// @param[out] out Container the fields are appended to; it is not cleared first
template<class String, class Char, class Container>
static void split(String const& str, Char sep, Container *out) {
	typedef typename String::size_type size_type;

	out->reserve(2);

	size_type field_start = 0;
	for (;;) {
		size_type const sep_pos = str.find(sep, field_start);
		if (sep_pos == String::npos) break;

		// A leading separator or two adjacent separators produce no field
		if (sep_pos != field_start)
			out->push_back(str.substr(field_start, sep_pos - field_start));
		field_start = sep_pos + 1;
	}

	// Whatever follows the last separator, unless the string ended with one
	if (field_start < str.size())
		out->push_back(str.substr(field_start));
}
|
||||
|
||||
namespace agi {
|
||||
|
||||
Thesaurus::Thesaurus(std::string const& dat_path, std::string const& idx_path)
|
||||
: dat(io::Open(dat_path))
|
||||
{
|
||||
scoped_ptr<std::ifstream> idx(io::Open(idx_path));
|
||||
|
||||
std::string encoding_name;
|
||||
getline(*idx, encoding_name);
|
||||
std::string unused_entry_count;
|
||||
getline(*idx, unused_entry_count);
|
||||
|
||||
// Read the list of words and file offsets for those words
|
||||
for (line_iterator<std::string> iter(*idx, encoding_name), end; iter != end; ++iter) {
|
||||
std::vector<std::string> chunks;
|
||||
split(*iter, '|', &chunks);
|
||||
if (chunks.size() == 2) {
|
||||
offsets[chunks[0]] = atoi(chunks[1].c_str());
|
||||
}
|
||||
}
|
||||
|
||||
conv.reset(new charset::IconvWrapper(encoding_name.c_str(), "utf-8"));
|
||||
}
|
||||
|
||||
Thesaurus::~Thesaurus() { }
|
||||
|
||||
void Thesaurus::Lookup(std::string const& word, std::vector<Entry> *out) {
|
||||
out->clear();
|
||||
|
||||
std::map<std::string, int>::const_iterator it = offsets.find(word);
|
||||
if (!dat.get() || it == offsets.end()) return;
|
||||
|
||||
dat->seekg(it->second, std::ios::beg);
|
||||
if (!dat->good()) return;
|
||||
|
||||
// First line is the word and meaning count
|
||||
std::string temp;
|
||||
getline(*dat, temp);
|
||||
std::vector<std::string> header;
|
||||
split(conv->Convert(temp), '|', &header);
|
||||
if (header.size() != 2) return;
|
||||
int meanings = atoi(header[1].c_str());
|
||||
|
||||
out->resize(meanings);
|
||||
for (int i = 0; i < meanings; ++i) {
|
||||
std::vector<std::string> line;
|
||||
getline(*dat, temp);
|
||||
split(conv->Convert(temp), '|', &line);
|
||||
|
||||
// The "definition" is just the part of speech plus the word it's
|
||||
// giving synonyms for (which may not be the passed word)
|
||||
(*out)[i].first = line[0] + ' ' + line[1];
|
||||
(*out)[i].second.reserve(line.size() - 2);
|
||||
copy(line.begin() + 2, line.end(), back_inserter((*out)[i].second));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
58
aegisub/libaegisub/include/libaegisub/thesaurus.h
Normal file
58
aegisub/libaegisub/include/libaegisub/thesaurus.h
Normal file
|
@ -0,0 +1,58 @@
|
|||
// Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
|
||||
//
|
||||
// Permission to use, copy, modify, and distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice appear in all copies.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
//
|
||||
// $Id$
|
||||
|
||||
/// @file thesaurus.h
|
||||
/// @brief MyThes-compatible thesaurus implementation
|
||||
/// @ingroup libaegisub thesaurus
|
||||
|
||||
#include <libaegisub/scoped_ptr.h>
|
||||
|
||||
#ifndef LAGI_PRE
|
||||
#include <iosfwd>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#endif
|
||||
|
||||
namespace agi {
|
||||
|
||||
namespace charset { class IconvWrapper; }
|
||||
|
||||
class Thesaurus {
|
||||
/// Map of word -> byte position in the data file
|
||||
std::map<std::string, int> offsets;
|
||||
/// Read handle to the data file
|
||||
scoped_ptr<std::ifstream> dat;
|
||||
/// Converter from the data file's charset to UTF-8
|
||||
scoped_ptr<charset::IconvWrapper> conv;
|
||||
|
||||
public:
|
||||
/// A pair of a word and synonyms for that word
|
||||
typedef std::pair<std::string, std::vector<std::string> > Entry;
|
||||
|
||||
/// Constructor
|
||||
/// @param dat_path Path to data file
|
||||
/// @param idx_path Path to index file
|
||||
Thesaurus(std::string const& dat_path, std::string const& idx_path);
|
||||
~Thesaurus();
|
||||
|
||||
/// Look up synonyms for a word
|
||||
/// @param word Word to look up
|
||||
/// @param[out] out Vector to fill with word/synonym lists
|
||||
void Lookup(std::string const& word, std::vector<Entry> *out);
|
||||
};
|
||||
|
||||
}
|
|
@ -192,7 +192,6 @@ SRC += \
|
|||
menu.cpp \
|
||||
md5.c \
|
||||
mkv_wrap.cpp \
|
||||
mythes.cxx \
|
||||
pen.cpp \
|
||||
persist_location.cpp \
|
||||
plugin_manager.cpp \
|
||||
|
@ -221,7 +220,6 @@ SRC += \
|
|||
text_file_reader.cpp \
|
||||
text_file_writer.cpp \
|
||||
thesaurus.cpp \
|
||||
thesaurus_myspell.cpp \
|
||||
timeedit_ctrl.cpp \
|
||||
threaded_frame_source.cpp \
|
||||
toggle_bitmap.cpp \
|
||||
|
|
|
@ -97,7 +97,6 @@ AboutScreen::AboutScreen(wxWindow *parent)
|
|||
#ifdef WITH_FREETYPE2
|
||||
libString += " Freetype - Copyright (c) David Turner, Robert Wilhelm, Werner Lemberg;\n";
|
||||
#endif
|
||||
libString += " MyThes - Copyright (c) Kevin B. Hendricks, Stratford, Ontario, Canada.\n";
|
||||
#ifdef WITH_FFTW3
|
||||
libString += " FFTW - Copyright (c) Matteo Frigo, Massachusetts Institute of Technology;\n";
|
||||
#endif
|
||||
|
|
|
@ -1,398 +0,0 @@
|
|||
/*
|
||||
* Copyright 2003 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
#include "mythes.hxx"
|
||||
|
||||
// some basic utility routines
|
||||
|
||||
// string duplication routine
|
||||
char * mythes_mystrdup(const char * p)
|
||||
{
|
||||
|
||||
int sl = strlen(p) + 1;
|
||||
char * d = (char *)malloc(sl);
|
||||
if (d) {
|
||||
memcpy(d,p,sl);
|
||||
return d;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// remove cross-platform text line end characters
|
||||
void mythes_mychomp(char * s)
|
||||
{
|
||||
int k = strlen(s);
|
||||
if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
|
||||
if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
|
||||
}
|
||||
|
||||
|
||||
// return index of char in string
|
||||
int mystr_indexOfChar(const char * d, int c)
|
||||
{
|
||||
const char * p = strchr(d,c);
|
||||
if (p) return (int)(p-d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
MyThes::MyThes(const char* idxpath, const char * datpath)
|
||||
{
|
||||
nw = 0;
|
||||
encoding = NULL;
|
||||
list = NULL;
|
||||
offst = NULL;
|
||||
|
||||
if (thInitialize(idxpath, datpath) != 1) {
|
||||
fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
|
||||
fflush(stderr);
|
||||
if (encoding) free((void*)encoding);
|
||||
if (list) free((void*)list);
|
||||
if (offst) free((void*)offst);
|
||||
// did not initialize properly - throw exception?
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
MyThes::~MyThes()
|
||||
{
|
||||
if (thCleanup() != 1) {
|
||||
/* did not cleanup properly - throw exception? */
|
||||
}
|
||||
if (encoding) free((void*)encoding);
|
||||
encoding = NULL;
|
||||
list = NULL;
|
||||
offst = NULL;
|
||||
}
|
||||
|
||||
|
||||
int MyThes::thInitialize(const char* idxpath, const char* datpath)
|
||||
{
|
||||
|
||||
// open the index file
|
||||
FILE * pifile = fopen(idxpath,"r");
|
||||
if (!pifile) {
|
||||
pifile = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// parse in encoding and index size */
|
||||
char * wrd;
|
||||
wrd = (char *)calloc(1, MAX_WD_LEN);
|
||||
int len = readLine(pifile,wrd,MAX_WD_LEN);
|
||||
encoding = mythes_mystrdup(wrd);
|
||||
len = readLine(pifile,wrd,MAX_WD_LEN);
|
||||
int idxsz = atoi(wrd);
|
||||
|
||||
|
||||
// now allocate list, offst for the given size
|
||||
list = (char**) calloc(idxsz,sizeof(char*));
|
||||
offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
|
||||
|
||||
if ( (!(list)) || (!(offst)) ) {
|
||||
fprintf(stderr,"Error - bad memory allocation\n");
|
||||
fflush(stderr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// now parse the remaining lines of the index
|
||||
len = readLine(pifile,wrd,MAX_WD_LEN);
|
||||
while (len > 0)
|
||||
{
|
||||
int np = mystr_indexOfChar(wrd,'|');
|
||||
if (nw < idxsz) {
|
||||
if (np >= 0) {
|
||||
*(wrd+np) = '\0';
|
||||
list[nw] = (char *)calloc(1,(np+1));
|
||||
memcpy((list[nw]),wrd,np);
|
||||
offst[nw] = atoi(wrd+np+1);
|
||||
nw++;
|
||||
}
|
||||
}
|
||||
len = readLine(pifile,wrd,MAX_WD_LEN);
|
||||
}
|
||||
|
||||
free((void *)wrd);
|
||||
fclose(pifile);
|
||||
pifile=NULL;
|
||||
|
||||
/* next open the data file */
|
||||
pdfile = fopen(datpath,"r");
|
||||
if (!pdfile) {
|
||||
pdfile = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
int MyThes::thCleanup()
|
||||
{
|
||||
/* first close the data file */
|
||||
if (pdfile) {
|
||||
fclose(pdfile);
|
||||
pdfile=NULL;
|
||||
}
|
||||
|
||||
/* now free up all the allocated strings on the list */
|
||||
for (int i=0; i < nw; i++)
|
||||
{
|
||||
if (list[i]) {
|
||||
free(list[i]);
|
||||
list[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (list) free((void*)list);
|
||||
if (offst) free((void*)offst);
|
||||
|
||||
nw = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// lookup text in index and count of meanings and a list of meaning entries
|
||||
// with each entry having a synonym count and pointer to an
|
||||
// array of char * (i.e the synonyms)
|
||||
//
|
||||
// note: calling routine should call CleanUpAfterLookup with the original
|
||||
// meaning point and count to properly deallocate memory
|
||||
|
||||
int MyThes::Lookup(const char * pText, int len, mentry** pme)
|
||||
{
|
||||
|
||||
*pme = NULL;
|
||||
|
||||
// handle the case of missing file or file related errors
|
||||
if (! pdfile) return 0;
|
||||
|
||||
long offset = 0;
|
||||
|
||||
/* copy search word and make sure null terminated */
|
||||
char * wrd = (char *) calloc(1,(len+1));
|
||||
memcpy(wrd,pText,len);
|
||||
|
||||
/* find it in the list */
|
||||
int idx = binsearch(wrd,list,nw);
|
||||
free(wrd);
|
||||
if (idx < 0) return 0;
|
||||
|
||||
// now seek to the offset
|
||||
offset = (long) offst[idx];
|
||||
int rc = fseek(pdfile,offset,SEEK_SET);
|
||||
if (rc) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// grab the count of the number of meanings
|
||||
// and allocate a list of meaning entries
|
||||
char * buf = NULL;
|
||||
buf = (char *) malloc( MAX_LN_LEN );
|
||||
if (!buf) return 0;
|
||||
readLine(pdfile, buf, (MAX_LN_LEN-1));
|
||||
int np = mystr_indexOfChar(buf,'|');
|
||||
if (np < 0) {
|
||||
free(buf);
|
||||
return 0;
|
||||
}
|
||||
int nmeanings = atoi(buf+np+1);
|
||||
*pme = (mentry*) malloc( nmeanings * sizeof(mentry) );
|
||||
if (!(*pme)) {
|
||||
free(buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// now read in each meaning and parse it to get defn, count and synonym lists
|
||||
mentry* pm = *(pme);
|
||||
char dfn[MAX_WD_LEN];
|
||||
|
||||
for (int j = 0; j < nmeanings; j++) {
|
||||
readLine(pdfile, buf, (MAX_LN_LEN-1));
|
||||
|
||||
pm->count = 0;
|
||||
pm->psyns = NULL;
|
||||
pm->defn = NULL;
|
||||
|
||||
// store away the part of speech for later use
|
||||
char * p = buf;
|
||||
char * pos = NULL;
|
||||
np = mystr_indexOfChar(p,'|');
|
||||
if (np >= 0) {
|
||||
*(buf+np) = '\0';
|
||||
pos = mythes_mystrdup(p);
|
||||
p = p + np + 1;
|
||||
} else {
|
||||
pos = mythes_mystrdup("");
|
||||
}
|
||||
|
||||
// count the number of fields in the remaining line
|
||||
int nf = 1;
|
||||
char * d = p;
|
||||
np = mystr_indexOfChar(d,'|');
|
||||
while ( np >= 0 ) {
|
||||
nf++;
|
||||
d = d + np + 1;
|
||||
np = mystr_indexOfChar(d,'|');
|
||||
}
|
||||
pm->count = nf;
|
||||
pm->psyns = (char **) malloc(nf*sizeof(char*));
|
||||
|
||||
// fill in the synonym list
|
||||
d = p;
|
||||
for (int j = 0; j < nf; j++) {
|
||||
np = mystr_indexOfChar(d,'|');
|
||||
if (np > 0) {
|
||||
*(d+np) = '\0';
|
||||
pm->psyns[j] = mythes_mystrdup(d);
|
||||
d = d + np + 1;
|
||||
} else {
|
||||
pm->psyns[j] = mythes_mystrdup(d);
|
||||
}
|
||||
}
|
||||
|
||||
// add pos to first synonym to create the definition
|
||||
int k = strlen(pos);
|
||||
int m = strlen(pm->psyns[0]);
|
||||
if ((k+m) < (MAX_WD_LEN - 1)) {
|
||||
strncpy(dfn,pos,k);
|
||||
*(dfn+k) = ' ';
|
||||
strncpy((dfn+k+1),(pm->psyns[0]),m+1);
|
||||
pm->defn = mythes_mystrdup(dfn);
|
||||
} else {
|
||||
pm->defn = mythes_mystrdup(pm->psyns[0]);
|
||||
}
|
||||
free(pos);
|
||||
pm++;
|
||||
|
||||
}
|
||||
free(buf);
|
||||
|
||||
return nmeanings;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
|
||||
{
|
||||
|
||||
if (nmeanings == 0) return;
|
||||
if ((*pme) == NULL) return;
|
||||
|
||||
mentry * pm = *pme;
|
||||
|
||||
for (int i = 0; i < nmeanings; i++) {
|
||||
int count = pm->count;
|
||||
for (int j = 0; j < count; j++) {
|
||||
if (pm->psyns[j]) free(pm->psyns[j]);
|
||||
pm->psyns[j] = NULL;
|
||||
}
|
||||
if (pm->psyns) free(pm->psyns);
|
||||
pm->psyns = NULL;
|
||||
if (pm->defn) free(pm->defn);
|
||||
pm->defn = NULL;
|
||||
pm->count = 0;
|
||||
pm++;
|
||||
}
|
||||
pm = *pme;
|
||||
free(pm);
|
||||
*pme = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// read a line of text from a text file stripping
|
||||
// off the line terminator and replacing it with
|
||||
// a null string terminator.
|
||||
// returns: -1 on error or the number of characters in
|
||||
// in the returning string
|
||||
|
||||
// A maximum of nc characters will be returned
|
||||
|
||||
int MyThes::readLine(FILE * pf, char * buf, int nc)
|
||||
{
|
||||
|
||||
if (fgets(buf,nc,pf)) {
|
||||
mythes_mychomp(buf);
|
||||
return strlen(buf);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// performs a binary search on null terminated character
|
||||
// strings
|
||||
//
|
||||
// returns: -1 on not found
|
||||
// index of wrd in the list[]
|
||||
|
||||
int MyThes::binsearch(char * sw, char* list[], int nlst)
|
||||
{
|
||||
int lp, up, mp, j, indx;
|
||||
lp = 0;
|
||||
up = nlst-1;
|
||||
indx = -1;
|
||||
if (nlst == 0) return -1;
|
||||
if (strcmp(sw,list[lp]) < 0) return -1;
|
||||
if (strcmp(sw,list[up]) > 0) return -1;
|
||||
while (indx < 0 ) {
|
||||
mp = (int)((lp+up) >> 1);
|
||||
j = strcmp(sw,list[mp]);
|
||||
if ( j > 0) {
|
||||
lp = mp + 1;
|
||||
} else if (j < 0 ) {
|
||||
up = mp - 1;
|
||||
} else {
|
||||
indx = mp;
|
||||
}
|
||||
if (lp > up) return -1;
|
||||
}
|
||||
return indx;
|
||||
}
|
||||
|
||||
char * MyThes::get_th_encoding()
|
||||
{
|
||||
if (encoding) return encoding;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
@ -1,103 +0,0 @@
|
|||
/*
|
||||
* Copyright 2003 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _MYTHES_HXX_
|
||||
#define _MYTHES_HXX_
|
||||
|
||||
// some maximum sizes for buffers
|
||||
#define MAX_WD_LEN 200
|
||||
#define MAX_LN_LEN 16384
|
||||
|
||||
|
||||
// a meaning with definition, count of synonyms and synonym list
|
||||
struct mentry {
|
||||
char* defn;
|
||||
int count;
|
||||
char** psyns;
|
||||
};
|
||||
|
||||
|
||||
class MyThes
|
||||
{
|
||||
|
||||
int nw; /* number of entries in thesaurus */
|
||||
char** list; /* stores word list */
|
||||
unsigned int* offst; /* stores offset list */
|
||||
char * encoding; /* stores text encoding; */
|
||||
|
||||
FILE *pdfile;
|
||||
|
||||
// disallow copy-constructor and assignment-operator for now
|
||||
MyThes();
|
||||
MyThes(const MyThes &);
|
||||
MyThes & operator = (const MyThes &);
|
||||
|
||||
public:
|
||||
MyThes(const char* idxpath, const char* datpath);
|
||||
~MyThes();
|
||||
|
||||
// lookup text in index and return number of meanings
|
||||
// each meaning entry has a defintion, synonym count and pointer
|
||||
// when complete return the *original* meaning entry and count via
|
||||
// CleanUpAfterLookup to properly handle memory deallocation
|
||||
|
||||
int Lookup(const char * pText, int len, mentry** pme);
|
||||
|
||||
void CleanUpAfterLookup(mentry** pme, int nmean);
|
||||
|
||||
char* get_th_encoding();
|
||||
|
||||
private:
|
||||
// Open index and dat files and load list array
|
||||
int thInitialize (const char* indxpath, const char* datpath);
|
||||
|
||||
// internal close and cleanup dat and idx files
|
||||
int thCleanup ();
|
||||
|
||||
// read a text line (\n terminated) stripping off line terminator
|
||||
int readLine(FILE * pf, char * buf, int nc);
|
||||
|
||||
// binary search on null terminated character strings
|
||||
int binsearch(char * wrd, char* list[], int nlst);
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -79,7 +79,7 @@ enum {
|
|||
SubsTextEditCtrl::SubsTextEditCtrl(wxWindow* parent, wxSize wsize, long style, SubtitlesGrid *grid)
|
||||
: ScintillaTextCtrl(parent, -1, "", wxDefaultPosition, wsize, style)
|
||||
, spellchecker(SpellCheckerFactory::GetSpellChecker())
|
||||
, thesaurus(Thesaurus::GetThesaurus())
|
||||
, thesaurus(new Thesaurus)
|
||||
, grid(grid)
|
||||
{
|
||||
// Set properties
|
||||
|
@ -795,14 +795,15 @@ void SubsTextEditCtrl::OnContextMenu(wxContextMenuEvent &event) {
|
|||
// Thesaurus
|
||||
if (thesaurus.get() && currentWord.Length()) {
|
||||
// Get results
|
||||
ThesaurusEntryArray result;
|
||||
thesaurus->Lookup(currentWord,result);
|
||||
std::vector<Thesaurus::Entry> result;
|
||||
thesaurus->Lookup(currentWord,&result);
|
||||
|
||||
// Compile list
|
||||
thesSugs.Clear();
|
||||
for (unsigned int i=0;i<result.size();i++) {
|
||||
for (unsigned int j=0;j<result[i].words.Count();j++) {
|
||||
thesSugs.Add(result[i].words[j]);
|
||||
thesSugs.clear();
|
||||
thesSugs.reserve(result.size() * 5);
|
||||
for (size_t i = 0; i < result.size(); ++i) {
|
||||
for (size_t j = 0; j < result[i].second.size(); ++j) {
|
||||
thesSugs.push_back(result[i].second[j]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -815,10 +816,10 @@ void SubsTextEditCtrl::OnContextMenu(wxContextMenuEvent &event) {
|
|||
|
||||
// Build menu
|
||||
int curThesEntry = 0;
|
||||
for (unsigned int i=0;i<result.size();i++) {
|
||||
for (size_t i=0;i<result.size();i++) {
|
||||
// Single word, insert directly
|
||||
if (result[i].words.Count() == 1) {
|
||||
thesMenu->Append(EDIT_MENU_THESAURUS_SUGS+curThesEntry,result[i].name);
|
||||
if (result[i].second.size() == 1) {
|
||||
thesMenu->Append(EDIT_MENU_THESAURUS_SUGS+curThesEntry,lagi_wxString(result[i].first));
|
||||
curThesEntry++;
|
||||
}
|
||||
|
||||
|
@ -826,13 +827,13 @@ void SubsTextEditCtrl::OnContextMenu(wxContextMenuEvent &event) {
|
|||
else {
|
||||
// Insert entries
|
||||
wxMenu *subMenu = new wxMenu();
|
||||
for (unsigned int j=0;j<result[i].words.Count();j++) {
|
||||
subMenu->Append(EDIT_MENU_THESAURUS_SUGS+curThesEntry,result[i].words[j]);
|
||||
for (size_t j=0;j<result[i].second.size();j++) {
|
||||
subMenu->Append(EDIT_MENU_THESAURUS_SUGS+curThesEntry,lagi_wxString(result[i].second[j]));
|
||||
curThesEntry++;
|
||||
}
|
||||
|
||||
// Insert submenu
|
||||
thesMenu->Append(-1, result[i].name, subMenu);
|
||||
thesMenu->Append(-1, lagi_wxString(result[i].first), subMenu);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -911,7 +912,7 @@ void SubsTextEditCtrl::OnUseSuggestion(wxCommandEvent &event) {
|
|||
wxString suggestion;
|
||||
int sugIdx = event.GetId() - EDIT_MENU_THESAURUS_SUGS;
|
||||
if (sugIdx >= 0) {
|
||||
suggestion = thesSugs[sugIdx];
|
||||
suggestion = lagi_wxString(thesSugs[sugIdx]);
|
||||
}
|
||||
else {
|
||||
suggestion = sugs[event.GetId() - EDIT_MENU_SUGGESTIONS];
|
||||
|
@ -953,10 +954,9 @@ void SubsTextEditCtrl::OnSetThesLanguage(wxCommandEvent &event) {
|
|||
|
||||
// Set language
|
||||
int index = event.GetId() - EDIT_MENU_THES_LANGS - 1;
|
||||
if (index >= 0) {
|
||||
thesaurus->SetLanguage(langs[index]);
|
||||
OPT_SET("Tool/Thesaurus/Language")->SetString(STD_STR(langs[index]));
|
||||
}
|
||||
wxString lang;
|
||||
if (index >= 0) lang = langs[index];
|
||||
OPT_SET("Tool/Thesaurus/Language")->SetString(STD_STR(lang));
|
||||
|
||||
UpdateStyle();
|
||||
}
|
||||
|
|
|
@ -67,7 +67,7 @@ class SubsTextEditCtrl : public ScintillaTextCtrl {
|
|||
wxArrayString sugs;
|
||||
|
||||
/// DOCME
|
||||
wxArrayString thesSugs;
|
||||
std::vector<std::string> thesSugs;
|
||||
|
||||
/// DOCME
|
||||
int currentWordPos;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (c) 2006, Rodrigo Braz Monteiro
|
||||
// Copyright (c) 2011, Thomas Goyne <plorkyeran@aegisub.org>
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
@ -30,29 +30,111 @@
|
|||
// $Id$
|
||||
|
||||
/// @file thesaurus.cpp
|
||||
/// @brief Base-class for thesaurus implementations
|
||||
/// @brief Thesaurus implementation
|
||||
/// @ingroup thesaurus
|
||||
///
|
||||
|
||||
|
||||
///////////
|
||||
// Headers
|
||||
#include "config.h"
|
||||
|
||||
#include "thesaurus_myspell.h"
|
||||
#include "thesaurus.h"
|
||||
|
||||
#ifndef AGI_PRE
|
||||
#include <wx/dir.h>
|
||||
#include <wx/filename.h>
|
||||
#endif
|
||||
|
||||
/// @brief Get spell checker
|
||||
///
|
||||
Thesaurus *Thesaurus::GetThesaurus() {
|
||||
// Initialize
|
||||
Thesaurus *thes = NULL;
|
||||
#include <libaegisub/log.h>
|
||||
#include <libaegisub/thesaurus.h>
|
||||
|
||||
// Get myspell
|
||||
thes = new MySpellThesaurus();
|
||||
#include "compat.h"
|
||||
#include "main.h"
|
||||
#include "standard_paths.h"
|
||||
|
||||
// Return
|
||||
return thes;
|
||||
/// Subscribe to the thesaurus language and dictionary path options, then
/// load the thesaurus for the currently selected language
Thesaurus::Thesaurus()
: lang_listener(OPT_SUB("Tool/Thesaurus/Language", &Thesaurus::OnLanguageChanged, this))
, dict_path_listener(OPT_SUB("Path/Dictionary", &Thesaurus::OnPathChanged, this))
{
	// Same work as when the language option changes later on
	OnLanguageChanged();
}
|
||||
|
||||
// Explicit empty destructor needed for scoped_ptr with incomplete types
Thesaurus::~Thesaurus() { }
|
||||
|
||||
/// Look up synonyms for a word in the currently loaded thesaurus
/// @param word Word to look up; it is lower-cased before the lookup
/// @param[out] result Vector to fill with word/synonym lists. It is emptied
///                    when no thesaurus is loaded so that callers never see
///                    stale entries from an earlier lookup.
void Thesaurus::Lookup(wxString const& word, std::vector<Entry> *result) {
	if (impl.get())
		impl->Lookup(STD_STR(word.Lower()), result);
	else
		result->clear();
}
|
||||
|
||||
/// Get the languages for which both a th_*.idx and a th_*.dat file exist in
/// the application's dictionary directory or the user's dictionary path
/// @return List of language names, each listed once; the result is cached in
///         languages until it is cleared again
wxArrayString Thesaurus::GetLanguageList() const {
	if (!languages.empty()) return languages;

	wxArrayString idx, dat;

	// Get list of dictionaries
	wxString path = StandardPaths::DecodePath("?data/dictionaries/");
	if (wxFileName::DirExists(path)) {
		wxDir::GetAllFiles(path, &idx, "th_*.idx", wxDIR_FILES);
		wxDir::GetAllFiles(path, &dat, "th_*.dat", wxDIR_FILES);
	}
	path = StandardPaths::DecodePath(lagi_wxString(OPT_GET("Path/Dictionary")->GetString()) + "/");
	if (wxFileName::DirExists(path)) {
		wxDir::GetAllFiles(path, &idx, "th_*.idx", wxDIR_FILES);
		wxDir::GetAllFiles(path, &dat, "th_*.dat", wxDIR_FILES);
	}
	if (idx.empty() || dat.empty()) return languages;

	idx.Sort();
	dat.Sort();

	// Drop the extension and the first three characters so that matching idx
	// and dat entries compare equal.
	// NOTE(review): wxDir::GetAllFiles appears to return full paths, in which
	// case this trims the start of the path rather than the th_ prefix. That
	// is harmless as both lists get the same treatment and the prefix is
	// removed from the file name below -- confirm.
	for (size_t i = 0; i < idx.size(); ++i) idx[i] = idx[i].Mid(3, idx[i].size() - 7);
	for (size_t i = 0; i < dat.size(); ++i) dat[i] = dat[i].Mid(3, dat[i].size() - 7);

	// Verify that each idx has a dat
	for (size_t i = 0, j = 0; i < idx.size() && j < dat.size(); ) {
		int cmp = idx[i].Cmp(dat[j]);
		if (cmp < 0) ++i;
		else if (cmp > 0) ++j;
		else {
			// Don't insert a language twice if it's in both the user dir and
			// the app's dir. The lists are sorted by path rather than by
			// language, so the duplicate is not necessarily the previously
			// inserted entry and the whole list has to be checked.
			wxString name = wxFileName(dat[j]).GetName().Mid(3);
			if (languages.Index(name) == wxNOT_FOUND)
				languages.push_back(name);
			++i;
			++j;
		}
	}
	return languages;
}
|
||||
|
||||
void Thesaurus::OnLanguageChanged() {
|
||||
impl.reset();
|
||||
|
||||
std::string language = OPT_GET("Tool/Thesaurus/Language")->GetString();
|
||||
if (language.empty()) return;
|
||||
|
||||
wxString path = StandardPaths::DecodePath(lagi_wxString(OPT_GET("Path/Dictionary")->GetString()) + "/");
|
||||
|
||||
// Get index and data paths
|
||||
wxString idxpath = wxString::Format("%s/th_%s.idx", path, language);
|
||||
wxString datpath = wxString::Format("%s/th_%s.dat", path, language);
|
||||
|
||||
// If they aren't in the user dictionary path, check the application directory
|
||||
if (!wxFileExists(idxpath) || !wxFileExists(datpath)) {
|
||||
path = StandardPaths::DecodePath("?data/dictionaries/");
|
||||
idxpath = wxString::Format("%s/th_%s.idx", path, language);
|
||||
datpath = wxString::Format("%s/th_%s.dat", path, language);
|
||||
|
||||
if (!wxFileExists(idxpath) || !wxFileExists(datpath)) return;
|
||||
}
|
||||
|
||||
LOG_I("thesaurus/file") << "Using thesaurus: " << datpath.c_str();
|
||||
|
||||
impl.reset(new agi::Thesaurus(STD_STR(datpath), STD_STR(idxpath)));
|
||||
}
|
||||
|
||||
/// Dictionary path change handler: discard the cached language list
void Thesaurus::OnPathChanged() {
	languages.Clear();
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (c) 2006, Rodrigo Braz Monteiro
|
||||
// Copyright (c) 2011, Thomas Goyne <plorkyeran@aegisub.org>
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
@ -34,9 +34,6 @@
|
|||
/// @ingroup thesaurus
|
||||
///
|
||||
|
||||
|
||||
///////////
|
||||
// Headers
|
||||
#ifndef AGI_PRE
|
||||
#include <vector>
|
||||
|
||||
|
@ -44,50 +41,40 @@
|
|||
#include <wx/string.h>
|
||||
#endif
|
||||
|
||||
#include <libaegisub/scoped_ptr.h>
|
||||
#include <libaegisub/signal.h>
|
||||
|
||||
/// DOCME
|
||||
/// @class ThesaurusEntry
|
||||
/// @brief DOCME
|
||||
///
|
||||
/// DOCME
|
||||
class ThesaurusEntry {
|
||||
public:
|
||||
namespace agi { class Thesaurus; }
|
||||
|
||||
/// DOCME
|
||||
wxString name;
|
||||
|
||||
/// DOCME
|
||||
wxArrayString words;
|
||||
};
|
||||
|
||||
|
||||
|
||||
/// DOCME
|
||||
typedef std::vector<ThesaurusEntry> ThesaurusEntryArray;
|
||||
|
||||
|
||||
|
||||
/// DOCME
|
||||
/// @class Thesaurus
|
||||
/// @brief DOCME
|
||||
///
|
||||
/// DOCME
|
||||
/// @brief A wrapper around agi::Thesaurus adding wx and Aegisub-specific stuff
|
||||
class Thesaurus {
|
||||
/// The actual thesaurus implementation
|
||||
agi::scoped_ptr<agi::Thesaurus> impl;
|
||||
/// A cached list of languages available
|
||||
mutable wxArrayString languages;
|
||||
|
||||
/// Thesaurus language change slot
|
||||
agi::signal::Connection lang_listener;
|
||||
/// Thesaurus language change handler
|
||||
void OnLanguageChanged();
|
||||
|
||||
/// Thesaurus path change slot
|
||||
agi::signal::Connection dict_path_listener;
|
||||
/// Thesaurus path change handler
|
||||
void OnPathChanged();
|
||||
public:
|
||||
static Thesaurus *GetThesaurus();
|
||||
/// A pair of a word and synonyms for that word
|
||||
typedef std::pair<std::string, std::vector<std::string> > Entry;
|
||||
|
||||
Thesaurus();
|
||||
~Thesaurus();
|
||||
|
||||
/// @brief DOCME
|
||||
///
|
||||
Thesaurus() {}
|
||||
/// Get a list of synonyms for a word, grouped by possible meanings of the word
|
||||
/// @param word Word to get synonyms for
|
||||
/// @param[out] result Output list
|
||||
void Lookup(wxString const& word, std::vector<Entry> *result);
|
||||
|
||||
/// @brief DOCME
|
||||
///
|
||||
virtual ~Thesaurus() {}
|
||||
|
||||
virtual void Lookup(wxString word,ThesaurusEntryArray &result)=0;
|
||||
virtual wxArrayString GetLanguageList()=0;
|
||||
virtual void SetLanguage(wxString language)=0;
|
||||
/// Get a list of language codes which thesauri are available for
|
||||
wxArrayString GetLanguageList() const;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -1,176 +0,0 @@
|
|||
// Copyright (c) 2006, Rodrigo Braz Monteiro
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of the Aegisub Group nor the names of its contributors
|
||||
// may be used to endorse or promote products derived from this software
|
||||
// without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Aegisub Project http://www.aegisub.org/
|
||||
//
|
||||
// $Id$
|
||||
|
||||
/// @file thesaurus_myspell.cpp
|
||||
/// @brief MySpell-based thesaurus implementation
|
||||
/// @ingroup thesaurus
|
||||
///
|
||||
|
||||
|
||||
///////////
|
||||
// Headers
|
||||
#include "config.h"
|
||||
|
||||
#ifndef AGI_PRE
|
||||
#include <wx/dir.h>
|
||||
#include <wx/filename.h>
|
||||
#include <wx/log.h>
|
||||
#endif
|
||||
|
||||
#include <libaegisub/log.h>
|
||||
|
||||
#include "compat.h"
|
||||
#include "mythes.hxx"
|
||||
#include "main.h"
|
||||
#include "standard_paths.h"
|
||||
#include "thesaurus_myspell.h"
|
||||
#include "utils.h"
|
||||
|
||||
|
||||
/// @brief Constructor
|
||||
///
|
||||
MySpellThesaurus::MySpellThesaurus() {
|
||||
conv = NULL;
|
||||
mythes = NULL;
|
||||
SetLanguage(lagi_wxString(OPT_GET("Tool/Thesaurus/Language")->GetString()));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// @brief Destructor
|
||||
///
|
||||
MySpellThesaurus::~MySpellThesaurus() {
|
||||
delete mythes;
|
||||
mythes = NULL;
|
||||
delete conv;
|
||||
conv = NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// @brief Get suggestions
|
||||
/// @param word
|
||||
/// @param result
|
||||
/// @return
|
||||
///
|
||||
void MySpellThesaurus::Lookup(wxString word,ThesaurusEntryArray &result) {
|
||||
// Loaded?
|
||||
if (!mythes) return;
|
||||
|
||||
// Grab raw from MyThes
|
||||
mentry *me;
|
||||
wxCharBuffer buf = word.Lower().mb_str(*conv);
|
||||
if (!buf) return;
|
||||
int n = mythes->Lookup(buf,strlen(buf),&me);
|
||||
|
||||
// Each entry
|
||||
for (int i=0;i<n;i++) {
|
||||
ThesaurusEntry entry;
|
||||
entry.name = wxString(me[i].defn,*conv);
|
||||
for (int j=0;j<me[i].count;j++) entry.words.Add(wxString(me[i].psyns[j],*conv));
|
||||
result.push_back(entry);
|
||||
}
|
||||
|
||||
// Clean up
|
||||
mythes->CleanUpAfterLookup(&me,n);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// @brief Get language list
|
||||
/// @return
|
||||
///
|
||||
wxArrayString MySpellThesaurus::GetLanguageList() {
|
||||
// Get dir name
|
||||
wxString path = StandardPaths::DecodePathMaybeRelative(lagi_wxString(OPT_GET("Path/Dictionary")->GetString()), _T("?data")) + _T("/");
|
||||
wxArrayString list;
|
||||
wxFileName folder(path);
|
||||
if (!folder.DirExists()) return list;
|
||||
|
||||
// Get file lists
|
||||
wxArrayString idx;
|
||||
wxDir::GetAllFiles(path,&idx,_T("*.idx"),wxDIR_FILES);
|
||||
wxArrayString dat;
|
||||
wxDir::GetAllFiles(path,&dat,_T("*.dat"),wxDIR_FILES);
|
||||
|
||||
// For each idxtionary match, see if it can find the corresponding .dat
|
||||
for (unsigned int i=0;i<idx.Count();i++) {
|
||||
wxString curdat = idx[i].Left(std::max(0,signed(idx[i].Length())-4)) + _T(".dat");
|
||||
for (unsigned int j=0;j<dat.Count();j++) {
|
||||
// Found match
|
||||
if (curdat == dat[j]) {
|
||||
wxFileName fname(curdat);
|
||||
wxString name = fname.GetName();
|
||||
if (name.Left(3) == _T("th_")) name = name.Mid(3);
|
||||
list.Add(name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Return list
|
||||
return list;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// @brief Set language
|
||||
/// @param language
|
||||
///
|
||||
void MySpellThesaurus::SetLanguage(wxString language) {
|
||||
// Unload
|
||||
delete mythes;
|
||||
mythes = NULL;
|
||||
delete conv;
|
||||
conv = NULL;
|
||||
|
||||
// Unloading
|
||||
if (language.IsEmpty()) return;
|
||||
|
||||
// Get dir name
|
||||
wxString path = StandardPaths::DecodePathMaybeRelative(lagi_wxString(OPT_GET("Path/Dictionary")->GetString()), _T("?data")) + _T("/");
|
||||
|
||||
// Get affix and dictionary paths
|
||||
wxString idxpath = path + _T("th_") + language + _T(".idx");
|
||||
wxString datpath = path + _T("th_") + language + _T(".dat");
|
||||
|
||||
// Check if language is available
|
||||
if (!wxFileExists(idxpath) || !wxFileExists(datpath)) return;
|
||||
|
||||
LOG_I("thesaurus/file") << "Using thesaurus: " << datpath.c_str();
|
||||
|
||||
// Load
|
||||
mythes = new MyThes(idxpath.mb_str(wxConvLocal),datpath.mb_str(wxConvLocal));
|
||||
conv = NULL;
|
||||
if (mythes) conv = new wxCSConv(wxString(mythes->get_th_encoding(),wxConvUTF8));
|
||||
}
|
||||
|
||||
|
|
@ -1,73 +0,0 @@
|
|||
// Copyright (c) 2006, Rodrigo Braz Monteiro
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of the Aegisub Group nor the names of its contributors
|
||||
// may be used to endorse or promote products derived from this software
|
||||
// without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Aegisub Project http://www.aegisub.org/
|
||||
//
|
||||
// $Id$
|
||||
|
||||
/// @file thesaurus_myspell.h
|
||||
/// @see thesaurus_myspell.cpp
|
||||
/// @ingroup thesaurus
|
||||
///
|
||||
|
||||
|
||||
|
||||
|
||||
///////////
|
||||
// Headers
|
||||
#include "thesaurus.h"
|
||||
|
||||
|
||||
//////////////
|
||||
// Prototypes
|
||||
class MyThes;
|
||||
|
||||
|
||||
/// DOCME
|
||||
/// @class MySpellThesaurus
|
||||
/// @brief DOCME
|
||||
///
|
||||
/// DOCME
|
||||
class MySpellThesaurus: public Thesaurus {
|
||||
private:
|
||||
|
||||
/// DOCME
|
||||
MyThes *mythes;
|
||||
|
||||
/// DOCME
|
||||
wxCSConv *conv;
|
||||
|
||||
public:
|
||||
MySpellThesaurus();
|
||||
~MySpellThesaurus();
|
||||
|
||||
void Lookup(wxString word,ThesaurusEntryArray &result);
|
||||
wxArrayString GetLanguageList();
|
||||
void SetLanguage(wxString language);
|
||||
};
|
||||
|
||||
|
|
@ -27,6 +27,7 @@ SRC = \
|
|||
libaegisub_option.cpp \
|
||||
libaegisub_mru.cpp \
|
||||
libaegisub_signals.cpp \
|
||||
libaegisub_thesaurus.cpp \
|
||||
libaegisub_util.cpp \
|
||||
libaegisub_vfr.cpp
|
||||
|
||||
|
|
148
aegisub/tests/libaegisub_thesaurus.cpp
Normal file
148
aegisub/tests/libaegisub_thesaurus.cpp
Normal file
|
@ -0,0 +1,148 @@
|
|||
// Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
|
||||
//
|
||||
// Permission to use, copy, modify, and distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice appear in all copies.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
//
|
||||
// $Id$
|
||||
|
||||
#include <libaegisub/thesaurus.h>
|
||||
|
||||
#include "main.h"
|
||||
#include "util.h"
|
||||
|
||||
#include <fstream>
|
||||
|
||||
/// Fixture which writes a small thesaurus (index file plus data file) to
/// data/thes.idx and data/thes.dat before each test
class lagi_thes : public libagi {
	/// Write an index line which maps @p word to the current write position
	/// of the data file
	static void IndexNextEntry(std::ostream &idx, std::ostream &dat, const char *word) {
		idx << word << '|' << dat.tellp() << std::endl;
	}

protected:
	std::string idx_path;
	std::string dat_path;

	void SetUp() {
		using std::endl;

		idx_path = "data/thes.idx";
		dat_path = "data/thes.dat";

		std::ofstream idx(idx_path.c_str());
		std::ofstream dat(dat_path.c_str());

		// Both files start with the name of their encoding
		idx << "UTF-8" << endl;
		dat << "UTF-8" << endl;
		// NOTE(review): eight index lines are written below although the count
		// says 7; presumably the reader does not validate it -- confirm
		idx << 7 << endl; // entry count

		// A word with a single meaning
		IndexNextEntry(idx, dat, "Word 1");
		dat << "Word 1|1" << endl;
		dat << "(noun)|Word 1|Word 1A|Word 1B|Word 1C" << endl;

		// A word with two meanings
		IndexNextEntry(idx, dat, "Word 2");
		dat << "Word 2|2" << endl;
		dat << "(adj)|Word 2|Word 2 adj" << endl;
		dat << "(noun)|Word 2|Word 2 noun" << endl;

		// An entry in the data file which has no index line
		dat << "Unindexed Word|1" << endl;
		dat << "(adv)|Unindexed Word|Indexed Word" << endl;

		// A word whose meaning line names a different word
		IndexNextEntry(idx, dat, "Word 3");
		dat << "Word 3|1" << endl;
		dat << "(verb)|Not Word 3|Four" << endl;

		// Malformed index lines and offsets at or past the end of the data file
		idx << "Too few fields" << endl;
		idx << "Too many fields|100|100" << endl;
		idx << "Not a number|foo" << endl;
		IndexNextEntry(idx, dat, "Out of range");
		idx << "Further out of range|" << 1 + dat.tellp() << endl;
	}
};
|
||||
|
||||
/// Constructing a thesaurus from the fixture's files must not throw, even
/// though the index contains malformed lines
TEST_F(lagi_thes, parse) {
	ASSERT_NO_THROW(agi::Thesaurus thes(dat_path, idx_path));
}
|
||||
|
||||
/// A word with a single meaning yields one entry holding all of its synonyms
TEST_F(lagi_thes, word_1) {
	agi::Thesaurus thes(dat_path, idx_path);

	std::vector<agi::Thesaurus::Entry> entries;
	ASSERT_NO_THROW(thes.Lookup("Word 1", &entries));
	// Unsigned literals avoid comparing a signed int against size_t
	ASSERT_EQ(1u, entries.size());
	ASSERT_EQ(3u, entries[0].second.size());
	EXPECT_STREQ("(noun) Word 1", entries[0].first.c_str());
	EXPECT_STREQ("Word 1A", entries[0].second[0].c_str());
	EXPECT_STREQ("Word 1B", entries[0].second[1].c_str());
	EXPECT_STREQ("Word 1C", entries[0].second[2].c_str());
}
|
||||
|
||||
/// A word with two meanings yields one entry per meaning, in file order
TEST_F(lagi_thes, word_2) {
	agi::Thesaurus thes(dat_path, idx_path);

	std::vector<agi::Thesaurus::Entry> entries;
	ASSERT_NO_THROW(thes.Lookup("Word 2", &entries));
	// Unsigned literals avoid comparing a signed int against size_t
	ASSERT_EQ(2u, entries.size());
	ASSERT_EQ(1u, entries[0].second.size());
	ASSERT_EQ(1u, entries[1].second.size());
	EXPECT_STREQ("(adj) Word 2", entries[0].first.c_str());
	EXPECT_STREQ("(noun) Word 2", entries[1].first.c_str());
	EXPECT_STREQ("Word 2 adj", entries[0].second[0].c_str());
	EXPECT_STREQ("Word 2 noun", entries[1].second[0].c_str());
}
|
||||
|
||||
/// The entry label is built from the meaning line itself, even when that line
/// names a different word than the one looked up
TEST_F(lagi_thes, word_3) {
	agi::Thesaurus thes(dat_path, idx_path);

	std::vector<agi::Thesaurus::Entry> entries;
	ASSERT_NO_THROW(thes.Lookup("Word 3", &entries));
	// Unsigned literals avoid comparing a signed int against size_t
	ASSERT_EQ(1u, entries.size());
	ASSERT_EQ(1u, entries[0].second.size());
	EXPECT_STREQ("(verb) Not Word 3", entries[0].first.c_str());
	EXPECT_STREQ("Four", entries[0].second[0].c_str());
}
|
||||
|
||||
/// Looking up a word which is in neither file yields no entries and does not throw
TEST_F(lagi_thes, bad_word) {
	agi::Thesaurus thes(dat_path, idx_path);

	std::vector<agi::Thesaurus::Entry> entries;
	ASSERT_NO_THROW(thes.Lookup("Nonexistent word", &entries));
	// Unsigned literal avoids comparing a signed int against size_t
	EXPECT_EQ(0u, entries.size());
}
|
||||
|
||||
/// Results of earlier lookups must not pile up in the output vector: after
/// three lookups only the single entry for the last word is expected
TEST_F(lagi_thes, lookup_clears) {
	agi::Thesaurus thes(dat_path, idx_path);

	std::vector<agi::Thesaurus::Entry> entries;
	ASSERT_NO_THROW(thes.Lookup("Word 1", &entries));
	ASSERT_NO_THROW(thes.Lookup("Word 2", &entries));
	ASSERT_NO_THROW(thes.Lookup("Word 3", &entries));
	// Unsigned literal avoids comparing a signed int against size_t
	EXPECT_EQ(1u, entries.size());
}
|
||||
|
||||
/// Words whose index lines are malformed, or whose offsets lie at or beyond
/// the end of the data file, yield no entries and do not throw
TEST_F(lagi_thes, malformed_index_lines) {
	agi::Thesaurus thes(dat_path, idx_path);

	std::vector<agi::Thesaurus::Entry> entries;
	// Unsigned literals avoid comparing a signed int against size_t
	ASSERT_NO_THROW(thes.Lookup("Too few fields", &entries));
	EXPECT_EQ(0u, entries.size());
	ASSERT_NO_THROW(thes.Lookup("Too many fields", &entries));
	EXPECT_EQ(0u, entries.size());
	ASSERT_NO_THROW(thes.Lookup("Not a number", &entries));
	EXPECT_EQ(0u, entries.size());
	ASSERT_NO_THROW(thes.Lookup("Out of range", &entries));
	EXPECT_EQ(0u, entries.size());
	ASSERT_NO_THROW(thes.Lookup("Further out of range", &entries));
	EXPECT_EQ(0u, entries.size());
}
|
||||
|
||||
/// A word which is in the data file but has no index line is not found
TEST_F(lagi_thes, unindexed_word) {
	agi::Thesaurus thes(dat_path, idx_path);

	std::vector<agi::Thesaurus::Entry> entries;
	ASSERT_NO_THROW(thes.Lookup("Unindexed Word", &entries));
	// Unsigned literal avoids comparing a signed int against size_t
	EXPECT_EQ(0u, entries.size());
}
|
Loading…
Reference in a new issue