Replace MyThes thesaurus implementation with a custom one
Fixes a number of Unicode-related issues, such as dictionaries stored in a path that cannot be represented in the system's local charset, and significantly cuts down on the amount of code. Originally committed to SVN as r6250.
This commit is contained in:
parent
518f93f18f
commit
3c62a38c7a
18 changed files with 462 additions and 845 deletions
|
@ -855,14 +855,6 @@
|
||||||
RelativePath="..\..\src\md5.h"
|
RelativePath="..\..\src\md5.h"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
<File
|
|
||||||
RelativePath="..\..\src\mythes.cxx"
|
|
||||||
>
|
|
||||||
</File>
|
|
||||||
<File
|
|
||||||
RelativePath="..\..\src\mythes.hxx"
|
|
||||||
>
|
|
||||||
</File>
|
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\src\pen.cpp"
|
RelativePath="..\..\src\pen.cpp"
|
||||||
>
|
>
|
||||||
|
@ -1423,14 +1415,6 @@
|
||||||
RelativePath="..\..\src\thesaurus.h"
|
RelativePath="..\..\src\thesaurus.h"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
<File
|
|
||||||
RelativePath="..\..\src\thesaurus_myspell.cpp"
|
|
||||||
>
|
|
||||||
</File>
|
|
||||||
<File
|
|
||||||
RelativePath="..\..\src\thesaurus_myspell.h"
|
|
||||||
>
|
|
||||||
</File>
|
|
||||||
</Filter>
|
</Filter>
|
||||||
<Filter
|
<Filter
|
||||||
Name="Subtitle Formats"
|
Name="Subtitle Formats"
|
||||||
|
|
|
@ -311,6 +311,10 @@
|
||||||
RelativePath="..\..\libaegisub\common\path.cpp"
|
RelativePath="..\..\libaegisub\common\path.cpp"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
|
<File
|
||||||
|
RelativePath="..\..\libaegisub\common\thesaurus.cpp"
|
||||||
|
>
|
||||||
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\libaegisub\common\util.cpp"
|
RelativePath="..\..\libaegisub\common\util.cpp"
|
||||||
>
|
>
|
||||||
|
@ -477,6 +481,10 @@
|
||||||
RelativePath="..\..\libaegisub\include\libaegisub\signal.h"
|
RelativePath="..\..\libaegisub\include\libaegisub\signal.h"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
|
<File
|
||||||
|
RelativePath="..\..\libaegisub\include\libaegisub\thesaurus.h"
|
||||||
|
>
|
||||||
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\libaegisub\include\libaegisub\types.h"
|
RelativePath="..\..\libaegisub\include\libaegisub\types.h"
|
||||||
>
|
>
|
||||||
|
|
|
@ -334,6 +334,10 @@
|
||||||
RelativePath="..\..\tests\libaegisub_signals.cpp"
|
RelativePath="..\..\tests\libaegisub_signals.cpp"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
|
<File
|
||||||
|
RelativePath="..\..\tests\libaegisub_thesaurus.cpp"
|
||||||
|
>
|
||||||
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\tests\libaegisub_util.cpp"
|
RelativePath="..\..\tests\libaegisub_util.cpp"
|
||||||
>
|
>
|
||||||
|
|
|
@ -35,6 +35,7 @@ SRC = \
|
||||||
common/keyframe.cpp \
|
common/keyframe.cpp \
|
||||||
common/util.cpp \
|
common/util.cpp \
|
||||||
common/log.cpp \
|
common/log.cpp \
|
||||||
|
common/thesaurus.cpp \
|
||||||
common/validator.cpp \
|
common/validator.cpp \
|
||||||
common/vfr.cpp \
|
common/vfr.cpp \
|
||||||
unix/util.cpp \
|
unix/util.cpp \
|
||||||
|
|
97
aegisub/libaegisub/common/thesaurus.cpp
Normal file
97
aegisub/libaegisub/common/thesaurus.cpp
Normal file
|
@ -0,0 +1,97 @@
|
||||||
|
// Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
|
||||||
|
//
|
||||||
|
// Permission to use, copy, modify, and distribute this software for any
|
||||||
|
// purpose with or without fee is hereby granted, provided that the above
|
||||||
|
// copyright notice and this permission notice appear in all copies.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||||
|
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||||
|
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||||
|
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||||
|
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||||
|
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
//
|
||||||
|
// $Id$
|
||||||
|
|
||||||
|
/// @file thesaurus.cpp
|
||||||
|
/// @brief MyThes-compatible thesaurus implementation
|
||||||
|
/// @ingroup libaegisub thesaurus
|
||||||
|
|
||||||
|
#include "libaegisub/thesaurus.h"
|
||||||
|
|
||||||
|
#include "libaegisub/charset_conv.h"
|
||||||
|
#include "libaegisub/io.h"
|
||||||
|
#include "libaegisub/line_iterator.h"
|
||||||
|
|
||||||
|
/// @brief Split a string on a separator character
/// @param str String to split
/// @param sep Separator to split on
/// @param[out] out Container which each non-empty field is appended to
///
/// Empty fields (from leading, trailing or doubled separators) are dropped
/// rather than being appended as empty strings.
template<class String, class Char, class Container>
static void split(String const& str, Char sep, Container *out) {
	typedef typename String::size_type size_type;

	out->reserve(2);

	size_type field_start = 0;
	for (;;) {
		size_type const sep_pos = str.find(sep, field_start);
		if (sep_pos == String::npos)
			break;

		// find() never returns a position before field_start, so the two
		// being equal is the only way for a field to be empty
		if (sep_pos != field_start)
			out->push_back(str.substr(field_start, sep_pos - field_start));
		field_start = sep_pos + 1;
	}

	// Whatever follows the final separator is the last field
	if (field_start < str.size())
		out->push_back(str.substr(field_start));
}
|
||||||
|
|
||||||
|
namespace agi {
|
||||||
|
|
||||||
|
/// @brief Open a thesaurus and load its index
/// @param dat_path Path to the data file, which is kept open for lookups
/// @param idx_path Path to the index file, which is only read here
///
/// The first line of the index names the charset of both files and the second
/// holds an entry count which is not needed. Every following line of the form
/// "word|offset" is recorded in the offsets map.
Thesaurus::Thesaurus(std::string const& dat_path, std::string const& idx_path)
: dat(io::Open(dat_path))
{
	scoped_ptr<std::ifstream> idx(io::Open(idx_path));

	std::string encoding_name;
	getline(*idx, encoding_name);
	// NOTE(review): the mode io::Open uses is not visible here. If the file
	// is not opened in text mode, an index with CRLF line endings leaves a
	// '\r' on the end of the charset name, which would then be handed to the
	// converters below as part of the name. Dropping it is harmless either
	// way.
	if (!encoding_name.empty() && encoding_name[encoding_name.size() - 1] == '\r')
		encoding_name.erase(encoding_name.size() - 1);

	std::string unused_entry_count;
	getline(*idx, unused_entry_count);

	// Read the list of words and file offsets for those words
	for (line_iterator<std::string> iter(*idx, encoding_name), end; iter != end; ++iter) {
		std::vector<std::string> chunks;
		split(*iter, '|', &chunks);
		// Lines which are not exactly "word|offset" are ignored
		if (chunks.size() == 2) {
			offsets[chunks[0]] = atoi(chunks[1].c_str());
		}
	}

	conv.reset(new charset::IconvWrapper(encoding_name.c_str(), "utf-8"));
}
|
||||||
|
|
||||||
|
/// Destructor. Releases nothing by hand; the data stream and the converter
/// are held in scoped_ptr members.
// NOTE(review): presumably defined here rather than in the header because the
// header only forward-declares charset::IconvWrapper -- confirm
Thesaurus::~Thesaurus() { }
|
||||||
|
|
||||||
|
void Thesaurus::Lookup(std::string const& word, std::vector<Entry> *out) {
|
||||||
|
out->clear();
|
||||||
|
|
||||||
|
std::map<std::string, int>::const_iterator it = offsets.find(word);
|
||||||
|
if (!dat.get() || it == offsets.end()) return;
|
||||||
|
|
||||||
|
dat->seekg(it->second, std::ios::beg);
|
||||||
|
if (!dat->good()) return;
|
||||||
|
|
||||||
|
// First line is the word and meaning count
|
||||||
|
std::string temp;
|
||||||
|
getline(*dat, temp);
|
||||||
|
std::vector<std::string> header;
|
||||||
|
split(conv->Convert(temp), '|', &header);
|
||||||
|
if (header.size() != 2) return;
|
||||||
|
int meanings = atoi(header[1].c_str());
|
||||||
|
|
||||||
|
out->resize(meanings);
|
||||||
|
for (int i = 0; i < meanings; ++i) {
|
||||||
|
std::vector<std::string> line;
|
||||||
|
getline(*dat, temp);
|
||||||
|
split(conv->Convert(temp), '|', &line);
|
||||||
|
|
||||||
|
// The "definition" is just the part of speech plus the word it's
|
||||||
|
// giving synonyms for (which may not be the passed word)
|
||||||
|
(*out)[i].first = line[0] + ' ' + line[1];
|
||||||
|
(*out)[i].second.reserve(line.size() - 2);
|
||||||
|
copy(line.begin() + 2, line.end(), back_inserter((*out)[i].second));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
58
aegisub/libaegisub/include/libaegisub/thesaurus.h
Normal file
58
aegisub/libaegisub/include/libaegisub/thesaurus.h
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
// Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
|
||||||
|
//
|
||||||
|
// Permission to use, copy, modify, and distribute this software for any
|
||||||
|
// purpose with or without fee is hereby granted, provided that the above
|
||||||
|
// copyright notice and this permission notice appear in all copies.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||||
|
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||||
|
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||||
|
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||||
|
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||||
|
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
//
|
||||||
|
// $Id$
|
||||||
|
|
||||||
|
/// @file thesaurus.h
|
||||||
|
/// @brief MyThes-compatible thesaurus implementation
|
||||||
|
/// @ingroup libaegisub thesaurus
|
||||||
|
|
||||||
|
#include <libaegisub/scoped_ptr.h>
|
||||||
|
|
||||||
|
#ifndef LAGI_PRE
|
||||||
|
#include <iosfwd>
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace agi {
|
||||||
|
|
||||||
|
namespace charset { class IconvWrapper; }
|
||||||
|
|
||||||
|
class Thesaurus {
|
||||||
|
/// Map of word -> byte position in the data file
|
||||||
|
std::map<std::string, int> offsets;
|
||||||
|
/// Read handle to the data file
|
||||||
|
scoped_ptr<std::ifstream> dat;
|
||||||
|
/// Converter from the data file's charset to UTF-8
|
||||||
|
scoped_ptr<charset::IconvWrapper> conv;
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// A pair of a word and synonyms for that word
|
||||||
|
typedef std::pair<std::string, std::vector<std::string> > Entry;
|
||||||
|
|
||||||
|
/// Constructor
|
||||||
|
/// @param dat_path Path to data file
|
||||||
|
/// @param idx_path Path to index file
|
||||||
|
Thesaurus(std::string const& dat_path, std::string const& idx_path);
|
||||||
|
~Thesaurus();
|
||||||
|
|
||||||
|
/// Look up synonyms for a word
|
||||||
|
/// @param word Word to look up
|
||||||
|
/// @param[out] out Vector to fill with word/synonym lists
|
||||||
|
void Lookup(std::string const& word, std::vector<Entry> *out);
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
|
@ -192,7 +192,6 @@ SRC += \
|
||||||
menu.cpp \
|
menu.cpp \
|
||||||
md5.c \
|
md5.c \
|
||||||
mkv_wrap.cpp \
|
mkv_wrap.cpp \
|
||||||
mythes.cxx \
|
|
||||||
pen.cpp \
|
pen.cpp \
|
||||||
persist_location.cpp \
|
persist_location.cpp \
|
||||||
plugin_manager.cpp \
|
plugin_manager.cpp \
|
||||||
|
@ -221,7 +220,6 @@ SRC += \
|
||||||
text_file_reader.cpp \
|
text_file_reader.cpp \
|
||||||
text_file_writer.cpp \
|
text_file_writer.cpp \
|
||||||
thesaurus.cpp \
|
thesaurus.cpp \
|
||||||
thesaurus_myspell.cpp \
|
|
||||||
timeedit_ctrl.cpp \
|
timeedit_ctrl.cpp \
|
||||||
threaded_frame_source.cpp \
|
threaded_frame_source.cpp \
|
||||||
toggle_bitmap.cpp \
|
toggle_bitmap.cpp \
|
||||||
|
|
|
@ -97,7 +97,6 @@ AboutScreen::AboutScreen(wxWindow *parent)
|
||||||
#ifdef WITH_FREETYPE2
|
#ifdef WITH_FREETYPE2
|
||||||
libString += " Freetype - Copyright (c) David Turner, Robert Wilhelm, Werner Lemberg;\n";
|
libString += " Freetype - Copyright (c) David Turner, Robert Wilhelm, Werner Lemberg;\n";
|
||||||
#endif
|
#endif
|
||||||
libString += " MyThes - Copyright (c) Kevin B. Hendricks, Stratford, Ontario, Canada.\n";
|
|
||||||
#ifdef WITH_FFTW3
|
#ifdef WITH_FFTW3
|
||||||
libString += " FFTW - Copyright (c) Matteo Frigo, Massachusetts Institute of Technology;\n";
|
libString += " FFTW - Copyright (c) Matteo Frigo, Massachusetts Institute of Technology;\n";
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,398 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright 2003 Kevin B. Hendricks, Stratford, Ontario, Canada
|
|
||||||
* And Contributors. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
*
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
*
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* 3. All modifications to the source code must be clearly marked as
|
|
||||||
* such. Binary redistributions based on modified source code
|
|
||||||
* must be clearly marked as modified versions in the documentation
|
|
||||||
* and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
|
||||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
|
||||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
||||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
||||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <errno.h>
|
|
||||||
#include "mythes.hxx"
|
|
||||||
|
|
||||||
// some basic utility routines
|
|
||||||
|
|
||||||
// Duplicate a null-terminated string into freshly malloc'd storage.
// Returns the copy, which the caller releases with free(), or NULL if the
// allocation fails.
char * mythes_mystrdup(const char * p)
{
    int bytes = strlen(p) + 1;   // include the terminator
    char * copy = (char *)malloc(bytes);
    if (!copy) return NULL;
    memcpy(copy,p,bytes);
    return copy;
}
|
|
||||||
|
|
||||||
// Remove a trailing line terminator ("\n", "\r" or "\r\n") from s in place.
// A string which does not end in '\r' or '\n' is left untouched; in
// particular a '\r' just before an ordinary last character is no longer
// treated as a line ending.
void mythes_mychomp(char * s)
{
    size_t k = strlen(s);
    if (k == 0) return;

    char last = *(s+k-1);
    if ((last != '\r') && (last != '\n')) return;
    *(s+k-1) = '\0';

    // second half of a two character terminator
    if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
}
|
|
||||||
|
|
||||||
|
|
||||||
// Find the first occurrence of the character c in the string d.
// Returns its zero-based index, or -1 if c does not occur.
int mystr_indexOfChar(const char * d, int c)
{
    const char * hit = strchr(d,c);
    return hit ? (int)(hit-d) : -1;
}
|
|
||||||
|
|
||||||
|
|
||||||
// Construct a thesaurus from an index file and its data file.
// idxpath: path of the index file; datpath: path of the data file.
// If loading fails an error is printed to stderr and the object is left
// empty, so that later lookups find nothing.
MyThes::MyThes(const char* idxpath, const char * datpath)
{
    nw = 0;
    encoding = NULL;
    list = NULL;
    offst = NULL;
    // thInitialize only assigns pdfile after the index has been loaded, but
    // thCleanup tests it unconditionally, so it needs a defined value
    pdfile = NULL;

    if (thInitialize(idxpath, datpath) != 1) {
        fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
        fflush(stderr);

        // Release whatever was loaded and reset the members. The destructor
        // frees these again via thCleanup, so pointers left dangling here
        // would be freed twice.
        for (int i = 0; i < nw; i++) free(list[i]);
        nw = 0;
        free((void*)encoding);
        encoding = NULL;
        free((void*)list);
        list = NULL;
        free((void*)offst);
        offst = NULL;
        // did not initialize properly - throw exception?
    }
}
|
|
||||||
|
|
||||||
|
|
||||||
// Close the data file and release everything the thesaurus owns.
MyThes::~MyThes()
{
    // thCleanup's status is ignored: there is no way to report a failure
    // from a destructor (throw exception?)
    (void) thCleanup();

    // free(NULL) does nothing, so no check is needed
    free((void*)encoding);

    encoding = NULL;
    list = NULL;
    offst = NULL;
}
|
|
||||||
|
|
||||||
|
|
||||||
int MyThes::thInitialize(const char* idxpath, const char* datpath)
|
|
||||||
{
|
|
||||||
|
|
||||||
// open the index file
|
|
||||||
FILE * pifile = fopen(idxpath,"r");
|
|
||||||
if (!pifile) {
|
|
||||||
pifile = NULL;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// parse in encoding and index size */
|
|
||||||
char * wrd;
|
|
||||||
wrd = (char *)calloc(1, MAX_WD_LEN);
|
|
||||||
int len = readLine(pifile,wrd,MAX_WD_LEN);
|
|
||||||
encoding = mythes_mystrdup(wrd);
|
|
||||||
len = readLine(pifile,wrd,MAX_WD_LEN);
|
|
||||||
int idxsz = atoi(wrd);
|
|
||||||
|
|
||||||
|
|
||||||
// now allocate list, offst for the given size
|
|
||||||
list = (char**) calloc(idxsz,sizeof(char*));
|
|
||||||
offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
|
|
||||||
|
|
||||||
if ( (!(list)) || (!(offst)) ) {
|
|
||||||
fprintf(stderr,"Error - bad memory allocation\n");
|
|
||||||
fflush(stderr);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// now parse the remaining lines of the index
|
|
||||||
len = readLine(pifile,wrd,MAX_WD_LEN);
|
|
||||||
while (len > 0)
|
|
||||||
{
|
|
||||||
int np = mystr_indexOfChar(wrd,'|');
|
|
||||||
if (nw < idxsz) {
|
|
||||||
if (np >= 0) {
|
|
||||||
*(wrd+np) = '\0';
|
|
||||||
list[nw] = (char *)calloc(1,(np+1));
|
|
||||||
memcpy((list[nw]),wrd,np);
|
|
||||||
offst[nw] = atoi(wrd+np+1);
|
|
||||||
nw++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
len = readLine(pifile,wrd,MAX_WD_LEN);
|
|
||||||
}
|
|
||||||
|
|
||||||
free((void *)wrd);
|
|
||||||
fclose(pifile);
|
|
||||||
pifile=NULL;
|
|
||||||
|
|
||||||
/* next open the data file */
|
|
||||||
pdfile = fopen(datpath,"r");
|
|
||||||
if (!pdfile) {
|
|
||||||
pdfile = NULL;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int MyThes::thCleanup()
|
|
||||||
{
|
|
||||||
/* first close the data file */
|
|
||||||
if (pdfile) {
|
|
||||||
fclose(pdfile);
|
|
||||||
pdfile=NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* now free up all the allocated strings on the list */
|
|
||||||
for (int i=0; i < nw; i++)
|
|
||||||
{
|
|
||||||
if (list[i]) {
|
|
||||||
free(list[i]);
|
|
||||||
list[i] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (list) free((void*)list);
|
|
||||||
if (offst) free((void*)offst);
|
|
||||||
|
|
||||||
nw = 0;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// lookup text in index and count of meanings and a list of meaning entries
|
|
||||||
// with each entry having a synonym count and pointer to an
|
|
||||||
// array of char * (i.e the synonyms)
|
|
||||||
//
|
|
||||||
// note: calling routine should call CleanUpAfterLookup with the original
|
|
||||||
// meaning point and count to properly deallocate memory
|
|
||||||
|
|
||||||
int MyThes::Lookup(const char * pText, int len, mentry** pme)
|
|
||||||
{
|
|
||||||
|
|
||||||
*pme = NULL;
|
|
||||||
|
|
||||||
// handle the case of missing file or file related errors
|
|
||||||
if (! pdfile) return 0;
|
|
||||||
|
|
||||||
long offset = 0;
|
|
||||||
|
|
||||||
/* copy search word and make sure null terminated */
|
|
||||||
char * wrd = (char *) calloc(1,(len+1));
|
|
||||||
memcpy(wrd,pText,len);
|
|
||||||
|
|
||||||
/* find it in the list */
|
|
||||||
int idx = binsearch(wrd,list,nw);
|
|
||||||
free(wrd);
|
|
||||||
if (idx < 0) return 0;
|
|
||||||
|
|
||||||
// now seek to the offset
|
|
||||||
offset = (long) offst[idx];
|
|
||||||
int rc = fseek(pdfile,offset,SEEK_SET);
|
|
||||||
if (rc) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// grab the count of the number of meanings
|
|
||||||
// and allocate a list of meaning entries
|
|
||||||
char * buf = NULL;
|
|
||||||
buf = (char *) malloc( MAX_LN_LEN );
|
|
||||||
if (!buf) return 0;
|
|
||||||
readLine(pdfile, buf, (MAX_LN_LEN-1));
|
|
||||||
int np = mystr_indexOfChar(buf,'|');
|
|
||||||
if (np < 0) {
|
|
||||||
free(buf);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
int nmeanings = atoi(buf+np+1);
|
|
||||||
*pme = (mentry*) malloc( nmeanings * sizeof(mentry) );
|
|
||||||
if (!(*pme)) {
|
|
||||||
free(buf);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// now read in each meaning and parse it to get defn, count and synonym lists
|
|
||||||
mentry* pm = *(pme);
|
|
||||||
char dfn[MAX_WD_LEN];
|
|
||||||
|
|
||||||
for (int j = 0; j < nmeanings; j++) {
|
|
||||||
readLine(pdfile, buf, (MAX_LN_LEN-1));
|
|
||||||
|
|
||||||
pm->count = 0;
|
|
||||||
pm->psyns = NULL;
|
|
||||||
pm->defn = NULL;
|
|
||||||
|
|
||||||
// store away the part of speech for later use
|
|
||||||
char * p = buf;
|
|
||||||
char * pos = NULL;
|
|
||||||
np = mystr_indexOfChar(p,'|');
|
|
||||||
if (np >= 0) {
|
|
||||||
*(buf+np) = '\0';
|
|
||||||
pos = mythes_mystrdup(p);
|
|
||||||
p = p + np + 1;
|
|
||||||
} else {
|
|
||||||
pos = mythes_mystrdup("");
|
|
||||||
}
|
|
||||||
|
|
||||||
// count the number of fields in the remaining line
|
|
||||||
int nf = 1;
|
|
||||||
char * d = p;
|
|
||||||
np = mystr_indexOfChar(d,'|');
|
|
||||||
while ( np >= 0 ) {
|
|
||||||
nf++;
|
|
||||||
d = d + np + 1;
|
|
||||||
np = mystr_indexOfChar(d,'|');
|
|
||||||
}
|
|
||||||
pm->count = nf;
|
|
||||||
pm->psyns = (char **) malloc(nf*sizeof(char*));
|
|
||||||
|
|
||||||
// fill in the synonym list
|
|
||||||
d = p;
|
|
||||||
for (int j = 0; j < nf; j++) {
|
|
||||||
np = mystr_indexOfChar(d,'|');
|
|
||||||
if (np > 0) {
|
|
||||||
*(d+np) = '\0';
|
|
||||||
pm->psyns[j] = mythes_mystrdup(d);
|
|
||||||
d = d + np + 1;
|
|
||||||
} else {
|
|
||||||
pm->psyns[j] = mythes_mystrdup(d);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// add pos to first synonym to create the definition
|
|
||||||
int k = strlen(pos);
|
|
||||||
int m = strlen(pm->psyns[0]);
|
|
||||||
if ((k+m) < (MAX_WD_LEN - 1)) {
|
|
||||||
strncpy(dfn,pos,k);
|
|
||||||
*(dfn+k) = ' ';
|
|
||||||
strncpy((dfn+k+1),(pm->psyns[0]),m+1);
|
|
||||||
pm->defn = mythes_mystrdup(dfn);
|
|
||||||
} else {
|
|
||||||
pm->defn = mythes_mystrdup(pm->psyns[0]);
|
|
||||||
}
|
|
||||||
free(pos);
|
|
||||||
pm++;
|
|
||||||
|
|
||||||
}
|
|
||||||
free(buf);
|
|
||||||
|
|
||||||
return nmeanings;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Release the meaning list produced by Lookup.
// pme: address of the pointer Lookup filled in; set to NULL on return.
// nmeanings: the count Lookup returned. Nothing is done if it is 0 or if
// *pme is already NULL.
void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
{
    if ((nmeanings == 0) || ((*pme) == NULL)) return;

    mentry * entries = *pme;

    for (int i = 0; i < nmeanings; i++) {
        mentry & m = entries[i];

        // the synonym strings first, then the array that held them
        int count = m.count;
        for (int j = 0; j < count; j++) {
            free(m.psyns[j]);
            m.psyns[j] = NULL;
        }
        free(m.psyns);
        m.psyns = NULL;

        free(m.defn);
        m.defn = NULL;
        m.count = 0;
    }

    free(entries);
    *pme = NULL;
}
|
|
||||||
|
|
||||||
|
|
||||||
// read a line of text from a text file stripping
|
|
||||||
// off the line terminator and replacing it with
|
|
||||||
// a null string terminator.
|
|
||||||
// returns: -1 on error or the number of characters in
|
|
||||||
// in the returning string
|
|
||||||
|
|
||||||
// A maximum of nc characters will be returned
|
|
||||||
|
|
||||||
int MyThes::readLine(FILE * pf, char * buf, int nc)
|
|
||||||
{
|
|
||||||
|
|
||||||
if (fgets(buf,nc,pf)) {
|
|
||||||
mythes_mychomp(buf);
|
|
||||||
return strlen(buf);
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// performs a binary search on null terminated character
|
|
||||||
// strings
|
|
||||||
//
|
|
||||||
// returns: -1 on not found
|
|
||||||
// index of wrd in the list[]
|
|
||||||
|
|
||||||
int MyThes::binsearch(char * sw, char* list[], int nlst)
|
|
||||||
{
|
|
||||||
int lp, up, mp, j, indx;
|
|
||||||
lp = 0;
|
|
||||||
up = nlst-1;
|
|
||||||
indx = -1;
|
|
||||||
if (nlst == 0) return -1;
|
|
||||||
if (strcmp(sw,list[lp]) < 0) return -1;
|
|
||||||
if (strcmp(sw,list[up]) > 0) return -1;
|
|
||||||
while (indx < 0 ) {
|
|
||||||
mp = (int)((lp+up) >> 1);
|
|
||||||
j = strcmp(sw,list[mp]);
|
|
||||||
if ( j > 0) {
|
|
||||||
lp = mp + 1;
|
|
||||||
} else if (j < 0 ) {
|
|
||||||
up = mp - 1;
|
|
||||||
} else {
|
|
||||||
indx = mp;
|
|
||||||
}
|
|
||||||
if (lp > up) return -1;
|
|
||||||
}
|
|
||||||
return indx;
|
|
||||||
}
|
|
||||||
|
|
||||||
char * MyThes::get_th_encoding()
|
|
||||||
{
|
|
||||||
if (encoding) return encoding;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
|
@ -1,103 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright 2003 Kevin B. Hendricks, Stratford, Ontario, Canada
|
|
||||||
* And Contributors. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
*
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
*
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* 3. All modifications to the source code must be clearly marked as
|
|
||||||
* such. Binary redistributions based on modified source code
|
|
||||||
* must be clearly marked as modified versions in the documentation
|
|
||||||
* and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
|
||||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
|
||||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
||||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
||||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef _MYTHES_HXX_
|
|
||||||
#define _MYTHES_HXX_
|
|
||||||
|
|
||||||
// some maximum sizes for buffers
|
|
||||||
#define MAX_WD_LEN 200
|
|
||||||
#define MAX_LN_LEN 16384
|
|
||||||
|
|
||||||
|
|
||||||
// One meaning of a looked-up word, as filled in by MyThes::Lookup
struct mentry {
  char*  defn;    // definition text for this meaning
  int    count;   // number of strings in psyns
  char** psyns;   // the synonyms
};
|
|
||||||
|
|
||||||
|
|
||||||
class MyThes
|
|
||||||
{
|
|
||||||
|
|
||||||
int nw; /* number of entries in thesaurus */
|
|
||||||
char** list; /* stores word list */
|
|
||||||
unsigned int* offst; /* stores offset list */
|
|
||||||
char * encoding; /* stores text encoding; */
|
|
||||||
|
|
||||||
FILE *pdfile;
|
|
||||||
|
|
||||||
// disallow copy-constructor and assignment-operator for now
|
|
||||||
MyThes();
|
|
||||||
MyThes(const MyThes &);
|
|
||||||
MyThes & operator = (const MyThes &);
|
|
||||||
|
|
||||||
public:
|
|
||||||
MyThes(const char* idxpath, const char* datpath);
|
|
||||||
~MyThes();
|
|
||||||
|
|
||||||
// lookup text in index and return number of meanings
|
|
||||||
// each meaning entry has a defintion, synonym count and pointer
|
|
||||||
// when complete return the *original* meaning entry and count via
|
|
||||||
// CleanUpAfterLookup to properly handle memory deallocation
|
|
||||||
|
|
||||||
int Lookup(const char * pText, int len, mentry** pme);
|
|
||||||
|
|
||||||
void CleanUpAfterLookup(mentry** pme, int nmean);
|
|
||||||
|
|
||||||
char* get_th_encoding();
|
|
||||||
|
|
||||||
private:
|
|
||||||
// Open index and dat files and load list array
|
|
||||||
int thInitialize (const char* indxpath, const char* datpath);
|
|
||||||
|
|
||||||
// internal close and cleanup dat and idx files
|
|
||||||
int thCleanup ();
|
|
||||||
|
|
||||||
// read a text line (\n terminated) stripping off line terminator
|
|
||||||
int readLine(FILE * pf, char * buf, int nc);
|
|
||||||
|
|
||||||
// binary search on null terminated character strings
|
|
||||||
int binsearch(char * wrd, char* list[], int nlst);
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -79,7 +79,7 @@ enum {
|
||||||
SubsTextEditCtrl::SubsTextEditCtrl(wxWindow* parent, wxSize wsize, long style, SubtitlesGrid *grid)
|
SubsTextEditCtrl::SubsTextEditCtrl(wxWindow* parent, wxSize wsize, long style, SubtitlesGrid *grid)
|
||||||
: ScintillaTextCtrl(parent, -1, "", wxDefaultPosition, wsize, style)
|
: ScintillaTextCtrl(parent, -1, "", wxDefaultPosition, wsize, style)
|
||||||
, spellchecker(SpellCheckerFactory::GetSpellChecker())
|
, spellchecker(SpellCheckerFactory::GetSpellChecker())
|
||||||
, thesaurus(Thesaurus::GetThesaurus())
|
, thesaurus(new Thesaurus)
|
||||||
, grid(grid)
|
, grid(grid)
|
||||||
{
|
{
|
||||||
// Set properties
|
// Set properties
|
||||||
|
@ -795,14 +795,15 @@ void SubsTextEditCtrl::OnContextMenu(wxContextMenuEvent &event) {
|
||||||
// Thesaurus
|
// Thesaurus
|
||||||
if (thesaurus.get() && currentWord.Length()) {
|
if (thesaurus.get() && currentWord.Length()) {
|
||||||
// Get results
|
// Get results
|
||||||
ThesaurusEntryArray result;
|
std::vector<Thesaurus::Entry> result;
|
||||||
thesaurus->Lookup(currentWord,result);
|
thesaurus->Lookup(currentWord,&result);
|
||||||
|
|
||||||
// Compile list
|
// Compile list
|
||||||
thesSugs.Clear();
|
thesSugs.clear();
|
||||||
for (unsigned int i=0;i<result.size();i++) {
|
thesSugs.reserve(result.size() * 5);
|
||||||
for (unsigned int j=0;j<result[i].words.Count();j++) {
|
for (size_t i = 0; i < result.size(); ++i) {
|
||||||
thesSugs.Add(result[i].words[j]);
|
for (size_t j = 0; j < result[i].second.size(); ++j) {
|
||||||
|
thesSugs.push_back(result[i].second[j]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -815,10 +816,10 @@ void SubsTextEditCtrl::OnContextMenu(wxContextMenuEvent &event) {
|
||||||
|
|
||||||
// Build menu
|
// Build menu
|
||||||
int curThesEntry = 0;
|
int curThesEntry = 0;
|
||||||
for (unsigned int i=0;i<result.size();i++) {
|
for (size_t i=0;i<result.size();i++) {
|
||||||
// Single word, insert directly
|
// Single word, insert directly
|
||||||
if (result[i].words.Count() == 1) {
|
if (result[i].second.size() == 1) {
|
||||||
thesMenu->Append(EDIT_MENU_THESAURUS_SUGS+curThesEntry,result[i].name);
|
thesMenu->Append(EDIT_MENU_THESAURUS_SUGS+curThesEntry,lagi_wxString(result[i].first));
|
||||||
curThesEntry++;
|
curThesEntry++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -826,13 +827,13 @@ void SubsTextEditCtrl::OnContextMenu(wxContextMenuEvent &event) {
|
||||||
else {
|
else {
|
||||||
// Insert entries
|
// Insert entries
|
||||||
wxMenu *subMenu = new wxMenu();
|
wxMenu *subMenu = new wxMenu();
|
||||||
for (unsigned int j=0;j<result[i].words.Count();j++) {
|
for (size_t j=0;j<result[i].second.size();j++) {
|
||||||
subMenu->Append(EDIT_MENU_THESAURUS_SUGS+curThesEntry,result[i].words[j]);
|
subMenu->Append(EDIT_MENU_THESAURUS_SUGS+curThesEntry,lagi_wxString(result[i].second[j]));
|
||||||
curThesEntry++;
|
curThesEntry++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Insert submenu
|
// Insert submenu
|
||||||
thesMenu->Append(-1, result[i].name, subMenu);
|
thesMenu->Append(-1, lagi_wxString(result[i].first), subMenu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -911,7 +912,7 @@ void SubsTextEditCtrl::OnUseSuggestion(wxCommandEvent &event) {
|
||||||
wxString suggestion;
|
wxString suggestion;
|
||||||
int sugIdx = event.GetId() - EDIT_MENU_THESAURUS_SUGS;
|
int sugIdx = event.GetId() - EDIT_MENU_THESAURUS_SUGS;
|
||||||
if (sugIdx >= 0) {
|
if (sugIdx >= 0) {
|
||||||
suggestion = thesSugs[sugIdx];
|
suggestion = lagi_wxString(thesSugs[sugIdx]);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
suggestion = sugs[event.GetId() - EDIT_MENU_SUGGESTIONS];
|
suggestion = sugs[event.GetId() - EDIT_MENU_SUGGESTIONS];
|
||||||
|
@ -953,10 +954,9 @@ void SubsTextEditCtrl::OnSetThesLanguage(wxCommandEvent &event) {
|
||||||
|
|
||||||
// Set language
|
// Set language
|
||||||
int index = event.GetId() - EDIT_MENU_THES_LANGS - 1;
|
int index = event.GetId() - EDIT_MENU_THES_LANGS - 1;
|
||||||
if (index >= 0) {
|
wxString lang;
|
||||||
thesaurus->SetLanguage(langs[index]);
|
if (index >= 0) lang = langs[index];
|
||||||
OPT_SET("Tool/Thesaurus/Language")->SetString(STD_STR(langs[index]));
|
OPT_SET("Tool/Thesaurus/Language")->SetString(STD_STR(lang));
|
||||||
}
|
|
||||||
|
|
||||||
UpdateStyle();
|
UpdateStyle();
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,7 +67,7 @@ class SubsTextEditCtrl : public ScintillaTextCtrl {
|
||||||
wxArrayString sugs;
|
wxArrayString sugs;
|
||||||
|
|
||||||
/// DOCME
|
/// DOCME
|
||||||
wxArrayString thesSugs;
|
std::vector<std::string> thesSugs;
|
||||||
|
|
||||||
/// DOCME
|
/// DOCME
|
||||||
int currentWordPos;
|
int currentWordPos;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright (c) 2006, Rodrigo Braz Monteiro
|
// Copyright (c) 2011, Thomas Goyne <plorkyeran@aegisub.org>
|
||||||
// All rights reserved.
|
// All rights reserved.
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -30,29 +30,111 @@
|
||||||
// $Id$
|
// $Id$
|
||||||
|
|
||||||
/// @file thesaurus.cpp
|
/// @file thesaurus.cpp
|
||||||
/// @brief Base-class for thesaurus implementations
|
/// @brief Thesaurus implementation
|
||||||
/// @ingroup thesaurus
|
/// @ingroup thesaurus
|
||||||
///
|
///
|
||||||
|
|
||||||
|
|
||||||
///////////
|
|
||||||
// Headers
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
#include "thesaurus_myspell.h"
|
#include "thesaurus.h"
|
||||||
|
|
||||||
|
#ifndef AGI_PRE
|
||||||
|
#include <wx/dir.h>
|
||||||
|
#include <wx/filename.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
/// @brief Get spell checker
|
#include <libaegisub/log.h>
|
||||||
///
|
#include <libaegisub/thesaurus.h>
|
||||||
Thesaurus *Thesaurus::GetThesaurus() {
|
|
||||||
// Initialize
|
|
||||||
Thesaurus *thes = NULL;
|
|
||||||
|
|
||||||
// Get myspell
|
#include "compat.h"
|
||||||
thes = new MySpellThesaurus();
|
#include "main.h"
|
||||||
|
#include "standard_paths.h"
|
||||||
|
|
||||||
// Return
|
Thesaurus::Thesaurus()
|
||||||
return thes;
|
: lang_listener(OPT_SUB("Tool/Thesaurus/Language", &Thesaurus::OnLanguageChanged, this))
|
||||||
|
, dict_path_listener(OPT_SUB("Path/Dictionary", &Thesaurus::OnPathChanged, this))
|
||||||
|
{
|
||||||
|
OnLanguageChanged();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Thesaurus::~Thesaurus() {
|
||||||
|
// Explicit empty destructor needed for scoped_ptr with incomplete types
|
||||||
|
}
|
||||||
|
|
||||||
|
void Thesaurus::Lookup(wxString const& word, std::vector<Entry> *result) {
|
||||||
|
if (!impl.get()) return;
|
||||||
|
impl->Lookup(STD_STR(word.Lower()), result);
|
||||||
|
}
|
||||||
|
|
||||||
|
wxArrayString Thesaurus::GetLanguageList() const {
|
||||||
|
if (!languages.empty()) return languages;
|
||||||
|
|
||||||
|
wxArrayString idx, dat;
|
||||||
|
|
||||||
|
// Get list of dictionaries
|
||||||
|
wxString path = StandardPaths::DecodePath("?data/dictionaries/");
|
||||||
|
if (wxFileName::DirExists(path)) {
|
||||||
|
wxDir::GetAllFiles(path, &idx, "th_*.idx", wxDIR_FILES);
|
||||||
|
wxDir::GetAllFiles(path, &dat, "th_*.dat", wxDIR_FILES);
|
||||||
|
}
|
||||||
|
path = StandardPaths::DecodePath(lagi_wxString(OPT_GET("Path/Dictionary")->GetString()) + "/");
|
||||||
|
if (wxFileName::DirExists(path)) {
|
||||||
|
wxDir::GetAllFiles(path, &idx, "th_*.idx", wxDIR_FILES);
|
||||||
|
wxDir::GetAllFiles(path, &dat, "th_*.dat", wxDIR_FILES);
|
||||||
|
}
|
||||||
|
if (idx.empty() || dat.empty()) return languages;
|
||||||
|
|
||||||
|
idx.Sort();
|
||||||
|
dat.Sort();
|
||||||
|
|
||||||
|
// Drop extensions and the th_ prefix
|
||||||
|
for (size_t i = 0; i < idx.size(); ++i) idx[i] = idx[i].Mid(3, idx[i].size() - 7);
|
||||||
|
for (size_t i = 0; i < dat.size(); ++i) dat[i] = dat[i].Mid(3, dat[i].size() - 7);
|
||||||
|
|
||||||
|
// Verify that each idx has a dat
|
||||||
|
for (size_t i = 0, j = 0; i < idx.size() && j < dat.size(); ) {
|
||||||
|
int cmp = idx[i].Cmp(dat[j]);
|
||||||
|
if (cmp < 0) ++i;
|
||||||
|
else if (cmp > 0) ++j;
|
||||||
|
else {
|
||||||
|
// Don't insert a language twice if it's in both the user dir and
|
||||||
|
// the app's dir
|
||||||
|
wxString name = wxFileName(dat[j]).GetName().Mid(3);
|
||||||
|
if (languages.empty() || name != languages.back())
|
||||||
|
languages.push_back(name);
|
||||||
|
++i;
|
||||||
|
++j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return languages;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Thesaurus::OnLanguageChanged() {
|
||||||
|
impl.reset();
|
||||||
|
|
||||||
|
std::string language = OPT_GET("Tool/Thesaurus/Language")->GetString();
|
||||||
|
if (language.empty()) return;
|
||||||
|
|
||||||
|
wxString path = StandardPaths::DecodePath(lagi_wxString(OPT_GET("Path/Dictionary")->GetString()) + "/");
|
||||||
|
|
||||||
|
// Get index and data paths
|
||||||
|
wxString idxpath = wxString::Format("%s/th_%s.idx", path, language);
|
||||||
|
wxString datpath = wxString::Format("%s/th_%s.dat", path, language);
|
||||||
|
|
||||||
|
// If they aren't in the user dictionary path, check the application directory
|
||||||
|
if (!wxFileExists(idxpath) || !wxFileExists(datpath)) {
|
||||||
|
path = StandardPaths::DecodePath("?data/dictionaries/");
|
||||||
|
idxpath = wxString::Format("%s/th_%s.idx", path, language);
|
||||||
|
datpath = wxString::Format("%s/th_%s.dat", path, language);
|
||||||
|
|
||||||
|
if (!wxFileExists(idxpath) || !wxFileExists(datpath)) return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_I("thesaurus/file") << "Using thesaurus: " << datpath.c_str();
|
||||||
|
|
||||||
|
impl.reset(new agi::Thesaurus(STD_STR(datpath), STD_STR(idxpath)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Thesaurus::OnPathChanged() {
|
||||||
|
languages.clear();
|
||||||
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright (c) 2006, Rodrigo Braz Monteiro
|
// Copyright (c) 2011, Thomas Goyne <plorkyeran@aegisub.org>
|
||||||
// All rights reserved.
|
// All rights reserved.
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -34,9 +34,6 @@
|
||||||
/// @ingroup thesaurus
|
/// @ingroup thesaurus
|
||||||
///
|
///
|
||||||
|
|
||||||
|
|
||||||
///////////
|
|
||||||
// Headers
|
|
||||||
#ifndef AGI_PRE
|
#ifndef AGI_PRE
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
@ -44,50 +41,40 @@
|
||||||
#include <wx/string.h>
|
#include <wx/string.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <libaegisub/scoped_ptr.h>
|
||||||
|
#include <libaegisub/signal.h>
|
||||||
|
|
||||||
/// DOCME
|
namespace agi { class Thesaurus; }
|
||||||
/// @class ThesaurusEntry
|
|
||||||
/// @brief DOCME
|
|
||||||
///
|
|
||||||
/// DOCME
|
|
||||||
class ThesaurusEntry {
|
|
||||||
public:
|
|
||||||
|
|
||||||
/// DOCME
|
|
||||||
wxString name;
|
|
||||||
|
|
||||||
/// DOCME
|
|
||||||
wxArrayString words;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// DOCME
|
|
||||||
typedef std::vector<ThesaurusEntry> ThesaurusEntryArray;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// DOCME
|
|
||||||
/// @class Thesaurus
|
/// @class Thesaurus
|
||||||
/// @brief DOCME
|
/// @brief A wrapper around agi::Thesaurus adding wx and Aegisub-specific stuff
|
||||||
///
|
|
||||||
/// DOCME
|
|
||||||
class Thesaurus {
|
class Thesaurus {
|
||||||
|
/// The actual thesaurus implementation
|
||||||
|
agi::scoped_ptr<agi::Thesaurus> impl;
|
||||||
|
/// A cached list of languages available
|
||||||
|
mutable wxArrayString languages;
|
||||||
|
|
||||||
|
/// Thesaurus language change slot
|
||||||
|
agi::signal::Connection lang_listener;
|
||||||
|
/// Thesaurus language change handler
|
||||||
|
void OnLanguageChanged();
|
||||||
|
|
||||||
|
/// Thesaurus path change slot
|
||||||
|
agi::signal::Connection dict_path_listener;
|
||||||
|
/// Thesaurus path change handler
|
||||||
|
void OnPathChanged();
|
||||||
public:
|
public:
|
||||||
static Thesaurus *GetThesaurus();
|
/// A pair of a word and synonyms for that word
|
||||||
|
typedef std::pair<std::string, std::vector<std::string> > Entry;
|
||||||
|
|
||||||
|
Thesaurus();
|
||||||
|
~Thesaurus();
|
||||||
|
|
||||||
/// @brief DOCME
|
/// Get a list of synonyms for a word, grouped by possible meanings of the word
|
||||||
///
|
/// @param word Word to get synonyms for
|
||||||
Thesaurus() {}
|
/// @param[out] result Output list
|
||||||
|
void Lookup(wxString const& word, std::vector<Entry> *result);
|
||||||
|
|
||||||
/// @brief DOCME
|
/// Get a list of language codes which thesauri are available for
|
||||||
///
|
wxArrayString GetLanguageList() const;
|
||||||
virtual ~Thesaurus() {}
|
|
||||||
|
|
||||||
virtual void Lookup(wxString word,ThesaurusEntryArray &result)=0;
|
|
||||||
virtual wxArrayString GetLanguageList()=0;
|
|
||||||
virtual void SetLanguage(wxString language)=0;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,176 +0,0 @@
|
||||||
// Copyright (c) 2006, Rodrigo Braz Monteiro
|
|
||||||
// All rights reserved.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are met:
|
|
||||||
//
|
|
||||||
// * Redistributions of source code must retain the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer.
|
|
||||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer in the documentation
|
|
||||||
// and/or other materials provided with the distribution.
|
|
||||||
// * Neither the name of the Aegisub Group nor the names of its contributors
|
|
||||||
// may be used to endorse or promote products derived from this software
|
|
||||||
// without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
// POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Aegisub Project http://www.aegisub.org/
|
|
||||||
//
|
|
||||||
// $Id$
|
|
||||||
|
|
||||||
/// @file thesaurus_myspell.cpp
|
|
||||||
/// @brief MySpell-based thesaurus implementation
|
|
||||||
/// @ingroup thesaurus
|
|
||||||
///
|
|
||||||
|
|
||||||
|
|
||||||
///////////
|
|
||||||
// Headers
|
|
||||||
#include "config.h"
|
|
||||||
|
|
||||||
#ifndef AGI_PRE
|
|
||||||
#include <wx/dir.h>
|
|
||||||
#include <wx/filename.h>
|
|
||||||
#include <wx/log.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <libaegisub/log.h>
|
|
||||||
|
|
||||||
#include "compat.h"
|
|
||||||
#include "mythes.hxx"
|
|
||||||
#include "main.h"
|
|
||||||
#include "standard_paths.h"
|
|
||||||
#include "thesaurus_myspell.h"
|
|
||||||
#include "utils.h"
|
|
||||||
|
|
||||||
|
|
||||||
/// @brief Constructor
|
|
||||||
///
|
|
||||||
MySpellThesaurus::MySpellThesaurus() {
|
|
||||||
conv = NULL;
|
|
||||||
mythes = NULL;
|
|
||||||
SetLanguage(lagi_wxString(OPT_GET("Tool/Thesaurus/Language")->GetString()));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// @brief Destructor
|
|
||||||
///
|
|
||||||
MySpellThesaurus::~MySpellThesaurus() {
|
|
||||||
delete mythes;
|
|
||||||
mythes = NULL;
|
|
||||||
delete conv;
|
|
||||||
conv = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// @brief Get suggestions
|
|
||||||
/// @param word
|
|
||||||
/// @param result
|
|
||||||
/// @return
|
|
||||||
///
|
|
||||||
void MySpellThesaurus::Lookup(wxString word,ThesaurusEntryArray &result) {
|
|
||||||
// Loaded?
|
|
||||||
if (!mythes) return;
|
|
||||||
|
|
||||||
// Grab raw from MyThes
|
|
||||||
mentry *me;
|
|
||||||
wxCharBuffer buf = word.Lower().mb_str(*conv);
|
|
||||||
if (!buf) return;
|
|
||||||
int n = mythes->Lookup(buf,strlen(buf),&me);
|
|
||||||
|
|
||||||
// Each entry
|
|
||||||
for (int i=0;i<n;i++) {
|
|
||||||
ThesaurusEntry entry;
|
|
||||||
entry.name = wxString(me[i].defn,*conv);
|
|
||||||
for (int j=0;j<me[i].count;j++) entry.words.Add(wxString(me[i].psyns[j],*conv));
|
|
||||||
result.push_back(entry);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clean up
|
|
||||||
mythes->CleanUpAfterLookup(&me,n);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// @brief Get language list
|
|
||||||
/// @return
|
|
||||||
///
|
|
||||||
wxArrayString MySpellThesaurus::GetLanguageList() {
|
|
||||||
// Get dir name
|
|
||||||
wxString path = StandardPaths::DecodePathMaybeRelative(lagi_wxString(OPT_GET("Path/Dictionary")->GetString()), _T("?data")) + _T("/");
|
|
||||||
wxArrayString list;
|
|
||||||
wxFileName folder(path);
|
|
||||||
if (!folder.DirExists()) return list;
|
|
||||||
|
|
||||||
// Get file lists
|
|
||||||
wxArrayString idx;
|
|
||||||
wxDir::GetAllFiles(path,&idx,_T("*.idx"),wxDIR_FILES);
|
|
||||||
wxArrayString dat;
|
|
||||||
wxDir::GetAllFiles(path,&dat,_T("*.dat"),wxDIR_FILES);
|
|
||||||
|
|
||||||
// For each index file match, see if it can find the corresponding .dat
|
|
||||||
for (unsigned int i=0;i<idx.Count();i++) {
|
|
||||||
wxString curdat = idx[i].Left(std::max(0,signed(idx[i].Length())-4)) + _T(".dat");
|
|
||||||
for (unsigned int j=0;j<dat.Count();j++) {
|
|
||||||
// Found match
|
|
||||||
if (curdat == dat[j]) {
|
|
||||||
wxFileName fname(curdat);
|
|
||||||
wxString name = fname.GetName();
|
|
||||||
if (name.Left(3) == _T("th_")) name = name.Mid(3);
|
|
||||||
list.Add(name);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return list
|
|
||||||
return list;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// @brief Set language
|
|
||||||
/// @param language
|
|
||||||
///
|
|
||||||
void MySpellThesaurus::SetLanguage(wxString language) {
|
|
||||||
// Unload
|
|
||||||
delete mythes;
|
|
||||||
mythes = NULL;
|
|
||||||
delete conv;
|
|
||||||
conv = NULL;
|
|
||||||
|
|
||||||
// Unloading
|
|
||||||
if (language.IsEmpty()) return;
|
|
||||||
|
|
||||||
// Get dir name
|
|
||||||
wxString path = StandardPaths::DecodePathMaybeRelative(lagi_wxString(OPT_GET("Path/Dictionary")->GetString()), _T("?data")) + _T("/");
|
|
||||||
|
|
||||||
// Get affix and dictionary paths
|
|
||||||
wxString idxpath = path + _T("th_") + language + _T(".idx");
|
|
||||||
wxString datpath = path + _T("th_") + language + _T(".dat");
|
|
||||||
|
|
||||||
// Check if language is available
|
|
||||||
if (!wxFileExists(idxpath) || !wxFileExists(datpath)) return;
|
|
||||||
|
|
||||||
LOG_I("thesaurus/file") << "Using thesaurus: " << datpath.c_str();
|
|
||||||
|
|
||||||
// Load
|
|
||||||
mythes = new MyThes(idxpath.mb_str(wxConvLocal),datpath.mb_str(wxConvLocal));
|
|
||||||
conv = NULL;
|
|
||||||
if (mythes) conv = new wxCSConv(wxString(mythes->get_th_encoding(),wxConvUTF8));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
|
@ -1,73 +0,0 @@
|
||||||
// Copyright (c) 2006, Rodrigo Braz Monteiro
|
|
||||||
// All rights reserved.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are met:
|
|
||||||
//
|
|
||||||
// * Redistributions of source code must retain the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer.
|
|
||||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer in the documentation
|
|
||||||
// and/or other materials provided with the distribution.
|
|
||||||
// * Neither the name of the Aegisub Group nor the names of its contributors
|
|
||||||
// may be used to endorse or promote products derived from this software
|
|
||||||
// without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
// POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Aegisub Project http://www.aegisub.org/
|
|
||||||
//
|
|
||||||
// $Id$
|
|
||||||
|
|
||||||
/// @file thesaurus_myspell.h
|
|
||||||
/// @see thesaurus_myspell.cpp
|
|
||||||
/// @ingroup thesaurus
|
|
||||||
///
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
///////////
|
|
||||||
// Headers
|
|
||||||
#include "thesaurus.h"
|
|
||||||
|
|
||||||
|
|
||||||
//////////////
|
|
||||||
// Prototypes
|
|
||||||
class MyThes;
|
|
||||||
|
|
||||||
|
|
||||||
/// DOCME
|
|
||||||
/// @class MySpellThesaurus
|
|
||||||
/// @brief DOCME
|
|
||||||
///
|
|
||||||
/// DOCME
|
|
||||||
class MySpellThesaurus: public Thesaurus {
|
|
||||||
private:
|
|
||||||
|
|
||||||
/// DOCME
|
|
||||||
MyThes *mythes;
|
|
||||||
|
|
||||||
/// DOCME
|
|
||||||
wxCSConv *conv;
|
|
||||||
|
|
||||||
public:
|
|
||||||
MySpellThesaurus();
|
|
||||||
~MySpellThesaurus();
|
|
||||||
|
|
||||||
void Lookup(wxString word,ThesaurusEntryArray &result);
|
|
||||||
wxArrayString GetLanguageList();
|
|
||||||
void SetLanguage(wxString language);
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,7 @@ SRC = \
|
||||||
libaegisub_option.cpp \
|
libaegisub_option.cpp \
|
||||||
libaegisub_mru.cpp \
|
libaegisub_mru.cpp \
|
||||||
libaegisub_signals.cpp \
|
libaegisub_signals.cpp \
|
||||||
|
libaegisub_thesaurus.cpp \
|
||||||
libaegisub_util.cpp \
|
libaegisub_util.cpp \
|
||||||
libaegisub_vfr.cpp
|
libaegisub_vfr.cpp
|
||||||
|
|
||||||
|
|
148
aegisub/tests/libaegisub_thesaurus.cpp
Normal file
148
aegisub/tests/libaegisub_thesaurus.cpp
Normal file
|
@ -0,0 +1,148 @@
|
||||||
|
// Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
|
||||||
|
//
|
||||||
|
// Permission to use, copy, modify, and distribute this software for any
|
||||||
|
// purpose with or without fee is hereby granted, provided that the above
|
||||||
|
// copyright notice and this permission notice appear in all copies.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||||
|
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||||
|
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||||
|
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||||
|
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||||
|
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
//
|
||||||
|
// $Id$
|
||||||
|
|
||||||
|
#include <libaegisub/thesaurus.h>
|
||||||
|
|
||||||
|
#include "main.h"
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
class lagi_thes : public libagi {
|
||||||
|
protected:
|
||||||
|
std::string idx_path;
|
||||||
|
std::string dat_path;
|
||||||
|
|
||||||
|
void SetUp() {
|
||||||
|
using std::endl;
|
||||||
|
|
||||||
|
idx_path = "data/thes.idx";
|
||||||
|
dat_path = "data/thes.dat";
|
||||||
|
|
||||||
|
std::ofstream idx(idx_path.c_str());
|
||||||
|
std::ofstream dat(dat_path.c_str());
|
||||||
|
|
||||||
|
idx << "UTF-8" << endl;
|
||||||
|
dat << "UTF-8" << endl;
|
||||||
|
idx << 7 << endl; // entry count
|
||||||
|
|
||||||
|
idx << "Word 1|" << dat.tellp() << endl;
|
||||||
|
dat << "Word 1|1" << endl;
|
||||||
|
dat << "(noun)|Word 1|Word 1A|Word 1B|Word 1C" << endl;
|
||||||
|
|
||||||
|
idx << "Word 2|" << dat.tellp() << endl;
|
||||||
|
dat << "Word 2|2" << endl;
|
||||||
|
dat << "(adj)|Word 2|Word 2 adj" << endl;
|
||||||
|
dat << "(noun)|Word 2|Word 2 noun" << endl;
|
||||||
|
|
||||||
|
dat << "Unindexed Word|1" << endl;
|
||||||
|
dat << "(adv)|Unindexed Word|Indexed Word" << endl;
|
||||||
|
|
||||||
|
idx << "Word 3|" << dat.tellp() << endl;
|
||||||
|
dat << "Word 3|1" << endl;
|
||||||
|
dat << "(verb)|Not Word 3|Four" << endl;
|
||||||
|
|
||||||
|
idx << "Too few fields" << endl;
|
||||||
|
idx << "Too many fields|100|100" << endl;
|
||||||
|
idx << "Not a number|foo" << endl;
|
||||||
|
idx << "Out of range|" << dat.tellp() << endl;
|
||||||
|
idx << "Further out of range|" << 1 + dat.tellp() << endl;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_F(lagi_thes, parse) {
|
||||||
|
ASSERT_NO_THROW(agi::Thesaurus(dat_path, idx_path));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(lagi_thes, word_1) {
|
||||||
|
agi::Thesaurus thes(dat_path, idx_path);
|
||||||
|
|
||||||
|
std::vector<agi::Thesaurus::Entry> entries;
|
||||||
|
ASSERT_NO_THROW(thes.Lookup("Word 1", &entries));
|
||||||
|
ASSERT_EQ(1, entries.size());
|
||||||
|
ASSERT_EQ(3, entries[0].second.size());
|
||||||
|
EXPECT_STREQ("(noun) Word 1", entries[0].first.c_str());
|
||||||
|
EXPECT_STREQ("Word 1A", entries[0].second[0].c_str());
|
||||||
|
EXPECT_STREQ("Word 1B", entries[0].second[1].c_str());
|
||||||
|
EXPECT_STREQ("Word 1C", entries[0].second[2].c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(lagi_thes, word_2) {
|
||||||
|
agi::Thesaurus thes(dat_path, idx_path);
|
||||||
|
|
||||||
|
std::vector<agi::Thesaurus::Entry> entries;
|
||||||
|
ASSERT_NO_THROW(thes.Lookup("Word 2", &entries));
|
||||||
|
ASSERT_EQ(2, entries.size());
|
||||||
|
ASSERT_EQ(1, entries[0].second.size());
|
||||||
|
ASSERT_EQ(1, entries[1].second.size());
|
||||||
|
EXPECT_STREQ("(adj) Word 2", entries[0].first.c_str());
|
||||||
|
EXPECT_STREQ("(noun) Word 2", entries[1].first.c_str());
|
||||||
|
EXPECT_STREQ("Word 2 adj", entries[0].second[0].c_str());
|
||||||
|
EXPECT_STREQ("Word 2 noun", entries[1].second[0].c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(lagi_thes, word_3) {
|
||||||
|
agi::Thesaurus thes(dat_path, idx_path);
|
||||||
|
|
||||||
|
std::vector<agi::Thesaurus::Entry> entries;
|
||||||
|
ASSERT_NO_THROW(thes.Lookup("Word 3", &entries));
|
||||||
|
ASSERT_EQ(1, entries.size());
|
||||||
|
ASSERT_EQ(1, entries[0].second.size());
|
||||||
|
EXPECT_STREQ("(verb) Not Word 3", entries[0].first.c_str());
|
||||||
|
EXPECT_STREQ("Four", entries[0].second[0].c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(lagi_thes, bad_word) {
|
||||||
|
agi::Thesaurus thes(dat_path, idx_path);
|
||||||
|
|
||||||
|
std::vector<agi::Thesaurus::Entry> entries;
|
||||||
|
ASSERT_NO_THROW(thes.Lookup("Nonexistent word", &entries));
|
||||||
|
EXPECT_EQ(0, entries.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(lagi_thes, lookup_clears) {
|
||||||
|
agi::Thesaurus thes(dat_path, idx_path);
|
||||||
|
|
||||||
|
std::vector<agi::Thesaurus::Entry> entries;
|
||||||
|
ASSERT_NO_THROW(thes.Lookup("Word 1", &entries));
|
||||||
|
ASSERT_NO_THROW(thes.Lookup("Word 2", &entries));
|
||||||
|
ASSERT_NO_THROW(thes.Lookup("Word 3", &entries));
|
||||||
|
EXPECT_EQ(1, entries.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(lagi_thes, malformed_index_lines) {
|
||||||
|
agi::Thesaurus thes(dat_path, idx_path);
|
||||||
|
|
||||||
|
std::vector<agi::Thesaurus::Entry> entries;
|
||||||
|
ASSERT_NO_THROW(thes.Lookup("Too few fields", &entries));
|
||||||
|
EXPECT_EQ(0, entries.size());
|
||||||
|
ASSERT_NO_THROW(thes.Lookup("Too many fields", &entries));
|
||||||
|
EXPECT_EQ(0, entries.size());
|
||||||
|
ASSERT_NO_THROW(thes.Lookup("Not a number", &entries));
|
||||||
|
EXPECT_EQ(0, entries.size());
|
||||||
|
ASSERT_NO_THROW(thes.Lookup("Out of range", &entries));
|
||||||
|
EXPECT_EQ(0, entries.size());
|
||||||
|
ASSERT_NO_THROW(thes.Lookup("Further out of range", &entries));
|
||||||
|
EXPECT_EQ(0, entries.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(lagi_thes, unindexed_word) {
|
||||||
|
agi::Thesaurus thes(dat_path, idx_path);
|
||||||
|
|
||||||
|
std::vector<agi::Thesaurus::Entry> entries;
|
||||||
|
ASSERT_NO_THROW(thes.Lookup("Unindexed Word", &entries));
|
||||||
|
EXPECT_EQ(0, entries.size());
|
||||||
|
}
|
Loading…
Reference in a new issue