Update to hunspell 1.3.3
This commit is contained in:
parent
41b08628bc
commit
3c57dda9ac
46 changed files with 578 additions and 1582 deletions
|
@ -4,6 +4,7 @@
|
||||||
<ProjectGuid>{CC791693-6B28-40AC-879D-64A6C16468E3}</ProjectGuid>
|
<ProjectGuid>{CC791693-6B28-40AC-879D-64A6C16468E3}</ProjectGuid>
|
||||||
<RootNamespace>hunspell</RootNamespace>
|
<RootNamespace>hunspell</RootNamespace>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
<!-- Aegisub project configuration -->
|
<!-- Aegisub project configuration -->
|
||||||
<PropertyGroup Label="AegisubConfiguration">
|
<PropertyGroup Label="AegisubConfiguration">
|
||||||
<AegisubProjectType>lib</AegisubProjectType>
|
<AegisubProjectType>lib</AegisubProjectType>
|
||||||
|
@ -12,6 +13,7 @@
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Project="$(MSBuildThisFileDirectory)..\aegisub.props" />
|
<Import Project="$(MSBuildThisFileDirectory)..\aegisub.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
|
|
||||||
<!-- Project specific configuration -->
|
<!-- Project specific configuration -->
|
||||||
<ItemDefinitionGroup>
|
<ItemDefinitionGroup>
|
||||||
<ClCompile>
|
<ClCompile>
|
||||||
|
@ -20,6 +22,7 @@
|
||||||
<RuntimeTypeInfo>false</RuntimeTypeInfo>
|
<RuntimeTypeInfo>false</RuntimeTypeInfo>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
|
|
||||||
<!-- Source files -->
|
<!-- Source files -->
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="$(SrcDir)hunspell\affentry.hxx" />
|
<ClInclude Include="$(SrcDir)hunspell\affentry.hxx" />
|
||||||
|
@ -35,13 +38,7 @@
|
||||||
<ClInclude Include="$(SrcDir)hunspell\langnum.hxx" />
|
<ClInclude Include="$(SrcDir)hunspell\langnum.hxx" />
|
||||||
<ClInclude Include="$(SrcDir)hunspell\phonet.hxx" />
|
<ClInclude Include="$(SrcDir)hunspell\phonet.hxx" />
|
||||||
<ClInclude Include="$(SrcDir)hunspell\suggestmgr.hxx" />
|
<ClInclude Include="$(SrcDir)hunspell\suggestmgr.hxx" />
|
||||||
<ClInclude Include="$(SrcDir)parsers\firstparser.hxx" />
|
|
||||||
<ClInclude Include="$(SrcDir)parsers\htmlparser.hxx" />
|
|
||||||
<ClInclude Include="$(SrcDir)parsers\latexparser.hxx" />
|
|
||||||
<ClInclude Include="$(SrcDir)parsers\manparser.hxx" />
|
|
||||||
<ClInclude Include="$(SrcDir)parsers\textparser.hxx" />
|
|
||||||
<ClInclude Include="$(SrcDir)win_api\config.h" />
|
<ClInclude Include="$(SrcDir)win_api\config.h" />
|
||||||
<ClInclude Include="$(SrcDir)win_api\hunspelldll.h" />
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClCompile Include="$(SrcDir)hunspell\affentry.cxx" />
|
<ClCompile Include="$(SrcDir)hunspell\affentry.cxx" />
|
||||||
|
@ -55,15 +52,5 @@
|
||||||
<ClCompile Include="$(SrcDir)hunspell\phonet.cxx" />
|
<ClCompile Include="$(SrcDir)hunspell\phonet.cxx" />
|
||||||
<ClCompile Include="$(SrcDir)hunspell\replist.cxx" />
|
<ClCompile Include="$(SrcDir)hunspell\replist.cxx" />
|
||||||
<ClCompile Include="$(SrcDir)hunspell\suggestmgr.cxx" />
|
<ClCompile Include="$(SrcDir)hunspell\suggestmgr.cxx" />
|
||||||
<ClCompile Include="$(SrcDir)parsers\firstparser.cxx" />
|
|
||||||
<ClCompile Include="$(SrcDir)parsers\htmlparser.cxx" />
|
|
||||||
<ClCompile Include="$(SrcDir)parsers\latexparser.cxx" />
|
|
||||||
<ClCompile Include="$(SrcDir)parsers\manparser.cxx" />
|
|
||||||
<ClCompile Include="$(SrcDir)parsers\testparser.cxx" />
|
|
||||||
<ClCompile Include="$(SrcDir)parsers\textparser.cxx" />
|
|
||||||
<ClCompile Include="$(SrcDir)win_api\hunspelldll.c">
|
|
||||||
<!-- Why is this file named .c when it's obviously C++ code -->
|
|
||||||
<CompileAs>CompileAsCpp</CompileAs>
|
|
||||||
</ClCompile>
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -9,10 +9,6 @@
|
||||||
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||||
<Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
|
<Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
|
||||||
</Filter>
|
</Filter>
|
||||||
<Filter Include="Resource Files">
|
|
||||||
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
|
||||||
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
|
|
||||||
</Filter>
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="$(SrcDir)hunspell\affixmgr.hxx">
|
<ClInclude Include="$(SrcDir)hunspell\affixmgr.hxx">
|
||||||
|
@ -51,27 +47,9 @@
|
||||||
<ClInclude Include="$(SrcDir)hunspell\suggestmgr.hxx">
|
<ClInclude Include="$(SrcDir)hunspell\suggestmgr.hxx">
|
||||||
<Filter>Header Files</Filter>
|
<Filter>Header Files</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
<ClInclude Include="$(SrcDir)parsers\firstparser.hxx">
|
|
||||||
<Filter>Header Files</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="$(SrcDir)parsers\htmlparser.hxx">
|
|
||||||
<Filter>Header Files</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="$(SrcDir)parsers\latexparser.hxx">
|
|
||||||
<Filter>Header Files</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="$(SrcDir)parsers\manparser.hxx">
|
|
||||||
<Filter>Header Files</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="$(SrcDir)parsers\textparser.hxx">
|
|
||||||
<Filter>Header Files</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="$(SrcDir)win_api\config.h">
|
<ClInclude Include="$(SrcDir)win_api\config.h">
|
||||||
<Filter>Header Files</Filter>
|
<Filter>Header Files</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
<ClInclude Include="$(SrcDir)win_api\hunspelldll.h">
|
|
||||||
<Filter>Header Files</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="$(SrcDir)hunspell\affentry.hxx">
|
<ClInclude Include="$(SrcDir)hunspell\affentry.hxx">
|
||||||
<Filter>Header Files</Filter>
|
<Filter>Header Files</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
@ -101,27 +79,6 @@
|
||||||
<ClCompile Include="$(SrcDir)hunspell\suggestmgr.cxx">
|
<ClCompile Include="$(SrcDir)hunspell\suggestmgr.cxx">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="$(SrcDir)parsers\firstparser.cxx">
|
|
||||||
<Filter>Source Files</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="$(SrcDir)parsers\htmlparser.cxx">
|
|
||||||
<Filter>Source Files</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="$(SrcDir)parsers\latexparser.cxx">
|
|
||||||
<Filter>Source Files</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="$(SrcDir)parsers\manparser.cxx">
|
|
||||||
<Filter>Source Files</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="$(SrcDir)parsers\testparser.cxx">
|
|
||||||
<Filter>Source Files</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="$(SrcDir)parsers\textparser.cxx">
|
|
||||||
<Filter>Source Files</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="$(SrcDir)win_api\hunspelldll.c">
|
|
||||||
<Filter>Source Files</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="$(SrcDir)hunspell\filemgr.cxx">
|
<ClCompile Include="$(SrcDir)hunspell\filemgr.cxx">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
@ -132,4 +89,4 @@
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
</Project>
|
</Project>
|
|
@ -13,11 +13,6 @@
|
||||||
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
//
|
//
|
||||||
|
|
||||||
/// @file spellchecker_hunspell.cpp
|
|
||||||
/// @brief Hunspell-based spell checker implementation
|
|
||||||
/// @ingroup spelling
|
|
||||||
///
|
|
||||||
|
|
||||||
#ifdef WITH_HUNSPELL
|
#ifdef WITH_HUNSPELL
|
||||||
#include "spellchecker_hunspell.h"
|
#include "spellchecker_hunspell.h"
|
||||||
|
|
||||||
|
@ -33,6 +28,9 @@
|
||||||
#include <libaegisub/make_unique.h>
|
#include <libaegisub/make_unique.h>
|
||||||
|
|
||||||
#include <boost/range/algorithm.hpp>
|
#include <boost/range/algorithm.hpp>
|
||||||
|
|
||||||
|
#define HUNSPELL_STATIC
|
||||||
|
#undef near
|
||||||
#include <hunspell/hunspell.hxx>
|
#include <hunspell/hunspell.hxx>
|
||||||
|
|
||||||
HunspellSpellChecker::HunspellSpellChecker()
|
HunspellSpellChecker::HunspellSpellChecker()
|
||||||
|
|
2
vendor/hunspell/AUTHORS
vendored
2
vendor/hunspell/AUTHORS
vendored
|
@ -1,5 +1,5 @@
|
||||||
Author of Hunspell:
|
Author of Hunspell:
|
||||||
Németh László nemeth (at) OpenOffice.org
|
Németh László nemeth (at) numbertext.org
|
||||||
|
|
||||||
Hunspell based on OpenOffice.org's Myspell. MySpell's author:
|
Hunspell based on OpenOffice.org's Myspell. MySpell's author:
|
||||||
Kevin Hendricks kevin.hendricks (at) sympatico.ca
|
Kevin Hendricks kevin.hendricks (at) sympatico.ca
|
||||||
|
|
117
vendor/hunspell/ChangeLog
vendored
117
vendor/hunspell/ChangeLog
vendored
|
@ -1,3 +1,120 @@
|
||||||
|
2014-06-02 Németh László <nemeth at numbertext dot org>:
|
||||||
|
* escape spaces in paths of ODF files
|
||||||
|
|
||||||
|
2014-05-28 Németh László <nemeth at numbertext dot org>:
|
||||||
|
* add long path/Unicode path support in WIN32 environment:
|
||||||
|
- hunspell#233 (reported by mahak gark) and LibreOffice fdo#48017
|
||||||
|
* flat ODF support, eg.:
|
||||||
|
hunspell doc.fodt
|
||||||
|
cat doc.fodt | hunspell -l -O
|
||||||
|
* new options:
|
||||||
|
- -X (XML) input format
|
||||||
|
- -O (ODF or flat ODF) input format
|
||||||
|
- --check-apostrophe: check and force Unicode apostrophe usage
|
||||||
|
(ASCII or Unicode apostrophe has to be in the
|
||||||
|
WORDCHARS section of the affix file)
|
||||||
|
* fix ODF support:
|
||||||
|
- break 1-line XML of ODT documents at </style:style>, too,
|
||||||
|
not only at </text:p> (limiting tokenization problems, when
|
||||||
|
fgets stops within an XML tag)
|
||||||
|
- show ODF file path on the UI instead of the temporary file
|
||||||
|
* fix XML support:
|
||||||
|
- ', ", &, < and > in replacements converted to XML entities
|
||||||
|
- recognize &apos; at tokenization, depending on WORDCHARS
|
||||||
|
- &apos; in tokens converted to ' before spell checking and
|
||||||
|
in the output of the pipe interface
|
||||||
|
* better apostrophe usage:
|
||||||
|
- WORDCHARS only with one of the Unicode or ASCII apostrophe
|
||||||
|
results in extended word tokenization: both of them will be part of
|
||||||
|
the words (if they are inside: eg. word's, but not words').
|
||||||
|
- convert Unicode apostrophes to ASCII ones for 8-bit dictionaries
|
||||||
|
(eg. English dictionaries), or for UTF-8 dictionaries only
|
||||||
|
with ASCII apostrophe support (eg. French dictionaries).
|
||||||
|
* updated manual:
|
||||||
|
- hunspell.4 renamed to hunspell.5, see
|
||||||
|
hunspell#241 reported by Cristopher Yeleighton
|
||||||
|
- updated translations
|
||||||
|
- note about long/Unicode paths in WIN32 (hunspell.3)
|
||||||
|
|
||||||
|
2014-04-25 Németh László <nemeth at numbertext dot org>:
|
||||||
|
* OpenDocument support, eg.
|
||||||
|
hunspell *.odt
|
||||||
|
hunspell -l *.odt
|
||||||
|
* always load default personal dictionary (fix
|
||||||
|
filtering bad words - reduce this word list - using
|
||||||
|
it as a personal dictionary workflow)
|
||||||
|
* fix parsing/URL recognition problem (bad tokens
|
||||||
|
with apostrophes)
|
||||||
|
|
||||||
|
2013-07-25 pchang9@cs.wisc.edu
|
||||||
|
* moz#897255 Wasted work in line_uniq
|
||||||
|
* moz#897780 Wasted work in SuggestMgr::twowords
|
||||||
|
|
||||||
|
2013-07-25 Caolán McNamara <caolanm at LibO>:
|
||||||
|
* hunspell#167 layout problems with long lines
|
||||||
|
- based on the original fix by xorho
|
||||||
|
adapted to HEAD
|
||||||
|
* rhbz#925562 upgrade config.guess for aarch64
|
||||||
|
|
||||||
|
2013-07-24 pchang9@cs.wisc.edu
|
||||||
|
* moz#896301 Wasted work in SfxEntry::checkword
|
||||||
|
* moz#896844 Wasted work in AffixMgr::defcpd_check
|
||||||
|
|
||||||
|
2013-06-13 Konstantin Khlebniko
|
||||||
|
* #49 HashMgr::add_word computes wrong size for struct hentry
|
||||||
|
|
||||||
|
2013-06-13 Ville Skyttä
|
||||||
|
* #53 Man page syntax fixes
|
||||||
|
|
||||||
|
2013-04-19 John Thomson <john thomson at SIL>
|
||||||
|
* win_api: add remove() of Hunspell API (hun#3606435)
|
||||||
|
|
||||||
|
2013-04-19 Rouslan Solomokhin <at sf.net>
|
||||||
|
* fix crash in suggestions for 99-character long words
|
||||||
|
by extending arrays of SuggestMgr::forgotchar_*
|
||||||
|
(hun#3595024, also http://crbug.com/130128),
|
||||||
|
thanks also to Paweł Hajdan for reporting the patch
|
||||||
|
|
||||||
|
2013-04-01 Caolán McNamara <caolanm at LibO>:
|
||||||
|
* hunspell: -Werror=undef
|
||||||
|
|
||||||
|
2013-03-13 Caolán McNamara <caolanm at LibO>:
|
||||||
|
* rhbz#918938 crash in interaction with danish thesaurus
|
||||||
|
|
||||||
|
2012-09-18 Németh László <nemeth at numbertext dot org>:
|
||||||
|
* src/hunspell/affixmgr.*: - fix morphological analysis of
|
||||||
|
compound words (hun#3544994, reported by Dávid Nemeskey, fdo#55045)
|
||||||
|
|
||||||
|
2012-06-29 Caolán McNamara <caolanm at LibO>:
|
||||||
|
* fix various coverity warnings
|
||||||
|
|
||||||
|
2012-01-10 Ehsan Akhgari <ehsan at mozilla dot com>
|
||||||
|
* moz#710940 Firefox Crash [@ AffixMgr::parse_file(char const*, char
|
||||||
|
const*) ]
|
||||||
|
|
||||||
|
2011-12-16 Jared Wein <jwein at mozilla dot com>
|
||||||
|
* moz#710967 Incorrect argument passed to strncmp in
|
||||||
|
AffixMgr::parse_convtable
|
||||||
|
|
||||||
|
2011-12-06 Caolán McNamara <caolanm at LibO>:
|
||||||
|
* rhbz#759647 fixed tempname of hunSPELL.bak collides with other users
|
||||||
|
when multiple edits in one dir
|
||||||
|
|
||||||
|
2011-10-13 Caolán McNamara <caolanm at LibO>:
|
||||||
|
* moz#694002 crash in hunspell affixmgr on exit with bad .aff
|
||||||
|
* leak in hunspell affixmgr with bad .aff
|
||||||
|
|
||||||
|
2011-09-19 Caolán McNamara <caolanm at LibO>:
|
||||||
|
* make libparsers.a not installed thanks to Tomáš Chvátal
|
||||||
|
|
||||||
|
2011-06-23 Caolán McNamara <caolanm at LibO>:
|
||||||
|
* fix some windows compiler warnings
|
||||||
|
|
||||||
|
2011-05-24 Németh László <nemeth at numbertext dot org>:
|
||||||
|
* src/hunspell/affixmgr.*: allow twofold suffixes in compounds
|
||||||
|
by extended version of Arno Teigseth's patch, see hun#3288562.
|
||||||
|
- new option for this feature: COMPOUNDMORESUFFIXES
|
||||||
|
|
||||||
2011-02-16 Németh László <nemeth at numbertext dot org>:
|
2011-02-16 Németh László <nemeth at numbertext dot org>:
|
||||||
* src/*/Makefile.am: fix library versioning, the problem reported by
|
* src/*/Makefile.am: fix library versioning, the problem reported by
|
||||||
Rene Engerhald and Simon Brouwer.
|
Rene Engerhald and Simon Brouwer.
|
||||||
|
|
4
vendor/hunspell/NEWS
vendored
4
vendor/hunspell/NEWS
vendored
|
@ -1,3 +1,7 @@
|
||||||
|
2014-06-02: Hunspell 1.3.3 release:
|
||||||
|
- OpenDocument (ODF and Flat ODF) support (ODF needs unzip program)
|
||||||
|
- various bug fixes
|
||||||
|
|
||||||
2011-02-02: Hunspell 1.3.2 release:
|
2011-02-02: Hunspell 1.3.2 release:
|
||||||
- fix library versioning
|
- fix library versioning
|
||||||
- improved manual
|
- improved manual
|
||||||
|
|
9
vendor/hunspell/README
vendored
9
vendor/hunspell/README
vendored
|
@ -47,7 +47,7 @@ glibc-devel
|
||||||
|
|
||||||
optional developer packages:
|
optional developer packages:
|
||||||
|
|
||||||
ncurses (need for --with-ui)
|
ncurses (need for --with-ui), eg. libncursesw5 for UTF-8
|
||||||
readline (for fancy input line editing,
|
readline (for fancy input line editing,
|
||||||
configure parameter: --with-readline)
|
configure parameter: --with-readline)
|
||||||
locale and gettext (but you can also use the
|
locale and gettext (but you can also use the
|
||||||
|
@ -118,7 +118,7 @@ Documentation
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
features and dictionary format:
|
features and dictionary format:
|
||||||
man 4 hunspell
|
man 5 hunspell
|
||||||
|
|
||||||
man hunspell
|
man hunspell
|
||||||
hunspell -h
|
hunspell -h
|
||||||
|
@ -169,6 +169,9 @@ Dictionaries
|
||||||
------------
|
------------
|
||||||
|
|
||||||
Myspell & Hunspell dictionaries:
|
Myspell & Hunspell dictionaries:
|
||||||
|
http://extensions.libreoffice.org
|
||||||
|
http://cgit.freedesktop.org/libreoffice/dictionaries
|
||||||
|
http://extensions.openoffice.org
|
||||||
http://wiki.services.openoffice.org/wiki/Dictionaries
|
http://wiki.services.openoffice.org/wiki/Dictionaries
|
||||||
|
|
||||||
Aspell dictionaries (need some conversion):
|
Aspell dictionaries (need some conversion):
|
||||||
|
@ -176,4 +179,4 @@ ftp://ftp.gnu.org/gnu/aspell/dict
|
||||||
Conversion steps: see relevant feature request at http://hunspell.sf.net.
|
Conversion steps: see relevant feature request at http://hunspell.sf.net.
|
||||||
|
|
||||||
László Németh
|
László Németh
|
||||||
nemeth at OOo
|
nemeth at numbertext org
|
||||||
|
|
6
vendor/hunspell/THANKS
vendored
6
vendor/hunspell/THANKS
vendored
|
@ -12,6 +12,7 @@ Ingo H. de Boer
|
||||||
Simon Brouwer
|
Simon Brouwer
|
||||||
Jeppe Bundsgaard
|
Jeppe Bundsgaard
|
||||||
Ginn Chen
|
Ginn Chen
|
||||||
|
Tomáš Chvátal
|
||||||
Aaron Digulla
|
Aaron Digulla
|
||||||
Dmitri Gabinski
|
Dmitri Gabinski
|
||||||
Dvornik László
|
Dvornik László
|
||||||
|
@ -107,6 +108,9 @@ and others (see also AUTHORS.myspell)
|
||||||
FSF.hu Foundation
|
FSF.hu Foundation
|
||||||
http://www.fsf.hu
|
http://www.fsf.hu
|
||||||
|
|
||||||
|
LibreOffice community
|
||||||
|
http://www.libreoffice.org
|
||||||
|
|
||||||
MOKK Research Centre
|
MOKK Research Centre
|
||||||
Budapest University of Technology and Economics
|
Budapest University of Technology and Economics
|
||||||
Sociology and Communications Department
|
Sociology and Communications Department
|
||||||
|
@ -129,4 +133,4 @@ UHU-Linux Kft.
|
||||||
Thanks,
|
Thanks,
|
||||||
|
|
||||||
Németh László
|
Németh László
|
||||||
nemeth at OOo
|
nemeth at numbertext org
|
||||||
|
|
151
vendor/hunspell/src/hunspell/affentry.cxx
vendored
151
vendor/hunspell/src/hunspell/affentry.cxx
vendored
|
@ -9,13 +9,17 @@
|
||||||
#include "affentry.hxx"
|
#include "affentry.hxx"
|
||||||
#include "csutil.hxx"
|
#include "csutil.hxx"
|
||||||
|
|
||||||
|
#define MAXTEMPWORDLEN (MAXWORDUTF8LEN + 4)
|
||||||
|
|
||||||
PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
|
PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
|
||||||
|
// register affix manager
|
||||||
|
: pmyMgr(pmgr)
|
||||||
|
, next(NULL)
|
||||||
|
, nexteq(NULL)
|
||||||
|
, nextne(NULL)
|
||||||
|
, flgnxt(NULL)
|
||||||
{
|
{
|
||||||
// register affix manager
|
|
||||||
pmyMgr = pmgr;
|
|
||||||
|
|
||||||
// set up its initial values
|
// set up its initial values
|
||||||
|
|
||||||
aflag = dp->aflag; // flag
|
aflag = dp->aflag; // flag
|
||||||
strip = dp->strip; // string to strip
|
strip = dp->strip; // string to strip
|
||||||
appnd = dp->appnd; // string to append
|
appnd = dp->appnd; // string to append
|
||||||
|
@ -28,9 +32,6 @@ PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
|
||||||
memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1);
|
memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1);
|
||||||
c.l.conds2 = dp->c.l.conds2;
|
c.l.conds2 = dp->c.l.conds2;
|
||||||
} else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
|
} else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
|
||||||
next = NULL;
|
|
||||||
nextne = NULL;
|
|
||||||
nexteq = NULL;
|
|
||||||
morphcode = dp->morphcode;
|
morphcode = dp->morphcode;
|
||||||
contclass = dp->contclass;
|
contclass = dp->contclass;
|
||||||
contclasslen = dp->contclasslen;
|
contclasslen = dp->contclasslen;
|
||||||
|
@ -53,16 +54,17 @@ PfxEntry::~PfxEntry()
|
||||||
// add prefix to this word assuming conditions hold
|
// add prefix to this word assuming conditions hold
|
||||||
char * PfxEntry::add(const char * word, int len)
|
char * PfxEntry::add(const char * word, int len)
|
||||||
{
|
{
|
||||||
char tword[MAXWORDUTF8LEN + 4];
|
char tword[MAXTEMPWORDLEN];
|
||||||
|
|
||||||
if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
|
if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
|
||||||
(len >= numconds) && test_condition(word) &&
|
(len >= numconds) && test_condition(word) &&
|
||||||
(!stripl || (strncmp(word, strip, stripl) == 0)) &&
|
(!stripl || (strncmp(word, strip, stripl) == 0)) &&
|
||||||
((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
|
((MAXTEMPWORDLEN) > (len + appndl - stripl))) {
|
||||||
/* we have a match so add prefix */
|
/* we have a match so add prefix */
|
||||||
char * pp = tword;
|
char * pp = tword;
|
||||||
if (appndl) {
|
if (appndl) {
|
||||||
strcpy(tword,appnd);
|
strncpy(tword, appnd, MAXTEMPWORDLEN-1);
|
||||||
|
tword[MAXTEMPWORDLEN-1] = '\0';
|
||||||
pp += appndl;
|
pp += appndl;
|
||||||
}
|
}
|
||||||
strcpy(pp, (word + stripl));
|
strcpy(pp, (word + stripl));
|
||||||
|
@ -110,13 +112,15 @@ inline int PfxEntry::test_condition(const char * st)
|
||||||
if (*st == '\0' && p) return 0; // word <= condition
|
if (*st == '\0' && p) return 0; // word <= condition
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case '.': if (!pos) { // dots are not metacharacters in groups: [.]
|
case '.':
|
||||||
|
if (!pos) { // dots are not metacharacters in groups: [.]
|
||||||
p = nextchar(p);
|
p = nextchar(p);
|
||||||
// skip the next character
|
// skip the next character
|
||||||
for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
|
for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
|
||||||
if (*st == '\0' && p) return 0; // word <= condition
|
if (*st == '\0' && p) return 0; // word <= condition
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
/* FALLTHROUGH */
|
||||||
default: {
|
default: {
|
||||||
if (*st == *p) {
|
if (*st == *p) {
|
||||||
st++;
|
st++;
|
||||||
|
@ -133,11 +137,11 @@ inline int PfxEntry::test_condition(const char * st)
|
||||||
}
|
}
|
||||||
if (pos && st != pos) {
|
if (pos && st != pos) {
|
||||||
ingroup = true;
|
ingroup = true;
|
||||||
while (p && *p != ']' && (p = nextchar(p)));
|
while (p && *p != ']' && ((p = nextchar(p)) != NULL));
|
||||||
}
|
}
|
||||||
} else if (pos) {
|
} else if (pos) {
|
||||||
ingroup = true;
|
ingroup = true;
|
||||||
while (p && *p != ']' && (p = nextchar(p)));
|
while (p && *p != ']' && ((p = nextchar(p)) != NULL));
|
||||||
}
|
}
|
||||||
} else if (pos) { // group
|
} else if (pos) { // group
|
||||||
p = nextchar(p);
|
p = nextchar(p);
|
||||||
|
@ -153,7 +157,7 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound
|
||||||
{
|
{
|
||||||
int tmpl; // length of tmpword
|
int tmpl; // length of tmpword
|
||||||
struct hentry * he; // hash entry of root word or NULL
|
struct hentry * he; // hash entry of root word or NULL
|
||||||
char tmpword[MAXWORDUTF8LEN + 4];
|
char tmpword[MAXTEMPWORDLEN];
|
||||||
|
|
||||||
// on entry prefix is 0 length or already matches the beginning of the word.
|
// on entry prefix is 0 length or already matches the beginning of the word.
|
||||||
// So if the remaining root word has positive length
|
// So if the remaining root word has positive length
|
||||||
|
@ -167,7 +171,10 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound
|
||||||
// generate new root word by removing prefix and adding
|
// generate new root word by removing prefix and adding
|
||||||
// back any characters that would have been stripped
|
// back any characters that would have been stripped
|
||||||
|
|
||||||
if (stripl) strcpy (tmpword, strip);
|
if (stripl) {
|
||||||
|
strncpy(tmpword, strip, MAXTEMPWORDLEN-1);
|
||||||
|
tmpword[MAXTEMPWORDLEN-1] = '\0';
|
||||||
|
}
|
||||||
strcpy ((tmpword + stripl), (word + appndl));
|
strcpy ((tmpword + stripl), (word + appndl));
|
||||||
|
|
||||||
// now make sure all of the conditions on characters
|
// now make sure all of the conditions on characters
|
||||||
|
@ -214,7 +221,7 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len,
|
||||||
{
|
{
|
||||||
int tmpl; // length of tmpword
|
int tmpl; // length of tmpword
|
||||||
struct hentry * he; // hash entry of root word or NULL
|
struct hentry * he; // hash entry of root word or NULL
|
||||||
char tmpword[MAXWORDUTF8LEN + 4];
|
char tmpword[MAXTEMPWORDLEN];
|
||||||
|
|
||||||
// on entry prefix is 0 length or already matches the beginning of the word.
|
// on entry prefix is 0 length or already matches the beginning of the word.
|
||||||
// So if the remaining root word has positive length
|
// So if the remaining root word has positive length
|
||||||
|
@ -229,7 +236,10 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len,
|
||||||
// generate new root word by removing prefix and adding
|
// generate new root word by removing prefix and adding
|
||||||
// back any characters that would have been stripped
|
// back any characters that would have been stripped
|
||||||
|
|
||||||
if (stripl) strcpy (tmpword, strip);
|
if (stripl) {
|
||||||
|
strncpy(tmpword, strip, MAXTEMPWORDLEN-1);
|
||||||
|
tmpword[MAXTEMPWORDLEN-1] = '\0';
|
||||||
|
}
|
||||||
strcpy ((tmpword + stripl), (word + appndl));
|
strcpy ((tmpword + stripl), (word + appndl));
|
||||||
|
|
||||||
// now make sure all of the conditions on characters
|
// now make sure all of the conditions on characters
|
||||||
|
@ -261,7 +271,7 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len,
|
||||||
char in_compound, const FLAG needflag)
|
char in_compound, const FLAG needflag)
|
||||||
{
|
{
|
||||||
int tmpl; // length of tmpword
|
int tmpl; // length of tmpword
|
||||||
char tmpword[MAXWORDUTF8LEN + 4];
|
char tmpword[MAXTEMPWORDLEN];
|
||||||
|
|
||||||
// on entry prefix is 0 length or already matches the beginning of the word.
|
// on entry prefix is 0 length or already matches the beginning of the word.
|
||||||
// So if the remaining root word has positive length
|
// So if the remaining root word has positive length
|
||||||
|
@ -276,7 +286,10 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len,
|
||||||
// generate new root word by removing prefix and adding
|
// generate new root word by removing prefix and adding
|
||||||
// back any characters that would have been stripped
|
// back any characters that would have been stripped
|
||||||
|
|
||||||
if (stripl) strcpy (tmpword, strip);
|
if (stripl) {
|
||||||
|
strncpy(tmpword, strip, MAXTEMPWORDLEN-1);
|
||||||
|
tmpword[MAXTEMPWORDLEN-1] = '\0';
|
||||||
|
}
|
||||||
strcpy ((tmpword + stripl), (word + appndl));
|
strcpy ((tmpword + stripl), (word + appndl));
|
||||||
|
|
||||||
// now make sure all of the conditions on characters
|
// now make sure all of the conditions on characters
|
||||||
|
@ -308,7 +321,7 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
|
||||||
{
|
{
|
||||||
int tmpl; // length of tmpword
|
int tmpl; // length of tmpword
|
||||||
struct hentry * he; // hash entry of root word or NULL
|
struct hentry * he; // hash entry of root word or NULL
|
||||||
char tmpword[MAXWORDUTF8LEN + 4];
|
char tmpword[MAXTEMPWORDLEN];
|
||||||
char result[MAXLNLEN];
|
char result[MAXLNLEN];
|
||||||
char * st;
|
char * st;
|
||||||
|
|
||||||
|
@ -327,7 +340,10 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
|
||||||
// generate new root word by removing prefix and adding
|
// generate new root word by removing prefix and adding
|
||||||
// back any characters that would have been stripped
|
// back any characters that would have been stripped
|
||||||
|
|
||||||
if (stripl) strcpy (tmpword, strip);
|
if (stripl) {
|
||||||
|
strncpy(tmpword, strip, MAXTEMPWORDLEN-1);
|
||||||
|
tmpword[MAXTEMPWORDLEN-1] = '\0';
|
||||||
|
}
|
||||||
strcpy ((tmpword + stripl), (word + appndl));
|
strcpy ((tmpword + stripl), (word + appndl));
|
||||||
|
|
||||||
// now make sure all of the conditions on characters
|
// now make sure all of the conditions on characters
|
||||||
|
@ -395,10 +411,15 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
|
||||||
}
|
}
|
||||||
|
|
||||||
SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
|
SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
|
||||||
|
: pmyMgr(pmgr) // register affix manager
|
||||||
|
, next(NULL)
|
||||||
|
, nexteq(NULL)
|
||||||
|
, nextne(NULL)
|
||||||
|
, flgnxt(NULL)
|
||||||
|
, l_morph(NULL)
|
||||||
|
, r_morph(NULL)
|
||||||
|
, eq_morph(NULL)
|
||||||
{
|
{
|
||||||
// register affix manager
|
|
||||||
pmyMgr = pmgr;
|
|
||||||
|
|
||||||
// set up its initial values
|
// set up its initial values
|
||||||
aflag = dp->aflag; // char flag
|
aflag = dp->aflag; // char flag
|
||||||
strip = dp->strip; // string to strip
|
strip = dp->strip; // string to strip
|
||||||
|
@ -413,7 +434,6 @@ SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
|
||||||
memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1);
|
memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1);
|
||||||
c.l.conds2 = dp->c.l.conds2;
|
c.l.conds2 = dp->c.l.conds2;
|
||||||
} else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
|
} else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
|
||||||
|
|
||||||
rappnd = myrevstrdup(appnd);
|
rappnd = myrevstrdup(appnd);
|
||||||
morphcode = dp->morphcode;
|
morphcode = dp->morphcode;
|
||||||
contclass = dp->contclass;
|
contclass = dp->contclass;
|
||||||
|
@ -438,15 +458,16 @@ SfxEntry::~SfxEntry()
|
||||||
// add suffix to this word assuming conditions hold
|
// add suffix to this word assuming conditions hold
|
||||||
char * SfxEntry::add(const char * word, int len)
|
char * SfxEntry::add(const char * word, int len)
|
||||||
{
|
{
|
||||||
char tword[MAXWORDUTF8LEN + 4];
|
char tword[MAXTEMPWORDLEN];
|
||||||
|
|
||||||
/* make sure all conditions match */
|
/* make sure all conditions match */
|
||||||
if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
|
if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) &&
|
||||||
(len >= numconds) && test_condition(word + len, word) &&
|
(len >= numconds) && test_condition(word + len, word) &&
|
||||||
(!stripl || (strcmp(word + len - stripl, strip) == 0)) &&
|
(!stripl || (strcmp(word + len - stripl, strip) == 0)) &&
|
||||||
((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
|
((MAXTEMPWORDLEN) > (len + appndl - stripl))) {
|
||||||
/* we have a match so add suffix */
|
/* we have a match so add suffix */
|
||||||
strcpy(tword,word);
|
strncpy(tword, word, MAXTEMPWORDLEN-1);
|
||||||
|
tword[MAXTEMPWORDLEN-1] = '\0';
|
||||||
if (appndl) {
|
if (appndl) {
|
||||||
strcpy(tword + len - stripl, appnd);
|
strcpy(tword + len - stripl, appnd);
|
||||||
} else {
|
} else {
|
||||||
|
@ -481,24 +502,37 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
|
||||||
int i = 1;
|
int i = 1;
|
||||||
while (1) {
|
while (1) {
|
||||||
switch (*p) {
|
switch (*p) {
|
||||||
case '\0': return 1;
|
case '\0':
|
||||||
case '[': { p = nextchar(p); pos = st; break; }
|
return 1;
|
||||||
case '^': { p = nextchar(p); neg = true; break; }
|
case '[':
|
||||||
case ']': { if (!neg && !ingroup) return 0;
|
p = nextchar(p);
|
||||||
i++;
|
pos = st;
|
||||||
// skip the next character
|
break;
|
||||||
if (!ingroup) {
|
case '^':
|
||||||
for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--);
|
p = nextchar(p);
|
||||||
st--;
|
neg = true;
|
||||||
}
|
break;
|
||||||
pos = NULL;
|
case ']':
|
||||||
neg = false;
|
if (!neg && !ingroup)
|
||||||
ingroup = false;
|
return 0;
|
||||||
p = nextchar(p);
|
i++;
|
||||||
if (st < beg && p) return 0; // word <= condition
|
// skip the next character
|
||||||
break;
|
if (!ingroup)
|
||||||
}
|
{
|
||||||
case '.': if (!pos) { // dots are not metacharacters in groups: [.]
|
for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--);
|
||||||
|
st--;
|
||||||
|
}
|
||||||
|
pos = NULL;
|
||||||
|
neg = false;
|
||||||
|
ingroup = false;
|
||||||
|
p = nextchar(p);
|
||||||
|
if (st < beg && p)
|
||||||
|
return 0; // word <= condition
|
||||||
|
break;
|
||||||
|
case '.':
|
||||||
|
if (!pos)
|
||||||
|
{
|
||||||
|
// dots are not metacharacters in groups: [.]
|
||||||
p = nextchar(p);
|
p = nextchar(p);
|
||||||
// skip the next character
|
// skip the next character
|
||||||
for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--);
|
for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--);
|
||||||
|
@ -513,6 +547,7 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
/* FALLTHROUGH */
|
||||||
default: {
|
default: {
|
||||||
if (*st == *p) {
|
if (*st == *p) {
|
||||||
p = nextchar(p);
|
p = nextchar(p);
|
||||||
|
@ -533,7 +568,7 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
|
||||||
if (neg) return 0;
|
if (neg) return 0;
|
||||||
else if (i == numconds) return 1;
|
else if (i == numconds) return 1;
|
||||||
ingroup = true;
|
ingroup = true;
|
||||||
while (p && *p != ']' && (p = nextchar(p)));
|
while (p && *p != ']' && ((p = nextchar(p)) != NULL));
|
||||||
st--;
|
st--;
|
||||||
}
|
}
|
||||||
if (p && *p != ']') p = nextchar(p);
|
if (p && *p != ']') p = nextchar(p);
|
||||||
|
@ -541,7 +576,7 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
|
||||||
if (neg) return 0;
|
if (neg) return 0;
|
||||||
else if (i == numconds) return 1;
|
else if (i == numconds) return 1;
|
||||||
ingroup = true;
|
ingroup = true;
|
||||||
while (p && *p != ']' && (p = nextchar(p)));
|
while (p && *p != ']' && ((p = nextchar(p)) != NULL));
|
||||||
// if (p && *p != ']') p = nextchar(p);
|
// if (p && *p != ']') p = nextchar(p);
|
||||||
st--;
|
st--;
|
||||||
}
|
}
|
||||||
|
@ -567,7 +602,7 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
|
||||||
int tmpl; // length of tmpword
|
int tmpl; // length of tmpword
|
||||||
struct hentry * he; // hash entry pointer
|
struct hentry * he; // hash entry pointer
|
||||||
unsigned char * cp;
|
unsigned char * cp;
|
||||||
char tmpword[MAXWORDUTF8LEN + 4];
|
char tmpword[MAXTEMPWORDLEN];
|
||||||
PfxEntry* ep = ppfx;
|
PfxEntry* ep = ppfx;
|
||||||
|
|
||||||
// if this suffix is being cross checked with a prefix
|
// if this suffix is being cross checked with a prefix
|
||||||
|
@ -592,7 +627,8 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
|
||||||
// back any characters that would have been stripped or
|
// back any characters that would have been stripped or
|
||||||
// null terminating the shorter string
|
// null terminating the shorter string
|
||||||
|
|
||||||
strcpy (tmpword, word);
|
strncpy (tmpword, word, MAXTEMPWORDLEN-1);
|
||||||
|
tmpword[MAXTEMPWORDLEN-1] = '\0';
|
||||||
cp = (unsigned char *)(tmpword + tmpl);
|
cp = (unsigned char *)(tmpword + tmpl);
|
||||||
if (stripl) {
|
if (stripl) {
|
||||||
strcpy ((char *)cp, strip);
|
strcpy ((char *)cp, strip);
|
||||||
|
@ -645,7 +681,10 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
|
||||||
} else if (wlst && (*ns < maxSug)) {
|
} else if (wlst && (*ns < maxSug)) {
|
||||||
int cwrd = 1;
|
int cwrd = 1;
|
||||||
for (int k=0; k < *ns; k++)
|
for (int k=0; k < *ns; k++)
|
||||||
if (strcmp(tmpword, wlst[k]) == 0) cwrd = 0;
|
if (strcmp(tmpword, wlst[k]) == 0) {
|
||||||
|
cwrd = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
if (cwrd) {
|
if (cwrd) {
|
||||||
wlst[*ns] = mystrdup(tmpword);
|
wlst[*ns] = mystrdup(tmpword);
|
||||||
if (wlst[*ns] == NULL) {
|
if (wlst[*ns] == NULL) {
|
||||||
|
@ -668,7 +707,7 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
|
||||||
int tmpl; // length of tmpword
|
int tmpl; // length of tmpword
|
||||||
struct hentry * he; // hash entry pointer
|
struct hentry * he; // hash entry pointer
|
||||||
unsigned char * cp;
|
unsigned char * cp;
|
||||||
char tmpword[MAXWORDUTF8LEN + 4];
|
char tmpword[MAXTEMPWORDLEN];
|
||||||
PfxEntry* ep = ppfx;
|
PfxEntry* ep = ppfx;
|
||||||
|
|
||||||
|
|
||||||
|
@ -692,7 +731,8 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
|
||||||
// back any characters that would have been stripped or
|
// back any characters that would have been stripped or
|
||||||
// null terminating the shorter string
|
// null terminating the shorter string
|
||||||
|
|
||||||
strcpy (tmpword, word);
|
strncpy(tmpword, word, MAXTEMPWORDLEN-1);
|
||||||
|
tmpword[MAXTEMPWORDLEN-1] = '\0';
|
||||||
cp = (unsigned char *)(tmpword + tmpl);
|
cp = (unsigned char *)(tmpword + tmpl);
|
||||||
if (stripl) {
|
if (stripl) {
|
||||||
strcpy ((char *)cp, strip);
|
strcpy ((char *)cp, strip);
|
||||||
|
@ -729,7 +769,7 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
|
||||||
{
|
{
|
||||||
int tmpl; // length of tmpword
|
int tmpl; // length of tmpword
|
||||||
unsigned char * cp;
|
unsigned char * cp;
|
||||||
char tmpword[MAXWORDUTF8LEN + 4];
|
char tmpword[MAXTEMPWORDLEN];
|
||||||
PfxEntry* ep = ppfx;
|
PfxEntry* ep = ppfx;
|
||||||
char * st;
|
char * st;
|
||||||
|
|
||||||
|
@ -757,7 +797,8 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
|
||||||
// back any characters that would have been stripped or
|
// back any characters that would have been stripped or
|
||||||
// null terminating the shorter string
|
// null terminating the shorter string
|
||||||
|
|
||||||
strcpy (tmpword, word);
|
strncpy(tmpword, word, MAXTEMPWORDLEN-1);
|
||||||
|
tmpword[MAXTEMPWORDLEN-1] = '\0';
|
||||||
cp = (unsigned char *)(tmpword + tmpl);
|
cp = (unsigned char *)(tmpword + tmpl);
|
||||||
if (stripl) {
|
if (stripl) {
|
||||||
strcpy ((char *)cp, strip);
|
strcpy ((char *)cp, strip);
|
||||||
|
|
8
vendor/hunspell/src/hunspell/affentry.hxx
vendored
8
vendor/hunspell/src/hunspell/affentry.hxx
vendored
|
@ -11,6 +11,10 @@
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry
|
class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry
|
||||||
{
|
{
|
||||||
|
private:
|
||||||
|
PfxEntry(const PfxEntry&);
|
||||||
|
PfxEntry& operator = (const PfxEntry&);
|
||||||
|
private:
|
||||||
AffixMgr* pmyMgr;
|
AffixMgr* pmyMgr;
|
||||||
|
|
||||||
PfxEntry * next;
|
PfxEntry * next;
|
||||||
|
@ -67,6 +71,10 @@ public:
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry
|
class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry
|
||||||
{
|
{
|
||||||
|
private:
|
||||||
|
SfxEntry(const SfxEntry&);
|
||||||
|
SfxEntry& operator = (const SfxEntry&);
|
||||||
|
private:
|
||||||
AffixMgr* pmyMgr;
|
AffixMgr* pmyMgr;
|
||||||
char * rappnd;
|
char * rappnd;
|
||||||
|
|
||||||
|
|
145
vendor/hunspell/src/hunspell/affixmgr.cxx
vendored
145
vendor/hunspell/src/hunspell/affixmgr.cxx
vendored
|
@ -48,6 +48,7 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * k
|
||||||
compoundroot = FLAG_NULL; // compound word signing flag
|
compoundroot = FLAG_NULL; // compound word signing flag
|
||||||
compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
|
compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
|
||||||
compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word
|
compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word
|
||||||
|
compoundmoresuffixes = 0; // allow more suffixes within compound words
|
||||||
checkcompounddup = 0; // forbid double words in compounds
|
checkcompounddup = 0; // forbid double words in compounds
|
||||||
checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
|
checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
|
||||||
checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
|
checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
|
||||||
|
@ -253,6 +254,14 @@ AffixMgr::~AffixMgr()
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Releases the affix-file reader and finalizes the affix tables.
// Deletes afflst (the pointer must not be used after this call), then runs
// process_pfx_tree_to_list() and process_sfx_tree_to_list().
// parse_file calls this before returning on a parse error and once more
// after its read loop has finished.
void AffixMgr::finishFileMgr(FileMgr *afflst)
|
||||||
|
{
|
||||||
|
delete afflst;
|
||||||
|
|
||||||
|
// convert affix trees to sorted list
|
||||||
|
process_pfx_tree_to_list();
|
||||||
|
||||||
|
process_sfx_tree_to_list();
|
||||||
|
}
|
||||||
|
|
||||||
// read in aff file and build up prefix and suffix entry objects
|
// read in aff file and build up prefix and suffix entry objects
|
||||||
int AffixMgr::parse_file(const char * affpath, const char * key)
|
int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
|
@ -279,7 +288,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
|
|
||||||
// read in each line ignoring any that do not
|
// read in each line ignoring any that do not
|
||||||
// start with a known line type indicator
|
// start with a known line type indicator
|
||||||
while ((line = afflst->getline())) {
|
while ((line = afflst->getline()) != NULL) {
|
||||||
mychomp(line);
|
mychomp(line);
|
||||||
|
|
||||||
/* remove byte order mark */
|
/* remove byte order mark */
|
||||||
|
@ -294,7 +303,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the keyboard string */
|
/* parse in the keyboard string */
|
||||||
if (strncmp(line,"KEY",3) == 0) {
|
if (strncmp(line,"KEY",3) == 0) {
|
||||||
if (parse_string(line, &keystring, afflst->getlinenum())) {
|
if (parse_string(line, &keystring, afflst->getlinenum())) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -302,7 +311,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the try string */
|
/* parse in the try string */
|
||||||
if (strncmp(line,"TRY",3) == 0) {
|
if (strncmp(line,"TRY",3) == 0) {
|
||||||
if (parse_string(line, &trystring, afflst->getlinenum())) {
|
if (parse_string(line, &trystring, afflst->getlinenum())) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -310,7 +319,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the name of the character set used by the .dict and .aff */
|
/* parse in the name of the character set used by the .dict and .aff */
|
||||||
if (strncmp(line,"SET",3) == 0) {
|
if (strncmp(line,"SET",3) == 0) {
|
||||||
if (parse_string(line, &encoding, afflst->getlinenum())) {
|
if (parse_string(line, &encoding, afflst->getlinenum())) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (strcmp(encoding, "UTF-8") == 0) {
|
if (strcmp(encoding, "UTF-8") == 0) {
|
||||||
|
@ -330,7 +339,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the flag used by the controlled compound words */
|
/* parse in the flag used by the controlled compound words */
|
||||||
if (strncmp(line,"COMPOUNDFLAG",12) == 0) {
|
if (strncmp(line,"COMPOUNDFLAG",12) == 0) {
|
||||||
if (parse_flag(line, &compoundflag, afflst)) {
|
if (parse_flag(line, &compoundflag, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -339,12 +348,12 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
if (strncmp(line,"COMPOUNDBEGIN",13) == 0) {
|
if (strncmp(line,"COMPOUNDBEGIN",13) == 0) {
|
||||||
if (complexprefixes) {
|
if (complexprefixes) {
|
||||||
if (parse_flag(line, &compoundend, afflst)) {
|
if (parse_flag(line, &compoundend, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (parse_flag(line, &compoundbegin, afflst)) {
|
if (parse_flag(line, &compoundbegin, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -353,7 +362,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the flag used by compound words */
|
/* parse in the flag used by compound words */
|
||||||
if (strncmp(line,"COMPOUNDMIDDLE",14) == 0) {
|
if (strncmp(line,"COMPOUNDMIDDLE",14) == 0) {
|
||||||
if (parse_flag(line, &compoundmiddle, afflst)) {
|
if (parse_flag(line, &compoundmiddle, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -361,12 +370,12 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
if (strncmp(line,"COMPOUNDEND",11) == 0) {
|
if (strncmp(line,"COMPOUNDEND",11) == 0) {
|
||||||
if (complexprefixes) {
|
if (complexprefixes) {
|
||||||
if (parse_flag(line, &compoundbegin, afflst)) {
|
if (parse_flag(line, &compoundbegin, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (parse_flag(line, &compoundend, afflst)) {
|
if (parse_flag(line, &compoundend, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -375,7 +384,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the data used by compound_check() method */
|
/* parse in the data used by compound_check() method */
|
||||||
if (strncmp(line,"COMPOUNDWORDMAX",15) == 0) {
|
if (strncmp(line,"COMPOUNDWORDMAX",15) == 0) {
|
||||||
if (parse_num(line, &cpdwordmax, afflst)) {
|
if (parse_num(line, &cpdwordmax, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -383,7 +392,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the flag sign compounds in dictionary */
|
/* parse in the flag sign compounds in dictionary */
|
||||||
if (strncmp(line,"COMPOUNDROOT",12) == 0) {
|
if (strncmp(line,"COMPOUNDROOT",12) == 0) {
|
||||||
if (parse_flag(line, &compoundroot, afflst)) {
|
if (parse_flag(line, &compoundroot, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -391,7 +400,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the flag used by compound_check() method */
|
/* parse in the flag used by compound_check() method */
|
||||||
if (strncmp(line,"COMPOUNDPERMITFLAG",18) == 0) {
|
if (strncmp(line,"COMPOUNDPERMITFLAG",18) == 0) {
|
||||||
if (parse_flag(line, &compoundpermitflag, afflst)) {
|
if (parse_flag(line, &compoundpermitflag, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -399,11 +408,15 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the flag used by compound_check() method */
|
/* parse in the flag used by compound_check() method */
|
||||||
if (strncmp(line,"COMPOUNDFORBIDFLAG",18) == 0) {
|
if (strncmp(line,"COMPOUNDFORBIDFLAG",18) == 0) {
|
||||||
if (parse_flag(line, &compoundforbidflag, afflst)) {
|
if (parse_flag(line, &compoundforbidflag, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (strncmp(line,"COMPOUNDMORESUFFIXES",20) == 0) {
|
||||||
|
compoundmoresuffixes = 1;
|
||||||
|
}
|
||||||
|
|
||||||
if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0) {
|
if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0) {
|
||||||
checkcompounddup = 1;
|
checkcompounddup = 1;
|
||||||
}
|
}
|
||||||
|
@ -426,14 +439,14 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
|
|
||||||
if (strncmp(line,"NOSUGGEST",9) == 0) {
|
if (strncmp(line,"NOSUGGEST",9) == 0) {
|
||||||
if (parse_flag(line, &nosuggest, afflst)) {
|
if (parse_flag(line, &nosuggest, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strncmp(line,"NONGRAMSUGGEST",14) == 0) {
|
if (strncmp(line,"NONGRAMSUGGEST",14) == 0) {
|
||||||
if (parse_flag(line, &nongramsuggest, afflst)) {
|
if (parse_flag(line, &nongramsuggest, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -441,7 +454,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the flag used by forbidden words */
|
/* parse in the flag used by forbidden words */
|
||||||
if (strncmp(line,"FORBIDDENWORD",13) == 0) {
|
if (strncmp(line,"FORBIDDENWORD",13) == 0) {
|
||||||
if (parse_flag(line, &forbiddenword, afflst)) {
|
if (parse_flag(line, &forbiddenword, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -449,7 +462,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the LEMMA_PRESENT flag */
|
/* parse in the LEMMA_PRESENT flag */
|
||||||
if (strncmp(line,"LEMMA_PRESENT",13) == 0) {
|
if (strncmp(line,"LEMMA_PRESENT",13) == 0) {
|
||||||
if (parse_flag(line, &lemma_present, afflst)) {
|
if (parse_flag(line, &lemma_present, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -457,7 +470,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the flag used by circumfixes */
|
/* parse in the flag used by circumfixes */
|
||||||
if (strncmp(line,"CIRCUMFIX",9) == 0) {
|
if (strncmp(line,"CIRCUMFIX",9) == 0) {
|
||||||
if (parse_flag(line, &circumfix, afflst)) {
|
if (parse_flag(line, &circumfix, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -465,7 +478,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the flag used by fogemorphemes */
|
/* parse in the flag used by fogemorphemes */
|
||||||
if (strncmp(line,"ONLYINCOMPOUND",14) == 0) {
|
if (strncmp(line,"ONLYINCOMPOUND",14) == 0) {
|
||||||
if (parse_flag(line, &onlyincompound, afflst)) {
|
if (parse_flag(line, &onlyincompound, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -473,7 +486,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the flag used by `needaffixs' */
|
/* parse in the flag used by `needaffixs' */
|
||||||
if (strncmp(line,"PSEUDOROOT",10) == 0) {
|
if (strncmp(line,"PSEUDOROOT",10) == 0) {
|
||||||
if (parse_flag(line, &needaffix, afflst)) {
|
if (parse_flag(line, &needaffix, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -481,7 +494,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the flag used by `needaffixs' */
|
/* parse in the flag used by `needaffixs' */
|
||||||
if (strncmp(line,"NEEDAFFIX",9) == 0) {
|
if (strncmp(line,"NEEDAFFIX",9) == 0) {
|
||||||
if (parse_flag(line, &needaffix, afflst)) {
|
if (parse_flag(line, &needaffix, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -489,7 +502,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the minimal length for words in compounds */
|
/* parse in the minimal length for words in compounds */
|
||||||
if (strncmp(line,"COMPOUNDMIN",11) == 0) {
|
if (strncmp(line,"COMPOUNDMIN",11) == 0) {
|
||||||
if (parse_num(line, &cpdmin, afflst)) {
|
if (parse_num(line, &cpdmin, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (cpdmin < 1) cpdmin = 1;
|
if (cpdmin < 1) cpdmin = 1;
|
||||||
|
@ -498,7 +511,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the max. words and syllables in compounds */
|
/* parse in the max. words and syllables in compounds */
|
||||||
if (strncmp(line,"COMPOUNDSYLLABLE",16) == 0) {
|
if (strncmp(line,"COMPOUNDSYLLABLE",16) == 0) {
|
||||||
if (parse_cpdsyllable(line, afflst)) {
|
if (parse_cpdsyllable(line, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -506,7 +519,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the flag used by compound_check() method */
|
/* parse in the flag used by compound_check() method */
|
||||||
if (strncmp(line,"SYLLABLENUM",11) == 0) {
|
if (strncmp(line,"SYLLABLENUM",11) == 0) {
|
||||||
if (parse_string(line, &cpdsyllablenum, afflst->getlinenum())) {
|
if (parse_string(line, &cpdsyllablenum, afflst->getlinenum())) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -519,7 +532,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the extra word characters */
|
/* parse in the extra word characters */
|
||||||
if (strncmp(line,"WORDCHARS",9) == 0) {
|
if (strncmp(line,"WORDCHARS",9) == 0) {
|
||||||
if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, utf8, afflst->getlinenum())) {
|
if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, utf8, afflst->getlinenum())) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -527,7 +540,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the ignored characters (for example, Arabic optional diacritic characters) */
|
/* parse in the ignored characters (for example, Arabic optional diacritic characters) */
|
||||||
if (strncmp(line,"IGNORE",6) == 0) {
|
if (strncmp(line,"IGNORE",6) == 0) {
|
||||||
if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, utf8, afflst->getlinenum())) {
|
if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, utf8, afflst->getlinenum())) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -535,7 +548,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the typical fault correcting table */
|
/* parse in the typical fault correcting table */
|
||||||
if (strncmp(line,"REP",3) == 0) {
|
if (strncmp(line,"REP",3) == 0) {
|
||||||
if (parse_reptable(line, afflst)) {
|
if (parse_reptable(line, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -543,7 +556,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the input conversion table */
|
/* parse in the input conversion table */
|
||||||
if (strncmp(line,"ICONV",5) == 0) {
|
if (strncmp(line,"ICONV",5) == 0) {
|
||||||
if (parse_convtable(line, afflst, &iconvtable, "ICONV")) {
|
if (parse_convtable(line, afflst, &iconvtable, "ICONV")) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -551,7 +564,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the output conversion table */
|
/* parse in the output conversion table */
|
||||||
if (strncmp(line,"OCONV",5) == 0) {
|
if (strncmp(line,"OCONV",5) == 0) {
|
||||||
if (parse_convtable(line, afflst, &oconvtable, "OCONV")) {
|
if (parse_convtable(line, afflst, &oconvtable, "OCONV")) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -559,7 +572,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the phonetic translation table */
|
/* parse in the phonetic translation table */
|
||||||
if (strncmp(line,"PHONE",5) == 0) {
|
if (strncmp(line,"PHONE",5) == 0) {
|
||||||
if (parse_phonetable(line, afflst)) {
|
if (parse_phonetable(line, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -567,7 +580,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the checkcompoundpattern table */
|
/* parse in the checkcompoundpattern table */
|
||||||
if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) {
|
if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) {
|
||||||
if (parse_checkcpdtable(line, afflst)) {
|
if (parse_checkcpdtable(line, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -575,7 +588,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the defcompound table */
|
/* parse in the defcompound table */
|
||||||
if (strncmp(line,"COMPOUNDRULE",12) == 0) {
|
if (strncmp(line,"COMPOUNDRULE",12) == 0) {
|
||||||
if (parse_defcpdtable(line, afflst)) {
|
if (parse_defcpdtable(line, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -583,7 +596,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the related character map table */
|
/* parse in the related character map table */
|
||||||
if (strncmp(line,"MAP",3) == 0) {
|
if (strncmp(line,"MAP",3) == 0) {
|
||||||
if (parse_maptable(line, afflst)) {
|
if (parse_maptable(line, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -591,7 +604,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the word breakpoints table */
|
/* parse in the word breakpoints table */
|
||||||
if (strncmp(line,"BREAK",5) == 0) {
|
if (strncmp(line,"BREAK",5) == 0) {
|
||||||
if (parse_breaktable(line, afflst)) {
|
if (parse_breaktable(line, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -599,7 +612,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the language for language specific codes */
|
/* parse in the language for language specific codes */
|
||||||
if (strncmp(line,"LANG",4) == 0) {
|
if (strncmp(line,"LANG",4) == 0) {
|
||||||
if (parse_string(line, &lang, afflst->getlinenum())) {
|
if (parse_string(line, &lang, afflst->getlinenum())) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
langnum = get_lang_num(lang);
|
langnum = get_lang_num(lang);
|
||||||
|
@ -612,7 +625,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
|
|
||||||
if (strncmp(line,"MAXNGRAMSUGS",12) == 0) {
|
if (strncmp(line,"MAXNGRAMSUGS",12) == 0) {
|
||||||
if (parse_num(line, &maxngramsugs, afflst)) {
|
if (parse_num(line, &maxngramsugs, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -622,14 +635,14 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
|
|
||||||
if (strncmp(line,"MAXDIFF",7) == 0) {
|
if (strncmp(line,"MAXDIFF",7) == 0) {
|
||||||
if (parse_num(line, &maxdiff, afflst)) {
|
if (parse_num(line, &maxdiff, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strncmp(line,"MAXCPDSUGS",10) == 0) {
|
if (strncmp(line,"MAXCPDSUGS",10) == 0) {
|
||||||
if (parse_num(line, &maxcpdsugs, afflst)) {
|
if (parse_num(line, &maxcpdsugs, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -649,7 +662,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the KEEPCASE flag */
|
/* parse in the KEEPCASE flag */
|
||||||
if (strncmp(line,"KEEPCASE",8) == 0) {
|
if (strncmp(line,"KEEPCASE",8) == 0) {
|
||||||
if (parse_flag(line, &keepcase, afflst)) {
|
if (parse_flag(line, &keepcase, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -657,7 +670,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the flag used by `forceucase' */
|
/* parse in the flag used by `forceucase' */
|
||||||
if (strncmp(line,"FORCEUCASE",10) == 0) {
|
if (strncmp(line,"FORCEUCASE",10) == 0) {
|
||||||
if (parse_flag(line, &forceucase, afflst)) {
|
if (parse_flag(line, &forceucase, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -665,7 +678,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the flag used by `warn' */
|
/* parse in the flag used by `warn' */
|
||||||
if (strncmp(line,"WARN",4) == 0) {
|
if (strncmp(line,"WARN",4) == 0) {
|
||||||
if (parse_flag(line, &warn, afflst)) {
|
if (parse_flag(line, &warn, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -677,7 +690,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
/* parse in the flag used by the affix generator */
|
/* parse in the flag used by the affix generator */
|
||||||
if (strncmp(line,"SUBSTANDARD",11) == 0) {
|
if (strncmp(line,"SUBSTANDARD",11) == 0) {
|
||||||
if (parse_flag(line, &substandard, afflst)) {
|
if (parse_flag(line, &substandard, afflst)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -696,19 +709,14 @@ int AffixMgr::parse_file(const char * affpath, const char * key)
|
||||||
dupflags_ini = 0;
|
dupflags_ini = 0;
|
||||||
}
|
}
|
||||||
if (parse_affix(line, ft, afflst, dupflags)) {
|
if (parse_affix(line, ft, afflst, dupflags)) {
|
||||||
delete afflst;
|
finishFileMgr(afflst);
|
||||||
process_pfx_tree_to_list();
|
|
||||||
process_sfx_tree_to_list();
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
delete afflst;
|
|
||||||
|
|
||||||
// convert affix trees to sorted list
|
finishFileMgr(afflst);
|
||||||
process_pfx_tree_to_list();
|
// affix trees are sorted now
|
||||||
process_sfx_tree_to_list();
|
|
||||||
|
|
||||||
// now we can speed up performance greatly taking advantage of the
|
// now we can speed up performance greatly taking advantage of the
|
||||||
// relationship between the affixes and the idea of "subsets".
|
// relationship between the affixes and the idea of "subsets".
|
||||||
|
@ -1319,7 +1327,7 @@ int AffixMgr::cpdrep_check(const char * word, int wl)
|
||||||
}
|
}
|
||||||
|
|
||||||
// forbid compoundings when there are special patterns at word bound
|
// forbid compoundings when there are special patterns at word bound
|
||||||
int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, const char affixed)
|
int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, const char /*affixed*/)
|
||||||
{
|
{
|
||||||
int len;
|
int len;
|
||||||
for (int i = 0; i < numcheckcpd; i++) {
|
for (int i = 0; i < numcheckcpd; i++) {
|
||||||
|
@ -1332,7 +1340,7 @@ int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2,
|
||||||
// zero pattern (0/flag) => unmodified stem (zero affixes allowed)
|
// zero pattern (0/flag) => unmodified stem (zero affixes allowed)
|
||||||
(!*(checkcpdtable[i].pattern) || (
|
(!*(checkcpdtable[i].pattern) || (
|
||||||
(*(checkcpdtable[i].pattern)=='0' && r1->blen <= pos && strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) ||
|
(*(checkcpdtable[i].pattern)=='0' && r1->blen <= pos && strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) ||
|
||||||
(*(checkcpdtable[i].pattern)!='0' && (len = strlen(checkcpdtable[i].pattern)) &&
|
(*(checkcpdtable[i].pattern)!='0' && ((len = strlen(checkcpdtable[i].pattern)) != 0) &&
|
||||||
strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)))) {
|
strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)))) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -1393,7 +1401,10 @@ int AffixMgr::defcpd_check(hentry *** words, short wnum, hentry * rv, hentry **
|
||||||
for (i = 0; i < numdefcpd; i++) {
|
for (i = 0; i < numdefcpd; i++) {
|
||||||
for (j = 0; j < defcpdtable[i].len; j++) {
|
for (j = 0; j < defcpdtable[i].len; j++) {
|
||||||
if (defcpdtable[i].def[j] != '*' && defcpdtable[i].def[j] != '?' &&
|
if (defcpdtable[i].def[j] != '*' && defcpdtable[i].def[j] != '?' &&
|
||||||
TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) ok = 1;
|
TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) {
|
||||||
|
ok = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ok == 0) {
|
if (ok == 0) {
|
||||||
|
@ -1544,7 +1555,7 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
|
||||||
int oldlen = 0;
|
int oldlen = 0;
|
||||||
int checkedstriple = 0;
|
int checkedstriple = 0;
|
||||||
int onlycpdrule;
|
int onlycpdrule;
|
||||||
int affixed = 0;
|
char affixed = 0;
|
||||||
hentry ** oldwords = words;
|
hentry ** oldwords = words;
|
||||||
|
|
||||||
int checked_prefix;
|
int checked_prefix;
|
||||||
|
@ -1626,8 +1637,9 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
|
||||||
if (onlycpdrule) break;
|
if (onlycpdrule) break;
|
||||||
if (compoundflag &&
|
if (compoundflag &&
|
||||||
!(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
|
!(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
|
||||||
if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
|
if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
|
||||||
FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
|
FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
||||||
|
(compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule &&
|
||||||
sfx->getCont() &&
|
sfx->getCont() &&
|
||||||
((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag,
|
((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag,
|
||||||
sfx->getContLen())) || (compoundend &&
|
sfx->getContLen())) || (compoundend &&
|
||||||
|
@ -1640,9 +1652,11 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
|
||||||
if (rv ||
|
if (rv ||
|
||||||
(((wordnum == 0) && compoundbegin &&
|
(((wordnum == 0) && compoundbegin &&
|
||||||
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
||||||
|
(compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffixes + compound
|
||||||
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
|
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
|
||||||
((wordnum > 0) && compoundmiddle &&
|
((wordnum > 0) && compoundmiddle &&
|
||||||
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
||||||
|
(compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffixes + compound
|
||||||
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
|
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
|
||||||
) checked_prefix = 1;
|
) checked_prefix = 1;
|
||||||
// else check forbiddenwords and needaffix
|
// else check forbiddenwords and needaffix
|
||||||
|
@ -2045,7 +2059,7 @@ int AffixMgr::compound_check_morph(const char * word, int len,
|
||||||
int cmax;
|
int cmax;
|
||||||
|
|
||||||
int onlycpdrule;
|
int onlycpdrule;
|
||||||
int affixed = 0;
|
char affixed = 0;
|
||||||
hentry ** oldwords = words;
|
hentry ** oldwords = words;
|
||||||
|
|
||||||
setcminmax(&cmin, &cmax, word, len);
|
setcminmax(&cmin, &cmax, word, len);
|
||||||
|
@ -2115,11 +2129,12 @@ int AffixMgr::compound_check_morph(const char * word, int len,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!rv) {
|
if (!rv) {
|
||||||
if (onlycpdrule) break;
|
if (onlycpdrule && strlen(*result) > MAXLNLEN/10) break;
|
||||||
if (compoundflag &&
|
if (compoundflag &&
|
||||||
!(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
|
!(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
|
||||||
if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
|
if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
|
||||||
FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
|
FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
||||||
|
(compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule &&
|
||||||
sfx->getCont() &&
|
sfx->getCont() &&
|
||||||
((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag,
|
((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag,
|
||||||
sfx->getContLen())) || (compoundend &&
|
sfx->getContLen())) || (compoundend &&
|
||||||
|
@ -2132,9 +2147,11 @@ int AffixMgr::compound_check_morph(const char * word, int len,
|
||||||
if (rv ||
|
if (rv ||
|
||||||
(((wordnum == 0) && compoundbegin &&
|
(((wordnum == 0) && compoundbegin &&
|
||||||
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
||||||
|
(compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffix+compound
|
||||||
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
|
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
|
||||||
((wordnum > 0) && compoundmiddle &&
|
((wordnum > 0) && compoundmiddle &&
|
||||||
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
|
||||||
|
(compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffix+compound
|
||||||
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
|
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
|
||||||
) {
|
) {
|
||||||
// char * p = prefix_check_morph(st, i, 0, compound);
|
// char * p = prefix_check_morph(st, i, 0, compound);
|
||||||
|
@ -3554,7 +3571,7 @@ int AffixMgr::parse_reptable(char * line, FileMgr * af)
|
||||||
/* now parse the numrep lines to read in the remainder of the table */
|
/* now parse the numrep lines to read in the remainder of the table */
|
||||||
char * nl;
|
char * nl;
|
||||||
for (int j=0; j < numrep; j++) {
|
for (int j=0; j < numrep; j++) {
|
||||||
if (!(nl = af->getline())) return 1;
|
if ((nl = af->getline()) == NULL) return 1;
|
||||||
mychomp(nl);
|
mychomp(nl);
|
||||||
tp = nl;
|
tp = nl;
|
||||||
i = 0;
|
i = 0;
|
||||||
|
@ -3651,7 +3668,7 @@ int AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const c
|
||||||
if (*piece != '\0') {
|
if (*piece != '\0') {
|
||||||
switch(i) {
|
switch(i) {
|
||||||
case 0: {
|
case 0: {
|
||||||
if (strncmp(piece, keyword, sizeof(keyword)) != 0) {
|
if (strncmp(piece, keyword, strlen(keyword)) != 0) {
|
||||||
HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
|
HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
|
||||||
delete *rl;
|
delete *rl;
|
||||||
*rl = NULL;
|
*rl = NULL;
|
||||||
|
@ -4258,7 +4275,7 @@ int AffixMgr::parse_affix(char * line, const char at, FileMgr * af, char * dupf
|
||||||
std::vector<affentry>::iterator start = affentries.begin();
|
std::vector<affentry>::iterator start = affentries.begin();
|
||||||
std::vector<affentry>::iterator end = affentries.end();
|
std::vector<affentry>::iterator end = affentries.end();
|
||||||
for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) {
|
for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) {
|
||||||
if (!(nl = af->getline())) return 1;
|
if ((nl = af->getline()) == NULL) return 1;
|
||||||
mychomp(nl);
|
mychomp(nl);
|
||||||
tp = nl;
|
tp = nl;
|
||||||
i = 0;
|
i = 0;
|
||||||
|
|
2
vendor/hunspell/src/hunspell/affixmgr.hxx
vendored
2
vendor/hunspell/src/hunspell/affixmgr.hxx
vendored
|
@ -41,6 +41,7 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr
|
||||||
FLAG compoundroot;
|
FLAG compoundroot;
|
||||||
FLAG compoundforbidflag;
|
FLAG compoundforbidflag;
|
||||||
FLAG compoundpermitflag;
|
FLAG compoundpermitflag;
|
||||||
|
int compoundmoresuffixes;
|
||||||
int checkcompounddup;
|
int checkcompounddup;
|
||||||
int checkcompoundrep;
|
int checkcompoundrep;
|
||||||
int checkcompoundcase;
|
int checkcompoundcase;
|
||||||
|
@ -244,6 +245,7 @@ private:
|
||||||
int process_sfx_tree_to_list();
|
int process_sfx_tree_to_list();
|
||||||
int redundant_condition(char, char * strip, int stripl,
|
int redundant_condition(char, char * strip, int stripl,
|
||||||
const char * cond, int);
|
const char * cond, int);
|
||||||
|
void finishFileMgr(FileMgr *afflst);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
2
vendor/hunspell/src/hunspell/atypes.hxx
vendored
2
vendor/hunspell/src/hunspell/atypes.hxx
vendored
|
@ -57,7 +57,7 @@ static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {}
|
||||||
#define FLAG_NULL 0x00
|
#define FLAG_NULL 0x00
|
||||||
#define FREE_FLAG(a) a = 0
|
#define FREE_FLAG(a) a = 0
|
||||||
|
|
||||||
#define TESTAFF( a, b , c ) flag_bsearch((unsigned short *) a, (unsigned short) b, c)
|
#define TESTAFF( a, b , c ) (flag_bsearch((unsigned short *) a, (unsigned short) b, c))
|
||||||
|
|
||||||
struct affentry
|
struct affentry
|
||||||
{
|
{
|
||||||
|
|
4
vendor/hunspell/src/hunspell/baseaffix.hxx
vendored
4
vendor/hunspell/src/hunspell/baseaffix.hxx
vendored
|
@ -5,7 +5,11 @@
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED AffEntry
|
class LIBHUNSPELL_DLL_EXPORTED AffEntry
|
||||||
{
|
{
|
||||||
|
private:
|
||||||
|
AffEntry(const AffEntry&);
|
||||||
|
AffEntry& operator = (const AffEntry&);
|
||||||
protected:
|
protected:
|
||||||
|
AffEntry() {}
|
||||||
char * appnd;
|
char * appnd;
|
||||||
char * strip;
|
char * strip;
|
||||||
unsigned char appndl;
|
unsigned char appndl;
|
||||||
|
|
49
vendor/hunspell/src/hunspell/csutil.cxx
vendored
49
vendor/hunspell/src/hunspell/csutil.cxx
vendored
|
@ -17,6 +17,11 @@ struct unicode_info {
|
||||||
unsigned short clower;
|
unsigned short clower;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include <windows.h>
|
||||||
|
#include <wchar.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef OPENOFFICEORG
|
#ifdef OPENOFFICEORG
|
||||||
# include <unicode/uchar.h>
|
# include <unicode/uchar.h>
|
||||||
#else
|
#else
|
||||||
|
@ -46,6 +51,21 @@ struct unicode_info2 {
|
||||||
static struct unicode_info2 * utf_tbl = NULL;
|
static struct unicode_info2 * utf_tbl = NULL;
|
||||||
static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instances
|
static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instances
|
||||||
|
|
||||||
|
FILE * myfopen(const char * path, const char * mode) {
|
||||||
|
#ifdef _WIN32
|
||||||
|
#define WIN32_LONG_PATH_PREFIX "\\\\?\\"
|
||||||
|
if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) {
|
||||||
|
int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0);
|
||||||
|
wchar_t *buff = (wchar_t *) malloc(len * sizeof(wchar_t));
|
||||||
|
MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len);
|
||||||
|
FILE * f = _wfopen(buff, (strcmp(mode, "r") == 0) ? L"r" : L"rb");
|
||||||
|
free(buff);
|
||||||
|
return f;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return fopen(path, mode);
|
||||||
|
}
|
||||||
|
|
||||||
/* only UTF-16 (BMP) implementation */
|
/* only UTF-16 (BMP) implementation */
|
||||||
char * u16_u8(char * dest, int size, const w_char * src, int srclen) {
|
char * u16_u8(char * dest, int size, const w_char * src, int srclen) {
|
||||||
signed char * u8 = (signed char *)dest;
|
signed char * u8 = (signed char *)dest;
|
||||||
|
@ -342,7 +362,10 @@ char * line_uniq(char * text, char breakchar) {
|
||||||
for ( i = 1; i < linenum; i++ ) {
|
for ( i = 1; i < linenum; i++ ) {
|
||||||
int dup = 0;
|
int dup = 0;
|
||||||
for (int j = 0; j < i; j++) {
|
for (int j = 0; j < i; j++) {
|
||||||
if (strcmp(lines[i], lines[j]) == 0) dup = 1;
|
if (strcmp(lines[i], lines[j]) == 0) {
|
||||||
|
dup = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (!dup) {
|
if (!dup) {
|
||||||
if ((i > 1) || (*(lines[0]) != '\0')) {
|
if ((i > 1) || (*(lines[0]) != '\0')) {
|
||||||
|
@ -5468,7 +5491,15 @@ struct cs_info * get_current_cs(const char * es) {
|
||||||
// conversion tables static in this file, create them when needed
|
// conversion tables static in this file, create them when needed
|
||||||
// with help the mozilla backend.
|
// with help the mozilla backend.
|
||||||
struct cs_info * get_current_cs(const char * es) {
|
struct cs_info * get_current_cs(const char * es) {
|
||||||
struct cs_info *ccs;
|
struct cs_info *ccs = new cs_info[256];
|
||||||
|
// Initialze the array with dummy data so that we wouldn't need
|
||||||
|
// to return null in case of failures.
|
||||||
|
for (int i = 0; i <= 0xff; ++i) {
|
||||||
|
ccs[i].ccase = false;
|
||||||
|
ccs[i].clower = i;
|
||||||
|
ccs[i].cupper = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
nsCOMPtr<nsIUnicodeEncoder> encoder;
|
nsCOMPtr<nsIUnicodeEncoder> encoder;
|
||||||
nsCOMPtr<nsIUnicodeDecoder> decoder;
|
nsCOMPtr<nsIUnicodeDecoder> decoder;
|
||||||
|
@ -5476,21 +5507,19 @@ struct cs_info * get_current_cs(const char * es) {
|
||||||
nsresult rv;
|
nsresult rv;
|
||||||
nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(kCharsetConverterManagerCID, &rv);
|
nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(kCharsetConverterManagerCID, &rv);
|
||||||
if (NS_FAILED(rv))
|
if (NS_FAILED(rv))
|
||||||
return nsnull;
|
return ccs;
|
||||||
|
|
||||||
rv = ccm->GetUnicodeEncoder(es, getter_AddRefs(encoder));
|
rv = ccm->GetUnicodeEncoder(es, getter_AddRefs(encoder));
|
||||||
if (NS_FAILED(rv))
|
if (NS_FAILED(rv))
|
||||||
return nsnull;
|
return ccs;
|
||||||
encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nsnull, '?');
|
encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nsnull, '?');
|
||||||
rv = ccm->GetUnicodeDecoder(es, getter_AddRefs(decoder));
|
rv = ccm->GetUnicodeDecoder(es, getter_AddRefs(decoder));
|
||||||
if (NS_FAILED(rv))
|
if (NS_FAILED(rv))
|
||||||
return nsnull;
|
return ccs;
|
||||||
decoder->SetInputErrorBehavior(decoder->kOnError_Signal);
|
decoder->SetInputErrorBehavior(decoder->kOnError_Signal);
|
||||||
|
|
||||||
if (NS_FAILED(rv))
|
if (NS_FAILED(rv))
|
||||||
return nsnull;
|
return ccs;
|
||||||
|
|
||||||
ccs = new cs_info[256];
|
|
||||||
|
|
||||||
for (unsigned int i = 0; i <= 0xff; ++i) {
|
for (unsigned int i = 0; i <= 0xff; ++i) {
|
||||||
PRBool success = PR_FALSE;
|
PRBool success = PR_FALSE;
|
||||||
|
@ -5653,7 +5682,7 @@ unsigned short unicodetoupper(unsigned short c, int langnum)
|
||||||
if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr)))
|
if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr)))
|
||||||
return 0x0130;
|
return 0x0130;
|
||||||
#ifdef OPENOFFICEORG
|
#ifdef OPENOFFICEORG
|
||||||
return u_toupper(c);
|
return static_cast<unsigned short>(u_toupper(c));
|
||||||
#else
|
#else
|
||||||
#ifdef MOZILLA_CLIENT
|
#ifdef MOZILLA_CLIENT
|
||||||
return ToUpperCase((PRUnichar) c);
|
return ToUpperCase((PRUnichar) c);
|
||||||
|
@ -5671,7 +5700,7 @@ unsigned short unicodetolower(unsigned short c, int langnum)
|
||||||
if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr)))
|
if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr)))
|
||||||
return 0x0131;
|
return 0x0131;
|
||||||
#ifdef OPENOFFICEORG
|
#ifdef OPENOFFICEORG
|
||||||
return u_tolower(c);
|
return static_cast<unsigned short>(u_tolower(c));
|
||||||
#else
|
#else
|
||||||
#ifdef MOZILLA_CLIENT
|
#ifdef MOZILLA_CLIENT
|
||||||
return ToLowerCase((PRUnichar) c);
|
return ToLowerCase((PRUnichar) c);
|
||||||
|
|
3
vendor/hunspell/src/hunspell/csutil.hxx
vendored
3
vendor/hunspell/src/hunspell/csutil.hxx
vendored
|
@ -52,6 +52,9 @@
|
||||||
#define FORBIDDENWORD 65510
|
#define FORBIDDENWORD 65510
|
||||||
#define ONLYUPCASEFLAG 65511
|
#define ONLYUPCASEFLAG 65511
|
||||||
|
|
||||||
|
// fopen or optional _wfopen to fix long pathname problem of WIN32
|
||||||
|
LIBHUNSPELL_DLL_EXPORTED FILE * myfopen(const char * path, const char * mode);
|
||||||
|
|
||||||
// convert UTF-16 characters to UTF-8
|
// convert UTF-16 characters to UTF-8
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
|
LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
|
||||||
|
|
||||||
|
|
6
vendor/hunspell/src/hunspell/dictmgr.cxx
vendored
6
vendor/hunspell/src/hunspell/dictmgr.cxx
vendored
|
@ -5,6 +5,7 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "dictmgr.hxx"
|
#include "dictmgr.hxx"
|
||||||
|
#include "csutil.hxx"
|
||||||
|
|
||||||
DictMgr::DictMgr(const char * dictpath, const char * etype) : numdict(0)
|
DictMgr::DictMgr(const char * dictpath, const char * etype) : numdict(0)
|
||||||
{
|
{
|
||||||
|
@ -57,7 +58,7 @@ int DictMgr::parse_file(const char * dictpath, const char * etype)
|
||||||
|
|
||||||
// open the dictionary list file
|
// open the dictionary list file
|
||||||
FILE * dictlst;
|
FILE * dictlst;
|
||||||
dictlst = fopen(dictpath,"r");
|
dictlst = myfopen(dictpath,"r");
|
||||||
if (!dictlst) {
|
if (!dictlst) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -100,7 +101,8 @@ int DictMgr::parse_file(const char * dictpath, const char * etype)
|
||||||
case 3:
|
case 3:
|
||||||
free(pdict->region);
|
free(pdict->region);
|
||||||
pdict->region=NULL;
|
pdict->region=NULL;
|
||||||
case 2: //deliberate fallthrough
|
/* FALLTHROUGH */
|
||||||
|
case 2:
|
||||||
free(pdict->lang);
|
free(pdict->lang);
|
||||||
pdict->lang=NULL;
|
pdict->lang=NULL;
|
||||||
default:
|
default:
|
||||||
|
|
5
vendor/hunspell/src/hunspell/dictmgr.hxx
vendored
5
vendor/hunspell/src/hunspell/dictmgr.hxx
vendored
|
@ -15,7 +15,10 @@ struct dictentry {
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED DictMgr
|
class LIBHUNSPELL_DLL_EXPORTED DictMgr
|
||||||
{
|
{
|
||||||
|
private:
|
||||||
|
DictMgr(const DictMgr&);
|
||||||
|
DictMgr& operator = (const DictMgr&);
|
||||||
|
private:
|
||||||
int numdict;
|
int numdict;
|
||||||
dictentry * pdentry;
|
dictentry * pdentry;
|
||||||
|
|
||||||
|
|
14
vendor/hunspell/src/hunspell/filemgr.cxx
vendored
14
vendor/hunspell/src/hunspell/filemgr.cxx
vendored
|
@ -6,16 +6,20 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "filemgr.hxx"
|
#include "filemgr.hxx"
|
||||||
|
#include "csutil.hxx"
|
||||||
|
|
||||||
int FileMgr::fail(const char * err, const char * par) {
|
int FileMgr::fail(const char * err, const char * par) {
|
||||||
fprintf(stderr, err, par);
|
fprintf(stderr, err, par);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
FileMgr::FileMgr(const char * file, const char * key) {
|
FileMgr::FileMgr(const char * file, const char * key)
|
||||||
linenum = 0;
|
: hin(NULL)
|
||||||
hin = NULL;
|
, linenum(0)
|
||||||
fin = fopen(file, "r");
|
{
|
||||||
|
in[0] = '\0';
|
||||||
|
|
||||||
|
fin = myfopen(file, "r");
|
||||||
if (!fin) {
|
if (!fin) {
|
||||||
// check hzipped file
|
// check hzipped file
|
||||||
char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1);
|
char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1);
|
||||||
|
@ -39,7 +43,7 @@ char * FileMgr::getline() {
|
||||||
const char * l;
|
const char * l;
|
||||||
linenum++;
|
linenum++;
|
||||||
if (fin) return fgets(in, BUFSIZE - 1, fin);
|
if (fin) return fgets(in, BUFSIZE - 1, fin);
|
||||||
if (hin && (l = hin->getline())) return strcpy(in, l);
|
if (hin && ((l = hin->getline()) != NULL)) return strcpy(in, l);
|
||||||
linenum--;
|
linenum--;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
3
vendor/hunspell/src/hunspell/filemgr.hxx
vendored
3
vendor/hunspell/src/hunspell/filemgr.hxx
vendored
|
@ -9,6 +9,9 @@
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED FileMgr
|
class LIBHUNSPELL_DLL_EXPORTED FileMgr
|
||||||
{
|
{
|
||||||
|
private:
|
||||||
|
FileMgr(const FileMgr&);
|
||||||
|
FileMgr& operator = (const FileMgr&);
|
||||||
protected:
|
protected:
|
||||||
FILE * fin;
|
FILE * fin;
|
||||||
Hunzip * hin;
|
Hunzip * hin;
|
||||||
|
|
70
vendor/hunspell/src/hunspell/hashmgr.cxx
vendored
70
vendor/hunspell/src/hunspell/hashmgr.cxx
vendored
|
@ -5,6 +5,7 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
#include "hashmgr.hxx"
|
#include "hashmgr.hxx"
|
||||||
#include "csutil.hxx"
|
#include "csutil.hxx"
|
||||||
|
@ -13,12 +14,19 @@
|
||||||
// build a hash table from a munched word list
|
// build a hash table from a munched word list
|
||||||
|
|
||||||
HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
|
HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
|
||||||
|
: tablesize(0)
|
||||||
|
, tableptr(NULL)
|
||||||
|
, userword(0)
|
||||||
|
, flag_mode(FLAG_CHAR)
|
||||||
|
, complexprefixes(0)
|
||||||
|
, utf8(0)
|
||||||
|
, forbiddenword(FORBIDDENWORD) // forbidden word signing flag
|
||||||
|
, numaliasf(0)
|
||||||
|
, aliasf(NULL)
|
||||||
|
, aliasflen(0)
|
||||||
|
, numaliasm(0)
|
||||||
|
, aliasm(NULL)
|
||||||
{
|
{
|
||||||
tablesize = 0;
|
|
||||||
tableptr = NULL;
|
|
||||||
flag_mode = FLAG_CHAR;
|
|
||||||
complexprefixes = 0;
|
|
||||||
utf8 = 0;
|
|
||||||
langnum = 0;
|
langnum = 0;
|
||||||
lang = NULL;
|
lang = NULL;
|
||||||
enc = NULL;
|
enc = NULL;
|
||||||
|
@ -26,11 +34,6 @@ HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
|
||||||
ignorechars = NULL;
|
ignorechars = NULL;
|
||||||
ignorechars_utf16 = NULL;
|
ignorechars_utf16 = NULL;
|
||||||
ignorechars_utf16_len = 0;
|
ignorechars_utf16_len = 0;
|
||||||
numaliasf = 0;
|
|
||||||
aliasf = NULL;
|
|
||||||
numaliasm = 0;
|
|
||||||
aliasm = NULL;
|
|
||||||
forbiddenword = FORBIDDENWORD; // forbidden word signing flag
|
|
||||||
load_config(apath, key);
|
load_config(apath, key);
|
||||||
int ec = load_tables(tpath, key);
|
int ec = load_tables(tpath, key);
|
||||||
if (ec) {
|
if (ec) {
|
||||||
|
@ -116,7 +119,7 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
|
||||||
int al, const char * desc, bool onlyupcase)
|
int al, const char * desc, bool onlyupcase)
|
||||||
{
|
{
|
||||||
bool upcasehomonym = false;
|
bool upcasehomonym = false;
|
||||||
int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;
|
int descl = desc ? (aliasm ? sizeof(char *) : strlen(desc) + 1) : 0;
|
||||||
// variable-length hash record with word and optional fields
|
// variable-length hash record with word and optional fields
|
||||||
struct hentry* hp =
|
struct hentry* hp =
|
||||||
(struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);
|
(struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);
|
||||||
|
@ -210,18 +213,21 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
|
||||||
}
|
}
|
||||||
|
|
||||||
int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,
|
int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,
|
||||||
unsigned short * flags, int al, char * dp, int captype)
|
unsigned short * flags, int flagslen, char * dp, int captype)
|
||||||
{
|
{
|
||||||
|
if (flags == NULL)
|
||||||
|
flagslen = 0;
|
||||||
|
|
||||||
// add inner capitalized forms to handle the following allcap forms:
|
// add inner capitalized forms to handle the following allcap forms:
|
||||||
// Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
|
// Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
|
||||||
// Allcaps with suffixes: CIA's -> CIA'S
|
// Allcaps with suffixes: CIA's -> CIA'S
|
||||||
if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
|
if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
|
||||||
((captype == ALLCAP) && (flags != NULL))) &&
|
((captype == ALLCAP) && (flagslen != 0))) &&
|
||||||
!((flags != NULL) && TESTAFF(flags, forbiddenword, al))) {
|
!((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) {
|
||||||
unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (al+1));
|
unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (flagslen+1));
|
||||||
if (!flags2) return 1;
|
if (!flags2) return 1;
|
||||||
if (al) memcpy(flags2, flags, al * sizeof(unsigned short));
|
if (flagslen) memcpy(flags2, flags, flagslen * sizeof(unsigned short));
|
||||||
flags2[al] = ONLYUPCASEFLAG;
|
flags2[flagslen] = ONLYUPCASEFLAG;
|
||||||
if (utf8) {
|
if (utf8) {
|
||||||
char st[BUFSIZE];
|
char st[BUFSIZE];
|
||||||
w_char w[BUFSIZE];
|
w_char w[BUFSIZE];
|
||||||
|
@ -229,11 +235,11 @@ int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,
|
||||||
mkallsmall_utf(w, wlen, langnum);
|
mkallsmall_utf(w, wlen, langnum);
|
||||||
mkallcap_utf(w, 1, langnum);
|
mkallcap_utf(w, 1, langnum);
|
||||||
u16_u8(st, BUFSIZE, w, wlen);
|
u16_u8(st, BUFSIZE, w, wlen);
|
||||||
return add_word(st,wbl,wcl,flags2,al+1,dp, true);
|
return add_word(st,wbl,wcl,flags2,flagslen+1,dp, true);
|
||||||
} else {
|
} else {
|
||||||
mkallsmall(word, csconv);
|
mkallsmall(word, csconv);
|
||||||
mkinitcap(word, csconv);
|
mkinitcap(word, csconv);
|
||||||
return add_word(word,wbl,wcl,flags2,al+1,dp, true);
|
return add_word(word,wbl,wcl,flags2,flagslen+1,dp, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -363,8 +369,8 @@ int HashMgr::load_tables(const char * tpath, const char * key)
|
||||||
if (dict == NULL) return 1;
|
if (dict == NULL) return 1;
|
||||||
|
|
||||||
// first read the first line of file to get hash table size */
|
// first read the first line of file to get hash table size */
|
||||||
if (!(ts = dict->getline())) {
|
if ((ts = dict->getline()) == NULL) {
|
||||||
HUNSPELL_WARNING(stderr, "error: empty dic file\n");
|
HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath);
|
||||||
delete dict;
|
delete dict;
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
@ -377,30 +383,32 @@ int HashMgr::load_tables(const char * tpath, const char * key)
|
||||||
}
|
}
|
||||||
|
|
||||||
tablesize = atoi(ts);
|
tablesize = atoi(ts);
|
||||||
if (tablesize == 0) {
|
|
||||||
|
int nExtra = 5 + USERWORD;
|
||||||
|
|
||||||
|
if (tablesize <= 0 || (tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) / int(sizeof(struct hentry *)))) {
|
||||||
HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n");
|
HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n");
|
||||||
delete dict;
|
delete dict;
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
tablesize = tablesize + 5 + USERWORD;
|
tablesize += nExtra;
|
||||||
if ((tablesize %2) == 0) tablesize++;
|
if ((tablesize % 2) == 0) tablesize++;
|
||||||
|
|
||||||
// allocate the hash table
|
// allocate the hash table
|
||||||
tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *));
|
tableptr = (struct hentry **) calloc(tablesize, sizeof(struct hentry *));
|
||||||
if (! tableptr) {
|
if (! tableptr) {
|
||||||
delete dict;
|
delete dict;
|
||||||
return 3;
|
return 3;
|
||||||
}
|
}
|
||||||
for (int i=0; i<tablesize; i++) tableptr[i] = NULL;
|
|
||||||
|
|
||||||
// loop through all words on much list and add to hash
|
// loop through all words on much list and add to hash
|
||||||
// table and create word and affix strings
|
// table and create word and affix strings
|
||||||
|
|
||||||
while ((ts = dict->getline())) {
|
while ((ts = dict->getline()) != NULL) {
|
||||||
mychomp(ts);
|
mychomp(ts);
|
||||||
// split each line into word and morphological description
|
// split each line into word and morphological description
|
||||||
dp = ts;
|
dp = ts;
|
||||||
while ((dp = strchr(dp, ':'))) {
|
while ((dp = strchr(dp, ':')) != NULL) {
|
||||||
if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) {
|
if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) {
|
||||||
for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--);
|
for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--);
|
||||||
if (dp < ts) { // missing word
|
if (dp < ts) { // missing word
|
||||||
|
@ -616,7 +624,7 @@ int HashMgr::load_config(const char * affpath, const char * key)
|
||||||
// read in each line ignoring any that do not
|
// read in each line ignoring any that do not
|
||||||
// start with a known line type indicator
|
// start with a known line type indicator
|
||||||
|
|
||||||
while ((line = afflst->getline())) {
|
while ((line = afflst->getline()) != NULL) {
|
||||||
mychomp(line);
|
mychomp(line);
|
||||||
|
|
||||||
/* remove byte order mark */
|
/* remove byte order mark */
|
||||||
|
@ -756,7 +764,7 @@ int HashMgr::parse_aliasf(char * line, FileMgr * af)
|
||||||
/* now parse the numaliasf lines to read in the remainder of the table */
|
/* now parse the numaliasf lines to read in the remainder of the table */
|
||||||
char * nl;
|
char * nl;
|
||||||
for (int j=0; j < numaliasf; j++) {
|
for (int j=0; j < numaliasf; j++) {
|
||||||
if (!(nl = af->getline())) return 1;
|
if ((nl = af->getline()) == NULL) return 1;
|
||||||
mychomp(nl);
|
mychomp(nl);
|
||||||
tp = nl;
|
tp = nl;
|
||||||
i = 0;
|
i = 0;
|
||||||
|
@ -863,7 +871,7 @@ int HashMgr::parse_aliasm(char * line, FileMgr * af)
|
||||||
/* now parse the numaliasm lines to read in the remainder of the table */
|
/* now parse the numaliasm lines to read in the remainder of the table */
|
||||||
char * nl = line;
|
char * nl = line;
|
||||||
for (int j=0; j < numaliasm; j++) {
|
for (int j=0; j < numaliasm; j++) {
|
||||||
if (!(nl = af->getline())) return 1;
|
if ((nl = af->getline()) == NULL) return 1;
|
||||||
mychomp(nl);
|
mychomp(nl);
|
||||||
tp = nl;
|
tp = nl;
|
||||||
i = 0;
|
i = 0;
|
||||||
|
|
98
vendor/hunspell/src/hunspell/hunspell.cxx
vendored
98
vendor/hunspell/src/hunspell/hunspell.cxx
vendored
|
@ -12,6 +12,8 @@
|
||||||
#endif
|
#endif
|
||||||
#include "csutil.hxx"
|
#include "csutil.hxx"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
|
Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
|
||||||
{
|
{
|
||||||
encoding = NULL;
|
encoding = NULL;
|
||||||
|
@ -328,6 +330,10 @@ int Hunspell::spell(const char * word, int * info, char ** root)
|
||||||
char cw[MAXWORDUTF8LEN];
|
char cw[MAXWORDUTF8LEN];
|
||||||
char wspace[MAXWORDUTF8LEN];
|
char wspace[MAXWORDUTF8LEN];
|
||||||
w_char unicw[MAXWORDLEN];
|
w_char unicw[MAXWORDLEN];
|
||||||
|
|
||||||
|
int info2 = 0;
|
||||||
|
if (!info) info = &info2; else *info = 0;
|
||||||
|
|
||||||
// Hunspell supports XML input of the simplified API (see manual)
|
// Hunspell supports XML input of the simplified API (see manual)
|
||||||
if (strcmp(word, SPELL_XML) == 0) return 1;
|
if (strcmp(word, SPELL_XML) == 0) return 1;
|
||||||
int nc = strlen(word);
|
int nc = strlen(word);
|
||||||
|
@ -346,7 +352,6 @@ int Hunspell::spell(const char * word, int * info, char ** root)
|
||||||
if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
|
if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
|
||||||
else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
|
else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
|
||||||
|
|
||||||
int info2 = 0;
|
|
||||||
if (wl == 0 || maxdic == 0) return 1;
|
if (wl == 0 || maxdic == 0) return 1;
|
||||||
if (root) *root = NULL;
|
if (root) *root = NULL;
|
||||||
|
|
||||||
|
@ -364,13 +369,14 @@ int Hunspell::spell(const char * word, int * info, char ** root)
|
||||||
} else break;
|
} else break;
|
||||||
}
|
}
|
||||||
if ((i == wl) && (nstate == NNUM)) return 1;
|
if ((i == wl) && (nstate == NNUM)) return 1;
|
||||||
if (!info) info = &info2; else *info = 0;
|
|
||||||
|
|
||||||
switch(captype) {
|
switch(captype) {
|
||||||
case HUHCAP:
|
case HUHCAP:
|
||||||
|
/* FALLTHROUGH */
|
||||||
case HUHINITCAP:
|
case HUHINITCAP:
|
||||||
*info += SPELL_ORIGCAP;
|
*info += SPELL_ORIGCAP;
|
||||||
case NOCAP: {
|
/* FALLTHROUGH */
|
||||||
|
case NOCAP:
|
||||||
rv = checkword(cw, info, root);
|
rv = checkword(cw, info, root);
|
||||||
if ((abbv) && !(rv)) {
|
if ((abbv) && !(rv)) {
|
||||||
memcpy(wspace,cw,wl);
|
memcpy(wspace,cw,wl);
|
||||||
|
@ -379,7 +385,6 @@ int Hunspell::spell(const char * word, int * info, char ** root)
|
||||||
rv = checkword(wspace, info, root);
|
rv = checkword(wspace, info, root);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
case ALLCAP: {
|
case ALLCAP: {
|
||||||
*info += SPELL_ORIGCAP;
|
*info += SPELL_ORIGCAP;
|
||||||
rv = checkword(cw, info, root);
|
rv = checkword(cw, info, root);
|
||||||
|
@ -403,7 +408,7 @@ int Hunspell::spell(const char * word, int * info, char ** root)
|
||||||
*apostrophe = '\0';
|
*apostrophe = '\0';
|
||||||
wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
|
wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
|
||||||
*apostrophe = '\'';
|
*apostrophe = '\'';
|
||||||
if (wl2 < nc) {
|
if (wl2 >= 0 && wl2 < nc) {
|
||||||
mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
|
mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
|
||||||
rv = checkword(cw, info, root);
|
rv = checkword(cw, info, root);
|
||||||
if (rv) break;
|
if (rv) break;
|
||||||
|
@ -750,19 +755,28 @@ int Hunspell::suggest(char*** slst, const char * word)
|
||||||
char * dot = strchr(cw, '.');
|
char * dot = strchr(cw, '.');
|
||||||
if (dot && (dot > cw)) {
|
if (dot && (dot > cw)) {
|
||||||
int captype_;
|
int captype_;
|
||||||
if (utf8) {
|
if (utf8)
|
||||||
|
{
|
||||||
w_char w_[MAXWORDLEN];
|
w_char w_[MAXWORDLEN];
|
||||||
int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
|
int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
|
||||||
captype_ = get_captype_utf8(w_, wl_, langnum);
|
captype_ = get_captype_utf8(w_, wl_, langnum);
|
||||||
} else captype_ = get_captype(dot+1, strlen(dot+1), csconv);
|
} else captype_ = get_captype(dot+1, strlen(dot+1), csconv);
|
||||||
if (captype_ == INITCAP) {
|
if (captype_ == INITCAP)
|
||||||
|
{
|
||||||
char * st = mystrdup(cw);
|
char * st = mystrdup(cw);
|
||||||
if (st) st = (char *) realloc(st, wl + 2);
|
if (st)
|
||||||
if (st) {
|
{
|
||||||
st[(dot - cw) + 1] = ' ';
|
char *newst = (char *) realloc(st, wl + 2);
|
||||||
strcpy(st + (dot - cw) + 2, dot + 1);
|
if (newst == NULL)
|
||||||
ns = insert_sug(slst, st, ns);
|
free(st);
|
||||||
free(st);
|
st = newst;
|
||||||
|
}
|
||||||
|
if (st)
|
||||||
|
{
|
||||||
|
st[(dot - cw) + 1] = ' ';
|
||||||
|
strcpy(st + (dot - cw) + 2, dot + 1);
|
||||||
|
ns = insert_sug(slst, st, ns);
|
||||||
|
free(st);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -848,7 +862,7 @@ int Hunspell::suggest(char*** slst, const char * word)
|
||||||
*pos = '\0';
|
*pos = '\0';
|
||||||
strcpy(w, (*slst)[j]);
|
strcpy(w, (*slst)[j]);
|
||||||
strcat(w, pos + 1);
|
strcat(w, pos + 1);
|
||||||
spell(w, &info, NULL);
|
(void)spell(w, &info, NULL);
|
||||||
if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
|
if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
|
||||||
*pos = ' ';
|
*pos = ' ';
|
||||||
} else *pos = '-';
|
} else *pos = '-';
|
||||||
|
@ -1670,6 +1684,13 @@ int Hunspell::get_langnum() const
|
||||||
return langnum;
|
return langnum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int Hunspell::input_conv(const char * word, char * dest)
|
||||||
|
{
|
||||||
|
RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
|
||||||
|
return (rl && rl->conv(word, dest));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// return the beginning of the element (attr == NULL) or the attribute
|
// return the beginning of the element (attr == NULL) or the attribute
|
||||||
const char * Hunspell::get_xml_pos(const char * s, const char * attr)
|
const char * Hunspell::get_xml_pos(const char * s, const char * attr)
|
||||||
{
|
{
|
||||||
|
@ -1694,11 +1715,11 @@ int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
|
||||||
int n = 0;
|
int n = 0;
|
||||||
char * p;
|
char * p;
|
||||||
if (!list) return 0;
|
if (!list) return 0;
|
||||||
for (p = list; (p = strstr(p, tag)); p++) n++;
|
for (p = list; ((p = strstr(p, tag)) != NULL); p++) n++;
|
||||||
if (n == 0) return 0;
|
if (n == 0) return 0;
|
||||||
*slst = (char **) malloc(sizeof(char *) * n);
|
*slst = (char **) malloc(sizeof(char *) * n);
|
||||||
if (!*slst) return 0;
|
if (!*slst) return 0;
|
||||||
for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) {
|
for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) {
|
||||||
int l = strlen(p);
|
int l = strlen(p);
|
||||||
(*slst)[n] = (char *) malloc(l + 1);
|
(*slst)[n] = (char *) malloc(l + 1);
|
||||||
if (!(*slst)[n]) return n;
|
if (!(*slst)[n]) return n;
|
||||||
|
@ -1710,6 +1731,19 @@ int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
void myrep(std::string& str, const std::string& search, const std::string& replace)
|
||||||
|
{
|
||||||
|
size_t pos = 0;
|
||||||
|
while ((pos = str.find(search, pos)) != std::string::npos)
|
||||||
|
{
|
||||||
|
str.replace(pos, search.length(), replace);
|
||||||
|
pos += replace.length();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int Hunspell::spellml(char*** slst, const char * word)
|
int Hunspell::spellml(char*** slst, const char * word)
|
||||||
{
|
{
|
||||||
char *q, *q2;
|
char *q, *q2;
|
||||||
|
@ -1721,26 +1755,26 @@ int Hunspell::spellml(char*** slst, const char * word)
|
||||||
q2 = strstr(q2, "<word");
|
q2 = strstr(q2, "<word");
|
||||||
if (!q2) return 0; // bad XML input
|
if (!q2) return 0; // bad XML input
|
||||||
if (check_xml_par(q, "type=", "analyze")) {
|
if (check_xml_par(q, "type=", "analyze")) {
|
||||||
int n = 0, s = 0;
|
int n = 0;
|
||||||
if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);
|
if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);
|
||||||
if (n == 0) return 0;
|
if (n == 0) return 0;
|
||||||
// convert the result to <code><a>ana1</a><a>ana2</a></code> format
|
// convert the result to <code><a>ana1</a><a>ana2</a></code> format
|
||||||
for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);
|
std::string r;
|
||||||
char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&amp;
|
r.append("<code>");
|
||||||
if (!r) return 0;
|
|
||||||
strcpy(r, "<code>");
|
|
||||||
for (int i = 0; i < n; i++) {
|
for (int i = 0; i < n; i++) {
|
||||||
int l = strlen(r);
|
r.append("<a>");
|
||||||
strcpy(r + l, "<a>");
|
|
||||||
strcpy(r + l + 3, (*slst)[i]);
|
std::string entry((*slst)[i]);
|
||||||
mystrrep(r + l + 3, "\t", " ");
|
|
||||||
mystrrep(r + l + 3, "<", "&lt;");
|
|
||||||
mystrrep(r + l + 3, "&", "&amp;");
|
|
||||||
strcat(r, "</a>");
|
|
||||||
free((*slst)[i]);
|
free((*slst)[i]);
|
||||||
|
myrep(entry, "\t", " ");
|
||||||
|
myrep(entry, "&", "&amp;");
|
||||||
|
myrep(entry, "<", "&lt;");
|
||||||
|
r.append(entry);
|
||||||
|
|
||||||
|
r.append("</a>");
|
||||||
}
|
}
|
||||||
strcat(r, "</code>");
|
r.append("</code>");
|
||||||
(*slst)[0] = r;
|
(*slst)[0] = mystrdup(r.c_str());
|
||||||
return 1;
|
return 1;
|
||||||
} else if (check_xml_par(q, "type=", "stem")) {
|
} else if (check_xml_par(q, "type=", "stem")) {
|
||||||
if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
|
if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
|
||||||
|
@ -1753,9 +1787,9 @@ int Hunspell::spellml(char*** slst, const char * word)
|
||||||
return generate(slst, cw, cw2);
|
return generate(slst, cw, cw2);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if ((q2 = strstr(q2 + 1, "<code"))) {
|
if ((q2 = strstr(q2 + 1, "<code")) != NULL) {
|
||||||
char ** slst2;
|
char ** slst2;
|
||||||
if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"))) {
|
if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>")) != 0) {
|
||||||
int n2 = generate(slst, cw, slst2, n);
|
int n2 = generate(slst, cw, slst2, n);
|
||||||
freelist(&slst2, n);
|
freelist(&slst2, n);
|
||||||
return uniqlist(*slst, n2);
|
return uniqlist(*slst, n2);
|
||||||
|
|
14
vendor/hunspell/src/hunspell/hunspell.hxx
vendored
14
vendor/hunspell/src/hunspell/hunspell.hxx
vendored
|
@ -17,8 +17,12 @@
|
||||||
#ifndef _MYSPELLMGR_HXX_
|
#ifndef _MYSPELLMGR_HXX_
|
||||||
#define _MYSPELLMGR_HXX_
|
#define _MYSPELLMGR_HXX_
|
||||||
|
|
||||||
class Hunspell
|
class LIBHUNSPELL_DLL_EXPORTED Hunspell
|
||||||
{
|
{
|
||||||
|
private:
|
||||||
|
Hunspell(const Hunspell&);
|
||||||
|
Hunspell& operator = (const Hunspell&);
|
||||||
|
private:
|
||||||
AffixMgr* pAMgr;
|
AffixMgr* pAMgr;
|
||||||
HashMgr* pHMgr[MAXDIC];
|
HashMgr* pHMgr[MAXDIC];
|
||||||
int maxdic;
|
int maxdic;
|
||||||
|
@ -35,6 +39,11 @@ public:
|
||||||
|
|
||||||
/* Hunspell(aff, dic) - constructor of Hunspell class
|
/* Hunspell(aff, dic) - constructor of Hunspell class
|
||||||
* input: path of affix file and dictionary file
|
* input: path of affix file and dictionary file
|
||||||
|
*
|
||||||
|
* In WIN32 environment, use UTF-8 encoded paths started with the long path
|
||||||
|
* prefix \\\\?\\ to handle system-independent character encoding and very
|
||||||
|
* long path names (without the long path prefix Hunspell will use fopen()
|
||||||
|
* with system-dependent character encoding instead of _wfopen()).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
Hunspell(const char * affpath, const char * dpath, const char * key = NULL);
|
Hunspell(const char * affpath, const char * dpath, const char * key = NULL);
|
||||||
|
@ -131,6 +140,9 @@ public:
|
||||||
const char * get_version();
|
const char * get_version();
|
||||||
|
|
||||||
int get_langnum() const;
|
int get_langnum() const;
|
||||||
|
|
||||||
|
/* need for putdic */
|
||||||
|
int input_conv(const char * word, char * dest);
|
||||||
|
|
||||||
/* experimental and deprecated functions */
|
/* experimental and deprecated functions */
|
||||||
|
|
||||||
|
|
6
vendor/hunspell/src/hunspell/hunvisapi.h
vendored
6
vendor/hunspell/src/hunspell/hunvisapi.h
vendored
|
@ -1,10 +1,6 @@
|
||||||
#ifndef _HUNSPELL_VISIBILITY_H_
|
#ifndef _HUNSPELL_VISIBILITY_H_
|
||||||
#define _HUNSPELL_VISIBILITY_H_
|
#define _HUNSPELL_VISIBILITY_H_
|
||||||
|
|
||||||
#ifndef HUNSPELL_STATIC
|
|
||||||
#define HUNSPELL_STATIC
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(HUNSPELL_STATIC)
|
#if defined(HUNSPELL_STATIC)
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED
|
# define LIBHUNSPELL_DLL_EXPORTED
|
||||||
#elif defined(_MSC_VER)
|
#elif defined(_MSC_VER)
|
||||||
|
@ -13,7 +9,7 @@
|
||||||
# else
|
# else
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
|
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
|
||||||
# endif
|
# endif
|
||||||
#elif BUILDING_LIBHUNSPELL && 1
|
#elif defined(BUILDING_LIBHUNSPELL) && 1
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
|
# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
|
||||||
#else
|
#else
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED
|
# define LIBHUNSPELL_DLL_EXPORTED
|
||||||
|
|
2
vendor/hunspell/src/hunspell/hunvisapi.h.in
vendored
2
vendor/hunspell/src/hunspell/hunvisapi.h.in
vendored
|
@ -9,7 +9,7 @@
|
||||||
# else
|
# else
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
|
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
|
||||||
# endif
|
# endif
|
||||||
#elif BUILDING_LIBHUNSPELL && @HAVE_VISIBILITY@
|
#elif defined(BUILDING_LIBHUNSPELL) && @HAVE_VISIBILITY@
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
|
# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
|
||||||
#else
|
#else
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED
|
# define LIBHUNSPELL_DLL_EXPORTED
|
||||||
|
|
23
vendor/hunspell/src/hunspell/hunzip.cxx
vendored
23
vendor/hunspell/src/hunspell/hunzip.cxx
vendored
|
@ -3,6 +3,7 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "hunzip.hxx"
|
#include "hunzip.hxx"
|
||||||
|
#include "csutil.hxx"
|
||||||
|
|
||||||
#define CODELEN 65536
|
#define CODELEN 65536
|
||||||
#define BASEBITREC 5000
|
#define BASEBITREC 5000
|
||||||
|
@ -17,15 +18,17 @@ int Hunzip::fail(const char * err, const char * par) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
Hunzip::Hunzip(const char * file, const char * key) {
|
Hunzip::Hunzip(const char * file, const char * key)
|
||||||
bufsiz = 0;
|
: fin(NULL)
|
||||||
lastbit = 0;
|
, bufsiz(0)
|
||||||
inc = 0;
|
, lastbit(0)
|
||||||
outc = 0;
|
, inc(0)
|
||||||
dec = NULL;
|
, inbits(0)
|
||||||
fin = NULL;
|
, outc(0)
|
||||||
filename = (char *) malloc(strlen(file) + 1);
|
, dec(NULL)
|
||||||
if (filename) strcpy(filename, file);
|
{
|
||||||
|
in[0] = out[0] = line[0] = '\0';
|
||||||
|
filename = mystrdup(file);
|
||||||
if (getcode(key) == -1) bufsiz = -1;
|
if (getcode(key) == -1) bufsiz = -1;
|
||||||
else bufsiz = getbuf();
|
else bufsiz = getbuf();
|
||||||
}
|
}
|
||||||
|
@ -38,7 +41,7 @@ int Hunzip::getcode(const char * key) {
|
||||||
|
|
||||||
if (!filename) return -1;
|
if (!filename) return -1;
|
||||||
|
|
||||||
fin = fopen(filename, "rb");
|
fin = myfopen(filename, "rb");
|
||||||
if (!fin) return -1;
|
if (!fin) return -1;
|
||||||
|
|
||||||
// read magic number
|
// read magic number
|
||||||
|
|
4
vendor/hunspell/src/hunspell/hunzip.hxx
vendored
4
vendor/hunspell/src/hunspell/hunzip.hxx
vendored
|
@ -23,7 +23,9 @@ struct bit {
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED Hunzip
|
class LIBHUNSPELL_DLL_EXPORTED Hunzip
|
||||||
{
|
{
|
||||||
|
private:
|
||||||
|
Hunzip(const Hunzip&);
|
||||||
|
Hunzip& operator = (const Hunzip&);
|
||||||
protected:
|
protected:
|
||||||
char * filename;
|
char * filename;
|
||||||
FILE * fin;
|
FILE * fin;
|
||||||
|
|
3
vendor/hunspell/src/hunspell/phonet.cxx
vendored
3
vendor/hunspell/src/hunspell/phonet.cxx
vendored
|
@ -87,7 +87,8 @@ int phonet (const char * inword, char * target,
|
||||||
char word[MAXPHONETUTF8LEN + 1];
|
char word[MAXPHONETUTF8LEN + 1];
|
||||||
if (len == -1) len = strlen(inword);
|
if (len == -1) len = strlen(inword);
|
||||||
if (len > MAXPHONETUTF8LEN) return 0;
|
if (len > MAXPHONETUTF8LEN) return 0;
|
||||||
strcpy(word, inword);
|
strncpy(word, inword, MAXPHONETUTF8LEN);
|
||||||
|
word[MAXPHONETUTF8LEN] = '\0';
|
||||||
|
|
||||||
/** check word **/
|
/** check word **/
|
||||||
i = j = z = 0;
|
i = j = z = 0;
|
||||||
|
|
4
vendor/hunspell/src/hunspell/replist.hxx
vendored
4
vendor/hunspell/src/hunspell/replist.hxx
vendored
|
@ -6,9 +6,11 @@
|
||||||
|
|
||||||
#include "w_char.hxx"
|
#include "w_char.hxx"
|
||||||
|
|
||||||
#undef near
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED RepList
|
class LIBHUNSPELL_DLL_EXPORTED RepList
|
||||||
{
|
{
|
||||||
|
private:
|
||||||
|
RepList(const RepList&);
|
||||||
|
RepList& operator = (const RepList&);
|
||||||
protected:
|
protected:
|
||||||
replentry ** dat;
|
replentry ** dat;
|
||||||
int size;
|
int size;
|
||||||
|
|
49
vendor/hunspell/src/hunspell/suggestmgr.cxx
vendored
49
vendor/hunspell/src/hunspell/suggestmgr.cxx
vendored
|
@ -107,7 +107,10 @@ int SuggestMgr::testsug(char** wlst, const char * candidate, int wl, int ns, int
|
||||||
int cwrd = 1;
|
int cwrd = 1;
|
||||||
if (ns == maxSug) return maxSug;
|
if (ns == maxSug) return maxSug;
|
||||||
for (int k=0; k < ns; k++) {
|
for (int k=0; k < ns; k++) {
|
||||||
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
if (strcmp(candidate,wlst[k]) == 0) {
|
||||||
|
cwrd = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) {
|
if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) {
|
||||||
wlst[ns] = mystrdup(candidate);
|
wlst[ns] = mystrdup(candidate);
|
||||||
|
@ -364,8 +367,12 @@ int SuggestMgr::map_related(const char * word, char * candidate, int wn, int cn,
|
||||||
int cwrd = 1;
|
int cwrd = 1;
|
||||||
*(candidate + cn) = '\0';
|
*(candidate + cn) = '\0';
|
||||||
int wl = strlen(candidate);
|
int wl = strlen(candidate);
|
||||||
for (int m=0; m < ns; m++)
|
for (int m=0; m < ns; m++) {
|
||||||
if (strcmp(candidate, wlst[m]) == 0) cwrd = 0;
|
if (strcmp(candidate, wlst[m]) == 0) {
|
||||||
|
cwrd = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) {
|
if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) {
|
||||||
if (ns < maxSug) {
|
if (ns < maxSug) {
|
||||||
wlst[ns] = mystrdup(candidate);
|
wlst[ns] = mystrdup(candidate);
|
||||||
|
@ -678,7 +685,7 @@ int SuggestMgr::extrachar(char** wlst, const char * word, int ns, int cpdsuggest
|
||||||
// error is missing a letter it needs
|
// error is missing a letter it needs
|
||||||
int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsuggest)
|
int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsuggest)
|
||||||
{
|
{
|
||||||
char candidate[MAXSWUTF8L];
|
char candidate[MAXSWUTF8L + 4];
|
||||||
char * p;
|
char * p;
|
||||||
clock_t timelimit = clock();
|
clock_t timelimit = clock();
|
||||||
int timer = MINTIMER;
|
int timer = MINTIMER;
|
||||||
|
@ -700,8 +707,8 @@ int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsugge
|
||||||
// error is missing a letter it needs
|
// error is missing a letter it needs
|
||||||
int SuggestMgr::forgotchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
|
int SuggestMgr::forgotchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
|
||||||
{
|
{
|
||||||
w_char candidate_utf[MAXSWL];
|
w_char candidate_utf[MAXSWL + 1];
|
||||||
char candidate[MAXSWUTF8L];
|
char candidate[MAXSWUTF8L + 4];
|
||||||
w_char * p;
|
w_char * p;
|
||||||
clock_t timelimit = clock();
|
clock_t timelimit = clock();
|
||||||
int timer = MINTIMER;
|
int timer = MINTIMER;
|
||||||
|
@ -761,8 +768,12 @@ int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest
|
||||||
((c1 == 3) && (c2 >= 2)))) *p = '-';
|
((c1 == 3) && (c2 >= 2)))) *p = '-';
|
||||||
|
|
||||||
cwrd = 1;
|
cwrd = 1;
|
||||||
for (int k=0; k < ns; k++)
|
for (int k=0; k < ns; k++) {
|
||||||
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
if (strcmp(candidate,wlst[k]) == 0) {
|
||||||
|
cwrd = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (ns < maxSug) {
|
if (ns < maxSug) {
|
||||||
if (cwrd) {
|
if (cwrd) {
|
||||||
wlst[ns] = mystrdup(candidate);
|
wlst[ns] = mystrdup(candidate);
|
||||||
|
@ -777,8 +788,12 @@ int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest
|
||||||
mystrlen(p + 1) > 1 &&
|
mystrlen(p + 1) > 1 &&
|
||||||
mystrlen(candidate) - mystrlen(p) > 1) {
|
mystrlen(candidate) - mystrlen(p) > 1) {
|
||||||
*p = '-';
|
*p = '-';
|
||||||
for (int k=0; k < ns; k++)
|
for (int k=0; k < ns; k++) {
|
||||||
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
|
if (strcmp(candidate,wlst[k]) == 0) {
|
||||||
|
cwrd = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (ns < maxSug) {
|
if (ns < maxSug) {
|
||||||
if (cwrd) {
|
if (cwrd) {
|
||||||
wlst[ns] = mystrdup(candidate);
|
wlst[ns] = mystrdup(candidate);
|
||||||
|
@ -1333,7 +1348,10 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md
|
||||||
if ((!guessorig[i] && strstr(guess[i], wlst[j])) ||
|
if ((!guessorig[i] && strstr(guess[i], wlst[j])) ||
|
||||||
(guessorig[i] && strstr(guessorig[i], wlst[j])) ||
|
(guessorig[i] && strstr(guessorig[i], wlst[j])) ||
|
||||||
// check forbidden words
|
// check forbidden words
|
||||||
!checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) unique = 0;
|
!checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) {
|
||||||
|
unique = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (unique) {
|
if (unique) {
|
||||||
wlst[ns++] = guess[i];
|
wlst[ns++] = guess[i];
|
||||||
|
@ -1361,7 +1379,10 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md
|
||||||
// don't suggest previous suggestions or a previous suggestion with prefixes or affixes
|
// don't suggest previous suggestions or a previous suggestion with prefixes or affixes
|
||||||
if (strstr(rootsphon[i], wlst[j]) ||
|
if (strstr(rootsphon[i], wlst[j]) ||
|
||||||
// check forbidden words
|
// check forbidden words
|
||||||
!checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) unique = 0;
|
!checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) {
|
||||||
|
unique = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (unique) {
|
if (unique) {
|
||||||
wlst[ns++] = mystrdup(rootsphon[i]);
|
wlst[ns++] = mystrdup(rootsphon[i]);
|
||||||
|
@ -1855,6 +1876,10 @@ int SuggestMgr::commoncharacterpositions(char * s1, const char * s2, int * is_sw
|
||||||
w_char su2[MAXSWL];
|
w_char su2[MAXSWL];
|
||||||
int l1 = u8_u16(su1, MAXSWL, s1);
|
int l1 = u8_u16(su1, MAXSWL, s1);
|
||||||
int l2 = u8_u16(su2, MAXSWL, s2);
|
int l2 = u8_u16(su2, MAXSWL, s2);
|
||||||
|
|
||||||
|
if (l1 <= 0 || l2 <= 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
// decapitalize dictionary word
|
// decapitalize dictionary word
|
||||||
if (complexprefixes) {
|
if (complexprefixes) {
|
||||||
mkallsmall_utf(su2+l2-1, 1, langnum);
|
mkallsmall_utf(su2+l2-1, 1, langnum);
|
||||||
|
|
4
vendor/hunspell/src/hunspell/suggestmgr.hxx
vendored
4
vendor/hunspell/src/hunspell/suggestmgr.hxx
vendored
|
@ -32,6 +32,10 @@ enum { LCS_UP, LCS_LEFT, LCS_UPLEFT };
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED SuggestMgr
|
class LIBHUNSPELL_DLL_EXPORTED SuggestMgr
|
||||||
{
|
{
|
||||||
|
private:
|
||||||
|
SuggestMgr(const SuggestMgr&);
|
||||||
|
SuggestMgr& operator = (const SuggestMgr&);
|
||||||
|
private:
|
||||||
char * ckey;
|
char * ckey;
|
||||||
int ckeyl;
|
int ckeyl;
|
||||||
w_char * ckey_utf;
|
w_char * ckey_utf;
|
||||||
|
|
33
vendor/hunspell/src/parsers/firstparser.cxx
vendored
33
vendor/hunspell/src/parsers/firstparser.cxx
vendored
|
@ -1,33 +0,0 @@
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#include "../hunspell/csutil.hxx"
|
|
||||||
#include "firstparser.hxx"
|
|
||||||
|
|
||||||
#ifndef W32
|
|
||||||
using namespace std;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
FirstParser::FirstParser(const char * wordchars)
|
|
||||||
{
|
|
||||||
init(wordchars);
|
|
||||||
}
|
|
||||||
|
|
||||||
FirstParser::~FirstParser()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
char * FirstParser::next_token()
|
|
||||||
{
|
|
||||||
char * tabpos = strchr(line[actual],'\t');
|
|
||||||
if ((tabpos) && (tabpos - line[actual]>token)) {
|
|
||||||
char * t = (char *) malloc(tabpos - line[actual] + 1);
|
|
||||||
t[tabpos - line[actual]] = '\0';
|
|
||||||
token = tabpos - line[actual] +1;
|
|
||||||
if (t) return strncpy(t, line[actual], tabpos - line[actual]);
|
|
||||||
fprintf(stderr,"Error - Insufficient Memory\n");
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
34
vendor/hunspell/src/parsers/firstparser.hxx
vendored
34
vendor/hunspell/src/parsers/firstparser.hxx
vendored
|
@ -1,34 +0,0 @@
|
||||||
/*
|
|
||||||
* parser classes of HunTools
|
|
||||||
*
|
|
||||||
* implemented: text, HTML, TeX, first word
|
|
||||||
*
|
|
||||||
* Copyright (C) 2003, Laszlo Nemeth
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _FIRSTPARSER_HXX_
|
|
||||||
#define _FIRSTPARSER_HXX_
|
|
||||||
|
|
||||||
#include "textparser.hxx"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Check first word of the input line
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
class FirstParser : public TextParser
|
|
||||||
{
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
|
|
||||||
FirstParser(const char * wc);
|
|
||||||
virtual ~FirstParser();
|
|
||||||
|
|
||||||
virtual char * next_token();
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
151
vendor/hunspell/src/parsers/htmlparser.cxx
vendored
151
vendor/hunspell/src/parsers/htmlparser.cxx
vendored
|
@ -1,151 +0,0 @@
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#include "../hunspell/csutil.hxx"
|
|
||||||
#include "htmlparser.hxx"
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef W32
|
|
||||||
using namespace std;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
enum { ST_NON_WORD, ST_WORD, ST_TAG, ST_CHAR_ENTITY, ST_OTHER_TAG, ST_ATTRIB };
|
|
||||||
|
|
||||||
static const char * PATTERN[][2] = {
|
|
||||||
{ "<script", "</script>" },
|
|
||||||
{ "<style", "</style>" },
|
|
||||||
{ "<code", "</code>" },
|
|
||||||
{ "<samp", "</samp>" },
|
|
||||||
{ "<kbd", "</kbd>" },
|
|
||||||
{ "<var", "</var>" },
|
|
||||||
{ "<listing", "</listing>" },
|
|
||||||
{ "<address", "</address>" },
|
|
||||||
{ "<pre", "</pre>" },
|
|
||||||
{ "<!--", "-->" },
|
|
||||||
{ "<[cdata[", "]]>" }, // XML comment
|
|
||||||
{ "<", ">" }
|
|
||||||
};
|
|
||||||
|
|
||||||
#define PATTERN_LEN (sizeof(PATTERN) / (sizeof(char *) * 2))
|
|
||||||
|
|
||||||
static const char * PATTERN2[][2] = {
|
|
||||||
{ "<img", "alt=" }, // ALT and TITLE attrib handled spec.
|
|
||||||
{ "<img", "title=" },
|
|
||||||
{ "<a ", "title=" }
|
|
||||||
};
|
|
||||||
|
|
||||||
#define PATTERN_LEN2 (sizeof(PATTERN2) / (sizeof(char *) * 2))
|
|
||||||
|
|
||||||
HTMLParser::HTMLParser(const char * wordchars)
|
|
||||||
{
|
|
||||||
init(wordchars);
|
|
||||||
}
|
|
||||||
|
|
||||||
HTMLParser::HTMLParser(unsigned short * wordchars, int len)
|
|
||||||
{
|
|
||||||
init(wordchars, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
HTMLParser::~HTMLParser()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int HTMLParser::look_pattern(const char * p[][2], unsigned int len, int column)
|
|
||||||
{
|
|
||||||
for (unsigned int i = 0; i < len; i++) {
|
|
||||||
char * j = line[actual] + head;
|
|
||||||
const char * k = p[i][column];
|
|
||||||
while ((*k != '\0') && (tolower(*j) == *k)) {
|
|
||||||
j++;
|
|
||||||
k++;
|
|
||||||
}
|
|
||||||
if (*k == '\0') return i;
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* HTML parser
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
char * HTMLParser::next_token()
|
|
||||||
{
|
|
||||||
const char * latin1;
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
//fprintf(stderr, "%d:%c:%s\n", state, line[actual][head], line[actual]);
|
|
||||||
//getch();
|
|
||||||
switch (state)
|
|
||||||
{
|
|
||||||
case ST_NON_WORD: // non word chars
|
|
||||||
prevstate = ST_NON_WORD;
|
|
||||||
if ((pattern_num = look_pattern(PATTERN, PATTERN_LEN, 0)) != -1) {
|
|
||||||
checkattr = 0;
|
|
||||||
if ((pattern2_num = look_pattern(PATTERN2, PATTERN_LEN2, 0)) != -1) {
|
|
||||||
checkattr = 1;
|
|
||||||
}
|
|
||||||
state = ST_TAG;
|
|
||||||
} else if (is_wordchar(line[actual] + head)) {
|
|
||||||
state = ST_WORD;
|
|
||||||
token = head;
|
|
||||||
} else if ((latin1 = get_latin1(line[actual] + head))) {
|
|
||||||
state = ST_WORD;
|
|
||||||
token = head;
|
|
||||||
head += strlen(latin1);
|
|
||||||
} else if (line[actual][head] == '&') {
|
|
||||||
state = ST_CHAR_ENTITY;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case ST_WORD: // wordchar
|
|
||||||
if ((latin1 = get_latin1(line[actual] + head))) {
|
|
||||||
head += strlen(latin1);
|
|
||||||
} else if (! is_wordchar(line[actual] + head)) {
|
|
||||||
state = prevstate;
|
|
||||||
char * t = alloc_token(token, &head);
|
|
||||||
if (t) return t;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case ST_TAG: // comment, labels, etc
|
|
||||||
int i;
|
|
||||||
if ((checkattr == 1) && ((i = look_pattern(PATTERN2, PATTERN_LEN2, 1)) != -1)
|
|
||||||
&& (strcmp(PATTERN2[i][0],PATTERN2[pattern2_num][0]) == 0)) {
|
|
||||||
checkattr = 2;
|
|
||||||
} else if ((checkattr > 0) && (line[actual][head] == '>')) {
|
|
||||||
state = ST_NON_WORD;
|
|
||||||
} else if (((i = look_pattern(PATTERN, PATTERN_LEN, 1)) != -1) &&
|
|
||||||
(strcmp(PATTERN[i][1],PATTERN[pattern_num][1]) == 0)) {
|
|
||||||
state = ST_NON_WORD;
|
|
||||||
head += strlen(PATTERN[pattern_num][1]) - 1;
|
|
||||||
} else if ( (strcmp(PATTERN[pattern_num][0], "<") == 0) &&
|
|
||||||
((line[actual][head] == '"') || (line[actual][head] == '\''))) {
|
|
||||||
quotmark = line[actual][head];
|
|
||||||
state = ST_ATTRIB;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case ST_ATTRIB: // non word chars
|
|
||||||
prevstate = ST_ATTRIB;
|
|
||||||
if (line[actual][head] == quotmark) {
|
|
||||||
state = ST_TAG;
|
|
||||||
if (checkattr == 2) checkattr = 1;
|
|
||||||
// for IMG ALT
|
|
||||||
} else if (is_wordchar(line[actual] + head) && (checkattr == 2)) {
|
|
||||||
state = ST_WORD;
|
|
||||||
token = head;
|
|
||||||
} else if (line[actual][head] == '&') {
|
|
||||||
state = ST_CHAR_ENTITY;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case ST_CHAR_ENTITY: // SGML element
|
|
||||||
if ((tolower(line[actual][head]) == ';')) {
|
|
||||||
state = prevstate;
|
|
||||||
head--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (next_char(line[actual], &head)) return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
44
vendor/hunspell/src/parsers/htmlparser.hxx
vendored
44
vendor/hunspell/src/parsers/htmlparser.hxx
vendored
|
@ -1,44 +0,0 @@
|
||||||
/*
|
|
||||||
* HTML parser class for MySpell
|
|
||||||
*
|
|
||||||
* implemented: text, HTML, TeX
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002, Laszlo Nemeth
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _HTMLPARSER_HXX_
|
|
||||||
#define _HTMLPARSER_HXX_
|
|
||||||
|
|
||||||
|
|
||||||
#include "textparser.hxx"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* HTML Parser
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
class HTMLParser : public TextParser
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
|
|
||||||
HTMLParser(const char * wc);
|
|
||||||
HTMLParser(unsigned short * wordchars, int len);
|
|
||||||
virtual ~HTMLParser();
|
|
||||||
|
|
||||||
virtual char * next_token();
|
|
||||||
|
|
||||||
private:
|
|
||||||
|
|
||||||
int look_pattern(const char * p[][2], unsigned int len, int column);
|
|
||||||
int pattern_num;
|
|
||||||
int pattern2_num;
|
|
||||||
int prevstate;
|
|
||||||
int checkattr;
|
|
||||||
char quotmark;
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
223
vendor/hunspell/src/parsers/latexparser.cxx
vendored
223
vendor/hunspell/src/parsers/latexparser.cxx
vendored
|
@ -1,223 +0,0 @@
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#include "../hunspell/csutil.hxx"
|
|
||||||
#include "latexparser.hxx"
|
|
||||||
|
|
||||||
#ifndef W32
|
|
||||||
using namespace std;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
 * LaTeX constructs whose content must not be spell checked.
 *
 *   pat[0]  text that opens the construct
 *   pat[1]  text that closes it, or NULL when the construct is a command
 *           followed by brace arguments
 *   arg     for commands (pat[1] == NULL): number of arguments to skip
 *
 * look_pattern() returns the FIRST row that matches, so order is significant:
 * a row whose pat[0] is a prefix of a later row's pat[0] shadows that later
 * row. Do not reorder without re-checking the tokenizer's behaviour.
 */
static struct {
    const char * pat[2];
    int arg;
} PATTERN[] = {
    { { "\\(", "\\)" } , 0 },
    { { "$$", "$$" } , 0 },
    { { "$", "$" } , 0 },
    { { "\\begin{math}", "\\end{math}" } , 0 },
    { { "\\[", "\\]" } , 0 },
    { { "\\begin{displaymath}", "\\end{displaymath}" } , 0 },
    { { "\\begin{equation}", "\\end{equation}" } , 0 },
    { { "\\begin{equation*}", "\\end{equation*}" } , 0 },
    { { "\\cite", NULL } , 1 },
    { { "\\nocite", NULL } , 1 },
    { { "\\index", NULL } , 1 },
    { { "\\label", NULL } , 1 },
    { { "\\ref", NULL } , 1 },
    { { "\\pageref", NULL } , 1 },
    { { "\\parbox", NULL } , 1 },
    { { "\\begin{verbatim}", "\\end{verbatim}" } , 0 },
    { { "\\verb+", "+" } , 0 },
    { { "\\verb|", "|" } , 0 },
    { { "\\verb#", "#" } , 0 },
    { { "\\verb*", "*" } , 0 },
    { { "\\documentstyle", "\\begin{document}" } , 0 },
    { { "\\documentclass", "\\begin{document}" } , 0 },
    { { "\\usepackage", NULL } , 1 },
    { { "\\includeonly", NULL } , 1 },
    { { "\\include", NULL } , 1 },
    { { "\\input", NULL } , 1 },
    { { "\\vspace", NULL } , 1 },
    { { "\\setlength", NULL } , 2 },
    { { "\\addtolength", NULL } , 2 },
    { { "\\settowidth", NULL } , 2 },
    { { "\\rule", NULL } , 2 },
    { { "\\hspace", NULL } , 1 },
    /* a second, identical "\\vspace" row used to follow here; it could never
       be returned by the first-match lookup and has been removed */
    { { "\\\\[", "]" } , 0 },
    { { "\\pagebreak[", "]" } , 0 },
    { { "\\nopagebreak[", "]" } , 0 },
    { { "\\enlargethispage", NULL } , 1 },
    { { "\\begin{tabular}", NULL } , 1 },
    { { "\\addcontentsline", NULL } , 2 },
    { { "\\begin{thebibliography}", NULL } , 1 },
    { { "\\bibliography", NULL } , 1 },
    { { "\\bibliographystyle", NULL } , 1 },
    { { "\\bibitem", NULL } , 1 },
    { { "\\begin", NULL } , 1 },
    { { "\\end", NULL } , 1 },
    { { "\\pagestyle", NULL } , 1 },
    { { "\\pagenumbering", NULL } , 1 },
    { { "\\thispagestyle", NULL } , 1 },
    { { "\\newtheorem", NULL } , 2 },
    { { "\\newcommand", NULL } , 2 },
    { { "\\renewcommand", NULL } , 2 },
    { { "\\setcounter", NULL } , 2 },
    { { "\\addtocounter", NULL } , 1 },
    { { "\\stepcounter", NULL } , 1 },
    { { "\\selectlanguage", NULL } , 1 },
    { { "\\inputencoding", NULL } , 1 },
    { { "\\hyphenation", NULL } , 1 },
    { { "\\definecolor", NULL } , 3 },
    { { "\\color", NULL } , 1 },
    { { "\\textcolor", NULL } , 1 },
    { { "\\pagecolor", NULL } , 1 },
    { { "\\colorbox", NULL } , 2 },
    { { "\\fcolorbox", NULL } , 2 },
    { { "\\declaregraphicsextensions", NULL } , 1 },
    { { "\\psfig", NULL } , 1 },
    { { "\\url", NULL } , 1 },
    { { "\\eqref", NULL } , 1 },
    { { "\\vskip", NULL } , 1 },
    { { "\\vglue", NULL } , 1 },
    { { "\'\'", NULL } , 1 }
};
|
|
||||||
|
|
||||||
#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))
|
|
||||||
|
|
||||||
// 8-bit constructor: `wordchars` lists the bytes that count as word
// characters; it is passed straight to TextParser::init(const char *).
LaTeXParser::LaTeXParser(const char * wordchars)
{
    this->init(wordchars);
}

// UTF-8 constructor: `wordchars` is a table of `len` UTF-16 code units,
// passed straight to TextParser::init(unsigned short *, int).
LaTeXParser::LaTeXParser(unsigned short * wordchars, int len)
{
    this->init(wordchars, len);
}

// Nothing to release: the parser owns no resources of its own.
LaTeXParser::~LaTeXParser()
{
}
|
|
||||||
|
|
||||||
int LaTeXParser::look_pattern(int col)
|
|
||||||
{
|
|
||||||
for (unsigned int i = 0; i < PATTERN_LEN; i++) {
|
|
||||||
char * j = line[actual] + head;
|
|
||||||
const char * k = PATTERN[i].pat[col];
|
|
||||||
if (! k) continue;
|
|
||||||
while ((*k != '\0') && (tolower(*j) == *k)) {
|
|
||||||
j++;
|
|
||||||
k++;
|
|
||||||
}
|
|
||||||
if (*k == '\0') return i;
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* LaTeXParser
|
|
||||||
*
|
|
||||||
* state 0: not wordchar
|
|
||||||
* state 1: wordchar
|
|
||||||
* state 2: comments
|
|
||||||
* state 3: commands
|
|
||||||
* state 4: commands with arguments
|
|
||||||
* state 5: % comment
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
char * LaTeXParser::next_token()
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
int slash = 0;
|
|
||||||
int apostrophe;
|
|
||||||
for (;;) {
|
|
||||||
// fprintf(stderr,"depth: %d, state: %d, , arg: %d, token: %s\n",depth,state,arg,line[actual]+head);
|
|
||||||
|
|
||||||
switch (state)
|
|
||||||
{
|
|
||||||
case 0: // non word chars
|
|
||||||
if ((pattern_num = look_pattern(0)) != -1) {
|
|
||||||
if (PATTERN[pattern_num].pat[1]) {
|
|
||||||
state = 2;
|
|
||||||
} else {
|
|
||||||
state = 4;
|
|
||||||
depth = 0;
|
|
||||||
arg = 0;
|
|
||||||
opt = 1;
|
|
||||||
}
|
|
||||||
head += strlen(PATTERN[pattern_num].pat[0]) - 1;
|
|
||||||
} else if ((line[actual][head] == '%')) {
|
|
||||||
state = 5;
|
|
||||||
} else if (is_wordchar(line[actual] + head)) {
|
|
||||||
state = 1;
|
|
||||||
token = head;
|
|
||||||
} else if (line[actual][head] == '\\') {
|
|
||||||
if (line[actual][head + 1] == '\\' || // \\ (linebreak)
|
|
||||||
(line[actual][head + 1] == '$') || // \$ (dollar sign)
|
|
||||||
(line[actual][head + 1] == '%')) { // \% (percent)
|
|
||||||
head++;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
state = 3;
|
|
||||||
} else if (line[actual][head] == '%') {
|
|
||||||
if ((head==0) || (line[actual][head - 1] != '\\')) state = 5;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 1: // wordchar
|
|
||||||
apostrophe = 0;
|
|
||||||
if (! is_wordchar(line[actual] + head) ||
|
|
||||||
(line[actual][head] == '\'' && line[actual][head+1] == '\'' && ++apostrophe)) {
|
|
||||||
state = 0;
|
|
||||||
char * t = alloc_token(token, &head);
|
|
||||||
if (apostrophe) head += 2;
|
|
||||||
if (t) return t;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 2: // comment, labels, etc
|
|
||||||
if (((i = look_pattern(1)) != -1) &&
|
|
||||||
(strcmp(PATTERN[i].pat[1],PATTERN[pattern_num].pat[1]) == 0)) {
|
|
||||||
state = 0;
|
|
||||||
head += strlen(PATTERN[pattern_num].pat[1]) - 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 3: // command
|
|
||||||
if ((tolower(line[actual][head]) < 'a') || (tolower(line[actual][head]) > 'z')) {
|
|
||||||
state = 0;
|
|
||||||
head--;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 4: // command with arguments
|
|
||||||
if (slash && (line[actual][head] != '\0')) {
|
|
||||||
slash = 0;
|
|
||||||
head++;
|
|
||||||
break;
|
|
||||||
} else if (line[actual][head]=='\\') {
|
|
||||||
slash = 1;
|
|
||||||
} else if ((line[actual][head] == '{') ||
|
|
||||||
((opt) && (line[actual][head] == '['))) {
|
|
||||||
depth++;
|
|
||||||
opt = 0;
|
|
||||||
} else if (line[actual][head] == '}') {
|
|
||||||
depth--;
|
|
||||||
if (depth == 0) {
|
|
||||||
opt = 1;
|
|
||||||
arg++;
|
|
||||||
}
|
|
||||||
if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
|
|
||||||
(depth < 0) ) {
|
|
||||||
state = 0; // XXX not handles the last optional arg.
|
|
||||||
}
|
|
||||||
} else if (line[actual][head] == ']') depth--;
|
|
||||||
} // case
|
|
||||||
if (next_char(line[actual], &head)) {
|
|
||||||
if (state == 5) state = 0;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
44
vendor/hunspell/src/parsers/latexparser.hxx
vendored
44
vendor/hunspell/src/parsers/latexparser.hxx
vendored
|
@ -1,44 +0,0 @@
|
||||||
/*
|
|
||||||
* parser classes for MySpell
|
|
||||||
*
|
|
||||||
* implemented: text, HTML, TeX
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002, Laszlo Nemeth
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _LATEXPARSER_HXX_
|
|
||||||
#define _LATEXPARSER_HXX_
|
|
||||||
|
|
||||||
|
|
||||||
#include "textparser.hxx"
|
|
||||||
|
|
||||||
/*
|
|
||||||
 * LaTeX Parser
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
class LaTeXParser : public TextParser
|
|
||||||
{
|
|
||||||
int pattern_num; // number of comment
|
|
||||||
int depth; // depth of blocks
|
|
||||||
int arg; // arguments's number
|
|
||||||
int opt; // optional argument attrib.
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
LaTeXParser(const char * wc);
|
|
||||||
LaTeXParser(unsigned short * wordchars, int len);
|
|
||||||
virtual ~LaTeXParser();
|
|
||||||
|
|
||||||
virtual char * next_token();
|
|
||||||
|
|
||||||
private:
|
|
||||||
|
|
||||||
int look_pattern(int col);
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
71
vendor/hunspell/src/parsers/manparser.cxx
vendored
71
vendor/hunspell/src/parsers/manparser.cxx
vendored
|
@ -1,71 +0,0 @@
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#include "../hunspell/csutil.hxx"
|
|
||||||
#include "manparser.hxx"
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef W32
|
|
||||||
using namespace std;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Default constructor: performs no initialisation of its own.
ManParser::ManParser()
{
}

// 8-bit constructor: `wordchars` lists the bytes that count as word
// characters; it is passed straight to TextParser::init(const char *).
ManParser::ManParser(const char * wordchars)
{
    this->init(wordchars);
}

// UTF-8 constructor: `wordchars` is a table of `len` UTF-16 code units,
// passed straight to TextParser::init(unsigned short *, int).
ManParser::ManParser(unsigned short * wordchars, int len)
{
    this->init(wordchars, len);
}

// Nothing to release.
ManParser::~ManParser()
{
}
|
|
||||||
|
|
||||||
char * ManParser::next_token()
|
|
||||||
{
|
|
||||||
for (;;) {
|
|
||||||
switch (state)
|
|
||||||
{
|
|
||||||
case 1: // command arguments
|
|
||||||
if (line[actual][head] == ' ') state = 2;
|
|
||||||
break;
|
|
||||||
case 0: // dot in begin of line
|
|
||||||
if (line[actual][0] == '.') {
|
|
||||||
state = 1;
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
state = 2;
|
|
||||||
}
|
|
||||||
// no break
|
|
||||||
case 2: // non word chars
|
|
||||||
if (is_wordchar(line[actual] + head)) {
|
|
||||||
state = 3;
|
|
||||||
token = head;
|
|
||||||
} else if ((line[actual][head] == '\\') &&
|
|
||||||
(line[actual][head + 1] == 'f') &&
|
|
||||||
(line[actual][head + 2] != '\0')) {
|
|
||||||
head += 2;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 3: // wordchar
|
|
||||||
if (! is_wordchar(line[actual] + head)) {
|
|
||||||
state = 2;
|
|
||||||
char * t = alloc_token(token, &head);
|
|
||||||
if (t) return t;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (next_char(line[actual], &head)) {
|
|
||||||
state = 0;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
38
vendor/hunspell/src/parsers/manparser.hxx
vendored
38
vendor/hunspell/src/parsers/manparser.hxx
vendored
|
@ -1,38 +0,0 @@
|
||||||
/*
|
|
||||||
* parser classes for MySpell
|
|
||||||
*
|
|
||||||
* implemented: text, HTML, TeX
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002, Laszlo Nemeth
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _MANPARSER_HXX_
|
|
||||||
#define _MANPARSER_HXX_
|
|
||||||
|
|
||||||
#include "textparser.hxx"
|
|
||||||
|
|
||||||
/*
|
|
||||||
 * Man page Parser
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
class ManParser : public TextParser
|
|
||||||
{
|
|
||||||
|
|
||||||
protected:
|
|
||||||
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
ManParser();
|
|
||||||
ManParser(const char * wc);
|
|
||||||
ManParser(unsigned short * wordchars, int len);
|
|
||||||
virtual ~ManParser();
|
|
||||||
|
|
||||||
virtual char * next_token();
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
53
vendor/hunspell/src/parsers/testparser.cxx
vendored
53
vendor/hunspell/src/parsers/testparser.cxx
vendored
|
@ -1,53 +0,0 @@
|
||||||
#include <cstring>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstdio>
|
|
||||||
|
|
||||||
#include "textparser.hxx"
|
|
||||||
#include "htmlparser.hxx"
|
|
||||||
#include "latexparser.hxx"
|
|
||||||
|
|
||||||
#ifndef W32
|
|
||||||
using namespace std;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int
|
|
||||||
main(int argc, char** argv)
|
|
||||||
{
|
|
||||||
FILE * f;
|
|
||||||
/* first parse the command line options */
|
|
||||||
|
|
||||||
if (argc < 2) {
|
|
||||||
fprintf(stderr,"correct syntax is:\n");
|
|
||||||
fprintf(stderr,"testparser file\n");
|
|
||||||
fprintf(stderr,"example: testparser /dev/stdin\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* open the words to check list */
|
|
||||||
f = fopen(argv[1],"r");
|
|
||||||
if (!f) {
|
|
||||||
fprintf(stderr,"Error - could not open file of words to check\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
TextParser * p = new LaTeXParser("qwertzuiopasdfghjklyxcvbnméáúõûóüöíQWERTZUIOPASDFGHJKLYXCVBNMÍÉÁÕÚÖÜÓÛ");
|
|
||||||
|
|
||||||
char buf[MAXLNLEN];
|
|
||||||
char * next;
|
|
||||||
|
|
||||||
while(fgets(buf,MAXLNLEN,f)) {
|
|
||||||
fprintf(stdout,"---------------------------------------\n");
|
|
||||||
p->put_line(buf);
|
|
||||||
fprintf(stderr, "x:%s\n", buf);
|
|
||||||
p->set_url_checking(1);
|
|
||||||
while ((next=p->next_token())) {
|
|
||||||
fprintf(stdout,"token: %s\n",next);
|
|
||||||
free(next);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
delete p;
|
|
||||||
fclose(f);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
291
vendor/hunspell/src/parsers/textparser.cxx
vendored
291
vendor/hunspell/src/parsers/textparser.cxx
vendored
|
@ -1,291 +0,0 @@
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#include "../hunspell/csutil.hxx"
|
|
||||||
#include "textparser.hxx"
|
|
||||||
|
|
||||||
#ifndef W32
|
|
||||||
using namespace std;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// ISO-8859-1 HTML character entities
|
|
||||||
|
|
||||||
/*
 * HTML character entities that are treated as part of a word.
 *
 * get_latin1() only searches this table when the input starts with '&' and
 * compares the input against each entry, so every entry must be the entity
 * text itself ("&Agrave;"), not the character it stands for.
 */
static const char * LATIN1[] = {
    "&Agrave;",
    "&Atilde;",
    "&Aring;",
    "&AElig;",
    "&Egrave;",
    "&Ecirc;",
    "&Igrave;",
    "&Iuml;",
    "&ETH;",
    "&Ntilde;",
    "&Ograve;",
    "&Oslash;",
    "&Ugrave;",
    "&THORN;",
    "&agrave;",
    "&atilde;",
    "&aring;",
    "&aelig;",
    "&egrave;",
    "&ecirc;",
    "&igrave;",
    "&iuml;",
    "&eth;",
    "&ntilde;",
    "&ograve;",
    "&oslash;",
    "&ugrave;",
    "&thorn;",
    "&yuml;"
};
|
|
||||||
|
|
||||||
#define LATIN1_LEN (sizeof(LATIN1) / sizeof(char *))
|
|
||||||
|
|
||||||
// Default constructor: 8-bit mode with the built-in word character set
// (init() substitutes its default list when given NULL).
TextParser::TextParser()
{
    init(static_cast<const char *>(NULL));
}

// 8-bit constructor: `wordchars` lists the bytes that count as word characters.
TextParser::TextParser(const char * wordchars)
{
    this->init(wordchars);
}

// UTF-8 constructor: `wordchars` is a table of `len` UTF-16 code units.
TextParser::TextParser(unsigned short * wordchars, int len)
{
    this->init(wordchars, len);
}

// Nothing to release: all buffers are members of the object.
TextParser::~TextParser()
{
}
|
|
||||||
|
|
||||||
int TextParser::is_wordchar(char * w)
|
|
||||||
{
|
|
||||||
if (*w == '\0') return 0;
|
|
||||||
if (utf8) {
|
|
||||||
w_char wc;
|
|
||||||
unsigned short idx;
|
|
||||||
u8_u16(&wc, 1, w);
|
|
||||||
idx = (wc.h << 8) + wc.l;
|
|
||||||
return (unicodeisalpha(idx) || (wordchars_utf16 && flag_bsearch(wordchars_utf16, *((unsigned short *) &wc), wclen)));
|
|
||||||
} else {
|
|
||||||
return wordcharacters[(*w + 256) % 256];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const char * TextParser::get_latin1(char * s)
|
|
||||||
{
|
|
||||||
if (s[0] == '&') {
|
|
||||||
unsigned int i = 0;
|
|
||||||
while ((i < LATIN1_LEN) &&
|
|
||||||
strncmp(LATIN1[i], s, strlen(LATIN1[i]))) i++;
|
|
||||||
if (i != LATIN1_LEN) return LATIN1[i];
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
void TextParser::init(const char * wordchars)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < MAXPREVLINE; i++) {
|
|
||||||
line[i][0] = '\0';
|
|
||||||
}
|
|
||||||
actual = 0;
|
|
||||||
head = 0;
|
|
||||||
token = 0;
|
|
||||||
state = 0;
|
|
||||||
utf8 = 0;
|
|
||||||
checkurl = 0;
|
|
||||||
unsigned int j;
|
|
||||||
for (j = 0; j < 256; j++) {
|
|
||||||
wordcharacters[j] = 0;
|
|
||||||
}
|
|
||||||
if (!wordchars) wordchars = "qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM";
|
|
||||||
for (j = 0; j < strlen(wordchars); j++) {
|
|
||||||
wordcharacters[(wordchars[j] + 256) % 256] = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void TextParser::init(unsigned short * wc, int len)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < MAXPREVLINE; i++) {
|
|
||||||
line[i][0] = '\0';
|
|
||||||
}
|
|
||||||
actual = 0;
|
|
||||||
head = 0;
|
|
||||||
token = 0;
|
|
||||||
state = 0;
|
|
||||||
utf8 = 1;
|
|
||||||
checkurl = 0;
|
|
||||||
wordchars_utf16 = wc;
|
|
||||||
wclen = len;
|
|
||||||
}
|
|
||||||
|
|
||||||
int TextParser::next_char(char * line, int * pos) {
|
|
||||||
if (*(line + *pos) == '\0') return 1;
|
|
||||||
if (utf8) {
|
|
||||||
if (*(line + *pos) >> 7) {
|
|
||||||
// jump to next UTF-8 character
|
|
||||||
for((*pos)++; (*(line + *pos) & 0xc0) == 0x80; (*pos)++);
|
|
||||||
} else {
|
|
||||||
(*pos)++;
|
|
||||||
}
|
|
||||||
} else (*pos)++;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void TextParser::put_line(char * word)
|
|
||||||
{
|
|
||||||
actual = (actual + 1) % MAXPREVLINE;
|
|
||||||
strcpy(line[actual], word);
|
|
||||||
token = 0;
|
|
||||||
head = 0;
|
|
||||||
check_urls();
|
|
||||||
}
|
|
||||||
|
|
||||||
char * TextParser::get_prevline(int n)
|
|
||||||
{
|
|
||||||
return mystrdup(line[(actual + MAXPREVLINE - n) % MAXPREVLINE]);
|
|
||||||
}
|
|
||||||
|
|
||||||
char * TextParser::get_line()
|
|
||||||
{
|
|
||||||
return get_prevline(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
char * TextParser::next_token()
|
|
||||||
{
|
|
||||||
const char * latin1;
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
switch (state)
|
|
||||||
{
|
|
||||||
case 0: // non word chars
|
|
||||||
if (is_wordchar(line[actual] + head)) {
|
|
||||||
state = 1;
|
|
||||||
token = head;
|
|
||||||
} else if ((latin1 = get_latin1(line[actual] + head))) {
|
|
||||||
state = 1;
|
|
||||||
token = head;
|
|
||||||
head += strlen(latin1);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 1: // wordchar
|
|
||||||
if ((latin1 = get_latin1(line[actual] + head))) {
|
|
||||||
head += strlen(latin1);
|
|
||||||
} else if (! is_wordchar(line[actual] + head)) {
|
|
||||||
state = 0;
|
|
||||||
char * t = alloc_token(token, &head);
|
|
||||||
if (t) return t;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (next_char(line[actual], &head)) return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int TextParser::get_tokenpos()
|
|
||||||
{
|
|
||||||
return token;
|
|
||||||
}
|
|
||||||
|
|
||||||
int TextParser::change_token(const char * word)
|
|
||||||
{
|
|
||||||
if (word) {
|
|
||||||
char * r = mystrdup(line[actual] + head);
|
|
||||||
strcpy(line[actual] + token, word);
|
|
||||||
strcat(line[actual], r);
|
|
||||||
head = token;
|
|
||||||
free(r);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void TextParser::check_urls()
|
|
||||||
{
|
|
||||||
int url_state = 0;
|
|
||||||
int url_head = 0;
|
|
||||||
int url_token = 0;
|
|
||||||
int url = 0;
|
|
||||||
for (;;) {
|
|
||||||
switch (url_state)
|
|
||||||
{
|
|
||||||
case 0: // non word chars
|
|
||||||
if (is_wordchar(line[actual] + url_head)) {
|
|
||||||
url_state = 1;
|
|
||||||
url_token = url_head;
|
|
||||||
// Unix path
|
|
||||||
} else if (*(line[actual] + url_head) == '/') {
|
|
||||||
url_state = 1;
|
|
||||||
url_token = url_head;
|
|
||||||
url = 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 1: // wordchar
|
|
||||||
char ch = *(line[actual] + url_head);
|
|
||||||
// e-mail address
|
|
||||||
if ((ch == '@') ||
|
|
||||||
// MS-DOS, Windows path
|
|
||||||
(strncmp(line[actual] + url_head, ":\\", 2) == 0) ||
|
|
||||||
// URL
|
|
||||||
(strncmp(line[actual] + url_head, "://", 3) == 0)) {
|
|
||||||
url = 1;
|
|
||||||
} else if (! (is_wordchar(line[actual] + url_head) ||
|
|
||||||
(ch == '-') || (ch == '_') || (ch == '\\') ||
|
|
||||||
(ch == '.') || (ch == ':') || (ch == '/') ||
|
|
||||||
(ch == '~') || (ch == '%') || (ch == '*') ||
|
|
||||||
(ch == '$') || (ch == '[') || (ch == ']') ||
|
|
||||||
(ch == '?') || (ch == '!') ||
|
|
||||||
((ch >= '0') && (ch <= '9')))) {
|
|
||||||
url_state = 0;
|
|
||||||
if (url == 1) {
|
|
||||||
for (int i = url_token; i < url_head; i++) {
|
|
||||||
*(urlline + i) = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
url = 0;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
*(urlline + url_head) = 0;
|
|
||||||
if (next_char(line[actual], &url_head)) return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int TextParser::get_url(int token_pos, int * head)
|
|
||||||
{
|
|
||||||
for (int i = *head; urlline[i] && *(line[actual]+i); i++, (*head)++);
|
|
||||||
return checkurl ? 0 : urlline[token_pos];
|
|
||||||
}
|
|
||||||
|
|
||||||
void TextParser::set_url_checking(int check)
|
|
||||||
{
|
|
||||||
checkurl = check;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
char * TextParser::alloc_token(int token, int * head)
|
|
||||||
{
|
|
||||||
if (get_url(token, head)) return NULL;
|
|
||||||
char * t = (char *) malloc(*head - token + 1);
|
|
||||||
if (t) {
|
|
||||||
t[*head - token] = '\0';
|
|
||||||
strncpy(t, line[actual] + token, *head - token);
|
|
||||||
// remove colon for Finnish and Swedish language
|
|
||||||
if (t[*head - token - 1] == ':') {
|
|
||||||
t[*head - token - 1] = '\0';
|
|
||||||
if (!t[0]) {
|
|
||||||
free(t);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
fprintf(stderr,"Error - Insufficient Memory\n");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
69
vendor/hunspell/src/parsers/textparser.hxx
vendored
69
vendor/hunspell/src/parsers/textparser.hxx
vendored
|
@ -1,69 +0,0 @@
|
||||||
/*
|
|
||||||
* parser classes for MySpell
|
|
||||||
*
|
|
||||||
* implemented: text, HTML, TeX
|
|
||||||
*
|
|
||||||
* Copyright (C) 2002, Laszlo Nemeth
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _TEXTPARSER_HXX_
|
|
||||||
#define _TEXTPARSER_HXX_
|
|
||||||
|
|
||||||
// set sum of actual and previous lines
|
|
||||||
#define MAXPREVLINE 4
|
|
||||||
|
|
||||||
#ifndef MAXLNLEN
|
|
||||||
#define MAXLNLEN 8192
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Base Text Parser
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
class TextParser
|
|
||||||
{
|
|
||||||
|
|
||||||
protected:
|
|
||||||
void init(const char *);
|
|
||||||
void init(unsigned short * wordchars, int len);
|
|
||||||
int wordcharacters[256]; // for detection of the word boundaries
|
|
||||||
char line[MAXPREVLINE][MAXLNLEN]; // parsed and previous lines
|
|
||||||
char urlline[MAXLNLEN]; // mask for url detection
|
|
||||||
int checkurl;
|
|
||||||
int actual; // actual line
|
|
||||||
int head; // head position
|
|
||||||
int token; // begin of token
|
|
||||||
int state; // state of automata
|
|
||||||
int utf8; // UTF-8 character encoding
|
|
||||||
int next_char(char * line, int * pos);
|
|
||||||
unsigned short * wordchars_utf16;
|
|
||||||
int wclen;
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
TextParser();
|
|
||||||
TextParser(unsigned short * wordchars, int len);
|
|
||||||
TextParser(const char * wc);
|
|
||||||
virtual ~TextParser();
|
|
||||||
|
|
||||||
void put_line(char * line);
|
|
||||||
char * get_line();
|
|
||||||
char * get_prevline(int n);
|
|
||||||
virtual char * next_token();
|
|
||||||
int change_token(const char * word);
|
|
||||||
void set_url_checking(int check);
|
|
||||||
|
|
||||||
int get_tokenpos();
|
|
||||||
int is_wordchar(char * w);
|
|
||||||
const char * get_latin1(char * s);
|
|
||||||
char * next_char();
|
|
||||||
int tokenize_urls();
|
|
||||||
void check_urls();
|
|
||||||
int get_url(int token_pos, int * head);
|
|
||||||
char * alloc_token(int token, int * head);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
32
vendor/hunspell/src/win_api/Hunspell.rc
vendored
32
vendor/hunspell/src/win_api/Hunspell.rc
vendored
|
@ -1,32 +0,0 @@
|
||||||
|
|
||||||
#include <windows.h>
|
|
||||||
|
|
||||||
// Version resource of the Hunspell DLL.
// FILEVERSION / PRODUCTVERSION are kept in step with the "FileVersion" /
// "ProductVersion" strings below (they previously read 1,3,1,0).
VS_VERSION_INFO VERSIONINFO
FILEVERSION 1,3,2,0
PRODUCTVERSION 1,3,2,0
FILEFLAGSMASK 0x17L
FILEFLAGS 0
FILEOS VOS_NT_WINDOWS32
FILETYPE VFT_APP
FILESUBTYPE VFT2_UNKNOWN
BEGIN
    BLOCK "VarFileInfo"
    BEGIN
        VALUE "Translation", 0x409, 1200
    END
    BLOCK "StringFileInfo"
    BEGIN
        BLOCK "040904b0"
        BEGIN
            VALUE "Comments", "Hunspell (http://hunspell.sourceforge.net/) by László Németh"
            VALUE "CompanyName", "http://hunspell.sourceforge.net/"
            VALUE "FileDescription", "libhunspell"
            VALUE "FileVersion", "1.3.2"
            VALUE "InternalName", "libhunspell"
            VALUE "LegalCopyright", "Copyright (c) 2007-2011"
            VALUE "OriginalFilename", "libhunspell.dll"
            VALUE "ProductName", "Hunspell Dynamic Link Library"
            VALUE "ProductVersion", "1.3.2"
        END
    END
END
|
|
4
vendor/hunspell/src/win_api/config.h
vendored
4
vendor/hunspell/src/win_api/config.h
vendored
|
@ -204,5 +204,5 @@
|
||||||
#define PACKAGE_TARNAME
|
#define PACKAGE_TARNAME
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
/* Define to the version of this package. */
|
||||||
#define PACKAGE_VERSION "1.3.2"
|
#define PACKAGE_VERSION "1.3.3"
|
||||||
#define VERSION "1.3.2"
|
#define VERSION "1.3.3"
|
||||||
|
|
126
vendor/hunspell/src/win_api/hunspelldll.c
vendored
126
vendor/hunspell/src/win_api/hunspelldll.c
vendored
|
@ -1,126 +0,0 @@
|
||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Copyright (C) 2006
|
|
||||||
* Miha Vrhovnik (http://simail.sf.net, http://xcollect.sf.net)
|
|
||||||
* All Rights Reserved.
|
|
||||||
*
|
|
||||||
* Contributor(s):
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** **/
|
|
||||||
#include "hunspelldll.h"
|
|
||||||
#include <windows.h>
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstdio>
|
|
||||||
|
|
||||||
// Create a Hunspell object from an affix file and a dictionary file.
// The returned handle is released with hunspell_uninitialize().
LIBHUNSPELL_DLL_EXPORTED void * hunspell_initialize(char *aff_file, char *dict_file)
{
    return new Hunspell(aff_file, dict_file);
}

// Same as hunspell_initialize(), additionally passing `key` to the
// Hunspell constructor.
LIBHUNSPELL_DLL_EXPORTED void * hunspell_initialize_key(char *aff_file, char *dict_file, char * key)
{
    return new Hunspell(aff_file, dict_file, key);
}

// Destroy a Hunspell object created by one of the functions above.
LIBHUNSPELL_DLL_EXPORTED void hunspell_uninitialize(Hunspell *pMS)
{
    delete pMS;
}
|
|
||||||
|
|
||||||
// Forward to Hunspell::spell() and return its result.
LIBHUNSPELL_DLL_EXPORTED int hunspell_spell(Hunspell *pMS, char *word)
{
    const int result = pMS->spell(word);
    return result;
}

// Forward to Hunspell::suggest(); the list is stored through `slst` and the
// call's result is returned.
LIBHUNSPELL_DLL_EXPORTED int hunspell_suggest(Hunspell *pMS, char *word, char ***slst)
{
    const int result = pMS->suggest(slst, word);
    return result;
}

#ifdef HUNSPELL_EXPERIMENTAL
// Forward to Hunspell::suggest_auto() (experimental builds only).
LIBHUNSPELL_DLL_EXPORTED int hunspell_suggest_auto(Hunspell *pMS, char *word, char ***slst)
{
    const int result = pMS->suggest_auto(slst, word);
    return result;
}
#endif
|
|
||||||
|
|
||||||
// Release a list of `len` strings through Hunspell::free_list().
LIBHUNSPELL_DLL_EXPORTED void hunspell_free_list(Hunspell *pMS, char ***slst, int len)
{
    pMS->free_list(slst, len);
}

// deprecated (use hunspell_free_list)
// Frees the `len` strings of the list; the array itself is not freed and
// `pMS` is not used.
LIBHUNSPELL_DLL_EXPORTED void hunspell_suggest_free(Hunspell *pMS, char **slst, int len)
{
    char ** const end = slst + len;
    for (char ** item = slst; item < end; item++) {
        free(*item);
    }
}
|
|
||||||
|
|
||||||
// Forward to Hunspell::get_dic_encoding() and return its result.
LIBHUNSPELL_DLL_EXPORTED char * hunspell_get_dic_encoding(Hunspell *pMS)
{
    char * encoding = pMS->get_dic_encoding();
    return encoding;
}

// Forward to Hunspell::add() and return its result.
LIBHUNSPELL_DLL_EXPORTED int hunspell_add(Hunspell *pMS, char *word)
{
    const int result = pMS->add(word);
    return result;
}

// Forward to Hunspell::add_with_affix() and return its result.
LIBHUNSPELL_DLL_EXPORTED int hunspell_add_with_affix(Hunspell *pMS, char *word, char *modelword)
{
    const int result = pMS->add_with_affix(word, modelword);
    return result;
}
|
|
||||||
|
|
||||||
// DLL entry point. Process and thread attach/detach notifications need no
// work here, so every notification is simply acknowledged.
BOOL APIENTRY DllMain (HINSTANCE hInst     /* Library instance handle. */ ,
                       DWORD reason        /* Reason this function is being called. */ ,
                       LPVOID reserved     /* Not used. */ )
{
    (void) hInst;
    (void) reason;
    (void) reserved;

    /* Returns TRUE on success, FALSE on failure */
    return TRUE;
}
|
|
68
vendor/hunspell/src/win_api/hunspelldll.h
vendored
68
vendor/hunspell/src/win_api/hunspelldll.h
vendored
|
@ -1,68 +0,0 @@
|
||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* Copyright (C) 2006
|
|
||||||
* Miha Vrhovnik (http://simail.sf.net, http://xcollect.sf.net)
|
|
||||||
* All Rights Reserved.
|
|
||||||
*
|
|
||||||
* Contributor(s):
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** **/
|
|
||||||
#include "hunspell.hxx"
|
|
||||||
|
|
||||||
#ifndef _DLL_H_
|
|
||||||
#define _DLL_H_
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//returns pointer to spell object, params are aff file name and dict file name
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void *hunspell_initialize(char *aff_file, char *dict_file);
|
|
||||||
//frees spell object
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void hunspell_uninitialize(Hunspell *pMS);
|
|
||||||
//spellcheck word, returns 1 if word ok otherwise 0
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int hunspell_spell(Hunspell *pMS, char *word);
|
|
||||||
//suggest words for word, returns number of words in slst
|
|
||||||
// YOU NEED TO CALL hunspell_free_list (or the deprecated hunspell_suggest_free) when you are done with the words
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int hunspell_suggest(Hunspell *pMS, char *word, char ***slst);
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int hunspell_suggest_auto(Hunspell *pMS, char *word, char ***slst);
|
|
||||||
//free slst array
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void hunspell_free_list(Hunspell *pMS, char ***slst, int len);
|
|
||||||
// deprecated (use hunspell_free_list)
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void hunspell_suggest_free(Hunspell *pMS, char **slst, int len);
|
|
||||||
//make local copy of returned string!!
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * hunspell_get_dic_encoding(Hunspell *pMS);
|
|
||||||
//add word to dict (word stays valid until the spell object is destroyed)
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int hunspell_add(Hunspell *pMS, char *word);
|
|
||||||
//add word to dict with affixes of the modelword (word stays valid until the spell object is destroyed)
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int hunspell_add_with_affix(Hunspell *pMS, char *word, char *modelword);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* _DLL_H_ */
|
|
Loading…
Reference in a new issue