meson: add Hunspell port

This moves the visibility logic (HUNSPELL_STATIC/BUILDING_LIBHUNSPELL)
to the build system. Only relevant on Windows.
This commit is contained in:
Myaamori 2021-04-07 12:29:47 +01:00 committed by Ryan Lucia
parent 0f95589d0a
commit c43e4fba3c
59 changed files with 42 additions and 45002 deletions

View file

@ -183,7 +183,7 @@ foreach dep: [
['ffms2', '>=2.22', 'FFMS2', ['ffms2', 'ffms2_dep']],
# other
['fftw3', '', 'FFTW3', []],
['hunspell', '', 'Hunspell', []], # needs a proper port
['hunspell', '', 'Hunspell', ['hunspell', 'hunspell_dep']],
['uchardet', '', 'uchardet', []], # needs a proper port
dep_version = dep[1] != '' ? dep[1] : '>=0'

View file

@ -29,7 +29,6 @@
#include <boost/range/algorithm.hpp>
#undef near
#include <hunspell/hunspell.hxx>

View file

@ -0,0 +1,9 @@
[wrap-file]
directory = hunspell-1.7.0
source_url =
source_filename = hunspell-1.7.0.tar.gz
source_hash = 57be4e03ae9dd62c3471f667a0d81a14513e314d4d92081292b90435944ff951
patch_directory = hunspell

[provide]
hunspell = hunspell_dep

File diff suppressed because it is too large Load diff

@ -1,5 +0,0 @@
View file

View file

@ -1,12 +0,0 @@
View file

Electronic Distribution Mechanism to anyone to whom you made an
View file

`config.sub' isn't included in this package, then this package doesn't
View file

@ -1,182 +0,0 @@
Kevin Hendricks
View file

View file

@ -1,55 +0,0 @@
View file

View file

View file

View file

char in_compound = IN_CPD_NOT);
struct hentry * prefix_check(const char * word, int len,
char in_compound, const FLAG needflag = FLAG_NULL);
inline int isSubset(const char * s1, const char * s2);
struct hentry * prefix_check_twosfx(const char * word, int len,
char in_compound, const FLAG needflag = FLAG_NULL);
inline int isRevSubset(const char * s1, const char * end_of_s2, int len);
struct hentry * suffix_check(const char * word, int len, int sfxopts,
PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL,
char in_compound = IN_CPD_NOT);
struct hentry * suffix_check_twosfx(const char * word, int len,
int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
char * affix_check_morph(const char * word, int len,
const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
char * prefix_check_morph(const char * word, int len,
char in_compound, const FLAG needflag = FLAG_NULL);
char * suffix_check_morph (const char * word, int len, int sfxopts,
PfxEntry * ppfx, const FLAG cclass = FLAG_NULL,
const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
char * prefix_check_twosfx_morph(const char * word, int len,
char in_compound, const FLAG needflag = FLAG_NULL);
char * suffix_check_twosfx_morph(const char * word, int len,
int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL);
char * morphgen(char * ts, int wl, const unsigned short * ap,
unsigned short al, char * morph, char * targetmorph, int level);
int expand_rootword(struct guessword * wlst, int maxn, const char * ts,
int wl, const unsigned short * ap, unsigned short al, char * bad,
int, char *);
short get_syllable (const char * word, int wlen);
int cpdrep_check(const char * word, int len);
int cpdpat_check(const char * word, int len, hentry * r1, hentry * r2,
const char affixed);
int defcpd_check(hentry *** words, short wnum, hentry * rv,
hentry ** rwords, char all);
int cpdcase_check(const char * word, int len);
inline int candidate_check(const char * word, int len);
void setcminmax(int * cmin, int * cmax, const char * word, int len);
struct hentry * compound_check(const char * word, int len, short wordnum,
short numsyllable, short maxwordnum, short wnum, hentry ** words,
char hu_mov_rule, char is_sug, int * info);
int compound_check_morph(const char * word, int len, short wordnum,
short numsyllable, short maxwordnum, short wnum, hentry ** words,
char hu_mov_rule, char ** result, char * partresult);
struct hentry * lookup(const char * word);
int get_numrep() const;
struct replentry * get_reptable() const;
RepList * get_iconvtable() const;
RepList * get_oconvtable() const;
struct phonetable * get_phonetable() const;
int get_nummap() const;
struct mapentry * get_maptable() const;
int get_numbreak() const;
char ** get_breaktable() const;
char * get_encoding();
int get_langnum() const;
char * get_key_string();
char * get_try_string() const;
const char * get_wordchars() const;
unsigned short * get_wordchars_utf16(int * len) const;
char * get_ignore() const;
unsigned short * get_ignore_utf16(int * len) const;
int get_compound() const;
FLAG get_compoundflag() const;
FLAG get_compoundbegin() const;
FLAG get_forbiddenword() const;
FLAG get_nosuggest() const;
FLAG get_nongramsuggest() const;
FLAG get_needaffix() const;
FLAG get_onlyincompound() const;
FLAG get_compoundroot() const;
FLAG get_lemma_present() const;
int get_checknum() const;
const char * get_prefix() const;
const char * get_suffix() const;
const char * get_derived() const;
const char * get_version() const;
int have_contclass() const;
int get_utf8() const;
int get_complexprefixes() const;
char * get_suffixed(char ) const;
int get_maxngramsugs() const;
int get_maxcpdsugs() const;
int get_maxdiff() const;
int get_onlymaxdiff() const;
int get_nosplitsugs() const;
int get_sugswithdots(void) const;
FLAG get_keepcase(void) const;
FLAG get_forceucase(void) const;
FLAG get_warn(void) const;
int get_forbidwarn(void) const;
int get_checksharps(void) const;
char * encode_flag(unsigned short aflag) const;
int get_fullstrip() const;
int parse_file(const char * affpath, const char * key);
int parse_flag(char * line, unsigned short * out, FileMgr * af);
int parse_num(char * line, int * out, FileMgr * af);
int parse_cpdsyllable(char * line, FileMgr * af);
int parse_reptable(char * line, FileMgr * af);
int parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword);
int parse_phonetable(char * line, FileMgr * af);
int parse_maptable(char * line, FileMgr * af);
int parse_breaktable(char * line, FileMgr * af);
int parse_checkcpdtable(char * line, FileMgr * af);
int parse_defcpdtable(char * line, FileMgr * af);
int parse_affix(char * line, const char at, FileMgr * af, char * dupflags);
void reverse_condition(char *);
void debugflag(char * result, unsigned short flag);
int condlen(char *);
int encodeit(affentry &entry, char * cs);
int build_pfxtree(PfxEntry* pfxptr);
int build_sfxtree(SfxEntry* sfxptr);
int process_pfx_order();
int process_sfx_order();
PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr);
SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr);
int process_pfx_tree_to_list();
int process_sfx_tree_to_list();
int redundant_condition(char, char * strip, int stripl,
const char * cond, int);
void finishFileMgr(FileMgr *afflst);

#ifndef _ATYPES_HXX_
#define _ATYPES_HXX_
// HUNSTEM def.
#define HUNSTEM
#include "hashmgr.hxx"
#include "w_char.hxx"
#define SETSIZE 256
#define CONTSIZE 65536
#define MAXWORDLEN 100
#define MAXWORDUTF8LEN 256
// affentry options
#define aeXPRODUCT (1 << 0)
#define aeUTF8 (1 << 1)
#define aeALIASF (1 << 2)
#define aeALIASM (1 << 3)
#define aeLONGCOND (1 << 4)
// compound options
#define IN_CPD_NOT 0
#define IN_CPD_BEGIN 1
#define IN_CPD_END 2
#define IN_CPD_OTHER 3
// info options
#define SPELL_COMPOUND (1 << 0)
#define SPELL_FORBIDDEN (1 << 1)
#define SPELL_ALLCAP (1 << 2)
#define SPELL_NOCAP (1 << 3)
#define SPELL_INITCAP (1 << 4)
#define SPELL_ORIGCAP (1 << 5)
#define SPELL_WARN (1 << 6)
#define MAXLNLEN 8192
#define MINCPDLEN 3
#define MAXCOMPOUND 10
#define MAXCONDLEN 20
#define MAXCONDLEN_1 (MAXCONDLEN - sizeof(char *))
#define MAXACC 1000
#define FLAG unsigned short
#define FLAG_NULL 0x00
#define FREE_FLAG(a) a = 0
#define TESTAFF( a, b , c ) (flag_bsearch((unsigned short *) a, (unsigned short) b, c))
struct affentry
char * strip;
char * appnd;
unsigned char stripl;
unsigned char appndl;
char numconds;
char opts;
unsigned short aflag;
unsigned short * contclass;
short contclasslen;
union {
char conds[MAXCONDLEN];
struct {
char conds1[MAXCONDLEN_1];
char * conds2;
} l;
} c;
char * morphcode;
struct guessword {
char * word;
bool allow;
char * orig;
struct mapentry {
char ** set;
int len;
struct flagentry {
FLAG * def;
int len;
struct patentry {
char * pattern;
char * pattern2;
char * pattern3;
FLAG cond;
FLAG cond2;

#ifndef _BASEAFF_HXX_
#define _BASEAFF_HXX_
#include "hunvisapi.h"
AffEntry(const AffEntry&);
AffEntry& operator = (const AffEntry&);
AffEntry() {}
char * appnd;
char * strip;
unsigned char appndl;
unsigned char stripl;
char numconds;
char opts;
unsigned short aflag;
union {
char conds[MAXCONDLEN];
struct {
char conds1[MAXCONDLEN_1];
char * conds2;
} l;
} c;
char * morphcode;
unsigned short * contclass;
short contclasslen;

#ifndef __CSUTILHXX__
#define __CSUTILHXX__
#include "hunvisapi.h"
// First some base level utility routines
#include <string.h>
#include "w_char.hxx"
#include "htypes.hxx"
#include "nscore.h" // for mozalloc headers
// casing
#define NOCAP 0
#define INITCAP 1
#define ALLCAP 2
#define HUHCAP 3
#define HUHINITCAP 4
// default encoding and keystring
#define SPELL_ENCODING "ISO8859-1"
#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
// default morphological fields
#define MORPH_STEM "st:"
#define MORPH_ALLOMORPH "al:"
#define MORPH_POS "po:"
#define MORPH_DERI_PFX "dp:"
#define MORPH_INFL_PFX "ip:"
#define MORPH_TERM_PFX "tp:"
#define MORPH_DERI_SFX "ds:"
#define MORPH_INFL_SFX "is:"
#define MORPH_TERM_SFX "ts:"
#define MORPH_SURF_PFX "sp:"
#define MORPH_FREQ "fr:"
#define MORPH_PHON "ph:"
#define MORPH_HYPH "hy:"
#define MORPH_PART "pa:"
#define MORPH_FLAG "fl:"
#define MORPH_HENTRY "_H:"
#define MORPH_TAG_LEN strlen(MORPH_STEM)
#define MSEP_FLD ' '
#define MSEP_REC '\n'
#define MSEP_ALT '\v'
// default flags
#define DEFAULTFLAGS 65510
#define FORBIDDENWORD 65510
#define ONLYUPCASEFLAG 65511
// fopen or optional _wfopen to fix long pathname problem of WIN32
LIBHUNSPELL_DLL_EXPORTED FILE * myfopen(const char * path, const char * mode);
// convert UTF-16 characters to UTF-8
LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
// convert UTF-8 characters to UTF-16
LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
// sort 2-byte vector
LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
// binary search in 2-byte vector
LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
// remove end of line char(s)
LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
// duplicate reverse of string
LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
// parse into tokens with char delimiter
LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
// parse into tokens with char delimiter
LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
// parse into tokens with char delimiter
LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
// append s to the end of every line in text
LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
// tokenize into lines with new line
LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
// tokenize into lines with new line and uniq in place
LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);
// change oldchar to newchar in place
LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
// reverse word
LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
// reverse word (UTF-8 encoded)
LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
// remove duplicates
LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
// free character array list
LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
// character encoding information
struct cs_info {
unsigned char ccase;
unsigned char clower;
unsigned char cupper;
LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
// get language identifiers of language codes
LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
// get characters of the given 8bit encoding with lower- and uppercase forms
LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
// convert null terminated string to all caps using encoding
LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
// convert null terminated string to all lowercase using encoding
LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
// convert null terminated string to have initial capital using encoding
LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
// convert null terminated string to all caps
LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
// convert null terminated string to all lowercase
LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
// convert null terminated string to have initial capital
LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
// convert first nc characters of UTF-16 (w_char) string to lowercase
LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
// convert first nc characters of UTF-16 (w_char) string to uppercase
LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
// get type of capitalization
LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
// get type of capitalization (UTF-8)
LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
// strip all ignored characters in the string
LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
// strip all ignored characters in the string
LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);
LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
int * out_utf16_len, int utf8, int ln);
LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);
LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
// conversion function for protected memory
LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
// conversion function for protected memory
LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
// hash entry macros
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
char *ret;
if (!h->var)
ret = NULL;
else if (h->var & H_OPT_ALIASM)
ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
ret = HENTRY_WORD(h) + h->blen + 1;
return ret;
// NULL-free version for warning-free OOo build
LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
const char *ret;
if (!h->var)
ret = "";
else if (h->var & H_OPT_ALIASM)
ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
ret = HENTRY_WORD(h) + h->blen + 1;
return ret;
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)
return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);
#define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))

View file

@ -1,182 +0,0 @@
#include <stdlib.h>
#include <stdio.h>
#include "dictmgr.hxx"
#include "csutil.hxx"
DictMgr::DictMgr(const char * dictpath, const char * etype) : numdict(0)
// load list of etype entries
pdentry = (dictentry *)malloc(MAXDICTIONARIES*sizeof(struct dictentry));
if (pdentry) {
if (parse_file(dictpath, etype)) {
numdict = 0;
// no dictionary.lst found is okay
dictentry * pdict = NULL;
if (pdentry) {
pdict = pdentry;
for (int i=0;i<numdict;i++) {
if (pdict->lang) {
pdict->lang = NULL;
if (pdict->region) {
if (pdict->filename) {
pdict->filename = NULL;
pdentry = NULL;
pdict = NULL;
numdict = 0;
// read in list of etype entries and build up structure to describe them
int DictMgr::parse_file(const char * dictpath, const char * etype)
int i;
dictentry * pdict = pdentry;
// open the dictionary list file
FILE * dictlst;
dictlst = myfopen(dictpath,"r");
if (!dictlst) {
return 1;
// step one is to parse the dictionary list building up the
// descriptive structures
// read in each line ignoring any that don't start with etype
while (fgets(line,MAXDICTENTRYLEN,dictlst)) {
/* parse in a dictionary entry */
if (strncmp(line,etype,4) == 0) {
if (numdict < MAXDICTIONARIES) {
char * tp = line;
char * piece;
i = 0;
while ((piece=mystrsep(&tp,' '))) {
if (*piece != '\0') {
switch(i) {
case 0: break;
case 1: pdict->lang = mystrdup(piece); break;
case 2: if (strcmp (piece, "ANY") == 0)
pdict->region = mystrdup("");
pdict->region = mystrdup(piece);
case 3: pdict->filename = mystrdup(piece); break;
default: break;
if (i == 4) {
} else {
switch (i) {
case 3:
case 2:
fprintf(stderr,"dictionary list corruption in line \"%s\"\n",line);
return 0;
// return the list of dictionary entries (via ppentry) and their count
int DictMgr::get_list(dictentry ** ppentry)
*ppentry = pdentry;
return numdict;
// split a string into tokens based on a single char delimiter
// acts like strsep() but only uses a delim char and not
// a delim string
char * DictMgr::mystrsep(char ** stringp, const char delim)
char * rv = NULL;
char * mp = *stringp;
size_t n = strlen(mp);
if (n > 0) {
char * dp = (char *)memchr(mp,(int)((unsigned char)delim),n);
if (dp) {
*stringp = dp+1;
size_t nc = dp - mp;
rv = (char *) malloc(nc+1);
if (rv) {
*(rv+nc) = '\0';
} else {
rv = (char *) malloc(n+1);
if (rv) {
memcpy(rv, mp, n);
*(rv+n) = '\0';
*stringp = mp + n;
return rv;
// replaces strdup with ansi version
char * DictMgr::mystrdup(const char * s)
char * d = NULL;
if (s) {
int sl = strlen(s)+1;
d = (char *) malloc(sl);
if (d) memcpy(d,s,sl);
return d;
// remove cross-platform text line end characters
void DictMgr:: mychomp(char * s)
int k = strlen(s);
if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';

View file

@ -1,39 +0,0 @@
#ifndef _DICTMGR_HXX_
#define _DICTMGR_HXX_
#include "hunvisapi.h"
struct dictentry {
char * filename;
char * lang;
char * region;
DictMgr(const DictMgr&);
DictMgr& operator = (const DictMgr&);
int numdict;
dictentry * pdentry;
DictMgr(const char * dictpath, const char * etype);
int get_list(dictentry** ppentry);
int parse_file(const char * dictpath, const char * etype);
char * mystrsep(char ** stringp, const char delim);
char * mystrdup(const char * s);
void mychomp(char * s);

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "filemgr.hxx"
#include "csutil.hxx"
int FileMgr::fail(const char * err, const char * par) {
fprintf(stderr, err, par);
return -1;
FileMgr::FileMgr(const char * file, const char * key)
: hin(NULL)
, linenum(0)
in[0] = '\0';
fin = myfopen(file, "r");
if (!fin) {
// check hzipped file
char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1);
if (st) {
strcpy(st, file);
strcat(st, HZIP_EXTENSION);
hin = new Hunzip(st, key);
if (!fin && !hin) fail(MSG_OPEN, file);
if (fin) fclose(fin);
if (hin) delete hin;
char * FileMgr::getline() {
const char * l;
if (fin) return fgets(in, BUFSIZE - 1, fin);
if (hin && ((l = hin->getline()) != NULL)) return strcpy(in, l);
return NULL;
int FileMgr::getlinenum() {
return linenum;

View file

@ -1,28 +0,0 @@
/* file manager class - read lines of files [filename] OR [filename.hz] */
#ifndef _FILEMGR_HXX_
#define _FILEMGR_HXX_
#include "hunvisapi.h"
#include "hunzip.hxx"
#include <stdio.h>
FileMgr(const FileMgr&);
FileMgr& operator = (const FileMgr&);
FILE * fin;
Hunzip * hin;
char in[BUFSIZE + 50]; // input buffer
int fail(const char * err, const char * par);
int linenum;
FileMgr(const char * filename, const char * key = NULL);
char * getline();
int getlinenum();

View file

@ -1,936 +0,0 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <limits>
#include "hashmgr.hxx"
#include "csutil.hxx"
#include "atypes.hxx"
// build a hash table from a munched word list
HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
: tablesize(0)
, tableptr(NULL)
, userword(0)
, flag_mode(FLAG_CHAR)
, complexprefixes(0)
, utf8(0)
, forbiddenword(FORBIDDENWORD) // forbidden word signing flag
, numaliasf(0)
, aliasf(NULL)
, aliasflen(0)
, numaliasm(0)
, aliasm(NULL)
langnum = 0;
lang = NULL;
enc = NULL;
csconv = 0;
ignorechars = NULL;
ignorechars_utf16 = NULL;
ignorechars_utf16_len = 0;
load_config(apath, key);
int ec = load_tables(tpath, key);
if (ec) {
/* error condition - what should we do here */
HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);
if (tableptr) {
tableptr = NULL;
tablesize = 0;
if (tableptr) {
// now pass through hash table freeing up everything
// go through column by column of the table
for (int i=0; i < tablesize; i++) {
struct hentry * pt = tableptr[i];
struct hentry * nt = NULL;
while(pt) {
nt = pt->next;
if (pt->astr && (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) free(pt->astr);
pt = nt;
tablesize = 0;
if (aliasf) {
for (int j = 0; j < (numaliasf); j++) free(aliasf[j]);
aliasf = NULL;
if (aliasflen) {
aliasflen = NULL;
if (aliasm) {
for (int j = 0; j < (numaliasm); j++) free(aliasm[j]);
aliasm = NULL;
if (utf8) free_utf_tbl();
if (enc) free(enc);
if (lang) free(lang);
if (ignorechars) free(ignorechars);
if (ignorechars_utf16) free(ignorechars_utf16);
delete [] csconv;
// lookup a root word in the hashtable
struct hentry * HashMgr::lookup(const char *word) const
struct hentry * dp;
if (tableptr) {
dp = tableptr[hash(word)];
if (!dp) return NULL;
for ( ; dp != NULL; dp = dp->next) {
if (strcmp(word, dp->word) == 0) return dp;
return NULL;
// add a word to the hash table (private)
int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
int al, const char * desc, bool onlyupcase)
bool upcasehomonym = false;
int descl = desc ? (aliasm ? sizeof(char *) : strlen(desc) + 1) : 0;
// variable-length hash record with word and optional fields
struct hentry* hp =
(struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);
if (!hp) return 1;
char * hpw = hp->word;
strcpy(hpw, word);
if (ignorechars != NULL) {
if (utf8) {
remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len);
} else {
remove_ignored_chars(hpw, ignorechars);
if (complexprefixes) {
if (utf8) reverseword_utf(hpw); else reverseword(hpw);
int i = hash(hpw);
hp->blen = (unsigned char) wbl;
hp->clen = (unsigned char) wcl;
hp->alen = (short) al;
hp->astr = aff;
hp->next = NULL;
hp->next_homonym = NULL;
// store the description string or its pointer
if (desc) {
hp->var = H_OPT;
if (aliasm) {
hp->var += H_OPT_ALIASM;
store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));
} else {
strcpy(hpw + wbl + 1, desc);
if (complexprefixes) {
if (utf8) reverseword_utf(HENTRY_DATA(hp));
else reverseword(HENTRY_DATA(hp));
if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON;
} else hp->var = 0;
struct hentry * dp = tableptr[i];
if (!dp) {
tableptr[i] = hp;
return 0;
while (dp->next != NULL) {
if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {
// remove hidden onlyupcase homonym
if (!onlyupcase) {
if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
dp->astr = hp->astr;
dp->alen = hp->alen;
return 0;
} else {
dp->next_homonym = hp;
} else {
upcasehomonym = true;
if (strcmp(hp->word, dp->word) == 0) {
// remove hidden onlyupcase homonym
if (!onlyupcase) {
if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
dp->astr = hp->astr;
dp->alen = hp->alen;
return 0;
} else {
dp->next_homonym = hp;
} else {
upcasehomonym = true;
if (!upcasehomonym) {
dp->next = hp;
} else {
// remove hidden onlyupcase homonym
if (hp->astr) free(hp->astr);
return 0;
int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,
unsigned short * flags, int flagslen, char * dp, int captype)
if (flags == NULL)
flagslen = 0;
// add inner capitalized forms to handle the following allcap forms:
// Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
// Allcaps with suffixes: CIA's -> CIA'S
if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
((captype == ALLCAP) && (flagslen != 0))) &&
!((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) {
unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (flagslen+1));
if (!flags2) return 1;
if (flagslen) memcpy(flags2, flags, flagslen * sizeof(unsigned short));
flags2[flagslen] = ONLYUPCASEFLAG;
if (utf8) {
char st[BUFSIZE];
w_char w[BUFSIZE];
int wlen = u8_u16(w, BUFSIZE, word);
mkallsmall_utf(w, wlen, langnum);
mkallcap_utf(w, 1, langnum);
u16_u8(st, BUFSIZE, w, wlen);
return add_word(st,wbl,wcl,flags2,flagslen+1,dp, true);
} else {
mkallsmall(word, csconv);
mkinitcap(word, csconv);
return add_word(word,wbl,wcl,flags2,flagslen+1,dp, true);
return 0;
// detect captype and modify word length for UTF-8 encoding
int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) {
int len;
if (utf8) {
w_char dest_utf[BUFSIZE];
len = u8_u16(dest_utf, BUFSIZE, word);
*captype = get_captype_utf8(dest_utf, len, langnum);
} else {
len = wbl;
*captype = get_captype((char *) word, len, csconv);
return len;
// remove word (personal dictionary function for standalone applications)
int HashMgr::remove(const char * word)
struct hentry * dp = lookup(word);
while (dp) {
if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
unsigned short * flags =
(unsigned short *) malloc(sizeof(short) * (dp->alen + 1));
if (!flags) return 1;
for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i];
flags[dp->alen] = forbiddenword;
dp->astr = flags;
flag_qsort(flags, 0, dp->alen);
dp = dp->next_homonym;
return 0;
/* remove forbidden flag to add a personal word to the hash */
int HashMgr::remove_forbidden_flag(const char * word) {
struct hentry * dp = lookup(word);
if (!dp) return 1;
while (dp) {
if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) {
if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal dic.
else {
unsigned short * flags2 =
(unsigned short *) malloc(sizeof(short) * (dp->alen - 1));
if (!flags2) return 1;
int i, j = 0;
for (i = 0; i < dp->alen; i++) {
if (dp->astr[i] != forbiddenword) flags2[j++] = dp->astr[i];
dp->astr = flags2; // XXX allowed forbidden words
dp = dp->next_homonym;
return 0;
// add a custom dic. word to the hash table (public)
int HashMgr::add(const char * word)
unsigned short * flags = NULL;
int al = 0;
if (remove_forbidden_flag(word)) {
int captype;
int wbl = strlen(word);
int wcl = get_clen_and_captype(word, wbl, &captype);
add_word(word, wbl, wcl, flags, al, NULL, false);
return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, NULL, captype);
return 0;
int HashMgr::add_with_affix(const char * word, const char * example)
// detect captype and modify word length for UTF-8 encoding
struct hentry * dp = lookup(example);
if (dp && dp->astr) {
int captype;
int wbl = strlen(word);
int wcl = get_clen_and_captype(word, wbl, &captype);
if (aliasf) {
add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false);
} else {
unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeof(short));
if (flags) {
memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));
add_word(word, wbl, wcl, flags, dp->alen, NULL, false);
} else return 1;
return add_hidden_capitalized_word((char *) word, wbl, wcl, dp->astr, dp->alen, NULL, captype);
return 1;
// walk the hash table entry by entry - null at end
// initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);
struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
if (hp && hp->next != NULL) return hp->next;
for (col++; col < tablesize; col++) {
if (tableptr[col]) return tableptr[col];
// null at end and reset to start
col = -1;
return NULL;
// load a munched word list and build a hash table on the fly
int HashMgr::load_tables(const char * tpath, const char * key)
int al;
char * ap;
char * dp;
char * dp2;
unsigned short * flags;
char * ts;
// open dictionary file
FileMgr * dict = new FileMgr(tpath, key);
if (dict == NULL) return 1;
// first read the first line of file to get hash table size
if ((ts = dict->getline()) == NULL) {
HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath);
delete dict;
return 2;
/* remove byte order mark */
if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) {
memmove(ts, ts+3, strlen(ts+3)+1);
// warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions
tablesize = atoi(ts);
int nExtra = 5 + USERWORD;
if (tablesize <= 0 || (tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) / int(sizeof(struct hentry *)))) {
HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n");
delete dict;
return 4;
tablesize += nExtra;
if ((tablesize % 2) == 0) tablesize++;
// allocate the hash table
tableptr = (struct hentry **) calloc(tablesize, sizeof(struct hentry *));
if (! tableptr) {
delete dict;
return 3;
// loop through all words on the munched list and add to hash
// table and create word and affix strings
while ((ts = dict->getline()) != NULL) {
// split each line into word and morphological description
dp = ts;
while ((dp = strchr(dp, ':')) != NULL) {
if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) {
for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--);
if (dp < ts) { // missing word
dp = NULL;
} else {
*(dp + 1) = '\0';
dp = dp + 2;
// tabulator is the old morphological field separator
dp2 = strchr(ts, '\t');
if (dp2 && (!dp || dp2 < dp)) {
*dp2 = '\0';
dp = dp2 + 1;
// split each line into word and affix char strings
// "\/" signs slash in words (not affix separator)
// "/" at beginning of the line is word character (not affix separator)
ap = strchr(ts,'/');
while (ap) {
if (ap == ts) {
} else if (*(ap - 1) != '\\') break;
// replace "\/" with "/"
for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++);
ap = strchr(ap,'/');
if (ap) {
*ap = '\0';
if (aliasf) {
int index = atoi(ap + 1);
al = get_aliasf(index, &flags, dict);
if (!al) {
HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", dict->getlinenum());
*ap = '\0';
} else {
al = decode_flags(&flags, ap + 1, dict);
if (al == -1) {
HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
delete dict;
return 6;
flag_qsort(flags, 0, al);
} else {
al = 0;
ap = NULL;
flags = NULL;
int captype;
int wbl = strlen(ts);
int wcl = get_clen_and_captype(ts, wbl, &captype);
// add the word and its index plus its capitalized form optionally
if (add_word(ts,wbl,wcl,flags,al,dp, false) ||
add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) {
delete dict;
return 5;
delete dict;
return 0;
// the hash function is a simple load and rotate
// algorithm borrowed
int HashMgr::hash(const char * word) const
long hv = 0;
for (int i=0; i < 4 && *word != 0; i++)
hv = (hv << 8) | (*word++);
while (*word != 0) {
hv ^= (*word++);
return (unsigned long) hv % tablesize;
// Decode the textual flag list `flags` into an array of 16-bit flag ids.
// The array is allocated with malloc() and stored in *result.
// The text is interpreted according to the member flag_mode.
//
// result: receives the allocated array (NULL for an empty flag string)
// flags:  NUL-terminated flag text
// af:     file being parsed; used only for line numbers in warnings
//
// Returns the value of `len` (the number of flags) on the normal path,
// 0 for an empty string, or -1 when malloc() fails.
int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) {
int len;
// empty flag string: nothing is allocated
if (*flags == '\0') {
*result = NULL;
return 0;
switch (flag_mode) {
case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
len = strlen(flags);
// an odd byte count is only reported; the integer division below drops the last byte
if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum());
len /= 2;
*result = (unsigned short *) malloc(len * sizeof(short));
if (!*result) return -1;
for (int i = 0; i < len; i++) {
// first byte goes to the high 8 bits, second byte to the low 8 bits
(*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1];
case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233)
int i;
len = 1;
char * src = flags;
unsigned short * dest;
char * p;
// first pass: count the commas to size the array
for (p = flags; *p; p++) {
if (*p == ',') len++;
*result = (unsigned short *) malloc(len * sizeof(short));
if (!*result) return -1;
dest = *result;
// second pass: convert the number that ends at each comma
// NOTE(review): the advance of `dest` after each store is not visible in this excerpt - confirm
for (p = flags; *p; p++) {
if (*p == ',') {
i = atoi(src);
if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n",
af->getlinenum(), i, DEFAULTFLAGS - 1);
*dest = (unsigned short) i;
if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum());
src = p + 1;
// the last number has no trailing comma, so it is converted here
i = atoi(src);
if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n",
af->getlinenum(), i, DEFAULTFLAGS - 1);
*dest = (unsigned short) i;
if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum());
case FLAG_UNI: { // UTF-8 characters
w_char w[BUFSIZE/2];
// NOTE(review): the result of u8_u16() is used as the element count with no
// visible check for a negative (error) value - confirm what it can return
len = u8_u16(w, BUFSIZE/2, flags);
*result = (unsigned short *) malloc(len * sizeof(short));
if (!*result) return -1;
memcpy(*result, w, len * sizeof(short));
default: { // Ispell's one-character flags (erfg -> e r f g)
unsigned short * dest;
len = strlen(flags);
*result = (unsigned short *) malloc(len * sizeof(short));
if (!*result) return -1;
dest = *result;
// each byte is read as unsigned char and widened to 16 bits
for (unsigned char * p = (unsigned char *) flags; *p; p++) {
*dest = (unsigned short) *p;
return len;
unsigned short HashMgr::decode_flag(const char * f) {
unsigned short s = 0;
int i;
switch (flag_mode) {
s = ((unsigned short) f[0] << 8) + (unsigned short) f[1];
case FLAG_NUM:
i = atoi(f);
if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1);
s = (unsigned short) i;
case FLAG_UNI:
u8_u16((w_char *) &s, 1, f);
s = (unsigned short) *((unsigned char *)f);
if (s == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
return s;
// Convert the 16-bit flag id `f` back to its textual form, according to the
// member flag_mode. The result is a copy made with mystrdup().
// A flag id of 0 yields the literal text "(NULL)".
char * HashMgr::encode_flag(unsigned short f) {
    unsigned char ch[10];
    if (f == 0) return mystrdup("(NULL)");
    if (flag_mode == FLAG_LONG) {
        // high byte first, then low byte
        ch[0] = (unsigned char) (f >> 8);
        ch[1] = (unsigned char) (f - ((f >> 8) << 8));
        ch[2] = '\0';
    } else if (flag_mode == FLAG_NUM) {
        // an unsigned short prints as at most 5 digits, so 10 bytes suffice
        sprintf((char *) ch, "%d", f);
    } else if (flag_mode == FLAG_UNI) {
        u16_u8((char *) &ch, 10, (w_char *) &f, 1);
    } else {
        ch[0] = (unsigned char) (f);
        ch[1] = '\0';
    }
    return mystrdup((char *) ch);
}
// read in aff file and set flag mode
//
// Scans the affix file at `affpath` (opened through FileMgr with `key`) and
// handles these line types: FLAG, FORBIDDENWORD, SET, LANG, IGNORE, AF, AM
// and COMPLEXPREFIXES. Scanning stops at the first SFX/PFX line.
// Returns 0 on success and 1 when a parse helper reports failure.
int HashMgr::load_config(const char * affpath, const char * key)
char * line; // io buffers
int firstline = 1;
// open the affix file
FileMgr * afflst = new FileMgr(affpath, key);
// NOTE(review): this tests the pointer returned by `new`, not whether the
// file could be opened - confirm this check can ever trigger
if (!afflst) {
HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath);
return 1;
// read in each line ignoring any that do not
// start with a known line type indicator
while ((line = afflst->getline()) != NULL) {
/* remove byte order mark */
if (firstline) {
firstline = 0;
if (strncmp(line,"\xEF\xBB\xBF",3) == 0) memmove(line, line+3, strlen(line+3)+1);
/* parse the FLAG parameter (selects the flag encoding mode) */
if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) {
// flag_mode other than FLAG_CHAR means an earlier FLAG line was seen
if (flag_mode != FLAG_CHAR) {
HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of the FLAG affix file parameter\n", afflst->getlinenum());
if (strstr(line, "long")) flag_mode = FLAG_LONG;
if (strstr(line, "num")) flag_mode = FLAG_NUM;
if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;
if (flag_mode == FLAG_CHAR) {
HUNSPELL_WARNING(stderr, "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n", afflst->getlinenum());
/* parse the flag that marks forbidden words */
if (strncmp(line,"FORBIDDENWORD",13) == 0) {
char * st = NULL;
if (parse_string(line, &st, afflst->getlinenum())) {
delete afflst;
return 1;
// NOTE(review): no release of `st` is visible after this call - confirm ownership
forbiddenword = decode_flag(st);
/* parse the character set; only the first three characters of the keyword are compared */
if (strncmp(line, "SET", 3) == 0) {
if (parse_string(line, &enc, afflst->getlinenum())) {
delete afflst;
return 1;
if (strcmp(enc, "UTF-8") == 0) {
utf8 = 1;
} else csconv = get_current_cs(enc);
/* parse the language name and map it to a language number */
if (strncmp(line, "LANG", 4) == 0) {
if (parse_string(line, &lang, afflst->getlinenum())) {
delete afflst;
return 1;
langnum = get_lang_num(lang);
/* parse in the ignored characters (for example, Arabic optional diacritics characters */
if (strncmp(line,"IGNORE",6) == 0) {
if (parse_array(line, &ignorechars, &ignorechars_utf16,
&ignorechars_utf16_len, utf8, afflst->getlinenum())) {
delete afflst;
return 1;
/* parse the flag alias table */
if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) {
if (parse_aliasf(line, afflst)) {
delete afflst;
return 1;
/* parse the morphological alias table */
if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) {
if (parse_aliasm(line, afflst)) {
delete afflst;
return 1;
if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;
// the first affix rule ends the part of the file this function reads
if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break;
// no SET line selected a conversion table: fall back to SPELL_ENCODING
if (csconv == NULL) csconv = get_current_cs(SPELL_ENCODING);
delete afflst;
return 0;
/* parse in the AF (flag vector alias) table */
//
// `line` is the table header line; the entry count is read from it with
// atoi(). That many further lines are then read from `af`, each starting
// with "AF" followed by a flag list that is decoded and sorted.
// Fills the members numaliasf, aliasf and aliasflen.
// Returns 0 on success and 1 on any error.
int HashMgr::parse_aliasf(char * line, FileMgr * af)
// only one AF table is accepted per file
if (numaliasf != 0) {
HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
return 1;
char * tp = line;
char * piece;
int i = 0;
int np = 0;
piece = mystrsep(&tp, 0);
while (piece) {
if (*piece != '\0') {
switch(i) {
case 0: { np++; break; }
case 1: {
numaliasf = atoi(piece);
if (numaliasf < 1) {
numaliasf = 0;
aliasf = NULL;
aliasflen = NULL;
HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
return 1;
// one flag vector pointer and one length per table entry
aliasf = (unsigned short **) malloc(numaliasf * sizeof(unsigned short *));
aliasflen = (unsigned short *) malloc(numaliasf * sizeof(short));
if (!aliasf || !aliasflen) {
numaliasf = 0;
if (aliasf) free(aliasf);
if (aliasflen) free(aliasflen);
aliasf = NULL;
aliasflen = NULL;
return 1;
default: break;
piece = mystrsep(&tp, 0);
if (np != 2) {
numaliasf = 0;
aliasf = NULL;
aliasflen = NULL;
HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
return 1;
/* now parse the numaliasf lines to read in the remainder of the table */
// NOTE(review): the error paths below reset aliasf/aliasflen to NULL with no
// visible free() of the arrays allocated above - possible leak, confirm
char * nl;
for (int j=0; j < numaliasf; j++) {
if ((nl = af->getline()) == NULL) return 1;
tp = nl;
i = 0;
aliasf[j] = NULL;
aliasflen[j] = 0;
piece = mystrsep(&tp, 0);
while (piece) {
if (*piece != '\0') {
switch(i) {
case 0: {
if (strncmp(piece,"AF",2) != 0) {
numaliasf = 0;
aliasf = NULL;
aliasflen = NULL;
HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
return 1;
case 1: {
// NOTE(review): decode_flags() returns -1 when malloc fails; the cast to
// unsigned short would hide that value - confirm
aliasflen[j] = (unsigned short) decode_flags(&(aliasf[j]), piece, af);
flag_qsort(aliasf[j], 0, aliasflen[j]);
default: break;
piece = mystrsep(&tp, 0);
// an entry line that produced no flag vector makes the whole table invalid
if (!aliasf[j]) {
aliasf = NULL;
aliasflen = NULL;
numaliasf = 0;
HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
return 1;
return 0;
// Returns non-zero when a flag alias (AF) table is present, i.e. when the
// aliasf member is not NULL.
int HashMgr::is_aliasf() {
    return (aliasf != NULL);
}
// Look up entry `index` (1-based) of the flag alias table.
//
// On success *fvec points at the stored flag vector (not a copy) and its
// length is returned. For an index outside 1..numaliasf a warning with the
// current line number of `af` is emitted, *fvec is set to NULL and 0 is
// returned.
int HashMgr::get_aliasf(int index, unsigned short ** fvec, FileMgr * af) {
    if ((index > 0) && (index <= numaliasf)) {
        *fvec = aliasf[index - 1];
        return aliasflen[index - 1];
    }
    HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n", af->getlinenum(), index);
    *fvec = NULL;
    return 0;
}
/* parse morph alias definitions */
//
// `line` is the AM table header line; the entry count is read from it with
// atoi(). That many further lines are then read from `af`, each starting
// with "AM"; the rest of the line is stored as one string in aliasm[j].
// Returns 0 on success and 1 on any error.
int HashMgr::parse_aliasm(char * line, FileMgr * af)
// only one AM table is accepted per file
if (numaliasm != 0) {
HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
return 1;
char * tp = line;
char * piece;
int i = 0;
int np = 0;
piece = mystrsep(&tp, 0);
while (piece) {
if (*piece != '\0') {
switch(i) {
case 0: { np++; break; }
case 1: {
numaliasm = atoi(piece);
// NOTE(review): unlike parse_aliasf, numaliasm is not reset to 0 on this path - confirm
if (numaliasm < 1) {
HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
return 1;
aliasm = (char **) malloc(numaliasm * sizeof(char *));
if (!aliasm) {
numaliasm = 0;
return 1;
default: break;
piece = mystrsep(&tp, 0);
if (np != 2) {
numaliasm = 0;
aliasm = NULL;
HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
return 1;
/* now parse the numaliasm lines to read in the remainder of the table */
// NOTE(review): the error paths below set aliasm to NULL with no visible
// free() of the array or of earlier entries - possible leak, confirm
char * nl = line;
for (int j=0; j < numaliasm; j++) {
if ((nl = af->getline()) == NULL) return 1;
tp = nl;
i = 0;
aliasm[j] = NULL;
piece = mystrsep(&tp, ' ');
while (piece) {
if (*piece != '\0') {
switch(i) {
case 0: {
if (strncmp(piece,"AM",2) != 0) {
HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
numaliasm = 0;
aliasm = NULL;
return 1;
case 1: {
// add the remaining of the line
if (*tp) {
// put a space back at the separator position so `piece` spans the rest of the line
*(tp - 1) = ' ';
// move tp to the end of the line so no further pieces are split off
tp = tp + strlen(tp);
// with complex prefixes the description is stored reversed
if (complexprefixes) {
if (utf8) reverseword_utf(piece);
else reverseword(piece);
aliasm[j] = mystrdup(piece);
if (!aliasm[j]) {
numaliasm = 0;
aliasm = NULL;
return 1;
break; }
default: break;
piece = mystrsep(&tp, ' ');
// an entry line that produced no description makes the whole table invalid
if (!aliasm[j]) {
numaliasm = 0;
aliasm = NULL;
HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
return 1;
return 0;
// Returns non-zero when a morphological alias (AM) table is present, i.e.
// when the aliasm member is not NULL.
int HashMgr::is_aliasm() {
    return (aliasm != NULL);
}
// Look up entry `index` (1-based) of the morphological alias table.
// Returns the stored string (not a copy), or NULL after emitting a warning
// when the index is outside 1..numaliasm.
char * HashMgr::get_aliasm(int index) {
    if ((index > 0) && (index <= numaliasm)) {
        return aliasm[index - 1];
    }
    HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);
    return NULL;
}

#define _HASHMGR_HXX_
#include "hunvisapi.h"
#include <stdio.h>
#include "htypes.hxx"
#include "filemgr.hxx"
int tablesize;
struct hentry ** tableptr;
int userword;
flag flag_mode;
int complexprefixes;
int utf8;
unsigned short forbiddenword;
int langnum;
char * enc;
char * lang;
struct cs_info * csconv;
char * ignorechars;
unsigned short * ignorechars_utf16;
int ignorechars_utf16_len;
int numaliasf; // flag vector `compression' with aliases
unsigned short ** aliasf;
unsigned short * aliasflen;
int numaliasm; // morphological desciption `compression' with aliases
char ** aliasm;
HashMgr(const char * tpath, const char * apath, const char * key = NULL);
struct hentry * lookup(const char *) const;
int hash(const char *) const;
struct hentry * walk_hashtable(int & col, struct hentry * hp) const;
int add(const char * word);
int add_with_affix(const char * word, const char * pattern);
int remove(const char * word);
int decode_flags(unsigned short ** result, char * flags, FileMgr * af);
unsigned short decode_flag(const char * flag);
char * encode_flag(unsigned short flag);
int is_aliasf();
int get_aliasf(int index, unsigned short ** fvec, FileMgr * af);
int is_aliasm();
char * get_aliasm(int index);
int get_clen_and_captype(const char * word, int wbl, int * captype);
int load_tables(const char * tpath, const char * key);
int add_word(const char * word, int wbl, int wcl, unsigned short * ap,
int al, const char * desc, bool onlyupcase);
int load_config(const char * affpath, const char * key);
int parse_aliasf(char * line, FileMgr * af);
int add_hidden_capitalized_word(char * word, int wbl, int wcl,
unsigned short * flags, int al, char * dp, int captype);
int parse_aliasm(char * line, FileMgr * af);
int remove_forbidden_flag(const char * word);

#ifndef _HTYPES_HXX_
#define _HTYPES_HXX_
// number of bits the hash accumulator is rotated by per input byte
#define ROTATE_LEN 5

// Rotate v left by q bits in place, treating v as a 32-bit quantity: the
// q bits shifted out at bit 31 are re-inserted at the bottom.
// Both arguments are fully parenthesized so that an expression such as
// `a + b` can be passed as q. The trailing semicolon is kept for existing
// call sites.
#define ROTATE(v,q) \
(v) = ((v) << (q)) | (((v) >> (32 - (q))) & ((1 << (q))-1));
// hentry options
#define H_OPT (1 << 0)
#define H_OPT_ALIASM (1 << 1)
#define H_OPT_PHON (1 << 2)
// see also csutil.hxx
#define HENTRY_WORD(h) &(h->word[0])
// approx. number of user defined words
#define USERWORD 1000
// One dictionary entry. Entries are chained through `next` and
// `next_homonym`, and the word text is stored in the trailing `word` array.
struct hentry
unsigned char blen; // word length in bytes
unsigned char clen; // word length in characters (different for UTF-8 enc.)
short alen; // length of affix flag vector
unsigned short * astr; // affix flag vector
struct hentry * next; // next word with same hash code
struct hentry * next_homonym; // next homonym word (with same hash code)
char var; // variable fields (currently only for special pronunciation)
char word[1]; // variable-length word (8-bit or UTF-8 encoding)

#ifndef _MYSPELLMGR_H_
#define _MYSPELLMGR_H_
#include "hunvisapi.h"
#ifdef __cplusplus
extern "C" {
typedef struct Hunhandle Hunhandle;
LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create(const char * affpath, const char * dpath);
LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
const char * key);
LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle *pHunspell);
/* spell(word) - spellcheck word
* output: 0 = bad word, not 0 = good word
LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle *pHunspell, const char *);
LIBHUNSPELL_DLL_EXPORTED char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
/* suggest(suggestions, word) - search suggestions
* input: pointer to an array of strings pointer and the (bad) word
* array of strings pointer (here *slst) may not be initialized
* output: number of suggestions in string array, and suggestions in
* a newly allocated array of strings (*slst will be NULL when number
* of suggestion equals 0.)
LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word);
/* morphological functions */
/* analyze(result, word) - morphological analysis of the word */
LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word);
/* stem(result, word) - stemmer function */
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
/* stem(result, analysis, n) - get stems from a morph. analysis
* example:
* char ** result, result2;
* int n1 = Hunspell_analyze(result, "words");
* int n2 = Hunspell_stem2(result2, result, n1);
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n);
/* generate(result, word, word2) - morphological generation by example(s) */
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
const char * word2);
/* generate(result, word, desc, n) - generation by morph. description(s)
* example:
* char ** result;
* char * affix = "is:plural"; // description depends from dictionaries, too
* int n = Hunspell_generate2(result, "word", &affix, 1);
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
char** desc, int n);
/* functions for run-time modification of the dictionary */
/* add word to the run-time dictionary */
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle *pHunspell, const char * word);
/* add word to the run-time dictionary with affix flags of
* the example (a dictionary word): Hunspell will recognize
* affixed forms of the new word, too.
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, const char * example);
/* remove word from the run-time dictionary */
LIBHUNSPELL_DLL_EXPORTED int Hunspell_remove(Hunhandle *pHunspell, const char * word);
/* free suggestion lists */
LIBHUNSPELL_DLL_EXPORTED void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n);
#ifdef __cplusplus

#include "hunvisapi.h"
#include "hashmgr.hxx"
#include "affixmgr.hxx"
#include "suggestmgr.hxx"
#include "langnum.hxx"
#define SPELL_XML "<?xml?>"
#define MAXDIC 20
#define MAXSHARPS 5
#define HUNSPELL_OK (1 << 0)
#define HUNSPELL_OK_WARN (1 << 1)
Hunspell(const Hunspell&);
Hunspell& operator = (const Hunspell&);
AffixMgr* pAMgr;
HashMgr* pHMgr[MAXDIC];
int maxdic;
SuggestMgr* pSMgr;
char * affixpath;
char * encoding;
struct cs_info * csconv;
int langnum;
int utf8;
int complexprefixes;
char** wordbreak;
/* Hunspell(aff, dic) - constructor of Hunspell class
* input: path of affix file and dictionary file
* In WIN32 environment, use UTF-8 encoded paths started with the long path
* prefix \\\\?\\ to handle system-independent character encoding and very
* long path names (without the long path prefix Hunspell will use fopen()
* with system-dependent character encoding instead of _wfopen()).
Hunspell(const char * affpath, const char * dpath, const char * key = NULL);
/* load extra dictionaries (only dic files) */
int add_dic(const char * dpath, const char * key = NULL);
/* spell(word) - spellcheck word
* output: 0 = bad word, not 0 = good word
* plus output:
* info: information bit array, fields:
* SPELL_COMPOUND = a compound word
* SPELL_FORBIDDEN = an explicit forbidden word
* root: root (stem), when input is a word with affix(es)
int spell(const char * word, int * info = NULL, char ** root = NULL);
/* suggest(suggestions, word) - search suggestions
* input: pointer to an array of strings pointer and the (bad) word
* array of strings pointer (here *slst) may not be initialized
* output: number of suggestions in string array, and suggestions in
* a newly allocated array of strings (*slst will be NULL when number
* of suggestion equals 0.)
int suggest(char*** slst, const char * word);
/* deallocate suggestion lists */
void free_list(char *** slst, int n);
char * get_dic_encoding();
/* morphological functions */
/* analyze(result, word) - morphological analysis of the word */
int analyze(char*** slst, const char * word);
/* stem(result, word) - stemmer function */
int stem(char*** slst, const char * word);
/* stem(result, analysis, n) - get stems from a morph. analysis
* example:
* char ** result, result2;
* int n1 = analyze(&result, "words");
* int n2 = stem(&result2, result, n1);
int stem(char*** slst, char ** morph, int n);
/* generate(result, word, word2) - morphological generation by example(s) */
int generate(char*** slst, const char * word, const char * word2);
/* generate(result, word, desc, n) - generation by morph. description(s)
* example:
* char ** result;
* char * affix = "is:plural"; // description depends from dictionaries, too
* int n = generate(&result, "word", &affix, 1);
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
int generate(char*** slst, const char * word, char ** desc, int n);
/* functions for run-time modification of the dictionary */
/* add word to the run-time dictionary */
int add(const char * word);
/* add word to the run-time dictionary with affix flags of
* the example (a dictionary word): Hunspell will recognize
* affixed forms of the new word, too.
int add_with_affix(const char * word, const char * example);
/* remove word from the run-time dictionary */
int remove(const char * word);
/* other */
/* get extra word characters defined in affix file for tokenization */
const char * get_wordchars();
unsigned short * get_wordchars_utf16(int * len);
struct cs_info * get_csconv();
const char * get_version();
int get_langnum() const;
/* need for putdic */
int input_conv(const char * word, char * dest);
/* experimental and deprecated functions */
/* suffix is an affix flag string, similarly in dictionary files */
int put_word_suffix(const char * word, const char * suffix);
char * morph_with_correction(const char * word);
/* spec. suggestions */
int suggest_auto(char*** slst, const char * word);
int suggest_pos_stems(char*** slst, const char * word);
int cleanword(char *, const char *, int * pcaptype, int * pabbrev);
int cleanword2(char *, const char *, w_char *, int * w_len, int * pcaptype, int * pabbrev);
void mkinitcap(char *);
int mkinitcap2(char * p, w_char * u, int nc);
int mkinitsmall2(char * p, w_char * u, int nc);
void mkallcap(char *);
int mkallcap2(char * p, w_char * u, int nc);
void mkallsmall(char *);
int mkallsmall2(char * p, w_char * u, int nc);
struct hentry * checkword(const char *, int * info, char **root);
char * sharps_u8_l1(char * dest, char * source);
hentry * spellsharps(char * base, char *, int, int, char * tmp, int * info, char **root);
int is_keepcase(const hentry * rv);
int insert_sug(char ***slst, char * word, int ns);
void cat_result(char * result, char * st);
char * stem_description(const char * desc);
int spellml(char*** slst, const char * word);
int get_xml_par(char * dest, const char * par, int maxl);
const char * get_xml_pos(const char * s, const char * attr);
int get_xml_list(char ***slst, char * list, const char * tag);
int check_xml_par(const char * q, const char * attr, const char * value);

#if defined(HUNSPELL_STATIC)
#elif defined(_MSC_VER)
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport)
# else
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
# endif
#elif defined(BUILDING_LIBHUNSPELL) && 1
# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))

#if defined(HUNSPELL_STATIC)
#elif defined(_MSC_VER)
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport)
# else
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
# endif
return fail(MSG_FORMAT, filename);
// check encryption
if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) {
unsigned char cs;
if (!key) return fail(MSG_KEY, filename);
if (fread(&c, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename);
for (cs = 0; *enc; enc++) cs ^= *enc;
if (cs != c[0]) return fail(MSG_KEY, filename);
enc = key;
} else key = NULL;
// read record count
if (fread(&c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename);
if (key) {
c[0] ^= *enc;
if (*(++enc) == '\0') enc = key;
c[1] ^= *enc;
n = ((int) c[0] << 8) + c[1];
dec = (struct bit *) malloc(BASEBITREC * sizeof(struct bit));
if (!dec) return fail(MSG_MEMORY, filename);
dec[0].v[0] = 0;
dec[0].v[1] = 0;
// read codes
for (i = 0; i < n; i++) {
unsigned char l;
if (fread(c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename);
if (key) {
if (*(++enc) == '\0') enc = key;
c[0] ^= *enc;
if (*(++enc) == '\0') enc = key;
c[1] ^= *enc;
if (fread(&l, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename);
if (key) {
if (*(++enc) == '\0') enc = key;
l ^= *enc;
if (fread(in, 1, l/8+1, fin) < (size_t) l/8+1) return fail(MSG_FORMAT, filename);
if (key) for (j = 0; j <= l/8; j++) {
if (*(++enc) == '\0') enc = key;
in[j] ^= *enc;
p = 0;
for (j = 0; j < l; j++) {
int b = (in[j/8] & (1 << (7 - (j % 8)))) ? 1 : 0;
int oldp = p;
p = dec[p].v[b];
if (p == 0) {
if (lastbit == allocatedbit) {
allocatedbit += BASEBITREC;
dec = (struct bit *) realloc(dec, allocatedbit * sizeof(struct bit));
dec[lastbit].v[0] = 0;
dec[lastbit].v[1] = 0;
dec[oldp].v[b] = lastbit;
p = lastbit;
dec[p].c[0] = c[0];
dec[p].c[1] = c[1];
return 0;
if (dec) free(dec);
if (fin) fclose(fin);
if (filename) free(filename);
// Decode the next chunk of the compressed stream into the member buffer
// `out`. Input bits are read from `in` (refilled from `fin` whenever `inc`
// is 0) and used to walk the code table `dec`; a 0 link marks a complete code.
// Returns the number of bytes written to `out`, or the result of
// fail(MSG_FORMAT, ...) when the input ends without the end code.
int Hunzip::getbuf() {
int p = 0;
int o = 0;
do {
// refill the input buffer; inbits is the number of valid bits in it
if (inc == 0) inbits = fread(in, 1, BUFSIZE, fin) * 8;
for (; inc < inbits; inc++) {
// take bit number `inc` of the input, most significant bit of each byte first
int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0;
int oldp = p;
p = dec[p].v[b];
if (p == 0) {
// the node stored in `lastbit` is treated as the end-of-data code
if (oldp == lastbit) {
// NOTE(review): no fclose(fin) is visible before the pointer is cleared - confirm
fin = NULL;
// add last odd byte
if (dec[lastbit].c[0]) out[o++] = dec[lastbit].c[1];
return o;
// every other code stands for two output bytes
out[o++] = dec[oldp].c[0];
out[o++] = dec[oldp].c[1];
if (o == BUFSIZE) return o;
// p is 0 (root) here, so this starts the next code with the current bit
p = dec[p].v[b];
inc = 0;
// a short read means the input is exhausted
} while (inbits == BUFSIZE * 8);
return fail(MSG_FORMAT, filename);
// Return the next decoded line, or NULL when bufsiz is -1.
// The returned pointer refers to the member buffer `line`, which is
// overwritten by the next call. Besides literal bytes, the decoded stream
// carries control bytes that tell how much of the previous line to reuse
// at the start (`left`) and at the end (`right`) of the new line.
const char * Hunzip::getline() {
char linebuf[BUFSIZE];
int l = 0, eol = 0, left = 0, right = 0;
if (bufsiz == -1) return NULL;
while (l < bufsiz && !eol) {
linebuf[l++] = out[outc];
switch (out[outc]) {
case '\t': break;
case 31: { // escape
// the byte after the escape is taken literally; refill if it is in the next chunk
if (++outc == bufsiz) {
bufsiz = getbuf();
outc = 0;
linebuf[l - 1] = out[outc];
case ' ': break;
// remaining bytes below 47 are control bytes that end the line
default: if (((unsigned char) out[outc]) < 47) {
// values above 32 encode how many trailing characters to reuse
if (out[outc] > 32) {
right = out[outc] - 31;
if (++outc == bufsiz) {
bufsiz = getbuf();
outc = 0;
// 30 is used in place of 9 for the prefix length
if (out[outc] == 30) left = 9; else left = out[outc];
linebuf[l-1] = '\n';
eol = 1;
// advance to the next byte; once `fin` is NULL no more chunks are decoded
if (++outc == bufsiz) {
outc = 0;
bufsiz = fin ? getbuf(): -1;
// append the reused tail of the previous line, or just terminate
if (right) strcpy(linebuf + l - 1, line + strlen(line) - right - 1);
else linebuf[l] = '\0';
// keep the first `left` characters of the previous line and append the new text
strcpy(line + left, linebuf);
return line;

/* hunzip: file decompression for sorted dictionaries with optional encryption,
* algorithm: prefix-suffix encoding and 16-bit Huffman encoding */
#ifndef _HUNZIP_HXX_
#define _HUNZIP_HXX_
#include "hunvisapi.h"
#include <stdio.h>
#define BUFSIZE 65536
#define HZIP_EXTENSION ".hz"
#define MSG_OPEN "error: %s: cannot open\n"
#define MSG_FORMAT "error: %s: not in hzip format\n"
#define MSG_MEMORY "error: %s: missing memory\n"
#define MSG_KEY "error: %s: missing or bad password\n"
struct bit {
unsigned char c[2];
int v[2];
Hunzip(const Hunzip&);
Hunzip& operator = (const Hunzip&);
char * filename;
FILE * fin;
int bufsiz, lastbit, inc, inbits, outc;
struct bit * dec; // code table
char in[BUFSIZE]; // input buffer
char out[BUFSIZE + 1]; // Huffman-decoded buffer
char line[BUFSIZE + 50]; // decoded line
int getcode(const char * key);
int getbuf();
int fail(const char * err, const char * par);
Hunzip(const char * filename, const char * key = NULL);
@ -1,38 +0,0 @@
#ifndef _LANGNUM_HXX_
#define _LANGNUM_HXX_
language numbers for language specific codes
enum {
LANG_az=100, // custom number
LANG_la=99, // custom number
LANG_lv=101, // custom number

PRJ = ../../../../../..
PRJNAME = hunspell
TARGET = hunspell
#----- Settings ---------------------------------------------------------
# --- Files --------------------------------------------------------
$(SLO)$/affentry.obj \
$(SLO)$/affixmgr.obj \
$(SLO)$/dictmgr.obj \
$(SLO)$/csutil.obj \
$(SLO)$/hashmgr.obj \
$(SLO)$/suggestmgr.obj \
$(SLO)$/phonet.obj \
$(SLO)$/hunzip.obj \
$(SLO)$/filemgr.obj \
$(SLO)$/replist.obj \
LIB1TARGET= $(SLB)$/lib$(TARGET).lib
# --- Targets ------------------------------------------------------

/* phonetic.c - generic replacement algorithms for phonetic transformation
Copyright (C) 2000 Bjoern Jacke
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation;
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see
2000-01-05 Bjoern Jacke <bjoern at>
Initial Release inspired by the article about phonetic
transformations out of c't 25/1999
2007-07-26 Bjoern Jacke <bjoern at>
Released under MPL/GPL/LGPL tri-license for Hunspell
2007-08-23 Laszlo Nemeth <nemeth at OOo>
Porting from Aspell to Hunspell using C-like structs
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include "csutil.hxx"
#include "phonet.hxx"
// Build the first-character index of the phonetic rule table.
//
// parms.rules is walked in steps of two until an entry whose first
// character is '\0' is reached. For each visited entry, parms.hash[c]
// receives the index of the first rule starting with byte c; bytes that
// start no rule keep the value -1.
void init_phonet_hash(phonetable & parms)
{
    int i, k;

    for (i = 0; i < HASHSIZE; i++) {
        parms.hash[i] = -1;
    }

    for (i = 0; parms.rules[i][0] != '\0'; i += 2) {
        /** set hash value **/
        k = (unsigned char) parms.rules[i][0];

        // only the first rule for a given start byte is recorded
        if (parms.hash[k] < 0) {
            parms.hash[k] = i;
        }
    }
}
// Like strcpy, but safe when the two strings overlap - provided that
// dest < src, because bytes are copied front to back.
// Copies up to and including the terminating '\0'.
static inline void strmove(char * dest, const char * src) {
    while (*src) {
        *dest++ = *src++;
    }
    *dest = '\0';
}
// Alphabetic test used by the phonetic rules.
// Bytes below 128 are classified with isalpha(); every byte >= 128 is
// treated as a letter (returns 1).
static int myisalpha(char ch) {
    unsigned char uc = (unsigned char) ch;
    // pass the value as unsigned char, as <ctype.h> requires
    if (uc < 128) return isalpha(uc);
    return 1;
}
/* phonetic transcription algorithm */
/* see: */
/* convert string to uppercase before this call */
//
// inword: input word; copied into a local buffer that the rules may rewrite
// target: output buffer receiving the phonetic code (NUL-terminated)
// len:    length of inword, or -1 to have it computed with strlen();
//         it also limits how many characters are written to target
// parms:  rule table; rules are stored in pairs (search string at index n,
//         replacement at n+1) and parms.hash maps a byte to the first rule
//         that starts with it
// Returns the length of target, or 0 when len exceeds MAXPHONETUTF8LEN.
int phonet (const char * inword, char * target,
int len,
phonetable & parms)
/** Do phonetic transformation. **/
/** "len" = length of "inword" incl. '\0'. **/
/** result: >= 0: length of "target" **/
/** otherwise: error **/
int i,j,k=0,n,p,z;
int k0,n0,p0=-333,z0;
char c, c0;
const char * s;
typedef unsigned char uchar;
char word[MAXPHONETUTF8LEN + 1];
if (len == -1) len = strlen(inword);
if (len > MAXPHONETUTF8LEN) return 0;
// work on a private, always-terminated copy of the input
strncpy(word, inword, MAXPHONETUTF8LEN);
word[MAXPHONETUTF8LEN] = '\0';
/** check word **/
i = j = z = 0;
while ((c = word[i]) != '\0') {
// index of the first rule starting with the current letter, or -1
n = parms.hash[(uchar) c];
z0 = 0;
if (n >= 0) {
/** check all rules for the same letter **/
while (parms.rules[n][0] == c) {
/** check whole string **/
k = 1; /** number of found letters **/
p = 5; /** default priority **/
s = parms.rules[n];
s++; /** important for (see below) "*(s-1)" **/
// match literal rule characters until a control character or digit
while (*s != '\0' && word[i+k] == *s
&& !isdigit ((unsigned char) *s) && strchr ("(-<^$", *s) == NULL) {
if (*s == '(') {
/** check letters in "(..)" **/
if (myisalpha(word[i+k]) // ...could be implied?
&& strchr(s+1, word[i+k]) != NULL) {
while (*s != ')')
p0 = (int) *s;
k0 = k;
while (*s == '-' && k > 1) {
if (*s == '<')
if (isdigit ((unsigned char) *s)) {
/** determine priority **/
p = *s - '0';
if (*s == '^' && *(s+1) == '^')
// the rule matches when its pattern is exhausted, or when the
// '^' (word start) / '$' (word end) anchors hold at this position
if (*s == '\0'
|| (*s == '^'
&& (i == 0 || ! myisalpha(word[i-1]))
&& (*(s+1) != '$'
|| (! myisalpha(word[i+k0]) )))
|| (*s == '$' && i > 0
&& myisalpha(word[i-1])
&& (! myisalpha(word[i+k0]) )))
/** search for followup rules, if: **/
/** parms.followup and k > 1 and NO '-' in searchstring **/
c0 = word[i+k-1];
n0 = parms.hash[(uchar) c0];
// if (parms.followup && k > 1 && n0 >= 0
if (k > 1 && n0 >= 0
&& p0 != (int) '-' && word[i+k] != '\0') {
/** test follow-up rule for "word[i+k]" **/
while (parms.rules[n0][0] == c0) {
/** check whole string **/
k0 = k;
p0 = 5;
s = parms.rules[n0];
while (*s != '\0' && word[i+k0] == *s
&& ! isdigit((unsigned char) *s) && strchr("(-<^$",*s) == NULL) {
if (*s == '(') {
/** check letters **/
if (myisalpha(word[i+k0])
&& strchr (s+1, word[i+k0]) != NULL) {
while (*s != ')' && *s != '\0')
if (*s == ')')
while (*s == '-') {
/** "k0" gets NOT reduced **/
/** because "if (k0 == k)" **/
if (*s == '<')
if (isdigit ((unsigned char) *s)) {
p0 = *s - '0';
if (*s == '\0'
/** *s == '^' cuts **/
|| (*s == '$' && ! myisalpha(word[i+k0])))
if (k0 == k) {
/** this is just a piece of the string **/
n0 += 2;
if (p0 < p) {
/** priority too low **/
n0 += 2;
/** rule fits; stop search **/
n0 += 2;
} /** End of "while (parms.rules[n0][0] == c0)" **/
if (p0 >= p && parms.rules[n0][0] == c0) {
n += 2;
} /** end of follow-up stuff **/
/** replace string **/
// the replacement text is stored right after the search string
s = parms.rules[n+1];
// p0 becomes 1 when the matched rule contains '<'
p0 = (parms.rules[n][0] != '\0'
&& strchr (parms.rules[n]+1,'<') != NULL) ? 1:0;
if (p0 == 1 && z == 0) {
/** rule with '<' is used **/
if (j > 0 && *s != '\0'
&& (target[j-1] == c || target[j-1] == *s)) {
z0 = 1;
z = 1;
k0 = 0;
// write the replacement back into `word` so it is processed again
while (*s != '\0' && word[i+k0] != '\0') {
word[i+k0] = *s;
if (k > k0)
strmove (&word[0]+i+k0, &word[0]+i+k);
/** new "actual letter" **/
c = word[i];
else { /** no '<' rule used **/
i += k - 1;
z = 0;
// emit all but the last replacement character, skipping repeats
while (*s != '\0'
&& *(s+1) != '\0' && j < len) {
if (j == 0 || target[j-1] != *s) {
target[j] = *s;
/** new "actual letter" **/
c = *s;
if (parms.rules[n][0] != '\0'
&& strstr (parms.rules[n]+1, "^^") != NULL) {
if (c != '\0') {
target[j] = c;
strmove (&word[0], &word[0]+i+1);
i = 0;
z0 = 1;
} /** end of follow-up stuff **/
n += 2;
} /** end of while (parms.rules[n][0] == c) **/
} /** end of if (n >= 0) **/
if (z0 == 0) {
// if (k && (assert(p0!=-333),!p0) && j < len && c != '\0'
// && (!parms.collapse_result || j == 0 || target[j-1] != c)){
if (k && !p0 && j < len && c != '\0'
&& (1 || j == 0 || target[j-1] != c)){
/** condense only double letters **/
target[j] = c;
///printf("\n setting \n");
z = 0;
} /** end of while ((c = word[i]) != '\0') **/
target[j] = '\0';
return (j);
#ifndef __PHONETHXX__
#define __PHONETHXX__
#define HASHSIZE 256
#define MAXPHONETLEN 256
#include "hunvisapi.h"
struct phonetable {
char utf8;
cs_info * lang;
int num;
char * * rules;
int hash[HASHSIZE];
LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable & parms);
LIBHUNSPELL_DLL_EXPORTED int phonet (const char * inword, char * target,
int len, phonetable & phone);

#include "license.hunspell"
#include "license.myspell"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "replist.hxx"
#include "csutil.hxx"
// Create an empty replacement list with room for `n` entries.
// If the pointer array cannot be allocated the capacity is set to 0, so
// every later add() fails.
RepList::RepList(int n) {
    dat = (replentry **) malloc(sizeof(replentry *) * n);
    if (dat == 0) {
        size = 0;
    } else {
        size = n;
    }
    pos = 0;
}
for (int i = 0; i < pos; i++) {
// Returns the number of entries currently stored in the list.
int RepList::get_pos() {
    return pos;
}
// Returns entry number `n` (0-based). No bounds check is performed; the
// caller must pass 0 <= n < get_pos().
replentry * RepList::item(int n) {
    return dat[n];
}
// Binary search over the entries, which add() keeps sorted by pattern.
// Returns the index at which the search for `word` stops: the matching
// entry when a pattern equals `word`, otherwise the lower bound of the
// final interval. Returns 0 for an empty list.
int RepList::near(const char * word) {
    int p1 = 0;
    int p2 = pos;
    while ((p2 - p1) > 1) {
        int m = (p1 + p2) / 2;
        int c = strcmp(word, dat[m]->pattern);
        if (c < 0) {
            p2 = m;
        } else if (c == 0) {
            // exact hit: collapse the interval onto m
            p1 = p2 = m;
        } else {
            p1 = m;
        }
    }
    return p1;
}
int RepList::match(const char * word, int n) {
if (strncmp(word, dat[n]->pattern, strlen(dat[n]->pattern)) == 0) return strlen(dat[n]->pattern);
return 0;
// Insert the replacement pair (pat1 -> pat2), keeping the list sorted by
// pattern. Underscores in both strings are turned into spaces by mystrrep().
// Returns 0 on success, 1 when the list is full, an argument is NULL, or
// the entry cannot be allocated.
int RepList::add(char * pat1, char * pat2) {
    if (pos >= size || pat1 == NULL || pat2 == NULL) return 1;
    replentry * r = (replentry *) malloc(sizeof(replentry));
    if (r == NULL) return 1;
    r->pattern = mystrrep(pat1, "_", " ");
    r->pattern2 = mystrrep(pat2, "_", " ");
    r->start = false;
    r->end = false;
    dat[pos++] = r;
    // insertion sort step: move the new entry down to its sorted position
    for (int i = pos - 1; i > 0; i--) {
        r = dat[i];
        if (strcmp(r->pattern, dat[i - 1]->pattern) < 0) {
            dat[i] = dat[i - 1];
            dat[i - 1] = r;
        } else {
            break;
        }
    }
    return 0;
}
// Apply the replacement list to `word`, writing the converted text to `dest`.
// At every position the entry found by near() is tried; on a match its
// replacement (pattern2) is copied and the matched input is skipped,
// otherwise the input byte is copied unchanged.
// `dest` is always NUL-terminated; the caller must supply a large enough
// buffer. Returns 1 if any replacement was made, otherwise 0.
int RepList::conv(const char * word, char * dest) {
    int stl = 0;
    int change = 0;
    // `word` is never modified here, so its length is computed once
    const size_t wordlen = strlen(word);
    for (size_t i = 0; i < wordlen; i++) {
        int n = near(word + i);
        int l = match(word + i, n);
        if (l) {
            strcpy(dest + stl, dat[n]->pattern2);
            stl += strlen(dat[n]->pattern2);
            // -1 because the loop header advances i by one more
            i += l - 1;
            change = 1;
        } else {
            dest[stl++] = word[i];
        }
    }
    dest[stl] = '\0';
    return change;
}

/* string replacement list class */
// Include guard for the replacement-list declarations.
#ifndef _REPLIST_HXX_
#define _REPLIST_HXX_
#include "hunvisapi.h"
#include "w_char.hxx"
// Copy construction and assignment are declared only.
// NOTE(review): presumably private and undefined to forbid copying; access specifiers are not visible here.
RepList(const RepList&);
RepList& operator = (const RepList&);
// Array of entry pointers; add() keeps it ordered by pattern.
replentry ** dat;
// Capacity of dat (set to 0 by the constructor when allocation fails).
int size;
// Number of entries currently stored in dat.
int pos;
// Create a list with capacity for n entries.
RepList(int n);
// Number of stored entries.
int get_pos();
// Insert a pattern pair; returns 0 on success, 1 on failure.
int add(char * pat1, char * pat2);
// Entry at index n (unchecked).
replentry * item(int n);
// Binary search for the entry whose pattern is closest to word; returns its index.
int near(const char * word);
// Length of entry n's pattern if word starts with it, else 0.
int match(const char * word, int n);
// Apply replacements from word into dest; returns 1 if anything changed.
int conv(const char * word, char * dest);

@ -1,115 +0,0 @@
// Size limits used by the suggestion code.
// NOTE(review): the names suggest "max suggestion word length" and related bounds;
// their uses are not visible in this excerpt.
#define MAXSWL 100
// MAXSWL scaled by 4 (presumably bytes per UTF-8 character; confirm).
#define MAXSWUTF8L (MAXSWL * 4)
#define MAX_ROOTS 100
#define MAX_WORDS 100
#define MAX_GUESS 200
// timelimit: max ~1/4 sec (process time on Linux) for a time consuming function
#define MINTIMER 100
#define MAXPLUSTIMER 100
// Bit flags (one distinct bit each), so they can be OR-ed together.
// NOTE(review): presumably the values accepted by the opt argument of ngram(); confirm.
#define NGRAM_LONGER_WORSE (1 << 0)
#define NGRAM_ANY_MISMATCH (1 << 1)
#define NGRAM_LOWERING (1 << 2)
#define NGRAM_WEIGHTED (1 << 3)
#include "hunvisapi.h"
#include "atypes.hxx"
#include "affixmgr.hxx"
#include "hashmgr.hxx"
#include "langnum.hxx"
// <time.h> provides clock_t, used in the timer parameters below.
#include <time.h>
// Copy construction and assignment are declared only.
// NOTE(review): presumably to forbid copying; access specifiers are not visible here.
SuggestMgr(const SuggestMgr&);
SuggestMgr& operator = (const SuggestMgr&);
// Each of ckey and ctry has a byte string, a length, and a w_char (wide) variant.
// NOTE(review): presumably the keyboard-layout string and the "try" character set; confirm.
char * ckey;
int ckeyl;
w_char * ckey_utf;
char * ctry;
int ctryl;
w_char * ctry_utf;
// Affix manager supplied to the constructor.
AffixMgr* pAMgr;
// NOTE(review): presumably the maximum number of suggestions (constructor's maxn); confirm.
int maxSug;
struct cs_info * csconv;
int utf8;
int langnum;
int nosplitsugs;
int maxngramsugs;
int maxcpdsugs;
int complexprefixes;
SuggestMgr(const char * tryme, int maxn, AffixMgr *aptr);
// Suggestion entry points; each returns an int and takes an output list (slst/wlst).
int suggest(char*** slst, const char * word, int nsug, int * onlycmpdsug);
int ngsuggest(char ** wlst, char * word, int ns, HashMgr** pHMgr, int md);
int suggest_auto(char*** slst, const char * word, int nsug);
int suggest_stems(char*** slst, const char * word, int nsug);
int suggest_pos_stems(char*** slst, const char * word, int nsug);
// Morphology-related helpers returning a char *.
// NOTE(review): ownership of the returned buffer is not visible here; confirm who frees it.
char * suggest_morph(const char * word);
char * suggest_gen(char ** pl, int pln, char * pattern);
char * suggest_morph_for_spelling_error(const char * word);
// Candidate testing helpers; the timer/timelimit pointers let callers bound the time spent.
int testsug(char** wlst, const char * candidate, int wl, int ns, int cpdsuggest,
int * timer, clock_t * timelimit);
int checkword(const char *, int, int, int *, clock_t *);
int check_forbidden(const char *, int);
// Candidate generators over byte strings. All take a suggestion list and the word,
// with int bookkeeping arguments (unnamed in these declarations).
int capchars(char **, const char *, int, int);
int replchars(char**, const char *, int, int);
int doubletwochars(char**, const char *, int, int);
int forgotchar(char **, const char *, int, int);
int swapchar(char **, const char *, int, int);
int longswapchar(char **, const char *, int, int);
int movechar(char **, const char *, int, int);
int extrachar(char **, const char *, int, int);
int badcharkey(char **, const char *, int, int);
int badchar(char **, const char *, int, int);
int twowords(char **, const char *, int, int);
int fixstems(char **, const char *, int);
// The same generators operating on w_char strings, with an explicit length (wl).
int capchars_utf(char **, const w_char *, int wl, int, int);
int doubletwochars_utf(char**, const w_char *, int wl, int, int);
int forgotchar_utf(char**, const w_char *, int wl, int, int);
int extrachar_utf(char**, const w_char *, int wl, int, int);
int badcharkey_utf(char **, const w_char *, int wl, int, int);
int badchar_utf(char **, const w_char *, int wl, int, int);
int swapchar_utf(char **, const w_char *, int wl, int, int);
int longswapchar_utf(char **, const w_char *, int, int, int);
int movechar_utf(char **, const w_char *, int, int, int);
int mapchars(char**, const char *, int, int);
int map_related(const char *, char *, int, int, char ** wlst, int, int, const mapentry*, int, int *, clock_t *);
// String-similarity helpers.
// NOTE(review): presumably used to score candidates; confirm in the implementation.
int ngram(int n, char * s1, const char * s2, int opt);
int mystrlen(const char * word);
int leftcommonsubstring(char * s1, const char * s2);
int commoncharacterpositions(char * s1, const char * s2, int * is_swap);
void bubblesort( char ** rwd, char ** rwd2, int * rsc, int n);
void lcs(const char * s, const char * s2, int * l1, int * l2, char ** result);
int lcslen(const char * s, const char* s2);
char * suggest_hentry_gen(hentry * rv, char * pattern);

@ -1,21 +0,0 @@
// Include guard for the w_char and replentry type definitions.
#ifndef __WCHARHXX__
#define __WCHARHXX__
// NOTE(review): the two typedef openers below look like the alternative branches of this
// conditional (plain struct vs. packed struct). The #else/#endif lines are not visible
// in this excerpt.
#ifndef GCC
typedef struct {
typedef struct __attribute__ ((packed)) {
// Two single-byte members. NOTE(review): presumably the low and high byte of a
// 16-bit character; confirm.
unsigned char l;
unsigned char h;
} w_char;
// two character arrays
struct replentry {
// Text to search for; RepList orders and searches its entries by this field.
char * pattern;
// Replacement text copied out by RepList::conv().
char * pattern2;
// Both flags are initialised to false by RepList::add().
// NOTE(review): presumably they anchor the pattern to the start/end of a word; confirm.
bool start;
bool end;

/* Generated by autoheader. */
/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
systems. This function is required for `alloca.c' support on those systems. */
/* Define to 1 if using `alloca.c'. */
#define C_ALLOCA 1
/* Define to 1 if translation of program messages to the user's native
language is requested. */
/* Define to 1 if you have `alloca', as a function or macro. */
#define HAVE_ALLOCA 1
/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix). */
#define HAVE_ALLOCA_H 1
/* Define to 1 if you have the <argz.h> header file. */
#define HAVE_ARGZ_H 1
/* "Define if you have the <curses.h> header" */
/* Define if the GNU dcgettext() function is already present or preinstalled. */
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H 1
/* Define to 1 if you have the <error.h> header file. */
#define HAVE_ERROR_H 1
/* Define to 1 if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H 1
/* Define to 1 if you have the `feof_unlocked' function. */
/* Define to 1 if you have the `fgets_unlocked' function. */
/* Define to 1 if you have the `getcwd' function. */
#define HAVE_GETCWD 1
/* Define to 1 if you have the `getc_unlocked' function. */
/* Define to 1 if you have the `getegid' function. */
#define HAVE_GETEGID 1
/* Define to 1 if you have the `geteuid' function. */
#define HAVE_GETEUID 1
/* Define to 1 if you have the `getgid' function. */
#define HAVE_GETGID 1
/* Define to 1 if you have the `getpagesize' function. */
/* Define if the GNU gettext() function is already present or preinstalled. */
#define HAVE_GETTEXT 1
/* Define to 1 if you have the `getuid' function. */
#define HAVE_GETUID 1
/* Define if you have the iconv() function. */
/* Define to 1 if you have the <inttypes.h> header file. */
/* Define if you have <langinfo.h> and nl_langinfo(CODESET). */
/* Define if your <locale.h> file defines LC_MESSAGES. */
/* Define to 1 if you have the <libintl.h> header file. */
#define HAVE_LIBINTL_H 1
/* Define to 1 if you have the <limits.h> header file. */
#define HAVE_LIMITS_H 1
/* Define to 1 if you have the <locale.h> header file. */
#define HAVE_LOCALE_H 1
/* Define to 1 if you have the `memchr' function. */
#define HAVE_MEMCHR 1
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if you have the `mempcpy' function. */
#define HAVE_MEMPCPY 1
/* Define to 1 if you have a working `mmap' system call. */
#define HAVE_MMAP 1
/* Define to 1 if you have the `munmap' function. */
#define HAVE_MUNMAP 1
/* "Define if you have the <ncursesw/curses.h> header" */
/* Define to 1 if you have the <nl_types.h> header file. */
#define HAVE_NL_TYPES_H 1
/* Define to 1 if you have the `putenv' function. */
#define HAVE_PUTENV 1
/* "Define if you have fancy command input editing with Readline" */
/* Define to 1 if you have the `setenv' function. */
#define HAVE_SETENV 1
/* Define to 1 if you have the `setlocale' function. */
/* Define to 1 if you have the <stddef.h> header file. */
#define HAVE_STDDEF_H 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the `stpcpy' function. */
#define HAVE_STPCPY 1
/* Define to 1 if you have the `strcasecmp' function. */
/* Define to 1 if you have the `strchr' function. */
#define HAVE_STRCHR 1
/* Define to 1 if you have the `strdup' function. */
#define HAVE_STRDUP 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the `strstr' function. */
#define HAVE_STRSTR 1
/* Define to 1 if you have the `strtoul' function. */
#define HAVE_STRTOUL 1
/* Define to 1 if you have the <sys/param.h> header file. */
#define HAVE_SYS_PARAM_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the `tsearch' function. */
#define HAVE_TSEARCH 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Define to 1 if you have the `__argz_count' function. */
#define HAVE___ARGZ_COUNT 1
/* Define to 1 if you have the `__argz_next' function. */
#define HAVE___ARGZ_NEXT 1
/* Define to 1 if you have the `__argz_stringify' function. */
/* "Define if you use experimental functions" */
/* "Define if you need warning messages" */
/* Define as const if the declaration of iconv() needs const. */
#define ICONV_CONST 1
/* Name of package */
#define PACKAGE
/* Define to the address where bug reports for this package should be sent. */
/* Define to the full name of this package. */
/* Define to the full name and version of this package. */
/* Define to the one symbol short name of this package. */
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.3.3"
#define VERSION "1.3.3"

# Meson project for the Hunspell 1.7.0 port: C++ only, requires Meson >= 0.57.0.
project('Hunspell', 'cpp', version: '1.7.0', meson_version: '>=0.57.0')
# Include path handed to consumers through hunspell_dep.
inc = include_directories('src')
# Dependency object for consumers: links the hunspell library and propagates
# the include path and compile_args. Neither hunspell nor compile_args is defined
# in this fragment (NOTE(review): presumably they come from a subdir() call not visible here).
hunspell_dep = declare_dependency(link_with: hunspell,
include_directories: inc,
compile_args: compile_args)

View file

@ -0,0 +1,23 @@
# Source files of the hunspell library (the list entries are not visible in this excerpt).
hunspell_sources = files([
# Select preprocessor defines from the default_library option.
# NOTE(review): the else/endif lines (and possibly a second cpp_args assignment) are not
# visible here; the two consecutive compile_args assignments below are presumably the
# static and non-static branches. Confirm against the full file.
if get_option('default_library') == 'static'
cpp_args = []
compile_args = ['-DHUNSPELL_STATIC']
compile_args = []
# compile_args is appended to the library's own cpp_args, so the same defines apply when
# building the library itself.
hunspell = library('hunspell', hunspell_sources,
cpp_args: cpp_args + compile_args)