forked from mia/Aegisub
391219ea54
1. svn mv assdraw csri hunspell lua51 contrib * See r2749 for full description. Originally committed to SVN as r2754.
69 lines
1.8 KiB
C++
69 lines
1.8 KiB
C++
/*
|
|
* parser classes for MySpell
|
|
*
|
|
* implemented: text, HTML, TeX
|
|
*
|
|
* Copyright (C) 2002, Laszlo Nemeth
|
|
*
|
|
*/
|
|
|
|
#ifndef _TEXTPARSER_HXX_
|
|
#define _TEXTPARSER_HXX_
|
|
|
|
// set sum of actual and previous lines
|
|
#define MAXPREVLINE 4
|
|
|
|
#ifndef MAXLNLEN
|
|
#define MAXLNLEN 8192
|
|
#endif
|
|
|
|
/*
|
|
* Base Text Parser
|
|
*
|
|
*/
|
|
|
|
class TextParser
|
|
{
|
|
|
|
protected:
|
|
void init(const char *);
|
|
void init(unsigned short * wordchars, int len);
|
|
int wordcharacters[256]; // for detection of the word boundaries
|
|
char line[MAXPREVLINE][MAXLNLEN]; // parsed and previous lines
|
|
char urlline[MAXLNLEN]; // mask for url detection
|
|
int checkurl;
|
|
int actual; // actual line
|
|
int head; // head position
|
|
int token; // begin of token
|
|
int state; // state of automata
|
|
int utf8; // UTF-8 character encoding
|
|
int next_char(char * line, int * pos);
|
|
unsigned short * wordchars_utf16;
|
|
int wclen;
|
|
|
|
public:
|
|
|
|
TextParser();
|
|
TextParser(unsigned short * wordchars, int len);
|
|
TextParser(const char * wc);
|
|
virtual ~TextParser();
|
|
|
|
void put_line(char * line);
|
|
char * get_line();
|
|
char * get_prevline(int n);
|
|
virtual char * next_token();
|
|
int change_token(const char * word);
|
|
void set_url_checking(int check);
|
|
|
|
int get_tokenpos();
|
|
int is_wordchar(char * w);
|
|
char * get_latin1(char * s);
|
|
char * next_char();
|
|
int tokenize_urls();
|
|
void check_urls();
|
|
int get_url(int token_pos, int * head);
|
|
char * alloc_token(int token, int * head);
|
|
};
|
|
|
|
#endif
|
|
|