// NOTE(review): this copy of the file appears to have been damaged by text
// extraction. The four #include directives below have no targets, most
// PATTERN strings are empty, and the text between the start of PATTERN2 and
// the middle of a switch statement is missing. Restore the lost parts from
// the original file before building. The comments below describe only what
// is still visible here.
#include
#include
#include
#include
#include "../hunspell/csutil.hxx"
#include "htmlparser.hxx"

#ifndef W32
using namespace std;
#endif

// State values used by the switch statement further down.
enum { ST_NON_WORD, ST_WORD, ST_TAG, ST_CHAR_ENTITY, ST_OTHER_TAG, ST_ATTRIB };

// Table of string pairs. The visible code reads column 0 of the row selected
// by pattern_num as the opening text and column 1 as the closing text.
// NOTE(review): the first ten rows hold a single empty string each, which
// looks like lost content rather than intent — confirm against the original.
static char * PATTERN[][2] = {
    { "" },
    { "" },
    { "" },
    { "" },
    { "" },
    { "" },
    { "" },
    { "" },
    { "" },
    { "" },
    { "<[cdata[", "]]>" }, // XML comment (as labelled in the original)
    { "<", ">" }
};

// Number of rows in PATTERN: the array size divided by the size of one
// two-pointer row.
#define PATTERN_LEN (sizeof(PATTERN) / (sizeof(char *) * 2))

// NOTE(review): the text is cut off inside the first string literal of
// PATTERN2. What follows on the same line is the tail of a condition from a
// later switch case (presumably the ST_TAG case — confirm). The function
// header, the earlier cases, and the declarations of i, pattern_num,
// checkattr, quotmark, prevstate, token, head, line and actual are not
// visible in this copy.
static char * PATTERN2[][2] = { { " 0) && (line[actual][head] == '>')) {
                // A '>' at the current position (together with the missing
                // left-hand condition) switches back to ST_NON_WORD.
                state = ST_NON_WORD;
            } else if (((i = look_pattern(PATTERN, PATTERN_LEN, 1)) != -1) &&
                       (strcmp(PATTERN[i][1],PATTERN[pattern_num][1]) == 0)) {
                // look_pattern is not visible here; presumably it returns the
                // index of the row whose column-1 string matches at head, or
                // -1. The state changes only when that string equals the
                // column-1 string of row pattern_num.
                state = ST_NON_WORD;
                // Move head forward by the closing string's length minus one.
                head += strlen(PATTERN[pattern_num][1]) - 1;
            } else if ( (strcmp(PATTERN[pattern_num][0], "<") == 0) &&
                        ((line[actual][head] == '"') || (line[actual][head] == '\''))) {
                // Row pattern_num opens with a plain "<" and the current
                // character is a quote: remember which quote character it is
                // and switch to ST_ATTRIB.
                quotmark = line[actual][head];
                state = ST_ATTRIB;
            }
            break;
        case ST_ATTRIB: // entered from the quote branch above; runs until quotmark is seen again
            prevstate = ST_ATTRIB;
            if (line[actual][head] == quotmark) {
                // The remembered quote character is seen again: back to ST_TAG.
                state = ST_TAG;
                if (checkattr == 2) checkattr = 1; // for IMG ALT
            } else if (is_wordchar(line[actual] + head) && (checkattr == 2)) {
                // A word starts here only while checkattr is 2; token records
                // the position where it begins.
                state = ST_WORD;
                token = head;
            } else if (line[actual][head] == '&') {
                state = ST_CHAR_ENTITY;
            }
            break;
        case ST_CHAR_ENTITY: // stays in this state while the current character is a letter a-z (case-insensitive)
            // NOTE(review): if line holds plain char, bytes >= 0x80 may reach
            // tolower as negative values — confirm the element type.
            if ((tolower(line[actual][head]) < 'a') || (tolower(line[actual][head]) > 'z')) {
                // First non-letter: restore the saved state and step head
                // back one position. next_char below presumably advances head
                // again, so this character would be re-examined — confirm.
                state = prevstate;
                head--;
            }
        }
        // A non-zero result from next_char makes the function return NULL.
        // Presumably this means no further character is available — verify
        // against next_char.
        if (next_char(line[actual], &head)) return NULL;
    }
}