Collected all the Unicode code points listed in Plorkyeran's patch into an array, to avoid having to encode the source file as UTF-8 or UTF-16. Updates #710; this seems to fix it, but someone please confirm.
Originally committed to SVN as r3067.
This commit is contained in:
parent
de5c5ea24b
commit
8e13869510
1 changed file with 18 additions and 5 deletions
|
@@ -231,11 +231,24 @@ void GetWordBoundaries(const wxString text,IntPairVector &results,int start,int
|
|||
bool isDelim;
|
||||
|
||||
// Delimiters
|
||||
wxString delim(_T(" .,;:!?-(){}[]\"\\/"));
|
||||
wxChar temp = 0xBF;
|
||||
delim += temp;
|
||||
temp = 0xA1;
|
||||
delim += temp;
|
||||
const wchar_t delim_chars[] = {
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0028,
|
||||
0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x003a,
|
||||
0x003b, 0x003d, 0x003f, 0x0040, 0x005b, 0x005c, 0x005d, 0x005e,
|
||||
0x005f, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 0x00a1, 0x00a2,
|
||||
0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00aa, 0x00ab,
|
||||
0x00b0, 0x00b6, 0x00b7, 0x00ba, 0x00bb, 0x00bf, 0x02dc, 0x0e3f,
|
||||
0x2010, 0x2013, 0x2014, 0x2015, 0x2018, 0x2019, 0x201c, 0x201d,
|
||||
0x2020, 0x2021, 0x2022, 0x2025, 0x2026, 0x2026, 0x2030, 0x2031,
|
||||
0x2032, 0x203b, 0x203b, 0x203d, 0x2042, 0x2044, 0x20a6, 0x20a9,
|
||||
0x20aa, 0x20ac, 0x20ad, 0x2116, 0x2234, 0x2235, 0x2420, 0x2422,
|
||||
0x2423, 0x2506, 0x25ca, 0x2605, 0x261e, 0x2e2e, 0x3000, 0x3001,
|
||||
0x3002, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 0x300d, 0x300e,
|
||||
0x300f, 0x3010, 0x3011, 0x3014, 0x3015, 0x3016, 0x3017, 0x3018,
|
||||
0x3019, 0x301a, 0x301b, 0x301c, 0x3030, 0x303d, 0x30fb, 0xff0a,
|
||||
0xff5b, 0xff5d, 0xff5e, 0
|
||||
};
|
||||
wxString delim(delim_chars);
|
||||
|
||||
// Scan
|
||||
for (int i=start;i<end+1;i++) {
|
||||
|
|
Loading…
Reference in a new issue