From 8e13869510312ae4ccf6baaf3e5a2c3c4250a953 Mon Sep 17 00:00:00 2001 From: Niels Martin Hansen Date: Tue, 16 Jun 2009 23:29:56 +0000 Subject: [PATCH] Got all the Unicode codepoints listed in Plorkyeran's patch and listed them in an array, to avoid encoding the source file as UTF-8 or UTF-16. Updates #710, seems to fix it but someone please confirm. Originally committed to SVN as r3067. --- aegisub/src/utils.cpp | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/aegisub/src/utils.cpp b/aegisub/src/utils.cpp index 1e0be4d41..9e193a674 100644 --- a/aegisub/src/utils.cpp +++ b/aegisub/src/utils.cpp @@ -231,11 +231,24 @@ void GetWordBoundaries(const wxString text,IntPairVector &results,int start,int bool isDelim; // Delimiters - wxString delim(_T(" .,;:!?-(){}[]\"\\/")); - wxChar temp = 0xBF; - delim += temp; - temp = 0xA1; - delim += temp; + const wchar_t delim_chars[] = { + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0028, + 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x003a, + 0x003b, 0x003d, 0x003f, 0x0040, 0x005b, 0x005c, 0x005d, 0x005e, + 0x005f, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 0x00a1, 0x00a2, + 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00aa, 0x00ab, + 0x00b0, 0x00b6, 0x00b7, 0x00ba, 0x00bb, 0x00bf, 0x02dc, 0x0e3f, + 0x2010, 0x2013, 0x2014, 0x2015, 0x2018, 0x2019, 0x201c, 0x201d, + 0x2020, 0x2021, 0x2022, 0x2025, 0x2026, 0x2026, 0x2030, 0x2031, + 0x2032, 0x203b, 0x203b, 0x203d, 0x2042, 0x2044, 0x20a6, 0x20a9, + 0x20aa, 0x20ac, 0x20ad, 0x2116, 0x2234, 0x2235, 0x2420, 0x2422, + 0x2423, 0x2506, 0x25ca, 0x2605, 0x261e, 0x2e2e, 0x3000, 0x3001, + 0x3002, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 0x300d, 0x300e, + 0x300f, 0x3010, 0x3011, 0x3014, 0x3015, 0x3016, 0x3017, 0x3018, + 0x3019, 0x301a, 0x301b, 0x301c, 0x3030, 0x303d, 0x30fb, 0xff0a, + 0xff5b, 0xff5d, 0xff5e, 0 + }; + wxString delim(delim_chars); // Scan for (int i=start;i