Aegisub/aegisub/src/text_file_reader.cpp

// Copyright (c) 2005, Rodrigo Braz Monteiro
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of the Aegisub Group nor the names of its contributors
//     may be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// -----------------------------------------------------------------------------
//
// AEGISUB
//
// Website: http://aegisub.cellosoft.com
// Contact: mailto:zeratul@cellosoft.com
//

#include "config.h"

#include <fstream>
#include <algorithm>
#include <string>
#include <assert.h>
#include <errno.h>
#include "text_file_reader.h"
#include "charset_conv.h"


#ifdef WITH_UNIVCHARDET
#include "charset_detect.h"
#endif

// Open a file for reading and prepare the conversion of its contents to wchar_t.
//
// filename: path of the file to open
// enc:      name of the file's encoding; when empty, GetEncoding() is used to guess it
// trim:     when true, ReadLineFromFile() trims both ends of every line it returns
//
// Throws a const wxChar* message if the file cannot be opened, or if no
// converter from the file's encoding to wchar_t can be created.
TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
: encoding(enc), conv((iconv_t)-1), trim(trim), readComplete(false), currout(0), outptr(0), currentLine(0) {
#ifdef __WINDOWS__
	file.open(filename.wc_str(),std::ios::in | std::ios::binary);
#else
	file.open(wxFNCONV(filename),std::ios::in | std::ios::binary);
#endif
	if (!file.is_open()) {
		throw _T("Failed opening file for reading.");
	}

	if (encoding.IsEmpty()) encoding = GetEncoding(filename);
	// Files flagged as binary get no converter; conv keeps its invalid value
	if (encoding == _T("binary")) return;
	encoding = AegisubCSConv::GetRealEncodingName(encoding);
	conv = iconv_open(WCHAR_T_ENCODING, encoding.ToAscii());
	// Fail here rather than letting every later read fail with a misleading
	// "invalid input character" error. The exception type matches the other
	// throw in this constructor so existing handlers still catch it.
	if (conv == (iconv_t)-1) {
		throw _T("Character set is not supported.");
	}
}

// Release the iconv descriptor, provided one was actually opened
TextFileReader::~TextFileReader() {
	const bool hasConverter = (conv != (iconv_t)-1);
	if (hasConverter) {
		iconv_close(conv);
	}
}

// Guess the encoding of a file from its first (up to) four bytes.
//
// filename: path of the file to inspect
//
// Returns "unknown" if the file cannot be opened, the name of a Unicode
// encoding if a byte order mark or a UTF-16-looking pattern is found,
// "binary" if the file is empty or starts with control characters, and
// otherwise either the result of the universalchardet detector (when built
// WITH_UNIVCHARDET) or "Local".
wxString TextFileReader::GetEncoding(const wxString filename) {
	// Prepare
	unsigned char b[4] = { 0, 0, 0, 0 };

	// Read up to four bytes from the file. Open in binary mode, like the
	// constructor does, so the probed bytes are not subject to any text-mode
	// translation.
	std::ifstream ifile;
#ifdef __WINDOWS__
	ifile.open(filename.wc_str(), std::ios::in | std::ios::binary);
#else
	ifile.open(wxFNCONV(filename), std::ios::in | std::ios::binary);
#endif
	if (!ifile.is_open()) {
		return _T("unknown");
	}
	ifile.read(reinterpret_cast<char *>(b),4);
	// Files shorter than four bytes leave the tail of b zeroed; remember how
	// many bytes are real so the padding is never mistaken for file content
	const int got = static_cast<int>(ifile.gcount());
	ifile.close();

	// An empty file was always reported as binary (all probe bytes were
	// zero); keep that result
	if (got == 0) return _T("binary");

	// Try to get the byte order mark from them
	if (got >= 3 && b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) return _T("UTF-8");
	if (got >= 4 && b[0] == 0xFF && b[1] == 0xFE && b[2] == 0x00 && b[3] == 0x00) return _T("UTF-32LE");
	if (got >= 4 && b[0] == 0x00 && b[1] == 0x00 && b[2] == 0xFE && b[3] == 0xFF) return _T("UTF-32BE");
	if (got >= 2 && b[0] == 0xFF && b[1] == 0xFE) return _T("UTF-16LE");
	if (got >= 2 && b[0] == 0xFE && b[1] == 0xFF) return _T("UTF-16BE");
	if (got >= 3 && b[0] == 0x2B && b[1] == 0x2F && b[2] == 0x76) return _T("UTF-7");

	// Try to guess UTF-16 (needs two full code units)
	if (got >= 4) {
		if (b[0] == 0 && b[1] >= 32 && b[2] == 0 && b[3] >= 32) return _T("UTF-16BE");
		if (b[0] >= 32 && b[1] == 0 && b[2] >= 32 && b[3] == 0) return _T("UTF-16LE");
	}

	// If any of the bytes read are under 0x20 (the first printable character),
	// except for 9-13 range, assume binary
	for (int i=0;i<got;i++) {
		if (b[i] < 9 || (b[i] > 13 && b[i] < 32)) return _T("binary");
	}

#ifdef WITH_UNIVCHARDET
	// Use universalchardet library to detect charset
	CharSetDetect det;
	return det.GetEncoding(filename);
#else
	// Fall back to local
	return _T("Local");
#endif
}

// Return the next converted character from the file.
//
// Characters are converted a block at a time into outbuf; currout points at
// the character handed out last and outptr one past the last converted one.
// Returns 0 when the stream is at end-of-file or a block read yields no bytes.
// Throws a wxString message if the input cannot be converted.
wchar_t TextFileReader::GetWChar() {
	// If there's already some converted characters waiting, return the next one
	// NOTE(review): on the very first call currout and outptr are both null
	// (see the constructor), so this increments a null pointer before the
	// comparison fails -- confirm this is acceptable on all targets
	if (++currout < outptr) {
		return *currout;
	}

	if (file.eof()) return 0;

	// Otherwise convert another block
	char    inbuf[64];
	char    *inptr = inbuf;
	// Keep the last four bytes of inbuf free for the single bytes appended
	// below when a multibyte character is split at the block edge
	size_t  inbytesleft = sizeof(inbuf) - 4;
	int     bytesAdded = 0;
	memset(inbuf, 0, inbytesleft);

	// Start filling (and handing out from) the beginning of the output buffer
	outptr       = outbuf;
	outbytesleft = sizeof(outbuf);
	currout      = outbuf;

	file.read(inbuf, inbytesleft);
	// Only the bytes actually read are passed to iconv
	inbytesleft = file.gcount();
	if (inbytesleft == 0)
		return 0;

	do {
		// iconv advances inptr/outptr and decrements the two byte counts as it goes
		size_t ret = iconv(conv, &inptr, &inbytesleft, reinterpret_cast<char **>(&outptr), &outbytesleft);
		if (ret != (size_t)-1) break;

		int err = errno;
		// If 64 chars do not fit into 256 wchar_ts the environment is so bizarre that doing
		// anything is probably futile
		assert(err != E2BIG);

		// (Hopefully) the edge of the buffer happened to split a multibyte character, so keep
		// adding one byte to the input buffer until either it succeeds or we add enough bytes to
		// complete any character
		if (++bytesAdded > 3)
			throw wxString::Format(_T("Invalid input character found near line %u"), currentLine);

		// Append one more byte directly after the unconverted remainder
		file.read(inptr + inbytesleft, 1);
		// NOTE(review): the count is bumped even if the read above got nothing;
		// the loop condition then ends the loop, so the value is not used again
		inbytesleft++;
	} while (!file.eof() && file.gcount());

	// Anything converted at all counts as success; hand out the first character
	if (outptr > outbuf)
		return *currout;

	throw wxString::Format(_T("Invalid input character found near line %u"), currentLine);
}

// Read one line of text from the file.
//
// Characters are pulled from GetWChar() until a line feed or the end-of-input
// marker (0) shows up. Carriage returns are dropped, as is a byte order mark
// that would otherwise be the first character of the line. Hitting the
// end-of-input marker flags the reader as complete. The line is trimmed on
// both ends when trimming was requested at construction.
wxString TextFileReader::ReadLineFromFile() {
	size_t capacity = 1024;
	wxString line;
	line.Alloc(capacity);

	currentLine++;

	// Number of characters stored in the line so far
	size_t count = 0;
	wchar_t c = GetWChar();
	while (c != 0 && c != L'\n') {
		const bool isCarriageReturn = (c == L'\r');
		// The BOM is not needed as the encoding is already known, and it
		// sometimes causes conversion problems
		const bool isLeadingBom = (c == 0xFEFF && count == 0);

		if (!isCarriageReturn && !isLeadingBom) {
			// Grow the reserved storage in powers of two
			if (count >= capacity - 1) {
				capacity *= 2;
				line.Alloc(capacity);
			}
			line += c;
			count++;
		}

		c = GetWChar();
	}

	// A zero from GetWChar() means there is nothing left to read
	if (c == 0) {
		readComplete = true;
	}

	if (trim) {
		line.Trim(true);
		line.Trim(false);
	}
	return line;
}

// Tell whether the end of the input has not yet been reached, i.e. whether
// ReadLineFromFile() has not yet seen the end-of-input marker
bool TextFileReader::HasMoreLines() {
	if (readComplete) {
		return false;
	}
	return true;
}

// Check that a converter from the given encoding to wchar_t can be created.
//
// enc: name of the encoding to check; "binary" is accepted without a check
//
// Throws a wxString message naming the (resolved) character set if iconv
// cannot open a converter for it.
void TextFileReader::EnsureValid(wxString enc) {
	if (enc == _T("binary")) return;

	enc = AegisubCSConv::GetRealEncodingName(enc);
	iconv_t cd = iconv_open(WCHAR_T_ENCODING, enc.ToAscii());
	if (cd == (iconv_t)-1) {
		throw wxString::Format(_T("Character set %s is not supported."), enc.c_str());
	}
	// Only a successfully opened descriptor may be handed to iconv_close
	iconv_close(cd);
}

// Return the encoding name stored by the constructor (given by the caller or
// detected, then resolved through AegisubCSConv::GetRealEncodingName unless
// it is "binary")
wxString TextFileReader::GetCurrentEncoding() {
	return this->encoding;
}
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00			`// Copyright (c) 2005, Rodrigo Braz Monteiro`
			`// All rights reserved.`
			`//`
			`// Redistribution and use in source and binary forms, with or without`
			`// modification, are permitted provided that the following conditions are met:`
			`//`
			`// * Redistributions of source code must retain the above copyright notice,`
			`// this list of conditions and the following disclaimer.`
			`// * Redistributions in binary form must reproduce the above copyright notice,`
			`// this list of conditions and the following disclaimer in the documentation`
			`// and/or other materials provided with the distribution.`
			`// * Neither the name of the Aegisub Group nor the names of its contributors`
			`// may be used to endorse or promote products derived from this software`
			`// without specific prior written permission.`
			`//`
			`// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"`
			`// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
			`// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE`
			`// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE`
			`// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR`
			`// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF`
			`// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS`
			`// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN`
			`// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)`
			`// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE`
			`// POSSIBILITY OF SUCH DAMAGE.`
			`//`
			`// -----------------------------------------------------------------------------`
			`//`
			`// AEGISUB`
			`//`
			`// Website: http://aegisub.cellosoft.com`
			`// Contact: mailto:zeratul@cellosoft.com`
			`//`

Change how configuration works. * move win32/config0.h -> config/config_windows0.h * move win32/stdint.h -> msvc/stdint.h * move posix/defines.h -> config/config_unix.h * add config.h - brings in config_(windows\|unix).h as required * add config.h to .cpp Self-contain FFmpegSource2 with required function defines wrapped with __UNIX__ instead of the now removed defines.h. * Edit aegisub_vs2008.vcproj to remove explicit inclusion of config.h, also change VCPreBuildEventTool to create windows_config.h from windows_config0.h. Add msvc to include path. * Change configure to create ./acconf.h instead of posix/acconf.h This will allow us to create a more standard and platform agnostic way of configuration aegsisub during build time. Originally committed to SVN as r2621. 2009-01-04 07:31:48 +01:00			`#include "config.h"`

Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00			`#include <fstream>`
			`#include <algorithm>`
			`#include <string>`
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`#include <assert.h>`
A few minor cleanups to the new charset conversion code. Originally committed to SVN as r3159. 2009-07-18 02:58:13 +02:00			`#include <errno.h>`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00			`#include "text_file_reader.h"`
A few minor cleanups to the new charset conversion code. Originally committed to SVN as r3159. 2009-07-18 02:58:13 +02:00			`#include "charset_conv.h"`

Added shb's incomplete perl support code (doesn't build on VC++ yet) Originally committed to SVN as r1741. 2008-01-16 19:29:29 +01:00
Removed dependency from universal charset detector (that was the last one) Originally committed to SVN as r1662. 2008-01-01 23:42:29 +01:00			`#ifdef WITH_UNIVCHARDET`
Instead of falling back to your local charset, Aegisub will now use the "universalchardet" library (the one used by the Mozilla project) to autodetect the character set of non-Unicode files. Originally committed to SVN as r1028. 2007-04-08 08:01:41 +02:00			`#include "charset_detect.h"`
Disabled autodetection on non-windows for now. Originally committed to SVN as r1029. 2007-04-08 08:10:52 +02:00			`#endif`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)`
			`: encoding(enc), conv((iconv_t)-1), trim(trim), readComplete(false), currout(0), outptr(0), currentLine(0) {`
			`#ifdef __WINDOWS__`
			`file.open(filename.wc_str(),std::ios::in \| std::ios::binary);`
			`#else`
			`file.open(wxFNCONV(filename),std::ios::in \| std::ios::binary);`
			`#endif`
			`if (!file.is_open()) {`
			`throw _T("Failed opening file for reading.");`
			`}`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00
Implemented redo, and replace == _T("") with .IsEmpty() Originally committed to SVN as r80. 2006-02-20 22:32:58 +01:00			`if (encoding.IsEmpty()) encoding = GetEncoding(filename);`
Fixed file backup AGAIN... Originally committed to SVN as r1279. 2007-06-21 06:11:24 +02:00			`if (encoding == _T("binary")) return;`
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`encoding = AegisubCSConv::GetRealEncodingName(encoding);`
			`conv = iconv_open(WCHAR_T_ENCODING, encoding.ToAscii());`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00			`}`

			`TextFileReader::~TextFileReader() {`
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`if (conv != (iconv_t)-1) iconv_close(conv);`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00			`}`

A few minor cleanups to the new charset conversion code. Originally committed to SVN as r3159. 2009-07-18 02:58:13 +02:00			`wxString TextFileReader::GetEncoding(const wxString filename) {`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00			`// Prepare`
			`unsigned char b[4];`
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`memset(b, 0, sizeof(b));`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00
			`// Read four bytes from file`
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`std::ifstream ifile;`
s/WIN32/__WINDOWS__/ to stop a define collision with universalchardet. This is a temp solution as the real fix is sorting out universalchardet to build correctly. Originally committed to SVN as r2087. 2008-03-21 03:41:46 +01:00			`#ifdef __WINDOWS__`
A few minor cleanups to the new charset conversion code. Originally committed to SVN as r3159. 2009-07-18 02:58:13 +02:00			`ifile.open(filename.wc_str());`
Some fixes for SVN wx compatibility (that doesn't work, anyway) Originally committed to SVN as r1472. 2007-08-07 22:45:41 +02:00			`#else`
A few minor cleanups to the new charset conversion code. Originally committed to SVN as r3159. 2009-07-18 02:58:13 +02:00			`ifile.open(wxFNCONV(filename));`
Some fixes for SVN wx compatibility (that doesn't work, anyway) Originally committed to SVN as r1472. 2007-08-07 22:45:41 +02:00			`#endif`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00			`if (!ifile.is_open()) {`
			`return _T("unknown");`
			`}`
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`ifile.read(reinterpret_cast<char *>(b),4);`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00			`ifile.close();`

			`// Try to get the byte order mark from them`
			`if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) return _T("UTF-8");`
			`else if (b[0] == 0xFF && b[1] == 0xFE && b[2] == 0x00 && b[3] == 0x00) return _T("UTF-32LE");`
			`else if (b[0] == 0x00 && b[1] == 0x00 && b[2] == 0xFE && b[3] == 0xFF) return _T("UTF-32BE");`
			`else if (b[0] == 0xFF && b[1] == 0xFE) return _T("UTF-16LE");`
			`else if (b[0] == 0xFE && b[1] == 0xFF) return _T("UTF-16BE");`
			`else if (b[0] == 0x2B && b[1] == 0x2F && b[2] == 0x76) return _T("UTF-7");`

			`// Try to guess UTF-16`
Fixed file backup AGAIN... Originally committed to SVN as r1279. 2007-06-21 06:11:24 +02:00			`else if (b[0] == 0 && b[1] >= 32 && b[2] == 0 && b[3] >= 32) return _T("UTF-16BE");`
			`else if (b[0] >= 32 && b[1] == 0 && b[2] >= 32 && b[3] == 0) return _T("UTF-16LE");`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00
Made text file reader use iostream on all platforms... this will probably need some testing, though, so old code is only #ifdefed out. Originally committed to SVN as r1069. 2007-04-13 04:05:38 +02:00			`// If any of the first four bytes are under 0x20 (the first printable character),`
			`// except for 9-13 range, assume binary`
			`for (int i=0;i<4;i++) {`
			`if (b[i] < 9 \|\| (b[i] > 13 && b[i] < 32)) return _T("binary");`
			`}`

Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`#ifdef WITH_UNIVCHARDET`
Instead of falling back to your local charset, Aegisub will now use the "universalchardet" library (the one used by the Mozilla project) to autodetect the character set of non-Unicode files. Originally committed to SVN as r1028. 2007-04-08 08:01:41 +02:00			`// Use universalchardet library to detect charset`
			`CharSetDetect det;`
A few minor cleanups to the new charset conversion code. Originally committed to SVN as r3159. 2009-07-18 02:58:13 +02:00			`return det.GetEncoding(filename);`
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`#else`
Disabled autodetection on non-windows for now. Originally committed to SVN as r1029. 2007-04-08 08:10:52 +02:00			`// Fall back to local`
			`return _T("Local");`
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`#endif`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00			`}`

Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`wchar_t TextFileReader::GetWChar() {`
			`// If there's already some converted characters waiting, return the next one`
			`if (++currout < outptr) {`
			`return *currout;`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00			`}`

Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`if (file.eof()) return 0;`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`// Otherwise convert another block`
			`char inbuf[64];`
			`char *inptr = inbuf;`
			`size_t inbytesleft = sizeof(inbuf) - 4;`
			`int bytesAdded = 0;`
			`memset(inbuf, 0, inbytesleft);`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`outptr = outbuf;`
			`outbytesleft = sizeof(outbuf);`
			`currout = outbuf;`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`file.read(inbuf, inbytesleft);`
			`inbytesleft = file.gcount();`
Fix a regression introduced by Plorkyeran's iconv patch that caused the text file reader to throw an exception when trying to read an empty file. Originally committed to SVN as r3146. 2009-07-16 17:10:40 +02:00			`if (inbytesleft == 0)`
			`return 0;`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`do {`
			`size_t ret = iconv(conv, &inptr, &inbytesleft, reinterpret_cast<char **>(&outptr), &outbytesleft);`
			`if (ret != (size_t)-1) break;`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`int err = errno;`
			`// If 64 chars do not fit into 256 wchar_ts the environment is so bizzare that doing`
			`// anything is probably futile`
			`assert(err != E2BIG);`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`// (Hopefully) the edge of the buffer happened to split a multibyte character, so keep`
			`// adding one byte to the input buffer until either it succeeds or we add enough bytes to`
			`// complete any character`
			`if (++bytesAdded > 3)`
			`throw wxString::Format(_T("Invalid input character found near line %u"), currentLine);`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`file.read(inptr + inbytesleft, 1);`
			`inbytesleft++;`
A few minor cleanups to the new charset conversion code. Originally committed to SVN as r3159. 2009-07-18 02:58:13 +02:00			`} while (!file.eof() && file.gcount());`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`if (outptr > outbuf)`
			`return *currout;`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`throw wxString::Format(_T("Invalid input character found near line %u"), currentLine);`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00			`}`

Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`wxString TextFileReader::ReadLineFromFile() {`
			`wxString buffer;`
			`size_t bufAlloc = 1024;`
			`buffer.Alloc(bufAlloc);`

			`currentLine++;`
			`// Read a line`
			`wchar_t ch;`
			`size_t len = 0;`
			`for (ch = GetWChar(); ch != L'\n' && ch != 0; ch = GetWChar()) {`
			`if (ch == L'\r') continue;`
A few minor cleanups to the new charset conversion code. Originally committed to SVN as r3159. 2009-07-18 02:58:13 +02:00			`// Skip the BOM -- we don't need it as the encoding is already known`
			`// and it sometimes causes conversion problems`
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`if (ch == 0xFEFF && len == 0) continue;`

			`if (len >= bufAlloc - 1) {`
			`bufAlloc *= 2;`
			`buffer.Alloc(bufAlloc);`
			`}`
			`buffer += ch;`
			`len++;`
			`}`
			`if (ch == 0)`
			`readComplete = true;`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`// Trim`
			`if (trim) {`
			`buffer.Trim(true);`
			`buffer.Trim(false);`
			`}`
			`return buffer;`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00			`}`

			`bool TextFileReader::HasMoreLines() {`
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`return !readComplete;`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00			`}`

			`void TextFileReader::EnsureValid(wxString enc) {`
Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877. Originally committed to SVN as r3137. 2009-07-14 23:28:49 +02:00			`if (enc == _T("binary")) return;`

			`enc = AegisubCSConv::GetRealEncodingName(enc);`
			`iconv_t cd = iconv_open(WCHAR_T_ENCODING, enc.ToAscii());`
			`bool canOpen = cd != (iconv_t)-1;`
			`iconv_close(cd);`
			`if (!canOpen) {`
			`throw wxString::Format(_T("Character set %s is not supported."), enc.c_str());`
Originally committed to SVN as r2. 2006-01-16 22:02:54 +01:00			`}`
			`}`
Instead of falling back to your local charset, Aegisub will now use the "universalchardet" library (the one used by the Mozilla project) to autodetect the character set of non-Unicode files. Originally committed to SVN as r1028. 2007-04-08 08:01:41 +02:00
			`wxString TextFileReader::GetCurrentEncoding() {`
			`return encoding;`
			`}`