Add some documentation for AegisubCSConv and TextFileReader.

Originally committed to SVN as r4036.
2010-01-24 18:56:51 +00:00 · 2010-01-24 18:56:51 +00:00 · ba088237d7
commit ba088237d7
parent c7d95e5590
5 changed files with 155 additions and 298 deletions
--- a/aegisub/src/ass_file.cpp
+++ b/aegisub/src/ass_file.cpp
@ -103,7 +103,6 @@ void AssFile::Load (const wxString _filename,const wxString charset,bool addToRe
 		wxString enc;
 		if (charset.IsEmpty()) enc = TextFileReader::GetEncoding(_filename);
 		else enc = charset;
 		TextFileReader::EnsureValid(enc);
 		// Generic preparation
 		Clear();
--- a/aegisub/src/charset_conv.cpp
+++ b/aegisub/src/charset_conv.cpp
@ -49,52 +49,27 @@
 /// Hash map type used to map pretty encoding names to real encoding names
 WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash);
 #if wxUSE_THREADS
 /// Mutex locked by GetAllSupportedEncodings and GetEncodingsList while they
 /// access the encoding lists declared below
 static wxMutex encodingListMutex;
 #endif
 /// Descriptor value that iconv_open returns on failure
 static const iconv_t iconv_invalid = (iconv_t)-1;
 /// Return value of iconv that indicates a failed conversion
 static const size_t  iconv_failed  = (size_t)-1;
 /// Cast a string to the (possibly const) char pointer type named by ICONV_CONST
 #define ICONV_CONST_CAST(a) const_cast<ICONV_CONST char *>(a)
 #ifndef ICONV_POSIX
 static int addEncoding(unsigned int namescount, const char * const * names, void* data);
 #endif
 /// Encoding names collected by addEncoding; returned by GetAllSupportedEncodings
 static wxArrayString   *supportedEncodings = NULL;
 /// Pretty encoding names returned by GetEncodingsList
 static wxArrayString   *prettyEncodingList = NULL;
 /// Lookup table used by GetRealEncodingName to resolve a pretty name
 static PrettyNamesHash *prettyEncodingHash = NULL;
 /// @brief DOCME
 /// @param mbEncName   
 /// @param enableSubst 
 ///
 AegisubCSConv::AegisubCSConv(const wxChar *mbEncName, bool enableSubst)
-:	mbCharsetName(GetRealEncodingName(mbEncName)), mbNulLen(0), enableSubst(enableSubst)
+: wcCharsetName(WCHAR_T_ENCODING)
 , mbCharsetName(GetRealEncodingName(mbEncName))
 , mbNulLen(0)
 , enableSubst(enableSubst)
 , m2w(wcCharsetName, mbCharsetName)
 , w2m(mbCharsetName, wcCharsetName)
 {
 	wcCharsetName = wxString::FromAscii(WCHAR_T_ENCODING);
 	m2w = iconv_open(wcCharsetName.ToAscii(), mbCharsetName.ToAscii());
 	w2m = iconv_open(mbCharsetName.ToAscii(), wcCharsetName.ToAscii());
 	if (m2w == iconv_invalid || w2m == iconv_invalid) {
-		if (m2w != iconv_invalid) iconv_close(m2w);
+		throw wxString::Format(L"Character set %s is not supported.", mbEncName);
 		if (w2m != iconv_invalid) iconv_close(w2m);
 		throw wxString::Format(_T("Character set %s is not supported."), mbEncName);
 	}
 	if (enableSubst) {
@ -110,26 +85,14 @@ AegisubCSConv::AegisubCSConv(const wxChar *mbEncName, bool enableSubst)
 	}
 }
 /// @brief Destructor; closes each iconv descriptor that was opened successfully
 ///
 AegisubCSConv::~AegisubCSConv() {
 	// NOTE(review): if m2w/w2m are iconv_wrapper members, which close
 	// themselves on destruction, these explicit closes would be redundant
 	// -- confirm against the class declaration
 	if (m2w != iconv_invalid) iconv_close(m2w);
 	if (w2m != iconv_invalid) iconv_close(w2m);
 }
 /// @brief Create an independent copy of this converter
 /// @return A newly allocated converter for the same multibyte charset;
 ///         it is created with new, so the caller must delete it
 ///
 wxMBConv * AegisubCSConv::Clone() const {
 	// Pass enableSubst along so the copy substitutes characters exactly like
 	// this converter does; the constructor defaults it to false otherwise
 	AegisubCSConv *c = new AegisubCSConv(mbCharsetName, enableSubst);
 	// Hand over the cached NUL length so the copy does not have to recompute it
 	c->mbNulLen = mbNulLen;
 	return c;
 }
 /// @brief Calculate the size of NUL in the target encoding via iconv
-/// @return 
+/// @return The size in bytes of NUL
 ///
 size_t AegisubCSConv::GetMBNulLen() const {
 	if (mbNulLen == 0) {
 		const wchar_t nulStr[] = L"";
@ -142,18 +105,13 @@ size_t AegisubCSConv::GetMBNulLen() const {
 		size_t res = iconv(w2m, &inPtr, &inLen, &outPtr, &outLen);
 		if (res != 0)
-			const_cast<AegisubCSConv *>(this)->mbNulLen = (size_t)-1;
+			mbNulLen = (size_t)-1;
 		else
-			const_cast<AegisubCSConv *>(this)->mbNulLen = sizeof(outBuff) - outLen;
+			mbNulLen = sizeof(outBuff) - outLen;
 	}
 	return mbNulLen;
 }
 /// @brief Calculate the length (in bytes) of a MB string, not including the terminator
 /// @param str 
 /// @return 
 ///
 size_t AegisubCSConv::MBBuffLen(const char * str) const {
 	size_t nulLen = GetMBNulLen();
 	const char *ptr;
@ -171,14 +129,12 @@ size_t AegisubCSConv::MBBuffLen(const char * str) const {
 	}
 }
-
+/// @brief Convert a string from multibyte to wide characters
-/// @brief DOCME
+/// @param dst     Destination buffer.
-/// @param dst     
+/// @param dstSize Length of destination buffer in wchar_ts
-/// @param dstSize 
+/// @param src     Source multibyte string
-/// @param src     
+/// @param srcLen  Length of source buffer in bytes, or -1 to autodetect
-/// @param srcLen  
+/// @return The number of wchar_ts needed to store the string in the target charset
 /// @return 
 ///
 size_t AegisubCSConv::ToWChar(wchar_t *dst, size_t dstSize, const char *src, size_t srcLen) const {
 	return doConversion(
 		m2w,
@ -189,14 +145,12 @@ size_t AegisubCSConv::ToWChar(wchar_t *dst, size_t dstSize, const char *src, siz
 	) / sizeof(wchar_t);
 }
-
+/// @brief Convert a string from wide characters to multibyte
-/// @brief DOCME
+/// @param dst     Destination buffer
-/// @param dst     
+/// @param dstSize Length of destination buffer in bytes
-/// @param dstSize 
+/// @param src     Source wide character string
-/// @param src     
+/// @param srcLen  Length in wchar_ts of source, or -1 to autodetect
-/// @param srcLen  
+/// @return The number of bytes needed to store the string in the target charset
 /// @return 
 ///
 size_t AegisubCSConv::FromWChar(char *dst, size_t dstSize, const wchar_t *src, size_t srcLen) const {
 	return doConversion(
 		w2m,
@ -207,15 +161,7 @@ size_t AegisubCSConv::FromWChar(char *dst, size_t dstSize, const wchar_t *src, s
 	);
 }
-
+// Perform a conversion if a buffer is given or calculate the needed buffer size if not
 /// @brief DOCME
 /// @param cd      
 /// @param dst     
 /// @param dstSize 
 /// @param src     
 /// @param srcSize 
 /// @return 
 ///
 size_t AegisubCSConv::doConversion(iconv_t cd, char *dst, size_t dstSize, char *src, size_t srcSize) const {
 	if (dstSize > 0) {
 		return iconvWrapper(cd, &src, &srcSize, &dst, &dstSize);
@ -239,20 +185,12 @@ size_t AegisubCSConv::doConversion(iconv_t cd, char *dst, size_t dstSize, char *
 	return charsWritten;
 }
-
+// Actually perform a conversion via iconv
 /// @brief DOCME
 /// @param cd           
 /// @param inbuf        
 /// @param inbytesleft  
 /// @param outbuf       
 /// @param outbytesleft 
 /// @return 
 ///
 size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft,
                                   char **outbuf, size_t *outbytesleft) const {
 #if wxUSE_THREADS
-	wxMutexLocker lock(const_cast<AegisubCSConv *>(this)->iconvMutex);
+	wxMutexLocker lock(iconvMutex);
 #endif
 	char *outbuforig = *outbuf;
@ -265,10 +203,11 @@ size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft
 #ifdef ICONV_POSIX
 	if (errno == EILSEQ) {
-		throw	_T("One or more characters do not fit in the selected ")
+		throw
-				_T("encoding and the version of iconv Aegisub was built with")
+			L"One or more characters do not fit in the selected "
-				_T(" does not have useful fallbacks. For best results, ")
+			L"encoding and the version of iconv Aegisub was built with"
-				_T("please rebuild Aegisub using a recent version of GNU iconv.");
+			L" does not have useful fallbacks. For best results, "
 			L"please rebuild Aegisub using a recent version of GNU iconv.";
 	}
 	return wxCONV_FAILED;
 #else
@ -287,7 +226,7 @@ size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft
 	}
 	if (res == iconv_failed && err == EILSEQ) {
 		// Conversion still failed with transliteration enabled, so try our substitution
-		iconvctl(cd, ICONV_SET_FALLBACKS, const_cast<iconv_fallbacks *>(&fallbacks));
+		iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
 		res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
 		err = errno;
 		iconvctl(cd, ICONV_SET_FALLBACKS, NULL);
@ -309,13 +248,11 @@ size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft
 }
-/// @brief DOCME
+/// @brief GNU iconv character substitution callback
-/// @param code         
+/// @param code         Unicode character which could not be converted
-/// @param callback     
+/// @param callback     Callback to tell iconv what string to use instead
-/// @param callback_arg 
+/// @param callback_arg Iconv userdata for callback
-/// @param convPtr      
+/// @param convPtr      AegisubCSConv instance to use
 /// @return 
 ///
 void AegisubCSConv::ucToMbFallback(
 	unsigned int code,
 	void (*callback) (const char *buf, size_t buflen, void* callback_arg),
@ -323,7 +260,8 @@ void AegisubCSConv::ucToMbFallback(
 	void *convPtr)
 {
 	// At some point in the future, this should probably switch to a real mapping
-	// For now, there's just three cases: BOM to nothing, \ to itself (lol Shift-JIS) and everything else to ?
+	// For now, there's just three cases: BOM to nothing, '\' to itself
 	// (for Shift-JIS, which does not have \) and everything else to '?'
 	if (code == 0xFEFF) return;
 	if (code == 0x5C) callback("\\", 1, callback_arg);
 	else {
@ -333,13 +271,10 @@ void AegisubCSConv::ucToMbFallback(
 }
 #ifndef ICONV_POSIX
-
+/// @brief Callback for iconvlist
-/// @brief DOCME
+/// @param namescount Number of names in names
-/// @param namescount 
+/// @param names      Names to add to the list
-/// @param names      
+/// @param data       Unused userdata field
 /// @param data       
 /// @return 
 ///
 int addEncoding(unsigned int namescount, const char * const * names, void* data) {
 	for (unsigned int i = 0; i < namescount; i++) {
 		supportedEncodings->Add(wxString::FromAscii(names[i]));
@ -348,10 +283,6 @@ int addEncoding(unsigned int namescount, const char * const * names, void* data)
 }
 #endif
 /// @brief DOCME
 /// @return 
 ///
 wxArrayString AegisubCSConv::GetAllSupportedEncodings() {
 #if wxUSE_THREADS
 	wxMutexLocker lock(encodingListMutex);
@ -366,13 +297,8 @@ wxArrayString AegisubCSConv::GetAllSupportedEncodings() {
 	return *supportedEncodings;
 }
 /// @brief Map pretty names to the real encoding names
 /// @param name 
 /// @return 
 ///
 wxString AegisubCSConv::GetRealEncodingName(wxString name) {
-	if (name.Lower() == _T("local")) return wxLocale::GetSystemEncodingName();
+	if (name.Lower() == L"local") return wxLocale::GetSystemEncodingName();
 	if (prettyEncodingList == NULL) return name;
 	PrettyNamesHash::iterator realName = prettyEncodingHash->find(name);
@ -382,9 +308,6 @@ wxString AegisubCSConv::GetRealEncodingName(wxString name) {
 	return name;
 }
 /// @brief DOCME
 ///
 wxArrayString AegisubCSConv::GetEncodingsList() {
 #if wxUSE_THREADS
 	wxMutexLocker lock(encodingListMutex);
@ -511,10 +434,10 @@ wxArrayString AegisubCSConv::GetEncodingsList() {
 		PrettyNamesHash *map = new PrettyNamesHash(100);
 		wxArrayString *arr = new wxArrayString();
-		arr->Add(_T("Local"));
+		arr->Add(L"Local");
 		for (int i = 0; encodingNames[i].real != NULL; i++) {
-			// Verify that iconv actually supports this encoding
+			// Verify that iconv actually supports converting to and from this encoding
 			iconv_t cd = iconv_open(encodingNames[i].real, WCHAR_T_ENCODING);
 			if (cd == iconv_invalid) continue;
 			iconv_close(cd);
@ -533,7 +456,5 @@ wxArrayString AegisubCSConv::GetEncodingsList() {
 	}
 	return *prettyEncodingList;
 }
-static AegisubCSConv localConv(_T("Local"), false);
+static AegisubCSConv localConv(L"Local", false);
 AegisubCSConv& csConvLocal(localConv);
--- a/aegisub/src/charset_conv.h
+++ b/aegisub/src/charset_conv.h
@ -1,4 +1,4 @@
-// Copyright (c) 2009, Thomas Goyne
+// Copyright (c) 2010, Thomas Goyne
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@ -34,9 +34,6 @@
 /// @ingroup utility
 ///
 #ifndef AGI_PRE
 #include <iconv.h>
 #include <wchar.h>
@ -49,63 +46,76 @@
 #include "aegisub_endian.h"
 #if !defined(_LIBICONV_VERSION) || _LIBICONV_VERSION < 0x010A || defined(LIBICONV_PLUG)
 /// DOCME
 #define ICONV_POSIX
 #endif
 /// @class iconv_wrapper
 /// @brief RAII wrapper for iconv
 ///
 /// Opens a conversion descriptor with iconv_open on construction and, if the
 /// open succeeded, releases it with iconv_close on destruction. A failed open
 /// leaves the wrapped descriptor equal to (iconv_t)-1; callers detect this by
 /// comparing the converted iconv_t value.
 class iconv_wrapper {
 private:
 	/// Wrapped descriptor, or (iconv_t)-1 if iconv_open failed
 	iconv_t conv;
 	// Not copyable: a copy would share the descriptor and both destructors
 	// would call iconv_close on it. Declared but intentionally not defined.
 	iconv_wrapper(const iconv_wrapper&);
 	iconv_wrapper& operator=(const iconv_wrapper&);
 public:
 	/// @param to   Name of the encoding to convert to
 	/// @param from Name of the encoding to convert from
 	iconv_wrapper(const char *to, const char *from)
 	: conv(iconv_open(to, from))
 	{ }
 	/// @param to   Name of the encoding to convert to
 	/// @param from Name of the encoding to convert from
 	iconv_wrapper(wxString const& to, wxString const& from)
 	: conv(iconv_open(to.ToAscii(), from.ToAscii()))
 	{ }
 	/// @param to   Name of the encoding to convert to
 	/// @param from Name of the encoding to convert from
 	iconv_wrapper(const char *to, wxString const& from)
 	: conv(iconv_open(to, from.ToAscii()))
 	{ }
 	/// @param to   Name of the encoding to convert to
 	/// @param from Name of the encoding to convert from
 	iconv_wrapper(wxString const& to, const char *from)
 	: conv(iconv_open(to.ToAscii(), from))
 	{ }
 	/// Close the descriptor, but only if iconv_open actually produced one
 	~iconv_wrapper() {
 		if (conv != (iconv_t)-1) iconv_close(conv);
 	}
 	/// Expose the raw descriptor so the wrapper can be passed to iconv calls
 	operator iconv_t() {
 		return conv;
 	}
 	/// Const overload of the raw descriptor accessor
 	operator const iconv_t() const {
 		return conv;
 	}
 };
 /// DOCME
 /// @class AegisubCSConv
-/// @brief DOCME
+/// @brief wxMBConv implementation for converting to and from unicode
 ///
 /// DOCME
 class AegisubCSConv : public wxMBConv {
 public:
-	// By default, any conversion that would be lossy will fail
+	/// @param mbEncName   Multibyte encoding to convert to/from
-	// When enableSubst is true, conversions to multibyte with a sufficiently large buffer
+	/// @param enableSubst Whether to substitute characters when needed.
-	// are guaranteed to succeed, with characters dropped or changed as needed to fit the
+	/// By default, any conversion that would be lossy will fail
-	// string into the target encoding.
+	/// When enableSubst is true, conversions to multibyte with a sufficiently
 	/// large buffer are guaranteed to succeed, with characters dropped or
 	/// changed as needed to fit the string into the target encoding.
 	AegisubCSConv(const wxChar *mbEncName, bool enableSubst = false);
 	virtual ~AegisubCSConv();
 	// wxMBConv implementation; see strconv.h for usage details
-	virtual size_t ToWChar(wchar_t *dst, size_t dstLen, const char *src, size_t srcLen = wxNO_LEN) const;
+	size_t ToWChar(wchar_t *dst, size_t dstLen, const char *src, size_t srcLen = wxNO_LEN) const;
-	virtual size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const;
+	size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const;
-	virtual size_t GetMBNulLen() const;
+	size_t GetMBNulLen() const;
-	virtual wxMBConv *Clone() const;
+	wxMBConv *Clone() const;
-	// Get the length (in bytes) of a null-terminated string whose encoding is mbEncName
+	/// @brief Multibyte-aware strlen
 	/// @return Length in bytes of str (excluding terminator)
 	size_t MBBuffLen(const char *str) const;
-	// Get a list of support encodings with somewhat user-friendly names
+	/// @brief Get a list of supported encodings with user-friendly names
 	static wxArrayString GetEncodingsList();
-	// Get a list of all encodings supported by iconv
+	/// @brief Get a list of all encodings supported by iconv
 	/// Requires GNU iconv for useful results
 	static wxArrayString GetAllSupportedEncodings();
-	// Map a user-friendly encoding name to iconv's name
+	/// @brief Map a user-friendly encoding name to the real encoding name
 	static wxString GetRealEncodingName(wxString name);
 protected:
 	/// DOCME
 	/// DOCME
 	iconv_t m2w, w2m;
 private:
-
+	// The smattering of mutable variables here are due to that ToWChar and
-	/// DOCME
+	// FromWChar are const in wxMBConv, but we require minor mutation for
 	// things like locks (as iconv is not thread-safe)
 	wxString wcCharsetName;
 	/// DOCME
 	wxString mbCharsetName;
-
+	mutable size_t mbNulLen;
 	/// DOCME
 	size_t   mbNulLen;
 	/// DOCME
 	bool enableSubst;
 	size_t doConversion(iconv_t cd, char *dst, size_t dstSize, char *src, size_t srcSize) const;
@ -117,56 +127,43 @@ private:
 		void *callback_arg,
 		void *convPtr);
-	/// DOCME
+	/// Replacement character for characters which do not fit in the target
 	/// encoding and iconv does not have an appropriate substitute for
 	char invalidRep[8];
 	/// DOCME
 	size_t invalidRepSize;
 #ifndef ICONV_POSIX
-
+	mutable iconv_fallbacks fallbacks;
 	/// DOCME
 	iconv_fallbacks fallbacks;
 #endif
 #if wxUSE_THREADS
-
+	mutable wxMutex iconvMutex;
 	/// DOCME
 	wxMutex iconvMutex;
 #endif
 protected:
 	iconv_wrapper m2w, w2m;
 };
-// Predefined conversion for the current locale. Should be a drop-in replacement for wxConvLocal
+// Predefined conversion for the current locale, intended to be a drop-in
 // replacement for wxConvLocal
 extern AegisubCSConv& csConvLocal;
 // WCHAR_T_ENCODING names the iconv encoding used for wchar_t data. It is
 // selected from SIZEOF_WCHAR_T and, where known, the build's byte order.
 #ifdef HAVE_BIG_ENDIAN
 #	if SIZEOF_WCHAR_T == 4
 /// Big-endian build with a 4-byte wchar_t
 #		define WCHAR_T_ENCODING "UTF-32BE"
 #	elif SIZEOF_WCHAR_T == 2
 /// Big-endian build with a 2-byte wchar_t
 #		define WCHAR_T_ENCODING "UTF-16BE"
 #	endif
 #elif defined(HAVE_LITTLE_ENDIAN)
 #	if SIZEOF_WCHAR_T == 4
 /// Little-endian build with a 4-byte wchar_t
 #		define WCHAR_T_ENCODING "UTF-32LE"
 #	elif SIZEOF_WCHAR_T == 2
 /// Little-endian build with a 2-byte wchar_t
 #		define WCHAR_T_ENCODING "UTF-16LE"
 #	endif
 #else
 // Neither byte-order macro is defined: here the macro expands to a
 // conditional expression evaluated at run time, not to a string literal
 #	if SIZEOF_WCHAR_T == 4
 /// 4-byte wchar_t; BE is chosen when Endian::MachineToBig leaves 1 unchanged
 #		define WCHAR_T_ENCODING ((Endian::MachineToBig((uint32_t)1) == 1) ? "UTF-32BE" : "UTF-32LE")
 #	elif SIZEOF_WCHAR_T == 2
 /// 2-byte wchar_t; BE is chosen when Endian::MachineToBig leaves 1 unchanged
 #		define WCHAR_T_ENCODING ((Endian::MachineToBig((uint32_t)1) == 1) ? "UTF-16BE" : "UTF-16LE")
 #	endif
 #endif
--- a/aegisub/src/text_file_reader.cpp
+++ b/aegisub/src/text_file_reader.cpp
@ -1,4 +1,4 @@
-// Copyright (c) 2005, Rodrigo Braz Monteiro
+// Copyright (c) 2010, Rodrigo Braz Monteiro, Thomas Goyne
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@ -51,13 +51,6 @@
 #endif
 #include "text_file_reader.h"
 /// @brief DOCME
 /// @param filename 
 /// @param enc      
 /// @param trim     
 /// @return 
 ///
 TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
 : encoding(enc), conv((iconv_t)-1), trim(trim), readComplete(false), currout(0), outptr(0), currentLine(0) {
 #ifdef __WINDOWS__
@ -65,29 +58,22 @@ TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
 #else
 	file.open(wxFNCONV(filename),std::ios::in | std::ios::binary);
 #endif
-	if (!file.is_open()) {
+	if (!file.is_open()) throw L"Failed opening file for reading.";
 		throw _T("Failed opening file for reading.");
 	}
 	if (encoding.IsEmpty()) encoding = GetEncoding(filename);
-	if (encoding == _T("binary")) return;
+	if (encoding == L"binary") return;
 	encoding = AegisubCSConv::GetRealEncodingName(encoding);
 	conv = iconv_open(WCHAR_T_ENCODING, encoding.ToAscii());
 	if (conv == (iconv_t)-1) {
 		throw wxString::Format(L"Character set '%s' is not supported.", enc.c_str());
 	}
 }
 /// @brief Destructor; releases the iconv descriptor if one was opened
 ///
 TextFileReader::~TextFileReader() {
 	// conv stays at (iconv_t)-1 when no converter was opened
 	const bool converterOpen = (conv != (iconv_t)-1);
 	if (converterOpen) {
 		iconv_close(conv);
 	}
 }
-
+wxString TextFileReader::GetEncoding(wxString const& filename) {
 /// @brief DOCME
 /// @param filename 
 /// @return 
 ///
 wxString TextFileReader::GetEncoding(const wxString filename) {
 	// Prepare
 	unsigned char b[4];
 	memset(b, 0, sizeof(b));
@ -100,27 +86,27 @@ wxString TextFileReader::GetEncoding(const wxString filename) {
 	ifile.open(wxFNCONV(filename));
 #endif
 	if (!ifile.is_open()) {
-		return _T("unknown");
+		return L"unknown";
 	}
 	ifile.read(reinterpret_cast<char *>(b),4);
 	ifile.close();
 	// Try to get the byte order mark from them
-	if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) return _T("UTF-8");
+	if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) return L"UTF-8";
-	else if (b[0] == 0xFF && b[1] == 0xFE && b[2] == 0x00 && b[3] == 0x00) return _T("UTF-32LE");
+	else if (b[0] == 0xFF && b[1] == 0xFE && b[2] == 0x00 && b[3] == 0x00) return L"UTF-32LE";
-	else if (b[0] == 0x00 && b[1] == 0x00 && b[2] == 0xFE && b[3] == 0xFF) return _T("UTF-32BE");
+	else if (b[0] == 0x00 && b[1] == 0x00 && b[2] == 0xFE && b[3] == 0xFF) return L"UTF-32BE";
-	else if (b[0] == 0xFF && b[1] == 0xFE) return _T("UTF-16LE");
+	else if (b[0] == 0xFF && b[1] == 0xFE) return L"UTF-16LE";
-	else if (b[0] == 0xFE && b[1] == 0xFF) return _T("UTF-16BE");
+	else if (b[0] == 0xFE && b[1] == 0xFF) return L"UTF-16BE";
-	else if (b[0] == 0x2B && b[1] == 0x2F && b[2] == 0x76) return _T("UTF-7");
+	else if (b[0] == 0x2B && b[1] == 0x2F && b[2] == 0x76) return L"UTF-7";
 	// Try to guess UTF-16
-	else if (b[0] == 0 && b[1] >= 32 && b[2] == 0 && b[3] >= 32) return _T("UTF-16BE");
+	else if (b[0] == 0 && b[1] >= 32 && b[2] == 0 && b[3] >= 32) return L"UTF-16BE";
-	else if (b[0] >= 32 && b[1] == 0 && b[2] >= 32 && b[3] == 0) return _T("UTF-16LE");
+	else if (b[0] >= 32 && b[1] == 0 && b[2] >= 32 && b[3] == 0) return L"UTF-16LE";
 	// If any of the first four bytes are under 0x20 (the first printable character),
 	// except for 9-13 range, assume binary
 	for (int i=0;i<4;i++) {
-		if (b[i] < 9 || (b[i] > 13 && b[i] < 32)) return _T("binary");
+		if (b[i] < 9 || (b[i] > 13 && b[i] < 32)) return L"binary";
 	}
 #ifdef WITH_UNIVCHARDET
@ -129,14 +115,10 @@ wxString TextFileReader::GetEncoding(const wxString filename) {
 	return det.GetEncoding(filename);
 #else
 	// Fall back to local
-	return _T("Local");
+	return L"local";
 #endif
 }
 /// @brief DOCME
 /// @return 
 ///
 wchar_t TextFileReader::GetWChar() {
 	// If there's already some converted characters waiting, return the next one
 	if (++currout < outptr) {
@ -174,7 +156,7 @@ wchar_t TextFileReader::GetWChar() {
 		// adding one byte to the input buffer until either it succeeds or we add enough bytes to
 		// complete any character
 		if (++bytesAdded > 3)
-			throw wxString::Format(_T("Invalid input character found near line %u"), currentLine);
+			throw wxString::Format(L"Invalid input character found near line %u", currentLine);
 		file.read(inptr + inbytesleft, 1);
 		inbytesleft++;
@ -183,34 +165,27 @@ wchar_t TextFileReader::GetWChar() {
 	if (outptr > outbuf)
 		return *currout;
-	throw wxString::Format(_T("Invalid input character found near line %u"), currentLine);
+	throw wxString::Format(L"Invalid input character found near line %u", currentLine);
 }
 /// @brief DOCME
 /// @return 
 ///
 wxString TextFileReader::ReadLineFromFile() {
 	wxString buffer;
-	size_t bufAlloc = 1024;
+	buffer.Alloc(1024);
 	buffer.Alloc(bufAlloc);
 	currentLine++;
 	// Read a line
 	wchar_t ch;
-	size_t len = 0;
+	bool first = true;
 	// This doesn't work for \r delimited files, but it's very unlikely
 	// that we'll run into one of those
 	for (ch = GetWChar(); ch != L'\n' && ch != 0; ch = GetWChar()) {
 		if (ch == L'\r') continue;
 		// Skip the BOM -- we don't need it as the encoding is already known
 		// and it sometimes causes conversion problems
-		if (ch == 0xFEFF && len == 0) continue;
+		if (ch == 0xFEFF && first) continue;
 		if (len >= bufAlloc - 1) {
 			bufAlloc *= 2;
 			buffer.Alloc(bufAlloc);
 		}
 		buffer += ch;
-		len++;
+		first = false;
 	}
 	if (ch == 0)
 		readComplete = true;
@ -223,36 +198,10 @@ wxString TextFileReader::ReadLineFromFile() {
 	return buffer;
 }
 /// @brief Check whether the reader has not yet finished reading
 /// @return false once readComplete has been set, true otherwise
 ///
 bool TextFileReader::HasMoreLines() {
 	if (readComplete) {
 		return false;
 	}
 	return true;
 }
 /// @brief Check that iconv can convert from an encoding to wchar_t
 /// @param enc Encoding name to check; "binary" is accepted without checking
 /// @throws wxString describing the failure if iconv cannot open a converter
 ///
 void TextFileReader::EnsureValid(wxString enc) {
 	if (enc == _T("binary")) return;
 	enc = AegisubCSConv::GetRealEncodingName(enc);
 	iconv_t cd = iconv_open(WCHAR_T_ENCODING, enc.ToAscii());
 	if (cd == (iconv_t)-1) {
 		// Nothing was opened, so there is no descriptor to hand to iconv_close
 		throw wxString::Format(_T("Character set %s is not supported."), enc.c_str());
 	}
 	// This was only a probe for support; release the descriptor right away
 	iconv_close(cd);
 }
 /// @brief Get the encoding name stored by this reader
 /// @return Copy of the encoding member
 ///
 wxString TextFileReader::GetCurrentEncoding() {
 	wxString current(encoding);
 	return current;
 }
--- a/aegisub/src/text_file_reader.h
+++ b/aegisub/src/text_file_reader.h
@ -1,4 +1,4 @@
-// Copyright (c) 2005, Rodrigo Braz Monteiro
+// Copyright (c) 2010, Rodrigo Braz Monteiro
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@ -45,62 +45,53 @@
 #include <wx/string.h>
 #endif
 /// DOCME
 /// @class TextFileReader
-/// @brief DOCME
+/// @brief A line-based text file reader
 ///
 /// DOCME
 class TextFileReader {
 private:
-
+	/// Encoding of the file being read
 	/// DOCME
 	wxString encoding;
 	/// DOCME
 	std::ifstream file;
 	/// DOCME
 	iconv_t conv;
 	/// DOCME
 	bool trim;
 	/// DOCME
 	bool readComplete;
-
+	// Iconv buffers and state
 	/// DOCME
 	wchar_t outbuf[256];
 	/// DOCME
 	wchar_t *currout;
 	/// DOCME
 	wchar_t *outptr;
 	/// DOCME
 	size_t  outbytesleft;
-
+	/// Current line number
 	/// DOCME
 	unsigned int currentLine;
 	/// @brief Read a single wchar_t from the file
 	wchar_t GetWChar();
 	TextFileReader(const TextFileReader&);
 	TextFileReader& operator=(const TextFileReader&);
 public:
-	TextFileReader(wxString filename,wxString encoding=_T(""), bool trim=true);
+	/// @brief Constructor
 	/// @param filename File to open
 	/// @param enc      Encoding to use, or empty to autodetect
 	/// @param trim     Whether to trim whitespace from lines read
 	TextFileReader(wxString filename,wxString encoding=L"", bool trim=true);
 	/// @brief Destructor
 	~TextFileReader();
 	/// @brief Read a line from the file
 	/// @return The line, possibly trimmed
 	wxString ReadLineFromFile();
 	/// @brief Check if there are any more lines to read
 	bool HasMoreLines();
-	static void EnsureValid(const wxString encoding);
+	/// @brief Get the file encoding used by this reader
 	/// @return "unknown", "binary", or a character encoding name
 	wxString GetCurrentEncoding();
-	static wxString GetEncoding(const wxString filename);
+
 	/// @brief Attempt to detect a file's encoding
 	/// @param filename The file to check
 	/// @return "unknown", "binary", or a character encoding name
 	static wxString GetEncoding(wxString const& filename);
 };