Move most character set conversion code to libaegisub and make everything use the new conversion functionality.

Originally committed to SVN as r4423.
2010-06-03 20:32:25 +00:00 · 2010-06-03 20:32:25 +00:00 · b6d29443a3
commit b6d29443a3
parent 7337a11745
32 changed files with 967 additions and 644 deletions
--- a/aegisub/build/libaegisub_vs2008/libaegisub_vs2008.vcproj
+++ b/aegisub/build/libaegisub_vs2008/libaegisub_vs2008.vcproj
@ -20,6 +20,7 @@
 	<Configurations>
 		<Configuration
 			Name="Debug|Win32"
+			IntermediateDirectory="$(ConfigurationName)"
 			ConfigurationType="4"
 			InheritedPropertySheets=".\wxlib_include.vsprops;.\libraries_outdirs.vsprops;.\compiler_options_debug.vsprops;..\aegisub_vs2008\wxlib_lib32.vsprops;..\aegisub_vs2008\suffix_debug32.vsprops;.\precomp_header.vsprops;.\src_msvc_include_dir.vsprops"
 			CharacterSet="1"
@ -190,11 +191,11 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\libaegisub\common\charset_ucd.cpp"
+				RelativePath="..\..\libaegisub\common\charset_conv.cpp"
 				>
 			</File>
 			<File
-				RelativePath="..\..\libaegisub\common\log.cpp"
+				RelativePath="..\..\libaegisub\common\charset_ucd.cpp"
 				>
 			</File>
 			<File
@ -231,6 +232,10 @@
 				RelativePath="..\..\libaegisub\windows\access.cpp"
 				>
 			</File>
+			<File
+				RelativePath="..\..\libaegisub\windows\charset_conv_win.cpp"
+				>
+			</File>
 			<File
 				RelativePath="..\..\libaegisub\windows\io.cpp"
 				>
@ -303,6 +308,18 @@
 				RelativePath="..\..\libaegisub\include\libaegisub\access.h"
 				>
 			</File>
+			<File
+				RelativePath="..\..\libaegisub\include\libaegisub\charset_conv.h"
+				>
+			</File>
+			<File
+				RelativePath="..\..\libaegisub\include\libaegisub\charset_conv_win.h"
+				>
+			</File>
+			<File
+				RelativePath="..\..\libaegisub\include\libaegisub\charsets.def"
+				>
+			</File>
 			<File
 				RelativePath="..\..\libaegisub\include\libaegisub\colour.h"
 				>
--- a/aegisub/build/tests_vs2008/tests_vs2008.vcproj
+++ b/aegisub/build/tests_vs2008/tests_vs2008.vcproj
@ -228,6 +228,10 @@
 			RelativePath="..\..\tests\libaegisub_cajun.cpp"
 			>
 		</File>
+		<File
+			RelativePath="..\..\tests\libaegisub_iconv.cpp"
+			>
+		</File>
 		<File
 			RelativePath="..\..\tests\libaegisub_mru.cpp"
 			>
--- a/aegisub/libaegisub/Makefile.am
+++ b/aegisub/libaegisub/Makefile.am
@ -21,6 +21,7 @@ endif

 libaegisub_2_2_la_SOURCES = \
 	common/charset.cpp \
+	common/charset_conv.cpp \
 	common/charset_ucd.cpp \
 	common/mru.cpp \
 	common/option.cpp \
--- a/aegisub/libaegisub/common/charset_conv.cpp
+++ b/aegisub/libaegisub/common/charset_conv.cpp
@ -0,0 +1,327 @@
+// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// $Id$
+
+/// @file charset_conv.cpp
+/// @brief Wrapper for libiconv to present a more C++-friendly API
+/// @ingroup libaegisub
+
+#ifndef LAGI_PRE
+#endif
+
+#include <libaegisub/charset_conv.h>
+#include <iconv.h>
+
+// Check if we can use advanced fallback capabilities added in GNU's iconv
+// implementation
+#if !defined(_LIBICONV_VERSION) || _LIBICONV_VERSION < 0x010A || defined(LIBICONV_PLUG)
+#define ICONV_POSIX
+#endif
+
+static const iconv_t iconv_invalid = (iconv_t)-1;
+static const size_t iconv_failed = (size_t)-1;
+
+namespace {
+	/// Ordering functor for C strings: compares the pointed-to characters
+	/// with strcmp rather than the pointer values, so that a std::map keyed
+	/// on const char* looks names up by content
+	struct ltstr {
+		bool operator()(const char* s1, const char* s2) const {
+			return strcmp(s1, s2) < 0;
+		}
+	};
+}
+
+/// @brief Map a user-friendly encoding name to the real encoding name
+/// @param name Either a pretty name listed in charsets.def or any other
+///             encoding name
+/// @return The real name paired with the pretty name in charsets.def, or
+///         name itself (same pointer) if it is not a known pretty name
+static const char* GetRealEncodingName(const char* name) {
+	static std::map<const char*, const char*, ltstr> prettyNames;
+
+	// Populate the lookup table on first use; each ADD(pretty, real) line in
+	// charsets.def expands to one map insertion
+	if (prettyNames.empty()) {
+#		define ADD(pretty, real) prettyNames[pretty] = real
+#		include <libaegisub/charsets.def>
+#		undef ADD
+	}
+
+	std::map<const char*, const char*, ltstr>::iterator real = prettyNames.find(name);
+	if (real != prettyNames.end()) {
+		return real->second;
+	}
+	return name;
+}
+
+
+namespace agi {
+	namespace charset {
+
+#ifdef ICONV_POSIX
+/// Converter used when ICONV_POSIX is defined: both constructor arguments
+/// are ignored and operator() forwards straight to iconv() with no
+/// substitution or fallback handling
+class IconvWrapper::Converter {
+public:
+	Converter(bool, const char*) { }
+	size_t operator()(iconv_t cd, char** inbuf, size_t* inbytesleft, char** outbuf, size_t* outbytesleft) {
+		return iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
+	}
+};
+#else
+/// Converter used when the GNU libiconv extensions are available. It
+/// derives from iconv_fallbacks so that the object itself can be passed to
+/// iconvctl(ICONV_SET_FALLBACKS), and it retries failed conversions with
+/// transliteration and then with a substitution character.
+class IconvWrapper::Converter : public iconv_fallbacks {
+private:
+	/// Whether failed conversions should be retried with substitution
+	bool subst;
+	/// "?" encoded in the target charset (filled in by the constructor)
+	char invalidRep[4];
+	/// Number of bytes of invalidRep actually used
+	size_t invalidRepSize;
+	/// Unicode-to-multibyte fallback installed as uc_to_mb_fallback
+	/// @param code         Code point that could not be converted
+	/// @param callback     Function to call with the replacement bytes
+	/// @param callback_arg Opaque argument passed back to callback
+	/// @param convPtr      The Converter instance (the `data` member set in
+	///                     the constructor)
+	static void fallback(
+		unsigned int code,
+		void (*callback) (const char *buf, size_t buflen, void* callback_arg),
+		void *callback_arg,
+		void *convPtr)
+	{
+		// At some point in the future, this should probably switch to a real mapping
+		// For now, there are just three cases: BOM to nothing, '\' to itself
+		// (for Shift-JIS, which does not have \) and everything else to '?'
+		if (code == 0xFEFF) return;
+		if (code == 0x5C) callback("\\", 1, callback_arg);
+		else {
+			Converter *self = static_cast<Converter *>(convPtr);
+			callback(self->invalidRep, self->invalidRepSize, callback_arg);
+		}
+	}
+public:
+	/// @param subst     If false, operator() performs a single plain iconv call
+	/// @param targetEnc Destination encoding; used to pre-encode the "?"
+	///                  substitution character
+	Converter(bool subst, const char* targetEnc)
+		: subst(subst)
+	{
+		// Fill in the inherited iconv_fallbacks members; only the
+		// unicode-to-multibyte direction has a handler
+		data = this;
+		mb_to_uc_fallback = NULL;
+		mb_to_wc_fallback = NULL;
+		uc_to_mb_fallback = fallback;
+		wc_to_mb_fallback = NULL;
+
+			char sbuff[] = "?";
+			char* src = sbuff;
+			char* dst = invalidRep;
+			size_t dstLen = 4;
+			size_t srcLen = 1;
+
+		// Convert "?" from UTF-8 to the target encoding with a temporary
+		// descriptor. NOTE(review): failures are only caught by assert, so
+		// they go unchecked when asserts are compiled out.
+		iconv_t cd = iconv_open(GetRealEncodingName(targetEnc), "UTF-8");
+		assert(cd != iconv_invalid);
+		size_t res = iconv(cd, &src, &srcLen, &dst, &dstLen);
+		assert(res != iconv_failed);
+		assert(srcLen == 0);
+		iconv_close(cd);
+
+		invalidRepSize = 4 - dstLen;
+	}
+	/// Same signature and return convention as iconv(). When substitution is
+	/// enabled, an EILSEQ failure is retried first with transliteration and
+	/// then with the fallback handler; errno on return reflects the last
+	/// conversion attempt rather than any iconvctl call.
+	size_t operator()(iconv_t cd, char** inbuf, size_t* inbytesleft, char** outbuf, size_t* outbytesleft) {
+		size_t res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
+
+		if (!subst) return res;
+
+		// Save original errno so we can return it rather than the result from iconvctl
+		int err = errno;
+
+		// Some characters in the input string do not exist in the output encoding
+		if (res == iconv_failed && err == EILSEQ) {
+			// first try transliteration only
+			int transliterate = 1;
+			iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
+			res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
+			err = errno;
+			transliterate = 0;
+			iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
+		}
+		if (res == iconv_failed && err == EILSEQ) {
+			// Conversion still failed with transliteration enabled, so try our substitution
+			iconvctl(cd, ICONV_SET_FALLBACKS, this);
+			res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
+			err = errno;
+			iconvctl(cd, ICONV_SET_FALLBACKS, NULL);
+		}
+		if (res == iconv_failed && err == E2BIG && *outbytesleft == 0) {
+			// Check for E2BIG false positives: convert the remaining input
+			// into a scratch buffer, using copies of the caller's input
+			// pointer and length so that those are left untouched
+			char buff[4];
+			size_t buffsize = 4;
+			char* out = buff;
+			char* in = *inbuf;
+			size_t insize = *inbytesleft;
+
+			iconvctl(cd, ICONV_SET_FALLBACKS, this);
+			res = iconv(cd, &in, &insize, &out, &buffsize);
+			// If no bytes of the output buffer were used, the original
+			// conversion may have been successful
+			if (buffsize == 4) {
+				err = errno;
+			}
+			else {
+				res = iconv_failed;
+			}
+			iconvctl(cd, ICONV_SET_FALLBACKS, NULL);
+		}
+
+		errno = err;
+		return res;
+	}
+};
+#endif
+
+// Calculate the size of NUL in the given character set
+// @param encoding Encoding name; pretty names are resolved first
+// @return Number of bytes written when converting a single UTF-8 NUL byte
+//         to the given encoding
+// NOTE(review): failures are only detected via assert; when asserts are
+// compiled out an invalid descriptor would be passed on to iconv unchecked
+static size_t NulSize(const char* encoding) {
+	// We need a character set to convert from with a known encoding of NUL
+	// UTF-8 seems like the obvious choice
+	iconv_t cd = iconv_open(GetRealEncodingName(encoding), "UTF-8");
+	assert(cd != iconv_invalid);
+
+	char dbuff[4];
+	char sbuff[] = "";
+	char* dst = dbuff;
+	char* src = sbuff;
+	size_t dstLen = sizeof(dbuff);
+	// One byte of input: the terminator of the empty string literal
+	size_t srcLen = 1;
+
+	size_t ret = iconv(cd, &src, &srcLen, &dst, &dstLen);
+	assert(ret != iconv_failed);
+	assert(dst - dbuff > 0);
+	iconv_close(cd);
+
+	return dst - dbuff;
+}
+
+/// Open a conversion descriptor from sourceEncoding to destEncoding (pretty
+/// names are resolved first) and set up the NUL widths and the Converter.
+/// @throws UnsupportedConversion if iconv_open rejects the encoding pair
+IconvWrapper::IconvWrapper(const char* sourceEncoding, const char* destEncoding, bool enableSubst)
+: toNulLen(0)
+, fromNulLen(0)
+, conv(NULL)
+{
+	cd = iconv_open(GetRealEncodingName(destEncoding), GetRealEncodingName(sourceEncoding));
+	if (cd == iconv_invalid) {
+		throw UnsupportedConversion(std::string("Cannot convert from ") + sourceEncoding + " to " + destEncoding);
+	}
+
+	// These need to be set only after we verify that the source and dest
+	// charsets are valid
+	toNulLen = NulSize(destEncoding);
+	fromNulLen = NulSize(sourceEncoding);
+	conv.reset(new Converter(enableSubst, destEncoding));
+}
+/// Close the conversion descriptor if one was successfully opened
+IconvWrapper::~IconvWrapper() {
+	if (cd != iconv_invalid) iconv_close(cd);
+}
+
+/// Convenience overload: convert source and return the result by value.
+/// Delegates to the (source, dest) overload.
+std::string IconvWrapper::Convert(std::string const& source) {
+	std::string dest;
+	Convert(source, dest);
+	return dest;
+}
+/// Convert source into dest. The source is converted twice: once by
+/// RequiredBufferSize to size dest, and once more to fill it.
+void IconvWrapper::Convert(std::string const& source, std::string &dest) {
+	/// @todo Investigate if it's worth using ropes to avoid having to convert
+	///       everything twice. It probably isn't.
+	size_t len = RequiredBufferSize(source);
+	dest.resize(len);
+	
+	// This is technically invalid as C++03 does not require that strings use
+	// a single contiguous block of memory. However, no implementation has ever
+	// not done so and C++0x does require that it be contiguous
+	// NOTE(review): dest[0] is also evaluated when len is 0 (empty result)
+	Convert(source.data(), source.size(), &dest[0], len);
+}
+
+/// Convert a raw buffer, translating iconv failures into exceptions.
+/// @param source     Input bytes in the source charset
+/// @param sourceSize Input length in bytes, or (size_t)-1 to measure it
+///                   with SrcStrLen
+/// @param dest       Output buffer
+/// @param destSize   Size of the output buffer in bytes
+/// @return The value returned by the underlying converter call.
+///         NOTE(review): this is iconv's return value, not the number of
+///         bytes written to dest -- confirm callers do not expect the latter
+/// @throws BufferTooSmall    on E2BIG
+/// @throws BadInput          on EINVAL
+/// @throws BadOutput         on EILSEQ
+/// @throws ConversionFailure on any other errno
+size_t IconvWrapper::Convert(const char* source, size_t sourceSize, char *dest, size_t destSize) {
+	if (sourceSize == (size_t)-1) {
+		sourceSize = SrcStrLen(source);
+	}
+	// POSIX requires that inbuf be const char **, but libiconv uses char**
+	size_t res = (*conv)(cd, const_cast<char **>(&source), &sourceSize, &dest, &destSize);
+
+	if (res == iconv_failed) {
+		switch (errno) {
+			case E2BIG:
+				throw BufferTooSmall(
+					"Destination buffer was not large enough to fit converted "
+					"string.");
+			case EINVAL:
+				throw BadInput(
+					"One or more characters in the input string were not valid "
+					"characters in the given input encoding");
+			case EILSEQ:
+				throw BadOutput(
+					"One or more characters could not be converted to the "
+					"selected target encoding and the version of iconv "
+					"Aegisub was built with does not have useful fallbacks. "
+					"For best results, please build Aegisub using a recent "
+					"version of GNU iconv.");
+			default:
+				throw ConversionFailure("An unknown conversion failure occured");
+		}
+	}
+	return res;
+}
+
+/// Bare pass-through to the Converter with iconv()'s calling convention;
+/// failures are reported through the return value and errno, no exception
+/// is thrown here
+size_t IconvWrapper::Convert(const char** source, size_t* sourceSize, char** dest, size_t* destSize) {
+	return (*conv)(cd, const_cast<char **>(source), sourceSize, dest, destSize);
+}
+
+/// Overload for std::string: measures str.size() bytes of str.data(), so
+/// no terminator is counted
+size_t IconvWrapper::RequiredBufferSize(std::string const& str) {
+	return RequiredBufferSize(str.data(), str.size());
+}
+
+/// Compute how many bytes the given input occupies in the destination
+/// charset by converting it, 512 bytes at a time, into a scratch buffer
+/// that is discarded.
+/// @param src    Input bytes in the source charset
+/// @param srcLen Input length in bytes
+/// @return Total number of output bytes produced
+/// @throws BadInput          on EINVAL
+/// @throws BadOutput         on EILSEQ
+/// @throws ConversionFailure on any other errno except E2BIG
+/// NOTE(review): this runs the real conversion on the shared descriptor cd
+/// and does not reset it afterwards -- confirm that is acceptable for
+/// stateful encodings
+size_t IconvWrapper::RequiredBufferSize(const char* src, size_t srcLen) {
+	char buff[512];
+	size_t charsWritten = 0;
+	size_t res;
+
+	// E2BIG just means the scratch buffer filled up: count what was written
+	// and continue from where the converter left off (src/srcLen are
+	// advanced by the call)
+	do {
+		char* dst = buff;
+		size_t dstSize = sizeof(buff);
+		res = (*conv)(cd, const_cast<char **>(&src), &srcLen, &dst, &dstSize);
+
+		charsWritten += dst - buff;
+	} while (res == iconv_failed && errno == E2BIG);
+
+	if (res == iconv_failed) {
+		switch (errno) {
+			case EINVAL:
+				throw BadInput(
+					"One or more characters in the input string were not valid "
+					"characters in the given input encoding");
+			case EILSEQ:
+				throw BadOutput(
+					"One or more characters could not be converted to the "
+					"selected target encoding and the version of iconv "
+					"Aegisub was built with does not have useful fallbacks. "
+					"For best results, please build Aegisub using a recent "
+					"version of GNU iconv.");
+			default:
+				throw ConversionFailure("An unknown conversion failure occured");
+		}
+	}
+	return charsWritten;
+}
+
+/// strlen for strings whose terminator is nulLen bytes wide
+/// @param str    String to measure
+/// @param nulLen Width of the terminator in bytes (1, 2 or 4)
+/// @return Length in bytes excluding the terminator, or (size_t)-1 if
+///         nulLen is not 1, 2 or 4
+/// NOTE(review): the 2- and 4-byte cases read str through uint16_t/uint32_t
+/// pointers, which presumably requires str to be suitably aligned -- confirm
+static size_t mbstrlen(const char* str, size_t nulLen) {
+	const char *ptr;
+	switch (nulLen) {
+		case 1:
+			return strlen(str);
+		case 2:
+			for (ptr = str; *reinterpret_cast<const uint16_t *>(ptr) != 0; ptr += 2) ;
+			return ptr - str;
+		case 4:
+			for (ptr = str; *reinterpret_cast<const uint32_t *>(ptr) != 0; ptr += 4) ;
+			return ptr - str;
+		default:
+			return (size_t)-1;
+	}
+}
+
+/// Byte length of a terminated string in the source charset, using the
+/// terminator width computed in the constructor
+size_t IconvWrapper::SrcStrLen(const char* str) {
+	return mbstrlen(str, fromNulLen);
+
+}
+/// Byte length of a terminated string in the destination charset, using
+/// the terminator width computed in the constructor
+size_t IconvWrapper::DstStrLen(const char* str) {
+	return mbstrlen(str, toNulLen);
+}
+	}
+}
--- a/aegisub/libaegisub/include/libaegisub/charset_conv.h
+++ b/aegisub/libaegisub/include/libaegisub/charset_conv.h
@ -0,0 +1,107 @@
+// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// $Id$
+
+/// @file charset_conv.h
+/// @brief Wrapper for libiconv to present a more C++-friendly API
+/// @ingroup libaegisub
+
+#ifndef LAGI_PRE
+#include <string.h>
+#include <memory>
+#include <string>
+#include <vector>
+#endif
+
+#include <libaegisub/exception.h>
+
+namespace agi {
+	namespace charset {
+
+// Exception hierarchy for charset conversion: ConvError is the base,
+// UnsupportedConversion and ConversionFailure derive from it, and the
+// three specific failures derive from ConversionFailure.
+// NOTE(review): the name strings of BadInput and BadOutput mention EILSEQ
+// and EINVAL respectively, but IconvWrapper::Convert throws BadInput on
+// EINVAL and BadOutput on EILSEQ -- the two strings look swapped; confirm
+// before anything relies on them.
+DEFINE_BASE_EXCEPTION_NOINNER(ConvError, Exception)
+DEFINE_SIMPLE_EXCEPTION_NOINNER(UnsupportedConversion, ConvError, "iconv/unsupported")
+DEFINE_SIMPLE_EXCEPTION_NOINNER(ConversionFailure, ConvError, "iconv/failed")
+DEFINE_SIMPLE_EXCEPTION_NOINNER(BufferTooSmall, ConversionFailure, "iconv/failed/E2BIG")
+DEFINE_SIMPLE_EXCEPTION_NOINNER(BadInput, ConversionFailure, "iconv/failed/EILSEQ")
+DEFINE_SIMPLE_EXCEPTION_NOINNER(BadOutput, ConversionFailure, "iconv/failed/EINVAL")
+
+/// @brief Get a list of supported encodings with user-friendly names
+/// @tparam T Container type; it must be default-constructible and provide
+///           empty() and push_back() accepting a string literal
+/// @return Reference to a function-local static list (one per T) holding
+///         the pretty names from charsets.def, built on first call
+template<class T>
+T const& GetEncodingsList() {
+	static T nameList;
+	if (nameList.empty()) {
+#		define ADD(pretty, real) nameList.push_back(pretty)
+#		include <libaegisub/charsets.def>
+#		undef ADD
+	}
+	return nameList;
+}
+
+typedef void* iconv_t;
+
+/// @brief A C++ wrapper for iconv
+///
+/// NOTE(review): no copy constructor or assignment operator is declared
+/// here, while the destructor closes cd -- copying an instance would
+/// presumably close the same descriptor twice; confirm instances are never
+/// copied.
+class IconvWrapper {
+private:
+	// Helper class that abstracts away the differences between libiconv and
+	// POSIX iconv implementations
+	class Converter;
+
+	/// iconv conversion descriptor
+	iconv_t cd;
+	/// Width in bytes of NUL in the destination charset
+	size_t toNulLen;
+	/// Width in bytes of NUL in the source charset
+	size_t fromNulLen;
+	std::auto_ptr<Converter> conv;
+
+public:
+	/// @brief Create a converter
+	/// @param sourceEncoding Source encoding name, may be a pretty name
+	/// @param destEncoding   Destination encoding name, may be a pretty name
+	/// @param enableSubst    If true, when possible characters will be
+	///                       mutilated or dropped rather than letting a
+	///                       conversion fail
+	IconvWrapper(const char* sourceEncoding, const char* destEncoding, bool enableSubst = true);
+	~IconvWrapper();
+
+	/// @brief Convert a string from the source to destination charset
+	/// @param source String to convert
+	/// @return Converted string. Note that std::string always uses a single byte
+	///         terminator, so c_str() may not return a valid string if the dest
+	///         charset has wider terminators
+	std::string Convert(std::string const& source);
+	/// @brief Convert a string from the source to destination charset
+	/// @param source String to convert
+	/// @param[out] dest String to place the result in
+	void Convert(std::string const& source, std::string &dest);
+	/// @brief Convert a raw buffer from the source to destination charset
+	/// @param source     Buffer in the source charset
+	/// @param sourceSize Length of source in bytes, or (size_t)-1 to
+	///                   autodetect
+	/// @param[out] dest  Buffer to place the result in
+	/// @param destSize   Size of dest in bytes
+	size_t Convert(const char* source, size_t sourceSize, char* dest, size_t destSize);
+	/// Bare wrapper around iconv; see iconv documentation for details
+	size_t Convert(const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
+
+	/// @brief Get the buffer size required to fit the source string in the target charset
+	/// @param source A string in the source charset
+	/// @param sourceSize Length of the source in bytes
+	/// @return Bytes required, including NUL terminator if applicable
+	size_t RequiredBufferSize(const char* source, size_t sourceSize);
+	/// @brief Get the buffer size required to fit the source string in the target charset
+	/// @param str A string in the source charset
+	/// @return Bytes required, not including space needed for NUL terminator
+	size_t RequiredBufferSize(std::string const& str);
+
+	/// Encoding-aware strlen for strings encoded in the source charset
+	size_t SrcStrLen(const char* str);
+	/// Encoding-aware strlen for strings encoded in the destination charset
+	size_t DstStrLen(const char* str);
+};
+
+	}
+}
--- a/aegisub/libaegisub/include/libaegisub/charset_conv_win.h
+++ b/aegisub/libaegisub/include/libaegisub/charset_conv_win.h
@ -0,0 +1,29 @@
+// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// $Id$
+
+/// @file charset_conv_win.h
+/// @brief Windows-specific charset conversion stuff
+/// @ingroup libaegisub windows
+
+#include <libaegisub/charset_conv.h>
+
+namespace agi {
+	namespace charset {
+		/// Convert a UTF-8 string to a string suitable for use with Win32 API functions
+		std::wstring ConvertW(std::string const& src);
+		/// Convert a wide string, as used by Win32 API functions, to a UTF-8 string
+		std::string ConvertW(std::wstring const& src);
+	}
+}
--- a/aegisub/libaegisub/include/libaegisub/charsets.def
+++ b/aegisub/libaegisub/include/libaegisub/charsets.def
@ -0,0 +1,116 @@
+ADD("Local",                             "");
+
+ADD("Unicode (UTF-8)",                   "utf-8");
+ADD("Unicode (UTF-16)",                  "utf-16");
+ADD("Unicode (UTF-16BE)",                "utf-16be");
+ADD("Unicode (UTF-16LE)",                "utf-16le");
+ADD("Unicode (UTF-32)",                  "utf-32");
+ADD("Unicode (UTF-32BE)",                "utf-32be");
+ADD("Unicode (UTF-32LE)",                "utf-32le");
+ADD("Unicode (UTF-7)",                   "utf-7");
+
+ADD("Arabic (IBM-864)",                  "ibm864");
+ADD("Arabic (IBM-864-I)",                "ibm864i");
+ADD("Arabic (ISO-8859-6)",               "iso-8859-6");
+ADD("Arabic (ISO-8859-6-E)",             "iso-8859-6-e");
+ADD("Arabic (ISO-8859-6-I)",             "iso-8859-6-i");
+ADD("Arabic (Langbox ISO-8859-6.16)",    "x-iso-8859-6-16");
+ADD("Arabic (Langbox ISO-8859-6.8x)",    "x-iso-8859-6-8-x");
+ADD("Arabic (MacArabic)",                "x-mac-arabic");
+ADD("Arabic (Windows-1256)",             "windows-1256");
+
+ADD("Armenian (ARMSCII-8)",              "armscii-8");
+
+ADD("Baltic (ISO-8859-13)",              "iso-8859-13");
+ADD("Baltic (ISO-8859-4)",               "iso-8859-4");
+ADD("Baltic (Windows-1257)",             "windows-1257");
+
+ADD("Celtic (ISO-8859-14)",              "iso-8859-14");
+
+ADD("Central European (IBM-852)",        "ibm852");
+ADD("Central European (ISO-8859-2)",     "iso-8859-2");
+ADD("Central European (MacCE)",          "x-mac-ce");
+ADD("Central European (Windows-1250)",   "windows-1250");
+
+ADD("Chinese Simplified (GB18030)",      "gb18030");
+ADD("Chinese Simplified (GB2312)",       "gb2312");
+ADD("Chinese Simplified (GBK)",          "x-gbk");
+ADD("Chinese Simplified (HZ)",           "hz-gb-2312");
+ADD("Chinese Simplified (ISO-2022-CN)",  "iso-2022-cn");
+ADD("Chinese Traditional (Big5)",        "big5");
+ADD("Chinese Traditional (Big5-HKSCS)",  "big5-hkscs");
+ADD("Chinese Traditional (EUC-TW)",      "x-euc-tw");
+
+ADD("Croatian (MacCroatian)",            "x-mac-croatian");
+
+ADD("Cyrillic (IBM-855)",                "ibm855");
+ADD("Cyrillic (ISO-8859-5)",             "iso-8859-5");
+ADD("Cyrillic (ISO-IR-111)",             "iso-ir-111");
+ADD("Cyrillic (KOI8-R)",                 "koi8-r");
+ADD("Cyrillic (MacCyrillic)",            "x-mac-cyrillic");
+ADD("Cyrillic (Windows-1251)",           "windows-1251");
+ADD("Cyrillic/Russian (CP-866)",         "ibm866");
+ADD("Cyrillic/Ukrainian (KOI8-U)",       "koi8-u");
+ADD("Cyrillic/Ukrainian (MacUkrainian)", "x-mac-ukrainian");
+
+ADD("English (US-ASCII)",                "us-ascii");
+
+ADD("Farsi (MacFarsi)",                  "x-mac-farsi");
+
+ADD("Georgian (GEOSTD8)",                "geostd8");
+
+ADD("Greek (ISO-8859-7)",                "iso-8859-7");
+ADD("Greek (MacGreek)",                  "x-mac-greek");
+ADD("Greek (Windows-1253)",              "windows-1253");
+
+ADD("Gujarati (MacGujarati)",            "x-mac-gujarati");
+ADD("Gurmukhi (MacGurmukhi)",            "x-mac-gurmukhi");
+
+ADD("Hebrew (IBM-862)",                  "ibm862");
+ADD("Hebrew (ISO-8859-8-E)",             "iso-8859-8-e");
+ADD("Hebrew (ISO-8859-8-I)",             "iso-8859-8-i");
+ADD("Hebrew (MacHebrew)",                "x-mac-hebrew");
+ADD("Hebrew (Windows-1255)",             "windows-1255");
+ADD("Hebrew Visual (ISO-8859-8)",        "iso-8859-8");
+
+ADD("Hindi (MacDevanagari)",             "x-mac-devanagari");
+ADD("Hindi (SunDevanagari)",             "x-sun-unicode-india-0");
+
+ADD("Icelandic (MacIcelandic)",          "x-mac-icelandic");
+
+ADD("Japanese (EUC-JP)",                 "euc-jp");
+ADD("Japanese (ISO-2022-JP)",            "iso-2022-jp");
+ADD("Japanese (Shift_JIS)",              "shift_jis");
+
+ADD("Korean (EUC-KR)",                   "euc-kr");
+ADD("Korean (ISO-2022-KR)",              "iso-2022-kr");
+ADD("Korean (JOHAB)",                    "x-johab");
+ADD("Korean (UHC)",                      "x-windows-949");
+
+ADD("Nordic (ISO-8859-10)",              "iso-8859-10");
+
+ADD("Romanian (ISO-8859-16)",            "iso-8859-16");
+ADD("Romanian (MacRomanian)",            "x-mac-romanian");
+
+ADD("South European (ISO-8859-3)",       "iso-8859-3");
+
+ADD("Thai (IBM-874)",                    "ibm874");
+ADD("Thai (ISO-8859-11)",                "iso-8859-11");
+ADD("Thai (TIS-620)",                    "tis-620");
+ADD("Thai (Windows-874)",                "windows-874");
+
+ADD("Turkish (IBM-857)",                 "ibm857");
+ADD("Turkish (ISO-8859-9)",              "iso-8859-9");
+ADD("Turkish (MacTurkish)",              "x-mac-turkish");
+ADD("Turkish (Windows-1254)",            "windows-1254");
+
+ADD("Vietnamese (TCVN)",                 "x-viet-tcvn5712");
+ADD("Vietnamese (VISCII)",               "viscii");
+ADD("Vietnamese (VPS)",                  "x-viet-vps");
+ADD("Vietnamese (Windows-1258)",         "windows-1258");
+
+ADD("Western (IBM-850)",                 "ibm850");
+ADD("Western (ISO-8859-1)",              "iso-8859-1");
+ADD("Western (ISO-8859-15)",             "iso-8859-15");
+ADD("Western (MacRoman)",                "x-mac-roman");
+ADD("Western (Windows-1252)",            "windows-1252");
--- a/aegisub/libaegisub/lagi_pre.h
+++ b/aegisub/libaegisub/lagi_pre.h
@ -6,6 +6,7 @@
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdint.h>
+#include <string.h>
 #include <sys/stat.h>
 #include <time.h>

--- a/aegisub/libaegisub/windows/access.cpp
+++ b/aegisub/libaegisub/windows/access.cpp
@ -25,8 +25,9 @@
 #include <fstream>
 #endif

-#include "libaegisub/util.h"
-#include "libaegisub/util_win.h"
+#include <libaegisub/charset_conv_win.h>
+#include <libaegisub/util.h>
+#include <libaegisub/util_win.h>

 namespace agi {
 	namespace acs {
@ -57,8 +58,7 @@ is a short (and incomplete) todo
   requires detecting the filesystem being used.
 */
 void Check(const std::string &file, acs::Type type) {
-	std::wstring wfile;
-	wfile.assign(file.begin(), file.end());
+	std::wstring wfile = agi::charset::ConvertW(file);

 	SECURITY_DESCRIPTOR* sd;
 	DWORD len = 0;
--- a/aegisub/libaegisub/windows/charset_conv_win.cpp
+++ b/aegisub/libaegisub/windows/charset_conv_win.cpp
@ -0,0 +1,49 @@
+// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// $Id$
+
+/// @file charset_conv_win.cpp
+/// @brief Windows-specific charset conversion stuff
+/// @ingroup libaegisub windows
+
+#include <libaegisub/charset_conv_win.h>
+
+namespace agi {
+	namespace charset {
+
+/// Convert a UTF-8 string to a UTF-16LE wide string.
+/// Uses a function-local static converter with substitution disabled, so
+/// conversion failures surface as exceptions from IconvWrapper.
+/// NOTE(review): the result length is the UTF-16LE byte count divided by
+/// sizeof(wchar_t), which assumes wchar_t is 2 bytes wide -- presumably
+/// fine since this file is Windows-only; confirm.
+std::wstring ConvertW(std::string const& source) {
+	static IconvWrapper w32Conv("utf-8", "utf-16le", false);
+
+	std::wstring dest;
+	size_t len = w32Conv.RequiredBufferSize(source);
+	dest.resize(len / sizeof(wchar_t));
+	w32Conv.Convert(source.data(), source.size(), reinterpret_cast<char *>(&dest[0]), len);
+	return dest;
+}
+
+/// Convert a UTF-16LE wide string to a UTF-8 string.
+/// Uses a function-local static converter with substitution disabled; the
+/// wide string's storage is handed to the converter as raw bytes.
+std::string ConvertW(std::wstring const& source) {
+	static IconvWrapper w32Conv("utf-16le", "utf-8", false);
+
+	std::string dest;
+	// Length in bytes, excluding the terminator
+	size_t srcLen = source.size() * sizeof(wchar_t);
+	const char* src = reinterpret_cast<const char *>(source.c_str());
+	size_t len = w32Conv.RequiredBufferSize(src, srcLen);
+	dest.resize(len);
+	w32Conv.Convert(src, srcLen, &dest[0], len);
+	return dest;
+}
+
+	}
+}
--- a/aegisub/libaegisub/windows/io.cpp
+++ b/aegisub/libaegisub/windows/io.cpp
@ -26,6 +26,7 @@
 #include <fstream>
 #endif

+#include <libaegisub/charset_conv_win.h>
 #include "libaegisub/io.h"
 #include "libaegisub/log.h"
 #include "libaegisub/util.h"
@ -34,11 +35,13 @@
 namespace agi {
 	namespace io {

+using agi::charset::ConvertW;
+
 std::ifstream* Open(const std::string &file) {
 	LOG_D("agi/io/open/file") << file;
 	acs::CheckFileRead(file);

-	std::ifstream *stream = new std::ifstream(file.c_str());
+	std::ifstream *stream = new std::ifstream(ConvertW(file).c_str());

 	if (stream->fail()) {
 		delete stream;
@ -53,7 +56,7 @@ Save::Save(const std::string& file): file_name(file) {
 	LOG_D("agi/io/save/file") << file;
 	const std::string pwd = util::DirName(file);

-	acs::CheckDirWrite(pwd.c_str());
+	acs::CheckDirWrite(pwd);

 	try {
 		acs::CheckFileWrite(file);
@ -61,23 +64,19 @@ Save::Save(const std::string& file): file_name(file) {
 		// If the file doesn't exist we create a 0 byte file, this so so
 		// util::Rename will find it, and to let users know something went
 		// wrong by leaving a 0 byte file.
-		std::ofstream fp_touch(file.c_str());
+		std::ofstream fp_touch(ConvertW(file).c_str());
 	}

 	/// @todo This is a temp hack, proper implementation needs to come after
 	///       Windows support is added.  The code in the destructor needs fixing
 	///       as well.
-	const std::string tmp = file + "_tmp";
-
 	// This will open to file.XXXX. (tempfile)
-	fp = new std::ofstream(tmp.c_str());
+	fp = new std::ofstream(ConvertW(file + "_tmp").c_str());
 }

 Save::~Save() {
-
-	const std::string tmp(file_name + "_tmp");
 	delete fp;
-	util::Rename(tmp, file_name);
+	util::Rename(file_name + "_tmp", file_name);
 }

 std::ofstream& Save::Get() {
--- a/aegisub/libaegisub/windows/util.cpp
+++ b/aegisub/libaegisub/windows/util.cpp
@ -30,23 +30,22 @@

 #endif

-//#include <string.h>
 #include "libaegisub/types.h"
+#include <libaegisub/charset_conv_win.h>
 #include "libaegisub/util.h"
 #include "libaegisub/util_win.h"

 namespace agi {
 	namespace util {

+using agi::charset::ConvertW;

 const std::string DirName(const std::string& path) {
 	if (path.find('/') == std::string::npos) {
-		const std::string cwd(".");
-		return cwd;
+		return ".";
 	}

-	const std::string stripped = path.substr(0, path.rfind("/")+1);
-	return stripped;
+	return path.substr(0, path.rfind("/")+1);
 }

 void Rename(const std::string& from, const std::string& to) {
@ -58,19 +57,18 @@ void Rename(const std::string& from, const std::string& to) {
 		acs::CheckDirWrite(DirName(to));
 	}

-	MoveFileExA(from.c_str(), to.c_str(), MOVEFILE_REPLACE_EXISTING);
+	MoveFileEx(ConvertW(from).c_str(), ConvertW(to).c_str(), MOVEFILE_REPLACE_EXISTING);
 }

 std::string ErrorString(DWORD error) {
-	LPSTR lpstr = NULL;
+	LPWSTR lpstr = NULL;

-	if(FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, error, 0, (LPSTR)&lpstr, 0, NULL) == 0) {
+	if(FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, error, 0, reinterpret_cast<LPWSTR>(&lpstr), 0, NULL) == 0) {
 		/// @todo Return the actual 'unknown error' string from windows.
-		std::string str("Unknown Error");		
-		return str;
+		return "Unknown Error";
 	}

-	std::string str(lpstr);
+	std::string str = ConvertW(lpstr);
 	LocalFree(lpstr);
 	return str;
 }
--- a/aegisub/src/charset_conv.cpp
+++ b/aegisub/src/charset_conv.cpp
@ -42,107 +42,21 @@
 #include <errno.h>
 #include <stdint.h>

-#include <wx/hashmap.h>
 #include <wx/intl.h>
 #endif

-WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash);
+class AegisubCSConvImpl : public AegisubCSConv {
+public:
+	AegisubCSConvImpl() { }
+};

-#if wxUSE_THREADS
-static wxMutex encodingListMutex;
-#endif
-
-static const iconv_t iconv_invalid = (iconv_t)-1;
-static const size_t  iconv_failed  = (size_t)-1;
-#define ICONV_CONST_CAST(a) const_cast<ICONV_CONST char *>(a)
-
-static wxArrayString   *supportedEncodings = NULL;
-static wxArrayString   *prettyEncodingList = NULL;
-static PrettyNamesHash *prettyEncodingHash = NULL;
-
-AegisubCSConv::AegisubCSConv(const wxChar *mbEncName, bool enableSubst)
-: wcCharsetName(WCHAR_T_ENCODING)
-, mbCharsetName(GetRealEncodingName(mbEncName))
-, mbNulLen(0)
-, enableSubst(enableSubst)
-, m2w(wcCharsetName, mbCharsetName)
-, w2m(mbCharsetName, wcCharsetName)
+AegisubCSConv::AegisubCSConv()
+: conv("wchar_t", "")
 {
-	if (m2w == iconv_invalid || w2m == iconv_invalid) {
-		throw wxString::Format(L"Character set %s is not supported.", mbEncName);
 }

-	if (enableSubst) {
-		invalidRepSize = FromWChar(invalidRep, sizeof(invalidRep), L"?") - GetMBNulLen();
-
-#ifndef ICONV_POSIX
-		fallbacks.data = this;
-		fallbacks.mb_to_uc_fallback = NULL;
-		fallbacks.mb_to_wc_fallback = NULL;
-		fallbacks.uc_to_mb_fallback = ucToMbFallback;
-		fallbacks.wc_to_mb_fallback = NULL;
-#endif
-	}
-}
-
-wxMBConv * AegisubCSConv::Clone() const {
-	AegisubCSConv *c = new AegisubCSConv(mbCharsetName);
-	c->mbNulLen = mbNulLen;
-	return c;
-}
-
-/// @brief Calculate the size of NUL in the target encoding via iconv
-/// @return The size in bytes of NUL
-size_t AegisubCSConv::GetMBNulLen() const {
-	if (mbNulLen == 0) {
-		const wchar_t nulStr[] = L"";
-		char outBuff[8];
-		size_t inLen  = sizeof(wchar_t);
-		size_t outLen = sizeof(outBuff);
-		char * inPtr  = (char *)nulStr;
-		char * outPtr = outBuff;
-
-		size_t res = iconv(w2m, &inPtr, &inLen, &outPtr, &outLen);
-
-		if (res != 0)
-			mbNulLen = (size_t)-1;
-		else
-			mbNulLen = sizeof(outBuff) - outLen;
-	}
-	return mbNulLen;
-}
-
-size_t AegisubCSConv::MBBuffLen(const char * str) const {
-	size_t nulLen = GetMBNulLen();
-	const char *ptr;
-	switch (nulLen) {
-		case 1:
-			return strlen(str);
-		case 2:
-			for (ptr = str; *reinterpret_cast<const uint16_t *>(ptr) != 0; ptr += 2) ;
-			return ptr - str;
-		case 4:
-			for (ptr = str; *reinterpret_cast<const uint32_t *>(ptr) != 0; ptr += 4) ;
-			return ptr - str;
-		default:
-			return (size_t)-1;
-	}
-}
-
-/// @brief Convert a string from multibyte to wide characters
-/// @param dst     Destination buffer.
-/// @param dstSize Length of destination buffer in wchar_ts
-/// @param src     Source multibyte string
-/// @param srcLen  Length of source buffer in bytes, or -1 to autodetect
-/// @return The number of wchar_ts needed to store the string in the target charset
 size_t AegisubCSConv::ToWChar(wchar_t *dst, size_t dstSize, const char *src, size_t srcLen) const {
-	return doConversion(
-		m2w,
-		reinterpret_cast<char *>(dst),
-		dstSize * sizeof(wchar_t),
-		const_cast<char *>(src),
-		srcLen == wxNO_LEN ? MBBuffLen(src) + GetMBNulLen() : srcLen
-	) / sizeof(wchar_t);
+	throw agi::charset::UnsupportedConversion("Cannot convert to local with csConvLocal");
 }

 /// @brief Convert a string from wide characters to multibyte
@ -152,309 +66,19 @@ size_t AegisubCSConv::ToWChar(wchar_t *dst, size_t dstSize, const char *src, siz
 /// @param srcLen  Length in wchar_ts of source, or -1 to autodetect
 /// @return The number of bytes needed to store the string in the target charset
 size_t AegisubCSConv::FromWChar(char *dst, size_t dstSize, const wchar_t *src, size_t srcLen) const {
-	return doConversion(
-		w2m,
-		dst,
-		dstSize,
-		reinterpret_cast<char *>(const_cast<wchar_t *>(src)),
-		(srcLen == wxNO_LEN ? wcslen(src) + 1 : srcLen) * sizeof(wchar_t)
-	);
+	try {
+		if (srcLen != (size_t)-1) {
+			if (srcLen > 0 && src[srcLen - 1] == 0) srcLen -= 1; // drop a trailing NUL; the srcLen > 0 guard keeps an empty input from reading src[-1]
+			srcLen *= sizeof(wchar_t); // wchar_t count -> byte count for the char*-based converter calls below
 		}
-
-// Perform a conversion if a buffer is given or calculate the needed buffer size if not
-size_t AegisubCSConv::doConversion(iconv_t cd, char *dst, size_t dstSize, char *src, size_t srcSize) const {
-	if (dstSize > 0) {
-		return iconvWrapper(cd, &src, &srcSize, &dst, &dstSize);
+		if (dstSize == 0) {
+			return conv.RequiredBufferSize(reinterpret_cast<const char*>(src), srcLen);
 		}
-
-	// No destination given, so calculate the needed buffer size instead
-	char buff[32];
-	size_t buffSize = 32;
-	size_t charsWritten = 0;
-	size_t res;
-
-	do {
-		dst = buff;
-		dstSize = buffSize;
-		res = iconvWrapper(cd, &src, &srcSize, &dst, &dstSize);
-
-		charsWritten += dst - buff;
-	} while (res == iconv_failed && errno == E2BIG);
-
-	if (res == iconv_failed) return wxCONV_FAILED;
-	return charsWritten;
+		return conv.Convert(reinterpret_cast<const char*>(src), srcLen, dst, dstSize);
 	}
-
-// Actually perform a conversion via iconv
-size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft,
-                                   char **outbuf, size_t *outbytesleft) const {
-
-#if wxUSE_THREADS
-	wxMutexLocker lock(iconvMutex);
-#endif
-
-	char *outbuforig = *outbuf;
-	size_t res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
-
-	if (res != iconv_failed)
-		return *outbuf - outbuforig;
-	if (!enableSubst)
-		return iconv_failed;
-
-#ifdef ICONV_POSIX
-	if (errno == EILSEQ) {
-		throw
-			L"One or more characters do not fit in the selected "
-			L"encoding and the version of iconv Aegisub was built with"
-			L" does not have useful fallbacks. For best results, "
-			L"please rebuild Aegisub using a recent version of GNU iconv.";
-	}
-	return wxCONV_FAILED;
-#else
-	// Save original errno so we can return it rather than the result from iconvctl
-	int err = errno;
-
-	// Some characters in the input string do not exist in the output encoding
-	if (res == iconv_failed && err == EILSEQ) {
-		// first try transliteration only
-		int transliterate = 1;
-		iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
-		res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
-		err = errno;
-		transliterate = 0;
-		iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
-	}
-	if (res == iconv_failed && err == EILSEQ) {
-		// Conversion still failed with transliteration enabled, so try our substitution
-		iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
-		res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
-		err = errno;
-		iconvctl(cd, ICONV_SET_FALLBACKS, NULL);
-	}
-	if (res == iconv_failed && err == EILSEQ) {
-		// Conversion still failed, so just drop any invalid characters
-		int discard = 1;
-		iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &discard);
-		res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
-		err = errno;
-		discard = 0;
-		iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &discard);
-	}
-
-	errno = err;
-	if (res == iconv_failed) return wxCONV_FAILED;
-	return *outbuf - outbuforig;
-#endif
-}
-
-
-/// @brief GNU iconv character substitution callback
-/// @param code         Unicode character which could not be converted
-/// @param callback     Callback to tell iconv what string to use instead
-/// @param callback_arg Iconv userdata for callback
-/// @param convPtr      AegisubCSConv instance to use
-void AegisubCSConv::ucToMbFallback(
-	unsigned int code,
-	void (*callback) (const char *buf, size_t buflen, void* callback_arg),
-	void *callback_arg,
-	void *convPtr)
-{
-	// At some point in the future, this should probably switch to a real mapping
-	// For now, there's just three cases: BOM to nothing, '\' to itself
-	// (for Shift-JIS, which does not have \) and everything else to '?'
-	if (code == 0xFEFF) return;
-	if (code == 0x5C) callback("\\", 1, callback_arg);
-	else {
-		AegisubCSConv *self = static_cast<AegisubCSConv *>(convPtr);
-		callback(self->invalidRep, self->invalidRepSize, callback_arg);
+	catch (agi::charset::ConvError const&) {
+		return (size_t)-1;
 	}
 }
-
-#ifndef ICONV_POSIX
-/// @brief Callback for iconvlist
-/// @param namescount Number of names in names
-/// @param names      Names to add to the list
-/// @param data       Unused userdata field
-int addEncoding(unsigned int namescount, const char * const * names, void* data) {
-	for (unsigned int i = 0; i < namescount; i++) {
-		supportedEncodings->Add(wxString::FromAscii(names[i]));
-	}
-	return 0;
-}
-#endif
-
-wxArrayString AegisubCSConv::GetAllSupportedEncodings() {
-#if wxUSE_THREADS
-	wxMutexLocker lock(encodingListMutex);
-#endif
-	if (supportedEncodings == NULL) {
-		supportedEncodings = new wxArrayString();
-#ifndef ICONV_POSIX
-		iconvlist(addEncoding, NULL);
-		supportedEncodings->Sort();
-#endif
-	}
-	return *supportedEncodings;
-}
-
-wxString AegisubCSConv::GetRealEncodingName(wxString name) {
-	if (name.Lower() == L"local") return wxLocale::GetSystemEncodingName();
-	if (prettyEncodingList == NULL) return name;
-
-	PrettyNamesHash::iterator realName = prettyEncodingHash->find(name);
-	if (realName != prettyEncodingHash->end()) {
-		return realName->second;
-	}
-	return name;
-}
-
-wxArrayString AegisubCSConv::GetEncodingsList() {
-#if wxUSE_THREADS
-	wxMutexLocker lock(encodingListMutex);
-#endif
-	if (prettyEncodingList == NULL) {
-		struct { const char *pretty, *real; } encodingNames[] = {
-			{"Unicode (UTF-8)",                   "utf-8"},
-			{"Unicode (UTF-16)",                  "utf-16"},
-			{"Unicode (UTF-16BE)",                "utf-16be"},
-			{"Unicode (UTF-16LE)",                "utf-16le"},
-			{"Unicode (UTF-32)",                  "utf-32"},
-			{"Unicode (UTF-32BE)",                "utf-32be"},
-			{"Unicode (UTF-32LE)",                "utf-32le"},
-			{"Unicode (UTF-7)",                   "utf-7"},
-
-			{"Arabic (IBM-864)",                  "ibm864"},
-			{"Arabic (IBM-864-I)",                "ibm864i"},
-			{"Arabic (ISO-8859-6)",               "iso-8859-6"},
-			{"Arabic (ISO-8859-6-E)",             "iso-8859-6-e"},
-			{"Arabic (ISO-8859-6-I)",             "iso-8859-6-i"},
-			{"Arabic (Langbox ISO-8859-6.16)",    "x-iso-8859-6-16"},
-			{"Arabic (Langbox ISO-8859-6.8x)",    "x-iso-8859-6-8-x"},
-			{"Arabic (MacArabic)",                "x-mac-arabic"},
-			{"Arabic (Windows-1256)",             "windows-1256"},
-
-			{"Armenian (ARMSCII-8)",              "armscii-8"},
-
-			{"Baltic (ISO-8859-13)",              "iso-8859-13"},
-			{"Baltic (ISO-8859-4)",               "iso-8859-4"},
-			{"Baltic (Windows-1257)",             "windows-1257"},
-
-			{"Celtic (ISO-8859-14)",              "iso-8859-14"},
-
-			{"Central European (IBM-852)",        "ibm852"},
-			{"Central European (ISO-8859-2)",     "iso-8859-2"},
-			{"Central European (MacCE)",          "x-mac-ce"},
-			{"Central European (Windows-1250)",   "windows-1250"},
-
-			{"Chinese Simplified (GB18030)",      "gb18030"},
-			{"Chinese Simplified (GB2312)",       "gb2312"},
-			{"Chinese Simplified (GBK)",          "x-gbk"},
-			{"Chinese Simplified (HZ)",           "hz-gb-2312"},
-			{"Chinese Simplified (ISO-2022-CN)",  "iso-2022-cn"},
-			{"Chinese Traditional (Big5)",        "big5"},
-			{"Chinese Traditional (Big5-HKSCS)",  "big5-hkscs"},
-			{"Chinese Traditional (EUC-TW)",      "x-euc-tw"},
-
-			{"Croatian (MacCroatian)",            "x-mac-croatian"},
-
-			{"Cyrillic (IBM-855)",                "ibm855"},
-			{"Cyrillic (ISO-8859-5)",             "iso-8859-5"},
-			{"Cyrillic (ISO-IR-111)",             "iso-ir-111"},
-			{"Cyrillic (KOI8-R)",                 "koi8-r"},
-			{"Cyrillic (MacCyrillic)",            "x-mac-cyrillic"},
-			{"Cyrillic (Windows-1251)",           "windows-1251"},
-			{"Cyrillic/Russian (CP-866)",         "ibm866"},
-			{"Cyrillic/Ukrainian (KOI8-U)",       "koi8-u"},
-			{"Cyrillic/Ukrainian (MacUkrainian)", "x-mac-ukrainian"},
-
-			{"English (US-ASCII)",                "us-ascii"},
-
-			{"Farsi (MacFarsi)",                  "x-mac-farsi"},
-
-			{"Georgian (GEOSTD8)",                "geostd8"},
-
-			{"Greek (ISO-8859-7)",                "iso-8859-7"},
-			{"Greek (MacGreek)",                  "x-mac-greek"},
-			{"Greek (Windows-1253)",              "windows-1253"},
-
-			{"Gujarati (MacGujarati)",            "x-mac-gujarati"},
-			{"Gurmukhi (MacGurmukhi)",            "x-mac-gurmukhi"},
-
-			{"Hebrew (IBM-862)",                  "ibm862"},
-			{"Hebrew (ISO-8859-8-E)",             "iso-8859-8-e"},
-			{"Hebrew (ISO-8859-8-I)",             "iso-8859-8-i"},
-			{"Hebrew (MacHebrew)",                "x-mac-hebrew"},
-			{"Hebrew (Windows-1255)",             "windows-1255"},
-			{"Hebrew Visual (ISO-8859-8)",        "iso-8859-8"},
-
-			{"Hindi (MacDevanagari)",             "x-mac-devanagari"},
-			{"Hindi (SunDevanagari)",             "x-sun-unicode-india-0"},
-
-			{"Icelandic (MacIcelandic)",          "x-mac-icelandic"},
-
-			{"Japanese (EUC-JP)",                 "euc-jp"},
-			{"Japanese (ISO-2022-JP)",            "iso-2022-jp"},
-			{"Japanese (Shift_JIS)",              "shift_jis"},
-
-			{"Korean (EUC-KR)",                   "euc-kr"},
-			{"Korean (ISO-2022-KR)",              "iso-2022-kr"},
-			{"Korean (JOHAB)",                    "x-johab"},
-			{"Korean (UHC)",                      "x-windows-949"},
-
-			{"Nordic (ISO-8859-10)",              "iso-8859-10"},
-
-			{"Romanian (ISO-8859-16)",            "iso-8859-16"},
-			{"Romanian (MacRomanian)",            "x-mac-romanian"},
-
-			{"South European (ISO-8859-3)",       "iso-8859-3"},
-
-			{"Thai (IBM-874)",                    "ibm874"},
-			{"Thai (ISO-8859-11)",                "iso-8859-11"},
-			{"Thai (TIS-620)",                    "tis-620"},
-			{"Thai (Windows-874)",                "windows-874"},
-
-			{"Turkish (IBM-857)",                 "ibm857"},
-			{"Turkish (ISO-8859-9)",              "iso-8859-9"},
-			{"Turkish (MacTurkish)",              "x-mac-turkish"},
-			{"Turkish (Windows-1254)",            "windows-1254"},
-
-			{"Vietnamese (TCVN)",                 "x-viet-tcvn5712"},
-			{"Vietnamese (VISCII)",               "viscii"},
-			{"Vietnamese (VPS)",                  "x-viet-vps"},
-			{"Vietnamese (Windows-1258)",         "windows-1258"},
-
-			{"Western (IBM-850)",                 "ibm850"},
-			{"Western (ISO-8859-1)",              "iso-8859-1"},
-			{"Western (ISO-8859-15)",             "iso-8859-15"},
-			{"Western (MacRoman)",                "x-mac-roman"},
-			{"Western (Windows-1252)",            "windows-1252"},
-
-			{NULL,                                NULL}
-		};
-
-		PrettyNamesHash *map = new PrettyNamesHash(100);
-		wxArrayString *arr = new wxArrayString();
-		arr->Add(L"Local");
-
-		for (int i = 0; encodingNames[i].real != NULL; i++) {
-			// Verify that iconv actually supports converting to and from this encoding
-			iconv_t cd = iconv_open(encodingNames[i].real, WCHAR_T_ENCODING);
-			if (cd == iconv_invalid) continue;
-			iconv_close(cd);
-
-			cd = iconv_open(WCHAR_T_ENCODING, encodingNames[i].real);
-			if (cd == iconv_invalid) continue;
-			iconv_close(cd);
-
-			wxString pretty = wxString::FromAscii(encodingNames[i].pretty);
-			arr->Add(pretty);
-			(*map)[pretty] = wxString::FromAscii(encodingNames[i].real);
-		}
-
-		prettyEncodingList = arr;
-		prettyEncodingHash = map;
-	}
-	return *prettyEncodingList;
-}
-static AegisubCSConv localConv(L"Local", false);
-AegisubCSConv& csConvLocal(localConv);
+static AegisubCSConvImpl localConv;
+AegisubCSConv& csConvLocal = localConv;
--- a/aegisub/src/charset_conv.h
+++ b/aegisub/src/charset_conv.h
@ -35,135 +35,38 @@
 ///

 #ifndef AGI_PRE
-#include <iconv.h>
-#include <wchar.h>
-
-#include <wx/arrstr.h>
 #include <wx/string.h>
 #include <wx/strconv.h>
 #include <wx/thread.h>
 #endif

 #include "aegisub_endian.h"
-
-#if !defined(_LIBICONV_VERSION) || _LIBICONV_VERSION < 0x010A || defined(LIBICONV_PLUG)
-#define ICONV_POSIX
-#endif
-
-/// @class iconv_wrapper
-/// @brief RAII wrapper for iconv
-class iconv_wrapper {
-private:
-	iconv_t conv;
-public:
-	iconv_wrapper(const char *to, const char *from)
-	: conv(iconv_open(to, from))
-	{ }
-	iconv_wrapper(wxString const& to, wxString const& from)
-	: conv(iconv_open(to.ToAscii(), from.ToAscii()))
-	{ }
-	iconv_wrapper(const char *to, wxString const& from)
-	: conv(iconv_open(to, from.ToAscii()))
-	{ }
-	iconv_wrapper(wxString const& to, const char *from)
-	: conv(iconv_open(to.ToAscii(), from))
-	{ }
-	~iconv_wrapper() {
-		if (conv != (iconv_t)-1) iconv_close(conv);
-	}
-	operator iconv_t() {
-		return conv;
-	}
-	operator const iconv_t() const {
-		return conv;
-	}
-};
+#include <libaegisub/charset_conv.h>

 /// @class AegisubCSConv
 /// @brief wxMBConv implementation for converting to and from unicode
 class AegisubCSConv : public wxMBConv {
 public:
-	/// @param mbEncName   Multibyte encoding to convert to/from
-	/// @param enableSubst Whether to substitute characters when needed.
-	/// By default, any conversion that would be lossy will fail
-	/// When enableSubst is true, conversions to multibyte with a sufficiently
-	/// large buffer are guaranteed to succeed, with characters dropped or
-	/// changed as needed to fit the string into the target encoding.
-	AegisubCSConv(const wxChar *mbEncName, bool enableSubst = false);

 	// wxMBConv implementation; see strconv.h for usage details
 	size_t ToWChar(wchar_t *dst, size_t dstLen, const char *src, size_t srcLen = wxNO_LEN) const;
 	size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const;
-	size_t GetMBNulLen() const;
-	wxMBConv *Clone() const;
-
-	/// @brief Multibyte-aware strlen
-	/// @return Length in bytes of str (excluding terminator)
-	size_t MBBuffLen(const char *str) const;
-
-	/// @brief Get a list of support encodings with user-friendly names
-	static wxArrayString GetEncodingsList();
-	/// @brief Get a list of all encodings supported by iconv
-	/// Requires GNU iconv for useful results
-	static wxArrayString GetAllSupportedEncodings();
-	/// @brief Map a user-friendly encoding name to the real encoding name
-	static wxString GetRealEncodingName(wxString name);
+	wxMBConv *Clone() const { return NULL; };

+protected:
+	AegisubCSConv();
 private:
-	// The smattering of mutable variables here are due to that ToWChar and
-	// FromWChar are const in wxMBConv, but we require minor mutation for
-	// things like locks (as iconv is not thread-safe)
-	wxString wcCharsetName;
-	wxString mbCharsetName;
-	mutable size_t mbNulLen;
-	bool enableSubst;
-
-	size_t doConversion(iconv_t cd, char *dst, size_t dstSize, char *src, size_t srcSize) const;
-	size_t iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) const;
-
-	static void ucToMbFallback(
-		unsigned int code,
-		void (*callback) (const char *buf, size_t buflen, void* callback_arg),
-		void *callback_arg,
-		void *convPtr);
-
-	/// Replacement character for characters which do not fit in the target
-	/// encoding and iconv does not have an appropriate substitute for
-	char invalidRep[8];
-	size_t invalidRepSize;
-
-#ifndef ICONV_POSIX
-	mutable iconv_fallbacks fallbacks;
-#endif
+	AegisubCSConv(const AegisubCSConv&);
+	AegisubCSConv& operator=(const AegisubCSConv&);
+	wxString localCharset;

 #if wxUSE_THREADS
 	mutable wxMutex iconvMutex;
 #endif

-protected:
-	iconv_wrapper m2w, w2m;
+	// ToWChar and FromWChar are const in wxMBConv, but iconv can't be used
+	// immutably
+	mutable agi::charset::IconvWrapper conv;
 };

-// Predefined conversion for the current locale, intended to be a drop-in
-// replacement for wxConvLocal
 extern AegisubCSConv& csConvLocal;
-
-#ifdef HAVE_BIG_ENDIAN
-#	if SIZEOF_WCHAR_T == 4
-#		define WCHAR_T_ENCODING "UTF-32BE"
-#	elif SIZEOF_WCHAR_T == 2
-#		define WCHAR_T_ENCODING "UTF-16BE"
-#	endif
-#elif defined(HAVE_LITTLE_ENDIAN)
-#	if SIZEOF_WCHAR_T == 4
-#		define WCHAR_T_ENCODING "UTF-32LE"
-#	elif SIZEOF_WCHAR_T == 2
-#		define WCHAR_T_ENCODING "UTF-16LE"
-#	endif
-#else
-#	if SIZEOF_WCHAR_T == 4
-#		define WCHAR_T_ENCODING ((Endian::MachineToBig((uint32_t)1) == 1) ? "UTF-32BE" : "UTF-32LE")
-#	elif SIZEOF_WCHAR_T == 2
-#		define WCHAR_T_ENCODING ((Endian::MachineToBig((uint32_t)1) == 1) ? "UTF-16BE" : "UTF-16LE")
-#	endif
-#endif
--- a/aegisub/src/compat.cpp
+++ b/aegisub/src/compat.cpp
@ -7,7 +7,7 @@ wxArrayString lagi_MRU_wxAS(const wxString &list) {
 	const agi::MRUManager::MRUListMap *map_list = AegisubApp::Get()->mru->Get(STD_STR(list));

 	for (agi::MRUManager::MRUListMap::const_iterator i_lst = map_list->begin(); i_lst != map_list->end(); ++i_lst) {
-		work.Add(wxString(i_lst->second));
+		work.Add(wxString(i_lst->second.c_str(), wxConvUTF8));
 	}

 	return work;
--- a/aegisub/src/compat.h
+++ b/aegisub/src/compat.h
@ -8,8 +8,8 @@

 #include <libaegisub/colour.h>

-#define STD_STR(x) std::string(x.mb_str())
+#define STD_STR(x) std::string((x).utf8_str()) // argument parenthesised so expressions such as STD_STR(a + b) expand correctly

 inline wxColour lagi_wxColour(const agi::Colour &colour) { return wxColour(colour); }
-inline wxString lagi_wxString(const std::string &str) { return wxString(str); }
+inline wxString lagi_wxString(const std::string &str) { return wxString(str.c_str(), wxConvUTF8); }
 wxArrayString lagi_MRU_wxAS(const wxString &list);
--- a/aegisub/src/dialog_export.cpp
+++ b/aegisub/src/dialog_export.cpp
@ -34,9 +34,6 @@
 /// @ingroup export
 ///

-
-///////////
-// Headers
 #include "config.h"

 #ifndef AGI_PRE
@ -102,7 +99,7 @@ DialogExport::DialogExport (wxWindow *parent)

 	// Charset dropdown list
 	wxStaticText *charset_list_label = new wxStaticText(this, -1, _("Text encoding:"));
-	CharsetList = new wxChoice(this, Charset_List_Box, wxDefaultPosition, wxDefaultSize, AegisubCSConv::GetEncodingsList());
+	CharsetList = new wxChoice(this, Charset_List_Box, wxDefaultPosition, wxDefaultSize, agi::charset::GetEncodingsList<wxArrayString>());
 	wxSizer *charset_list_sizer = new wxBoxSizer(wxHORIZONTAL);
 	charset_list_sizer->Add(charset_list_label, 0, wxALIGN_CENTER | wxRIGHT, 5);
 	charset_list_sizer->Add(CharsetList, 1, wxEXPAND);
@ -219,6 +216,9 @@ void DialogExport::OnProcess(wxCommandEvent &event) {
 		wxString err(error);
 		wxMessageBox(err, _T("Error exporting subtitles"), wxOK | wxICON_ERROR, this);
 	}
+	catch (const agi::charset::ConvError& err) {
+		wxMessageBox(err.GetMessage(), _T("Error exporting subtitles"), wxOK | wxICON_ERROR, this);
+	}
 	catch (...) {
 		wxMessageBox(_T("Unknown error"), _T("Error exporting subtitles"), wxOK | wxICON_ERROR, this);
 	}
--- a/aegisub/src/frame_main.cpp
+++ b/aegisub/src/frame_main.cpp
@ -713,8 +713,7 @@ void FrameMain::LoadSubtitles (wxString filename,wxString charset) {
 			// Make sure that file isn't actually a timecode file
 			try {
 				TextFileReader testSubs(filename,charset);
-				charset = testSubs.GetCurrentEncoding();
-				isBinary = charset == _T("binary");
+				isBinary = testSubs.IsBinary();
 				if (!isBinary && testSubs.HasMoreLines()) {
 					wxString cur = testSubs.ReadLineFromFile();
 					if (cur.Left(10) == _T("# timecode")) {
@ -817,8 +816,7 @@ bool FrameMain::SaveSubtitles(bool saveas,bool withCharset) {
 		// Get charset
 		wxString charset = _T("");
 		if (withCharset) {
-			wxArrayString choices = AegisubCSConv::GetEncodingsList();
-			charset = wxGetSingleChoice(_("Choose charset code:"), _T("Charset"),choices,this,-1, -1,true,250,200);
+			charset = wxGetSingleChoice(_("Choose charset code:"), _T("Charset"),agi::charset::GetEncodingsList<wxArrayString>(),this,-1, -1,true,250,200);
 			if (charset.IsEmpty()) return false;
 		}

--- a/aegisub/src/frame_main_events.cpp
+++ b/aegisub/src/frame_main_events.cpp
@ -538,7 +538,7 @@ int FrameMain::AddMacroMenuItems(wxMenu *menu, const std::vector<Automation4::Fe
 ///
 void FrameMain::OnOpenRecentSubs(wxCommandEvent &event) {
 	int number = event.GetId()-Menu_File_Recent;
-	LoadSubtitles(AegisubApp::Get()->mru->GetEntry("Subtitle", number));
+	LoadSubtitles(lagi_wxString(AegisubApp::Get()->mru->GetEntry("Subtitle", number)));
 }


@ -548,7 +548,7 @@ void FrameMain::OnOpenRecentSubs(wxCommandEvent &event) {
 ///
 void FrameMain::OnOpenRecentVideo(wxCommandEvent &event) {
 	int number = event.GetId()-Menu_Video_Recent;
-	LoadVideo(AegisubApp::Get()->mru->GetEntry("Video", number));
+	LoadVideo(lagi_wxString(AegisubApp::Get()->mru->GetEntry("Video", number)));
 }


@ -558,7 +558,7 @@ void FrameMain::OnOpenRecentVideo(wxCommandEvent &event) {
 ///
 void FrameMain::OnOpenRecentTimecodes(wxCommandEvent &event) {
 	int number = event.GetId()-Menu_Timecodes_Recent;
-	LoadVFR(AegisubApp::Get()->mru->GetEntry("Timecodes", number));
+	LoadVFR(lagi_wxString(AegisubApp::Get()->mru->GetEntry("Timecodes", number)));
 }


@ -568,7 +568,7 @@ void FrameMain::OnOpenRecentTimecodes(wxCommandEvent &event) {
 ///
 void FrameMain::OnOpenRecentKeyframes(wxCommandEvent &event) {
 	int number = event.GetId()-Menu_Keyframes_Recent;
-	KeyFrameFile::Load(AegisubApp::Get()->mru->GetEntry("Keyframes", number));
+	KeyFrameFile::Load(lagi_wxString(AegisubApp::Get()->mru->GetEntry("Keyframes", number)));
 	videoBox->videoSlider->Refresh();
 	audioBox->audioDisplay->Update();
 	Refresh();
@ -581,7 +581,7 @@ void FrameMain::OnOpenRecentKeyframes(wxCommandEvent &event) {
 ///
 void FrameMain::OnOpenRecentAudio(wxCommandEvent &event) {
 	int number = event.GetId()-Menu_Audio_Recent;
-	LoadSubtitles(AegisubApp::Get()->mru->GetEntry("Audio", number));
+	LoadAudio(lagi_wxString(AegisubApp::Get()->mru->GetEntry("Audio", number)));
 }


@ -805,13 +805,12 @@ void FrameMain::OnOpenSubtitles(wxCommandEvent& WXUNUSED(event)) {
 ///
 void FrameMain::OnOpenSubtitlesCharset(wxCommandEvent& WXUNUSED(event)) {
 	// Initialize charsets
-	wxArrayString choices = AegisubCSConv::GetEncodingsList();
 	wxString path = lagi_wxString(OPT_GET("Path/Last/Subtitles")->GetString());

 	// Get options and load
 	wxString filename = wxFileSelector(_("Open subtitles file"),path,_T(""),_T(""),AssFile::GetWildcardList(0),wxFD_OPEN | wxFD_FILE_MUST_EXIST);
 	if (!filename.empty()) {
-		wxString charset = wxGetSingleChoice(_("Choose charset code:"), _("Charset"),choices,this,-1, -1,true,250,200);
+		wxString charset = wxGetSingleChoice(_("Choose charset code:"), _("Charset"),agi::charset::GetEncodingsList<wxArrayString>(),this,-1, -1,true,250,200);
 		if (!charset.empty()) {
 			LoadSubtitles(filename,charset);
 		}
--- a/aegisub/src/hotkeys.cpp
+++ b/aegisub/src/hotkeys.cpp
@ -300,7 +300,7 @@ void HotkeyManager::Load() {
 	TextFileReader file(filename);
 	wxString header;
 	try {
-		if (file.GetCurrentEncoding() != _T("binary"))
+		if (!file.IsBinary())
 			header = file.ReadLineFromFile();
 	}
 	catch (wxString e) {
--- a/aegisub/src/main.cpp
+++ b/aegisub/src/main.cpp
@ -263,6 +263,10 @@ emit_stdout->Enable();
 		wxMessageBox(err,_T("Fatal error while initializing"));
 		return false;
 	}
+	catch (agi::Exception const& e) {
+		wxMessageBox(e.GetMessage(),_T("Fatal error while initializing"));
+		return false;
+	}

 	catch (...) {
 		wxMessageBox(_T("Unhandled exception"),_T("Fatal error while initializing"));
--- a/aegisub/src/preferences.cpp
+++ b/aegisub/src/preferences.cpp
@ -29,6 +29,7 @@
 #include <libaegisub/exception.h>

 #include "colour_button.h"
+#include "compat.h"
 #include "libresrc/libresrc.h"
 #include "preferences.h"
 #include "main.h"
@ -172,7 +173,7 @@ void Preferences::OptionAdd(wxPanel *parent, wxFlexGridSizer *flex, const wxStri

 		case agi::OptionValue::Type_String: {
 			flex->Add(new wxStaticText(parent, wxID_ANY, name), 1, wxALIGN_CENTRE_VERTICAL);
-			wxTextCtrl *text = new wxTextCtrl(parent, wxID_ANY , opt->GetString(), wxDefaultPosition, wxDefaultSize);
+			wxTextCtrl *text = new wxTextCtrl(parent, wxID_ANY , lagi_wxString(opt->GetString()), wxDefaultPosition, wxDefaultSize);
 			flex->Add(text, 1, wxEXPAND);
 			break;
 		}
--- a/aegisub/src/spellchecker_hunspell.cpp
+++ b/aegisub/src/spellchecker_hunspell.cpp
@ -59,6 +59,8 @@
 #include "options.h"
 #include "spellchecker_hunspell.h"
 #include "standard_paths.h"
+#include "text_file_reader.h"
+#include "text_file_writer.h"
 #include "utils.h"


@ -66,6 +68,7 @@
 HunspellSpellChecker::HunspellSpellChecker() {
 	hunspell = NULL;
 	conv = NULL;
+	rconv = NULL;
 	SetLanguage(lagi_wxString(OPT_GET("Tool/Spell Checker/Language")->GetString()));
 }

@ -84,6 +87,8 @@ void HunspellSpellChecker::Reset() {
 	hunspell = NULL;
 	delete conv;
 	conv = NULL;
+	delete rconv;
+	rconv = NULL;
 	affpath.Clear();
 	dicpath.Clear();
 }
@ -96,8 +101,13 @@ void HunspellSpellChecker::Reset() {
 ///
 bool HunspellSpellChecker::CanAddWord(wxString word) {
 	if (!hunspell) return false;
-	wxCharBuffer buffer = word.mb_str(*conv);
-	return (buffer.data() != NULL);
+	try {
+		conv->Convert(word);
+		return true;
+	}
+	catch (agi::charset::ConvError const&) {
+		return false;
+	}
 }


@ -111,9 +121,9 @@ void HunspellSpellChecker::AddWord(wxString word) {

 	// Add to currently loaded file
 #ifdef WITH_OLD_HUNSPELL
-	hunspell->put_word(word.mb_str(*conv));
+	hunspell->put_word(conv->Convert(word).c_str());
 #else
-	hunspell->add(word.mb_str(*conv));
+	hunspell->add(conv->Convert(word).c_str());
 #endif

 	// Ensure that the path exists
@ -124,22 +134,14 @@ void HunspellSpellChecker::AddWord(wxString word) {

 	// Load dictionary
 	wxArrayString dic;
-	wxString curLine;
 	bool added = false;
 	if (fn.FileExists()) {	// Even if you ever want to remove this "if", keep the braces, so the stream closes at the end
 		bool first = true;
-		wxFileInputStream in(usrdicpath);
-		if (!in.IsOk()) return;
-		wxTextInputStream textIn(in,_T(" \t"),*conv);
-
-		// Read it
-		while (in.CanRead() && !in.Eof()) {
-			// Read line
-			curLine = textIn.ReadLine();
-			curLine.Trim();
+		TextFileReader reader(usrdicpath, L"UTF-8");
+		while (reader.HasMoreLines()) {
+			wxString curLine = reader.ReadLineFromFile();
 			if (curLine.IsEmpty()) continue;

-			// First
 			if (first) {
 				first = false;
 				if (curLine.IsNumber()) continue;
@ -160,11 +162,14 @@ void HunspellSpellChecker::AddWord(wxString word) {
 	if (!added) dic.Add(word);

 	// Write back to disk
-	wxFileOutputStream out(usrdicpath);
-	if (!out.IsOk()) return;
-	wxTextOutputStream textOut(out,wxEOL_UNIX,*conv);
-	textOut.WriteString(wxString::Format(_T("%i"),dic.Count())+_T("\n"));
-	for (unsigned int i=0;i<dic.Count();i++) textOut.WriteString(dic[i]+_T("\n"));
+	try {
+		TextFileWriter writer(usrdicpath, L"UTF-8");
+		writer.WriteLineToFile(wxString::Format(L"%i", static_cast<int>(dic.Count()))); // first line is the entry count; cast because Count() is size_t and %i expects int
+		for (size_t i = 0; i < dic.Count(); ++i) writer.WriteLineToFile(dic[i]);
+	}
+	catch (const wchar_t*) {
+		// Failed to open file
+	}
 }


@ -175,10 +180,13 @@ void HunspellSpellChecker::AddWord(wxString word) {
 ///
 bool HunspellSpellChecker::CheckWord(wxString word) {
 	if (!hunspell) return true;
-	wxCharBuffer buf = word.mb_str(*conv);
-	if (buf) return (hunspell->spell(buf) == 1);
+	try {
+		return hunspell->spell(conv->Convert(word).c_str()) == 1;
+	}
+	catch (agi::charset::ConvError const&) {
 		return false;
 	}
+}



@ -187,31 +195,26 @@ bool HunspellSpellChecker::CheckWord(wxString word) {
 /// @return List of suggestions
 ///
 wxArrayString HunspellSpellChecker::GetSuggestions(wxString word) {
-	// Array
 	wxArrayString suggestions;
+	if (!hunspell) return suggestions;

-	// Get suggestions
-	if (hunspell) {
-		// Word
-		wxCharBuffer buf = word.mb_str(*conv);
-		if (!buf) return suggestions;
-
+	try {
 		// Grab raw from Hunspell
 		char **results;
-		int n = hunspell->suggest(&results,buf);
+		int n = hunspell->suggest(&results,conv->Convert(word).c_str());

 		// Convert each
 		for (int i=0;i<n;i++) {
-			wxString current(results[i],*conv);
-			suggestions.Add(current);
+			suggestions.Add(rconv->Convert(results[i]));
 			delete results[i];
 		}

-		// Delete
 		delete results;
 	}
+	catch (agi::charset::ConvError const&) {
+		return suggestions;
+	}

-	// Return them
 	return suggestions;
 }

@ -279,25 +282,23 @@ void HunspellSpellChecker::SetLanguage(wxString language) {
 	hunspell = new Hunspell(affpath.mb_str(csConvLocal),dicpath.mb_str(csConvLocal));
 	conv = NULL;
 	if (hunspell) {
-		conv = new AegisubCSConv(wxString(hunspell->get_dic_encoding(),wxConvUTF8));
-
-		// Load user dictionary
-		if (wxFileExists(usrdicpath)) {
-			wxFileInputStream in(usrdicpath);
-			if (!in.IsOk()) return;
-			wxTextInputStream textIn(in,_T(" \t"),*conv);
-			while (in.CanRead() && !in.Eof()) {
-				// Read line
-				wxString curLine = textIn.ReadLine();
-				curLine.Trim();
+		conv  = new agi::charset::IconvWrapper("wchar_t", hunspell->get_dic_encoding());
+		rconv = new agi::charset::IconvWrapper(hunspell->get_dic_encoding(), "wchar_t");
+		try {
+			TextFileReader reader(usrdicpath, L"UTF-8");
+			while (reader.HasMoreLines()) {
+				wxString curLine = reader.ReadLineFromFile();
 				if (curLine.IsEmpty() || curLine.IsNumber()) continue;
 #ifdef WITH_OLD_HUNSPELL
-				hunspell->put_word(curLine.mb_str(*conv));
+				hunspell->put_word(conv->Convert(curLine).c_str());
 #else
-				hunspell->add(curLine.mb_str(*conv));
+				hunspell->add(conv->Convert(curLine).c_str());
 #endif
 			}
 		}
+		catch (const wchar_t *) {
+			// file not found
+		}
 	}
 }

--- a/aegisub/src/spellchecker_hunspell.h
+++ b/aegisub/src/spellchecker_hunspell.h
@ -43,6 +43,11 @@
 #include <hunspell/hunspell.hxx>

 #include "include/aegisub/spellchecker.h"
+namespace agi {
+	namespace charset {
+		class IconvWrapper;
+	}
+}


 /// @class HunspellSpellChecker
@ -55,7 +60,8 @@ private:
 	Hunspell *hunspell;

 	/// Conversion buffer
-	wxMBConv *conv;
+	agi::charset::IconvWrapper *conv;
+	agi::charset::IconvWrapper *rconv;

 	/// Path to .aff file
 	wxString affpath;
--- a/aegisub/src/text_file_reader.cpp
+++ b/aegisub/src/text_file_reader.cpp
@ -51,8 +51,15 @@
 #include "charset_detect.h"
 #include "text_file_reader.h"

-TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
-: encoding(enc), conv((iconv_t)-1), trim(trim), readComplete(false), currout(0), outptr(0), currentLine(0) {
+TextFileReader::TextFileReader(wxString const& filename, wxString encoding, bool trim)
+: isBinary(false)
+, conv()
+, trim(trim)
+, readComplete(false)
+, currout(0)
+, outptr(0)
+, currentLine(0)
+{
 #ifdef __WINDOWS__
 	file.open(filename.wc_str(),std::ios::in | std::ios::binary);
 #else
@ -61,16 +68,14 @@ TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
 	if (!file.is_open()) throw L"Failed opening file for reading.";

 	if (encoding.IsEmpty()) encoding = CharSetDetect::GetEncoding(filename);
-	if (encoding == L"binary") return;
-	encoding = AegisubCSConv::GetRealEncodingName(encoding);
-	conv = iconv_open(WCHAR_T_ENCODING, encoding.ToAscii());
-	if (conv == (iconv_t)-1) {
-		throw wxString::Format(L"Character set '%s' is not supported.", enc.c_str());
+	if (encoding == L"binary") {
+		isBinary = true;
+		return;
 	}
+	conv.reset(new agi::charset::IconvWrapper(encoding.c_str(), "wchar_t"));
 }

 TextFileReader::~TextFileReader() {
-	if (conv != (iconv_t)-1) iconv_close(conv);
 }

 wchar_t TextFileReader::GetWChar() {
@ -98,7 +103,8 @@ wchar_t TextFileReader::GetWChar() {
 		return 0;

 	do {
-		size_t ret = iconv(conv, &inptr, &inbytesleft, reinterpret_cast<char **>(&outptr), &outbytesleft);
+		// Without this const_cast the wrong overload is chosen
+		size_t ret = conv->Convert(const_cast<const char**>(&inptr), &inbytesleft, reinterpret_cast<char **>(&outptr), &outbytesleft);
 		if (ret != (size_t)-1) break;

 		int err = errno;
@ -144,7 +150,6 @@ wxString TextFileReader::ReadLineFromFile() {
 	if (ch == 0)
 		readComplete = true;

-	// Trim
 	if (trim) {
 		buffer.Trim(true);
 		buffer.Trim(false);
@ -155,7 +160,3 @@ wxString TextFileReader::ReadLineFromFile() {
 bool TextFileReader::HasMoreLines() {
 	return !readComplete;
 }
-
-wxString TextFileReader::GetCurrentEncoding() {
-	return encoding;
-}
--- a/aegisub/src/text_file_reader.h
+++ b/aegisub/src/text_file_reader.h
@ -38,21 +38,23 @@

 #ifndef AGI_PRE
 #include <fstream>
-
-#include <iconv.h>
+#include <memory>

 #include <wx/dynarray.h>
 #include <wx/string.h>
 #endif

+namespace agi { namespace charset {
+	class IconvWrapper;
+} }
+
 /// @class TextFileReader
 /// @brief A line-based text file reader
 class TextFileReader {
 private:
-	/// Encoding of the file being read
-	wxString encoding;
+	bool isBinary;
 	std::ifstream file;
-	iconv_t conv;
+	std::auto_ptr<agi::charset::IconvWrapper> conv;
 	bool trim;
 	bool readComplete;

@ -76,7 +78,7 @@ public:
 	/// @param filename File to open
 	/// @param enc      Encoding to use, or empty to autodetect
 	/// @param trim     Whether to trim whitespace from lines read
-	TextFileReader(wxString filename,wxString encoding=L"", bool trim=true);
+	TextFileReader(wxString const& filename,wxString encoding=L"", bool trim=true);
 	/// @brief Destructor
 	~TextFileReader();

@ -85,8 +87,5 @@ public:
 	wxString ReadLineFromFile();
 	/// @brief Check if there are any more lines to read
 	bool HasMoreLines();
-
-	/// @brief Get the file encoding used by this reader
-	/// @return "unknown", "binary", or a character encoding name
-	wxString GetCurrentEncoding();
+	bool IsBinary() { return isBinary; }
 };
--- a/aegisub/src/text_file_writer.cpp
+++ b/aegisub/src/text_file_writer.cpp
@ -51,7 +51,7 @@
 /// @param filename 
 /// @param encoding 
 ///
-TextFileWriter::TextFileWriter(wxString filename, wxString encoding)
+TextFileWriter::TextFileWriter(wxString const& filename, wxString encoding)
 : conv() {
 #ifdef WIN32
 	file.open(filename.wc_str(),std::ios::out | std::ios::binary | std::ios::trunc);
@ -59,17 +59,17 @@ TextFileWriter::TextFileWriter(wxString filename, wxString encoding)
 	file.open(wxFNCONV(filename),std::ios::out | std::ios::binary | std::ios::trunc);
 #endif
 	if (!file.is_open()) {
-		throw _T("Failed opening file for writing.");
+		throw L"Failed opening file for writing.";
 	}

-	if (encoding.IsEmpty()) encoding = lagi_wxString(OPT_GET("App/Save Charset")->GetString());
-	conv.reset(new AegisubCSConv(encoding, true));
+	if (encoding.empty()) encoding = lagi_wxString(OPT_GET("App/Save Charset")->GetString());
+	conv.reset(new agi::charset::IconvWrapper("utf-8", encoding.c_str(), true));

 	// Write the BOM
 	try {
-		WriteLineToFile(_T("\uFEFF"), false);
+		WriteLineToFile(L"\uFEFF", false);
 	}
-	catch (wxString ignore) {
+	catch (agi::charset::ConversionFailure&) {
 		// If the BOM could not be converted to the target encoding it isn't needed
 	}
 }
@ -85,14 +85,11 @@ TextFileWriter::~TextFileWriter() {
 /// @brief DOCME
 /// @param line         
 /// @param addLineBreak 
-///
 void TextFileWriter::WriteLineToFile(wxString line, bool addLineBreak) {
-	wxString temp = line;
-	if (addLineBreak) temp += _T("\r\n");
+	if (addLineBreak) line += L"\r\n"; // keep the CRLF terminator the replaced code wrote; the stream is opened in binary mode, so "\n" would reach the file untranslated

-	wxCharBuffer buf = temp.mb_str(*conv);
-	if (buf.data())
-		file.write(buf.data(), conv->MBBuffLen(buf.data()));
+	std::string buf = conv->Convert(line.utf8_str().data()); // conv was built as "utf-8" -> target encoding, so feed it the UTF-8 form of the line
+	file.write(buf.data(), buf.size());
 }


--- a/aegisub/src/text_file_writer.h
+++ b/aegisub/src/text_file_writer.h
@ -43,8 +43,11 @@
 #include <wx/string.h>
 #endif

-
-class AegisubCSConv;
+namespace agi {
+	namespace charset {
+		class IconvWrapper;
+	}
+}


 /// DOCME
@ -59,13 +62,13 @@ private:
 	std::ofstream file;

 	/// DOCME
-	std::auto_ptr<AegisubCSConv> conv;
+	std::auto_ptr<agi::charset::IconvWrapper> conv;

 	TextFileWriter(const TextFileWriter&);
 	TextFileWriter& operator=(const TextFileWriter&);

 public:
-	TextFileWriter(wxString filename, wxString encoding=_T(""));
+	TextFileWriter(wxString const& filename, wxString encoding="");
 	~TextFileWriter();

 	void WriteLineToFile(wxString line, bool addLineBreak=true);
--- a/aegisub/src/video_provider_manager.cpp
+++ b/aegisub/src/video_provider_manager.cpp
@ -69,7 +69,7 @@ VideoProvider *VideoProviderFactoryManager::GetProvider(wxString video) {
 	}

 	try {
-		VideoProvider *y4m_provider = new YUV4MPEGVideoProvider(video.wc_str());
+		VideoProvider *y4m_provider = new YUV4MPEGVideoProvider(video);
 		if (y4m_provider)
 			y4m_provider = new VideoProviderCache(y4m_provider);
 		return y4m_provider;
@ -92,7 +92,7 @@ VideoProvider *VideoProviderFactoryManager::GetProvider(wxString video) {
 	for (unsigned int i=0;i<list.Count();i++) {
 		try {
 			// Create provider
-			VideoProvider *provider = GetFactory(list[i])->CreateProvider(video.wc_str());
+			VideoProvider *provider = GetFactory(list[i])->CreateProvider(video);
 			if (provider) {
 				// Cache if necessary
 				if (provider->WantsCaching()) {
--- a/aegisub/tests/Makefile.am
+++ b/aegisub/tests/Makefile.am
@ -12,6 +12,7 @@ run_SOURCES = \
 		util_unix.cpp \
 		libaegisub_access.cpp \
 		libaegisub_cajun.cpp \
+		libaegisub_iconv.cpp \
 		libaegisub_util.cpp \
 		libaegisub_mru.cpp
 		
--- a/aegisub/tests/libaegisub_iconv.cpp
+++ b/aegisub/tests/libaegisub_iconv.cpp
@ -0,0 +1,138 @@
+// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// $Id$
+
+/// @file libaegisub_iconv.cpp
+/// @brief agi::charset
+/// @ingroup iconv
+
+#include <stdint.h>
+#include <libaegisub/charset_conv.h>
+
+#include "main.h"
+#include "util.h"
+
+using namespace agi::charset;
+
+// Smoke test: building a UTF-8 -> UTF-16LE converter must not throw.
+TEST(lagi_iconv, BasicSetup) {
+	const char *const from = "UTF-8";
+	const char *const to = "UTF-16LE";
+	EXPECT_NO_THROW(IconvWrapper(from, to));
+}
+
+// An unknown charset name on either side (or both) must be rejected at
+// construction time with UnsupportedConversion.
+TEST(lagi_iconv, InvalidConversions) {
+	const char *const bogus = "nonexistent charset";
+	const char *const known = "UTF-16LE";
+
+	EXPECT_THROW(IconvWrapper(bogus, known), UnsupportedConversion);
+	EXPECT_THROW(IconvWrapper(known, bogus), UnsupportedConversion);
+	EXPECT_THROW(IconvWrapper(bogus, bogus), UnsupportedConversion);
+}
+
+// String length in bytes for a charset with one-byte units: strings of
+// 0..9 spaces must report exactly their character count.
+TEST(lagi_iconv, StrLen1) {
+	IconvWrapper conv("UTF-8", "UTF-8", false);
+	for (int len = 0; len < 10; ++len) {
+		const std::string spaces(len, ' ');
+		const char *const raw = spaces.c_str();
+		ASSERT_EQ(len, conv.SrcStrLen(raw));
+		ASSERT_EQ(len, conv.DstStrLen(raw));
+	}
+}
+// String length in bytes for a charset with two-byte units: a string of
+// i spaces must report 2*i bytes on both the source and destination side.
+TEST(lagi_iconv, StrLen2) {
+	IconvWrapper conv("UTF-16LE", "UTF-16LE", false);
+	// A zero-filled array is used instead of std::basic_string<int16_t>: the
+	// latter needs std::char_traits<int16_t>, which standard libraries are not
+	// required to provide. The extra element keeps a terminator in place.
+	int16_t str[11] = {0};
+	for (int i = 0; i < 10; i++) {
+		// At this point str holds exactly i spaces followed by zeros
+		ASSERT_EQ(2*i, conv.SrcStrLen(reinterpret_cast<const char *>(str)));
+		ASSERT_EQ(2*i, conv.DstStrLen(reinterpret_cast<const char *>(str)));
+		str[i] = ' '; // grow the string by one space for the next iteration
+	}
+}
+// String length in bytes for a charset with four-byte units: a string of
+// i spaces must report 4*i bytes on both the source and destination side.
+TEST(lagi_iconv, StrLen4) {
+	IconvWrapper conv("UTF-32LE", "UTF-32LE", false);
+	// A zero-filled array is used instead of std::basic_string<int32_t>: the
+	// latter needs std::char_traits<int32_t>, which standard libraries are not
+	// required to provide. The extra element keeps a terminator in place.
+	int32_t str[11] = {0};
+	for (int i = 0; i < 10; i++) {
+		// At this point str holds exactly i spaces followed by zeros
+		ASSERT_EQ(4*i, conv.SrcStrLen(reinterpret_cast<const char *>(str)));
+		ASSERT_EQ(4*i, conv.DstStrLen(reinterpret_cast<const char *>(str)));
+		str[i] = ' '; // grow the string by one space for the next iteration
+	}
+}
+
+// Compares converters built with the third constructor argument false and
+// true when the target charset cannot represent the input, alongside a
+// UTF-16LE target that is expected to accept every input used here.
+TEST(lagi_iconv, Fallbacks) {
+	IconvWrapper strict("UTF-8", "Shift-JIS", false);
+	IconvWrapper lenient("UTF-8", "Shift-JIS", true);
+	IconvWrapper lossless("UTF-8", "UTF-16LE", false);
+
+	// Backslash: expected to be unrepresentable in Shift-JIS, but to pass
+	// through unchanged when fallbacks are enabled
+	const char backslash[] = "\\";
+	EXPECT_THROW(strict.Convert(backslash), BadOutput);
+	ASSERT_NO_THROW(lenient.Convert(backslash));
+	EXPECT_EQ("\\", lenient.Convert(backslash));
+	EXPECT_NO_THROW(lossless.Convert(backslash));
+
+	// UTF-8 byte order mark going into a non-unicode charset: expected to be
+	// dropped entirely when fallbacks are enabled
+	const char bom[] = "\xEF\xBB\xBF";
+	EXPECT_THROW(strict.Convert(bom), BadOutput);
+	ASSERT_NO_THROW(lenient.Convert(bom));
+	EXPECT_EQ("", lenient.Convert(bom));
+	EXPECT_NO_THROW(lossless.Convert(bom));
+
+	// U+2603 SNOWMAN: expected to be replaced by "?" when fallbacks are enabled
+	const char snowman[] = "\xE2\x98\x83";
+	EXPECT_THROW(strict.Convert(snowman), BadOutput);
+	EXPECT_NO_THROW(lossless.Convert(snowman));
+	ASSERT_NO_THROW(lenient.Convert(snowman));
+	EXPECT_EQ("?", lenient.Convert(snowman));
+}
+
+// Malformed source data must be reported as BadInput.
+TEST(lagi_iconv, BadInput) {
+	// A single byte cannot form a complete UTF-16 code unit
+	IconvWrapper from_utf16("UTF-16LE", "UTF-8");
+	EXPECT_THROW(from_utf16.Convert(" "), BadInput);
+
+	// 0xE2 starts a multi-byte UTF-8 sequence; 0xFF cannot continue it
+	IconvWrapper from_utf8("UTF-8", "UTF-16LE");
+	EXPECT_THROW(from_utf8.Convert("\xE2\xFF"), BadInput);
+}
+
+// Round-trips a single space between UTF-8 and both UTF-16 byte orders.
+TEST(lagi_iconv, Conversions) {
+	IconvWrapper from_utf16le("UTF-16LE", "UTF-8", false);
+	IconvWrapper from_utf16be("UTF-16BE", "UTF-8", false);
+	IconvWrapper to_utf16le("UTF-8", "UTF-16LE", false);
+
+	// The same space character in each encoding; the explicit length of 2
+	// keeps the zero byte that is part of the UTF-16 code unit
+	const std::string as_utf8(" ");
+	const std::string as_utf16be("\0 ", 2);
+	const std::string as_utf16le(" \0", 2);
+
+	EXPECT_EQ(as_utf8, from_utf16le.Convert(as_utf16le));
+	EXPECT_EQ(as_utf8, from_utf16be.Convert(as_utf16be));
+	EXPECT_EQ(as_utf16le, to_utf16le.Convert(as_utf8));
+}
+
+// Basic overflow tests
+TEST(lagi_iconv, Buffer) {
+	IconvWrapper conv("UTF-8", "UTF-16LE", false);
+
+	// Pre-fill the destination with 0xFF so any byte the converter writes
+	// (or fails to leave alone) is detectable
+	char out[32];
+	memset(out, 0xFF, sizeof out);
+
+	// One source byte (the terminator of "") needs two destination bytes:
+	// capacities 0 and 1 must throw without touching the buffer
+	EXPECT_THROW(conv.Convert("", 1, out, 0), BufferTooSmall);
+	EXPECT_EQ('\xFF', out[0]);
+	EXPECT_THROW(conv.Convert("", 1, out, 1), BufferTooSmall);
+	EXPECT_EQ('\xFF', out[0]);
+
+	// Capacity 2 is exactly enough; nothing past the second byte may change
+	EXPECT_NO_THROW(conv.Convert("", 1, out, 2));
+	EXPECT_EQ('\0', out[0]);
+	EXPECT_EQ('\0', out[1]);
+	EXPECT_EQ('\xFF', out[2]);
+}
+
+// An empty destination charset name must be accepted and must convert a
+// plain space unchanged. NOTE(review): the empty name presumably selects the
+// local/system charset, going by the test name; confirm against IconvWrapper.
+TEST(lagi_iconv, LocalSupport) {
+	ASSERT_NO_THROW(IconvWrapper("UTF-8", ""));
+
+	IconvWrapper to_local("UTF-8", "");
+	const std::string space(" ");
+	ASSERT_NO_THROW(to_local.Convert(space));
+	EXPECT_EQ(space, to_local.Convert(space));
+}
+// The "wchar_t" pseudo-charset name must be usable as a conversion target.
+TEST(lagi_iconv, wchar_tSupport) {
+	const char *const from = "UTF-8";
+	const char *const to = "wchar_t";
+	EXPECT_NO_THROW(IconvWrapper(from, to));
+}