forked from mia/Aegisub
Move most character set conversion code to libaegisub and make everything use the new conversion functionality.
Originally committed to SVN as r4423.
This commit is contained in:
parent
7337a11745
commit
b6d29443a3
32 changed files with 967 additions and 644 deletions
|
@ -20,6 +20,7 @@
|
|||
<Configurations>
|
||||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
InheritedPropertySheets=".\wxlib_include.vsprops;.\libraries_outdirs.vsprops;.\compiler_options_debug.vsprops;..\aegisub_vs2008\wxlib_lib32.vsprops;..\aegisub_vs2008\suffix_debug32.vsprops;.\precomp_header.vsprops;.\src_msvc_include_dir.vsprops"
|
||||
CharacterSet="1"
|
||||
|
@ -190,11 +191,11 @@
|
|||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\libaegisub\common\charset_ucd.cpp"
|
||||
RelativePath="..\..\libaegisub\common\charset_conv.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\libaegisub\common\log.cpp"
|
||||
RelativePath="..\..\libaegisub\common\charset_ucd.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
|
@ -231,6 +232,10 @@
|
|||
RelativePath="..\..\libaegisub\windows\access.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\libaegisub\windows\charset_conv_win.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\libaegisub\windows\io.cpp"
|
||||
>
|
||||
|
@ -303,6 +308,18 @@
|
|||
RelativePath="..\..\libaegisub\include\libaegisub\access.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\libaegisub\include\libaegisub\charset_conv.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\libaegisub\include\libaegisub\charset_conv_win.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\libaegisub\include\libaegisub\charsets.def"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\libaegisub\include\libaegisub\colour.h"
|
||||
>
|
||||
|
|
|
@ -228,6 +228,10 @@
|
|||
RelativePath="..\..\tests\libaegisub_cajun.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\tests\libaegisub_iconv.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\tests\libaegisub_mru.cpp"
|
||||
>
|
||||
|
|
|
@ -21,6 +21,7 @@ endif
|
|||
|
||||
libaegisub_2_2_la_SOURCES = \
|
||||
common/charset.cpp \
|
||||
common/charset_conv.cpp \
|
||||
common/charset_ucd.cpp \
|
||||
common/mru.cpp \
|
||||
common/option.cpp \
|
||||
|
|
327
aegisub/libaegisub/common/charset_conv.cpp
Normal file
327
aegisub/libaegisub/common/charset_conv.cpp
Normal file
|
@ -0,0 +1,327 @@
|
|||
// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
|
||||
//
|
||||
// Permission to use, copy, modify, and distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice appear in all copies.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
//
|
||||
// $Id$
|
||||
|
||||
/// @file charset_conv.cpp
|
||||
/// @brief Wrapper for libiconv to present a more C++-friendly API
|
||||
/// @ingroup libaegisub
|
||||
|
||||
#ifndef LAGI_PRE
|
||||
#endif
|
||||
|
||||
#include <libaegisub/charset_conv.h>
|
||||
#include <iconv.h>
|
||||
|
||||
// Check if we can use advanced fallback capabilities added in GNU's iconv
|
||||
// implementation
|
||||
#if !defined(_LIBICONV_VERSION) || _LIBICONV_VERSION < 0x010A || defined(LIBICONV_PLUG)
|
||||
#define ICONV_POSIX
|
||||
#endif
|
||||
|
||||
static const iconv_t iconv_invalid = (iconv_t)-1;
|
||||
static const size_t iconv_failed = (size_t)-1;
|
||||
|
||||
namespace {
	/// Ordering predicate for C strings: compares the characters with strcmp
	/// rather than comparing the pointer values
	struct ltstr {
		bool operator()(const char* lhs, const char* rhs) const {
			const int order = strcmp(lhs, rhs);
			return order < 0;
		}
	};
}
|
||||
|
||||
/// @brief Map a user-friendly encoding name to the real encoding name
/// @param name Encoding name; either a pretty name listed in charsets.def or
///             any other name, which is passed through untouched
/// @return The real name registered for a pretty name, otherwise name itself
static const char* GetRealEncodingName(const char* name) {
	typedef std::map<const char*, const char*, ltstr> NameMap;
	static NameMap prettyNames;

	// Populate the table on first use; charsets.def is a list of
	// ADD(pretty, real); entries
	if (prettyNames.empty()) {
#		define ADD(pretty, real) prettyNames[pretty] = real
#		include <libaegisub/charsets.def>
#		undef ADD
	}

	const NameMap::const_iterator found = prettyNames.find(name);
	return found == prettyNames.end() ? name : found->second;
}
|
||||
|
||||
|
||||
namespace agi {
|
||||
namespace charset {
|
||||
|
||||
#ifdef ICONV_POSIX
|
||||
/// Converter used when ICONV_POSIX is defined: it ignores both constructor
/// arguments and forwards every conversion straight to iconv()
class IconvWrapper::Converter {
public:
	Converter(bool, const char*) { }

	/// Call iconv() with the given arguments and hand back its result
	size_t operator()(iconv_t cd, char** inbuf, size_t* inbytesleft, char** outbuf, size_t* outbytesleft) {
		const size_t result = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
		return result;
	}
};
|
||||
#else
|
||||
/// Converter used with GNU libiconv: on top of plain iconv() it can retry a
/// failed conversion with transliteration and then with a substitution
/// fallback installed through iconvctl()
class IconvWrapper::Converter : public iconv_fallbacks {
private:
	/// Whether failed conversions should be retried with transliteration/substitution
	bool subst;
	/// "?" encoded in the target charset; emitted by fallback()
	char invalidRep[4];
	/// Number of bytes of invalidRep that are in use
	size_t invalidRepSize;

	/// Unicode-to-multibyte fallback registered as uc_to_mb_fallback
	/// @param code         Code point which could not be converted
	/// @param callback     Function to call with the replacement bytes
	/// @param callback_arg Opaque argument handed through to callback
	/// @param convPtr      The Converter which registered the fallback (the value stored in data)
	static void fallback(
		unsigned int code,
		void (*callback) (const char *buf, size_t buflen, void* callback_arg),
		void *callback_arg,
		void *convPtr)
	{
		// At some point in the future, this should probably switch to a real mapping
		// For now, there's just three cases: BOM to nothing, '\' to itself
		// (for Shift-JIS, which does not have \) and everything else to '?'
		if (code == 0xFEFF) return;
		if (code == 0x5C) callback("\\", 1, callback_arg);
		else {
			Converter *self = static_cast<Converter *>(convPtr);
			callback(self->invalidRep, self->invalidRepSize, callback_arg);
		}
	}

public:
	/// @param subst     Enable the transliteration/substitution retries in operator()
	/// @param targetEnc Destination encoding; used to pre-encode the "?" replacement
	Converter(bool subst, const char* targetEnc)
	: subst(subst)
	, invalidRepSize(0)
	{
		data = this;
		mb_to_uc_fallback = NULL;
		mb_to_wc_fallback = NULL;
		uc_to_mb_fallback = fallback;
		wc_to_mb_fallback = NULL;

		iconv_t cd = iconv_open(GetRealEncodingName(targetEnc), "UTF-8");
		assert(cd != iconv_invalid);
		// With asserts compiled out, never pass an invalid descriptor to
		// iconv()/iconv_close(); invalidRepSize stays 0, so fallback() then
		// emits zero bytes
		if (cd == iconv_invalid) return;

		// Throwaway conversion: targets such as UTF-16/UTF-32 without an
		// explicit byte order put a BOM in front of the first output, and the
		// replacement is emitted mid-stream, so it must not contain one
		char primeSrc[] = "?";
		char primeDst[16];
		char* src = primeSrc;
		char* dst = primeDst;
		size_t srcLen = 1;
		size_t dstLen = sizeof(primeDst);
		size_t res = iconv(cd, &src, &srcLen, &dst, &dstLen);
		assert(res != iconv_failed);

		// The conversion that is actually kept
		char sbuff[] = "?";
		src = sbuff;
		dst = invalidRep;
		srcLen = 1;
		dstLen = sizeof(invalidRep);
		res = iconv(cd, &src, &srcLen, &dst, &dstLen);
		assert(res != iconv_failed);
		assert(srcLen == 0);
		(void)res; // only read by the asserts
		iconv_close(cd);

		invalidRepSize = sizeof(invalidRep) - dstLen;
	}

	/// Run iconv(), retrying failures as described on the class when subst is set
	/// @return The result of the last iconv() call made; errno holds the matching error
	size_t operator()(iconv_t cd, char** inbuf, size_t* inbytesleft, char** outbuf, size_t* outbytesleft) {
		size_t res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);

		if (!subst) return res;

		// Save original errno so we can return it rather than the result from iconvctl
		int err = errno;

		// iconvctl() reads the fallbacks through an iconv_fallbacks pointer, so
		// pass the base subobject explicitly rather than a Converter* via void*
		iconv_fallbacks* const fallbacks = static_cast<iconv_fallbacks *>(this);

		// Some characters in the input string do not exist in the output encoding
		if (res == iconv_failed && err == EILSEQ) {
			// first try transliteration only
			int transliterate = 1;
			iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
			res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
			err = errno;
			transliterate = 0;
			iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
		}
		if (res == iconv_failed && err == EILSEQ) {
			// Conversion still failed with transliteration enabled, so try our substitution
			iconvctl(cd, ICONV_SET_FALLBACKS, fallbacks);
			res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
			err = errno;
			iconvctl(cd, ICONV_SET_FALLBACKS, NULL);
		}
		if (res == iconv_failed && err == E2BIG && *outbytesleft == 0) {
			// Check for E2BIG false positives
			char buff[4];
			size_t buffsize = 4;
			char* out = buff;
			char* in = *inbuf;
			size_t insize = *inbytesleft;

			iconvctl(cd, ICONV_SET_FALLBACKS, fallbacks);
			res = iconv(cd, &in, &insize, &out, &buffsize);
			// If no bytes of the output buffer were used, the original
			// conversion may have been successful
			if (buffsize == 4) {
				err = errno;
			}
			else {
				res = iconv_failed;
			}
			iconvctl(cd, ICONV_SET_FALLBACKS, NULL);
		}

		errno = err;
		return res;
	}
};
|
||||
#endif
|
||||
|
||||
// Calculate the size of NUL in the given character set
|
||||
static size_t NulSize(const char* encoding) {
|
||||
// We need a character set to convert from with a known encoding of NUL
|
||||
// UTF-8 seems like the obvious choice
|
||||
iconv_t cd = iconv_open(GetRealEncodingName(encoding), "UTF-8");
|
||||
assert(cd != iconv_invalid);
|
||||
|
||||
char dbuff[4];
|
||||
char sbuff[] = "";
|
||||
char* dst = dbuff;
|
||||
char* src = sbuff;
|
||||
size_t dstLen = sizeof(dbuff);
|
||||
size_t srcLen = 1;
|
||||
|
||||
size_t ret = iconv(cd, &src, &srcLen, &dst, &dstLen);
|
||||
assert(ret != iconv_failed);
|
||||
assert(dst - dbuff > 0);
|
||||
iconv_close(cd);
|
||||
|
||||
return dst - dbuff;
|
||||
}
|
||||
|
||||
/// Open a conversion descriptor from sourceEncoding to destEncoding (either
/// may be a pretty name) and set up the terminator sizes and the converter
/// @throws UnsupportedConversion if iconv_open rejects the pair of encodings
IconvWrapper::IconvWrapper(const char* sourceEncoding, const char* destEncoding, bool enableSubst)
: toNulLen(0)
, fromNulLen(0)
, conv(NULL)
{
	const char* const realDest = GetRealEncodingName(destEncoding);
	const char* const realSource = GetRealEncodingName(sourceEncoding);

	cd = iconv_open(realDest, realSource);
	if (cd == iconv_invalid) {
		std::string message("Cannot convert from ");
		message += sourceEncoding;
		message += " to ";
		message += destEncoding;
		throw UnsupportedConversion(message);
	}

	// Everything below is done only once the source and destination
	// charsets are known to be valid
	toNulLen = NulSize(destEncoding);
	fromNulLen = NulSize(sourceEncoding);
	conv.reset(new Converter(enableSubst, destEncoding));
}
|
||||
/// Close the conversion descriptor, if one was successfully opened
IconvWrapper::~IconvWrapper() {
	if (cd == iconv_invalid) return;
	iconv_close(cd);
}
|
||||
|
||||
std::string IconvWrapper::Convert(std::string const& source) {
|
||||
std::string dest;
|
||||
Convert(source, dest);
|
||||
return dest;
|
||||
}
|
||||
void IconvWrapper::Convert(std::string const& source, std::string &dest) {
|
||||
/// @todo Investigate if it's worth using ropes to avoid having to convert
|
||||
/// everything twice. It probably isn't.
|
||||
size_t len = RequiredBufferSize(source);
|
||||
dest.resize(len);
|
||||
|
||||
// This is technically invalid as C++03 does not require that strings use
|
||||
// a single contiguous block of memory. However, no implementation has ever
|
||||
// not done so and C++0x does require that it be contiguous
|
||||
Convert(source.data(), source.size(), &dest[0], len);
|
||||
}
|
||||
|
||||
size_t IconvWrapper::Convert(const char* source, size_t sourceSize, char *dest, size_t destSize) {
|
||||
if (sourceSize == (size_t)-1) {
|
||||
sourceSize = SrcStrLen(source);
|
||||
}
|
||||
// POSIX requires that inbuf be const char **, but libiconv uses char**
|
||||
size_t res = (*conv)(cd, const_cast<char **>(&source), &sourceSize, &dest, &destSize);
|
||||
|
||||
if (res == iconv_failed) {
|
||||
switch (errno) {
|
||||
case E2BIG:
|
||||
throw BufferTooSmall(
|
||||
"Destination buffer was not large enough to fit converted "
|
||||
"string.");
|
||||
case EINVAL:
|
||||
throw BadInput(
|
||||
"One or more characters in the input string were not valid "
|
||||
"characters in the given input encoding");
|
||||
case EILSEQ:
|
||||
throw BadOutput(
|
||||
"One or more characters could not be converted to the "
|
||||
"selected target encoding and the version of iconv "
|
||||
"Aegisub was built with does not have useful fallbacks. "
|
||||
"For best results, please build Aegisub using a recent "
|
||||
"version of GNU iconv.");
|
||||
default:
|
||||
throw ConversionFailure("An unknown conversion failure occured");
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
size_t IconvWrapper::Convert(const char** source, size_t* sourceSize, char** dest, size_t* destSize) {
|
||||
return (*conv)(cd, const_cast<char **>(source), sourceSize, dest, destSize);
|
||||
}
|
||||
|
||||
size_t IconvWrapper::RequiredBufferSize(std::string const& str) {
|
||||
return RequiredBufferSize(str.data(), str.size());
|
||||
}
|
||||
|
||||
size_t IconvWrapper::RequiredBufferSize(const char* src, size_t srcLen) {
|
||||
char buff[512];
|
||||
size_t charsWritten = 0;
|
||||
size_t res;
|
||||
|
||||
do {
|
||||
char* dst = buff;
|
||||
size_t dstSize = sizeof(buff);
|
||||
res = (*conv)(cd, const_cast<char **>(&src), &srcLen, &dst, &dstSize);
|
||||
|
||||
charsWritten += dst - buff;
|
||||
} while (res == iconv_failed && errno == E2BIG);
|
||||
|
||||
if (res == iconv_failed) {
|
||||
switch (errno) {
|
||||
case EINVAL:
|
||||
throw BadInput(
|
||||
"One or more characters in the input string were not valid "
|
||||
"characters in the given input encoding");
|
||||
case EILSEQ:
|
||||
throw BadOutput(
|
||||
"One or more characters could not be converted to the "
|
||||
"selected target encoding and the version of iconv "
|
||||
"Aegisub was built with does not have useful fallbacks. "
|
||||
"For best results, please build Aegisub using a recent "
|
||||
"version of GNU iconv.");
|
||||
default:
|
||||
throw ConversionFailure("An unknown conversion failure occured");
|
||||
}
|
||||
}
|
||||
return charsWritten;
|
||||
}
|
||||
|
||||
/// @brief Length in bytes of a string whose terminator is nulLen bytes wide
/// @param str    String to measure
/// @param nulLen Width of the terminator in bytes; 1, 2 and 4 are supported
/// @return Number of bytes before the terminator, or (size_t)-1 for any other nulLen
static size_t mbstrlen(const char* str, size_t nulLen) {
	const char *ptr;
	switch (nulLen) {
		case 1:
			return strlen(str);
		case 2:
			// Compare byte by byte: str carries no alignment guarantee, so it
			// must not be read through a pointer to a wider integer type
			for (ptr = str; ptr[0] != 0 || ptr[1] != 0; ptr += 2) ;
			return ptr - str;
		case 4:
			for (ptr = str; ptr[0] != 0 || ptr[1] != 0 || ptr[2] != 0 || ptr[3] != 0; ptr += 4) ;
			return ptr - str;
		default:
			return (size_t)-1;
	}
}
|
||||
|
||||
size_t IconvWrapper::SrcStrLen(const char* str) {
|
||||
return mbstrlen(str, fromNulLen);
|
||||
|
||||
}
|
||||
size_t IconvWrapper::DstStrLen(const char* str) {
|
||||
return mbstrlen(str, toNulLen);
|
||||
}
|
||||
}
|
||||
}
|
107
aegisub/libaegisub/include/libaegisub/charset_conv.h
Normal file
107
aegisub/libaegisub/include/libaegisub/charset_conv.h
Normal file
|
@ -0,0 +1,107 @@
|
|||
// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
|
||||
//
|
||||
// Permission to use, copy, modify, and distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice appear in all copies.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
//
|
||||
// $Id$
|
||||
|
||||
/// @file charset_conv.h
|
||||
/// @brief Wrapper for libiconv to present a more C++-friendly API
|
||||
/// @ingroup libaegisub
|
||||
|
||||
#ifndef LAGI_PRE
|
||||
#include <string.h>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#endif
|
||||
|
||||
#include <libaegisub/exception.h>
|
||||
|
||||
namespace agi {
|
||||
namespace charset {
|
||||
|
||||
// Exception types thrown by the charset conversion code; ConversionFailure
// and its subtypes are thrown when a conversion itself fails.
// NOTE(review): the string arguments on BadInput ("…/EILSEQ") and BadOutput
// ("…/EINVAL") look swapped relative to charset_conv.cpp, which throws
// BadInput for EINVAL and BadOutput for EILSEQ -- confirm which is intended
DEFINE_BASE_EXCEPTION_NOINNER(ConvError, Exception)
|
||||
DEFINE_SIMPLE_EXCEPTION_NOINNER(UnsupportedConversion, ConvError, "iconv/unsupported")
|
||||
DEFINE_SIMPLE_EXCEPTION_NOINNER(ConversionFailure, ConvError, "iconv/failed")
|
||||
DEFINE_SIMPLE_EXCEPTION_NOINNER(BufferTooSmall, ConversionFailure, "iconv/failed/E2BIG")
|
||||
DEFINE_SIMPLE_EXCEPTION_NOINNER(BadInput, ConversionFailure, "iconv/failed/EILSEQ")
|
||||
DEFINE_SIMPLE_EXCEPTION_NOINNER(BadOutput, ConversionFailure, "iconv/failed/EINVAL")
|
||||
|
||||
/// @brief Get a list of supported encodings with user-friendly names
/// @tparam T Container type; it is default-constructed and filled through
///           push_back(), and tested with empty()
/// @return Reference to a function-local static list, filled on first use
template<class T>
T const& GetEncodingsList() {
	static T nameList;
	if (!nameList.empty()) return nameList;

	// charsets.def is a list of ADD(pretty, real); entries; only the pretty
	// names are collected here
#	define ADD(pretty, real) nameList.push_back(pretty)
#	include <libaegisub/charsets.def>
#	undef ADD

	return nameList;
}
|
||||
|
||||
typedef void* iconv_t;
|
||||
|
||||
/// @brief A C++ wrapper for iconv
|
||||
class IconvWrapper {
|
||||
private:
|
||||
// Helper class that abstracts away the differences betwen libiconv and
|
||||
// POSIX iconv implementations
|
||||
class Converter;
|
||||
|
||||
iconv_t cd;
|
||||
size_t toNulLen;
|
||||
size_t fromNulLen;
|
||||
std::auto_ptr<Converter> conv;
|
||||
|
||||
public:
|
||||
/// @brief Create a converter
|
||||
/// @param sourceEncoding Source encoding name, may be a pretty name
|
||||
/// @param destEncoding Destination encoding name, may be a pretty name
|
||||
/// @param enableSubst If true, when possible characters will be
|
||||
/// mutilated or dropped rather than a letting a
|
||||
/// conversion fail
|
||||
IconvWrapper(const char* sourceEncoding, const char* destEncoding, bool enableSubst = true);
|
||||
~IconvWrapper();
|
||||
|
||||
/// @brief Convert a string from the source to destination charset
|
||||
/// @param source String to convert
|
||||
/// @return Converted string. Note that std::string always uses a single byte
|
||||
/// terminator, so c_str() may not return a valid string if the dest
|
||||
/// charset has wider terminators
|
||||
std::string Convert(std::string const& source);
|
||||
/// @brief Convert a string from the source to destination charset
|
||||
/// @param source String to convert
|
||||
/// @param[out] dest String to place the result in
|
||||
void Convert(std::string const& source, std::string &dest);
|
||||
size_t Convert(const char* source, size_t sourceSize, char* dest, size_t destSize);
|
||||
/// Bare wrapper around iconv; see iconv documention for details
|
||||
size_t Convert(const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
|
||||
|
||||
/// @brief Get the required buffer size required to fit the source string in the target charset
|
||||
/// @param source A string in the source charset
|
||||
/// @param sourceSize Length of the source in bytes
|
||||
/// @return Bytes required, including NUL terminator if applicable
|
||||
size_t RequiredBufferSize(const char* source, size_t sourceSize);
|
||||
/// @brief Get the required buffer size required to fit the source string in the target charset
|
||||
/// @param str A string in the source charset
|
||||
/// @return Bytes required, not including space needed for NUL terminator
|
||||
size_t RequiredBufferSize(std::string const& str);
|
||||
|
||||
/// Encoding-aware strlen for strings encoding in the source charset
|
||||
size_t SrcStrLen(const char* str);
|
||||
/// Encoding-aware strlen for strings encoding in the destination charset
|
||||
size_t DstStrLen(const char* str);
|
||||
};
|
||||
|
||||
}
|
||||
}
|
29
aegisub/libaegisub/include/libaegisub/charset_conv_win.h
Normal file
29
aegisub/libaegisub/include/libaegisub/charset_conv_win.h
Normal file
|
@ -0,0 +1,29 @@
|
|||
// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
|
||||
//
|
||||
// Permission to use, copy, modify, and distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice appear in all copies.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
//
|
||||
// $Id$
|
||||
|
||||
/// @file charset_conv_win.h
|
||||
/// @brief Windows-specific charset conversion stuff
|
||||
/// @ingroup libaegisub windows
|
||||
|
||||
#include <libaegisub/charset_conv.h>
|
||||
|
||||
namespace agi {
|
||||
namespace charset {
|
||||
/// Convert a UTF-8 string to a string suitable for use with Win32 API functions
|
||||
std::wstring ConvertW(std::string const& src);
|
||||
/// Convert a wide string (treated as UTF-16LE) to a UTF-8 string
std::string ConvertW(std::wstring const& src);
|
||||
}
|
||||
}
|
116
aegisub/libaegisub/include/libaegisub/charsets.def
Normal file
116
aegisub/libaegisub/include/libaegisub/charsets.def
Normal file
|
@ -0,0 +1,116 @@
|
|||
ADD("Local", "");
|
||||
|
||||
ADD("Unicode (UTF-8)", "utf-8");
|
||||
ADD("Unicode (UTF-16)", "utf-16");
|
||||
ADD("Unicode (UTF-16BE)", "utf-16be");
|
||||
ADD("Unicode (UTF-16LE)", "utf-16le");
|
||||
ADD("Unicode (UTF-32)", "utf-32");
|
||||
ADD("Unicode (UTF-32BE)", "utf-32be");
|
||||
ADD("Unicode (UTF-32LE)", "utf-32le");
|
||||
ADD("Unicode (UTF-7)", "utf-7");
|
||||
|
||||
ADD("Arabic (IBM-864)", "ibm864");
|
||||
ADD("Arabic (IBM-864-I)", "ibm864i");
|
||||
ADD("Arabic (ISO-8859-6)", "iso-8859-6");
|
||||
ADD("Arabic (ISO-8859-6-E)", "iso-8859-6-e");
|
||||
ADD("Arabic (ISO-8859-6-I)", "iso-8859-6-i");
|
||||
ADD("Arabic (Langbox ISO-8859-6.16)", "x-iso-8859-6-16");
|
||||
ADD("Arabic (Langbox ISO-8859-6.8x)", "x-iso-8859-6-8-x");
|
||||
ADD("Arabic (MacArabic)", "x-mac-arabic");
|
||||
ADD("Arabic (Windows-1256)", "windows-1256");
|
||||
|
||||
ADD("Armenian (ARMSCII-8)", "armscii-8");
|
||||
|
||||
ADD("Baltic (ISO-8859-13)", "iso-8859-13");
|
||||
ADD("Baltic (ISO-8859-4)", "iso-8859-4");
|
||||
ADD("Baltic (Windows-1257)", "windows-1257");
|
||||
|
||||
ADD("Celtic (ISO-8859-14)", "iso-8859-14");
|
||||
|
||||
ADD("Central European (IBM-852)", "ibm852");
|
||||
ADD("Central European (ISO-8859-2)", "iso-8859-2");
|
||||
ADD("Central European (MacCE)", "x-mac-ce");
|
||||
ADD("Central European (Windows-1250)", "windows-1250");
|
||||
|
||||
ADD("Chinese Simplified (GB18030)", "gb18030");
|
||||
ADD("Chinese Simplified (GB2312)", "gb2312");
|
||||
ADD("Chinese Simplified (GBK)", "x-gbk");
|
||||
ADD("Chinese Simplified (HZ)", "hz-gb-2312");
|
||||
ADD("Chinese Simplified (ISO-2022-CN)", "iso-2022-cn");
|
||||
ADD("Chinese Traditional (Big5)", "big5");
|
||||
ADD("Chinese Traditional (Big5-HKSCS)", "big5-hkscs");
|
||||
ADD("Chinese Traditional (EUC-TW)", "x-euc-tw");
|
||||
|
||||
ADD("Croatian (MacCroatian)", "x-mac-croatian");
|
||||
|
||||
ADD("Cyrillic (IBM-855)", "ibm855");
|
||||
ADD("Cyrillic (ISO-8859-5)", "iso-8859-5");
|
||||
ADD("Cyrillic (ISO-IR-111)", "iso-ir-111");
|
||||
ADD("Cyrillic (KOI8-R)", "koi8-r");
|
||||
ADD("Cyrillic (MacCyrillic)", "x-mac-cyrillic");
|
||||
ADD("Cyrillic (Windows-1251)", "windows-1251");
|
||||
ADD("Cyrillic/Russian (CP-866)", "ibm866");
|
||||
ADD("Cyrillic/Ukrainian (KOI8-U)", "koi8-u");
|
||||
ADD("Cyrillic/Ukrainian (MacUkrainian)", "x-mac-ukrainian");
|
||||
|
||||
ADD("English (US-ASCII)", "us-ascii");
|
||||
|
||||
ADD("Farsi (MacFarsi)", "x-mac-farsi");
|
||||
|
||||
ADD("Georgian (GEOSTD8)", "geostd8");
|
||||
|
||||
ADD("Greek (ISO-8859-7)", "iso-8859-7");
|
||||
ADD("Greek (MacGreek)", "x-mac-greek");
|
||||
ADD("Greek (Windows-1253)", "windows-1253");
|
||||
|
||||
ADD("Gujarati (MacGujarati)", "x-mac-gujarati");
|
||||
ADD("Gurmukhi (MacGurmukhi)", "x-mac-gurmukhi");
|
||||
|
||||
ADD("Hebrew (IBM-862)", "ibm862");
|
||||
ADD("Hebrew (ISO-8859-8-E)", "iso-8859-8-e");
|
||||
ADD("Hebrew (ISO-8859-8-I)", "iso-8859-8-i");
|
||||
ADD("Hebrew (MacHebrew)", "x-mac-hebrew");
|
||||
ADD("Hebrew (Windows-1255)", "windows-1255");
|
||||
ADD("Hebrew Visual (ISO-8859-8)", "iso-8859-8");
|
||||
|
||||
ADD("Hindi (MacDevanagari)", "x-mac-devanagari");
|
||||
ADD("Hindi (SunDevanagari)", "x-sun-unicode-india-0");
|
||||
|
||||
ADD("Icelandic (MacIcelandic)", "x-mac-icelandic");
|
||||
|
||||
ADD("Japanese (EUC-JP)", "euc-jp");
|
||||
ADD("Japanese (ISO-2022-JP)", "iso-2022-jp");
|
||||
ADD("Japanese (Shift_JIS)", "shift_jis");
|
||||
|
||||
ADD("Korean (EUC-KR)", "euc-kr");
|
||||
ADD("Korean (ISO-2022-KR)", "iso-2022-kr");
|
||||
ADD("Korean (JOHAB)", "x-johab");
|
||||
ADD("Korean (UHC)", "x-windows-949");
|
||||
|
||||
ADD("Nordic (ISO-8859-10)", "iso-8859-10");
|
||||
|
||||
ADD("Romanian (ISO-8859-16)", "iso-8859-16");
|
||||
ADD("Romanian (MacRomanian)", "x-mac-romanian");
|
||||
|
||||
ADD("South European (ISO-8859-3)", "iso-8859-3");
|
||||
|
||||
ADD("Thai (IBM-874)", "ibm874");
|
||||
ADD("Thai (ISO-8859-11)", "iso-8859-11");
|
||||
ADD("Thai (TIS-620)", "tis-620");
|
||||
ADD("Thai (Windows-874)", "windows-874");
|
||||
|
||||
ADD("Turkish (IBM-857)", "ibm857");
|
||||
ADD("Turkish (ISO-8859-9)", "iso-8859-9");
|
||||
ADD("Turkish (MacTurkish)", "x-mac-turkish");
|
||||
ADD("Turkish (Windows-1254)", "windows-1254");
|
||||
|
||||
ADD("Vietnamese (TCVN)", "x-viet-tcvn5712");
|
||||
ADD("Vietnamese (VISCII)", "viscii");
|
||||
ADD("Vietnamese (VPS)", "x-viet-vps");
|
||||
ADD("Vietnamese (Windows-1258)", "windows-1258");
|
||||
|
||||
ADD("Western (IBM-850)", "ibm850");
|
||||
ADD("Western (ISO-8859-1)", "iso-8859-1");
|
||||
ADD("Western (ISO-8859-15)", "iso-8859-15");
|
||||
ADD("Western (MacRoman)", "x-mac-roman");
|
||||
ADD("Western (Windows-1252)", "windows-1252");
|
|
@ -6,6 +6,7 @@
|
|||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <time.h>
|
||||
|
||||
|
|
|
@ -25,8 +25,9 @@
|
|||
#include <fstream>
|
||||
#endif
|
||||
|
||||
#include "libaegisub/util.h"
|
||||
#include "libaegisub/util_win.h"
|
||||
#include <libaegisub/charset_conv_win.h>
|
||||
#include <libaegisub/util.h>
|
||||
#include <libaegisub/util_win.h>
|
||||
|
||||
namespace agi {
|
||||
namespace acs {
|
||||
|
@ -57,8 +58,7 @@ is a short (and incomplete) todo
|
|||
requires detecting the filesystem being used.
|
||||
*/
|
||||
void Check(const std::string &file, acs::Type type) {
|
||||
std::wstring wfile;
|
||||
wfile.assign(file.begin(), file.end());
|
||||
std::wstring wfile = agi::charset::ConvertW(file);
|
||||
|
||||
SECURITY_DESCRIPTOR* sd;
|
||||
DWORD len = 0;
|
||||
|
|
49
aegisub/libaegisub/windows/charset_conv_win.cpp
Normal file
49
aegisub/libaegisub/windows/charset_conv_win.cpp
Normal file
|
@ -0,0 +1,49 @@
|
|||
// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
|
||||
//
|
||||
// Permission to use, copy, modify, and distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice appear in all copies.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
//
|
||||
// $Id$
|
||||
|
||||
/// @file charset_conv_win.cpp
|
||||
/// @brief Windows-specific charset conversion stuff
|
||||
/// @ingroup libaegisub windows
|
||||
|
||||
#include <libaegisub/charset_conv_win.h>
|
||||
|
||||
namespace agi {
|
||||
namespace charset {
|
||||
|
||||
std::wstring ConvertW(std::string const& source) {
|
||||
static IconvWrapper w32Conv("utf-8", "utf-16le", false);
|
||||
|
||||
std::wstring dest;
|
||||
size_t len = w32Conv.RequiredBufferSize(source);
|
||||
dest.resize(len / sizeof(wchar_t));
|
||||
w32Conv.Convert(source.data(), source.size(), reinterpret_cast<char *>(&dest[0]), len);
|
||||
return dest;
|
||||
}
|
||||
|
||||
std::string ConvertW(std::wstring const& source) {
|
||||
static IconvWrapper w32Conv("utf-16le", "utf-8", false);
|
||||
|
||||
std::string dest;
|
||||
size_t srcLen = source.size() * sizeof(wchar_t);
|
||||
const char* src = reinterpret_cast<const char *>(source.c_str());
|
||||
size_t len = w32Conv.RequiredBufferSize(src, srcLen);
|
||||
dest.resize(len);
|
||||
w32Conv.Convert(src, srcLen, &dest[0], len);
|
||||
return dest;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -26,6 +26,7 @@
|
|||
#include <fstream>
|
||||
#endif
|
||||
|
||||
#include <libaegisub/charset_conv_win.h>
|
||||
#include "libaegisub/io.h"
|
||||
#include "libaegisub/log.h"
|
||||
#include "libaegisub/util.h"
|
||||
|
@ -34,11 +35,13 @@
|
|||
namespace agi {
|
||||
namespace io {
|
||||
|
||||
using agi::charset::ConvertW;
|
||||
|
||||
std::ifstream* Open(const std::string &file) {
|
||||
LOG_D("agi/io/open/file") << file;
|
||||
acs::CheckFileRead(file);
|
||||
|
||||
std::ifstream *stream = new std::ifstream(file.c_str());
|
||||
std::ifstream *stream = new std::ifstream(ConvertW(file).c_str());
|
||||
|
||||
if (stream->fail()) {
|
||||
delete stream;
|
||||
|
@ -53,7 +56,7 @@ Save::Save(const std::string& file): file_name(file) {
|
|||
LOG_D("agi/io/save/file") << file;
|
||||
const std::string pwd = util::DirName(file);
|
||||
|
||||
acs::CheckDirWrite(pwd.c_str());
|
||||
acs::CheckDirWrite(pwd);
|
||||
|
||||
try {
|
||||
acs::CheckFileWrite(file);
|
||||
|
@ -61,23 +64,19 @@ Save::Save(const std::string& file): file_name(file) {
|
|||
// If the file doesn't exist we create a 0 byte file, this so so
|
||||
// util::Rename will find it, and to let users know something went
|
||||
// wrong by leaving a 0 byte file.
|
||||
std::ofstream fp_touch(file.c_str());
|
||||
std::ofstream fp_touch(ConvertW(file).c_str());
|
||||
}
|
||||
|
||||
/// @todo This is a temp hack, proper implementation needs to come after
|
||||
/// Windows support is added. The code in the destructor needs fixing
|
||||
/// as well.
|
||||
const std::string tmp = file + "_tmp";
|
||||
|
||||
// This will open to file.XXXX. (tempfile)
|
||||
fp = new std::ofstream(tmp.c_str());
|
||||
fp = new std::ofstream(ConvertW(file + "_tmp").c_str());
|
||||
}
|
||||
|
||||
Save::~Save() {
|
||||
|
||||
const std::string tmp(file_name + "_tmp");
|
||||
delete fp;
|
||||
util::Rename(tmp, file_name);
|
||||
util::Rename(file_name + "_tmp", file_name);
|
||||
}
|
||||
|
||||
std::ofstream& Save::Get() {
|
||||
|
|
|
@ -30,23 +30,22 @@
|
|||
|
||||
#endif
|
||||
|
||||
//#include <string.h>
|
||||
#include "libaegisub/types.h"
|
||||
#include <libaegisub/charset_conv_win.h>
|
||||
#include "libaegisub/util.h"
|
||||
#include "libaegisub/util_win.h"
|
||||
|
||||
namespace agi {
|
||||
namespace util {
|
||||
|
||||
using agi::charset::ConvertW;
|
||||
|
||||
const std::string DirName(const std::string& path) {
|
||||
if (path.find('/') == std::string::npos) {
|
||||
const std::string cwd(".");
|
||||
return cwd;
|
||||
return ".";
|
||||
}
|
||||
|
||||
const std::string stripped = path.substr(0, path.rfind("/")+1);
|
||||
return stripped;
|
||||
return path.substr(0, path.rfind("/")+1);
|
||||
}
|
||||
|
||||
void Rename(const std::string& from, const std::string& to) {
|
||||
|
@ -58,19 +57,18 @@ void Rename(const std::string& from, const std::string& to) {
|
|||
acs::CheckDirWrite(DirName(to));
|
||||
}
|
||||
|
||||
MoveFileExA(from.c_str(), to.c_str(), MOVEFILE_REPLACE_EXISTING);
|
||||
MoveFileEx(ConvertW(from).c_str(), ConvertW(to).c_str(), MOVEFILE_REPLACE_EXISTING);
|
||||
}
|
||||
|
||||
std::string ErrorString(DWORD error) {
|
||||
LPSTR lpstr = NULL;
|
||||
LPWSTR lpstr = NULL;
|
||||
|
||||
if(FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, error, 0, (LPSTR)&lpstr, 0, NULL) == 0) {
|
||||
if(FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, error, 0, reinterpret_cast<LPWSTR>(&lpstr), 0, NULL) == 0) {
|
||||
/// @todo Return the actual 'unknown error' string from Windows.
|
||||
std::string str("Unknown Error");
|
||||
return str;
|
||||
return "Unknown Error";
|
||||
}
|
||||
|
||||
std::string str(lpstr);
|
||||
std::string str = ConvertW(lpstr);
|
||||
LocalFree(lpstr);
|
||||
return str;
|
||||
}
|
||||
|
|
|
@ -42,107 +42,21 @@
|
|||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <wx/hashmap.h>
|
||||
#include <wx/intl.h>
|
||||
#endif
|
||||
|
||||
WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash);
|
||||
class AegisubCSConvImpl : public AegisubCSConv {
|
||||
public:
|
||||
AegisubCSConvImpl() { }
|
||||
};
|
||||
|
||||
#if wxUSE_THREADS
|
||||
static wxMutex encodingListMutex;
|
||||
#endif
|
||||
|
||||
static const iconv_t iconv_invalid = (iconv_t)-1;
|
||||
static const size_t iconv_failed = (size_t)-1;
|
||||
#define ICONV_CONST_CAST(a) const_cast<ICONV_CONST char *>(a)
|
||||
|
||||
static wxArrayString *supportedEncodings = NULL;
|
||||
static wxArrayString *prettyEncodingList = NULL;
|
||||
static PrettyNamesHash *prettyEncodingHash = NULL;
|
||||
|
||||
AegisubCSConv::AegisubCSConv(const wxChar *mbEncName, bool enableSubst)
|
||||
: wcCharsetName(WCHAR_T_ENCODING)
|
||||
, mbCharsetName(GetRealEncodingName(mbEncName))
|
||||
, mbNulLen(0)
|
||||
, enableSubst(enableSubst)
|
||||
, m2w(wcCharsetName, mbCharsetName)
|
||||
, w2m(mbCharsetName, wcCharsetName)
|
||||
AegisubCSConv::AegisubCSConv()
|
||||
: conv("wchar_t", "")
|
||||
{
|
||||
if (m2w == iconv_invalid || w2m == iconv_invalid) {
|
||||
throw wxString::Format(L"Character set %s is not supported.", mbEncName);
|
||||
}
|
||||
|
||||
if (enableSubst) {
|
||||
invalidRepSize = FromWChar(invalidRep, sizeof(invalidRep), L"?") - GetMBNulLen();
|
||||
|
||||
#ifndef ICONV_POSIX
|
||||
fallbacks.data = this;
|
||||
fallbacks.mb_to_uc_fallback = NULL;
|
||||
fallbacks.mb_to_wc_fallback = NULL;
|
||||
fallbacks.uc_to_mb_fallback = ucToMbFallback;
|
||||
fallbacks.wc_to_mb_fallback = NULL;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
wxMBConv * AegisubCSConv::Clone() const {
|
||||
AegisubCSConv *c = new AegisubCSConv(mbCharsetName);
|
||||
c->mbNulLen = mbNulLen;
|
||||
return c;
|
||||
}
|
||||
|
||||
/// @brief Calculate the size of NUL in the target encoding via iconv
|
||||
/// @return The size in bytes of NUL
|
||||
size_t AegisubCSConv::GetMBNulLen() const {
|
||||
if (mbNulLen == 0) {
|
||||
const wchar_t nulStr[] = L"";
|
||||
char outBuff[8];
|
||||
size_t inLen = sizeof(wchar_t);
|
||||
size_t outLen = sizeof(outBuff);
|
||||
char * inPtr = (char *)nulStr;
|
||||
char * outPtr = outBuff;
|
||||
|
||||
size_t res = iconv(w2m, &inPtr, &inLen, &outPtr, &outLen);
|
||||
|
||||
if (res != 0)
|
||||
mbNulLen = (size_t)-1;
|
||||
else
|
||||
mbNulLen = sizeof(outBuff) - outLen;
|
||||
}
|
||||
return mbNulLen;
|
||||
}
|
||||
|
||||
size_t AegisubCSConv::MBBuffLen(const char * str) const {
|
||||
size_t nulLen = GetMBNulLen();
|
||||
const char *ptr;
|
||||
switch (nulLen) {
|
||||
case 1:
|
||||
return strlen(str);
|
||||
case 2:
|
||||
for (ptr = str; *reinterpret_cast<const uint16_t *>(ptr) != 0; ptr += 2) ;
|
||||
return ptr - str;
|
||||
case 4:
|
||||
for (ptr = str; *reinterpret_cast<const uint32_t *>(ptr) != 0; ptr += 4) ;
|
||||
return ptr - str;
|
||||
default:
|
||||
return (size_t)-1;
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Convert a string from multibyte to wide characters
|
||||
/// @param dst Destination buffer.
|
||||
/// @param dstSize Length of destination buffer in wchar_ts
|
||||
/// @param src Source multibyte string
|
||||
/// @param srcLen Length of source buffer in bytes, or -1 to autodetect
|
||||
/// @return The number of wchar_ts needed to store the string in the target charset
|
||||
size_t AegisubCSConv::ToWChar(wchar_t *dst, size_t dstSize, const char *src, size_t srcLen) const {
|
||||
return doConversion(
|
||||
m2w,
|
||||
reinterpret_cast<char *>(dst),
|
||||
dstSize * sizeof(wchar_t),
|
||||
const_cast<char *>(src),
|
||||
srcLen == wxNO_LEN ? MBBuffLen(src) + GetMBNulLen() : srcLen
|
||||
) / sizeof(wchar_t);
|
||||
throw agi::charset::UnsupportedConversion("Cannot convert to local with csConvLocal");
|
||||
}
|
||||
|
||||
/// @brief Convert a string from wide characters to multibyte
|
||||
|
@ -152,309 +66,19 @@ size_t AegisubCSConv::ToWChar(wchar_t *dst, size_t dstSize, const char *src, siz
|
|||
/// @param srcLen Length in wchar_ts of source, or -1 to autodetect
|
||||
/// @return The number of bytes needed to store the string in the target charset
|
||||
size_t AegisubCSConv::FromWChar(char *dst, size_t dstSize, const wchar_t *src, size_t srcLen) const {
|
||||
return doConversion(
|
||||
w2m,
|
||||
dst,
|
||||
dstSize,
|
||||
reinterpret_cast<char *>(const_cast<wchar_t *>(src)),
|
||||
(srcLen == wxNO_LEN ? wcslen(src) + 1 : srcLen) * sizeof(wchar_t)
|
||||
);
|
||||
try {
|
||||
if (srcLen != (size_t)-1) {
|
||||
if (src[srcLen - 1] == 0) srcLen -= 1;
|
||||
srcLen *= sizeof(wchar_t);
|
||||
}
|
||||
|
||||
// Perform a conversion if a buffer is given or calculate the needed buffer size if not
|
||||
size_t AegisubCSConv::doConversion(iconv_t cd, char *dst, size_t dstSize, char *src, size_t srcSize) const {
|
||||
if (dstSize > 0) {
|
||||
return iconvWrapper(cd, &src, &srcSize, &dst, &dstSize);
|
||||
if (dstSize == 0) {
|
||||
return conv.RequiredBufferSize(reinterpret_cast<const char*>(src), srcLen);
|
||||
}
|
||||
|
||||
// No destination given, so calculate the needed buffer size instead
|
||||
char buff[32];
|
||||
size_t buffSize = 32;
|
||||
size_t charsWritten = 0;
|
||||
size_t res;
|
||||
|
||||
do {
|
||||
dst = buff;
|
||||
dstSize = buffSize;
|
||||
res = iconvWrapper(cd, &src, &srcSize, &dst, &dstSize);
|
||||
|
||||
charsWritten += dst - buff;
|
||||
} while (res == iconv_failed && errno == E2BIG);
|
||||
|
||||
if (res == iconv_failed) return wxCONV_FAILED;
|
||||
return charsWritten;
|
||||
return conv.Convert(reinterpret_cast<const char*>(src), srcLen, dst, dstSize);
|
||||
}
|
||||
|
||||
// Actually perform a conversion via iconv
|
||||
size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft,
|
||||
char **outbuf, size_t *outbytesleft) const {
|
||||
|
||||
#if wxUSE_THREADS
|
||||
wxMutexLocker lock(iconvMutex);
|
||||
#endif
|
||||
|
||||
char *outbuforig = *outbuf;
|
||||
size_t res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
|
||||
|
||||
if (res != iconv_failed)
|
||||
return *outbuf - outbuforig;
|
||||
if (!enableSubst)
|
||||
return iconv_failed;
|
||||
|
||||
#ifdef ICONV_POSIX
|
||||
if (errno == EILSEQ) {
|
||||
throw
|
||||
L"One or more characters do not fit in the selected "
|
||||
L"encoding and the version of iconv Aegisub was built with"
|
||||
L" does not have useful fallbacks. For best results, "
|
||||
L"please rebuild Aegisub using a recent version of GNU iconv.";
|
||||
}
|
||||
return wxCONV_FAILED;
|
||||
#else
|
||||
// Save original errno so we can return it rather than the result from iconvctl
|
||||
int err = errno;
|
||||
|
||||
// Some characters in the input string do not exist in the output encoding
|
||||
if (res == iconv_failed && err == EILSEQ) {
|
||||
// first try transliteration only
|
||||
int transliterate = 1;
|
||||
iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
|
||||
res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
|
||||
err = errno;
|
||||
transliterate = 0;
|
||||
iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
|
||||
}
|
||||
if (res == iconv_failed && err == EILSEQ) {
|
||||
// Conversion still failed with transliteration enabled, so try our substitution
|
||||
iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
|
||||
res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
|
||||
err = errno;
|
||||
iconvctl(cd, ICONV_SET_FALLBACKS, NULL);
|
||||
}
|
||||
if (res == iconv_failed && err == EILSEQ) {
|
||||
// Conversion still failed, so just drop any invalid characters
|
||||
int discard = 1;
|
||||
iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &discard);
|
||||
res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
|
||||
err = errno;
|
||||
discard = 0;
|
||||
iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &discard);
|
||||
}
|
||||
|
||||
errno = err;
|
||||
if (res == iconv_failed) return wxCONV_FAILED;
|
||||
return *outbuf - outbuforig;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/// @brief GNU iconv character substitution callback
|
||||
/// @param code Unicode character which could not be converted
|
||||
/// @param callback Callback to tell iconv what string to use instead
|
||||
/// @param callback_arg Iconv userdata for callback
|
||||
/// @param convPtr AegisubCSConv instance to use
|
||||
void AegisubCSConv::ucToMbFallback(
|
||||
unsigned int code,
|
||||
void (*callback) (const char *buf, size_t buflen, void* callback_arg),
|
||||
void *callback_arg,
|
||||
void *convPtr)
|
||||
{
|
||||
// At some point in the future, this should probably switch to a real mapping
|
||||
// For now, there are just three cases: BOM to nothing, '\' to itself
|
||||
// (for Shift-JIS, which does not have \) and everything else to '?'
|
||||
if (code == 0xFEFF) return;
|
||||
if (code == 0x5C) callback("\\", 1, callback_arg);
|
||||
else {
|
||||
AegisubCSConv *self = static_cast<AegisubCSConv *>(convPtr);
|
||||
callback(self->invalidRep, self->invalidRepSize, callback_arg);
|
||||
catch (agi::charset::ConvError const&) {
|
||||
return (size_t)-1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef ICONV_POSIX
|
||||
/// @brief Callback for iconvlist
|
||||
/// @param namescount Number of names in names
|
||||
/// @param names Names to add to the list
|
||||
/// @param data Unused userdata field
|
||||
int addEncoding(unsigned int namescount, const char * const * names, void* data) {
|
||||
for (unsigned int i = 0; i < namescount; i++) {
|
||||
supportedEncodings->Add(wxString::FromAscii(names[i]));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
wxArrayString AegisubCSConv::GetAllSupportedEncodings() {
|
||||
#if wxUSE_THREADS
|
||||
wxMutexLocker lock(encodingListMutex);
|
||||
#endif
|
||||
if (supportedEncodings == NULL) {
|
||||
supportedEncodings = new wxArrayString();
|
||||
#ifndef ICONV_POSIX
|
||||
iconvlist(addEncoding, NULL);
|
||||
supportedEncodings->Sort();
|
||||
#endif
|
||||
}
|
||||
return *supportedEncodings;
|
||||
}
|
||||
|
||||
wxString AegisubCSConv::GetRealEncodingName(wxString name) {
|
||||
if (name.Lower() == L"local") return wxLocale::GetSystemEncodingName();
|
||||
if (prettyEncodingList == NULL) return name;
|
||||
|
||||
PrettyNamesHash::iterator realName = prettyEncodingHash->find(name);
|
||||
if (realName != prettyEncodingHash->end()) {
|
||||
return realName->second;
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
||||
wxArrayString AegisubCSConv::GetEncodingsList() {
|
||||
#if wxUSE_THREADS
|
||||
wxMutexLocker lock(encodingListMutex);
|
||||
#endif
|
||||
if (prettyEncodingList == NULL) {
|
||||
struct { const char *pretty, *real; } encodingNames[] = {
|
||||
{"Unicode (UTF-8)", "utf-8"},
|
||||
{"Unicode (UTF-16)", "utf-16"},
|
||||
{"Unicode (UTF-16BE)", "utf-16be"},
|
||||
{"Unicode (UTF-16LE)", "utf-16le"},
|
||||
{"Unicode (UTF-32)", "utf-32"},
|
||||
{"Unicode (UTF-32BE)", "utf-32be"},
|
||||
{"Unicode (UTF-32LE)", "utf-32le"},
|
||||
{"Unicode (UTF-7)", "utf-7"},
|
||||
|
||||
{"Arabic (IBM-864)", "ibm864"},
|
||||
{"Arabic (IBM-864-I)", "ibm864i"},
|
||||
{"Arabic (ISO-8859-6)", "iso-8859-6"},
|
||||
{"Arabic (ISO-8859-6-E)", "iso-8859-6-e"},
|
||||
{"Arabic (ISO-8859-6-I)", "iso-8859-6-i"},
|
||||
{"Arabic (Langbox ISO-8859-6.16)", "x-iso-8859-6-16"},
|
||||
{"Arabic (Langbox ISO-8859-6.8x)", "x-iso-8859-6-8-x"},
|
||||
{"Arabic (MacArabic)", "x-mac-arabic"},
|
||||
{"Arabic (Windows-1256)", "windows-1256"},
|
||||
|
||||
{"Armenian (ARMSCII-8)", "armscii-8"},
|
||||
|
||||
{"Baltic (ISO-8859-13)", "iso-8859-13"},
|
||||
{"Baltic (ISO-8859-4)", "iso-8859-4"},
|
||||
{"Baltic (Windows-1257)", "windows-1257"},
|
||||
|
||||
{"Celtic (ISO-8859-14)", "iso-8859-14"},
|
||||
|
||||
{"Central European (IBM-852)", "ibm852"},
|
||||
{"Central European (ISO-8859-2)", "iso-8859-2"},
|
||||
{"Central European (MacCE)", "x-mac-ce"},
|
||||
{"Central European (Windows-1250)", "windows-1250"},
|
||||
|
||||
{"Chinese Simplified (GB18030)", "gb18030"},
|
||||
{"Chinese Simplified (GB2312)", "gb2312"},
|
||||
{"Chinese Simplified (GBK)", "x-gbk"},
|
||||
{"Chinese Simplified (HZ)", "hz-gb-2312"},
|
||||
{"Chinese Simplified (ISO-2022-CN)", "iso-2022-cn"},
|
||||
{"Chinese Traditional (Big5)", "big5"},
|
||||
{"Chinese Traditional (Big5-HKSCS)", "big5-hkscs"},
|
||||
{"Chinese Traditional (EUC-TW)", "x-euc-tw"},
|
||||
|
||||
{"Croatian (MacCroatian)", "x-mac-croatian"},
|
||||
|
||||
{"Cyrillic (IBM-855)", "ibm855"},
|
||||
{"Cyrillic (ISO-8859-5)", "iso-8859-5"},
|
||||
{"Cyrillic (ISO-IR-111)", "iso-ir-111"},
|
||||
{"Cyrillic (KOI8-R)", "koi8-r"},
|
||||
{"Cyrillic (MacCyrillic)", "x-mac-cyrillic"},
|
||||
{"Cyrillic (Windows-1251)", "windows-1251"},
|
||||
{"Cyrillic/Russian (CP-866)", "ibm866"},
|
||||
{"Cyrillic/Ukrainian (KOI8-U)", "koi8-u"},
|
||||
{"Cyrillic/Ukrainian (MacUkrainian)", "x-mac-ukrainian"},
|
||||
|
||||
{"English (US-ASCII)", "us-ascii"},
|
||||
|
||||
{"Farsi (MacFarsi)", "x-mac-farsi"},
|
||||
|
||||
{"Georgian (GEOSTD8)", "geostd8"},
|
||||
|
||||
{"Greek (ISO-8859-7)", "iso-8859-7"},
|
||||
{"Greek (MacGreek)", "x-mac-greek"},
|
||||
{"Greek (Windows-1253)", "windows-1253"},
|
||||
|
||||
{"Gujarati (MacGujarati)", "x-mac-gujarati"},
|
||||
{"Gurmukhi (MacGurmukhi)", "x-mac-gurmukhi"},
|
||||
|
||||
{"Hebrew (IBM-862)", "ibm862"},
|
||||
{"Hebrew (ISO-8859-8-E)", "iso-8859-8-e"},
|
||||
{"Hebrew (ISO-8859-8-I)", "iso-8859-8-i"},
|
||||
{"Hebrew (MacHebrew)", "x-mac-hebrew"},
|
||||
{"Hebrew (Windows-1255)", "windows-1255"},
|
||||
{"Hebrew Visual (ISO-8859-8)", "iso-8859-8"},
|
||||
|
||||
{"Hindi (MacDevanagari)", "x-mac-devanagari"},
|
||||
{"Hindi (SunDevanagari)", "x-sun-unicode-india-0"},
|
||||
|
||||
{"Icelandic (MacIcelandic)", "x-mac-icelandic"},
|
||||
|
||||
{"Japanese (EUC-JP)", "euc-jp"},
|
||||
{"Japanese (ISO-2022-JP)", "iso-2022-jp"},
|
||||
{"Japanese (Shift_JIS)", "shift_jis"},
|
||||
|
||||
{"Korean (EUC-KR)", "euc-kr"},
|
||||
{"Korean (ISO-2022-KR)", "iso-2022-kr"},
|
||||
{"Korean (JOHAB)", "x-johab"},
|
||||
{"Korean (UHC)", "x-windows-949"},
|
||||
|
||||
{"Nordic (ISO-8859-10)", "iso-8859-10"},
|
||||
|
||||
{"Romanian (ISO-8859-16)", "iso-8859-16"},
|
||||
{"Romanian (MacRomanian)", "x-mac-romanian"},
|
||||
|
||||
{"South European (ISO-8859-3)", "iso-8859-3"},
|
||||
|
||||
{"Thai (IBM-874)", "ibm874"},
|
||||
{"Thai (ISO-8859-11)", "iso-8859-11"},
|
||||
{"Thai (TIS-620)", "tis-620"},
|
||||
{"Thai (Windows-874)", "windows-874"},
|
||||
|
||||
{"Turkish (IBM-857)", "ibm857"},
|
||||
{"Turkish (ISO-8859-9)", "iso-8859-9"},
|
||||
{"Turkish (MacTurkish)", "x-mac-turkish"},
|
||||
{"Turkish (Windows-1254)", "windows-1254"},
|
||||
|
||||
{"Vietnamese (TCVN)", "x-viet-tcvn5712"},
|
||||
{"Vietnamese (VISCII)", "viscii"},
|
||||
{"Vietnamese (VPS)", "x-viet-vps"},
|
||||
{"Vietnamese (Windows-1258)", "windows-1258"},
|
||||
|
||||
{"Western (IBM-850)", "ibm850"},
|
||||
{"Western (ISO-8859-1)", "iso-8859-1"},
|
||||
{"Western (ISO-8859-15)", "iso-8859-15"},
|
||||
{"Western (MacRoman)", "x-mac-roman"},
|
||||
{"Western (Windows-1252)", "windows-1252"},
|
||||
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
PrettyNamesHash *map = new PrettyNamesHash(100);
|
||||
wxArrayString *arr = new wxArrayString();
|
||||
arr->Add(L"Local");
|
||||
|
||||
for (int i = 0; encodingNames[i].real != NULL; i++) {
|
||||
// Verify that iconv actually supports converting to and from this encoding
|
||||
iconv_t cd = iconv_open(encodingNames[i].real, WCHAR_T_ENCODING);
|
||||
if (cd == iconv_invalid) continue;
|
||||
iconv_close(cd);
|
||||
|
||||
cd = iconv_open(WCHAR_T_ENCODING, encodingNames[i].real);
|
||||
if (cd == iconv_invalid) continue;
|
||||
iconv_close(cd);
|
||||
|
||||
wxString pretty = wxString::FromAscii(encodingNames[i].pretty);
|
||||
arr->Add(pretty);
|
||||
(*map)[pretty] = wxString::FromAscii(encodingNames[i].real);
|
||||
}
|
||||
|
||||
prettyEncodingList = arr;
|
||||
prettyEncodingHash = map;
|
||||
}
|
||||
return *prettyEncodingList;
|
||||
}
|
||||
static AegisubCSConv localConv(L"Local", false);
|
||||
AegisubCSConv& csConvLocal(localConv);
|
||||
static AegisubCSConvImpl localConv;
|
||||
AegisubCSConv& csConvLocal = localConv;
|
||||
|
|
|
@ -35,135 +35,38 @@
|
|||
///
|
||||
|
||||
#ifndef AGI_PRE
|
||||
#include <iconv.h>
|
||||
#include <wchar.h>
|
||||
|
||||
#include <wx/arrstr.h>
|
||||
#include <wx/string.h>
|
||||
#include <wx/strconv.h>
|
||||
#include <wx/thread.h>
|
||||
#endif
|
||||
|
||||
#include "aegisub_endian.h"
|
||||
|
||||
#if !defined(_LIBICONV_VERSION) || _LIBICONV_VERSION < 0x010A || defined(LIBICONV_PLUG)
|
||||
#define ICONV_POSIX
|
||||
#endif
|
||||
|
||||
/// @class iconv_wrapper
|
||||
/// @brief RAII wrapper for iconv
|
||||
class iconv_wrapper {
|
||||
private:
|
||||
iconv_t conv;
|
||||
public:
|
||||
iconv_wrapper(const char *to, const char *from)
|
||||
: conv(iconv_open(to, from))
|
||||
{ }
|
||||
iconv_wrapper(wxString const& to, wxString const& from)
|
||||
: conv(iconv_open(to.ToAscii(), from.ToAscii()))
|
||||
{ }
|
||||
iconv_wrapper(const char *to, wxString const& from)
|
||||
: conv(iconv_open(to, from.ToAscii()))
|
||||
{ }
|
||||
iconv_wrapper(wxString const& to, const char *from)
|
||||
: conv(iconv_open(to.ToAscii(), from))
|
||||
{ }
|
||||
~iconv_wrapper() {
|
||||
if (conv != (iconv_t)-1) iconv_close(conv);
|
||||
}
|
||||
operator iconv_t() {
|
||||
return conv;
|
||||
}
|
||||
operator const iconv_t() const {
|
||||
return conv;
|
||||
}
|
||||
};
|
||||
#include <libaegisub/charset_conv.h>
|
||||
|
||||
/// @class AegisubCSConv
|
||||
/// @brief wxMBConv implementation for converting to and from unicode
|
||||
class AegisubCSConv : public wxMBConv {
|
||||
public:
|
||||
/// @param mbEncName Multibyte encoding to convert to/from
|
||||
/// @param enableSubst Whether to substitute characters when needed.
|
||||
/// By default, any conversion that would be lossy will fail
|
||||
/// When enableSubst is true, conversions to multibyte with a sufficiently
|
||||
/// large buffer are guaranteed to succeed, with characters dropped or
|
||||
/// changed as needed to fit the string into the target encoding.
|
||||
AegisubCSConv(const wxChar *mbEncName, bool enableSubst = false);
|
||||
|
||||
// wxMBConv implementation; see strconv.h for usage details
|
||||
size_t ToWChar(wchar_t *dst, size_t dstLen, const char *src, size_t srcLen = wxNO_LEN) const;
|
||||
size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const;
|
||||
size_t GetMBNulLen() const;
|
||||
wxMBConv *Clone() const;
|
||||
|
||||
/// @brief Multibyte-aware strlen
|
||||
/// @return Length in bytes of str (excluding terminator)
|
||||
size_t MBBuffLen(const char *str) const;
|
||||
|
||||
/// @brief Get a list of supported encodings with user-friendly names
|
||||
static wxArrayString GetEncodingsList();
|
||||
/// @brief Get a list of all encodings supported by iconv
|
||||
/// Requires GNU iconv for useful results
|
||||
static wxArrayString GetAllSupportedEncodings();
|
||||
/// @brief Map a user-friendly encoding name to the real encoding name
|
||||
static wxString GetRealEncodingName(wxString name);
|
||||
wxMBConv *Clone() const { return NULL; };
|
||||
|
||||
protected:
|
||||
AegisubCSConv();
|
||||
private:
|
||||
// The smattering of mutable variables here is because ToWChar and
|
||||
// FromWChar are const in wxMBConv, but we require minor mutation for
|
||||
// things like locks (as iconv is not thread-safe)
|
||||
wxString wcCharsetName;
|
||||
wxString mbCharsetName;
|
||||
mutable size_t mbNulLen;
|
||||
bool enableSubst;
|
||||
|
||||
size_t doConversion(iconv_t cd, char *dst, size_t dstSize, char *src, size_t srcSize) const;
|
||||
size_t iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) const;
|
||||
|
||||
static void ucToMbFallback(
|
||||
unsigned int code,
|
||||
void (*callback) (const char *buf, size_t buflen, void* callback_arg),
|
||||
void *callback_arg,
|
||||
void *convPtr);
|
||||
|
||||
/// Replacement character for characters which do not fit in the target
|
||||
/// encoding and iconv does not have an appropriate substitute for
|
||||
char invalidRep[8];
|
||||
size_t invalidRepSize;
|
||||
|
||||
#ifndef ICONV_POSIX
|
||||
mutable iconv_fallbacks fallbacks;
|
||||
#endif
|
||||
AegisubCSConv(const AegisubCSConv&);
|
||||
AegisubCSConv& operator=(const AegisubCSConv&);
|
||||
wxString localCharset;
|
||||
|
||||
#if wxUSE_THREADS
|
||||
mutable wxMutex iconvMutex;
|
||||
#endif
|
||||
|
||||
protected:
|
||||
iconv_wrapper m2w, w2m;
|
||||
// ToWChar and FromWChar are const in wxMBConv, but iconv can't be used
|
||||
// immutably
|
||||
mutable agi::charset::IconvWrapper conv;
|
||||
};
|
||||
|
||||
// Predefined conversion for the current locale, intended to be a drop-in
|
||||
// replacement for wxConvLocal
|
||||
extern AegisubCSConv& csConvLocal;
|
||||
|
||||
#ifdef HAVE_BIG_ENDIAN
|
||||
# if SIZEOF_WCHAR_T == 4
|
||||
# define WCHAR_T_ENCODING "UTF-32BE"
|
||||
# elif SIZEOF_WCHAR_T == 2
|
||||
# define WCHAR_T_ENCODING "UTF-16BE"
|
||||
# endif
|
||||
#elif defined(HAVE_LITTLE_ENDIAN)
|
||||
# if SIZEOF_WCHAR_T == 4
|
||||
# define WCHAR_T_ENCODING "UTF-32LE"
|
||||
# elif SIZEOF_WCHAR_T == 2
|
||||
# define WCHAR_T_ENCODING "UTF-16LE"
|
||||
# endif
|
||||
#else
|
||||
# if SIZEOF_WCHAR_T == 4
|
||||
# define WCHAR_T_ENCODING ((Endian::MachineToBig((uint32_t)1) == 1) ? "UTF-32BE" : "UTF-32LE")
|
||||
# elif SIZEOF_WCHAR_T == 2
|
||||
# define WCHAR_T_ENCODING ((Endian::MachineToBig((uint32_t)1) == 1) ? "UTF-16BE" : "UTF-16LE")
|
||||
# endif
|
||||
#endif
|
||||
|
|
|
@ -7,7 +7,7 @@ wxArrayString lagi_MRU_wxAS(const wxString &list) {
|
|||
const agi::MRUManager::MRUListMap *map_list = AegisubApp::Get()->mru->Get(STD_STR(list));
|
||||
|
||||
for (agi::MRUManager::MRUListMap::const_iterator i_lst = map_list->begin(); i_lst != map_list->end(); ++i_lst) {
|
||||
work.Add(wxString(i_lst->second));
|
||||
work.Add(wxString(i_lst->second.c_str(), wxConvUTF8));
|
||||
}
|
||||
|
||||
return work;
|
||||
|
|
|
@ -8,8 +8,8 @@
|
|||
|
||||
#include <libaegisub/colour.h>
|
||||
|
||||
#define STD_STR(x) std::string(x.mb_str())
|
||||
#define STD_STR(x) std::string(x.utf8_str())
|
||||
|
||||
inline wxColour lagi_wxColour(const agi::Colour &colour) { return wxColour(colour); }
|
||||
inline wxString lagi_wxString(const std::string &str) { return wxString(str); }
|
||||
inline wxString lagi_wxString(const std::string &str) { return wxString(str.c_str(), wxConvUTF8); }
|
||||
wxArrayString lagi_MRU_wxAS(const wxString &list);
|
||||
|
|
|
@ -34,9 +34,6 @@
|
|||
/// @ingroup export
|
||||
///
|
||||
|
||||
|
||||
///////////
|
||||
// Headers
|
||||
#include "config.h"
|
||||
|
||||
#ifndef AGI_PRE
|
||||
|
@ -102,7 +99,7 @@ DialogExport::DialogExport (wxWindow *parent)
|
|||
|
||||
// Charset dropdown list
|
||||
wxStaticText *charset_list_label = new wxStaticText(this, -1, _("Text encoding:"));
|
||||
CharsetList = new wxChoice(this, Charset_List_Box, wxDefaultPosition, wxDefaultSize, AegisubCSConv::GetEncodingsList());
|
||||
CharsetList = new wxChoice(this, Charset_List_Box, wxDefaultPosition, wxDefaultSize, agi::charset::GetEncodingsList<wxArrayString>());
|
||||
wxSizer *charset_list_sizer = new wxBoxSizer(wxHORIZONTAL);
|
||||
charset_list_sizer->Add(charset_list_label, 0, wxALIGN_CENTER | wxRIGHT, 5);
|
||||
charset_list_sizer->Add(CharsetList, 1, wxEXPAND);
|
||||
|
@ -219,6 +216,9 @@ void DialogExport::OnProcess(wxCommandEvent &event) {
|
|||
wxString err(error);
|
||||
wxMessageBox(err, _T("Error exporting subtitles"), wxOK | wxICON_ERROR, this);
|
||||
}
|
||||
catch (const agi::charset::ConvError& err) {
|
||||
wxMessageBox(err.GetMessage(), _T("Error exporting subtitles"), wxOK | wxICON_ERROR, this);
|
||||
}
|
||||
catch (...) {
|
||||
wxMessageBox(_T("Unknown error"), _T("Error exporting subtitles"), wxOK | wxICON_ERROR, this);
|
||||
}
|
||||
|
|
|
@ -713,8 +713,7 @@ void FrameMain::LoadSubtitles (wxString filename,wxString charset) {
|
|||
// Make sure that file isn't actually a timecode file
|
||||
try {
|
||||
TextFileReader testSubs(filename,charset);
|
||||
charset = testSubs.GetCurrentEncoding();
|
||||
isBinary = charset == _T("binary");
|
||||
isBinary = testSubs.IsBinary();
|
||||
if (!isBinary && testSubs.HasMoreLines()) {
|
||||
wxString cur = testSubs.ReadLineFromFile();
|
||||
if (cur.Left(10) == _T("# timecode")) {
|
||||
|
@ -817,8 +816,7 @@ bool FrameMain::SaveSubtitles(bool saveas,bool withCharset) {
|
|||
// Get charset
|
||||
wxString charset = _T("");
|
||||
if (withCharset) {
|
||||
wxArrayString choices = AegisubCSConv::GetEncodingsList();
|
||||
charset = wxGetSingleChoice(_("Choose charset code:"), _T("Charset"),choices,this,-1, -1,true,250,200);
|
||||
charset = wxGetSingleChoice(_("Choose charset code:"), _T("Charset"),agi::charset::GetEncodingsList<wxArrayString>(),this,-1, -1,true,250,200);
|
||||
if (charset.IsEmpty()) return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -538,7 +538,7 @@ int FrameMain::AddMacroMenuItems(wxMenu *menu, const std::vector<Automation4::Fe
|
|||
///
|
||||
void FrameMain::OnOpenRecentSubs(wxCommandEvent &event) {
|
||||
int number = event.GetId()-Menu_File_Recent;
|
||||
LoadSubtitles(AegisubApp::Get()->mru->GetEntry("Subtitle", number));
|
||||
LoadSubtitles(lagi_wxString(AegisubApp::Get()->mru->GetEntry("Subtitle", number)));
|
||||
}
|
||||
|
||||
|
||||
|
@ -548,7 +548,7 @@ void FrameMain::OnOpenRecentSubs(wxCommandEvent &event) {
|
|||
///
|
||||
void FrameMain::OnOpenRecentVideo(wxCommandEvent &event) {
|
||||
int number = event.GetId()-Menu_Video_Recent;
|
||||
LoadVideo(AegisubApp::Get()->mru->GetEntry("Video", number));
|
||||
LoadVideo(lagi_wxString(AegisubApp::Get()->mru->GetEntry("Video", number)));
|
||||
}
|
||||
|
||||
|
||||
|
@ -558,7 +558,7 @@ void FrameMain::OnOpenRecentVideo(wxCommandEvent &event) {
|
|||
///
|
||||
void FrameMain::OnOpenRecentTimecodes(wxCommandEvent &event) {
|
||||
int number = event.GetId()-Menu_Timecodes_Recent;
|
||||
LoadVFR(AegisubApp::Get()->mru->GetEntry("Timecodes", number));
|
||||
LoadVFR(lagi_wxString(AegisubApp::Get()->mru->GetEntry("Timecodes", number)));
|
||||
}
|
||||
|
||||
|
||||
|
@ -568,7 +568,7 @@ void FrameMain::OnOpenRecentTimecodes(wxCommandEvent &event) {
|
|||
///
|
||||
void FrameMain::OnOpenRecentKeyframes(wxCommandEvent &event) {
|
||||
int number = event.GetId()-Menu_Keyframes_Recent;
|
||||
KeyFrameFile::Load(AegisubApp::Get()->mru->GetEntry("Keyframes", number));
|
||||
KeyFrameFile::Load(lagi_wxString(AegisubApp::Get()->mru->GetEntry("Keyframes", number)));
|
||||
videoBox->videoSlider->Refresh();
|
||||
audioBox->audioDisplay->Update();
|
||||
Refresh();
|
||||
|
@ -581,7 +581,7 @@ void FrameMain::OnOpenRecentKeyframes(wxCommandEvent &event) {
|
|||
///
|
||||
void FrameMain::OnOpenRecentAudio(wxCommandEvent &event) {
|
||||
int number = event.GetId()-Menu_Audio_Recent;
|
||||
LoadSubtitles(AegisubApp::Get()->mru->GetEntry("Audio", number));
|
||||
LoadAudio(lagi_wxString(AegisubApp::Get()->mru->GetEntry("Audio", number)));
|
||||
}
|
||||
|
||||
|
||||
|
@ -805,13 +805,12 @@ void FrameMain::OnOpenSubtitles(wxCommandEvent& WXUNUSED(event)) {
|
|||
///
|
||||
void FrameMain::OnOpenSubtitlesCharset(wxCommandEvent& WXUNUSED(event)) {
|
||||
// Initialize charsets
|
||||
wxArrayString choices = AegisubCSConv::GetEncodingsList();
|
||||
wxString path = lagi_wxString(OPT_GET("Path/Last/Subtitles")->GetString());
|
||||
|
||||
// Get options and load
|
||||
wxString filename = wxFileSelector(_("Open subtitles file"),path,_T(""),_T(""),AssFile::GetWildcardList(0),wxFD_OPEN | wxFD_FILE_MUST_EXIST);
|
||||
if (!filename.empty()) {
|
||||
wxString charset = wxGetSingleChoice(_("Choose charset code:"), _("Charset"),choices,this,-1, -1,true,250,200);
|
||||
wxString charset = wxGetSingleChoice(_("Choose charset code:"), _("Charset"),agi::charset::GetEncodingsList<wxArrayString>(),this,-1, -1,true,250,200);
|
||||
if (!charset.empty()) {
|
||||
LoadSubtitles(filename,charset);
|
||||
}
|
||||
|
|
|
@ -300,7 +300,7 @@ void HotkeyManager::Load() {
|
|||
TextFileReader file(filename);
|
||||
wxString header;
|
||||
try {
|
||||
if (file.GetCurrentEncoding() != _T("binary"))
|
||||
if (!file.IsBinary())
|
||||
header = file.ReadLineFromFile();
|
||||
}
|
||||
catch (wxString e) {
|
||||
|
|
|
@ -263,6 +263,10 @@ emit_stdout->Enable();
|
|||
wxMessageBox(err,_T("Fatal error while initializing"));
|
||||
return false;
|
||||
}
|
||||
catch (agi::Exception const& e) {
|
||||
wxMessageBox(e.GetMessage(),_T("Fatal error while initializing"));
|
||||
return false;
|
||||
}
|
||||
|
||||
catch (...) {
|
||||
wxMessageBox(_T("Unhandled exception"),_T("Fatal error while initializing"));
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include <libaegisub/exception.h>
|
||||
|
||||
#include "colour_button.h"
|
||||
#include "compat.h"
|
||||
#include "libresrc/libresrc.h"
|
||||
#include "preferences.h"
|
||||
#include "main.h"
|
||||
|
@ -172,7 +173,7 @@ void Preferences::OptionAdd(wxPanel *parent, wxFlexGridSizer *flex, const wxStri
|
|||
|
||||
case agi::OptionValue::Type_String: {
|
||||
flex->Add(new wxStaticText(parent, wxID_ANY, name), 1, wxALIGN_CENTRE_VERTICAL);
|
||||
wxTextCtrl *text = new wxTextCtrl(parent, wxID_ANY , opt->GetString(), wxDefaultPosition, wxDefaultSize);
|
||||
wxTextCtrl *text = new wxTextCtrl(parent, wxID_ANY , lagi_wxString(opt->GetString()), wxDefaultPosition, wxDefaultSize);
|
||||
flex->Add(text, 1, wxEXPAND);
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -59,6 +59,8 @@
|
|||
#include "options.h"
|
||||
#include "spellchecker_hunspell.h"
|
||||
#include "standard_paths.h"
|
||||
#include "text_file_reader.h"
|
||||
#include "text_file_writer.h"
|
||||
#include "utils.h"
|
||||
|
||||
|
||||
|
@ -66,6 +68,7 @@
|
|||
HunspellSpellChecker::HunspellSpellChecker() {
|
||||
hunspell = NULL;
|
||||
conv = NULL;
|
||||
rconv = NULL;
|
||||
SetLanguage(lagi_wxString(OPT_GET("Tool/Spell Checker/Language")->GetString()));
|
||||
}
|
||||
|
||||
|
@ -84,6 +87,8 @@ void HunspellSpellChecker::Reset() {
|
|||
hunspell = NULL;
|
||||
delete conv;
|
||||
conv = NULL;
|
||||
delete rconv;
|
||||
rconv = NULL;
|
||||
affpath.Clear();
|
||||
dicpath.Clear();
|
||||
}
|
||||
|
@ -96,8 +101,13 @@ void HunspellSpellChecker::Reset() {
|
|||
///
|
||||
bool HunspellSpellChecker::CanAddWord(wxString word) {
|
||||
if (!hunspell) return false;
|
||||
wxCharBuffer buffer = word.mb_str(*conv);
|
||||
return (buffer.data() != NULL);
|
||||
try {
|
||||
conv->Convert(word);
|
||||
return true;
|
||||
}
|
||||
catch (agi::charset::ConvError const&) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -111,9 +121,9 @@ void HunspellSpellChecker::AddWord(wxString word) {
|
|||
|
||||
// Add to currently loaded file
|
||||
#ifdef WITH_OLD_HUNSPELL
|
||||
hunspell->put_word(word.mb_str(*conv));
|
||||
hunspell->put_word(conv->Convert(word).c_str());
|
||||
#else
|
||||
hunspell->add(word.mb_str(*conv));
|
||||
hunspell->add(conv->Convert(word).c_str());
|
||||
#endif
|
||||
|
||||
// Ensure that the path exists
|
||||
|
@ -124,22 +134,14 @@ void HunspellSpellChecker::AddWord(wxString word) {
|
|||
|
||||
// Load dictionary
|
||||
wxArrayString dic;
|
||||
wxString curLine;
|
||||
bool added = false;
|
||||
if (fn.FileExists()) { // Even if you ever want to remove this "if", keep the braces, so the stream closes at the end
|
||||
bool first = true;
|
||||
wxFileInputStream in(usrdicpath);
|
||||
if (!in.IsOk()) return;
|
||||
wxTextInputStream textIn(in,_T(" \t"),*conv);
|
||||
|
||||
// Read it
|
||||
while (in.CanRead() && !in.Eof()) {
|
||||
// Read line
|
||||
curLine = textIn.ReadLine();
|
||||
curLine.Trim();
|
||||
TextFileReader reader(usrdicpath, L"UTF-8");
|
||||
while (reader.HasMoreLines()) {
|
||||
wxString curLine = reader.ReadLineFromFile();
|
||||
if (curLine.IsEmpty()) continue;
|
||||
|
||||
// First
|
||||
if (first) {
|
||||
first = false;
|
||||
if (curLine.IsNumber()) continue;
|
||||
|
@ -160,11 +162,14 @@ void HunspellSpellChecker::AddWord(wxString word) {
|
|||
if (!added) dic.Add(word);
|
||||
|
||||
// Write back to disk
|
||||
wxFileOutputStream out(usrdicpath);
|
||||
if (!out.IsOk()) return;
|
||||
wxTextOutputStream textOut(out,wxEOL_UNIX,*conv);
|
||||
textOut.WriteString(wxString::Format(_T("%i"),dic.Count())+_T("\n"));
|
||||
for (unsigned int i=0;i<dic.Count();i++) textOut.WriteString(dic[i]+_T("\n"));
|
||||
try {
|
||||
TextFileWriter writer(usrdicpath, L"UTF-8");
|
||||
writer.WriteLineToFile(wxString::Format(L"%i", dic.Count()));
|
||||
for (unsigned int i=0;i<dic.Count();i++) writer.WriteLineToFile(dic[i]);
|
||||
}
|
||||
catch (const wchar_t*) {
|
||||
// Failed to open file
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -175,10 +180,13 @@ void HunspellSpellChecker::AddWord(wxString word) {
|
|||
///
|
||||
bool HunspellSpellChecker::CheckWord(wxString word) {
|
||||
if (!hunspell) return true;
|
||||
wxCharBuffer buf = word.mb_str(*conv);
|
||||
if (buf) return (hunspell->spell(buf) == 1);
|
||||
try {
|
||||
return hunspell->spell(conv->Convert(word).c_str()) == 1;
|
||||
}
|
||||
catch (agi::charset::ConvError const&) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -187,31 +195,26 @@ bool HunspellSpellChecker::CheckWord(wxString word) {
|
|||
/// @return List of suggestions
|
||||
///
|
||||
wxArrayString HunspellSpellChecker::GetSuggestions(wxString word) {
|
||||
// Array
|
||||
wxArrayString suggestions;
|
||||
if (!hunspell) return suggestions;
|
||||
|
||||
// Get suggestions
|
||||
if (hunspell) {
|
||||
// Word
|
||||
wxCharBuffer buf = word.mb_str(*conv);
|
||||
if (!buf) return suggestions;
|
||||
|
||||
try {
|
||||
// Grab raw from Hunspell
|
||||
char **results;
|
||||
int n = hunspell->suggest(&results,buf);
|
||||
int n = hunspell->suggest(&results,conv->Convert(word).c_str());
|
||||
|
||||
// Convert each
|
||||
for (int i=0;i<n;i++) {
|
||||
wxString current(results[i],*conv);
|
||||
suggestions.Add(current);
|
||||
suggestions.Add(rconv->Convert(results[i]));
|
||||
delete results[i];
|
||||
}
|
||||
|
||||
// Delete
|
||||
delete results;
|
||||
}
|
||||
catch (agi::charset::ConvError const&) {
|
||||
return suggestions;
|
||||
}
|
||||
|
||||
// Return them
|
||||
return suggestions;
|
||||
}
|
||||
|
||||
|
@ -279,25 +282,23 @@ void HunspellSpellChecker::SetLanguage(wxString language) {
|
|||
hunspell = new Hunspell(affpath.mb_str(csConvLocal),dicpath.mb_str(csConvLocal));
|
||||
conv = NULL;
|
||||
if (hunspell) {
|
||||
conv = new AegisubCSConv(wxString(hunspell->get_dic_encoding(),wxConvUTF8));
|
||||
|
||||
// Load user dictionary
|
||||
if (wxFileExists(usrdicpath)) {
|
||||
wxFileInputStream in(usrdicpath);
|
||||
if (!in.IsOk()) return;
|
||||
wxTextInputStream textIn(in,_T(" \t"),*conv);
|
||||
while (in.CanRead() && !in.Eof()) {
|
||||
// Read line
|
||||
wxString curLine = textIn.ReadLine();
|
||||
curLine.Trim();
|
||||
conv = new agi::charset::IconvWrapper("wchar_t", hunspell->get_dic_encoding());
|
||||
rconv = new agi::charset::IconvWrapper(hunspell->get_dic_encoding(), "wchar_t");
|
||||
try {
|
||||
TextFileReader reader(usrdicpath, L"UTF-8");
|
||||
while (reader.HasMoreLines()) {
|
||||
wxString curLine = reader.ReadLineFromFile();
|
||||
if (curLine.IsEmpty() || curLine.IsNumber()) continue;
|
||||
#ifdef WITH_OLD_HUNSPELL
|
||||
hunspell->put_word(curLine.mb_str(*conv));
|
||||
hunspell->put_word(conv->Convert(curLine).c_str());
|
||||
#else
|
||||
hunspell->add(curLine.mb_str(*conv));
|
||||
hunspell->add(conv->Convert(curLine).c_str());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
catch (const wchar_t *) {
|
||||
// file not found
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -43,6 +43,11 @@
|
|||
#include <hunspell/hunspell.hxx>
|
||||
|
||||
#include "include/aegisub/spellchecker.h"
|
||||
namespace agi {
|
||||
namespace charset {
|
||||
class IconvWrapper;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// @class HunspellSpellChecker
|
||||
|
@ -55,7 +60,8 @@ private:
|
|||
Hunspell *hunspell;
|
||||
|
||||
/// Conversion buffer
|
||||
wxMBConv *conv;
|
||||
agi::charset::IconvWrapper *conv;
|
||||
agi::charset::IconvWrapper *rconv;
|
||||
|
||||
/// Path to .aff file
|
||||
wxString affpath;
|
||||
|
|
|
@ -51,8 +51,15 @@
|
|||
#include "charset_detect.h"
|
||||
#include "text_file_reader.h"
|
||||
|
||||
TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
|
||||
: encoding(enc), conv((iconv_t)-1), trim(trim), readComplete(false), currout(0), outptr(0), currentLine(0) {
|
||||
TextFileReader::TextFileReader(wxString const& filename, wxString encoding, bool trim)
|
||||
: isBinary(false)
|
||||
, conv()
|
||||
, trim(trim)
|
||||
, readComplete(false)
|
||||
, currout(0)
|
||||
, outptr(0)
|
||||
, currentLine(0)
|
||||
{
|
||||
#ifdef __WINDOWS__
|
||||
file.open(filename.wc_str(),std::ios::in | std::ios::binary);
|
||||
#else
|
||||
|
@ -61,16 +68,14 @@ TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
|
|||
if (!file.is_open()) throw L"Failed opening file for reading.";
|
||||
|
||||
if (encoding.IsEmpty()) encoding = CharSetDetect::GetEncoding(filename);
|
||||
if (encoding == L"binary") return;
|
||||
encoding = AegisubCSConv::GetRealEncodingName(encoding);
|
||||
conv = iconv_open(WCHAR_T_ENCODING, encoding.ToAscii());
|
||||
if (conv == (iconv_t)-1) {
|
||||
throw wxString::Format(L"Character set '%s' is not supported.", enc.c_str());
|
||||
if (encoding == L"binary") {
|
||||
isBinary = true;
|
||||
return;
|
||||
}
|
||||
conv.reset(new agi::charset::IconvWrapper(encoding.c_str(), "wchar_t"));
|
||||
}
|
||||
|
||||
TextFileReader::~TextFileReader() {
|
||||
if (conv != (iconv_t)-1) iconv_close(conv);
|
||||
}
|
||||
|
||||
wchar_t TextFileReader::GetWChar() {
|
||||
|
@ -98,7 +103,8 @@ wchar_t TextFileReader::GetWChar() {
|
|||
return 0;
|
||||
|
||||
do {
|
||||
size_t ret = iconv(conv, &inptr, &inbytesleft, reinterpret_cast<char **>(&outptr), &outbytesleft);
|
||||
// Without this const_cast the wrong overload is chosen
|
||||
size_t ret = conv->Convert(const_cast<const char**>(&inptr), &inbytesleft, reinterpret_cast<char **>(&outptr), &outbytesleft);
|
||||
if (ret != (size_t)-1) break;
|
||||
|
||||
int err = errno;
|
||||
|
@ -144,7 +150,6 @@ wxString TextFileReader::ReadLineFromFile() {
|
|||
if (ch == 0)
|
||||
readComplete = true;
|
||||
|
||||
// Trim
|
||||
if (trim) {
|
||||
buffer.Trim(true);
|
||||
buffer.Trim(false);
|
||||
|
@ -155,7 +160,3 @@ wxString TextFileReader::ReadLineFromFile() {
|
|||
bool TextFileReader::HasMoreLines() {
|
||||
return !readComplete;
|
||||
}
|
||||
|
||||
wxString TextFileReader::GetCurrentEncoding() {
|
||||
return encoding;
|
||||
}
|
||||
|
|
|
@ -38,21 +38,23 @@
|
|||
|
||||
#ifndef AGI_PRE
|
||||
#include <fstream>
|
||||
|
||||
#include <iconv.h>
|
||||
#include <memory>
|
||||
|
||||
#include <wx/dynarray.h>
|
||||
#include <wx/string.h>
|
||||
#endif
|
||||
|
||||
namespace agi { namespace charset {
|
||||
class IconvWrapper;
|
||||
} }
|
||||
|
||||
/// @class TextFileReader
|
||||
/// @brief A line-based text file reader
|
||||
class TextFileReader {
|
||||
private:
|
||||
/// Encoding of the file being read
|
||||
wxString encoding;
|
||||
bool isBinary;
|
||||
std::ifstream file;
|
||||
iconv_t conv;
|
||||
std::auto_ptr<agi::charset::IconvWrapper> conv;
|
||||
bool trim;
|
||||
bool readComplete;
|
||||
|
||||
|
@ -76,7 +78,7 @@ public:
|
|||
/// @param filename File to open
|
||||
/// @param enc Encoding to use, or empty to autodetect
|
||||
/// @param trim Whether to trim whitespace from lines read
|
||||
TextFileReader(wxString filename,wxString encoding=L"", bool trim=true);
|
||||
TextFileReader(wxString const& filename,wxString encoding=L"", bool trim=true);
|
||||
/// @brief Destructor
|
||||
~TextFileReader();
|
||||
|
||||
|
@ -85,8 +87,5 @@ public:
|
|||
wxString ReadLineFromFile();
|
||||
/// @brief Check if there are any more lines to read
|
||||
bool HasMoreLines();
|
||||
|
||||
/// @brief Get the file encoding used by this reader
|
||||
/// @return "unknown", "binary", or a character encoding name
|
||||
wxString GetCurrentEncoding();
|
||||
bool IsBinary() { return isBinary; }
|
||||
};
|
||||
|
|
|
@ -51,7 +51,7 @@
|
|||
/// @param filename
|
||||
/// @param encoding
|
||||
///
|
||||
TextFileWriter::TextFileWriter(wxString filename, wxString encoding)
|
||||
TextFileWriter::TextFileWriter(wxString const& filename, wxString encoding)
|
||||
: conv() {
|
||||
#ifdef WIN32
|
||||
file.open(filename.wc_str(),std::ios::out | std::ios::binary | std::ios::trunc);
|
||||
|
@ -59,17 +59,17 @@ TextFileWriter::TextFileWriter(wxString filename, wxString encoding)
|
|||
file.open(wxFNCONV(filename),std::ios::out | std::ios::binary | std::ios::trunc);
|
||||
#endif
|
||||
if (!file.is_open()) {
|
||||
throw _T("Failed opening file for writing.");
|
||||
throw L"Failed opening file for writing.";
|
||||
}
|
||||
|
||||
if (encoding.IsEmpty()) encoding = lagi_wxString(OPT_GET("App/Save Charset")->GetString());
|
||||
conv.reset(new AegisubCSConv(encoding, true));
|
||||
if (encoding.empty()) encoding = lagi_wxString(OPT_GET("App/Save Charset")->GetString());
|
||||
conv.reset(new agi::charset::IconvWrapper("utf-8", encoding.c_str(), true));
|
||||
|
||||
// Write the BOM
|
||||
try {
|
||||
WriteLineToFile(_T("\uFEFF"), false);
|
||||
WriteLineToFile(L"\uFEFF", false);
|
||||
}
|
||||
catch (wxString ignore) {
|
||||
catch (agi::charset::ConversionFailure&) {
|
||||
// If the BOM could not be converted to the target encoding it isn't needed
|
||||
}
|
||||
}
|
||||
|
@ -85,14 +85,11 @@ TextFileWriter::~TextFileWriter() {
|
|||
/// @brief DOCME
|
||||
/// @param line
|
||||
/// @param addLineBreak
|
||||
///
|
||||
void TextFileWriter::WriteLineToFile(wxString line, bool addLineBreak) {
|
||||
wxString temp = line;
|
||||
if (addLineBreak) temp += _T("\r\n");
|
||||
if (addLineBreak) line += L"\n";
|
||||
|
||||
wxCharBuffer buf = temp.mb_str(*conv);
|
||||
if (buf.data())
|
||||
file.write(buf.data(), conv->MBBuffLen(buf.data()));
|
||||
std::string buf = conv->Convert(line.utf8_str().data());
|
||||
file.write(buf.data(), buf.size());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -43,8 +43,11 @@
|
|||
#include <wx/string.h>
|
||||
#endif
|
||||
|
||||
|
||||
class AegisubCSConv;
|
||||
namespace agi {
|
||||
namespace charset {
|
||||
class IconvWrapper;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// DOCME
|
||||
|
@ -59,13 +62,13 @@ private:
|
|||
std::ofstream file;
|
||||
|
||||
/// DOCME
|
||||
std::auto_ptr<AegisubCSConv> conv;
|
||||
std::auto_ptr<agi::charset::IconvWrapper> conv;
|
||||
|
||||
TextFileWriter(const TextFileWriter&);
|
||||
TextFileWriter& operator=(const TextFileWriter&);
|
||||
|
||||
public:
|
||||
TextFileWriter(wxString filename, wxString encoding=_T(""));
|
||||
TextFileWriter(wxString const& filename, wxString encoding="");
|
||||
~TextFileWriter();
|
||||
|
||||
void WriteLineToFile(wxString line, bool addLineBreak=true);
|
||||
|
|
|
@ -69,7 +69,7 @@ VideoProvider *VideoProviderFactoryManager::GetProvider(wxString video) {
|
|||
}
|
||||
|
||||
try {
|
||||
VideoProvider *y4m_provider = new YUV4MPEGVideoProvider(video.wc_str());
|
||||
VideoProvider *y4m_provider = new YUV4MPEGVideoProvider(video);
|
||||
if (y4m_provider)
|
||||
y4m_provider = new VideoProviderCache(y4m_provider);
|
||||
return y4m_provider;
|
||||
|
@ -92,7 +92,7 @@ VideoProvider *VideoProviderFactoryManager::GetProvider(wxString video) {
|
|||
for (unsigned int i=0;i<list.Count();i++) {
|
||||
try {
|
||||
// Create provider
|
||||
VideoProvider *provider = GetFactory(list[i])->CreateProvider(video.wc_str());
|
||||
VideoProvider *provider = GetFactory(list[i])->CreateProvider(video);
|
||||
if (provider) {
|
||||
// Cache if necessary
|
||||
if (provider->WantsCaching()) {
|
||||
|
|
|
@ -12,6 +12,7 @@ run_SOURCES = \
|
|||
util_unix.cpp \
|
||||
libaegisub_access.cpp \
|
||||
libaegisub_cajun.cpp \
|
||||
libaegisub_iconv.cpp \
|
||||
libaegisub_util.cpp \
|
||||
libaegisub_mru.cpp
|
||||
|
||||
|
|
138
aegisub/tests/libaegisub_iconv.cpp
Normal file
138
aegisub/tests/libaegisub_iconv.cpp
Normal file
|
@ -0,0 +1,138 @@
|
|||
// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
|
||||
//
|
||||
// Permission to use, copy, modify, and distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice appear in all copies.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
//
|
||||
// $Id$
|
||||
|
||||
/// @file libaegisub_iconv.cpp
|
||||
/// @brief agi::charset
|
||||
/// @ingroup iconv
|
||||
|
||||
#include <stdint.h>
|
||||
#include <libaegisub/charset_conv.h>
|
||||
|
||||
#include "main.h"
|
||||
#include "util.h"
|
||||
|
||||
using namespace agi::charset;
|
||||
|
||||
/// Smoke test: constructing a converter from UTF-8 to UTF-16LE must not throw.
TEST(lagi_iconv, BasicSetup) {
|
||||
EXPECT_NO_THROW(IconvWrapper("UTF-8", "UTF-16LE"));
|
||||
}
|
||||
|
||||
/// Constructing a converter with an unknown charset name as the first
/// argument, the second argument, or both must throw UnsupportedConversion.
TEST(lagi_iconv, InvalidConversions) {
|
||||
EXPECT_THROW(IconvWrapper("nonexistent charset", "UTF-16LE"), UnsupportedConversion);
|
||||
EXPECT_THROW(IconvWrapper("UTF-16LE", "nonexistent charset"), UnsupportedConversion);
|
||||
EXPECT_THROW(IconvWrapper("nonexistent charset", "nonexistent charset"), UnsupportedConversion);
|
||||
}
|
||||
|
||||
/// For a UTF-8 <-> UTF-8 converter, SrcStrLen and DstStrLen of a string of
/// N spaces must both report N, for every N from 0 to 9.
TEST(lagi_iconv, StrLen1) {
	IconvWrapper conv("UTF-8", "UTF-8", false);
	int len = 0;
	while (len < 10) {
		std::string spaces(len, ' ');
		const char *raw = spaces.c_str();
		ASSERT_EQ(len, conv.SrcStrLen(raw));
		ASSERT_EQ(len, conv.DstStrLen(raw));
		++len;
	}
}
|
||||
/// For a UTF-16LE <-> UTF-16LE converter, SrcStrLen and DstStrLen of a string
/// of N 16-bit spaces must both report 2*N, for every N from 0 to 9.
TEST(lagi_iconv, StrLen2) {
	IconvWrapper conv("UTF-16LE", "UTF-16LE", false);
	for (int i = 0; i < 10; i++) {
		// A plain array is used instead of std::basic_string<int16_t>: the
		// standard provides no std::char_traits<int16_t>, so that type only
		// compiles on libraries that ship a non-standard generic traits
		// template. Zero-initialising all ten slots guarantees a 16-bit
		// terminator after the i (at most 9) spaces.
		int16_t str[10] = {0};
		for (int j = 0; j < i; j++) str[j] = ' ';
		ASSERT_EQ(2*i, conv.SrcStrLen(reinterpret_cast<const char *>(str)));
		ASSERT_EQ(2*i, conv.DstStrLen(reinterpret_cast<const char *>(str)));
	}
}
|
||||
/// For a UTF-32LE <-> UTF-32LE converter, SrcStrLen and DstStrLen of a string
/// of N 32-bit spaces must both report 4*N, for every N from 0 to 9.
TEST(lagi_iconv, StrLen4) {
	IconvWrapper conv("UTF-32LE", "UTF-32LE", false);
	for (int i = 0; i < 10; i++) {
		// A plain array is used instead of std::basic_string<int32_t>: the
		// standard provides no std::char_traits<int32_t>, so that type only
		// compiles on libraries that ship a non-standard generic traits
		// template. Zero-initialising all ten slots guarantees a 32-bit
		// terminator after the i (at most 9) spaces.
		int32_t str[10] = {0};
		for (int j = 0; j < i; j++) str[j] = ' ';
		ASSERT_EQ(4*i, conv.SrcStrLen(reinterpret_cast<const char *>(str)));
		ASSERT_EQ(4*i, conv.DstStrLen(reinterpret_cast<const char *>(str)));
	}
}
|
||||
|
||||
/// Exercises conversion of characters that the destination charset cannot
/// represent. Three converters are compared: UTF-8 -> Shift-JIS with the third
/// constructor argument false ("nofallback"), the same with it true
/// ("fallback"), and UTF-8 -> UTF-16LE where every input is representable
/// ("noneneeded"). The third argument is presumably the enable-fallbacks flag;
/// confirm against charset_conv.h.
///
/// For each input, the no-fallback converter must throw BadOutput, the
/// UTF-16LE converter must succeed, and the fallback converter must succeed
/// with the specific replacement checked below.
TEST(lagi_iconv, Fallbacks) {
|
||||
IconvWrapper nofallback("UTF-8", "Shift-JIS", false);
|
||||
IconvWrapper fallback("UTF-8", "Shift-JIS", true);
|
||||
IconvWrapper noneneeded("UTF-8", "UTF-16LE", false);
|
||||
|
||||
// Shift-JIS does not have a backslash
// With fallbacks on, the backslash is expected to come through unchanged
|
||||
EXPECT_THROW(nofallback.Convert("\\"), BadOutput);
|
||||
ASSERT_NO_THROW(fallback.Convert("\\"));
|
||||
EXPECT_EQ("\\", fallback.Convert("\\"));
|
||||
EXPECT_NO_THROW(noneneeded.Convert("\\"));
|
||||
|
||||
// BOM into non-unicode
// With fallbacks on, the UTF-8 BOM is expected to be dropped entirely
|
||||
char bom[] = "\xEF\xBB\xBF";
|
||||
EXPECT_THROW(nofallback.Convert(bom), BadOutput);
|
||||
ASSERT_NO_THROW(fallback.Convert(bom));
|
||||
EXPECT_EQ("", fallback.Convert(bom));
|
||||
EXPECT_NO_THROW(noneneeded.Convert(bom));
|
||||
|
||||
// A snowman (U+2603)
// With fallbacks on, it is expected to be replaced by a question mark
|
||||
char snowman[] = "\xE2\x98\x83";
|
||||
EXPECT_THROW(nofallback.Convert(snowman), BadOutput);
|
||||
EXPECT_NO_THROW(noneneeded.Convert(snowman));
|
||||
ASSERT_NO_THROW(fallback.Convert(snowman));
|
||||
EXPECT_EQ("?", fallback.Convert(snowman));
|
||||
}
|
||||
|
||||
/// Input that is malformed in the source charset must make Convert throw
/// BadInput.
TEST(lagi_iconv, BadInput) {
|
||||
IconvWrapper utf16("UTF-16LE", "UTF-8");
|
||||
// A single byte cannot form a complete 16-bit code unit
EXPECT_THROW(utf16.Convert(" "), BadInput);
|
||||
IconvWrapper utf8("UTF-8", "UTF-16LE");
|
||||
// 0xE2 opens a three-byte UTF-8 sequence, but 0xFF is never a valid UTF-8 byte
EXPECT_THROW(utf8.Convert("\xE2\xFF"), BadInput);
|
||||
}
|
||||
|
||||
/// Converts a single space between UTF-8 and the two UTF-16 byte orders and
/// checks the exact bytes produced.
TEST(lagi_iconv, Conversions) {
	IconvWrapper utf16le("UTF-16LE", "UTF-8", false);
	IconvWrapper utf16be("UTF-16BE", "UTF-8", false);
	IconvWrapper utf8("UTF-8", "UTF-16LE", false);

	// The UTF-16 forms contain a zero byte, so they are built with an
	// explicit length of two rather than relying on NUL termination
	std::string space_utf8(" ");
	std::string space_utf16be("\0 ", 2);
	std::string space_utf16le(" \0", 2);

	EXPECT_EQ(space_utf8, utf16le.Convert(space_utf16le));
	EXPECT_EQ(space_utf8, utf16be.Convert(space_utf16be));
	EXPECT_EQ(space_utf16le, utf8.Convert(space_utf8));
}
|
||||
|
||||
// Basic overflow tests
|
||||
/// Checks the four-argument Convert overload against destination buffers that
/// are too small. Each call passes the literal "" with a count of 1 and `buff`
/// with a capacity of 0, 1 or 2 -- presumably (src, srcLen, dst, dstLen), i.e.
/// just the terminator byte is converted; confirm against charset_conv.h.
TEST(lagi_iconv, Buffer) {
|
||||
IconvWrapper conv("UTF-8", "UTF-16LE", false);
|
||||
char buff[32];
|
||||
// Pre-fill with 0xFF so any byte the converter writes is detectable
memset(buff, 0xFF, sizeof(buff));
|
||||
|
||||
// Capacity 0 and 1: must throw and leave the first byte untouched
EXPECT_THROW(conv.Convert("", 1, buff, 0), BufferTooSmall);
|
||||
EXPECT_EQ('\xFF', buff[0]);
|
||||
EXPECT_THROW(conv.Convert("", 1, buff, 1), BufferTooSmall);
|
||||
EXPECT_EQ('\xFF', buff[0]);
|
||||
// Capacity 2: succeeds, writes exactly two zero bytes and nothing beyond them
EXPECT_NO_THROW(conv.Convert("", 1, buff, 2));
|
||||
EXPECT_EQ('\0', buff[0]);
|
||||
EXPECT_EQ('\0', buff[1]);
|
||||
EXPECT_EQ('\xFF', buff[2]);
|
||||
}
|
||||
|
||||
/// An empty destination charset name must be accepted, and a space must
/// convert to a space through it.
// NOTE(review): going by the test name, "" presumably selects the local
// (system) charset -- confirm against the IconvWrapper implementation.
TEST(lagi_iconv, LocalSupport) {
|
||||
ASSERT_NO_THROW(IconvWrapper("UTF-8", ""));
|
||||
IconvWrapper conv("UTF-8", "");
|
||||
ASSERT_NO_THROW(conv.Convert(" "));
|
||||
EXPECT_EQ(" ", conv.Convert(" "));
|
||||
}
|
||||
/// The charset name "wchar_t" must be accepted as a conversion target.
TEST(lagi_iconv, wchar_tSupport) {
|
||||
EXPECT_NO_THROW(IconvWrapper("UTF-8", "wchar_t"));
|
||||
}
|
Loading…
Reference in a new issue