forked from mia/Aegisub
Eliminate a pointless std::map in charset_conv
This commit is contained in:
parent
a22dd0f9ca
commit
f0f836c47b
4 changed files with 85 additions and 81 deletions
|
@ -20,7 +20,7 @@
|
|||
#include <cstdint>
|
||||
|
||||
#include <cassert>
|
||||
#include <map>
|
||||
#include <boost/range/algorithm.hpp>
|
||||
|
||||
#include <libaegisub/charset_conv.h>
|
||||
#include <iconv.h>
|
||||
|
@ -44,28 +44,33 @@ static const size_t iconv_failed = (size_t)-1;
|
|||
|
||||
namespace {
|
||||
using namespace agi::charset;
|
||||
struct ltstr {
|
||||
bool operator()(const char* s1, const char* s2) const {
|
||||
return strcmp(s1, s2) < 0;
|
||||
}
|
||||
};
|
||||
|
||||
Converter *get_converter(bool subst, const char *src, const char *dst);
|
||||
|
||||
/// @brief Map a user-friendly encoding name to the real encoding name
|
||||
const char *get_real_encoding_name(const char* name) {
|
||||
static std::map<const char*, const char*, ltstr> pretty_names;
|
||||
|
||||
if (pretty_names.empty()) {
|
||||
# define ADD(pretty, real) pretty_names[pretty] = real
|
||||
const char *get_real_encoding_name(const char *name) {
|
||||
struct pair { const char *pretty; const char *real; };
|
||||
static pair pretty_names[] = {
|
||||
# define ADD(pretty, real) pair{pretty, real},
|
||||
# include <libaegisub/charsets.def>
|
||||
# undef ADD
|
||||
}
|
||||
};
|
||||
|
||||
auto real = pretty_names.find(name);
|
||||
if (real != pretty_names.end())
|
||||
return real->second;
|
||||
return name;
|
||||
static bool init = false;
|
||||
if (!init) {
|
||||
init = true;
|
||||
boost::sort(pretty_names, [](pair a, pair b) {
|
||||
return strcmp(a.pretty, b.pretty) < 0;
|
||||
});
|
||||
}
|
||||
|
||||
auto enc = boost::lower_bound(pretty_names, name, [](pair a, const char *b) {
|
||||
return strcmp(a.pretty, b) < 0;
|
||||
});
|
||||
|
||||
if (enc != std::end(pretty_names) && strcmp(enc->pretty, name) == 0)
|
||||
return enc->real;
|
||||
return name;
|
||||
}
|
||||
|
||||
size_t get_bom_size(Iconv& cd) {
|
||||
|
|
|
@ -120,7 +120,7 @@ template<class T>
|
|||
T const& GetEncodingsList() {
|
||||
static T name_list;
|
||||
if (name_list.empty()) {
|
||||
# define ADD(pretty, real) if (IsConversionSupported(real, "utf-8")) name_list.push_back(pretty)
|
||||
# define ADD(pretty, real) if (IsConversionSupported(real, "utf-8")) name_list.push_back(pretty);
|
||||
# include <libaegisub/charsets.def>
|
||||
# undef ADD
|
||||
}
|
||||
|
|
|
@ -1,78 +1,78 @@
|
|||
ADD("Local", "");
|
||||
ADD("Local", "")
|
||||
|
||||
ADD("Unicode (UTF-8)", "utf-8");
|
||||
ADD("Unicode (UTF-16)", "utf-16");
|
||||
ADD("Unicode (UTF-16BE)", "utf-16be");
|
||||
ADD("Unicode (UTF-16LE)", "utf-16le");
|
||||
ADD("Unicode (UTF-32)", "utf-32");
|
||||
ADD("Unicode (UTF-32BE)", "utf-32be");
|
||||
ADD("Unicode (UTF-32LE)", "utf-32le");
|
||||
ADD("Unicode (UTF-7)", "utf-7");
|
||||
ADD("Unicode (UTF-8)", "utf-8")
|
||||
ADD("Unicode (UTF-16)", "utf-16")
|
||||
ADD("Unicode (UTF-16BE)", "utf-16be")
|
||||
ADD("Unicode (UTF-16LE)", "utf-16le")
|
||||
ADD("Unicode (UTF-32)", "utf-32")
|
||||
ADD("Unicode (UTF-32BE)", "utf-32be")
|
||||
ADD("Unicode (UTF-32LE)", "utf-32le")
|
||||
ADD("Unicode (UTF-7)", "utf-7")
|
||||
|
||||
ADD("Arabic (IBM-864)", "ibm864");
|
||||
ADD("Arabic (ISO-8859-6)", "iso-8859-6");
|
||||
ADD("Arabic (Windows-1256)", "windows-1256");
|
||||
ADD("Arabic (IBM-864)", "ibm864")
|
||||
ADD("Arabic (ISO-8859-6)", "iso-8859-6")
|
||||
ADD("Arabic (Windows-1256)", "windows-1256")
|
||||
|
||||
ADD("Armenian (ARMSCII-8)", "armscii-8");
|
||||
ADD("Armenian (ARMSCII-8)", "armscii-8")
|
||||
|
||||
ADD("Baltic (ISO-8859-13)", "iso-8859-13");
|
||||
ADD("Baltic (ISO-8859-4)", "iso-8859-4");
|
||||
ADD("Baltic (Windows-1257)", "windows-1257");
|
||||
ADD("Baltic (ISO-8859-13)", "iso-8859-13")
|
||||
ADD("Baltic (ISO-8859-4)", "iso-8859-4")
|
||||
ADD("Baltic (Windows-1257)", "windows-1257")
|
||||
|
||||
ADD("Celtic (ISO-8859-14)", "iso-8859-14");
|
||||
ADD("Celtic (ISO-8859-14)", "iso-8859-14")
|
||||
|
||||
ADD("Central European (IBM-852)", "ibm852");
|
||||
ADD("Central European (ISO-8859-2)", "iso-8859-2");
|
||||
ADD("Central European (Windows-1250)", "windows-1250");
|
||||
ADD("Central European (IBM-852)", "ibm852")
|
||||
ADD("Central European (ISO-8859-2)", "iso-8859-2")
|
||||
ADD("Central European (Windows-1250)", "windows-1250")
|
||||
|
||||
ADD("Chinese Simplified (GB18030)", "gb18030");
|
||||
ADD("Chinese Simplified (GB2312)", "gb2312");
|
||||
ADD("Chinese Simplified (HZ)", "hz-gb-2312");
|
||||
ADD("Chinese Simplified (ISO-2022-CN)", "iso-2022-cn");
|
||||
ADD("Chinese Traditional (Big5)", "big5");
|
||||
ADD("Chinese Traditional (Big5-HKSCS)", "big5-hkscs");
|
||||
ADD("Chinese Simplified (GB18030)", "gb18030")
|
||||
ADD("Chinese Simplified (GB2312)", "gb2312")
|
||||
ADD("Chinese Simplified (HZ)", "hz-gb-2312")
|
||||
ADD("Chinese Simplified (ISO-2022-CN)", "iso-2022-cn")
|
||||
ADD("Chinese Traditional (Big5)", "big5")
|
||||
ADD("Chinese Traditional (Big5-HKSCS)", "big5-hkscs")
|
||||
|
||||
ADD("Cyrillic (IBM-855)", "ibm855");
|
||||
ADD("Cyrillic (ISO-8859-5)", "iso-8859-5");
|
||||
ADD("Cyrillic (KOI8-R)", "koi8-r");
|
||||
ADD("Cyrillic (Windows-1251)", "windows-1251");
|
||||
ADD("Cyrillic/Russian (CP-866)", "ibm866");
|
||||
ADD("Cyrillic/Ukrainian (KOI8-U)", "koi8-u");
|
||||
ADD("Cyrillic (IBM-855)", "ibm855")
|
||||
ADD("Cyrillic (ISO-8859-5)", "iso-8859-5")
|
||||
ADD("Cyrillic (KOI8-R)", "koi8-r")
|
||||
ADD("Cyrillic (Windows-1251)", "windows-1251")
|
||||
ADD("Cyrillic/Russian (CP-866)", "ibm866")
|
||||
ADD("Cyrillic/Ukrainian (KOI8-U)", "koi8-u")
|
||||
|
||||
ADD("English (US-ASCII)", "us-ascii");
|
||||
ADD("English (US-ASCII)", "us-ascii")
|
||||
|
||||
ADD("Greek (ISO-8859-7)", "iso-8859-7");
|
||||
ADD("Greek (Windows-1253)", "windows-1253");
|
||||
ADD("Greek (ISO-8859-7)", "iso-8859-7")
|
||||
ADD("Greek (Windows-1253)", "windows-1253")
|
||||
|
||||
ADD("Hebrew (IBM-862)", "ibm862");
|
||||
ADD("Hebrew (Windows-1255)", "windows-1255");
|
||||
ADD("Hebrew Visual (ISO-8859-8)", "iso-8859-8");
|
||||
ADD("Hebrew (IBM-862)", "ibm862")
|
||||
ADD("Hebrew (Windows-1255)", "windows-1255")
|
||||
ADD("Hebrew Visual (ISO-8859-8)", "iso-8859-8")
|
||||
|
||||
ADD("Japanese (EUC-JP)", "euc-jp");
|
||||
ADD("Japanese (ISO-2022-JP)", "iso-2022-jp");
|
||||
ADD("Japanese (Shift_JIS)", "shift_jis");
|
||||
ADD("Japanese (EUC-JP)", "euc-jp")
|
||||
ADD("Japanese (ISO-2022-JP)", "iso-2022-jp")
|
||||
ADD("Japanese (Shift_JIS)", "shift_jis")
|
||||
|
||||
ADD("Korean (EUC-KR)", "euc-kr");
|
||||
ADD("Korean (ISO-2022-KR)", "iso-2022-kr");
|
||||
ADD("Korean (EUC-KR)", "euc-kr")
|
||||
ADD("Korean (ISO-2022-KR)", "iso-2022-kr")
|
||||
|
||||
ADD("Nordic (ISO-8859-10)", "iso-8859-10");
|
||||
ADD("Nordic (ISO-8859-10)", "iso-8859-10")
|
||||
|
||||
ADD("Romanian (ISO-8859-16)", "iso-8859-16");
|
||||
ADD("Romanian (ISO-8859-16)", "iso-8859-16")
|
||||
|
||||
ADD("South European (ISO-8859-3)", "iso-8859-3");
|
||||
ADD("South European (ISO-8859-3)", "iso-8859-3")
|
||||
|
||||
ADD("Thai (ISO-8859-11)", "iso-8859-11");
|
||||
ADD("Thai (TIS-620)", "tis-620");
|
||||
ADD("Thai (Windows-874)", "windows-874");
|
||||
ADD("Thai (ISO-8859-11)", "iso-8859-11")
|
||||
ADD("Thai (TIS-620)", "tis-620")
|
||||
ADD("Thai (Windows-874)", "windows-874")
|
||||
|
||||
ADD("Turkish (IBM-857)", "ibm857");
|
||||
ADD("Turkish (ISO-8859-9)", "iso-8859-9");
|
||||
ADD("Turkish (Windows-1254)", "windows-1254");
|
||||
ADD("Turkish (IBM-857)", "ibm857")
|
||||
ADD("Turkish (ISO-8859-9)", "iso-8859-9")
|
||||
ADD("Turkish (Windows-1254)", "windows-1254")
|
||||
|
||||
ADD("Vietnamese (VISCII)", "viscii");
|
||||
ADD("Vietnamese (Windows-1258)", "windows-1258");
|
||||
ADD("Vietnamese (VISCII)", "viscii")
|
||||
ADD("Vietnamese (Windows-1258)", "windows-1258")
|
||||
|
||||
ADD("Western (IBM-850)", "ibm850");
|
||||
ADD("Western (ISO-8859-1)", "iso-8859-1");
|
||||
ADD("Western (ISO-8859-15)", "iso-8859-15");
|
||||
ADD("Western (Windows-1252)", "windows-1252");
|
||||
ADD("Western (IBM-850)", "ibm850")
|
||||
ADD("Western (ISO-8859-1)", "iso-8859-1")
|
||||
ADD("Western (ISO-8859-15)", "iso-8859-15")
|
||||
ADD("Western (Windows-1252)", "windows-1252")
|
||||
|
|
|
@ -135,14 +135,13 @@ TEST(lagi_iconv, wchar_tSupport) {
|
|||
}
|
||||
|
||||
TEST(lagi_iconv, Roundtrip) {
|
||||
std::vector<std::string> names = GetEncodingsList<std::vector<std::string> >();
|
||||
for (auto cur = names.begin(); cur != names.end(); ++cur) {
|
||||
ASSERT_NO_THROW(IconvWrapper("utf-8", cur->c_str()));
|
||||
ASSERT_NO_THROW(IconvWrapper(cur->c_str(), "utf-8"));
|
||||
for (auto const& name : GetEncodingsList<std::vector<std::string>>()) {
|
||||
ASSERT_NO_THROW(IconvWrapper("utf-8", name.c_str()));
|
||||
ASSERT_NO_THROW(IconvWrapper(name.c_str(), "utf-8"));
|
||||
EXPECT_EQ(
|
||||
"Jackdaws love my big sphinx of quartz",
|
||||
IconvWrapper(cur->c_str(), "utf-8").Convert(
|
||||
IconvWrapper("utf-8", cur->c_str()).Convert(
|
||||
IconvWrapper(name.c_str(), "utf-8").Convert(
|
||||
IconvWrapper("utf-8", name.c_str()).Convert(
|
||||
"Jackdaws love my big sphinx of quartz")));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue