/*
 * Copyright (C) 1999-2008 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <iconv.h>

#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
#include "localcharset.h"

/*
 * Select which groups of system dependent encodings are compiled in.
 * The USE_AIX, USE_OSF1, USE_DOS and USE_EXTRA macros defined here are
 * tested further down in this file to decide which encodings_*.def,
 * canonical_*.h and aliases2.h tables get included.
 */
#if ENABLE_EXTRA
/*
 * Consider all system dependent encodings, for any system,
 * and the extra encodings.
 */
#define USE_AIX
#define USE_OSF1
#define USE_DOS
#define USE_EXTRA
#else
/*
 * Consider those system dependent encodings that are needed for the
 * current system.
 */
#ifdef _AIX
#define USE_AIX
#endif
#if defined(__osf__) || defined(VMS)
#define USE_OSF1
#endif
#if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
#define USE_DOS
#endif
#endif

/*
 * Data type for general conversion loop.
 *
 * loop_convert takes the same arguments as iconv(), with a const-qualified
 * input buffer; loop_reset takes only the descriptor and the output buffer.
 * iconv() in this file calls members with these names through cd->lfuncs.
 */
struct loop_funcs {
  size_t (*loop_convert) (iconv_t icd,
                          const char* * inbuf, size_t *inbytesleft,
                          char* * outbuf, size_t *outbytesleft);
  size_t (*loop_reset) (iconv_t icd,
                        char* * outbuf, size_t *outbytesleft);
};

/*
 * Converters.
 */
#include "converters.h"

/*
 * Transliteration tables.
 */
#include "cjk_variants.h"
#include "translit.h"

/*
|
|
* Table of all supported encodings.
|
|
*/
|
|
struct encoding {
|
|
struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
|
|
struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
|
|
int oflags; /* flags for unicode -> multibyte conversion */
|
|
};
|
|
/* DEFALIAS entries in the *.def files contribute nothing to the tables
   built below; the macro stays defined until after all_encodings. */
#define DEFALIAS(xxx_alias,xxx) /* nothing */
/*
 * Encoding indices: one enumerator ei_<xxx> is generated for every
 * DEFENCODING entry, in the order the *.def files are included here.
 * The final enumerator only exists so that the list does not end with
 * a comma.
 */
enum {
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  ei_##xxx ,
#include "encodings.def"
#ifdef USE_AIX
# include "encodings_aix.def"
#endif
#ifdef USE_OSF1
# include "encodings_osf1.def"
#endif
#ifdef USE_DOS
# include "encodings_dos.def"
#endif
#ifdef USE_EXTRA
# include "encodings_extra.def"
#endif
#include "encodings_local.def"
#undef DEFENCODING
ei_for_broken_compilers_that_dont_like_trailing_commas
};
#include "flags.h"
/*
 * One 'struct encoding' initializer per DEFENCODING entry.  The *.def files
 * are included in the same order as in the ei_* enum above, so the entries
 * line up with the enumerators.  Entries from encodings_local.def get
 * oflags 0; all others take ei_<xxx>_oflags (presumably provided by
 * flags.h -- confirm there).
 */
static struct encoding const all_encodings[] = {
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
#include "encodings.def"
#ifdef USE_AIX
# include "encodings_aix.def"
#endif
#ifdef USE_OSF1
# include "encodings_osf1.def"
#endif
#ifdef USE_DOS
# include "encodings_dos.def"
#endif
#ifdef USE_EXTRA
# include "encodings_extra.def"
#endif
#undef DEFENCODING
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
#include "encodings_local.def"
#undef DEFENCODING
};
#undef DEFALIAS

/*
 * Conversion loops.
 */
#include "loops.h"

/*
 * Alias lookup function.
 * Defines
 *   struct alias { int name; unsigned int encoding_index; };
 *   const struct alias * aliases_lookup (const char *str, unsigned int len);
 *   #define MAX_WORD_LENGTH ...
 * Exactly one of the tables below is included, chosen by host system.
 */
#if defined _AIX
# include "aliases_sysaix.h"
#elif defined hpux || defined __hpux
# include "aliases_syshpux.h"
#elif defined __osf__
# include "aliases_sysosf1.h"
#elif defined __sun
# include "aliases_syssolaris.h"
#else
# include "aliases.h"
#endif

/*
 * System dependent alias lookup function.
 * Defines
 *   const struct alias * aliases2_lookup (const char *str);
 *
 * When none of the USE_xxx groups is enabled, aliases2_lookup and
 * stringpool2 are macros that expand to NULL instead; iconvlist() tests
 * '#ifndef aliases2_lookup' to tell the two cases apart.
 */
#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */
/* One char array member per S() entry of aliases2.h, sized to hold the
   alias name including its terminating NUL. */
struct stringpool2_t {
#define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)];
#include "aliases2.h"
#undef S
};
/* The alias names themselves, laid out according to struct stringpool2_t. */
static const struct stringpool2_t stringpool2_contents = {
#define S(tag,name,encoding_index) name,
#include "aliases2.h"
#undef S
};
#define stringpool2 ((const char *) &stringpool2_contents)
/* For every alias: byte offset of its name inside stringpool2, and the
   index of the encoding it denotes. */
static const struct alias sysdep_aliases[] = {
#define S(tag,name,encoding_index) { (int)offsetof(struct stringpool2_t, stringpool_##tag), encoding_index },
#include "aliases2.h"
#undef S
};
/*
 * Linear search of sysdep_aliases for an entry whose name equals str
 * (exact strcmp match).  Returns the entry, or NULL if there is none.
 *
 * The function is 'static': it is only used inside this file, and a
 * non-static inline definition would not provide an external definition
 * under C99 inline semantics.
 */
static
#ifdef __GNUC__
__inline
#endif
const struct alias *
aliases2_lookup (const char *str)
{
  const struct alias * ptr = sysdep_aliases;
  const struct alias * const end =
    sysdep_aliases + sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]);
  for (; ptr < end; ptr++)
    if (strcmp(str, stringpool2 + ptr->name) == 0)
      return ptr;
  return NULL;
}
#else
#define aliases2_lookup(str) NULL
#define stringpool2 NULL
#endif

#if 0
/* Like !strcasecmp, except that both strings can be assumed to be ASCII
   and the first string can be assumed to be in uppercase.
   Currently compiled out. */
static int strequal (const char* str1, const char* str2)
{
  unsigned char c1;
  unsigned char c2;
  for (;;) {
    c1 = * (const unsigned char *) str1++;
    c2 = * (const unsigned char *) str2++;
    /* End of str1: equal only if str2 ends here as well. */
    if (c1 == 0)
      break;
    /* Fold str2 to uppercase; str1 is already uppercase. */
    if (c2 >= 'a' && c2 <= 'z')
      c2 -= 'a'-'A';
    if (c1 != c2)
      break;
  }
  return (c1 == c2);
}
#endif

/*
 * Allocates a conversion descriptor with malloc() and returns it.
 *
 * The local variables declared below are the working set of the two
 * included fragments: "iconv_open1.h" runs before the allocation and
 * "iconv_open2.h" after it (presumably the former parses tocode/fromcode
 * and the latter fills in *cd -- confirm in those headers).  No visible
 * statement jumps to the 'invalid' label, so it is presumably reached from
 * "iconv_open1.h".
 *
 * The allocation is sizeof(struct wchar_conv_struct) when from_wchar and
 * to_wchar differ, sizeof(struct conv_struct) otherwise.
 *
 * Returns the descriptor on success.  Returns (iconv_t)(-1) with errno set
 * to ENOMEM if malloc fails, or to EINVAL via the 'invalid' label.
 */
iconv_t iconv_open (const char* tocode, const char* fromcode)
{
  struct conv_struct * cd;
  unsigned int from_index;
  int from_wchar;
  unsigned int to_index;
  int to_wchar;
  int transliterate;
  int discard_ilseq;

#include "iconv_open1.h"

  cd = (struct conv_struct *) malloc(from_wchar != to_wchar
                                     ? sizeof(struct wchar_conv_struct)
                                     : sizeof(struct conv_struct));
  if (cd == NULL) {
    errno = ENOMEM;
    return (iconv_t)(-1);
  }

#include "iconv_open2.h"

  return (iconv_t)cd;
invalid:
  errno = EINVAL;
  return (iconv_t)(-1);
}

size_t iconv (iconv_t icd,
|
|
ICONV_CONST char* * inbuf, size_t *inbytesleft,
|
|
char* * outbuf, size_t *outbytesleft)
|
|
{
|
|
conv_t cd = (conv_t) icd;
|
|
if (inbuf == NULL || *inbuf == NULL)
|
|
return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
|
|
else
|
|
return cd->lfuncs.loop_convert(icd,
|
|
(const char* *)inbuf,inbytesleft,
|
|
outbuf,outbytesleft);
|
|
}
|
|
|
|
int iconv_close (iconv_t icd)
|
|
{
|
|
conv_t cd = (conv_t) icd;
|
|
free(cd);
|
|
return 0;
|
|
}
|
|
|
|
#ifndef LIBICONV_PLUG
|
|
|
|
/*
|
|
* Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each
|
|
* fit in an iconv_allocation_t.
|
|
* If this verification fails, iconv_allocation_t must be made larger and
|
|
* the major version in LIBICONV_VERSION_INFO must be bumped.
|
|
* Currently 'struct conv_struct' has 21 integer/pointer fields, and
|
|
* 'struct wchar_conv_struct' additionally has an 'mbstate_t' field.
|
|
*/
|
|
typedef int verify_size_1[2 * (sizeof (struct conv_struct) <= sizeof (iconv_allocation_t)) - 1];
|
|
typedef int verify_size_2[2 * (sizeof (struct wchar_conv_struct) <= sizeof (iconv_allocation_t)) - 1];
|
|
|
|
int iconv_open_into (const char* tocode, const char* fromcode,
|
|
iconv_allocation_t* resultp)
|
|
{
|
|
struct conv_struct * cd;
|
|
unsigned int from_index;
|
|
int from_wchar;
|
|
unsigned int to_index;
|
|
int to_wchar;
|
|
int transliterate;
|
|
int discard_ilseq;
|
|
|
|
#include "iconv_open1.h"
|
|
|
|
cd = (struct conv_struct *) resultp;
|
|
|
|
#include "iconv_open2.h"
|
|
|
|
return 0;
|
|
invalid:
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
int iconvctl (iconv_t icd, int request, void* argument)
|
|
{
|
|
conv_t cd = (conv_t) icd;
|
|
switch (request) {
|
|
case ICONV_TRIVIALP:
|
|
*(int *)argument =
|
|
((cd->lfuncs.loop_convert == unicode_loop_convert
|
|
&& cd->iindex == cd->oindex)
|
|
|| cd->lfuncs.loop_convert == wchar_id_loop_convert
|
|
? 1 : 0);
|
|
return 0;
|
|
case ICONV_GET_TRANSLITERATE:
|
|
*(int *)argument = cd->transliterate;
|
|
return 0;
|
|
case ICONV_SET_TRANSLITERATE:
|
|
cd->transliterate = (*(const int *)argument ? 1 : 0);
|
|
return 0;
|
|
case ICONV_GET_DISCARD_ILSEQ:
|
|
*(int *)argument = cd->discard_ilseq;
|
|
return 0;
|
|
case ICONV_SET_DISCARD_ILSEQ:
|
|
cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
|
|
return 0;
|
|
case ICONV_SET_HOOKS:
|
|
if (argument != NULL) {
|
|
cd->hooks = *(const struct iconv_hooks *)argument;
|
|
} else {
|
|
cd->hooks.uc_hook = NULL;
|
|
cd->hooks.wc_hook = NULL;
|
|
cd->hooks.data = NULL;
|
|
}
|
|
return 0;
|
|
case ICONV_SET_FALLBACKS:
|
|
if (argument != NULL) {
|
|
cd->fallbacks = *(const struct iconv_fallbacks *)argument;
|
|
} else {
|
|
cd->fallbacks.mb_to_uc_fallback = NULL;
|
|
cd->fallbacks.uc_to_mb_fallback = NULL;
|
|
cd->fallbacks.mb_to_wc_fallback = NULL;
|
|
cd->fallbacks.wc_to_mb_fallback = NULL;
|
|
cd->fallbacks.data = NULL;
|
|
}
|
|
return 0;
|
|
default:
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* An alias after its name has been converted from 'int' to 'const char*'. */
struct nalias { const char* name; unsigned int encoding_index; };

/*
 * qsort() comparison function for 'struct nalias' elements: orders them by
 * ascending encoding_index.
 *
 * Returns -1, 0 or 1.  The indices are compared as unsigned values rather
 * than subtracted as ints, so the result is correct for the whole
 * 'unsigned int' range and no signed overflow can occur.
 */
static int compare_by_index (const void * arg1, const void * arg2)
{
  const struct nalias * alias1 = (const struct nalias *) arg1;
  const struct nalias * alias2 = (const struct nalias *) arg2;
  unsigned int index1 = alias1->encoding_index;
  unsigned int index2 = alias2->encoding_index;
  return (index1 > index2) - (index1 < index2);
}

/*
 * qsort() comparison function for an array of 'const char *' names.
 *
 * Names are ordered alphabetically by strcmp(), except that names starting
 * with "CS" sort after all other names.  Returns 0 for equal strings;
 * otherwise +/-1 from the strcmp() sign, plus +4 if only name1 has the "CS"
 * prefix or -4 if only name2 has it, so the prefix always dominates.
 */
static int compare_by_name (const void * arg1, const void * arg2)
{
  const char * name1 = *(const char * const *)arg1;
  const char * name2 = *(const char * const *)arg2;
  int order = strcmp(name1,name2);
  int is_cs1;
  int is_cs2;
  if (order == 0)
    return 0;
  /* Compare alphabetically, but put "CS" names at the end. */
  is_cs1 = (name1[0]=='C' && name1[1]=='S');
  is_cs2 = (name2[0]=='C' && name2[1]=='S');
  return (is_cs1 - is_cs2) * 4 + (order > 0 ? 1 : -1);
}

void iconvlist (int (*do_one) (unsigned int namescount,
|
|
const char * const * names,
|
|
void* data),
|
|
void* data)
|
|
{
|
|
#define aliascount1 sizeof(aliases)/sizeof(aliases[0])
|
|
#ifndef aliases2_lookup
|
|
#define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
|
|
#else
|
|
#define aliascount2 0
|
|
#endif
|
|
#define aliascount (aliascount1+aliascount2)
|
|
struct nalias aliasbuf[aliascount];
|
|
const char * namesbuf[aliascount];
|
|
size_t num_aliases;
|
|
{
|
|
/* Put all existing aliases into a buffer. */
|
|
size_t i;
|
|
size_t j;
|
|
j = 0;
|
|
for (i = 0; i < aliascount1; i++) {
|
|
const struct alias * p = &aliases[i];
|
|
if (p->name >= 0
|
|
&& p->encoding_index != ei_local_char
|
|
&& p->encoding_index != ei_local_wchar_t) {
|
|
aliasbuf[j].name = stringpool + p->name;
|
|
aliasbuf[j].encoding_index = p->encoding_index;
|
|
j++;
|
|
}
|
|
}
|
|
#ifndef aliases2_lookup
|
|
for (i = 0; i < aliascount2; i++) {
|
|
aliasbuf[j].name = stringpool2 + sysdep_aliases[i].name;
|
|
aliasbuf[j].encoding_index = sysdep_aliases[i].encoding_index;
|
|
j++;
|
|
}
|
|
#endif
|
|
num_aliases = j;
|
|
}
|
|
/* Sort by encoding_index. */
|
|
if (num_aliases > 1)
|
|
qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index);
|
|
{
|
|
/* Process all aliases with the same encoding_index together. */
|
|
size_t j;
|
|
j = 0;
|
|
while (j < num_aliases) {
|
|
unsigned int ei = aliasbuf[j].encoding_index;
|
|
size_t i = 0;
|
|
do
|
|
namesbuf[i++] = aliasbuf[j++].name;
|
|
while (j < num_aliases && aliasbuf[j].encoding_index == ei);
|
|
if (i > 1)
|
|
qsort(namesbuf, i, sizeof(const char *), compare_by_name);
|
|
/* Call the callback. */
|
|
if (do_one(i,namesbuf,data))
|
|
break;
|
|
}
|
|
}
|
|
#undef aliascount
|
|
#undef aliascount2
|
|
#undef aliascount1
|
|
}
|
|
|
|
/*
 * Table of canonical names of encodings.
 * Instead of strings, it contains offsets into stringpool and stringpool2.
 * The included fragments follow the same grouping as the encoding tables
 * above: base set, AIX, OSF/1, DOS, extra, local.
 */
static const unsigned short all_canonical[] = {
#if defined _AIX
# include "canonical_sysaix.h"
#elif defined hpux || defined __hpux
# include "canonical_syshpux.h"
#elif defined __osf__
# include "canonical_sysosf1.h"
#elif defined __sun
# include "canonical_syssolaris.h"
#else
# include "canonical.h"
#endif
#ifdef USE_AIX
# if defined _AIX
#  include "canonical_aix_sysaix.h"
# else
#  include "canonical_aix.h"
# endif
#endif
#ifdef USE_OSF1
# if defined __osf__
#  include "canonical_osf1_sysosf1.h"
# else
#  include "canonical_osf1.h"
# endif
#endif
#ifdef USE_DOS
# include "canonical_dos.h"
#endif
#ifdef USE_EXTRA
# include "canonical_extra.h"
#endif
#if defined _AIX
# include "canonical_local_sysaix.h"
#elif defined hpux || defined __hpux
# include "canonical_local_syshpux.h"
#elif defined __osf__
# include "canonical_local_sysosf1.h"
#elif defined __sun
# include "canonical_local_syssolaris.h"
#else
# include "canonical_local.h"
#endif
};

const char * iconv_canonicalize (const char * name)
|
|
{
|
|
const char* code;
|
|
char buf[MAX_WORD_LENGTH+10+1];
|
|
const char* cp;
|
|
char* bp;
|
|
const struct alias * ap;
|
|
unsigned int count;
|
|
unsigned int index;
|
|
const char* pool;
|
|
|
|
/* Before calling aliases_lookup, convert the input string to upper case,
|
|
* and check whether it's entirely ASCII (we call gperf with option "-7"
|
|
* to achieve a smaller table) and non-empty. If it's not entirely ASCII,
|
|
* or if it's too long, it is not a valid encoding name.
|
|
*/
|
|
for (code = name;;) {
|
|
/* Search code in the table. */
|
|
for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
|
|
unsigned char c = * (unsigned char *) cp;
|
|
if (c >= 0x80)
|
|
goto invalid;
|
|
if (c >= 'a' && c <= 'z')
|
|
c -= 'a'-'A';
|
|
*bp = c;
|
|
if (c == '\0')
|
|
break;
|
|
if (--count == 0)
|
|
goto invalid;
|
|
}
|
|
for (;;) {
|
|
if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
|
|
bp -= 10;
|
|
*bp = '\0';
|
|
continue;
|
|
}
|
|
if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
|
|
bp -= 8;
|
|
*bp = '\0';
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
if (buf[0] == '\0') {
|
|
code = locale_charset();
|
|
/* Avoid an endless loop that could occur when using an older version
|
|
of localcharset.c. */
|
|
if (code[0] == '\0')
|
|
goto invalid;
|
|
continue;
|
|
}
|
|
pool = stringpool;
|
|
ap = aliases_lookup(buf,bp-buf);
|
|
if (ap == NULL) {
|
|
pool = stringpool2;
|
|
ap = aliases2_lookup(buf);
|
|
if (ap == NULL)
|
|
goto invalid;
|
|
}
|
|
if (ap->encoding_index == ei_local_char) {
|
|
code = locale_charset();
|
|
/* Avoid an endless loop that could occur when using an older version
|
|
of localcharset.c. */
|
|
if (code[0] == '\0')
|
|
goto invalid;
|
|
continue;
|
|
}
|
|
if (ap->encoding_index == ei_local_wchar_t) {
|
|
/* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
|
|
This is also the case on native Woe32 systems. */
|
|
#if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
|
|
if (sizeof(wchar_t) == 4) {
|
|
index = ei_ucs4internal;
|
|
break;
|
|
}
|
|
if (sizeof(wchar_t) == 2) {
|
|
index = ei_ucs2internal;
|
|
break;
|
|
}
|
|
if (sizeof(wchar_t) == 1) {
|
|
index = ei_iso8859_1;
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
index = ap->encoding_index;
|
|
break;
|
|
}
|
|
return all_canonical[index] + pool;
|
|
invalid:
|
|
return name;
|
|
}
|
|
|
|
/* Library version, initialized from the _LIBICONV_VERSION macro. */
int _libiconv_version = _LIBICONV_VERSION;

#if defined __FreeBSD__ && !defined __gnu_freebsd__
/* GNU libiconv is the native FreeBSD iconv implementation since 2002.
   It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'. */
/* The two-level definition lets macro arguments be expanded before #name
   turns the target into a string. */
#define strong_alias(name, aliasname) _strong_alias(name, aliasname)
#define _strong_alias(name, aliasname) \
  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
#undef iconv_open
#undef iconv
#undef iconv_close
strong_alias (libiconv_open, iconv_open)
strong_alias (libiconv, iconv)
strong_alias (libiconv_close, iconv_close)
#endif

#endif /* !LIBICONV_PLUG */