176 lines
6.9 KiB
C
176 lines
6.9 KiB
C
|
/* @(#) $Id: enca.h,v 1.11 2005/02/27 12:08:55 yeti Exp $ */
|
||
|
/* This header file is in the public domain. */
|
||
|
#ifndef ENCA_H
|
||
|
#define ENCA_H
|
||
|
|
||
|
#include <stdlib.h>
|
||
|
/* According to autoconf stdlib may not be enough for size_t */
|
||
|
#include <sys/types.h>
|
||
|
|
||
|
#ifdef __cplusplus
|
||
|
extern "C" {
|
||
|
#endif /* __cplusplus */
|
||
|
|
||
|
/* Enumerated types */
|
||
|
|
||
|
typedef enum { /*< flags >*/
|
||
|
ENCA_SURFACE_EOL_CR = 1 << 0,
|
||
|
ENCA_SURFACE_EOL_LF = 1 << 1,
|
||
|
ENCA_SURFACE_EOL_CRLF = 1 << 2,
|
||
|
ENCA_SURFACE_EOL_MIX = 1 << 3,
|
||
|
ENCA_SURFACE_EOL_BIN = 1 << 4,
|
||
|
ENCA_SURFACE_MASK_EOL = (ENCA_SURFACE_EOL_CR
|
||
|
| ENCA_SURFACE_EOL_LF
|
||
|
| ENCA_SURFACE_EOL_CRLF
|
||
|
| ENCA_SURFACE_EOL_MIX
|
||
|
| ENCA_SURFACE_EOL_BIN),
|
||
|
ENCA_SURFACE_PERM_21 = 1 << 5,
|
||
|
ENCA_SURFACE_PERM_4321 = 1 << 6,
|
||
|
ENCA_SURFACE_PERM_MIX = 1 << 7,
|
||
|
ENCA_SURFACE_MASK_PERM = (ENCA_SURFACE_PERM_21
|
||
|
| ENCA_SURFACE_PERM_4321
|
||
|
| ENCA_SURFACE_PERM_MIX),
|
||
|
ENCA_SURFACE_QP = 1 << 8,
|
||
|
ENCA_SURFACE_REMOVE = 1 << 13,
|
||
|
ENCA_SURFACE_UNKNOWN = 1 << 14,
|
||
|
ENCA_SURFACE_MASK_ALL = (ENCA_SURFACE_MASK_EOL
|
||
|
| ENCA_SURFACE_MASK_PERM
|
||
|
| ENCA_SURFACE_QP
|
||
|
| ENCA_SURFACE_REMOVE)
|
||
|
} EncaSurface;
|
||
|
|
||
|
typedef enum {
|
||
|
ENCA_NAME_STYLE_ENCA,
|
||
|
ENCA_NAME_STYLE_RFC1345,
|
||
|
ENCA_NAME_STYLE_CSTOCS,
|
||
|
ENCA_NAME_STYLE_ICONV,
|
||
|
ENCA_NAME_STYLE_HUMAN,
|
||
|
ENCA_NAME_STYLE_MIME
|
||
|
} EncaNameStyle;
|
||
|
|
||
|
typedef enum { /*< flags >*/
|
||
|
ENCA_CHARSET_7BIT = 1 << 0,
|
||
|
ENCA_CHARSET_8BIT = 1 << 1,
|
||
|
ENCA_CHARSET_16BIT = 1 << 2,
|
||
|
ENCA_CHARSET_32BIT = 1 << 3,
|
||
|
ENCA_CHARSET_FIXED = 1 << 4,
|
||
|
ENCA_CHARSET_VARIABLE = 1 << 5,
|
||
|
ENCA_CHARSET_BINARY = 1 << 6,
|
||
|
ENCA_CHARSET_REGULAR = 1 << 7,
|
||
|
ENCA_CHARSET_MULTIBYTE = 1 << 8
|
||
|
} EncaCharsetFlags;
|
||
|
|
||
|
typedef enum {
|
||
|
ENCA_EOK = 0,
|
||
|
ENCA_EINVALUE,
|
||
|
ENCA_EEMPTY,
|
||
|
ENCA_EFILTERED,
|
||
|
ENCA_ENOCS8,
|
||
|
ENCA_ESIGNIF,
|
||
|
ENCA_EWINNER,
|
||
|
ENCA_EGARBAGE
|
||
|
} EncaErrno;
|
||
|
|
||
|
#define ENCA_CS_UNKNOWN (-1)
|
||
|
|
||
|
#define ENCA_NOT_A_CHAR 0xffff
|
||
|
|
||
|
/* Published (opaque) typedefs */
|
||
|
typedef struct _EncaAnalyserState *EncaAnalyser;
|
||
|
|
||
|
/* Public (transparent) typedefs */
|
||
|
typedef struct _EncaEncoding EncaEncoding;
|
||
|
|
||
|
struct _EncaEncoding { int charset; EncaSurface surface; };
|
||
|
|
||
|
/* Basic interface. */
|
||
|
EncaAnalyser enca_analyser_alloc (const char *langname);
|
||
|
void enca_analyser_free (EncaAnalyser analyser);
|
||
|
EncaEncoding enca_analyse (EncaAnalyser analyser,
|
||
|
unsigned char *buffer,
|
||
|
size_t size);
|
||
|
EncaEncoding enca_analyse_const (EncaAnalyser analyser,
|
||
|
const unsigned char *buffer,
|
||
|
size_t size);
|
||
|
int enca_double_utf8_check (EncaAnalyser analyser,
|
||
|
const unsigned char *buffer,
|
||
|
size_t size);
|
||
|
int* enca_double_utf8_get_candidates (EncaAnalyser analyser);
|
||
|
int enca_errno (EncaAnalyser analyser);
|
||
|
const char* enca_strerror (EncaAnalyser analyser,
|
||
|
int errnum);
|
||
|
|
||
|
/* Options. */
|
||
|
void enca_set_multibyte (EncaAnalyser analyser,
|
||
|
int multibyte);
|
||
|
int enca_get_multibyte (EncaAnalyser analyser);
|
||
|
void enca_set_interpreted_surfaces (EncaAnalyser analyser,
|
||
|
int interpreted_surfaces);
|
||
|
int enca_get_interpreted_surfaces (EncaAnalyser analyser);
|
||
|
void enca_set_ambiguity (EncaAnalyser analyser,
|
||
|
int ambiguity);
|
||
|
int enca_get_ambiguity (EncaAnalyser analyser);
|
||
|
void enca_set_filtering (EncaAnalyser analyser,
|
||
|
int filtering);
|
||
|
int enca_get_filtering (EncaAnalyser analyser);
|
||
|
void enca_set_garbage_test (EncaAnalyser analyser,
|
||
|
int garabage_test);
|
||
|
int enca_get_garbage_test (EncaAnalyser analyser);
|
||
|
void enca_set_termination_strictness (EncaAnalyser analyser,
|
||
|
int termination_strictness);
|
||
|
int enca_get_termination_strictness (EncaAnalyser analyser);
|
||
|
int enca_set_significant (EncaAnalyser analyser,
|
||
|
size_t significant);
|
||
|
size_t enca_get_significant (EncaAnalyser analyser);
|
||
|
int enca_set_threshold (EncaAnalyser analyser,
|
||
|
double threshold);
|
||
|
double enca_get_threshold (EncaAnalyser analyser);
|
||
|
|
||
|
/* Names and properties. */
|
||
|
const char* enca_charset_name (int charset,
|
||
|
EncaNameStyle whatname);
|
||
|
const char** enca_get_charset_aliases (int charset,
|
||
|
size_t *n);
|
||
|
char* enca_get_surface_name (EncaSurface surface,
|
||
|
EncaNameStyle whatname);
|
||
|
EncaEncoding enca_parse_encoding_name (const char *name);
|
||
|
EncaSurface enca_charset_natural_surface (int charset);
|
||
|
EncaCharsetFlags enca_charset_properties (int charset);
|
||
|
|
||
|
#define enca_charset_is_known(cs) \
|
||
|
((cs) != ENCA_CS_UNKNOWN)
|
||
|
#define enca_charset_is_7bit(cs) \
|
||
|
(enca_charset_properties(cs) & ENCA_CHARSET_7BIT)
|
||
|
#define enca_charset_is_8bit(cs) \
|
||
|
(enca_charset_properties(cs) & ENCA_CHARSET_8BIT)
|
||
|
#define enca_charset_is_16bit(cs) \
|
||
|
(enca_charset_properties(cs) & ENCA_CHARSET_16BIT)
|
||
|
#define enca_charset_is_32bit(cs) \
|
||
|
(enca_charset_properties(cs) & ENCA_CHARSET_32BIT)
|
||
|
#define enca_charset_is_fixed(cs) \
|
||
|
(enca_charset_properties(cs) & ENCA_CHARSET_FIXED)
|
||
|
#define enca_charset_is_variable(cs) \
|
||
|
(enca_charset_properties(cs) & ENCA_CHARSET_VARIABLE)
|
||
|
#define enca_charset_is_binary(cs) \
|
||
|
(enca_charset_properties(cs) & ENCA_CHARSET_BINARY)
|
||
|
#define enca_charset_is_regular(cs) \
|
||
|
(enca_charset_properties(cs) & ENCA_CHARSET_REGULAR)
|
||
|
#define enca_charset_is_multibyte(cs) \
|
||
|
(enca_charset_properties(cs) & ENCA_CHARSET_MULTIBYTE)
|
||
|
|
||
|
/* Auxiliary functions. */
|
||
|
int enca_charset_has_ucs2_map (int charset);
|
||
|
int enca_charset_ucs2_map (int charset,
|
||
|
unsigned int *buffer);
|
||
|
size_t enca_number_of_charsets (void);
|
||
|
const char* enca_analyser_language (EncaAnalyser analyser);
|
||
|
const char* enca_language_english_name (const char *lang);
|
||
|
const char** enca_get_languages (size_t *n);
|
||
|
int* enca_get_language_charsets (const char *langname,
|
||
|
size_t *n);
|
||
|
#ifdef __cplusplus
|
||
|
}
|
||
|
#endif /* __cplusplus */
|
||
|
|
||
|
#endif
|