Add some documentation for AegisubCSConv and TextFileReader.

Originally committed to SVN as r4036.
This commit is contained in:
Thomas Goyne 2010-01-24 18:56:51 +00:00
parent c7d95e5590
commit ba088237d7
5 changed files with 155 additions and 298 deletions

View file

@ -103,7 +103,6 @@ void AssFile::Load (const wxString _filename,const wxString charset,bool addToRe
wxString enc;
if (charset.IsEmpty()) enc = TextFileReader::GetEncoding(_filename);
else enc = charset;
TextFileReader::EnsureValid(enc);
// Generic preparation
Clear();

View file

@ -49,52 +49,27 @@
WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash);
#if wxUSE_THREADS
/// DOCME
static wxMutex encodingListMutex;
#endif
/// DOCME
static const iconv_t iconv_invalid = (iconv_t)-1;
/// DOCME
static const size_t iconv_failed = (size_t)-1;
/// DOCME
#define ICONV_CONST_CAST(a) const_cast<ICONV_CONST char *>(a)
#ifndef ICONV_POSIX
static int addEncoding(unsigned int namescount, const char * const * names, void* data);
#endif
/// DOCME
static wxArrayString *supportedEncodings = NULL;
/// DOCME
static wxArrayString *prettyEncodingList = NULL;
/// DOCME
static PrettyNamesHash *prettyEncodingHash = NULL;
/// @brief DOCME
/// @param mbEncName
/// @param enableSubst
///
AegisubCSConv::AegisubCSConv(const wxChar *mbEncName, bool enableSubst)
: mbCharsetName(GetRealEncodingName(mbEncName)), mbNulLen(0), enableSubst(enableSubst)
: wcCharsetName(WCHAR_T_ENCODING)
, mbCharsetName(GetRealEncodingName(mbEncName))
, mbNulLen(0)
, enableSubst(enableSubst)
, m2w(wcCharsetName, mbCharsetName)
, w2m(mbCharsetName, wcCharsetName)
{
wcCharsetName = wxString::FromAscii(WCHAR_T_ENCODING);
m2w = iconv_open(wcCharsetName.ToAscii(), mbCharsetName.ToAscii());
w2m = iconv_open(mbCharsetName.ToAscii(), wcCharsetName.ToAscii());
if (m2w == iconv_invalid || w2m == iconv_invalid) {
if (m2w != iconv_invalid) iconv_close(m2w);
if (w2m != iconv_invalid) iconv_close(w2m);
throw wxString::Format(_T("Character set %s is not supported."), mbEncName);
throw wxString::Format(L"Character set %s is not supported.", mbEncName);
}
if (enableSubst) {
@ -110,26 +85,14 @@ AegisubCSConv::AegisubCSConv(const wxChar *mbEncName, bool enableSubst)
}
}
/// @brief DOCME
///
AegisubCSConv::~AegisubCSConv() {
if (m2w != iconv_invalid) iconv_close(m2w);
if (w2m != iconv_invalid) iconv_close(w2m);
}
/// @brief Create a copy of this converter
/// @return A newly allocated converter for the same multibyte encoding;
///         allocated with new, so presumably owned by the caller
wxMBConv * AegisubCSConv::Clone() const {
	// Forward enableSubst so the clone substitutes characters exactly when
	// the original does, rather than silently reverting to the default (false)
	AegisubCSConv *copy = new AegisubCSConv(mbCharsetName, enableSubst);
	// Carry over the cached NUL length so the clone need not recompute it
	copy->mbNulLen = mbNulLen;
	return copy;
}
/// @brief Calculate the size of NUL in the target encoding via iconv
/// @return
///
/// @return The size in bytes of NUL
size_t AegisubCSConv::GetMBNulLen() const {
if (mbNulLen == 0) {
const wchar_t nulStr[] = L"";
@ -142,18 +105,13 @@ size_t AegisubCSConv::GetMBNulLen() const {
size_t res = iconv(w2m, &inPtr, &inLen, &outPtr, &outLen);
if (res != 0)
const_cast<AegisubCSConv *>(this)->mbNulLen = (size_t)-1;
mbNulLen = (size_t)-1;
else
const_cast<AegisubCSConv *>(this)->mbNulLen = sizeof(outBuff) - outLen;
mbNulLen = sizeof(outBuff) - outLen;
}
return mbNulLen;
}
/// @brief Calculate the length (in bytes) of a MB string, not including the terminator
/// @param str
/// @return
///
size_t AegisubCSConv::MBBuffLen(const char * str) const {
size_t nulLen = GetMBNulLen();
const char *ptr;
@ -171,14 +129,12 @@ size_t AegisubCSConv::MBBuffLen(const char * str) const {
}
}
/// @brief DOCME
/// @param dst
/// @param dstSize
/// @param src
/// @param srcLen
/// @return
///
/// @brief Convert a string from multibyte to wide characters
/// @param dst Destination buffer.
/// @param dstSize Length of destination buffer in wchar_ts
/// @param src Source multibyte string
/// @param srcLen Length of source buffer in bytes, or -1 to autodetect
/// @return The number of wchar_ts needed to store the string in the target charset
size_t AegisubCSConv::ToWChar(wchar_t *dst, size_t dstSize, const char *src, size_t srcLen) const {
return doConversion(
m2w,
@ -189,14 +145,12 @@ size_t AegisubCSConv::ToWChar(wchar_t *dst, size_t dstSize, const char *src, siz
) / sizeof(wchar_t);
}
/// @brief DOCME
/// @param dst
/// @param dstSize
/// @param src
/// @param srcLen
/// @return
///
/// @brief Convert a string from wide characters to multibyte
/// @param dst Destination buffer
/// @param dstSize Length of destination buffer in bytes
/// @param src Source wide character string
/// @param srcLen Length in wchar_ts of source, or -1 to autodetect
/// @return The number of bytes needed to store the string in the target charset
size_t AegisubCSConv::FromWChar(char *dst, size_t dstSize, const wchar_t *src, size_t srcLen) const {
return doConversion(
w2m,
@ -207,15 +161,7 @@ size_t AegisubCSConv::FromWChar(char *dst, size_t dstSize, const wchar_t *src, s
);
}
/// @brief DOCME
/// @param cd
/// @param dst
/// @param dstSize
/// @param src
/// @param srcSize
/// @return
///
// Perform a conversion if a buffer is given or calculate the needed buffer size if not
size_t AegisubCSConv::doConversion(iconv_t cd, char *dst, size_t dstSize, char *src, size_t srcSize) const {
if (dstSize > 0) {
return iconvWrapper(cd, &src, &srcSize, &dst, &dstSize);
@ -239,20 +185,12 @@ size_t AegisubCSConv::doConversion(iconv_t cd, char *dst, size_t dstSize, char *
return charsWritten;
}
/// @brief DOCME
/// @param cd
/// @param inbuf
/// @param inbytesleft
/// @param outbuf
/// @param outbytesleft
/// @return
///
// Actually perform a conversion via iconv
size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft) const {
char **outbuf, size_t *outbytesleft) const {
#if wxUSE_THREADS
wxMutexLocker lock(const_cast<AegisubCSConv *>(this)->iconvMutex);
wxMutexLocker lock(iconvMutex);
#endif
char *outbuforig = *outbuf;
@ -265,10 +203,11 @@ size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft
#ifdef ICONV_POSIX
if (errno == EILSEQ) {
throw _T("One or more characters do not fit in the selected ")
_T("encoding and the version of iconv Aegisub was built with")
_T(" does not have useful fallbacks. For best results, ")
_T("please rebuild Aegisub using a recent version of GNU iconv.");
throw
L"One or more characters do not fit in the selected "
L"encoding and the version of iconv Aegisub was built with"
L" does not have useful fallbacks. For best results, "
L"please rebuild Aegisub using a recent version of GNU iconv.";
}
return wxCONV_FAILED;
#else
@ -287,7 +226,7 @@ size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft
}
if (res == iconv_failed && err == EILSEQ) {
// Conversion still failed with transliteration enabled, so try our substitution
iconvctl(cd, ICONV_SET_FALLBACKS, const_cast<iconv_fallbacks *>(&fallbacks));
iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
err = errno;
iconvctl(cd, ICONV_SET_FALLBACKS, NULL);
@ -309,13 +248,11 @@ size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft
}
/// @brief DOCME
/// @param code
/// @param callback
/// @param callback_arg
/// @param convPtr
/// @return
///
/// @brief GNU iconv character substitution callback
/// @param code Unicode character which could not be converted
/// @param callback Callback to tell iconv what string to use instead
/// @param callback_arg Iconv userdata for callback
/// @param convPtr AegisubCSConv instance to use
void AegisubCSConv::ucToMbFallback(
unsigned int code,
void (*callback) (const char *buf, size_t buflen, void* callback_arg),
@ -323,7 +260,8 @@ void AegisubCSConv::ucToMbFallback(
void *convPtr)
{
// At some point in the future, this should probably switch to a real mapping
// For now, there's just three cases: BOM to nothing, \ to itself (lol Shift-JIS) and everything else to ?
// For now, there's just three cases: BOM to nothing, '\' to itself
// (for Shift-JIS, which does not have \) and everything else to '?'
if (code == 0xFEFF) return;
if (code == 0x5C) callback("\\", 1, callback_arg);
else {
@ -333,13 +271,10 @@ void AegisubCSConv::ucToMbFallback(
}
#ifndef ICONV_POSIX
/// @brief DOCME
/// @param namescount
/// @param names
/// @param data
/// @return
///
/// @brief Callback for iconvlist
/// @param namescount Number of names in names
/// @param names Names to add to the list
/// @param data Unused userdata field
int addEncoding(unsigned int namescount, const char * const * names, void* data) {
for (unsigned int i = 0; i < namescount; i++) {
supportedEncodings->Add(wxString::FromAscii(names[i]));
@ -348,10 +283,6 @@ int addEncoding(unsigned int namescount, const char * const * names, void* data)
}
#endif
/// @brief DOCME
/// @return
///
wxArrayString AegisubCSConv::GetAllSupportedEncodings() {
#if wxUSE_THREADS
wxMutexLocker lock(encodingListMutex);
@ -366,13 +297,8 @@ wxArrayString AegisubCSConv::GetAllSupportedEncodings() {
return *supportedEncodings;
}
/// @brief Map pretty names to the real encoding names
/// @param name
/// @return
///
wxString AegisubCSConv::GetRealEncodingName(wxString name) {
if (name.Lower() == _T("local")) return wxLocale::GetSystemEncodingName();
if (name.Lower() == L"local") return wxLocale::GetSystemEncodingName();
if (prettyEncodingList == NULL) return name;
PrettyNamesHash::iterator realName = prettyEncodingHash->find(name);
@ -382,9 +308,6 @@ wxString AegisubCSConv::GetRealEncodingName(wxString name) {
return name;
}
/// @brief DOCME
///
wxArrayString AegisubCSConv::GetEncodingsList() {
#if wxUSE_THREADS
wxMutexLocker lock(encodingListMutex);
@ -511,10 +434,10 @@ wxArrayString AegisubCSConv::GetEncodingsList() {
PrettyNamesHash *map = new PrettyNamesHash(100);
wxArrayString *arr = new wxArrayString();
arr->Add(_T("Local"));
arr->Add(L"Local");
for (int i = 0; encodingNames[i].real != NULL; i++) {
// Verify that iconv actually supports this encoding
// Verify that iconv actually supports converting to and from this encoding
iconv_t cd = iconv_open(encodingNames[i].real, WCHAR_T_ENCODING);
if (cd == iconv_invalid) continue;
iconv_close(cd);
@ -533,7 +456,5 @@ wxArrayString AegisubCSConv::GetEncodingsList() {
}
return *prettyEncodingList;
}
static AegisubCSConv localConv(_T("Local"), false);
static AegisubCSConv localConv(L"Local", false);
AegisubCSConv& csConvLocal(localConv);

View file

@ -1,4 +1,4 @@
// Copyright (c) 2009, Thomas Goyne
// Copyright (c) 2010, Thomas Goyne
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
@ -34,9 +34,6 @@
/// @ingroup utility
///
#ifndef AGI_PRE
#include <iconv.h>
#include <wchar.h>
@ -49,64 +46,77 @@
#include "aegisub_endian.h"
#if !defined(_LIBICONV_VERSION) || _LIBICONV_VERSION < 0x010A || defined(LIBICONV_PLUG)
/// DOCME
#define ICONV_POSIX
#endif
/// @class iconv_wrapper
/// @brief RAII wrapper for an iconv conversion descriptor
///
/// The descriptor is opened with iconv_open() on construction and closed
/// with iconv_close() on destruction. If iconv_open() fails, the wrapped
/// value is (iconv_t)-1; callers must check for that before converting.
///
/// Copying is disabled: a copy would hold the same descriptor as the
/// original and both destructors would call iconv_close() on it.
class iconv_wrapper {
private:
	/// The owned descriptor, or (iconv_t)-1 if iconv_open() failed
	iconv_t conv;

	// Declared but intentionally not defined: the class is non-copyable
	iconv_wrapper(iconv_wrapper const&);
	iconv_wrapper& operator=(iconv_wrapper const&);
public:
	/// @param to   Name of the encoding to convert to
	/// @param from Name of the encoding to convert from
	iconv_wrapper(const char *to, const char *from)
	: conv(iconv_open(to, from))
	{ }
	iconv_wrapper(wxString const& to, wxString const& from)
	: conv(iconv_open(to.ToAscii(), from.ToAscii()))
	{ }
	iconv_wrapper(const char *to, wxString const& from)
	: conv(iconv_open(to, from.ToAscii()))
	{ }
	iconv_wrapper(wxString const& to, const char *from)
	: conv(iconv_open(to.ToAscii(), from))
	{ }
	/// Close the descriptor if it was successfully opened
	~iconv_wrapper() {
		if (conv != (iconv_t)-1) iconv_close(conv);
	}
	/// Implicit access to the raw descriptor for passing to iconv functions
	operator iconv_t() {
		return conv;
	}
	operator const iconv_t() const {
		return conv;
	}
};
/// DOCME
/// @class AegisubCSConv
/// @brief DOCME
///
/// DOCME
/// @brief wxMBConv implementation for converting to and from unicode
class AegisubCSConv : public wxMBConv {
public:
// By default, any conversion that would be lossy will fail
// When enableSubst is true, conversions to multibyte with a sufficiently large buffer
// are guaranteed to succeed, with characters dropped or changed as needed to fit the
// string into the target encoding.
/// @param mbEncName Multibyte encoding to convert to/from
/// @param enableSubst Whether to substitute characters when needed.
/// By default, any conversion that would be lossy will fail.
/// When enableSubst is true, conversions to multibyte with a sufficiently
/// large buffer are guaranteed to succeed, with characters dropped or
/// changed as needed to fit the string into the target encoding.
AegisubCSConv(const wxChar *mbEncName, bool enableSubst = false);
virtual ~AegisubCSConv();
// wxMBConv implementation; see strconv.h for usage details
virtual size_t ToWChar(wchar_t *dst, size_t dstLen, const char *src, size_t srcLen = wxNO_LEN) const;
virtual size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const;
virtual size_t GetMBNulLen() const;
virtual wxMBConv *Clone() const;
size_t ToWChar(wchar_t *dst, size_t dstLen, const char *src, size_t srcLen = wxNO_LEN) const;
size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const;
size_t GetMBNulLen() const;
wxMBConv *Clone() const;
// Get the length (in bytes) of a null-terminated string whose encoding is mbEncName
/// @brief Multibyte-aware strlen
/// @return Length in bytes of str (excluding terminator)
size_t MBBuffLen(const char *str) const;
// Get a list of support encodings with somewhat user-friendly names
/// @brief Get a list of supported encodings with user-friendly names
static wxArrayString GetEncodingsList();
// Get a list of all encodings supported by iconv
/// @brief Get a list of all encodings supported by iconv
/// Requires GNU iconv for useful results
static wxArrayString GetAllSupportedEncodings();
// Map a user-friendly encoding name to iconv's name
/// @brief Map a user-friendly encoding name to the real encoding name
static wxString GetRealEncodingName(wxString name);
protected:
/// DOCME
/// DOCME
iconv_t m2w, w2m;
private:
/// DOCME
// The smattering of mutable variables here exists because ToWChar and
// FromWChar are const in wxMBConv, but we require minor mutation for
// things like locks (as iconv is not thread-safe)
wxString wcCharsetName;
/// DOCME
wxString mbCharsetName;
/// DOCME
size_t mbNulLen;
/// DOCME
bool enableSubst;
mutable size_t mbNulLen;
bool enableSubst;
size_t doConversion(iconv_t cd, char *dst, size_t dstSize, char *src, size_t srcSize) const;
size_t iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) const;
@ -117,56 +127,43 @@ private:
void *callback_arg,
void *convPtr);
/// DOCME
/// Replacement character for characters which do not fit in the target
/// encoding and iconv does not have an appropriate substitute for
char invalidRep[8];
/// DOCME
size_t invalidRepSize;
#ifndef ICONV_POSIX
/// DOCME
iconv_fallbacks fallbacks;
mutable iconv_fallbacks fallbacks;
#endif
#if wxUSE_THREADS
/// DOCME
wxMutex iconvMutex;
mutable wxMutex iconvMutex;
#endif
protected:
iconv_wrapper m2w, w2m;
};
// Predefined conversion for the current locale. Should be a drop-in replacement for wxConvLocal
// Predefined conversion for the current locale, intended to be a drop-in
// replacement for wxConvLocal
extern AegisubCSConv& csConvLocal;
#ifdef HAVE_BIG_ENDIAN
# if SIZEOF_WCHAR_T == 4
/// DOCME
# define WCHAR_T_ENCODING "UTF-32BE"
# elif SIZEOF_WCHAR_T == 2
/// DOCME
# define WCHAR_T_ENCODING "UTF-16BE"
# endif
#elif defined(HAVE_LITTLE_ENDIAN)
# if SIZEOF_WCHAR_T == 4
/// DOCME
# define WCHAR_T_ENCODING "UTF-32LE"
# elif SIZEOF_WCHAR_T == 2
/// DOCME
# define WCHAR_T_ENCODING "UTF-16LE"
# endif
#else
# if SIZEOF_WCHAR_T == 4
/// DOCME
# define WCHAR_T_ENCODING ((Endian::MachineToBig((uint32_t)1) == 1) ? "UTF-32BE" : "UTF-32LE")
# elif SIZEOF_WCHAR_T == 2
/// DOCME
# define WCHAR_T_ENCODING ((Endian::MachineToBig((uint32_t)1) == 1) ? "UTF-16BE" : "UTF-16LE")
# endif
#endif

View file

@ -1,4 +1,4 @@
// Copyright (c) 2005, Rodrigo Braz Monteiro
// Copyright (c) 2010, Rodrigo Braz Monteiro, Thomas Goyne
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
@ -51,13 +51,6 @@
#endif
#include "text_file_reader.h"
/// @brief DOCME
/// @param filename
/// @param enc
/// @param trim
/// @return
///
TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
: encoding(enc), conv((iconv_t)-1), trim(trim), readComplete(false), currout(0), outptr(0), currentLine(0) {
#ifdef __WINDOWS__
@ -65,29 +58,22 @@ TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
#else
file.open(wxFNCONV(filename),std::ios::in | std::ios::binary);
#endif
if (!file.is_open()) {
throw _T("Failed opening file for reading.");
}
if (!file.is_open()) throw L"Failed opening file for reading.";
if (encoding.IsEmpty()) encoding = GetEncoding(filename);
if (encoding == _T("binary")) return;
if (encoding == L"binary") return;
encoding = AegisubCSConv::GetRealEncodingName(encoding);
conv = iconv_open(WCHAR_T_ENCODING, encoding.ToAscii());
if (conv == (iconv_t)-1) {
throw wxString::Format(L"Character set '%s' is not supported.", enc.c_str());
}
}
/// @brief DOCME
///
TextFileReader::~TextFileReader() {
if (conv != (iconv_t)-1) iconv_close(conv);
}
/// @brief DOCME
/// @param filename
/// @return
///
wxString TextFileReader::GetEncoding(const wxString filename) {
wxString TextFileReader::GetEncoding(wxString const& filename) {
// Prepare
unsigned char b[4];
memset(b, 0, sizeof(b));
@ -100,27 +86,27 @@ wxString TextFileReader::GetEncoding(const wxString filename) {
ifile.open(wxFNCONV(filename));
#endif
if (!ifile.is_open()) {
return _T("unknown");
return L"unknown";
}
ifile.read(reinterpret_cast<char *>(b),4);
ifile.close();
// Try to get the byte order mark from them
if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) return _T("UTF-8");
else if (b[0] == 0xFF && b[1] == 0xFE && b[2] == 0x00 && b[3] == 0x00) return _T("UTF-32LE");
else if (b[0] == 0x00 && b[1] == 0x00 && b[2] == 0xFE && b[3] == 0xFF) return _T("UTF-32BE");
else if (b[0] == 0xFF && b[1] == 0xFE) return _T("UTF-16LE");
else if (b[0] == 0xFE && b[1] == 0xFF) return _T("UTF-16BE");
else if (b[0] == 0x2B && b[1] == 0x2F && b[2] == 0x76) return _T("UTF-7");
if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) return L"UTF-8";
else if (b[0] == 0xFF && b[1] == 0xFE && b[2] == 0x00 && b[3] == 0x00) return L"UTF-32LE";
else if (b[0] == 0x00 && b[1] == 0x00 && b[2] == 0xFE && b[3] == 0xFF) return L"UTF-32BE";
else if (b[0] == 0xFF && b[1] == 0xFE) return L"UTF-16LE";
else if (b[0] == 0xFE && b[1] == 0xFF) return L"UTF-16BE";
else if (b[0] == 0x2B && b[1] == 0x2F && b[2] == 0x76) return L"UTF-7";
// Try to guess UTF-16
else if (b[0] == 0 && b[1] >= 32 && b[2] == 0 && b[3] >= 32) return _T("UTF-16BE");
else if (b[0] >= 32 && b[1] == 0 && b[2] >= 32 && b[3] == 0) return _T("UTF-16LE");
else if (b[0] == 0 && b[1] >= 32 && b[2] == 0 && b[3] >= 32) return L"UTF-16BE";
else if (b[0] >= 32 && b[1] == 0 && b[2] >= 32 && b[3] == 0) return L"UTF-16LE";
// If any of the first four bytes are under 0x20 (the first printable character),
// except for 9-13 range, assume binary
for (int i=0;i<4;i++) {
if (b[i] < 9 || (b[i] > 13 && b[i] < 32)) return _T("binary");
if (b[i] < 9 || (b[i] > 13 && b[i] < 32)) return L"binary";
}
#ifdef WITH_UNIVCHARDET
@ -129,14 +115,10 @@ wxString TextFileReader::GetEncoding(const wxString filename) {
return det.GetEncoding(filename);
#else
// Fall back to local
return _T("Local");
return L"local";
#endif
}
/// @brief DOCME
/// @return
///
wchar_t TextFileReader::GetWChar() {
// If there's already some converted characters waiting, return the next one
if (++currout < outptr) {
@ -174,7 +156,7 @@ wchar_t TextFileReader::GetWChar() {
// adding one byte to the input buffer until either it succeeds or we add enough bytes to
// complete any character
if (++bytesAdded > 3)
throw wxString::Format(_T("Invalid input character found near line %u"), currentLine);
throw wxString::Format(L"Invalid input character found near line %u", currentLine);
file.read(inptr + inbytesleft, 1);
inbytesleft++;
@ -183,34 +165,27 @@ wchar_t TextFileReader::GetWChar() {
if (outptr > outbuf)
return *currout;
throw wxString::Format(_T("Invalid input character found near line %u"), currentLine);
throw wxString::Format(L"Invalid input character found near line %u", currentLine);
}
/// @brief DOCME
/// @return
///
wxString TextFileReader::ReadLineFromFile() {
wxString buffer;
size_t bufAlloc = 1024;
buffer.Alloc(bufAlloc);
buffer.Alloc(1024);
currentLine++;
// Read a line
wchar_t ch;
size_t len = 0;
bool first = true;
// This doesn't work for \r-delimited files, but it's very unlikely
// that we'll run into one of those
for (ch = GetWChar(); ch != L'\n' && ch != 0; ch = GetWChar()) {
if (ch == L'\r') continue;
// Skip the BOM -- we don't need it as the encoding is already known
// and it sometimes causes conversion problems
if (ch == 0xFEFF && len == 0) continue;
if (ch == 0xFEFF && first) continue;
if (len >= bufAlloc - 1) {
bufAlloc *= 2;
buffer.Alloc(bufAlloc);
}
buffer += ch;
len++;
first = false;
}
if (ch == 0)
readComplete = true;
@ -223,36 +198,10 @@ wxString TextFileReader::ReadLineFromFile() {
return buffer;
}
/// @brief DOCME
/// @return
///
bool TextFileReader::HasMoreLines() {
return !readComplete;
}
/// @brief Check that iconv can convert from an encoding to wchar_t
/// @param enc Encoding name to check; "binary" is accepted without checking
/// @throw wxString if iconv cannot open a converter for the encoding
void TextFileReader::EnsureValid(wxString enc) {
	if (enc == _T("binary")) return;

	enc = AegisubCSConv::GetRealEncodingName(enc);
	iconv_t cd = iconv_open(WCHAR_T_ENCODING, enc.ToAscii());
	if (cd == (iconv_t)-1) {
		throw wxString::Format(_T("Character set %s is not supported."), enc.c_str());
	}
	// Only a successfully opened descriptor may be handed to iconv_close
	iconv_close(cd);
}
/// @brief DOCME
///
wxString TextFileReader::GetCurrentEncoding() {
return encoding;
}

View file

@ -1,4 +1,4 @@
// Copyright (c) 2005, Rodrigo Braz Monteiro
// Copyright (c) 2010, Rodrigo Braz Monteiro
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
@ -45,62 +45,53 @@
#include <wx/string.h>
#endif
/// DOCME
/// @class TextFileReader
/// @brief DOCME
///
/// DOCME
/// @brief A line-based text file reader
class TextFileReader {
private:
/// DOCME
/// Encoding of the file being read
wxString encoding;
/// DOCME
std::ifstream file;
/// DOCME
iconv_t conv;
/// DOCME
bool trim;
/// DOCME
bool readComplete;
/// DOCME
// Iconv buffers and state
wchar_t outbuf[256];
/// DOCME
wchar_t *currout;
/// DOCME
wchar_t *outptr;
/// DOCME
size_t outbytesleft;
/// DOCME
/// Current line number
unsigned int currentLine;
/// @brief Read a single wchar_t from the file
wchar_t GetWChar();
TextFileReader(const TextFileReader&);
TextFileReader& operator=(const TextFileReader&);
public:
TextFileReader(wxString filename,wxString encoding=_T(""), bool trim=true);
/// @brief Constructor
/// @param filename File to open
/// @param encoding Encoding to use, or empty to autodetect
/// @param trim Whether to trim whitespace from lines read
TextFileReader(wxString filename,wxString encoding=L"", bool trim=true);
/// @brief Destructor
~TextFileReader();
/// @brief Read a line from the file
/// @return The line, possibly trimmed
wxString ReadLineFromFile();
/// @brief Check if there are any more lines to read
bool HasMoreLines();
static void EnsureValid(const wxString encoding);
/// @brief Get the file encoding used by this reader
/// @return "unknown", "binary", or a character encoding name
wxString GetCurrentEncoding();
static wxString GetEncoding(const wxString filename);
/// @brief Attempt to detect a file's encoding
/// @param filename The file to check
/// @return "unknown", "binary", or a character encoding name
static wxString GetEncoding(wxString const& filename);
};