From ba088237d7e02e2189149d431d553e7776751988 Mon Sep 17 00:00:00 2001 From: Thomas Goyne Date: Sun, 24 Jan 2010 18:56:51 +0000 Subject: [PATCH] Add some documentation for AegisubCSConv and TextFileReader. Originally committed to SVN as r4036. --- aegisub/src/ass_file.cpp | 1 - aegisub/src/charset_conv.cpp | 173 +++++++++---------------------- aegisub/src/charset_conv.h | 123 +++++++++++----------- aegisub/src/text_file_reader.cpp | 103 +++++------------- aegisub/src/text_file_reader.h | 53 ++++------ 5 files changed, 155 insertions(+), 298 deletions(-) diff --git a/aegisub/src/ass_file.cpp b/aegisub/src/ass_file.cpp index c1a4c032c..f5c80efe9 100644 --- a/aegisub/src/ass_file.cpp +++ b/aegisub/src/ass_file.cpp @@ -103,7 +103,6 @@ void AssFile::Load (const wxString _filename,const wxString charset,bool addToRe wxString enc; if (charset.IsEmpty()) enc = TextFileReader::GetEncoding(_filename); else enc = charset; - TextFileReader::EnsureValid(enc); // Generic preparation Clear(); diff --git a/aegisub/src/charset_conv.cpp b/aegisub/src/charset_conv.cpp index d15b88896..c98170126 100644 --- a/aegisub/src/charset_conv.cpp +++ b/aegisub/src/charset_conv.cpp @@ -49,52 +49,27 @@ WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash); #if wxUSE_THREADS - -/// DOCME static wxMutex encodingListMutex; #endif - -/// DOCME static const iconv_t iconv_invalid = (iconv_t)-1; - -/// DOCME static const size_t iconv_failed = (size_t)-1; - -/// DOCME #define ICONV_CONST_CAST(a) const_cast(a) -#ifndef ICONV_POSIX -static int addEncoding(unsigned int namescount, const char * const * names, void* data); -#endif - -/// DOCME static wxArrayString *supportedEncodings = NULL; - -/// DOCME static wxArrayString *prettyEncodingList = NULL; - -/// DOCME static PrettyNamesHash *prettyEncodingHash = NULL; - -/// @brief DOCME -/// @param mbEncName -/// @param enableSubst -/// AegisubCSConv::AegisubCSConv(const wxChar *mbEncName, bool enableSubst) -: mbCharsetName(GetRealEncodingName(mbEncName)), mbNulLen(0), enableSubst(enableSubst) +: wcCharsetName(WCHAR_T_ENCODING) +, mbCharsetName(GetRealEncodingName(mbEncName)) +, mbNulLen(0) +, enableSubst(enableSubst) +, m2w(wcCharsetName, mbCharsetName) +, w2m(mbCharsetName, wcCharsetName) { - wcCharsetName = wxString::FromAscii(WCHAR_T_ENCODING); - - m2w = iconv_open(wcCharsetName.ToAscii(), mbCharsetName.ToAscii()); - w2m = iconv_open(mbCharsetName.ToAscii(), wcCharsetName.ToAscii()); - if (m2w == iconv_invalid || w2m == iconv_invalid) { - if (m2w != iconv_invalid) iconv_close(m2w); - if (w2m != iconv_invalid) iconv_close(w2m); - - throw wxString::Format(_T("Character set %s is not supported."), mbEncName); + throw wxString::Format(L"Character set %s is not supported.", mbEncName); } if (enableSubst) { @@ -110,26 +85,14 @@ AegisubCSConv::AegisubCSConv(const wxChar *mbEncName, bool enableSubst) } } -/// @brief DOCME -/// -AegisubCSConv::~AegisubCSConv() { - if (m2w != iconv_invalid) iconv_close(m2w); - if (w2m != iconv_invalid) iconv_close(w2m); -} - -/// @brief DOCME -/// @return -/// wxMBConv * AegisubCSConv::Clone() const { AegisubCSConv *c = new AegisubCSConv(mbCharsetName); c->mbNulLen = mbNulLen; return c; } - /// @brief Calculate the size of NUL in the target encoding via iconv -/// @return -/// +/// @return The size in bytes of NUL size_t AegisubCSConv::GetMBNulLen() const { if (mbNulLen == 0) { const wchar_t nulStr[] = L""; @@ -142,18 +105,13 @@ size_t AegisubCSConv::GetMBNulLen() const { size_t res = iconv(w2m, &inPtr, &inLen, &outPtr, &outLen); if (res != 0) - const_cast(this)->mbNulLen = (size_t)-1; + mbNulLen = (size_t)-1; else - const_cast(this)->mbNulLen = sizeof(outBuff) - outLen; + mbNulLen = sizeof(outBuff) - outLen; } return mbNulLen; } - -/// @brief Calculate the length (in bytes) of a MB string, not including the terminator -/// @param str -/// @return -/// size_t AegisubCSConv::MBBuffLen(const char * str) const { size_t nulLen = GetMBNulLen(); const char *ptr; @@ -171,14 +129,12 @@ size_t AegisubCSConv::MBBuffLen(const char * str) const { } } - -/// @brief DOCME -/// @param dst -/// @param dstSize -/// @param src -/// @param srcLen -/// @return -/// +/// @brief Convert a string from multibyte to wide characters +/// @param dst Destination buffer. +/// @param dstSize Length of destination buffer in wchar_ts +/// @param src Source multibyte string +/// @param srcLen Length of source buffer in bytes, or -1 to autodetect +/// @return The number of wchar_ts needed to store the string in the target charset size_t AegisubCSConv::ToWChar(wchar_t *dst, size_t dstSize, const char *src, size_t srcLen) const { return doConversion( m2w, @@ -189,14 +145,12 @@ size_t AegisubCSConv::ToWChar(wchar_t *dst, size_t dstSize, const char *src, siz ) / sizeof(wchar_t); } - -/// @brief DOCME -/// @param dst -/// @param dstSize -/// @param src -/// @param srcLen -/// @return -/// +/// @brief Convert a string from wide characters to multibyte +/// @param dst Destination buffer +/// @param dstSize Length of destination buffer in bytes +/// @param src Source wide character string +/// @param srcLen Length in wchar_ts of source, or -1 to autodetect +/// @return The number of bytes needed to store the string in the target charset size_t AegisubCSConv::FromWChar(char *dst, size_t dstSize, const wchar_t *src, size_t srcLen) const { return doConversion( w2m, @@ -207,15 +161,7 @@ size_t AegisubCSConv::FromWChar(char *dst, size_t dstSize, const wchar_t *src, s ); } - -/// @brief DOCME -/// @param cd -/// @param dst -/// @param dstSize -/// @param src -/// @param srcSize -/// @return -/// +// Perform a conversion if a buffer is given or calculate the needed buffer size if not size_t AegisubCSConv::doConversion(iconv_t cd, char *dst, size_t dstSize, char *src, size_t srcSize) const { if (dstSize > 0) { return iconvWrapper(cd, &src, &srcSize, &dst, &dstSize); @@ -239,20 +185,12 @@ size_t AegisubCSConv::doConversion(iconv_t cd, char *dst, size_t dstSize, char * return charsWritten; } - -/// @brief DOCME -/// @param cd -/// @param inbuf -/// @param inbytesleft -/// @param outbuf -/// @param outbytesleft -/// @return -/// +// Actually perform a conversion via iconv size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft, - char **outbuf, size_t *outbytesleft) const { + char **outbuf, size_t *outbytesleft) const { #if wxUSE_THREADS - wxMutexLocker lock(const_cast(this)->iconvMutex); + wxMutexLocker lock(iconvMutex); #endif char *outbuforig = *outbuf; @@ -265,10 +203,11 @@ size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft #ifdef ICONV_POSIX if (errno == EILSEQ) { - throw _T("One or more characters do not fit in the selected ") - _T("encoding and the version of iconv Aegisub was built with") - _T(" does not have useful fallbacks. For best results, ") - _T("please rebuild Aegisub using a recent version of GNU iconv."); + throw + L"One or more characters do not fit in the selected " + L"encoding and the version of iconv Aegisub was built with" + L" does not have useful fallbacks. For best results, " + L"please rebuild Aegisub using a recent version of GNU iconv."; } return wxCONV_FAILED; #else @@ -287,7 +226,7 @@ size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft } if (res == iconv_failed && err == EILSEQ) { // Conversion still failed with transliteration enabled, so try our substitution - iconvctl(cd, ICONV_SET_FALLBACKS, const_cast(&fallbacks)); + iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks); res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft); err = errno; iconvctl(cd, ICONV_SET_FALLBACKS, NULL); @@ -309,13 +248,11 @@ size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft } -/// @brief DOCME -/// @param code -/// @param callback -/// @param callback_arg -/// @param convPtr -/// @return -/// +/// @brief GNU iconv character substitution callback +/// @param code Unicode character which could not be converted +/// @param callback Callback to tell iconv what string to use instead +/// @param callback_arg Iconv userdata for callback +/// @param convPtr AegisubCSConv instance to use void AegisubCSConv::ucToMbFallback( unsigned int code, void (*callback) (const char *buf, size_t buflen, void* callback_arg), @@ -323,7 +260,8 @@ void AegisubCSConv::ucToMbFallback( void *convPtr) { // At some point in the future, this should probably switch to a real mapping - // For now, there's just three cases: BOM to nothing, \ to itself (lol Shift-JIS) and everything else to ? + // For now, there's just three cases: BOM to nothing, '\' to itself + // (for Shift-JIS, which does not have \) and everything else to '?' if (code == 0xFEFF) return; if (code == 0x5C) callback("\\", 1, callback_arg); else { @@ -333,13 +271,10 @@ void AegisubCSConv::ucToMbFallback( } #ifndef ICONV_POSIX - -/// @brief DOCME -/// @param namescount -/// @param names -/// @param data -/// @return -/// +/// @brief Callback for iconvlist +/// @param namescount Number of names in names +/// @param names Names to add to the list +/// @param data Unused userdata field int addEncoding(unsigned int namescount, const char * const * names, void* data) { for (unsigned int i = 0; i < namescount; i++) { supportedEncodings->Add(wxString::FromAscii(names[i])); @@ -348,10 +283,6 @@ int addEncoding(unsigned int namescount, const char * const * names, void* data) } #endif - -/// @brief DOCME -/// @return -/// wxArrayString AegisubCSConv::GetAllSupportedEncodings() { #if wxUSE_THREADS wxMutexLocker lock(encodingListMutex); @@ -366,13 +297,8 @@ wxArrayString AegisubCSConv::GetAllSupportedEncodings() { return *supportedEncodings; } - -/// @brief Map pretty names to the real encoding names -/// @param name -/// @return -/// wxString AegisubCSConv::GetRealEncodingName(wxString name) { - if (name.Lower() == _T("local")) return wxLocale::GetSystemEncodingName(); + if (name.Lower() == L"local") return wxLocale::GetSystemEncodingName(); if (prettyEncodingList == NULL) return name; PrettyNamesHash::iterator realName = prettyEncodingHash->find(name); @@ -382,9 +308,6 @@ wxString AegisubCSConv::GetRealEncodingName(wxString name) { return name; } - -/// @brief DOCME -/// wxArrayString AegisubCSConv::GetEncodingsList() { #if wxUSE_THREADS wxMutexLocker lock(encodingListMutex); @@ -511,10 +434,10 @@ wxArrayString AegisubCSConv::GetEncodingsList() { PrettyNamesHash *map = new PrettyNamesHash(100); wxArrayString *arr = new wxArrayString(); - arr->Add(_T("Local")); + arr->Add(L"Local"); for (int i = 0; encodingNames[i].real != NULL; i++) { - // Verify that iconv actually supports this encoding + // Verify that iconv actually supports converting to and from this encoding iconv_t cd = iconv_open(encodingNames[i].real, WCHAR_T_ENCODING); if (cd == iconv_invalid) continue; iconv_close(cd); @@ -533,7 +456,5 @@ wxArrayString AegisubCSConv::GetEncodingsList() { } return *prettyEncodingList; } -static AegisubCSConv localConv(_T("Local"), false); +static AegisubCSConv localConv(L"Local", false); AegisubCSConv& csConvLocal(localConv); - - diff --git a/aegisub/src/charset_conv.h b/aegisub/src/charset_conv.h index 7d39fcd62..28700819b 100644 --- a/aegisub/src/charset_conv.h +++ b/aegisub/src/charset_conv.h @@ -1,4 +1,4 @@ -// Copyright (c) 2009, Thomas Goyne +// Copyright (c) 2010, Thomas Goyne // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,9 +34,6 @@ /// @ingroup utility /// - - - #ifndef AGI_PRE #include #include @@ -49,64 +46,77 @@ #include "aegisub_endian.h" - #if !defined(_LIBICONV_VERSION) || _LIBICONV_VERSION < 0x010A || defined(LIBICONV_PLUG) - -/// DOCME #define ICONV_POSIX #endif +/// @class iconv_wrapper +/// @brief RAII wrapper for iconv +class iconv_wrapper { +private: + iconv_t conv; +public: + iconv_wrapper(const char *to, const char *from) + : conv(iconv_open(to, from)) + { } + iconv_wrapper(wxString const& to, wxString const& from) + : conv(iconv_open(to.ToAscii(), from.ToAscii())) + { } + iconv_wrapper(const char *to, wxString const& from) + : conv(iconv_open(to, from.ToAscii())) + { } + iconv_wrapper(wxString const& to, const char *from) + : conv(iconv_open(to.ToAscii(), from)) + { } + ~iconv_wrapper() { + if (conv != (iconv_t)-1) iconv_close(conv); + } + operator iconv_t() { + return conv; + } + operator const iconv_t() const { + return conv; + } +}; -/// DOCME /// @class AegisubCSConv -/// @brief DOCME -/// -/// DOCME +/// @brief wxMBConv implementation for converting to and from unicode class AegisubCSConv : public wxMBConv { public: - // By default, any conversion that would be lossy will fail - // When enableSubst is true, conversions to multibyte with a sufficiently large buffer - // are guaranteed to succeed, with characters dropped or changed as needed to fit the - // string into the target encoding. + /// @param mbEncName Multibyte encoding to convert to/from + /// @param enableSubst Whether to substitute characters when needed. + /// By default, any conversion that would be lossy will fail + /// When enableSubst is true, conversions to multibyte with a sufficiently + /// large buffer are guaranteed to succeed, with characters dropped or + /// changed as needed to fit the string into the target encoding. AegisubCSConv(const wxChar *mbEncName, bool enableSubst = false); - virtual ~AegisubCSConv(); // wxMBConv implementation; see strconv.h for usage details - virtual size_t ToWChar(wchar_t *dst, size_t dstLen, const char *src, size_t srcLen = wxNO_LEN) const; - virtual size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const; - virtual size_t GetMBNulLen() const; - virtual wxMBConv *Clone() const; + size_t ToWChar(wchar_t *dst, size_t dstLen, const char *src, size_t srcLen = wxNO_LEN) const; + size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const; + size_t GetMBNulLen() const; + wxMBConv *Clone() const; - // Get the length (in bytes) of a null-terminated string whose encoding is mbEncName + /// @brief Multibyte-aware strlen + /// @return Length in bytes of str (excluding terminator) size_t MBBuffLen(const char *str) const; - // Get a list of support encodings with somewhat user-friendly names + /// @brief Get a list of support encodings with user-friendly names static wxArrayString GetEncodingsList(); - // Get a list of all encodings supported by iconv + /// @brief Get a list of all encodings supported by iconv + /// Requires GNU iconv for useful results static wxArrayString GetAllSupportedEncodings(); - // Map a user-friendly encoding name to iconv's name + /// @brief Map a user-friendly encoding name to the real encoding name static wxString GetRealEncodingName(wxString name); -protected: - - /// DOCME - - /// DOCME - iconv_t m2w, w2m; - private: - - /// DOCME + // The smattering of mutable variables here are due to that ToWChar and + // FromWChar are const in wxMBConv, but we require minor mutation for + // things like locks (as iconv is not thread-safe) wxString wcCharsetName; - - /// DOCME wxString mbCharsetName; - - /// DOCME - size_t mbNulLen; - - /// DOCME - bool enableSubst; + mutable size_t mbNulLen; + bool enableSubst; size_t doConversion(iconv_t cd, char *dst, size_t dstSize, char *src, size_t srcSize) const; size_t iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) const; @@ -117,56 +127,43 @@ private: void *callback_arg, void *convPtr); - /// DOCME + /// Replacement character for characters which do not fit in the target + /// encoding and iconv does not have an appropriate substitute for char invalidRep[8]; - - /// DOCME size_t invalidRepSize; #ifndef ICONV_POSIX - - /// DOCME - iconv_fallbacks fallbacks; + mutable iconv_fallbacks fallbacks; #endif #if wxUSE_THREADS - - /// DOCME - wxMutex iconvMutex; + mutable wxMutex iconvMutex; #endif + +protected: + iconv_wrapper m2w, w2m; }; -// Predefined conversion for the current locale. Should be a drop-in replacement for wxConvLocal +// Predefined conversion for the current locale, intended to be a drop-in +// replacement for wxConvLocal extern AegisubCSConv& csConvLocal; #ifdef HAVE_BIG_ENDIAN # if SIZEOF_WCHAR_T == 4 - -/// DOCME # define WCHAR_T_ENCODING "UTF-32BE" # elif SIZEOF_WCHAR_T == 2 - -/// DOCME # define WCHAR_T_ENCODING "UTF-16BE" # endif #elif defined(HAVE_LITTLE_ENDIAN) # if SIZEOF_WCHAR_T == 4 - -/// DOCME # define WCHAR_T_ENCODING "UTF-32LE" # elif SIZEOF_WCHAR_T == 2 - -/// DOCME # define WCHAR_T_ENCODING "UTF-16LE" # endif #else # if SIZEOF_WCHAR_T == 4 - -/// DOCME # define WCHAR_T_ENCODING ((Endian::MachineToBig((uint32_t)1) == 1) ? "UTF-32BE" : "UTF-32LE") # elif SIZEOF_WCHAR_T == 2 - -/// DOCME # define WCHAR_T_ENCODING ((Endian::MachineToBig((uint32_t)1) == 1) ? "UTF-16BE" : "UTF-16LE") # endif #endif diff --git a/aegisub/src/text_file_reader.cpp b/aegisub/src/text_file_reader.cpp index 6c0b0c726..90c52f2f9 100644 --- a/aegisub/src/text_file_reader.cpp +++ b/aegisub/src/text_file_reader.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2005, Rodrigo Braz Monteiro +// Copyright (c) 2010, Rodrigo Braz Monteiro, Thomas Goyne // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -51,13 +51,6 @@ #endif #include "text_file_reader.h" - -/// @brief DOCME -/// @param filename -/// @param enc -/// @param trim -/// @return -/// TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim) : encoding(enc), conv((iconv_t)-1), trim(trim), readComplete(false), currout(0), outptr(0), currentLine(0) { #ifdef __WINDOWS__ @@ -65,29 +58,22 @@ TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim) #else file.open(wxFNCONV(filename),std::ios::in | std::ios::binary); #endif - if (!file.is_open()) { - throw _T("Failed opening file for reading."); - } + if (!file.is_open()) throw L"Failed opening file for reading."; if (encoding.IsEmpty()) encoding = GetEncoding(filename); - if (encoding == _T("binary")) return; + if (encoding == L"binary") return; encoding = AegisubCSConv::GetRealEncodingName(encoding); conv = iconv_open(WCHAR_T_ENCODING, encoding.ToAscii()); + if (conv == (iconv_t)-1) { + throw wxString::Format(L"Character set '%s' is not supported.", enc.c_str()); + } } - -/// @brief DOCME -/// TextFileReader::~TextFileReader() { if (conv != (iconv_t)-1) iconv_close(conv); } - -/// @brief DOCME -/// @param filename -/// @return -/// -wxString TextFileReader::GetEncoding(const wxString filename) { +wxString TextFileReader::GetEncoding(wxString const& filename) { // Prepare unsigned char b[4]; memset(b, 0, sizeof(b)); @@ -100,27 +86,27 @@ wxString TextFileReader::GetEncoding(const wxString filename) { ifile.open(wxFNCONV(filename)); #endif if (!ifile.is_open()) { - return _T("unknown"); + return L"unknown"; } ifile.read(reinterpret_cast(b),4); ifile.close(); // Try to get the byte order mark from them - if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) return _T("UTF-8"); - else if (b[0] == 0xFF && b[1] == 0xFE && b[2] == 0x00 && b[3] == 0x00) return _T("UTF-32LE"); - else if (b[0] == 0x00 && b[1] == 0x00 && b[2] == 0xFE && b[3] == 0xFF) return _T("UTF-32BE"); - else if (b[0] == 0xFF && b[1] == 0xFE) return _T("UTF-16LE"); - else if (b[0] == 0xFE && b[1] == 0xFF) return _T("UTF-16BE"); - else if (b[0] == 0x2B && b[1] == 0x2F && b[2] == 0x76) return _T("UTF-7"); + if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) return L"UTF-8"; + else if (b[0] == 0xFF && b[1] == 0xFE && b[2] == 0x00 && b[3] == 0x00) return L"UTF-32LE"; + else if (b[0] == 0x00 && b[1] == 0x00 && b[2] == 0xFE && b[3] == 0xFF) return L"UTF-32BE"; + else if (b[0] == 0xFF && b[1] == 0xFE) return L"UTF-16LE"; + else if (b[0] == 0xFE && b[1] == 0xFF) return L"UTF-16BE"; + else if (b[0] == 0x2B && b[1] == 0x2F && b[2] == 0x76) return L"UTF-7"; // Try to guess UTF-16 - else if (b[0] == 0 && b[1] >= 32 && b[2] == 0 && b[3] >= 32) return _T("UTF-16BE"); - else if (b[0] >= 32 && b[1] == 0 && b[2] >= 32 && b[3] == 0) return _T("UTF-16LE"); + else if (b[0] == 0 && b[1] >= 32 && b[2] == 0 && b[3] >= 32) return L"UTF-16BE"; + else if (b[0] >= 32 && b[1] == 0 && b[2] >= 32 && b[3] == 0) return L"UTF-16LE"; // If any of the first four bytes are under 0x20 (the first printable character), // except for 9-13 range, assume binary for (int i=0;i<4;i++) { - if (b[i] < 9 || (b[i] > 13 && b[i] < 32)) return _T("binary"); + if (b[i] < 9 || (b[i] > 13 && b[i] < 32)) return L"binary"; } #ifdef WITH_UNIVCHARDET @@ -129,14 +115,10 @@ wxString TextFileReader::GetEncoding(const wxString filename) { return det.GetEncoding(filename); #else // Fall back to local - return _T("Local"); + return L"local"; #endif } - -/// @brief DOCME -/// @return -/// wchar_t TextFileReader::GetWChar() { // If there's already some converted characters waiting, return the next one if (++currout < outptr) { @@ -174,7 +156,7 @@ wchar_t TextFileReader::GetWChar() { // adding one byte to the input buffer until either it succeeds or we add enough bytes to // complete any character if (++bytesAdded > 3) - throw wxString::Format(_T("Invalid input character found near line %u"), currentLine); + throw wxString::Format(L"Invalid input character found near line %u", currentLine); file.read(inptr + inbytesleft, 1); inbytesleft++; @@ -183,34 +165,27 @@ wchar_t TextFileReader::GetWChar() { if (outptr > outbuf) return *currout; - throw wxString::Format(_T("Invalid input character found near line %u"), currentLine); + throw wxString::Format(L"Invalid input character found near line %u", currentLine); } - -/// @brief DOCME -/// @return -/// wxString TextFileReader::ReadLineFromFile() { wxString buffer; - size_t bufAlloc = 1024; - buffer.Alloc(bufAlloc); + buffer.Alloc(1024); currentLine++; // Read a line wchar_t ch; - size_t len = 0; + bool first = true; + // This doesn't work for \r deliminated files, but it's very unlikely + // that we'll run into one of those for (ch = GetWChar(); ch != L'\n' && ch != 0; ch = GetWChar()) { if (ch == L'\r') continue; // Skip the BOM -- we don't need it as the encoding is already known // and it sometimes causes conversion problems - if (ch == 0xFEFF && len == 0) continue; + if (ch == 0xFEFF && first) continue; - if (len >= bufAlloc - 1) { - bufAlloc *= 2; - buffer.Alloc(bufAlloc); - } buffer += ch; - len++; + first = false; } if (ch == 0) readComplete = true; @@ -223,36 +198,10 @@ wxString TextFileReader::ReadLineFromFile() { return buffer; } - -/// @brief DOCME -/// @return -/// bool TextFileReader::HasMoreLines() { return !readComplete; } - -/// @brief DOCME -/// @param encoding -/// @return -/// -void TextFileReader::EnsureValid(wxString enc) { - if (enc == _T("binary")) return; - - enc = AegisubCSConv::GetRealEncodingName(enc); - iconv_t cd = iconv_open(WCHAR_T_ENCODING, enc.ToAscii()); - bool canOpen = cd != (iconv_t)-1; - iconv_close(cd); - if (!canOpen) { - throw wxString::Format(_T("Character set %s is not supported."), enc.c_str()); - } -} - - -/// @brief DOCME -/// wxString TextFileReader::GetCurrentEncoding() { return encoding; } - - diff --git a/aegisub/src/text_file_reader.h b/aegisub/src/text_file_reader.h index 1bdb14e63..c557912c8 100644 --- a/aegisub/src/text_file_reader.h +++ b/aegisub/src/text_file_reader.h @@ -1,4 +1,4 @@ -// Copyright (c) 2005, Rodrigo Braz Monteiro +// Copyright (c) 2010, Rodrigo Braz Monteiro // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -45,62 +45,53 @@ #include #endif - -/// DOCME /// @class TextFileReader -/// @brief DOCME -/// -/// DOCME +/// @brief A line-based text file reader class TextFileReader { private: - - /// DOCME + /// Encoding of the file being read wxString encoding; - - /// DOCME std::ifstream file; - - /// DOCME iconv_t conv; - - /// DOCME bool trim; - - /// DOCME bool readComplete; - - /// DOCME + // Iconv buffers and state wchar_t outbuf[256]; - - /// DOCME wchar_t *currout; - - /// DOCME wchar_t *outptr; - - /// DOCME size_t outbytesleft; - - /// DOCME + /// Current line number unsigned int currentLine; + /// @brief Read a single wchar_t from the file wchar_t GetWChar(); TextFileReader(const TextFileReader&); TextFileReader& operator=(const TextFileReader&); public: - TextFileReader(wxString filename,wxString encoding=_T(""), bool trim=true); + /// @brief Constructor + /// @param filename File to open + /// @param enc Encoding to use, or empty to autodetect + /// @param trim Whether to trim whitespace from lines read + TextFileReader(wxString filename,wxString encoding=L"", bool trim=true); + /// @brief Destructor ~TextFileReader(); + /// @brief Read a line from the file + /// @return The line, possibly trimmed wxString ReadLineFromFile(); + /// @brief Check if there are any more lines to read bool HasMoreLines(); - static void EnsureValid(const wxString encoding); + /// @brief Get the file encoding used by this reader + /// @return "unknown", "binary", or a character encoding name wxString GetCurrentEncoding(); - static wxString GetEncoding(const wxString filename); + + /// @brief Attempt to detect a file's encoding + /// @param filename The file to check + /// @return "unknown", "binary", or a character encoding name + static wxString GetEncoding(wxString const& filename); }; - -