A few minor cleanups to the new charset conversion code.

Originally committed to SVN as r3159.
This commit is contained in:
Thomas Goyne 2009-07-18 00:58:13 +00:00
parent 779dcadc69
commit c2087304fc
7 changed files with 38 additions and 21 deletions

View file

@ -34,7 +34,13 @@
// //
#include "charset_conv.h" #include "charset_conv.h"
#include <stdint.h> #include <stdint.h>
#include <errno.h>
#include <wx/hashmap.h>
#include <wx/intl.h>
WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash);
#if wxUSE_THREADS #if wxUSE_THREADS
static wxMutex encodingListMutex; static wxMutex encodingListMutex;

View file

@ -33,21 +33,18 @@
// Contact: mailto:zeratul@cellosoft.com // Contact: mailto:zeratul@cellosoft.com
// //
#ifndef AEGISUB_STRCONV #ifndef AEGISUB_CHARSET_CONV_H
#define AEGISUB_STRCONV #define AEGISUB_CHARSET_CONV_H
#include <iconv.h> #include <iconv.h>
#include <wchar.h> #include <wchar.h>
#include <wx/intl.h>
#include <wx/hashmap.h>
#include <wx/thread.h>
#include <wx/arrstr.h> #include <wx/arrstr.h>
#include <errno.h> #include <wx/thread.h>
#include <wx/string.h>
#include <wx/strconv.h>
#include "aegisub_endian.h" #include "aegisub_endian.h"
WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash);
#if !defined(_LIBICONV_VERSION) || _LIBICONV_VERSION < 0x010A || defined(LIBICONV_PLUG) #if !defined(_LIBICONV_VERSION) || _LIBICONV_VERSION < 0x010A || defined(LIBICONV_PLUG)
#define ICONV_POSIX #define ICONV_POSIX
#endif #endif
@ -56,7 +53,7 @@ class AegisubCSConv : public wxMBConv {
public: public:
// By default, any conversion that would be lossy will fail // By default, any conversion that would be lossy will fail
// When enableSubst is true, conversions to multibyte with a sufficiently large buffer // When enableSubst is true, conversions to multibyte with a sufficiently large buffer
// are guarunteed to succeed, with characters dropped or changed as needed to fit the // are guaranteed to succeed, with characters dropped or changed as needed to fit the
// string into the target encoding. // string into the target encoding.
AegisubCSConv(const wxChar *mbEncName, bool enableSubst = false); AegisubCSConv(const wxChar *mbEncName, bool enableSubst = false);
virtual ~AegisubCSConv(); virtual ~AegisubCSConv();
@ -77,8 +74,6 @@ public:
// Map a user-friendly encoding name to iconv's name // Map a user-friendly encoding name to iconv's name
static wxString GetRealEncodingName(wxString name); static wxString GetRealEncodingName(wxString name);
static iconv_t IconvOpen(const char *toEncoding);
protected: protected:
iconv_t m2w, w2m; iconv_t m2w, w2m;

View file

@ -88,6 +88,7 @@
#include "standard_paths.h" #include "standard_paths.h"
#include "dialog_video_details.h" #include "dialog_video_details.h"
#include "keyframe.h" #include "keyframe.h"
#include "charset_conv.h"
//////////////////// ////////////////////

View file

@ -39,7 +39,10 @@
#include <algorithm> #include <algorithm>
#include <string> #include <string>
#include <assert.h> #include <assert.h>
#include <errno.h>
#include "text_file_reader.h" #include "text_file_reader.h"
#include "charset_conv.h"
#ifdef WITH_UNIVCHARDET #ifdef WITH_UNIVCHARDET
#include "charset_detect.h" #include "charset_detect.h"
@ -66,7 +69,7 @@ TextFileReader::~TextFileReader() {
if (conv != (iconv_t)-1) iconv_close(conv); if (conv != (iconv_t)-1) iconv_close(conv);
} }
wxString TextFileReader::GetEncoding(const wxString _filename) { wxString TextFileReader::GetEncoding(const wxString filename) {
// Prepare // Prepare
unsigned char b[4]; unsigned char b[4];
memset(b, 0, sizeof(b)); memset(b, 0, sizeof(b));
@ -74,9 +77,9 @@ wxString TextFileReader::GetEncoding(const wxString _filename) {
// Read four bytes from file // Read four bytes from file
std::ifstream ifile; std::ifstream ifile;
#ifdef __WINDOWS__ #ifdef __WINDOWS__
ifile.open(_filename.wc_str()); ifile.open(filename.wc_str());
#else #else
ifile.open(wxFNCONV(_filename)); ifile.open(wxFNCONV(filename));
#endif #endif
if (!ifile.is_open()) { if (!ifile.is_open()) {
return _T("unknown"); return _T("unknown");
@ -105,7 +108,7 @@ wxString TextFileReader::GetEncoding(const wxString _filename) {
#ifdef WITH_UNIVCHARDET #ifdef WITH_UNIVCHARDET
// Use universalchardet library to detect charset // Use universalchardet library to detect charset
CharSetDetect det; CharSetDetect det;
return det.GetEncoding(_filename); return det.GetEncoding(filename);
#else #else
// Fall back to local // Fall back to local
return _T("Local"); return _T("Local");
@ -153,7 +156,7 @@ wchar_t TextFileReader::GetWChar() {
file.read(inptr + inbytesleft, 1); file.read(inptr + inbytesleft, 1);
inbytesleft++; inbytesleft++;
} while (!file.eof()); } while (!file.eof() && file.gcount());
if (outptr > outbuf) if (outptr > outbuf)
return *currout; return *currout;
@ -172,6 +175,8 @@ wxString TextFileReader::ReadLineFromFile() {
size_t len = 0; size_t len = 0;
for (ch = GetWChar(); ch != L'\n' && ch != 0; ch = GetWChar()) { for (ch = GetWChar(); ch != L'\n' && ch != 0; ch = GetWChar()) {
if (ch == L'\r') continue; if (ch == L'\r') continue;
// Skip the BOM -- we don't need it as the encoding is already known
// and it sometimes causes conversion problems
if (ch == 0xFEFF && len == 0) continue; if (ch == 0xFEFF && len == 0) continue;
if (len >= bufAlloc - 1) { if (len >= bufAlloc - 1) {

View file

@ -39,8 +39,7 @@
#include <wx/dynarray.h> #include <wx/dynarray.h>
#include <wx/string.h> #include <wx/string.h>
#include <fstream> #include <fstream>
#include <iconv.h>
#include "charset_conv.h"
class TextFileReader { class TextFileReader {
private: private:
@ -57,10 +56,11 @@ private:
unsigned int currentLine; unsigned int currentLine;
void Open();
void Close();
wchar_t GetWChar(); wchar_t GetWChar();
TextFileReader(const TextFileReader&);
TextFileReader& operator=(const TextFileReader&);
public: public:
TextFileReader(wxString filename,wxString encoding=_T(""), bool trim=true); TextFileReader(wxString filename,wxString encoding=_T(""), bool trim=true);
~TextFileReader(); ~TextFileReader();

View file

@ -39,6 +39,7 @@
#include "text_file_writer.h" #include "text_file_writer.h"
#include "options.h" #include "options.h"
#include "aegisub_endian.h" #include "aegisub_endian.h"
#include "charset_conv.h"
TextFileWriter::TextFileWriter(wxString filename, wxString encoding) TextFileWriter::TextFileWriter(wxString filename, wxString encoding)
: conv() { : conv() {
@ -63,6 +64,10 @@ TextFileWriter::TextFileWriter(wxString filename, wxString encoding)
} }
} }
TextFileWriter::~TextFileWriter() {
// Explicit empty destructor required with an auto_ptr to an incomplete class
}
void TextFileWriter::WriteLineToFile(wxString line, bool addLineBreak) { void TextFileWriter::WriteLineToFile(wxString line, bool addLineBreak) {
wxString temp = line; wxString temp = line;
if (addLineBreak) temp += _T("\r\n"); if (addLineBreak) temp += _T("\r\n");

View file

@ -42,15 +42,20 @@
#include <fstream> #include <fstream>
#include <memory> #include <memory>
#include "charset_conv.h" class AegisubCSConv;
class TextFileWriter { class TextFileWriter {
private: private:
std::ofstream file; std::ofstream file;
std::auto_ptr<AegisubCSConv> conv; std::auto_ptr<AegisubCSConv> conv;
TextFileWriter(const TextFileWriter&);
TextFileWriter& operator=(const TextFileWriter&);
public: public:
TextFileWriter(wxString filename, wxString encoding=_T("")); TextFileWriter(wxString filename, wxString encoding=_T(""));
~TextFileWriter();
void WriteLineToFile(wxString line, bool addLineBreak=true); void WriteLineToFile(wxString line, bool addLineBreak=true);
}; };