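Simplify charset detection

Turn CharSetDetect from a class into a namespace with a free GetEncoding() function, and remove the redundant TextFileReader::GetEncoding() wrapper so that callers use CharSetDetect::GetEncoding() directly.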

Originally committed to SVN as r4419.
This commit is contained in:
Thomas Goyne 2010-06-03 20:31:43 +00:00
parent 401560b190
commit f064624ecd
6 changed files with 16 additions and 45 deletions

View file

@ -50,6 +50,7 @@
#include "ass_file.h" #include "ass_file.h"
#include "ass_override.h" #include "ass_override.h"
#include "ass_style.h" #include "ass_style.h"
#include "charset_detect.h"
#include "compat.h" #include "compat.h"
#include "main.h" #include "main.h"
#include "options.h" #include "options.h"
@ -74,7 +75,7 @@ AssFile::~AssFile() {
/// @param file /// @param file
/// @param charset /// @param charset
/// @param addToRecent /// @param addToRecent
void AssFile::Load (const wxString _filename,const wxString charset,bool addToRecent) { void AssFile::Load (const wxString &_filename,wxString charset,bool addToRecent) {
bool ok = true; bool ok = true;
try { try {
@ -91,9 +92,9 @@ void AssFile::Load (const wxString _filename,const wxString charset,bool addToRe
fclose(file); fclose(file);
// Find file encoding // Find file encoding
wxString enc; if (charset.empty()) {
if (charset.IsEmpty()) enc = TextFileReader::GetEncoding(_filename); charset = CharSetDetect::GetEncoding(_filename);
else enc = charset; }
// Generic preparation // Generic preparation
Clear(); Clear();
@ -104,7 +105,7 @@ void AssFile::Load (const wxString _filename,const wxString charset,bool addToRe
// Read file // Read file
if (reader) { if (reader) {
reader->SetTarget(this); reader->SetTarget(this);
reader->ReadFile(_filename,enc); reader->ReadFile(_filename,charset);
} }
// Couldn't find a type // Couldn't find a type

View file

@ -109,7 +109,7 @@ public:
AssStyle *GetStyle(wxString name); // Gets style by its name AssStyle *GetStyle(wxString name); // Gets style by its name
//wxString GetString(); // Returns the whole file as a single string //wxString GetString(); // Returns the whole file as a single string
void Load(wxString file,wxString charset=_T(""),bool addToRecent=true); // Load from a file void Load(const wxString &file,wxString charset=_T(""),bool addToRecent=true); // Load from a file
void Save(wxString file,bool setfilename=false,bool addToRecent=true,const wxString encoding=_T("")); // Save to a file. Pass true to second argument if this isn't a copy void Save(wxString file,bool setfilename=false,bool addToRecent=true,const wxString encoding=_T("")); // Save to a file. Pass true to second argument if this isn't a copy
void SaveMemory(std::vector<char> &dst,const wxString encoding=_T("")); // Save to a memory string void SaveMemory(std::vector<char> &dst,const wxString encoding=_T("")); // Save to a memory string
void Export(wxString file); // Saves exported copy, with effects applied void Export(wxString file); // Saves exported copy, with effects applied

View file

@ -34,9 +34,6 @@
/// @ingroup utility /// @ingroup utility
/// ///
///////////
// Headers
#include "config.h" #include "config.h"
#ifndef AGI_PRE #ifndef AGI_PRE
@ -52,16 +49,11 @@
#include <libaegisub/log.h> #include <libaegisub/log.h>
#include "charset_detect.h" #include "charset_detect.h"
#include "text_file_reader.h"
#include "compat.h" #include "compat.h"
namespace CharSetDetect {
wxString GetEncoding(wxString const& filename) {
/// @brief Get encoding
/// @param filename
/// @return
///
wxString CharSetDetect::GetEncoding(wxString filename) {
LOG_I("charset/file") << filename; LOG_I("charset/file") << filename;
bool unknown = 0; bool unknown = 0;
@ -70,7 +62,7 @@ wxString CharSetDetect::GetEncoding(wxString filename) {
try { try {
agi::charset::DetectAll(STD_STR(filename), list); agi::charset::DetectAll(STD_STR(filename), list);
} catch (const agi::charset::UnknownCharset&) { } catch (const agi::charset::UnknownCharset&) {
unknown = 1; unknown = 1;
} }
@ -93,3 +85,5 @@ wxString CharSetDetect::GetEncoding(wxString filename) {
return i_lst->second; return i_lst->second;
} }
}

View file

@ -34,17 +34,9 @@
/// @ingroup utility /// @ingroup utility
/// ///
/// DOCME namespace CharSetDetect {
/// @class CharSetDetect
/// @brief Detect character set of a file
class CharSetDetect {
private:
/// Character set
wxString result;
public:
/// @brief Get character set name. /// @brief Get character set name.
/// @param filename File to check /// @param filename File to check
/// @return Character set name /// @return Character set name
wxString GetEncoding(wxString filename); wxString GetEncoding(wxString const& filename);
}; }

View file

@ -48,9 +48,7 @@
#include <libaegisub/log.h> #include <libaegisub/log.h>
#include "charset_conv.h" #include "charset_conv.h"
#ifdef WITH_UNIVCHARDET
#include "charset_detect.h" #include "charset_detect.h"
#endif
#include "text_file_reader.h" #include "text_file_reader.h"
TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim) TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
@ -62,7 +60,7 @@ TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
#endif #endif
if (!file.is_open()) throw L"Failed opening file for reading."; if (!file.is_open()) throw L"Failed opening file for reading.";
if (encoding.IsEmpty()) encoding = GetEncoding(filename); if (encoding.IsEmpty()) encoding = CharSetDetect::GetEncoding(filename);
if (encoding == L"binary") return; if (encoding == L"binary") return;
encoding = AegisubCSConv::GetRealEncodingName(encoding); encoding = AegisubCSConv::GetRealEncodingName(encoding);
conv = iconv_open(WCHAR_T_ENCODING, encoding.ToAscii()); conv = iconv_open(WCHAR_T_ENCODING, encoding.ToAscii());
@ -75,15 +73,6 @@ TextFileReader::~TextFileReader() {
if (conv != (iconv_t)-1) iconv_close(conv); if (conv != (iconv_t)-1) iconv_close(conv);
} }
wxString TextFileReader::GetEncoding(wxString const& filename) {
// Use universalchardet library to detect charset
CharSetDetect det;
wxString str(det.GetEncoding(filename));
LOG_I("file/reader/text/encoding") << str;
return str;
}
wchar_t TextFileReader::GetWChar() { wchar_t TextFileReader::GetWChar() {
// If there's already some converted characters waiting, return the next one // If there's already some converted characters waiting, return the next one
if (++currout < outptr) { if (++currout < outptr) {

View file

@ -89,9 +89,4 @@ public:
/// @brief Get the file encoding used by this reader /// @brief Get the file encoding used by this reader
/// @return "unknown", "binary", or a character encoding name /// @return "unknown", "binary", or a character encoding name
wxString GetCurrentEncoding(); wxString GetCurrentEncoding();
/// @brief Attempt to detect a file's encoding
/// @param filename The file to check
/// @return "unknown", "binary", or a character encoding name
static wxString GetEncoding(wxString const& filename);
}; };