forked from mia/Aegisub
Simplify charset detection
Originally committed to SVN as r4419.
This commit is contained in:
parent
401560b190
commit
f064624ecd
6 changed files with 16 additions and 45 deletions
|
@ -50,6 +50,7 @@
|
||||||
#include "ass_file.h"
|
#include "ass_file.h"
|
||||||
#include "ass_override.h"
|
#include "ass_override.h"
|
||||||
#include "ass_style.h"
|
#include "ass_style.h"
|
||||||
|
#include "charset_detect.h"
|
||||||
#include "compat.h"
|
#include "compat.h"
|
||||||
#include "main.h"
|
#include "main.h"
|
||||||
#include "options.h"
|
#include "options.h"
|
||||||
|
@ -74,7 +75,7 @@ AssFile::~AssFile() {
|
||||||
/// @param file
|
/// @param file
|
||||||
/// @param charset
|
/// @param charset
|
||||||
/// @param addToRecent
|
/// @param addToRecent
|
||||||
void AssFile::Load (const wxString _filename,const wxString charset,bool addToRecent) {
|
void AssFile::Load (const wxString &_filename,wxString charset,bool addToRecent) {
|
||||||
bool ok = true;
|
bool ok = true;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -91,9 +92,9 @@ void AssFile::Load (const wxString _filename,const wxString charset,bool addToRe
|
||||||
fclose(file);
|
fclose(file);
|
||||||
|
|
||||||
// Find file encoding
|
// Find file encoding
|
||||||
wxString enc;
|
if (charset.empty()) {
|
||||||
if (charset.IsEmpty()) enc = TextFileReader::GetEncoding(_filename);
|
charset = CharSetDetect::GetEncoding(_filename);
|
||||||
else enc = charset;
|
}
|
||||||
|
|
||||||
// Generic preparation
|
// Generic preparation
|
||||||
Clear();
|
Clear();
|
||||||
|
@ -104,7 +105,7 @@ void AssFile::Load (const wxString _filename,const wxString charset,bool addToRe
|
||||||
// Read file
|
// Read file
|
||||||
if (reader) {
|
if (reader) {
|
||||||
reader->SetTarget(this);
|
reader->SetTarget(this);
|
||||||
reader->ReadFile(_filename,enc);
|
reader->ReadFile(_filename,charset);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Couldn't find a type
|
// Couldn't find a type
|
||||||
|
|
|
@ -109,7 +109,7 @@ public:
|
||||||
AssStyle *GetStyle(wxString name); // Gets style by its name
|
AssStyle *GetStyle(wxString name); // Gets style by its name
|
||||||
|
|
||||||
//wxString GetString(); // Returns the whole file as a single string
|
//wxString GetString(); // Returns the whole file as a single string
|
||||||
void Load(wxString file,wxString charset=_T(""),bool addToRecent=true); // Load from a file
|
void Load(const wxString &file,wxString charset=_T(""),bool addToRecent=true); // Load from a file
|
||||||
void Save(wxString file,bool setfilename=false,bool addToRecent=true,const wxString encoding=_T("")); // Save to a file. Pass true to second argument if this isn't a copy
|
void Save(wxString file,bool setfilename=false,bool addToRecent=true,const wxString encoding=_T("")); // Save to a file. Pass true to second argument if this isn't a copy
|
||||||
void SaveMemory(std::vector<char> &dst,const wxString encoding=_T("")); // Save to a memory string
|
void SaveMemory(std::vector<char> &dst,const wxString encoding=_T("")); // Save to a memory string
|
||||||
void Export(wxString file); // Saves exported copy, with effects applied
|
void Export(wxString file); // Saves exported copy, with effects applied
|
||||||
|
|
|
@ -34,9 +34,6 @@
|
||||||
/// @ingroup utility
|
/// @ingroup utility
|
||||||
///
|
///
|
||||||
|
|
||||||
|
|
||||||
///////////
|
|
||||||
// Headers
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
#ifndef AGI_PRE
|
#ifndef AGI_PRE
|
||||||
|
@ -52,16 +49,11 @@
|
||||||
#include <libaegisub/log.h>
|
#include <libaegisub/log.h>
|
||||||
|
|
||||||
#include "charset_detect.h"
|
#include "charset_detect.h"
|
||||||
#include "text_file_reader.h"
|
|
||||||
#include "compat.h"
|
#include "compat.h"
|
||||||
|
|
||||||
|
namespace CharSetDetect {
|
||||||
|
|
||||||
|
wxString GetEncoding(wxString const& filename) {
|
||||||
/// @brief Get encoding
|
|
||||||
/// @param filename
|
|
||||||
/// @return
|
|
||||||
///
|
|
||||||
wxString CharSetDetect::GetEncoding(wxString filename) {
|
|
||||||
LOG_I("charset/file") << filename;
|
LOG_I("charset/file") << filename;
|
||||||
bool unknown = 0;
|
bool unknown = 0;
|
||||||
|
|
||||||
|
@ -70,7 +62,7 @@ wxString CharSetDetect::GetEncoding(wxString filename) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
agi::charset::DetectAll(STD_STR(filename), list);
|
agi::charset::DetectAll(STD_STR(filename), list);
|
||||||
} catch (const agi::charset::UnknownCharset&) {
|
} catch (const agi::charset::UnknownCharset&) {
|
||||||
unknown = 1;
|
unknown = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -93,3 +85,5 @@ wxString CharSetDetect::GetEncoding(wxString filename) {
|
||||||
return i_lst->second;
|
return i_lst->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -34,17 +34,9 @@
|
||||||
/// @ingroup utility
|
/// @ingroup utility
|
||||||
///
|
///
|
||||||
|
|
||||||
/// DOCME
|
namespace CharSetDetect {
|
||||||
/// @class CharSetDetect
|
|
||||||
/// @brief Detect character set of a file
|
|
||||||
class CharSetDetect {
|
|
||||||
private:
|
|
||||||
/// Character set
|
|
||||||
wxString result;
|
|
||||||
|
|
||||||
public:
|
|
||||||
/// @brief Get character set name.
|
/// @brief Get character set name.
|
||||||
/// @param filename File to check
|
/// @param filename File to check
|
||||||
/// @return Character set name
|
/// @return Character set name
|
||||||
wxString GetEncoding(wxString filename);
|
wxString GetEncoding(wxString const& filename);
|
||||||
};
|
}
|
||||||
|
|
|
@ -48,9 +48,7 @@
|
||||||
#include <libaegisub/log.h>
|
#include <libaegisub/log.h>
|
||||||
|
|
||||||
#include "charset_conv.h"
|
#include "charset_conv.h"
|
||||||
#ifdef WITH_UNIVCHARDET
|
|
||||||
#include "charset_detect.h"
|
#include "charset_detect.h"
|
||||||
#endif
|
|
||||||
#include "text_file_reader.h"
|
#include "text_file_reader.h"
|
||||||
|
|
||||||
TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
|
TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
|
||||||
|
@ -62,7 +60,7 @@ TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
|
||||||
#endif
|
#endif
|
||||||
if (!file.is_open()) throw L"Failed opening file for reading.";
|
if (!file.is_open()) throw L"Failed opening file for reading.";
|
||||||
|
|
||||||
if (encoding.IsEmpty()) encoding = GetEncoding(filename);
|
if (encoding.IsEmpty()) encoding = CharSetDetect::GetEncoding(filename);
|
||||||
if (encoding == L"binary") return;
|
if (encoding == L"binary") return;
|
||||||
encoding = AegisubCSConv::GetRealEncodingName(encoding);
|
encoding = AegisubCSConv::GetRealEncodingName(encoding);
|
||||||
conv = iconv_open(WCHAR_T_ENCODING, encoding.ToAscii());
|
conv = iconv_open(WCHAR_T_ENCODING, encoding.ToAscii());
|
||||||
|
@ -75,15 +73,6 @@ TextFileReader::~TextFileReader() {
|
||||||
if (conv != (iconv_t)-1) iconv_close(conv);
|
if (conv != (iconv_t)-1) iconv_close(conv);
|
||||||
}
|
}
|
||||||
|
|
||||||
wxString TextFileReader::GetEncoding(wxString const& filename) {
|
|
||||||
|
|
||||||
// Use universalchardet library to detect charset
|
|
||||||
CharSetDetect det;
|
|
||||||
wxString str(det.GetEncoding(filename));
|
|
||||||
LOG_I("file/reader/text/encoding") << str;
|
|
||||||
return str;
|
|
||||||
}
|
|
||||||
|
|
||||||
wchar_t TextFileReader::GetWChar() {
|
wchar_t TextFileReader::GetWChar() {
|
||||||
// If there's already some converted characters waiting, return the next one
|
// If there's already some converted characters waiting, return the next one
|
||||||
if (++currout < outptr) {
|
if (++currout < outptr) {
|
||||||
|
|
|
@ -89,9 +89,4 @@ public:
|
||||||
/// @brief Get the file encoding used by this reader
|
/// @brief Get the file encoding used by this reader
|
||||||
/// @return "unknown", "binary", or a character encoding name
|
/// @return "unknown", "binary", or a character encoding name
|
||||||
wxString GetCurrentEncoding();
|
wxString GetCurrentEncoding();
|
||||||
|
|
||||||
/// @brief Attempt to detect a file's encoding
|
|
||||||
/// @param filename The file to check
|
|
||||||
/// @return "unknown", "binary", or a character encoding name
|
|
||||||
static wxString GetEncoding(wxString const& filename);
|
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue