Patched universalchardet so that Aegisub can collect every candidate encoding it detects and present them to the user to choose from.

Originally committed to SVN as r1752.
This commit is contained in:
Rodrigo Braz Monteiro 2008-01-17 18:35:06 +00:00
parent 44da8de898
commit 71502c1eeb
5 changed files with 60 additions and 2 deletions

View file

@ -38,6 +38,8 @@
// Headers
#include "charset_detect.h"
#include "text_file_reader.h"
#include "../universalchardet/nsCharSetProber.h"
#include <wx/choicdlg.h>
////////////////
@ -60,9 +62,52 @@ wxString CharSetDetect::GetEncoding(wxString filename) {
return result;
}
// One charset candidate: the name of a detected charset together with the
// confidence value that was reported for it.
struct CharDetResult {
	float confidence;	// Confidence reported for this candidate
	wxString name;		// Charset name of this candidate

	// Inverted comparison: a result orders "before" another when its
	// confidence is HIGHER, so an ascending sort lists the most confident
	// candidate first.
	// Const-qualified on both operands so it can be used on const objects
	// and with standard comparators.
	bool operator < (const CharDetResult &par) const { return confidence > par.confidence; }
};
//////////
// Report
void CharSetDetect::Report(const char* aCharset) {
// Store the result reported
result = wxString(aCharset,wxConvUTF8);
// Grab every result obtained
std::list<CharDetResult> results;
for (int i=0;i<NUM_OF_CHARSET_PROBERS;i++) {
int probes = mCharSetProbers[i]->GetProbeCount();
for (int j=0;j<probes;j++) {
float conf = mCharSetProbers[i]->GetConfidence(j);
// Only bother with those whose confidence is at least 5%
if (conf > 0.05f) {
results.push_back(CharDetResult());
results.back().name = wxString(mCharSetProbers[i]->GetCharSetName(j),wxConvUTF8);
results.back().confidence = mCharSetProbers[i]->GetConfidence(j);
}
}
}
// If you got more than one valid result, ask the user which he wants
if (results.size() > 1) {
results.sort();
// Get choice from user
int n = results.size();
wxArrayString choices;
for (std::list<CharDetResult>::iterator cur=results.begin();cur!=results.end();cur++) {
choices.Add(wxString::Format(_T("%f%% - "),(*cur).confidence*100.0f) + (*cur).name);
}
int choice = wxGetSingleChoiceIndex(_("Aegisub could not narrow down the character set to a single one.\nPlease pick one below:"),_("Choose character set"),choices);
if (choice == -1) throw _T("Canceled");
// Retrieve name
int i = 0;
for (std::list<CharDetResult>::iterator cur=results.begin();cur!=results.end();cur++,i++) {
if (i == choice) result = (*cur).name;
}
}
}

View file

@ -616,8 +616,9 @@ void FrameMain::LoadSubtitles (wxString filename,wxString charset) {
if (!fileCheck.FileExists()) throw _T("Selected file does not exist.");
// Make sure that file isn't actually a timecode file
TextFileReader testSubs(filename);
isBinary = testSubs.GetCurrentEncoding() == _T("binary");
TextFileReader testSubs(filename,charset);
charset = testSubs.GetCurrentEncoding();
isBinary = charset == _T("binary");
if (!isBinary && testSubs.HasMoreLines()) {
wxString cur = testSubs.ReadLineFromFile();
if (cur.Left(10) == _T("# timecode")) {

View file

@ -60,6 +60,10 @@ public:
// Pure virtual: every concrete prober supplies its own implementation.
virtual float GetConfidence() = 0;
virtual void SetOpion() = 0;

// Indexed accessors. These defaults ignore the index and forward to the
// no-argument versions, which is consistent with GetProbeCount() returning 1.
virtual const char* GetCharSetName(int i)
{
  return GetCharSetName();
}

virtual float GetConfidence(int i)
{
  return GetConfidence();
}

// Number of valid indices for the indexed accessors above.
virtual int GetProbeCount()
{
  return 1;
}

#ifdef DEBUG_chardet
// Debug-only hook; does nothing by default.
virtual void DumpStatus() {}
#endif

View file

@ -60,6 +60,10 @@ public:
float GetConfidence();
void SetOpion() {}

// Indexed accessors: forward to the i-th entry of mProbers.
// No range check is performed; callers are expected to keep i below
// GetProbeCount().
const char* GetCharSetName(int i)
{
  return mProbers[i]->GetCharSetName();
}

float GetConfidence(int i)
{
  return mProbers[i]->GetConfidence();
}

// Number of entries addressable through the indexed accessors.
int GetProbeCount()
{
  return NUM_OF_PROBERS;
}

#ifdef DEBUG_chardet
void DumpStatus();
#endif

View file

@ -54,6 +54,10 @@ public:
float GetConfidence();
void SetOpion() {}

// Indexed accessors: forward to the i-th entry of mProbers.
// No range check is performed; callers are expected to keep i below
// GetProbeCount().
const char* GetCharSetName(int i)
{
  return mProbers[i]->GetCharSetName();
}

float GetConfidence(int i)
{
  return mProbers[i]->GetConfidence();
}

// Number of entries addressable through the indexed accessors.
int GetProbeCount()
{
  return NUM_OF_SBCS_PROBERS;
}

#ifdef DEBUG_chardet
void DumpStatus();
#endif