2007-04-08 08:01:41 +02:00
// Copyright (c) 2007, Rodrigo Braz Monteiro
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of the Aegisub Group nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// -----------------------------------------------------------------------------
//
// AEGISUB
//
// Website: http://aegisub.cellosoft.com
// Contact: mailto:zeratul@cellosoft.com
//
///////////
// Headers
# include "charset_detect.h"
# include "text_file_reader.h"
2008-01-17 19:35:06 +01:00
#include "../universalchardet/nsCharSetProber.h"
#include <wx/choicdlg.h>
#include <cstring>
2007-04-08 08:01:41 +02:00
2008-01-20 08:24:04 +01:00
// One candidate character set produced by a prober, together with the
// confidence the prober reported for it.
struct CharDetResult {
	// Confidence reported by the prober for this candidate
	float confidence;
	// Name of the candidate character set
	wxString name;

	// Ordering is intentionally inverted: an entry compares "less" when its
	// confidence is HIGHER, so sorting a container of results with the default
	// comparison places the most confident candidates first.
	// Const-qualified so it can be used on const objects and temporaries.
	bool operator < (const CharDetResult &par) const { return confidence > par.confidence; }
};
2007-04-08 08:01:41 +02:00
////////////////
// Get encoding
// Detects the character set of the file named by `filename`.
//
// The file is read line by line through TextFileReader; every line is
// converted with wxConvLocal and handed to HandleData() until either the file
// is exhausted or done() reports that detection has finished. DataEnd() is
// then called to flag the end of input.
//
// Afterwards every prober candidate whose confidence exceeds 1% is collected.
// If more than one candidate was collected, the distinct names (at most 20,
// most confident first) are offered to the user in a single-choice dialog and
// the selected name is stored in `result`.
//
// Returns the `result` member: either the user's pick, or whatever value was
// previously stored there (Report() is the other place in this file that
// assigns it).
// Throws a wide string literal (const wxChar*) if the user cancels the dialog.
wxString CharSetDetect::GetEncoding(wxString filename) {
	// Open file
	TextFileReader reader(filename, _T(" Local "));

	// Loop through it until it finds interesting lines
	while (reader.HasMoreLines() && !done()) {
		wxString line = reader.ReadLineFromFile();
		wxCharBuffer buffer = line.mb_str(wxConvLocal);
		const char *bytes = buffer.data();

		// A failed conversion yields a null buffer, so skip such lines.
		// The length must be the byte count of the converted buffer; the
		// character count of the wxString differs for multibyte encodings.
		if (bytes) HandleData(bytes, std::strlen(bytes));
	}

	// Flag as finished
	DataEnd();

	// Grab every result obtained
	std::list<CharDetResult> results;
	for (int i = 0; i < NUM_OF_CHARSET_PROBERS; i++) {
		// NOTE(review): guard against probers that were never instantiated;
		// confirm against the detector base class whether entries can be null.
		if (!mCharSetProbers[i]) continue;

		int probes = mCharSetProbers[i]->GetProbeCount();
		for (int j = 0; j < probes; j++) {
			float conf = mCharSetProbers[i]->GetConfidence(j);

			// Only bother with those whose confidence is at least 1%
			if (conf > 0.01f) {
				results.push_back(CharDetResult());
				results.back().name = wxString(mCharSetProbers[i]->GetCharSetName(j), wxConvUTF8);
				results.back().confidence = conf;
			}
		}
	}

	// If you got more than one valid result, ask the user which he wants
	if (results.size() > 1) {
		// CharDetResult's operator< is inverted, so this puts the highest
		// confidence first
		results.sort();

		// Build the list of choices. `picked` holds the bare names and
		// `choices` the matching display strings; both are appended to
		// together, so an index into one is valid for the other.
		const size_t maxChoices = 20;
		wxArrayString choices;
		wxArrayString picked;
		for (std::list<CharDetResult>::iterator cur = results.begin(); cur != results.end(); ++cur) {
			const wxString &name = cur->name;
			if (picked.Index(name) == wxNOT_FOUND) {
				picked.Add(name);
				choices.Add(wxString::Format(_T(" %f%% - "), cur->confidence * 100.0f) + name);
				if (picked.GetCount() == maxChoices) break;
			}
		}

		// Get choice from user
		int choice = wxGetSingleChoiceIndex(_(" Aegisub could not narrow down the character set to a single one. \n Please pick one below: "), _(" Choose character set "), choices);
		if (choice == -1) throw _T(" Canceled ");

		// Retrieve name. The dialog index refers to the de-duplicated list,
		// so it must be looked up there and not in `results`, which may
		// contain repeated names that shift the positions.
		result = picked[choice];
	}

	// Return whatever it got
	return result;
}
//////////
// Report
void CharSetDetect : : Report ( const char * aCharset ) {
// Store the result reported
result = wxString ( aCharset , wxConvUTF8 ) ;
2007-04-08 08:01:41 +02:00
}