Fixed UTF-16 support in gorgonsub; incidentally, reading UTF-16 is now ~20% faster than reading UTF-8.

Originally committed to SVN as r2063.
This commit is contained in:
Rodrigo Braz Monteiro 2008-03-15 08:36:52 +00:00
parent 16bcf0c942
commit ca63097e90
8 changed files with 124 additions and 110 deletions

View file

@ -56,13 +56,21 @@ namespace Gorgonsub {
}; };
Exception(ExceptionList code); Exception(ExceptionList code);
Exception(ExceptionList code,const char* file,const long line);
String GetMessage() const { return GetMessage(code); } String GetMessage() const { return wxString(what(),wxConvLocal); }
int GetCode(); int GetCode();
private: private:
static String GetMessage(int code); static String GetMessage(int code);
static String GetMessageFile(int code,const char *file,long line);
ExceptionList code; ExceptionList code;
}; };
}; };
// Throws a Gorgonsub::Exception for the given error code.
// Under MSVC the throw site's __FILE__ and __LINE__ are passed to the
// exception as well; on every other compiler only the code is passed.
#if !defined(_MSC_VER)
#define THROW_GORGON_EXCEPTION(code) throw Gorgonsub::Exception(code)
#else
#define THROW_GORGON_EXCEPTION(code) throw Gorgonsub::Exception(code,__FILE__,__LINE__)
#endif

View file

@ -57,7 +57,7 @@ namespace Gorgonsub {
// Shifts all the buffer left, destroying steps entries // Shifts all the buffer left, destroying steps entries
void ShiftLeft(size_t steps) { void ShiftLeft(size_t steps) {
steps = Min(_size,steps); steps = Min(_size,steps);
memcpy(&buffer[0],&buffer[steps],_size-steps); memcpy(&buffer[0],&buffer[steps],(_size-steps)*sizeof(T));
_size -= steps; _size -= steps;
} }

View file

@ -37,14 +37,20 @@
using namespace Gorgonsub; using namespace Gorgonsub;
/////////////// ////////////////
// Constructor // Constructors
// Builds the exception from an error code only: the base std::exception is constructed from GetMessage(_code) converted with wxConvLocal, and the code is stored.
// NOTE(review): std::exception has no const char* constructor in standard C++ (it is an MSVC extension) - confirm which toolchains this must build on.
Exception::Exception(ExceptionList _code) Exception::Exception(ExceptionList _code)
: std::exception(GetMessage(_code).mb_str(wxConvLocal)) : std::exception(GetMessage(_code).mb_str(wxConvLocal))
{ {
code = _code; code = _code;
} }
// Location-aware constructor: the base std::exception is constructed from
// GetMessageFile(_code,file,line) converted with wxConvLocal, and the error
// code is stored for GetCode().
Exception::Exception(ExceptionList _code,const char *file,const long line)
	: std::exception(GetMessageFile(_code,file,line).mb_str(wxConvLocal))
	, code(_code)
{
}
////////////////////// //////////////////////
// Get message string // Get message string
@ -65,6 +71,14 @@ String Exception::GetMessage(int code)
} }
///////////////////////////////////
// Insert file and line on message
// Returns the message for `code` followed by the source location, in the
// form "<message> (<file>:<line>).". `file` is decoded with wxConvLocal.
String Exception::GetMessageFile(int code,const char *file,long line)
{
	// `line` is a long, so it must be formatted with %ld; %i would read it as
	// an int, which is a mismatch wherever long is wider than int.
	return GetMessage(code) + _T(" (") + wxString(file,wxConvLocal) + wxString::Format(_T(":%ld)."),line);
}
//////////// ////////////
// Get code // Get code
int Exception::GetCode() int Exception::GetCode()

View file

@ -56,7 +56,9 @@ DialogueASS::DialogueASS(const String &data,int version)
version++; version++;
if (version > 2) version = 0; if (version > 2) version = 0;
} }
if (!valid) throw Exception(Exception::Parse_Error); if (!valid) {
THROW_GORGON_EXCEPTION(Exception::Parse_Error);
}
} }

View file

@ -55,7 +55,7 @@ StyleASS::StyleASS(String data,int version)
version++; version++;
if (version > 2) version = 0; if (version > 2) version = 0;
} }
if (!valid) throw Exception(Exception::Parse_Error); if (!valid) THROW_GORGON_EXCEPTION(Exception::Parse_Error);
} }

View file

@ -56,7 +56,6 @@ TextFileReader::TextFileReader(wxInputStream &stream,Gorgonsub::String enc,bool
trim = _trim; trim = _trim;
threaded = prefetch && false; threaded = prefetch && false;
thread = NULL; thread = NULL;
_buffer.Alloc(4096);
// Set encoding // Set encoding
encoding = enc.c_str(); encoding = enc.c_str();
@ -101,6 +100,78 @@ void TextFileReader::SetEncodingConfiguration()
else { else {
conv = shared_ptr<wxMBConv> (new wxCSConv(encoding)); conv = shared_ptr<wxMBConv> (new wxCSConv(encoding));
} }
// Allocate buffer
if (!Is16) buffer1.Alloc(4096);
else buffer2.Alloc(4096);
}
////////////////////
// Helper functions
// Converts a null-terminated narrow buffer to a wxString using the supplied converter.
// The converter is taken by const reference so no shared_ptr copy (and refcount update) is made per call.
wxString GetString(char *read,const shared_ptr<wxMBConv> &conv) { return wxString(read,*conv); }
// Wide overload: builds the wxString directly from the null-terminated wide buffer; the converter is unused.
wxString GetString(wchar_t *read,const shared_ptr<wxMBConv> &conv) { (void)conv; return wxString(read); }
// Exchanges the first two bytes of the wide character's storage in place.
// Any further bytes of the wchar_t are left untouched.
inline void Swap(wchar_t &a) {
	char *bytes = reinterpret_cast<char*>(&a);
	const char first = bytes[0];
	bytes[0] = bytes[1];
	bytes[1] = first;
}
// Narrow overload: a single byte has nothing to swap, so this is a no-op.
inline void Swap(char &) {}
////////////////
// Parse a line
// Extracts the next line from _buffer into stringBuffer, refilling _buffer
// from file in 2048-byte reads until a line break is found or the stream
// reports Eof. T is the buffer element type (the callers use char and wchar_t).
//   _buffer      - pending, not yet consumed data; the consumed line is shifted out
//   file         - stream that more data is read from
//   stringBuffer - receives the converted line; left untouched when the line break
//                  is found at position 0
//   conv         - converter handed to GetString
//   swap         - when true, Swap() is applied to every element just read
template <typename T>
void ParseLine(FastBuffer<T> &_buffer,wxInputStream &file,wxString &stringBuffer,shared_ptr<wxMBConv> conv,bool swap)
{
// Position and value of the line break; -1 means none found yet
int newLinePos = -1;
T newLineChar = 0;
size_t size = _buffer.GetSize();
// Search the data already buffered before touching the stream
if (size) _buffer.FindLineBreak(0,size,newLinePos,newLineChar);
// If no line breaks were found, load more data from the file into the buffer
while (newLinePos == -1) {
// Read up to 2048 bytes, i.e. 2048/sizeof(T) elements
const size_t readBytes = 2048;
const size_t read = readBytes/sizeof(T);
size_t oldSize = _buffer.GetSize();
T *ptr = _buffer.GetWritePtr(read);
file.Read(ptr,readBytes);
// Whole elements actually read; a trailing partial element is dropped by the integer division
size_t lastRead = file.LastRead()/sizeof(T);
// presumably GetWritePtr already grew the size by `read`, so this trims it back to what was really read - TODO confirm against FastBuffer
_buffer.AssumeSize(_buffer.GetSize()+lastRead-read);
// Swap the bytes of every element just read
if (swap) {
T* ptr2 = ptr;
for (size_t i=0;i<lastRead;i++) {
Swap(*ptr2++);
}
}
// Search only the newly read range for a line break
_buffer.FindLineBreak(oldSize,lastRead+oldSize,newLinePos,newLineChar);
// End of file, force a line break at the end of the buffered data
// NOTE(review): if Read fails without the stream reporting Eof, nothing here ends the loop - confirm
if (file.Eof() && newLinePos == -1) newLinePos = (int) _buffer.GetSize();
}
// Found newline
if (newLinePos != -1) {
T *read = _buffer.GetMutableReadPtr();
// Replace newline with null character and convert to proper charset
// NOTE(review): at forced end of file newLinePos equals GetSize(), so this writes one element past the valid data - assumes the buffer keeps spare capacity, confirm
if (newLinePos) {
read[newLinePos] = 0;
stringBuffer = GetString(read,conv);
}
// Remove an extra character if the new is the complement of \n,\r (13^7=10, 10^7=13)
// NOTE(review): read[newLinePos+1] can index past the valid data when the break is the last buffered element - confirm this is safe
if (read[newLinePos+1] == (newLineChar ^ 7)) newLinePos++;
// Drop the consumed line together with its line break
_buffer.ShiftLeft(newLinePos+1);
}
} }
@ -114,82 +185,10 @@ Gorgonsub::String TextFileReader::ActuallyReadLine()
std::string buffer = ""; std::string buffer = "";
// Read UTF-16 line from file // Read UTF-16 line from file
if (Is16) { if (Is16) ParseLine<wchar_t>(buffer2,file,stringBuffer,conv,swap);
char charbuffer[3];
charbuffer[2] = 0;
wchar_t ch = 0;
size_t len = 0;
while (ch != L'\n' && !file.Eof()) {
// Read two chars from file
charbuffer[0] = 0;
charbuffer[1] = 0;
file.Read(charbuffer,2);
// Swap bytes for big endian
if (swap) {
register char aux = charbuffer[0];
charbuffer[0] = charbuffer[1];
charbuffer[1] = aux;
}
// Convert two chars into a widechar and append to string
ch = *((wchar_t*)charbuffer);
if (len >= bufAlloc - 1) {
bufAlloc *= 2;
stringBuffer.Alloc(bufAlloc);
}
stringBuffer += ch;
len++;
}
// Remove line breaks
len = stringBuffer.Length();
for (size_t i=0;i<len;i++) {
if (stringBuffer[i] == _T('\r') || stringBuffer[i] == _T('\n')) stringBuffer[i] = _T(' ');
}
}
// Read ASCII/UTF-8 line from file // Read ASCII/UTF-8 line from file
else { else ParseLine<char>(buffer1,file,stringBuffer,conv,false);
// Look for a new line
int newLinePos = -1;
char newLineChar = 0;
size_t size = _buffer.GetSize();
// Find first line break
if (size) _buffer.FindLineBreak(0,size,newLinePos,newLineChar);
// If no line breaks were found, load more data into file
while (newLinePos == -1) {
// Read 2048 bytes
const size_t read = 2048;
size_t oldSize = _buffer.GetSize();
char *ptr = _buffer.GetWritePtr(read);
file.Read(ptr,read);
size_t lastRead = file.LastRead();
_buffer.AssumeSize(_buffer.GetSize()+lastRead-read);
// Find line break
_buffer.FindLineBreak(oldSize,lastRead+oldSize,newLinePos,newLineChar);
// End of file, force a line break
if (file.Eof() && newLinePos == -1) newLinePos = (int) _buffer.GetSize();
}
// Found newline
if (newLinePos != -1) {
// Replace newline with null character and convert to proper charset
char *read = _buffer.GetMutableReadPtr();
if (newLinePos) {
read[newLinePos] = 0;
stringBuffer = wxString(read,*conv);
}
// Remove an extra character if the new is the complement of \n,\r (13^7=10, 10^7=13)
if (read[newLinePos+1] == (newLineChar ^ 7)) newLinePos++;
_buffer.ShiftLeft(newLinePos+1);
}
}
// Remove BOM // Remove BOM
size_t startPos = 0; size_t startPos = 0;
@ -208,7 +207,7 @@ bool TextFileReader::HasMoreLines()
{ {
if (cache.size()) return true; if (cache.size()) return true;
wxCriticalSectionLocker locker(mutex); wxCriticalSectionLocker locker(mutex);
return (!file.Eof() || _buffer.GetSize()); return (!file.Eof() || buffer1.GetSize() || buffer2.GetSize());
} }

View file

@ -52,7 +52,8 @@ namespace Gorgonsub {
wxCriticalSection mutex; wxCriticalSection mutex;
std::list<String> cache; std::list<String> cache;
FastBuffer<char> _buffer; FastBuffer<char> buffer1;
FastBuffer<wchar_t> buffer2;
wxString encoding; wxString encoding;
wxInputStream &file; wxInputStream &file;

View file

@ -60,10 +60,17 @@ int main() {
// Load subtitles // Load subtitles
cout << "Loading file... "; cout << "Loading file... ";
timer.Start(); timer.Start();
control.LoadFile(L"subs_in.ass",L"UTF-8"); control.LoadFile(L"subs_in.ass",L"UTF-16LE");
timer.Pause();
cout << "Done in " << timer.Time() << " ms.\n";
//system("pause");
// Save subtitles
cout << "Saving file... ";
timer.Start();
control.SaveFile(L"subs_out.ass",L"UTF-8");
timer.Pause(); timer.Pause();
cout << "Done in " << timer.Time() << " ms.\n"; cout << "Done in " << timer.Time() << " ms.\n";
system("pause");
// Create line to be inserted // Create line to be inserted
cout << "Creating data... "; cout << "Creating data... ";
@ -81,30 +88,13 @@ int main() {
timer.Pause(); timer.Pause();
cout << "Done in " << timer.Time() << " ms.\n"; cout << "Done in " << timer.Time() << " ms.\n";
// Save subtitles
cout << "Saving file... ";
//control.SaveFile(L"subs_out_mid1.ass",L"UTF-8");
cout << "Done.\n";
// Undo // Undo
cout << "Undoing... (can undo=" << (control.CanUndo()?"true":"false") << ") "; cout << "Undoing and redoing 1000 times... ";
control.Undo();
cout << "Done.\n";
// Save subtitles
cout << "Saving file... ";
control.SaveFile(L"subs_out_mid2.ass",L"UTF-8");
cout << "Done.\n";
// Redo
cout << "Undoing... (can redo=" << (control.CanRedo()?"true":"false") << ") ";
control.Redo();
cout << "Done.\n";
// Save subtitles
cout << "Saving file... ";
timer.Start(); timer.Start();
//control.SaveFile(L"subs_out.ass",L"UTF-8"); for (size_t i=0;i<1000;i++) {
control.Undo();
control.Redo();
}
timer.Pause(); timer.Pause();
cout << "Done in " << timer.Time() << " ms.\n"; cout << "Done in " << timer.Time() << " ms.\n";
} }