// Copyright (c) 2005, Rodrigo Braz Monteiro // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // * Neither the name of the Aegisub Group nor the names of its contributors // may be used to endorse or promote products derived from this software // without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // // ----------------------------------------------------------------------------- // // AEGISUB // // Website: http://aegisub.cellosoft.com // Contact: mailto:zeratul@cellosoft.com // /////////// // Headers #include #include #include #include "text_file_reader.h" using namespace Athenasub; #ifdef WITH_UNIVCHARDET #include "charset_detect.h" #endif /////////////// // Constructor TextFileReader::TextFileReader(wxInputStream &stream,String enc,bool _trim) : file(stream) { // Setup trim = _trim; // Set encoding encoding = enc.GetWxString(); if (encoding == _T("binary")) return; SetEncodingConfiguration(); } ////////////// // Destructor TextFileReader::~TextFileReader() { } ////////////////////////////// // Set encoding configuration void TextFileReader::SetEncodingConfiguration() { // Set encoding configuration swap = false; Is16 = false; isUtf8 = false; //conv = shared_ptr(); if (encoding == _T("UTF-8")) { conv = shared_ptr (new wxMBConvUTF8); isUtf8 = true; } else if (encoding == _T("UTF-16LE")) { Is16 = true; } else if (encoding == _T("UTF-16BE")) { Is16 = true; swap = true; } else if (encoding == _T("UTF-7")) { conv = shared_ptr(new wxCSConv(encoding)); } else if (encoding == _T("Local")) { conv = shared_ptr (wxConvCurrent,NullDeleter()); } else { conv = shared_ptr (new wxCSConv(encoding)); } // Allocate buffer if (!Is16) buffer1.Alloc(4096); else buffer2.Alloc(4096); } //////////////////// // Helper functions String GetString(char *read,shared_ptr conv,bool isUtf8) { if (isUtf8) { return String(read); } else { return String(wxString(read,*conv)); } } String GetString(wchar_t *read,shared_ptr conv,bool isUtf8) { (void)conv; (void)isUtf8; return String(read); } inline void Swap(wchar_t &a) { char *c = (char*) &a; char aux = c[0]; c[0] = c[1]; c[1] = aux; } inline void Swap(char &a) { (void) a; } //////////////// // Parse a line template void ParseLine(FastBuffer &_buffer,wxInputStream &file,String &stringBuffer,shared_ptr conv,bool swap,bool isUtf8) { // Look for a new line int newLinePos = -1; T newLineChar = 0; size_t size = _buffer.GetSize(); // Find first line break if (size) _buffer.FindLineBreak(0,size,newLinePos,newLineChar); // If no line breaks were found, load more data into file while (newLinePos == -1) { // Read 2048 bytes const size_t readBytes = 1024; const size_t read = readBytes/sizeof(T); size_t oldSize = _buffer.GetSize(); T *ptr = _buffer.GetWritePtr(read); file.Read(ptr,readBytes); size_t lastRead = file.LastRead()/sizeof(T); _buffer.AssumeSize(_buffer.GetSize()+lastRead-read); // Swap if (swap) { T* ptr2 = ptr; for (size_t i=0;i(buffer2,file,stringBuffer,conv,swap,false); // Read ASCII/UTF-8 line from file else ParseLine(buffer1,file,stringBuffer,conv,false,isUtf8); // Remove BOM (UTF-8 EF BB BF) size_t startPos = 0; if (stringBuffer.Length() >= 3) { int b1 = (unsigned char) stringBuffer[0]; int b2 = (unsigned char) stringBuffer[1]; int b3 = (unsigned char) stringBuffer[2]; if (b1 == 0xEF && b2 == 0xBB && b3 == 0xBF) startPos = 3; } // Trim String str = String(stringBuffer); if (trim) return String(String::StringTrim(str,startPos)); if (startPos) return String(str.c_str() + startPos); return str; } ////////////////////////////////// // Checks if there's more to read bool TextFileReader::HasMoreLines() { return (!file.Eof() || buffer1.GetSize() || buffer2.GetSize()); } //////////////////////////////// // Ensure that charset is valid void TextFileReader::EnsureValid(Athenasub::String enc) { if (enc == "unknown" || enc == "UTF-32BE" || enc == "UTF-32LE") { String error = "Character set "; error += enc; error += " is not supported."; throw error.c_str(); } } /////////////////////////// // Get encoding being used String TextFileReader::GetCurrentEncoding() { return String(encoding.c_str()); } /////////////////// // Rewind the file void TextFileReader::Rewind() { THROW_ATHENA_EXCEPTION(Exception::TODO); }