diff --git a/aegilib/aegilib.vcproj b/aegilib/aegilib.vcproj
index 6211d1fd2..35be6eb9c 100644
--- a/aegilib/aegilib.vcproj
+++ b/aegilib/aegilib.vcproj
@@ -175,10 +175,6 @@ RelativePath=".\include\aegilib\exception.h" > - -
@@ -272,6 +268,14 @@ RelativePath=".\src\prec.h" > + + + + #include #include "manipulator.h" -#include "file.h" namespace Aegilib { // Prototypes
diff --git a/aegilib/src/formats/format_ass.cpp b/aegilib/src/formats/format_ass.cpp
index 7c64a7058..f1f346669 100644
--- a/aegilib/src/formats/format_ass.cpp
+++ b/aegilib/src/formats/format_ass.cpp
@@ -35,6 +35,8 @@
 #include "model.h"
 #include "format_ass.h"
+#include "../text_file_reader.h"
+#include
 
 using namespace Aegilib;
@@ -57,6 +59,13 @@ FormatHandlerASS::~FormatHandlerASS()
 // Load a file
 void FormatHandlerASS::Load(wxInputStream &file,const String encoding)
 {
-    (void) file;
-    (void) encoding;
+    // Wrap the stream in a reader that decodes it line by line using the requested encoding
+    TextFileReader reader(file,encoding);
+
+    // Print a heading, then every decoded line, to standard output as UTF-8
+    std::cout << std::endl << "Dumping file:" << std::endl;
+    while (reader.HasMoreLines()) {
+        wxString line = reader.ReadLineFromFile();
+        std::cout << line.mb_str(wxConvUTF8) << std::endl;
+    }
 }
diff --git a/aegilib/src/text_file_reader.cpp b/aegilib/src/text_file_reader.cpp
new file mode 100644
index 000000000..d221c044c
--- /dev/null
+++ b/aegilib/src/text_file_reader.cpp
@@ -0,0 +1,206 @@
+// Copyright (c) 2005, Rodrigo Braz Monteiro
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of the Aegisub Group nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+//
+// -----------------------------------------------------------------------------
+//
+// AEGISUB
+//
+// Website: http://aegisub.cellosoft.com
+// Contact: mailto:zeratul@cellosoft.com
+//
+
+
+///////////
+// Headers
+#include
+#include
+#include
+#include "text_file_reader.h"
+using namespace Aegilib;
+
+#ifdef WITH_UNIVCHARDET
+#include "charset_detect.h"
+#endif
+
+
+///////////////
+// Constructor
+// Binds the reader to an input stream and configures decoding.
+//  stream - stream to read from; held by reference, so it must outlive the reader
+//  enc    - encoding name; "binary" skips the converter setup entirely
+//  _trim  - when true, ReadLineFromFile() strips leading and trailing whitespace
+TextFileReader::TextFileReader(wxInputStream &stream,Aegilib::String enc,bool _trim)
+: file(stream)
+{
+    // Setup: give every member a defined value, because the "binary" early
+    // return below never reaches SetEncodingConfiguration()
+    conv = NULL;
+    Is16 = false;
+    swap = false;
+    customConv = false;
+    trim = _trim;
+
+    // Set encoding
+    // NOTE(review): in "binary" mode no converter exists, so ReadLineFromFile()
+    // would dereference a null conv - presumably it must not be called then; confirm
+    encoding = enc.c_str();
+    if (encoding == _T("binary")) return;
+    SetEncodingConfiguration();
+}
+
+
+//////////////
+// Destructor
+TextFileReader::~TextFileReader() {
+    // Only converters allocated in SetEncodingConfiguration() are owned;
+    // wxConvCurrent (used for "Local") is not deleted
+    if (customConv) delete conv;
+}
+
+
+//////////////////////////////
+// Set encoding configuration
+// Derives Is16, swap, conv and customConv from the encoding name.
+void TextFileReader::SetEncodingConfiguration() {
+    // Defaults: 8-bit input, no byte swapping, no owned converter
+    swap = false;
+    Is16 = false;
+    customConv = false;
+    conv = NULL;
+
+    if (encoding == _T("UTF-8")) {
+        conv = new wxMBConvUTF8;
+        customConv = true;
+    }
+    else if (encoding == _T("UTF-16LE")) {
+        Is16 = true;
+    }
+    else if (encoding == _T("UTF-16BE")) {
+        Is16 = true;
+        swap = true;
+    }
+    else if (encoding == _T("Local")) {
+        conv = wxConvCurrent;
+    }
+    else {
+        // Every other name, "UTF-7" included, is handled by wxCSConv
+        conv = new wxCSConv(encoding);
+        customConv = true;
+    }
+}
+
+
+//////////////////////////
+// Reads a line from file
+// Reads up to and including the next '\n' (or to the end of the stream), decodes it,
+// removes a leading byte-order mark and, if trimming is enabled, surrounding
+// whitespace. Returns the decoded line.
+Aegilib::String TextFileReader::ReadLineFromFile() {
+    wxString wxbuffer;
+    size_t bufAlloc = 1024;
+    wxbuffer.Alloc(bufAlloc);
+
+    // Read UTF-16 line from file
+    if (Is16) {
+        unsigned char bytes[2];
+        wchar_t ch = 0;
+        size_t len = 0;
+        while (ch != L'\n' && !file.Eof()) {
+            // Read one 16-bit code unit; a short read means the stream is
+            // exhausted, so stop instead of appending a bogus character
+            file.Read(bytes,2);
+            if (file.LastRead() < 2) break;
+
+            // Combine the bytes explicitly instead of casting the buffer to
+            // wchar_t*: the cast read sizeof(wchar_t) bytes from a smaller buffer
+            // and depended on the byte order of the host
+            if (swap) ch = (wchar_t)((bytes[0] << 8) | bytes[1]);
+            else ch = (wchar_t)((bytes[1] << 8) | bytes[0]);
+
+            // Append to string, growing the reservation geometrically
+            if (len >= bufAlloc - 1) {
+                bufAlloc *= 2;
+                wxbuffer.Alloc(bufAlloc);
+            }
+            wxbuffer += ch;
+            len++;
+        }
+    }
+
+    // Read ASCII/UTF-8 line from file
+    else {
+        char temp = 0;
+        std::string buffer;
+        while (temp != '\n' && !file.Eof()) {
+            // A failed read leaves temp unchanged, so without this check its
+            // stale value would be appended once more at the end of the stream
+            file.Read(&temp,1);
+            if (file.LastRead() == 0) break;
+            if (temp != '\r') {
+                buffer += temp;
+            }
+        }
+        if (buffer.size()) wxbuffer = wxString(buffer.c_str(),*conv);
+    }
+
+    // Remove line breaks
+    // NOTE(review): the body of this loop is garbled in the patch text under review;
+    // reconstructed as blanking CR/LF characters - confirm against the repository copy
+    size_t len=wxbuffer.Length();
+    for (size_t i=0;i<len;i++) {
+        if (wxbuffer[i] == _T('\r') || wxbuffer[i] == _T('\n')) wxbuffer[i] = _T(' ');
+    }
+
+    // Remove BOM
+    if (wxbuffer.Length() > 0 && wxbuffer[0] == 0xFEFF) {
+        wxbuffer = wxbuffer.Mid(1);
+    }
+
+    // Trim
+    if (trim) {
+        wxbuffer.Trim(true);
+        wxbuffer.Trim(false);
+    }
+    return Aegilib::String(wxbuffer.c_str());
+}
+
+
+//////////////////////////////////
+// Checks if there's more to read
+bool TextFileReader::HasMoreLines() {
+    return (!file.Eof());
+}
+
+
+////////////////////////////////
+// Ensure that charset is valid
+// Throws a character-string message when enc is "unknown", "UTF-32BE" or "UTF-32LE".
+void TextFileReader::EnsureValid(Aegilib::String enc) {
+    if (enc == _T("unknown") || enc == _T("UTF-32BE") || enc == _T("UTF-32LE")) {
+        // The message is kept in static storage: the thrown pointer has to stay
+        // valid after this frame unwinds, which the buffer of a local string would
+        // not. The shared buffer makes this path unsafe for concurrent callers.
+        static wxString error;
+        error = _T("Character set ");
+        error += enc;
+        error += _T(" is not supported.");
+        throw error.c_str();
+    }
+}
+
+
+///////////////////////////
+// Get encoding being used
+Aegilib::String TextFileReader::GetCurrentEncoding() {
+    return encoding.c_str();
+}
diff --git a/aegilib/src/text_file_reader.h b/aegilib/src/text_file_reader.h
new file mode 100644
index 000000000..9e9a1d7f9
--- /dev/null
+++ b/aegilib/src/text_file_reader.h
@@ -0,0 +1,70 @@
+// Copyright (c) 2005, Rodrigo Braz Monteiro
+// All rights reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of the Aegisub Group nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+//
+// -----------------------------------------------------------------------------
+//
+// AEGISUB
+//
+// Website: http://aegisub.cellosoft.com
+// Contact: mailto:zeratul@cellosoft.com
+//
+
+
+#pragma once
+
+
+// Headers
+#include "aegilib.h"
+#include
+
+
+namespace Aegilib {
+    // Reads text line by line from a wxInputStream and decodes each line
+    // according to a named character encoding.
+    class TextFileReader {
+    private:
+        wxString encoding;      // Encoding name given to the constructor
+        wxInputStream &file;    // Stream being read; referenced, not owned
+        wxMBConv *conv;         // Converter used for 8-bit encodings
+        bool Is16;              // True for UTF-16LE / UTF-16BE input
+        bool swap;              // True when each UTF-16 byte pair is big endian
+        bool customConv;        // True when conv was allocated by this object and must be deleted
+        bool trim;              // Strip surrounding whitespace from every line read
+
+        void SetEncodingConfiguration();
+
+        // Not copyable: the destructor deletes conv, so a copy would delete it twice.
+        // Declared but intentionally left undefined.
+        TextFileReader(const TextFileReader &);
+        TextFileReader &operator=(const TextFileReader &);
+
+    public:
+        // stream must outlive the reader; encoding "binary" disables decoding setup
+        TextFileReader(wxInputStream &stream,String encoding=_T(""),bool trim=true);
+        ~TextFileReader();
+
+        // Returns the next decoded line
+        String ReadLineFromFile();
+        // True until the stream reports end of file
+        bool HasMoreLines();
+
+        // Throws a character-string message for unsupported encoding names
+        static void EnsureValid(const String encoding);
+        // Returns the encoding name given to the constructor
+        String GetCurrentEncoding();
+        static String GetEncoding(const String filename);
+    };
+}
+
diff --git a/aegilib/src/text_file_writer.cpp b/aegilib/src/text_file_writer.cpp
new file mode 100644
index 000000000..a838abbe7
--- /dev/null
+++ b/aegilib/src/text_file_writer.cpp
@@ -0,0 +1,152 @@
+// Copyright (c) 2005, Rodrigo Braz Monteiro
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of the Aegisub Group nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// -----------------------------------------------------------------------------
+//
+// AEGISUB
+//
+// Website: http://aegisub.cellosoft.com
+// Contact: mailto:zeratul@cellosoft.com
+//
+
+
+///////////
+// Headers
+#include
+#include "text_file_writer.h"
+
+
+///////////////
+// Constructor
+// Chooses the converter for the requested encoding and opens the file for writing.
+//  _filename - path of the file to create or truncate
+//  enc       - encoding name; empty means "UTF-8", "US-ASCII" is written as
+//              "ISO-8859-1", "Local" uses wxConvLocal
+// Throws a character-string message if the file cannot be opened.
+TextFileWriter::TextFileWriter(Aegilib::String _filename,Aegilib::String enc) {
+    // Setup: every flag gets a defined value here. IsUnicode used to stay
+    // unset for "Local" although WriteLineToFile() reads it.
+    open = false;
+    customConv = false;
+    IsFirst = true;
+    IsUnicode = false;
+    Is16 = false;
+    conv = NULL;
+    filename = _filename;
+
+    // Set encoding
+    encoding = enc;
+    if (encoding == _T("Local")) conv = &wxConvLocal;
+    else {
+        if (encoding.IsEmpty()) encoding = _T("UTF-8");
+        if (encoding == _T("US-ASCII")) encoding = _T("ISO-8859-1");
+        conv = new wxCSConv(encoding);
+        customConv = true;
+        IsUnicode = encoding.Left(3) == _T("UTF");
+    }
+
+    // Open file. The destructor does not run when a constructor throws, so
+    // the converter has to be released here before the error propagates.
+    try {
+        Open();
+    }
+    catch (...) {
+        if (customConv) delete conv;
+        throw;
+    }
+}
+
+
+//////////////
+// Destructor
+TextFileWriter::~TextFileWriter() {
+    Close();
+}
+
+
+/////////////
+// Open file
+// Opens the target in binary mode, truncating it; does nothing if already open.
+void TextFileWriter::Open() {
+    // Open file
+    if (open) return;
+#ifdef WIN32
+    file.open(filename.wc_str(),std::ios::out | std::ios::binary | std::ios::trunc);
+#else
+    file.open(wxFNCONV(filename),std::ios::out | std::ios::binary | std::ios::trunc);
+#endif
+    if (!file.is_open()) {
+        throw _T("Failed opening file for writing.");
+    }
+    open = true;
+
+    // Set encoding
+    SetEncoding();
+}
+
+
+//////////////
+// Close file
+// Closes the stream and releases an owned converter; does nothing if not open.
+void TextFileWriter::Close() {
+    if (!open) return;
+    file.close();
+    open = false;
+    if (customConv) {
+        // Clear the pointer and the ownership flag so the converter can
+        // neither be deleted twice nor used after it has been freed
+        delete conv;
+        conv = NULL;
+        customConv = false;
+    }
+}
+
+
+/////////////////
+// Write to file
+// Encodes one line and appends it to the file.
+//  line         - text to write
+//  addLineBreak - when true, "\r\n" is appended to the text
+// The first write of a "UTF" encoding is prefixed with a byte-order mark. If
+// the conversion yields no data the line is skipped.
+void TextFileWriter::WriteLineToFile(Aegilib::String line,bool addLineBreak) {
+    // Make sure it's loaded
+    if (!open) Open();
+
+    // Add line break
+    wxString temp = line;
+    if (addLineBreak) temp += _T("\r\n");
+
+    // Add BOM if it's the first line and the target format is Unicode
+    if (IsFirst && IsUnicode) {
+        wchar_t bom = 0xFEFF;
+        temp = wxString(bom) + temp;
+    }
+    IsFirst = false;
+
+    // 16-bit
+    // NOTE(review): the raw wchar_t bytes are written as they sit in memory, so
+    // the byte order and unit width are those of the host; nothing here swaps
+    // bytes for "UTF-16BE" - confirm whether that is intended
+    if (Is16) {
+        wxWCharBuffer buf = temp.wc_str(*conv);
+        if (!buf.data())
+            return;
+        size_t len = wcslen(buf.data());
+        file.write((const char*)buf.data(),(std::streamsize)len*sizeof(wchar_t));
+    }
+
+    // 8-bit
+    else {
+        wxCharBuffer buf = temp.mb_str(*conv);
+        if (!buf.data())
+            return;
+        size_t len = strlen(buf.data());
+        file.write(buf.data(),(std::streamsize)len);
+    }
+}
+
+
+////////////////
+// Set encoding
+// Selects the 16-bit output path for every encoding whose name starts with "UTF-16".
+void TextFileWriter::SetEncoding() {
+    Is16 = encoding.Left(6) == _T("UTF-16");
+}
diff --git a/aegilib/src/text_file_writer.h b/aegilib/src/text_file_writer.h
new file mode 100644
index 000000000..c1d156cac
--- /dev/null
+++ b/aegilib/src/text_file_writer.h
@@ -0,0 +1,75 @@
+// Copyright (c) 2005, Rodrigo Braz Monteiro
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of the Aegisub Group nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+//
+// -----------------------------------------------------------------------------
+//
+// AEGISUB
+//
+// Website: http://aegisub.cellosoft.com
+// Contact: mailto:zeratul@cellosoft.com
+//
+
+
+#ifndef TEXT_FILE_WRITER_H
+#define TEXT_FILE_WRITER_H
+
+
+///////////
+// Headers
+#include
+#include
+#include
+
+
+/////////
+// Class
+// Writes lines of text to a file in a named character encoding. The file is
+// opened by the constructor and closed by the destructor.
+class TextFileWriter {
+private:
+    wxString filename;      // Path of the file being written
+    wxString encoding;      // Encoding name after normalisation in the constructor
+    std::ofstream file;     // Output stream, opened in binary mode
+
+    wxMBConv *conv;         // Converter used to encode each line
+    bool customConv;        // True when conv was allocated by this object and is deleted in Close()
+    bool open;              // True while the file is open
+    bool Is16;              // True when the encoding name starts with "UTF-16"
+    bool IsFirst;           // True until the first line has been written (controls the BOM)
+    bool IsUnicode;         // True when the encoding name starts with "UTF"
+
+    void Open();
+    void Close();
+    void SetEncoding();
+
+public:
+    // An empty encoding selects "UTF-8"; throws a character-string message if the file cannot be opened
+    TextFileWriter(Aegilib::String filename,Aegilib::String encoding=_T(""));
+    ~TextFileWriter();
+
+    // Appends one line; "\r\n" is added when addLineBreak is true
+    void WriteLineToFile(Aegilib::String line,bool addLineBreak=true);
+};
+
+
+#endif
diff --git a/aegilib/test/test.vcproj b/aegilib/test/test.vcproj
index 4b70d41ca..76dadaee7 100644
--- a/aegilib/test/test.vcproj
+++ b/aegilib/test/test.vcproj
@@ -182,28 +182,12 @@ RelativePath=".\src\main.cpp" > - - - - - - - -