Subs lib can now actually parse text files (kinda).

Originally committed to SVN as r1971.
This commit is contained in:
Rodrigo Braz Monteiro 2008-03-08 07:52:20 +00:00
parent fc14717b71
commit 96616bf79c
8 changed files with 522 additions and 23 deletions

View file

@ -175,10 +175,6 @@
RelativePath=".\include\aegilib\exception.h"
>
</File>
<File
RelativePath=".\include\aegilib\file.h"
>
</File>
<File
RelativePath=".\include\aegilib\format.h"
>
@ -272,6 +268,14 @@
RelativePath=".\src\prec.h"
>
</File>
<File
RelativePath=".\src\text_file_reader.cpp"
>
</File>
<File
RelativePath=".\src\text_file_reader.h"
>
</File>
</Filter>
<Filter
Name="Formats"

View file

@ -37,7 +37,6 @@
#include <list>
#include <wx/stream.h>
#include "manipulator.h"
#include "file.h"
namespace Aegilib {
// Prototypes

View file

@ -35,6 +35,8 @@
#include "model.h"
#include "format_ass.h"
#include "../text_file_reader.h"
#include <iostream>
using namespace Aegilib;
@ -57,6 +59,13 @@ FormatHandlerASS::~FormatHandlerASS()
// Load a file
void FormatHandlerASS::Load(wxInputStream &file,const String encoding)
{
    // Wrap the raw input stream in a line-oriented reader that decodes
    // the bytes using the requested encoding.
    TextFileReader lineSource(file,encoding);

    // Debug aid: echo every decoded line to stdout, re-encoded as UTF-8.
    std::cout << std::endl << "Dumping file:" << std::endl;
    for (; lineSource.HasMoreLines(); ) {
        const wxString text = lineSource.ReadLineFromFile();
        std::cout << text.mb_str(wxConvUTF8) << std::endl;
    }
}

View file

@ -0,0 +1,206 @@
// Copyright (c) 2005, Rodrigo Braz Monteiro
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of the Aegisub Group nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// -----------------------------------------------------------------------------
//
// AEGISUB
//
// Website: http://aegisub.cellosoft.com
// Contact: mailto:zeratul@cellosoft.com
//
///////////
// Headers
#include <algorithm>
#include <string>
#include <wx/wfstream.h>
#include "text_file_reader.h"
using namespace Aegilib;
#ifdef WITH_UNIVCHARDET
#include "charset_detect.h"
#endif
///////////////
// Constructor
// Builds a line reader on top of an already-open input stream.
//   stream - source of raw bytes; held by reference, so it must outlive the reader
//   enc    - encoding name; "binary" skips converter setup entirely
//   _trim  - when true, ReadLineFromFile() strips whitespace from both ends of each line
TextFileReader::TextFileReader(wxInputStream &stream,Aegilib::String enc,bool _trim)
: file(stream), conv(NULL), Is16(false), swap(false), customConv(false), trim(_trim)
{
    // Every flag and the converter pointer get a defined value in the
    // initialiser list above: the "binary" early return below never reaches
    // SetEncodingConfiguration(), and these members are read later by
    // ReadLineFromFile() and the destructor.

    // Set encoding
    encoding = enc.c_str();
    if (encoding == _T("binary")) return;
    SetEncodingConfiguration();
}
//////////////
// Destructor
TextFileReader::~TextFileReader() {
    // The converter is only owned by this object when customConv is set
    // (i.e. it was created with new in SetEncodingConfiguration); anything
    // else must not be deleted here.
    if (!customConv) return;
    delete conv;
}
//////////////////////////////
// Set encoding configuration
void TextFileReader::SetEncodingConfiguration() {
    // Start from a clean slate: no converter, 8-bit input, no byte swapping.
    conv = NULL;
    customConv = false;
    Is16 = false;
    swap = false;

    // UTF-16 input is decoded by hand in ReadLineFromFile(), so no converter
    // is created; big-endian data additionally needs its byte pairs swapped.
    const bool bigEndian16 = (encoding == _T("UTF-16BE"));
    if (bigEndian16 || encoding == _T("UTF-16LE")) {
        Is16 = true;
        swap = bigEndian16;
        return;
    }

    // "Local" borrows the shared wxConvCurrent converter, which is not owned
    // by this object and therefore leaves customConv false.
    if (encoding == _T("Local")) {
        conv = wxConvCurrent;
        return;
    }

    // Everything else gets a converter allocated here and released by the
    // destructor: the dedicated UTF-8 class for "UTF-8", a generic wxCSConv
    // for any other name (UTF-7 included).
    if (encoding == _T("UTF-8")) conv = new wxMBConvUTF8;
    else conv = new wxCSConv(encoding);
    customConv = true;
}
//////////////////////////
// Reads a line from file
// Reads the next line from the stream and returns it decoded.
//
// UTF-16 input (Is16) is read two bytes at a time and assembled by hand;
// every other encoding is read byte by byte up to '\n' and decoded through
// conv. Any '\r' / '\n' left in the decoded text is turned into a space, a
// leading BOM (U+FEFF) is dropped, and, if trim is set, whitespace is removed
// from both ends.
//
// NOTE(review): the 8-bit path dereferences conv. A reader constructed with
// the "binary" encoding never runs SetEncodingConfiguration(), so this method
// presumably must not be called on such a reader - confirm with callers.
Aegilib::String TextFileReader::ReadLineFromFile() {
    wxString wxbuffer;
    size_t bufAlloc = 1024;
    wxbuffer.Alloc(bufAlloc);

    // Read UTF-16 line from file
    if (Is16) {
        unsigned char unit[2];
        wchar_t ch = 0;
        size_t len = 0;
        while (ch != L'\n' && !file.Eof()) {
            // Read one 16-bit code unit. Eof() only turns true after a read
            // has failed, so the amount actually read must be checked here;
            // otherwise a bogus NUL would be appended at the end of the file.
            file.Read(unit,2);
            if (file.LastRead() < 2) break;

            // Assemble the code unit from its two bytes explicitly instead of
            // casting the buffer to wchar_t*: that cast reads out of bounds
            // when wchar_t is wider than 16 bits and depends on host byte
            // order. swap marks big-endian input (high byte first).
            const unsigned char lo = swap ? unit[1] : unit[0];
            const unsigned char hi = swap ? unit[0] : unit[1];
            ch = (wchar_t)(lo | (hi << 8));

            // Grow the string geometrically
            if (len >= bufAlloc - 1) {
                bufAlloc *= 2;
                wxbuffer.Alloc(bufAlloc);
            }
            wxbuffer += ch;
            len++;
        }
    }

    // Read ASCII/UTF-8 (or other 8-bit) line from file
    else {
        char temp = 0;
        std::string buffer;
        while (temp != '\n' && !file.Eof()) {
            file.Read(&temp,1);
            // A failed read leaves temp unchanged; without this check the last
            // byte of a file lacking a trailing newline was appended twice.
            if (file.LastRead() < 1) break;
            if (temp != '\r') {
                buffer += temp;
            }
        }
        if (buffer.size()) wxbuffer = wxString(buffer.c_str(),*conv);
    }

    // Replace remaining line breaks with spaces (removed below when trimming)
    size_t len=wxbuffer.Length();
    for (size_t i=0;i<len;i++) {
        if (wxbuffer[i] == _T('\r') || wxbuffer[i] == _T('\n')) wxbuffer[i] = _T(' ');
    }

    // Remove BOM
    if (wxbuffer.Length() > 0 && wxbuffer[0] == 0xFEFF) {
        wxbuffer = wxbuffer.Mid(1);
    }

    // Trim
    if (trim) {
        wxbuffer.Trim(true);
        wxbuffer.Trim(false);
    }
    return Aegilib::String(wxbuffer.c_str());
}
//////////////////////////////////
// Checks if there's more to read
bool TextFileReader::HasMoreLines() {
    // There is more to read for as long as the underlying stream has not
    // reported end-of-file.
    const bool reachedEnd = file.Eof();
    return !reachedEnd;
}
////////////////////////////////
// Ensure that charset is valid
// Rejects character sets this reader cannot handle.
//   enc - encoding name to check
// Throws a pointer to a human-readable message when enc is "unknown",
// "UTF-32BE" or "UTF-32LE"; returns normally for every other name.
void TextFileReader::EnsureValid(Aegilib::String enc) {
    if (enc == _T("unknown") || enc == _T("UTF-32BE") || enc == _T("UTF-32LE")) {
        // The thrown value is a pointer into this string's buffer, so the
        // string must outlive stack unwinding. A local would be destroyed
        // before the handler runs, leaving the handler with a dangling
        // pointer; static storage keeps the message alive. The text is
        // overwritten by the next failing call, so handlers should copy it
        // if they need to keep it.
        static wxString error;
        error = _T("Character set ");
        error += enc;
        error += _T(" is not supported.");
        throw error.c_str();
    }
}
///////////////////////////
// Get encoding being used
Aegilib::String TextFileReader::GetCurrentEncoding() {
    // Hand back a copy of the encoding name stored by the constructor.
    Aegilib::String current(encoding.c_str());
    return current;
}

View file

@ -0,0 +1,70 @@
// Copyright (c) 2005, Rodrigo Braz Monteiro
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of the Aegisub Group nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// -----------------------------------------------------------------------------
//
// AEGISUB
//
// Website: http://aegisub.cellosoft.com
// Contact: mailto:zeratul@cellosoft.com
//
#pragma once
// Headers
#include "aegilib.h"
#include <wx/stream.h>
namespace Aegilib {

    // Line-oriented text reader on top of a wxInputStream. Decodes the raw
    // bytes according to a named encoding and returns one line per call.
    class TextFileReader {
    private:
        wxString encoding;      // Encoding name given to the constructor
        wxInputStream &file;    // Source stream; not owned, must outlive the reader
        wxMBConv *conv;         // Converter for 8-bit encodings; NULL for UTF-16
        bool Is16;              // True when the input is UTF-16 (decoded by hand)
        bool swap;              // True when UTF-16 input is big-endian
        bool customConv;        // True when conv was allocated here and must be deleted
        bool trim;              // True to strip whitespace from both ends of each line

        void SetEncodingConfiguration();

        // Not copyable: the destructor deletes conv when customConv is set,
        // so a compiler-generated copy would free the same converter twice.
        // Declared but intentionally left undefined.
        TextFileReader(const TextFileReader &);
        TextFileReader &operator=(const TextFileReader &);

    public:
        TextFileReader(wxInputStream &stream,String encoding=_T(""),bool trim=true);
        ~TextFileReader();

        // Returns the next decoded line with line breaks removed.
        String ReadLineFromFile();

        // True until the underlying stream reports end-of-file.
        bool HasMoreLines();

        // Throws if the named encoding is one the reader does not support.
        static void EnsureValid(const String encoding);

        // Returns the encoding name this reader was constructed with.
        String GetCurrentEncoding();

        // NOTE(review): declared here, but no definition appears in
        // text_file_reader.cpp as committed - confirm before calling.
        static String GetEncoding(const String filename);
    };

}

View file

@ -0,0 +1,152 @@
// Copyright (c) 2005, Rodrigo Braz Monteiro
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of the Aegisub Group nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// -----------------------------------------------------------------------------
//
// AEGISUB
//
// Website: http://aegisub.cellosoft.com
// Contact: mailto:zeratul@cellosoft.com
//
///////////
// Headers
#include <fstream>
#include "text_file_writer.h"
///////////////
// Constructor
// Creates a writer for the given file and opens it immediately.
//   _filename - path of the file to create/truncate
//   enc       - target encoding; empty means "UTF-8", "US-ASCII" is mapped to
//               "ISO-8859-1", and "Local" uses the shared wxConvLocal converter
// Propagates whatever Open() throws when the file cannot be opened.
TextFileWriter::TextFileWriter(Aegilib::String _filename,Aegilib::String enc) {
    // Setup
    open = false;
    customConv = false;
    IsFirst = true;
    // Must be defined for every encoding: WriteLineToFile() reads it, and the
    // "Local" branch below never assigns it.
    IsUnicode = false;
    filename = _filename;

    // Set encoding
    encoding = enc;
    if (encoding == _T("Local")) conv = &wxConvLocal;
    else {
        if (encoding.IsEmpty()) encoding = _T("UTF-8");
        if (encoding == _T("US-ASCII")) encoding = _T("ISO-8859-1");
        conv = new wxCSConv(encoding);
        customConv = true;
        IsUnicode = encoding.Left(3) == _T("UTF");
    }

    // Open file. If that throws, the destructor will not run for this
    // half-built object, so the converter allocated above has to be released
    // here before the exception continues to the caller.
    try {
        Open();
    }
    catch (...) {
        if (customConv) delete conv;
        throw;
    }
}
//////////////
// Destructor
TextFileWriter::~TextFileWriter() {
    // All cleanup (closing the file, releasing an owned converter) is
    // delegated to Close().
    this->Close();
}
/////////////
// Open file
void TextFileWriter::Open() {
    // Nothing to do when the file is already open.
    if (open) return;

    // Always write raw bytes and discard any previous contents.
    const std::ios::openmode mode = std::ios::out | std::ios::binary | std::ios::trunc;

    // Windows builds pass the wide-character path; other builds pass the
    // path through wxFNCONV.
#ifdef WIN32
    file.open(filename.wc_str(),mode);
#else
    file.open(wxFNCONV(filename),mode);
#endif

    // Report failure to the caller
    if (!file.is_open()) throw _T("Failed opening file for writing.");
    open = true;

    // Work out whether output is 16-bit
    SetEncoding();
}
//////////////
// Close file
void TextFileWriter::Close() {
    // Only an open writer has anything to release.
    if (open) {
        file.close();
        open = false;

        // The converter is deleted only when it was allocated by the
        // constructor (customConv); the shared local converter is left alone.
        if (customConv) delete conv;
    }
}
/////////////////
// Write to file
// Encodes one line of text and appends it to the file.
//   line         - text to write
//   addLineBreak - when true, "\r\n" is appended to the line
// The first write to a Unicode target is prefixed with a byte order mark.
// If the text cannot be converted, nothing is written for this call.
void TextFileWriter::WriteLineToFile(Aegilib::String line,bool addLineBreak) {
    // Make sure it's loaded
    if (!open) Open();

    // Add line break
    wxString temp = line;
    if (addLineBreak) temp += _T("\r\n");

    // Add BOM if it's the first line and the target format is Unicode
    if (IsFirst && IsUnicode) {
        wchar_t bom = 0xFEFF;
        temp = wxString(bom) + temp;
    }
    IsFirst = false;

    // 16-bit
    if (Is16) {
        wxWCharBuffer buf = temp.wc_str(*conv);
        if (!buf.data())
            return;

        // Emit real UTF-16 code units rather than dumping wchar_t memory:
        // wchar_t is 32 bits wide on many platforms (which would produce
        // four bytes per character) and is always in host byte order, so the
        // requested endianness was ignored. Only "UTF-16BE" is written
        // high byte first; plain "UTF-16" and "UTF-16LE" are little-endian.
        const bool bigEndian = (encoding == _T("UTF-16BE"));
        const wchar_t *src = buf.data();
        const size_t len = wcslen(src);
        for (size_t i=0;i<len;i++) {
            unsigned long cp = (unsigned long)src[i];
            // Values outside the Unicode range cannot be encoded
            if (cp > 0x10FFFFul) cp = 0xFFFDul;

            // Split code points above the BMP into a surrogate pair (this
            // only happens when wchar_t is wider than 16 bits).
            unsigned int units[2];
            int count = 1;
            if (cp > 0xFFFFul) {
                cp -= 0x10000ul;
                units[0] = 0xD800u + (unsigned int)(cp >> 10);
                units[1] = 0xDC00u + (unsigned int)(cp & 0x3FFul);
                count = 2;
            }
            else units[0] = (unsigned int)cp;

            for (int u=0;u<count;u++) {
                const char hi = (char)((units[u] >> 8) & 0xFF);
                const char lo = (char)(units[u] & 0xFF);
                char bytes[2];
                bytes[0] = bigEndian ? hi : lo;
                bytes[1] = bigEndian ? lo : hi;
                file.write(bytes,2);
            }
        }
    }

    // 8-bit
    else {
        wxCharBuffer buf = temp.mb_str(*conv);
        if (!buf.data())
            return;
        size_t len = strlen(buf.data());
        file.write(buf.data(),(std::streamsize)len);
    }
}
////////////////
// Set encoding
void TextFileWriter::SetEncoding() {
// Prepare
Is16 = false;
// UTF-16
if (encoding.Left(6) == _T("UTF-16")) {
Is16 = true;
}
}

View file

@ -0,0 +1,75 @@
// Copyright (c) 2005, Rodrigo Braz Monteiro
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of the Aegisub Group nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// -----------------------------------------------------------------------------
//
// AEGISUB
//
// Website: http://aegisub.cellosoft.com
// Contact: mailto:zeratul@cellosoft.com
//
#ifndef TEXT_FILE_WRITER_H
#define TEXT_FILE_WRITER_H
///////////
// Headers
#include <wx/wxprec.h>
#include <wx/string.h>
#include <fstream>
/////////
// Class
// Writes text to a file line by line, converting it to a named encoding.
class TextFileWriter {
public:
    // Opens (and truncates) the file; an empty encoding selects UTF-8.
    TextFileWriter(Aegilib::String filename,Aegilib::String encoding=_T(""));
    ~TextFileWriter();

    // Encodes and appends one line, optionally followed by "\r\n".
    void WriteLineToFile(Aegilib::String line,bool addLineBreak=true);

private:
    void Open();
    void Close();
    void SetEncoding();

    wxString filename;      // Path of the output file
    wxString encoding;      // Name of the target encoding
    std::ofstream file;     // Underlying binary output stream
    wxMBConv *conv;         // Converter used to encode text
    bool customConv;        // True when conv was allocated by the constructor
    bool open;              // True while the file is open
    bool Is16;              // True when the encoding name starts with "UTF-16"
    bool IsFirst;           // True until the first line has been written
    bool IsUnicode;         // True when the encoding name starts with "UTF"
};
#endif

View file

@ -182,28 +182,12 @@
RelativePath=".\src\main.cpp"
>
</File>
<File
RelativePath=".\src\text_file_reader.cpp"
>
</File>
<File
RelativePath=".\src\text_file_writer.cpp"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath=".\src\text_file_reader.h"
>
</File>
<File
RelativePath=".\src\text_file_writer.h"
>
</File>
</Filter>
<Filter
Name="Resource Files"