forked from mia/Aegisub
4ec507f814
Document all of the SubtitleFormat methods. Add default implementations of CanReadFile and CanWriteFile that check against the appropriate wildcard list. Clean up and simplify a lot of very odd code. Throw typed exceptions in all subtitle readers rather than strings. Originally committed to SVN as r5617.
236 lines
7.1 KiB
C++
236 lines
7.1 KiB
C++
// Copyright (c) 2006, Rodrigo Braz Monteiro
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright notice,
|
|
// this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above copyright notice,
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
// and/or other materials provided with the distribution.
|
|
// * Neither the name of the Aegisub Group nor the names of its contributors
|
|
// may be used to endorse or promote products derived from this software
|
|
// without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
// POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
// Aegisub Project http://www.aegisub.org/
|
|
//
|
|
// $Id$
|
|
|
|
/// @file subtitle_format_srt.cpp
|
|
/// @brief Reading/writing SubRip format subtitles (.SRT)
|
|
/// @ingroup subtitle_io
|
|
///
|
|
|
|
#include "config.h"
|
|
|
|
#ifndef AGI_PRE
|
|
#include <wx/regex.h>
|
|
#endif
|
|
|
|
#include "ass_dialogue.h"
|
|
#include "ass_file.h"
|
|
#include "subtitle_format_srt.h"
|
|
#include "text_file_reader.h"
|
|
#include "text_file_writer.h"
|
|
|
|
|
|
/// Exception type thrown by SRTSubtitleFormat::ReadFile when the input does
/// not follow the expected SRT layout.
/// NOTE(review): DEFINE_SIMPLE_EXCEPTION is declared elsewhere; presumably it
/// derives SRTParseError from SubtitleFormatParseError and tags it with the
/// name "subtitle_io/parse/srt" -- confirm against the macro definition.
DEFINE_SIMPLE_EXCEPTION(SRTParseError, SubtitleFormatParseError, "subtitle_io/parse/srt")
|
|
|
|
/// @brief Constructor
///
/// Passes the format name "SubRip" to the SubtitleFormat base class.
SRTSubtitleFormat::SRTSubtitleFormat() : SubtitleFormat("SubRip") { }
|
|
|
|
/// @brief Get the wildcard list used for reading
/// @return A single-entry list containing "srt"
wxArrayString SRTSubtitleFormat::GetReadWildcards() const {
	wxArrayString extensions;
	extensions.Add("srt");
	return extensions;
}
|
|
|
|
/// @brief Get the wildcard list used for writing
/// @return The same list GetReadWildcards() returns
wxArrayString SRTSubtitleFormat::GetWriteWildcards() const {
	// Reading and writing share one wildcard list
	return this->GetReadWildcards();
}
|
|
|
|
/// @brief Read an SRT file and append its subtitles to Line
/// @param filename Path handed to TextFileReader
/// @param encoding Character set handed to TextFileReader
///
/// LoadDefault(false) is called first, then the file is consumed row by row
/// by a five-state parser. Every subtitle found is appended to Line as an
/// AssDialogue with group "[Events]", style "Default" and Comment == false;
/// ParseSRTTags() is called on a subtitle once the next one starts or the
/// file ends.
///
/// Throws SRTParseError when a subtitle index or a timestamp pair was
/// expected but not found, or when the file ends while still waiting for the
/// first subtitle or for a timestamp row. Throws agi::InternalError if the
/// timestamp regex is invalid or the parser reaches an unknown state.
void SRTSubtitleFormat::ReadFile(wxString const& filename, wxString const& encoding) {
	// Kept from the original: the unqualified sprintf calls below may rely on it
	using namespace std;

	TextFileReader file(filename,encoding);

	LoadDefault(false);

	// See parsing algorithm at <http://devel.aegisub.org/wiki/SubtitleFormats/SRT>

	// A timestamp row looks like "hh:mm:ss,fff --> hh:mm:ss,fff"
	// (e.g. "00:00:04,070 --> 00:00:10,040"); each side is captured as one group
	/// @todo: move the full parsing of SRT timestamps here, instead of having it in AssTime
	wxRegEx timestamp_regex("^([0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}) --> ([0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3})");
	if (!timestamp_regex.IsValid())
		throw agi::InternalError("Parsing SRT: Failed compiling regex", 0);

	// Parser states; the numeric values match the ones reported by the
	// "unexpected state" error below
	enum {
		STATE_BEFORE_FIRST_SUBTITLE = 1, ///< nothing found yet
		STATE_WANT_TIMESTAMPS       = 2, ///< index seen, timestamp row must follow
		STATE_FIRST_TEXT_LINE       = 3, ///< timestamps seen, first text row is next
		STATE_MORE_TEXT_LINES       = 4, ///< inside the text of a subtitle
		STATE_AFTER_BLANK_LINE      = 5  ///< blank row seen inside/after the text
	};

	int state = STATE_BEFORE_FIRST_SUBTITLE;
	int line_num = 0;
	// Number of "\N" breaks owed to the text if it turns out to continue
	// after one or more blank rows
	int linebreak_debt = 0;
	// Subtitle currently being filled in; the pointer is also stored in Line
	AssDialogue *line = 0;

	while (file.HasMoreLines()) {
		wxString text_line = file.ReadLineFromFile();
		++line_num;
		text_line.Trim(true).Trim(false);

		// Set by any state that recognised a timestamp row in text_line;
		// the new subtitle is then created after the switch
		bool begin_subtitle = false;

		switch (state) {
			case STATE_BEFORE_FIRST_SUBTITLE:
				if (text_line.IsEmpty()) {
					// leading blank rows are skipped
				}
				else if (text_line.IsNumber()) {
					// subtitle index: discard it and expect timestamps next
					state = STATE_WANT_TIMESTAMPS;
				}
				else if (timestamp_regex.Matches(text_line)) {
					// the index is missing but the timestamps are usable
					begin_subtitle = true;
				}
				else {
					char cvtbuf[16]; sprintf(cvtbuf, "%d", line_num);
					throw SRTParseError(std::string("Parsing SRT: Expected subtitle index at line ") + cvtbuf, 0);
				}
				break;

			case STATE_WANT_TIMESTAMPS:
				if (!timestamp_regex.Matches(text_line)) {
					// an index must be followed by a timestamp row
					char cvtbuf[16]; sprintf(cvtbuf, "%d", line_num);
					throw SRTParseError(std::string("Parsing SRT: Expected timestamp pair at line ") + cvtbuf, 0);
				}
				begin_subtitle = true;
				break;

			case STATE_FIRST_TEXT_LINE:
				if (text_line.IsEmpty()) {
					// subtitle starts with a blank row; there is no previous
					// text row that would need a break after it
					state = STATE_AFTER_BLANK_LINE;
					linebreak_debt = 0;
				}
				else {
					line->Text.Append(text_line);
					state = STATE_MORE_TEXT_LINES;
				}
				break;

			case STATE_MORE_TEXT_LINES:
				if (text_line.IsEmpty()) {
					// the next row may start a new subtitle; if it does not,
					// the previous text row needs a break after it
					state = STATE_AFTER_BLANK_LINE;
					linebreak_debt = 1;
				}
				else {
					line->Text.Append("\\N");
					line->Text.Append(text_line);
				}
				break;

			case STATE_AFTER_BLANK_LINE:
				// one more break is owed for the blank row that led here
				linebreak_debt++;
				if (text_line.IsEmpty()) {
					// several blank rows in a row: only the debt grows
				}
				else if (text_line.IsNumber()) {
					// treated as a subtitle index, timestamps must follow
					state = STATE_WANT_TIMESTAMPS;
				}
				else if (timestamp_regex.Matches(text_line)) {
					begin_subtitle = true;
				}
				else {
					// the text continues: pay off the owed breaks, then
					// append this row
					while (linebreak_debt-- > 0)
						line->Text.Append("\\N");
					line->Text.Append(text_line);
					state = STATE_MORE_TEXT_LINES;
				}
				break;

			default:
			{
				char cvtbuf[16]; sprintf(cvtbuf, "%d", state);
				throw agi::InternalError(std::string("Parsing SRT: Reached unexpected state ") + cvtbuf, 0);
			}
		}

		if (begin_subtitle) {
			// finish the subtitle that was being read, if any
			if (line != 0)
				line->ParseSRTTags();

			// timestamp_regex still holds the match against text_line
			line = new AssDialogue();
			line->group = "[Events]";
			line->Style = "Default";
			line->Comment = false;
			// this parsing should best be moved out of AssTime
			line->Start.ParseSRT(timestamp_regex.GetMatch(text_line, 1));
			line->End.ParseSRT(timestamp_regex.GetMatch(text_line, 2));
			// keep the pointer in the list; text is appended through `line`
			Line->push_back(line);

			state = STATE_FIRST_TEXT_LINE;
		}
	}

	// The file ended before any subtitle, or right after a subtitle index
	if (state == STATE_BEFORE_FIRST_SUBTITLE || state == STATE_WANT_TIMESTAMPS)
		throw SRTParseError("Parsing SRT: Incomplete file", 0);

	// The last subtitle has not had its tags parsed yet
	if (line)
		line->ParseSRTTags();
}
|
|
|
|
/// @brief Write the subtitles in Line out as an SRT file
/// @param filename Path handed to TextFileWriter
/// @param encoding Character set handed to TextFileWriter
///
/// Each AssDialogue entry in Line becomes one SRT record: a running index
/// starting at 1, a "start --> end" timestamp row, the text, and an empty
/// row. Entries that are not AssDialogue are skipped.
///
/// NOTE(review): ClearCopy() is only reached when nothing above it throws;
/// if it is the cleanup paired with CreateCopy(), a failing write would skip
/// it -- confirm whether that matters.
void SRTSubtitleFormat::WriteFile(wxString const& filename, wxString const& encoding) {
	TextFileWriter file(filename,encoding);

	// Convert to SRT
	CreateCopy();
	SortLines();
	StripComments();

	// Tag conversion takes two passes. The first converts ASS style overrides
	// to SRT while keeping ASS linebreaks, because recombining overlaps
	// requires ASS style linebreaks.
	ConvertTags(2,"\\N",false);
	RecombineOverlaps();
	MergeIdentical();
	// The second pass converts the linebreaks.
	ConvertTags(0,"\r\n",false);
	// Otherwise unclosed overrides might affect lines they shouldn't, see bug #809 for example

	// Write lines
	int index = 1;
	for (std::list<AssEntry*>::iterator it = Line->begin(); it != Line->end(); ++it) {
		AssDialogue *diag = dynamic_cast<AssDialogue*>(*it);
		if (!diag)
			continue;

		file.WriteLineToFile(wxString::Format("%i", index));
		++index;
		file.WriteLineToFile(diag->Start.GetSRTFormated() + " --> " + diag->End.GetSRTFormated());
		file.WriteLineToFile(diag->Text);
		file.WriteLineToFile("");
	}

	ClearCopy();
}
|