Rewrite SRT parsing to use a more flexible algorithm and allow more freeform files. Updates #1213.
Originally committed to SVN as r4557.
This commit is contained in:
parent
4c01f9a36b
commit
2fc8420ee9
1 changed file with 125 additions and 68 deletions
|
@ -39,6 +39,10 @@
|
||||||
// Headers
|
// Headers
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
|
#ifndef AGI_PRE
|
||||||
|
#include <wx/regex.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "ass_dialogue.h"
|
#include "ass_dialogue.h"
|
||||||
#include "ass_file.h"
|
#include "ass_file.h"
|
||||||
#include "subtitle_format_srt.h"
|
#include "subtitle_format_srt.h"
|
||||||
|
@ -46,6 +50,9 @@
|
||||||
#include "text_file_writer.h"
|
#include "text_file_writer.h"
|
||||||
|
|
||||||
|
|
||||||
|
/// Exception type thrown by the SRT reader in this file when the input is malformed
/// (missing subtitle index, missing timestamp pair, or incomplete file).
/// NOTE(review): presumably declares SRTParseError as a subclass of SubtitleFormatParseError
/// identified by "subtitle_io/parse/srt" -- confirm against the DEFINE_SIMPLE_EXCEPTION macro.
DEFINE_SIMPLE_EXCEPTION(SRTParseError, SubtitleFormatParseError, "subtitle_io/parse/srt")
|
||||||
|
|
||||||
|
|
||||||
/// @brief Can read?
|
/// @brief Can read?
|
||||||
/// @param filename
|
/// @param filename
|
||||||
/// @return
|
/// @return
|
||||||
|
@ -108,84 +115,134 @@ void SRTSubtitleFormat::ReadFile(wxString filename,wxString encoding) {
|
||||||
// Default
|
// Default
|
||||||
LoadDefault(false);
|
LoadDefault(false);
|
||||||
|
|
||||||
// Parse file
|
// See parsing algorithm at <http://devel.aegisub.org/wiki/SubtitleFormats/SRT>
|
||||||
int linen = 1;
|
|
||||||
int fileLine = 0;
|
// "hh:mm:ss,fff --> hh:mm:ss,fff" (e.g. "00:00:04,070 --> 00:00:10,040")
|
||||||
int mode = 0;
|
/// @todo: move the full parsing of SRT timestamps here, instead of having it in AssTime
|
||||||
int lines = 0;
|
wxRegEx timestamp_regex(L"^([0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}) --> ([0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3})");
|
||||||
long templ;
|
if (!timestamp_regex.IsValid())
|
||||||
AssDialogue *line = NULL;
|
throw agi::InternalError("Parsing SRT: Failed compiling regex", 0);
|
||||||
|
|
||||||
|
int state = 1;
|
||||||
|
int line_num = 0;
|
||||||
|
int linebreak_debt = 0;
|
||||||
|
AssDialogue *line = 0;
|
||||||
while (file.HasMoreLines()) {
|
while (file.HasMoreLines()) {
|
||||||
// Reads line
|
wxString text_line = file.ReadLineFromFile();
|
||||||
wxString curLine = file.ReadLineFromFile();
|
line_num++;
|
||||||
fileLine++;
|
text_line.Trim(true).Trim(false);
|
||||||
|
|
||||||
if (mode == 0) {
|
switch (state) {
|
||||||
// Checks if there is anything to read
|
case 1:
|
||||||
if (curLine.IsEmpty()) continue;
|
{
|
||||||
|
// start of file, no subtitles found yet
|
||||||
// Check if it's a line number
|
if (text_line.IsEmpty())
|
||||||
if (!curLine.IsNumber()) {
|
// ignore blank lines
|
||||||
Clear();
|
break;
|
||||||
if (line) delete line;
|
if (text_line.IsNumber()) {
|
||||||
throw wxString::Format(_T("Parse error on entry %i at line %i (expecting line number). Possible malformed file."),linen,fileLine);
|
// found the line number, throw it away and hope for timestamps
|
||||||
|
state = 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (timestamp_regex.Matches(text_line)) {
|
||||||
|
goto found_timestamps;
|
||||||
|
}
|
||||||
|
char cvtbuf[16]; sprintf(cvtbuf, "%d", line_num);
|
||||||
|
throw SRTParseError(std::string("Parsing SRT: Expected subtitle index at line ") + cvtbuf, 0);
|
||||||
}
|
}
|
||||||
|
case 2:
|
||||||
// Read line number
|
{
|
||||||
curLine.ToLong(&templ);
|
// want timestamps
|
||||||
if (templ != linen) {
|
if (timestamp_regex.Matches(text_line) == false) {
|
||||||
linen = templ;
|
// bad format
|
||||||
}
|
char cvtbuf[16]; sprintf(cvtbuf, "%d", line_num);
|
||||||
line = new AssDialogue();
|
throw SRTParseError(std::string("Parsing SRT: Expected timestamp pair at line ") + cvtbuf, 0);
|
||||||
mode = 1;
|
}
|
||||||
}
|
found_timestamps:
|
||||||
|
if (line != 0) {
|
||||||
else if (mode == 1) {
|
// finalise active line
|
||||||
// Read timestamps
|
line->ParseSRTTags();
|
||||||
if (curLine.substr(13,3) != _T("-->")) {
|
line = 0;
|
||||||
Clear();
|
}
|
||||||
if (line) delete line;
|
// create new subtitle
|
||||||
throw wxString::Format(_T("Parse error on entry %i at line %i (expecting timestamps). Possible malformed file."),linen,fileLine);
|
line = new AssDialogue();
|
||||||
}
|
line->group = L"[Events]";
|
||||||
line->Start.ParseSRT(curLine.substr(0,12));
|
|
||||||
line->End.ParseSRT(curLine.substr(17,12));
|
|
||||||
mode = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (mode == 2) {
|
|
||||||
// Checks if it's done
|
|
||||||
bool eof = !file.HasMoreLines();
|
|
||||||
bool isDone = curLine.IsEmpty();
|
|
||||||
|
|
||||||
// Append text
|
|
||||||
if (!isDone) {
|
|
||||||
if (line->Text != _T("")) line->Text += _T("\\N");
|
|
||||||
line->Text += curLine;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Done
|
|
||||||
if (isDone || eof) {
|
|
||||||
mode = 0;
|
|
||||||
linen++;
|
|
||||||
line->group = _T("[Events]");
|
|
||||||
line->Style = _T("Default");
|
line->Style = _T("Default");
|
||||||
line->Comment = false;
|
line->Comment = false;
|
||||||
line->ParseSRTTags();
|
// this parsing should best be moved out of AssTime
|
||||||
|
line->Start.ParseSRT(timestamp_regex.GetMatch(text_line, 1));
|
||||||
|
line->End.ParseSRT(timestamp_regex.GetMatch(text_line, 2));
|
||||||
|
// store pointer to subtitle, we'll continue working on it
|
||||||
Line->push_back(line);
|
Line->push_back(line);
|
||||||
lines++;
|
// next we're reading the text
|
||||||
line = NULL;
|
state = 3;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 3:
|
||||||
|
{
|
||||||
|
// reading first line of subtitle text
|
||||||
|
if (text_line.IsEmpty()) {
|
||||||
|
// that's not very interesting... blank subtitle?
|
||||||
|
state = 5;
|
||||||
|
linebreak_debt = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
line->Text.Append(text_line);
|
||||||
|
state = 4;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 4:
|
||||||
|
{
|
||||||
|
// reading following line of subtitle text
|
||||||
|
if (text_line.IsEmpty()) {
|
||||||
|
// blank line, next may begin a new subtitle
|
||||||
|
state = 5;
|
||||||
|
linebreak_debt = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
line->Text.Append(L"\\N").Append(text_line);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 5:
|
||||||
|
{
|
||||||
|
// blank line in subtitle text
|
||||||
|
linebreak_debt++;
|
||||||
|
if (text_line.IsEmpty()) {
|
||||||
|
// multiple blank lines in a row, just add a line break...
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (text_line.IsNumber()) {
|
||||||
|
// must be a subtitle index!
|
||||||
|
// go for timestamps next
|
||||||
|
state = 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (timestamp_regex.Matches(text_line)) {
|
||||||
|
goto found_timestamps;
|
||||||
|
}
|
||||||
|
// assume it's a continuation of the subtitle text
|
||||||
|
// resolve our line break debt and append the line text
|
||||||
|
while (linebreak_debt-- > 0)
|
||||||
|
line->Text.Append(L"\\N");
|
||||||
|
line->Text.Append(text_line);
|
||||||
|
state = 4;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
char cvtbuf[16]; sprintf(cvtbuf, "%d", state);
|
||||||
|
throw agi::InternalError(std::string("Parsing SRT: Reached unexpected state ") + cvtbuf, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// No lines?
|
if (state == 1 || state == 2) {
|
||||||
if (lines == 0) {
|
throw SRTParseError(std::string("Parsing SRT: Incomplete file"), 0);
|
||||||
line = new AssDialogue();
|
}
|
||||||
line->group = _T("[Events]");
|
|
||||||
line->Style = _T("Default");
|
if (line) {
|
||||||
line->Start.SetMS(0);
|
// an unfinalised line
|
||||||
line->End.SetMS(5000);
|
line->ParseSRTTags();
|
||||||
Line->push_back(line);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue