diff --git a/aegisub/src/ass_dialogue.cpp b/aegisub/src/ass_dialogue.cpp index 286843f6e..a7742f056 100644 --- a/aegisub/src/ass_dialogue.cpp +++ b/aegisub/src/ass_dialogue.cpp @@ -243,137 +243,6 @@ wxString AssDialogue::GetSSAText () const { return GetData(true); } -void AssDialogue::ParseSRTTags () { - // Search and replace - size_t total = 0; - total += Text.Replace("","{\\i1}"); - total += Text.Replace("","{\\i0}"); - total += Text.Replace("","{\\b1}"); - total += Text.Replace("","{\\b0}"); - total += Text.Replace("","{\\u1}"); - total += Text.Replace("","{\\u0}"); - total += Text.Replace("","{\\s1}"); - total += Text.Replace("","{\\s0}"); - - // Process tag - wxString work = Text; - work.UpperCase(); - size_t pos_open = 0; - size_t pos_close = 0; - size_t pos = 0; - size_t end = 0; - size_t start = 0; - bool isOpen; - - // Iterate - pos_open = work.find("",start)+1; - //if (end == wxString::npos) continue; - - // Open tag - if (isOpen) { - wxString replaced; - - // Color tag - if ((pos = work.find("COLOR=\"",start)) != wxString::npos) { - if (pos < end) { - pos += 7; - size_t end_tag = Text.find("\"",pos); - if (end_tag != wxString::npos) { - if (end_tag-pos == 7) { - replaced += "{\\c&H"; - replaced += work.substr(pos+5,2); - replaced += work.substr(pos+3,2); - replaced += work.substr(pos+1,2); - replaced += "&}"; - total++; - } - } - } - } - - // Face tag - if ((pos = work.find("FACE=\"",start)) != wxString::npos) { - if (pos < end) { - pos += 6; - size_t end_tag = work.find("\"",pos); - if (end_tag != wxString::npos) { - replaced += "{\\fn"; - replaced += work.substr(pos,end_tag-pos); - replaced += "}"; - total++; - } - } - } - - // Size tag - if ((pos = work.find("SIZE=\"",start)) != wxString::npos) { - if (pos < end) { - pos += 6; - size_t end_tag = Text.find("\"",pos); - if (end_tag != wxString::npos) { - replaced += "{\\fs"; - replaced += work.substr(pos,end_tag-pos); - replaced += "}"; - total++; - } - } - } - - // Replace whole tag - //Text = Text.substr(0,start) + replaced + Text.substr(end); - Text = Text.substr(0, start); - Text << replaced << Text.substr(end); - total++; - } - - // Close tag - else { - // Find if it's italic, bold, underline, and strikeout - wxString prev = Text.Left(start); - bool isItalic=false,isBold=false,isUnder=false,isStrike=false; - if (CountMatches(prev,"{\\i1}") > CountMatches(prev,"{\\i0}")) isItalic = true; - if (CountMatches(prev,"{\\b1}") > CountMatches(prev,"{\\b0}")) isBold = true; - if (CountMatches(prev,"{\\u1}") > CountMatches(prev,"{\\u0}")) isUnder = true; - if (CountMatches(prev,"{\\s1}") > CountMatches(prev,"{\\s0}")) isStrike = true; - - // Generate new tag, by reseting and then restoring flags - wxString replaced = "{\\r"; - if (isItalic) replaced += "\\i1"; - if (isBold) replaced += "\\b1"; - if (isUnder) replaced += "\\u1"; - if (isStrike) replaced += "\\s1"; - replaced += "}"; - - // Replace - //Text = Text.substr(0,start) + replaced + Text.substr(end); - Text = Text.substr(0, start); - Text << replaced << Text.substr(end); - total++; - } - - // Get next - work = Text; - work.UpperCase(); - pos_open = work.find("= 3600000) { - _ms -= 3600000; - h++; - } - - // Ass overflow - if (h > 9) { - h = 9; - m = 59; - s = 59; - ms = 999; - } - - // Minutes - while (_ms >= 60000) { - _ms -= 60000; - m++; - } - - // Seconds - while (_ms >= 1000) { - _ms -= 1000; - s++; - } - ms = _ms; - - wxString result = wxString::Format("%02i:%02i:%02i,%03i",h,m,s,ms); - return result; -} - - - /// @brief AssTime comparison /// @param t1 /// @param t2 diff --git a/aegisub/src/ass_time.h b/aegisub/src/ass_time.h index 3071b32b7..57f66e04d 100644 --- a/aegisub/src/ass_time.h +++ b/aegisub/src/ass_time.h @@ -75,9 +75,7 @@ public: int GetMS() const; // Returns milliseconds void SetMS(int ms); // Sets values to milliseconds void ParseASS(const wxString text); // Sets value to text-form time, in ASS format - void ParseSRT(const wxString text); // Sets value to text-form time, in SRT format wxString GetASSFormated(bool ms=false) const; // Returns the ASS representation of time - wxString GetSRTFormated(); // Returns the SRT representation of time }; // Comparison operators diff --git a/aegisub/src/subtitle_format_srt.cpp b/aegisub/src/subtitle_format_srt.cpp index 19eaf957f..1153c8f86 100644 --- a/aegisub/src/subtitle_format_srt.cpp +++ b/aegisub/src/subtitle_format_srt.cpp @@ -42,13 +42,338 @@ #include "ass_dialogue.h" #include "ass_file.h" +#include "ass_style.h" +#include "colorspace.h" +#include "compat.h" #include "subtitle_format_srt.h" #include "text_file_reader.h" #include "text_file_writer.h" - DEFINE_SIMPLE_EXCEPTION(SRTParseError, SubtitleFormatParseError, "subtitle_io/parse/srt") +namespace { +class SrtTagParser { + struct FontAttribs { + wxString face; + wxString size; + wxString color; + }; + + enum TagType { + // leave 0 unused so indexing an unknown tag in the map won't clash + TAG_BOLD_OPEN = 1, + TAG_BOLD_CLOSE, + TAG_ITALICS_OPEN, + TAG_ITALICS_CLOSE, + TAG_UNDERLINE_OPEN, + TAG_UNDERLINE_CLOSE, + TAG_STRIKEOUT_OPEN, + TAG_STRIKEOUT_CLOSE, + TAG_FONT_OPEN, + TAG_FONT_CLOSE, + }; + + wxRegEx tag_matcher; + wxRegEx attrib_matcher; + std::map tag_name_cases; + +public: + SrtTagParser() + : tag_matcher("^(.*?)<(/?b|/?i|/?u|/?s|/?font)([^>]*)>(.*)$", wxRE_ICASE|wxRE_ADVANCED) + , attrib_matcher("^[[:space:]]+(face|size|color)=('[^']*'|\"[^\"]*\"|[^[:space:]]+)", wxRE_ICASE|wxRE_ADVANCED) + { + if (!tag_matcher.IsValid()) + throw agi::InternalError("Parsing SRT: Failed compiling tag matching regex", 0); + if (!attrib_matcher.IsValid()) + throw agi::InternalError("Parsing SRT: Failed compiling tag attribute matching regex", 0); + + tag_name_cases["b"] = TAG_BOLD_OPEN; + tag_name_cases["/b"] = TAG_BOLD_CLOSE; + tag_name_cases["i"] = TAG_ITALICS_OPEN; + tag_name_cases["/i"] = TAG_ITALICS_CLOSE; + tag_name_cases["u"] = TAG_UNDERLINE_OPEN; + tag_name_cases["/u"] = TAG_UNDERLINE_CLOSE; + tag_name_cases["s"] = TAG_STRIKEOUT_OPEN; + tag_name_cases["/s"] = TAG_STRIKEOUT_CLOSE; + tag_name_cases["font"] = TAG_FONT_OPEN; + tag_name_cases["/font"] = TAG_FONT_CLOSE; + } + + wxString ToAss(wxString srt) + { + int bold_level = 0; + int italics_level = 0; + int underline_level = 0; + int strikeout_level = 0; + std::vector font_stack; + + wxString ass; // result to be built + + while (!srt.empty()) + { + if (!tag_matcher.Matches(srt)) + { + // no more tags could be matched, end of string + ass.append(srt); + break; + } + + // we found a tag, translate it + wxString pre_text = tag_matcher.GetMatch(srt, 1); + wxString tag_name = tag_matcher.GetMatch(srt, 2); + wxString tag_attrs = tag_matcher.GetMatch(srt, 3); + wxString post_text = tag_matcher.GetMatch(srt, 4); + + // the text before the tag goes through unchanged + ass.append(pre_text); + // the text after the tag is the input for next iteration + srt = post_text; + + switch (tag_name_cases[tag_name.Lower()]) + { + case TAG_BOLD_OPEN: + if (bold_level == 0) + ass.append("{\\b1}"); + bold_level++; + break; + case TAG_BOLD_CLOSE: + if (bold_level == 1) + ass.append("{\\b}"); + if (bold_level > 0) + bold_level--; + break; + case TAG_ITALICS_OPEN: + if (italics_level == 0) + ass.append("{\\i1}"); + italics_level++; + break; + case TAG_ITALICS_CLOSE: + if (italics_level == 1) + ass.append("{\\i}"); + if (italics_level > 0) + italics_level--; + break; + case TAG_UNDERLINE_OPEN: + if (underline_level == 0) + ass.append("{\\u1}"); + underline_level++; + break; + case TAG_UNDERLINE_CLOSE: + if (underline_level == 1) + ass.append("{\\u}"); + if (underline_level > 0) + underline_level--; + break; + case TAG_STRIKEOUT_OPEN: + if (strikeout_level == 0) + ass.append("{\\s1}"); + strikeout_level++; + break; + case TAG_STRIKEOUT_CLOSE: + if (strikeout_level == 1) + ass.append("{\\s}"); + if (strikeout_level > 0) + strikeout_level--; + break; + case TAG_FONT_OPEN: + { + // new attributes to fill in + FontAttribs new_attribs; + FontAttribs old_attribs; + // start out with any previous ones on stack + if (font_stack.size() > 0) + { + old_attribs = font_stack.back(); + } + new_attribs = old_attribs; + // now find all attributes on this font tag + while (attrib_matcher.Matches(tag_attrs)) + { + // get attribute name and values + wxString attr_name = attrib_matcher.GetMatch(tag_attrs, 1); + wxString attr_value = attrib_matcher.GetMatch(tag_attrs, 2); + // clean them + attr_name.MakeLower(); + if ((attr_value.StartsWith("'") && attr_value.EndsWith("'")) || + (attr_value.StartsWith("\"") && attr_value.EndsWith("\""))) + { + attr_value = attr_value.Mid(1, attr_value.Len()-2); + } + // handle the attributes + if (attr_name == "face") + { + new_attribs.face = wxString::Format("{\\fn%s}", attr_value); + } + else if (attr_name == "size") + { + new_attribs.size = wxString::Format("{\\fs%s}", attr_value); + } + else if (attr_name == "color") + { + wxColour wxcl = html_to_color(attr_value); + wxString colorstr = AssColor(wxcl).GetASSFormatted(false, false, false); + new_attribs.color = wxString::Format("{\\c%s}", colorstr); + } + // remove this attribute to prepare for the next + size_t attr_pos, attr_len; + attrib_matcher.GetMatch(&attr_pos, &attr_len, 0); + tag_attrs.erase(attr_pos, attr_len); + } + // the attributes changed from old are then written out + if (new_attribs.face != old_attribs.face) + ass.append(new_attribs.face); + if (new_attribs.size != old_attribs.size) + ass.append(new_attribs.size); + if (new_attribs.color != old_attribs.color) + ass.append(new_attribs.color); + // lastly dump the new attributes state onto the stack + font_stack.push_back(new_attribs); + } + break; + case TAG_FONT_CLOSE: + { + // this requires a font stack entry + if (font_stack.empty()) + break; + // get the current attribs + FontAttribs cur_attribs = font_stack.back(); + // remove them from the stack + font_stack.pop_back(); + // grab the old attributes if there are any + FontAttribs old_attribs; + if (font_stack.size() > 0) + old_attribs = font_stack.back(); + // then restore the attributes to previous settings + if (cur_attribs.face != old_attribs.face) + { + if (old_attribs.face.empty()) + ass.append("{\\fn}"); + else + ass.append(old_attribs.face); + } + if (cur_attribs.size != old_attribs.size) + { + if (old_attribs.size.empty()) + ass.append("{\\fs}"); + else + ass.append(old_attribs.size); + } + if (cur_attribs.color != old_attribs.color) + { + if (old_attribs.color.empty()) + ass.append("{\\c}"); + else + ass.append(old_attribs.color); + } + } + break; + default: + // unknown tag, replicate it in the output + ass.append("<").append(tag_name).append(tag_attrs).append(">"); + break; + } + } + + // make it a little prettier, join tag groups + ass.Replace("}{", "", true); + + return ass; + } +}; + +AssTime ReadSRTTime(wxString const& ts) +{ + // For the sake of your sanity, please do not read this function. + + int d, h, m, s, ms; + d = h = m = s = ms = 0; + + size_t ci = 0; + int ms_chars = 0; + + for (; ci < ts.length(); ++ci) + { + char ch = ts[ci]; + switch (ch) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + s = s * 10 + (ch - '0'); + break; + case ':': + d = h; + h = m; + m = s; + s = 0; + break; + case ',': + ci++; + goto milliseconds; + default: + goto allparsed; + } + } + goto allparsed; +milliseconds: + for (; ci < ts.length(); ++ci) + { + char ch = ts[ci]; + switch (ch) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + ms = ms * 10 + (ch - '0'); + ms_chars++; + break; + default: + goto allparsed; + } + } +allparsed: + while (ms_chars < 3) ms *= 10, ms_chars++; + while (ms_chars > 3) ms /= 10, ms_chars--; + + AssTime res; + res.SetMS(ms + 1000*(s + 60*(m + 60*(h + d*24)))); + return res; +} + +wxString WriteSRTTime(AssTime const& ts) +{ + int time = ts.GetMS(); + + int ms_part = time % 1000; + time /= 1000; // now holds seconds + + int s_part = time % 60; + time /= 60; // now holds minutes + + int m_part = time % 60; + time /= 60; // now holds hours + + int h_part = time; + + return wxString::Format("%02d:%02d:%02d,%03d", h_part, m_part, s_part, ms_part); +} + +} + SRTSubtitleFormat::SRTSubtitleFormat() : SubtitleFormat("SubRip") { @@ -67,18 +392,19 @@ wxArrayString SRTSubtitleFormat::GetWriteWildcards() const { void SRTSubtitleFormat::ReadFile(wxString const& filename, wxString const& encoding) { using namespace std; - TextFileReader file(filename,encoding); - + TextFileReader file(filename, encoding); LoadDefault(false); // See parsing algorithm at // "hh:mm:ss,fff --> hh:mm:ss,fff" (e.g. "00:00:04,070 --> 00:00:10,04") /// @todo: move the full parsing of SRT timestamps here, instead of having it in AssTime - wxRegEx timestamp_regex("^([0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}) --> ([0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3})"); + wxRegEx timestamp_regex("^([0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{1,}) --> ([0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{1,})"); if (!timestamp_regex.IsValid()) throw agi::InternalError("Parsing SRT: Failed compiling regex", 0); + SrtTagParser tag_parser; + int state = 1; int line_num = 0; int linebreak_debt = 0; @@ -90,9 +416,8 @@ void SRTSubtitleFormat::ReadFile(wxString const& filename, wxString const& encod switch (state) { case 1: - { // start of file, no subtitles found yet - if (text_line.IsEmpty()) + if (text_line.empty()) // ignore blank lines break; if (text_line.IsNumber()) { @@ -100,44 +425,37 @@ void SRTSubtitleFormat::ReadFile(wxString const& filename, wxString const& encod state = 2; break; } - if (timestamp_regex.Matches(text_line)) { + if (timestamp_regex.Matches(text_line)) goto found_timestamps; - } - char cvtbuf[16]; sprintf(cvtbuf, "%d", line_num); - throw SRTParseError(std::string("Parsing SRT: Expected subtitle index at line ") + cvtbuf, 0); - } + + throw SRTParseError(STD_STR(wxString::Format("Parsing SRT: Expected subtitle index at line %d", line_num)), 0); case 2: - { // want timestamps - if (timestamp_regex.Matches(text_line) == false) { + if (timestamp_regex.Matches(text_line) == false) // bad format - char cvtbuf[16]; sprintf(cvtbuf, "%d", line_num); - throw SRTParseError(std::string("Parsing SRT: Expected timestamp pair at line ") + cvtbuf, 0); - } + throw SRTParseError(STD_STR(wxString::Format("Parsing SRT: Expected timestamp pair at line %d", line_num)), 0); found_timestamps: - if (line != 0) { - // finalise active line - line->ParseSRTTags(); + if (line) { + // finalize active line + line->Text = tag_parser.ToAss(line->Text); line = 0; } // create new subtitle - line = new AssDialogue(); + line = new AssDialogue; line->group = "[Events]"; line->Style = "Default"; line->Comment = false; // this parsing should best be moved out of AssTime - line->Start.ParseSRT(timestamp_regex.GetMatch(text_line, 1)); - line->End.ParseSRT(timestamp_regex.GetMatch(text_line, 2)); + line->Start = ReadSRTTime(timestamp_regex.GetMatch(text_line, 1)); + line->End = ReadSRTTime(timestamp_regex.GetMatch(text_line, 2)); // store pointer to subtitle, we'll continue working on it Line->push_back(line); // next we're reading the text state = 3; break; - } case 3: - { // reading first line of subtitle text - if (text_line.IsEmpty()) { + if (text_line.empty()) { // that's not very interesting... blank subtitle? state = 5; // no previous line that needs a line break after @@ -147,11 +465,9 @@ found_timestamps: line->Text.Append(text_line); state = 4; break; - } case 4: - { // reading following line of subtitle text - if (text_line.IsEmpty()) { + if (text_line.empty()) { // blank line, next may begin a new subtitle state = 5; // previous line needs a line break after @@ -160,24 +476,21 @@ found_timestamps: } line->Text.Append("\\N").Append(text_line); break; - } case 5: - { // blank line in subtitle text linebreak_debt++; - if (text_line.IsEmpty()) { + if (text_line.empty()) // multiple blank lines in a row, just add a line break... break; - } if (text_line.IsNumber()) { // must be a subtitle index! // go for timestamps next state = 2; break; } - if (timestamp_regex.Matches(text_line)) { + if (timestamp_regex.Matches(text_line)) goto found_timestamps; - } + // assume it's a continuation of the subtitle text // resolve our line break debt and append the line text while (linebreak_debt-- > 0) @@ -185,12 +498,8 @@ found_timestamps: line->Text.Append(text_line); state = 4; break; - } default: - { - char cvtbuf[16]; sprintf(cvtbuf, "%d", state); - throw agi::InternalError(std::string("Parsing SRT: Reached unexpected state ") + cvtbuf, 0); - } + throw agi::InternalError(STD_STR(wxString::Format("Parsing SRT: Reached unexpected state %d", state)), 0); } } @@ -198,10 +507,9 @@ found_timestamps: throw SRTParseError("Parsing SRT: Incomplete file", 0); } - if (line) { - // an unfinalised line - line->ParseSRTTags(); - } + if (line) + // an unfinalized line + line->Text = tag_parser.ToAss(line->Text); } void SRTSubtitleFormat::WriteFile(wxString const& filename, wxString const& encoding) { @@ -225,8 +533,8 @@ void SRTSubtitleFormat::WriteFile(wxString const& filename, wxString const& enco int i=1; for (std::list::iterator cur=Line->begin();cur!=Line->end();cur++) { if (AssDialogue *current = dynamic_cast(*cur)) { - file.WriteLineToFile(wxString::Format("%i", i++)); - file.WriteLineToFile(current->Start.GetSRTFormated() + " --> " + current->End.GetSRTFormated()); + file.WriteLineToFile(wxString::Format("%d", i++)); + file.WriteLineToFile(WriteSRTTime(current->Start) + " --> " + WriteSRTTime(current->End)); file.WriteLineToFile(current->Text); file.WriteLineToFile(""); }