Use mmap for reading subtitles from Matroska files
Cuts cold-cache read time for an arbitrary 1 GB file read over a network off a USB 2.0 hard drive from ~70 seconds to ~45 seconds.
This commit is contained in:
parent
00b4d6908f
commit
0c9f39ca25
1 changed files with 72 additions and 101 deletions
173
src/mkv_wrap.cpp
173
src/mkv_wrap.cpp
|
@ -43,46 +43,78 @@
|
||||||
#include "dialog_progress.h"
|
#include "dialog_progress.h"
|
||||||
#include "MatroskaParser.h"
|
#include "MatroskaParser.h"
|
||||||
|
|
||||||
|
#include <libaegisub/file_mapping.h>
|
||||||
#include <libaegisub/fs.h>
|
#include <libaegisub/fs.h>
|
||||||
#include <libaegisub/scoped_ptr.h>
|
#include <libaegisub/scoped_ptr.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <boost/algorithm/string/classification.hpp>
|
#include <boost/algorithm/string/classification.hpp>
|
||||||
#include <boost/algorithm/string/replace.hpp>
|
#include <boost/algorithm/string/replace.hpp>
|
||||||
#include <boost/algorithm/string/split.hpp>
|
|
||||||
#include <boost/format.hpp>
|
#include <boost/format.hpp>
|
||||||
#include <boost/lexical_cast.hpp>
|
#include <boost/lexical_cast.hpp>
|
||||||
#include <boost/range/irange.hpp>
|
#include <boost/range/irange.hpp>
|
||||||
#include <boost/tokenizer.hpp>
|
#include <boost/tokenizer.hpp>
|
||||||
#include <cerrno>
|
|
||||||
#include <cstdint>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
|
|
||||||
#include <wx/choicdlg.h> // Keep this last so wxUSE_CHOICEDLG is set.
|
#include <wx/choicdlg.h> // Keep this last so wxUSE_CHOICEDLG is set.
|
||||||
|
|
||||||
class MkvStdIO final : public InputStream {
|
struct MkvStdIO final : InputStream {
|
||||||
public:
|
agi::read_file_mapping file;
|
||||||
MkvStdIO(agi::fs::path const& filename);
|
std::string error;
|
||||||
~MkvStdIO() { if (fp) fclose(fp); }
|
|
||||||
|
|
||||||
FILE *fp = nullptr;
|
static int Read(InputStream *st, ulonglong pos, void *buffer, int count) {
|
||||||
int error = 0;
|
auto *self = static_cast<MkvStdIO*>(st);
|
||||||
|
if (pos == self->file.size())
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
try {
|
||||||
|
memcpy(buffer, self->file.read(pos, count), count);
|
||||||
|
}
|
||||||
|
catch (agi::Exception const& e) {
|
||||||
|
self->error = e.GetChainedMessage();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
static longlong Scan(InputStream *st, ulonglong start, unsigned signature) {
|
||||||
|
auto *self = static_cast<MkvStdIO*>(st);
|
||||||
|
try {
|
||||||
|
unsigned cmp = 0;
|
||||||
|
for (auto i : boost::irange(start, self->file.size())) {
|
||||||
|
int c = *self->file.read(i, 1);
|
||||||
|
cmp = ((cmp << 8) | c) & 0xffffffff;
|
||||||
|
if (cmp == signature)
|
||||||
|
return i - 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (agi::Exception const& e) {
|
||||||
|
self->error = e.GetChainedMessage();
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static longlong Size(InputStream *st) {
|
||||||
|
return static_cast<MkvStdIO*>(st)->file.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
MkvStdIO(agi::fs::path const& filename) : file(filename) {
|
||||||
|
read = &MkvStdIO::Read;
|
||||||
|
scan = &MkvStdIO::Scan;
|
||||||
|
getcachesize = [](InputStream *) -> unsigned int { return 16 * 1024 * 1024; };
|
||||||
|
geterror = [](InputStream *st) -> const char * { return ((MkvStdIO *)st)->error.c_str(); };
|
||||||
|
memalloc = [](InputStream *, size_t size) { return malloc(size); };
|
||||||
|
memrealloc = [](InputStream *, void *mem, size_t size) { return realloc(mem, size); };
|
||||||
|
memfree = [](InputStream *, void *mem) { free(mem); };
|
||||||
|
progress = [](InputStream *, ulonglong, ulonglong) { return 1; };
|
||||||
|
getfilesize = &MkvStdIO::Size;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#define CACHESIZE 1024
|
|
||||||
|
|
||||||
#ifdef __VISUALC__
|
|
||||||
#define std_fseek _fseeki64
|
|
||||||
#define std_ftell _ftelli64
|
|
||||||
#else
|
|
||||||
#define std_fseek fseeko
|
|
||||||
#define std_ftell ftello
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static void read_subtitles(agi::ProgressSink *ps, MatroskaFile *file, MkvStdIO *input, bool srt, double totalTime, AssParser *parser) {
|
static void read_subtitles(agi::ProgressSink *ps, MatroskaFile *file, MkvStdIO *input, bool srt, double totalTime, AssParser *parser) {
|
||||||
std::vector<std::pair<int, std::string>> subList;
|
std::vector<std::pair<int, std::string>> subList;
|
||||||
std::string readBuf;
|
|
||||||
|
|
||||||
// Load blocks
|
// Load blocks
|
||||||
ulonglong startTime, endTime, filePos;
|
ulonglong startTime, endTime, filePos;
|
||||||
|
@ -92,36 +124,42 @@ static void read_subtitles(agi::ProgressSink *ps, MatroskaFile *file, MkvStdIO *
|
||||||
if (ps->IsCancelled()) return;
|
if (ps->IsCancelled()) return;
|
||||||
if (frameSize == 0) continue;
|
if (frameSize == 0) continue;
|
||||||
|
|
||||||
readBuf.resize(frameSize);
|
const auto readBuf = input->file.read(filePos, frameSize);
|
||||||
std_fseek(input->fp, filePos, SEEK_SET);
|
const auto readBufEnd = readBuf + frameSize;
|
||||||
fread(&readBuf[0], 1, frameSize, input->fp);
|
|
||||||
|
|
||||||
// Get start and end times
|
// Get start and end times
|
||||||
longlong timecodeScaleLow = 1000000;
|
longlong timecodeScaleLow = 1000000;
|
||||||
AssTime subStart = startTime / timecodeScaleLow;
|
AssTime subStart = startTime / timecodeScaleLow;
|
||||||
AssTime subEnd = endTime / timecodeScaleLow;
|
AssTime subEnd = endTime / timecodeScaleLow;
|
||||||
|
|
||||||
|
using str_range = boost::iterator_range<const char *>;
|
||||||
|
|
||||||
// Process SSA/ASS
|
// Process SSA/ASS
|
||||||
if (!srt) {
|
if (!srt) {
|
||||||
std::vector<boost::iterator_range<std::string::iterator>> chunks;
|
auto first = std::find(readBuf, readBufEnd, ',');
|
||||||
boost::split(chunks, readBuf, boost::is_any_of(","));
|
if (first == readBufEnd) continue;
|
||||||
|
auto second = std::find(first + 1, readBufEnd, ',');
|
||||||
|
if (second == readBufEnd) continue;
|
||||||
|
|
||||||
subList.emplace_back(
|
subList.emplace_back(
|
||||||
boost::lexical_cast<int>(chunks[0]),
|
boost::lexical_cast<int>(str_range(readBuf, first)),
|
||||||
str(boost::format("Dialogue: %d,%s,%s,%s")
|
str(boost::format("Dialogue: %d,%s,%s,%s")
|
||||||
% boost::lexical_cast<int>(chunks[1])
|
% boost::lexical_cast<int>(str_range(first + 1, second))
|
||||||
% subStart.GetAssFormated()
|
% subStart.GetAssFormated()
|
||||||
% subEnd.GetAssFormated()
|
% subEnd.GetAssFormated()
|
||||||
% boost::make_iterator_range(begin(chunks[2]), readBuf.end())));
|
% str_range(second + 1, readBufEnd)));
|
||||||
}
|
}
|
||||||
// Process SRT
|
// Process SRT
|
||||||
else {
|
else {
|
||||||
readBuf = str(boost::format("Dialogue: 0,%s,%s,Default,,0,0,0,,%s") % subStart.GetAssFormated() % subEnd.GetAssFormated() % readBuf);
|
auto line = str(boost::format("Dialogue: 0,%s,%s,Default,,0,0,0,,%s")
|
||||||
boost::replace_all(readBuf, "\r\n", "\\N");
|
% subStart.GetAssFormated()
|
||||||
boost::replace_all(readBuf, "\r", "\\N");
|
% subEnd.GetAssFormated()
|
||||||
boost::replace_all(readBuf, "\n", "\\N");
|
% str_range(readBuf, readBufEnd));
|
||||||
|
boost::replace_all(line, "\r\n", "\\N");
|
||||||
|
boost::replace_all(line, "\r", "\\N");
|
||||||
|
boost::replace_all(line, "\n", "\\N");
|
||||||
|
|
||||||
subList.emplace_back(subList.size(), readBuf);
|
subList.emplace_back(subList.size(), std::move(line));
|
||||||
}
|
}
|
||||||
|
|
||||||
ps->SetProgress(startTime / timecodeScaleLow, totalTime);
|
ps->SetProgress(startTime / timecodeScaleLow, totalTime);
|
||||||
|
@ -238,70 +276,3 @@ bool MatroskaWrapper::HasSubtitles(agi::fs::path const& filename) {
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int StdIoRead(InputStream *_st, ulonglong pos, void *buffer, int count) {
|
|
||||||
auto *st = static_cast<MkvStdIO*>(_st);
|
|
||||||
if (std_fseek(st->fp, pos, SEEK_SET)) {
|
|
||||||
st->error = errno;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto rd = fread(buffer, 1, count, st->fp);
|
|
||||||
if (rd == 0) {
|
|
||||||
if (feof(st->fp))
|
|
||||||
return 0;
|
|
||||||
st->error = errno;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
return rd;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// @brief scan for a signature sig(big-endian) starting at file position pos
|
|
||||||
/// @return position of the first byte of signature or -1 if error/not found
|
|
||||||
longlong StdIoScan(InputStream *st, ulonglong start, unsigned signature) {
|
|
||||||
FILE *fp = static_cast<MkvStdIO*>(st)->fp;
|
|
||||||
|
|
||||||
if (std_fseek(fp, start, SEEK_SET))
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
int c;
|
|
||||||
unsigned cmp = 0;
|
|
||||||
while ((c = getc(fp)) != EOF) {
|
|
||||||
cmp = ((cmp << 8) | c) & 0xffffffff;
|
|
||||||
if (cmp == signature)
|
|
||||||
return std_ftell(fp) - 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
longlong StdIoGetFileSize(InputStream *st) {
|
|
||||||
auto fp = static_cast<MkvStdIO*>(st)->fp;
|
|
||||||
auto cpos = std_ftell(fp);
|
|
||||||
std_fseek(fp, 0, SEEK_END);
|
|
||||||
auto epos = std_ftell(fp);
|
|
||||||
std_fseek(fp, cpos, SEEK_SET);
|
|
||||||
return epos;
|
|
||||||
}
|
|
||||||
|
|
||||||
MkvStdIO::MkvStdIO(agi::fs::path const& filename) {
|
|
||||||
read = StdIoRead;
|
|
||||||
scan = StdIoScan;
|
|
||||||
getcachesize = [](InputStream *) -> unsigned int { return CACHESIZE; };
|
|
||||||
geterror = [](InputStream *st) -> const char * { return strerror(((MkvStdIO *)st)->error); };
|
|
||||||
memalloc = [](InputStream *, size_t size) { return malloc(size); };
|
|
||||||
memrealloc = [](InputStream *, void *mem, size_t size) { return realloc(mem, size); };
|
|
||||||
memfree = [](InputStream *, void *mem) { free(mem); };
|
|
||||||
progress = [](InputStream *, ulonglong, ulonglong) { return 1; };
|
|
||||||
getfilesize = StdIoGetFileSize;
|
|
||||||
|
|
||||||
#ifdef __VISUALC__
|
|
||||||
fp = _wfopen(filename.c_str(), L"rb");
|
|
||||||
#else
|
|
||||||
fp = fopen(filename.c_str(), "rb");
|
|
||||||
#endif
|
|
||||||
if (!fp)
|
|
||||||
throw agi::fs::FileNotFound(filename);
|
|
||||||
|
|
||||||
setvbuf(fp, nullptr, _IOFBF, CACHESIZE);
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in a new issue