// Copyright (c) 2007-2008 Fredrik Mellbin // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #include #include #include #include #include #include #include "indexing.h" #include "wave64writer.h" class MatroskaAudioContext { public: Wave64Writer *W64W; AVCodecContext *CTX; CompressedStream *CS; int64_t CurrentSample; MatroskaAudioContext() { W64W = NULL; CTX = NULL; CS = NULL; CurrentSample = 0; } ~MatroskaAudioContext() { delete W64W; if (CTX) { avcodec_close(CTX); av_free(CTX); } if (CS) cs_Destroy(CS); } }; class FFAudioContext { public: Wave64Writer *W64W; AVCodecContext *CTX; int64_t CurrentSample; FFAudioContext() { W64W = NULL; CTX = NULL; CurrentSample = 0; } ~FFAudioContext() { delete W64W; if (CTX) avcodec_close(CTX); } }; class MatroskaIndexMemory { private: int16_t *DecodingBuffer; MatroskaAudioContext *AudioContexts; MatroskaFile *MF; MatroskaReaderContext *MC; public: MatroskaIndexMemory(int Tracks, int16_t *&DecodingBuffer, MatroskaAudioContext *&AudioContexts, MatroskaFile *MF, MatroskaReaderContext *MC) { DecodingBuffer = new int16_t[AVCODEC_MAX_AUDIO_FRAME_SIZE*10]; AudioContexts = new MatroskaAudioContext[Tracks]; this->DecodingBuffer = DecodingBuffer; this->AudioContexts = AudioContexts; this->MF = MF; this->MC = MC; } ~MatroskaIndexMemory() { delete [] DecodingBuffer; delete [] AudioContexts; mkv_Close(MF); fclose(MC->ST.fp); } }; class FFIndexMemory { private: int16_t *DecodingBuffer; FFAudioContext *AudioContexts; AVFormatContext *FormatContext; public: FFIndexMemory(int Tracks, int16_t *&DecodingBuffer, FFAudioContext *&AudioContexts, AVFormatContext *&FormatContext) { DecodingBuffer = new int16_t[AVCODEC_MAX_AUDIO_FRAME_SIZE*10]; AudioContexts = new FFAudioContext[Tracks]; this->DecodingBuffer = DecodingBuffer; this->AudioContexts = AudioContexts; this->FormatContext = FormatContext; } ~FFIndexMemory() { delete [] DecodingBuffer; delete [] AudioContexts; av_close_input_file(FormatContext); } }; static bool DTSComparison(FrameInfo FI1, FrameInfo FI2) { return FI1.DTS < FI2.DTS; } static void SortTrackIndices(FrameIndex *TrackIndices) { for (FrameIndex::iterator Cur=TrackIndices->begin(); Cur!=TrackIndices->end(); Cur++) std::sort(Cur->begin(), Cur->end(), DTSComparison); } int WriteIndex(const char *IndexFile, FrameIndex *TrackIndices, char *ErrorMsg, unsigned MsgSize) { std::ofstream Index(IndexFile, std::ios::out | std::ios::binary | std::ios::trunc); if (!Index.is_open()) { _snprintf(ErrorMsg, MsgSize, "Failed to open '%s' for writing", IndexFile); return 1; } // Write the index file header IndexHeader IH; IH.Id = INDEXID; IH.Version = INDEXVERSION; IH.Tracks = TrackIndices->size(); IH.Decoder = TrackIndices->Decoder; Index.write(reinterpret_cast(&IH), sizeof(IH)); for (unsigned int i = 0; i < IH.Tracks; i++) { int TT = (*TrackIndices)[i].TT; Index.write(reinterpret_cast(&TT), sizeof(TT)); int Num = (*TrackIndices)[i].TB.Num; Index.write(reinterpret_cast(&Num), sizeof(Num)); int Den = (*TrackIndices)[i].TB.Den; Index.write(reinterpret_cast(&Den), sizeof(Den)); size_t Frames = (*TrackIndices)[i].size(); Index.write(reinterpret_cast(&Frames), sizeof(Frames)); for (size_t j = 0; j < Frames; j++) Index.write(reinterpret_cast(&(TrackIndices->at(i)[j])), sizeof(FrameInfo)); } return 0; } static FrameIndex *MakeMatroskaIndex(const char *SourceFile, int IndexMask, int DumpMask, const char *AudioFile, bool IgnoreDecodeErrors, IndexCallback IP, void *Private, char *ErrorMsg, unsigned MsgSize) { MatroskaFile *MF; char ErrorMessage[256]; MatroskaReaderContext MC; MC.Buffer = NULL; MC.BufferSize = 0; InitStdIoStream(&MC.ST); MC.ST.fp = fopen(SourceFile, "rb"); if (MC.ST.fp == NULL) { _snprintf(ErrorMsg, MsgSize, "Can't open '%s': %s", SourceFile, strerror(errno)); return NULL; } setvbuf(MC.ST.fp, NULL, _IOFBF, CACHESIZE); MF = mkv_OpenEx(&MC.ST.base, 0, 0, ErrorMessage, sizeof(ErrorMessage)); if (MF == NULL) { fclose(MC.ST.fp); _snprintf(ErrorMsg, MsgSize, "Can't parse Matroska file: %s", ErrorMessage); return NULL; } // Audio stuff int16_t *db; MatroskaAudioContext *AudioContexts; MatroskaIndexMemory IM = MatroskaIndexMemory(mkv_GetNumTracks(MF), db, AudioContexts, MF, &MC); for (unsigned int i = 0; i < mkv_GetNumTracks(MF); i++) { if (IndexMask & (1 << i) && mkv_GetTrackInfo(MF, i)->Type == TT_AUDIO) { AVCodecContext *AudioCodecContext = avcodec_alloc_context(); AudioCodecContext->extradata = (uint8_t *)mkv_GetTrackInfo(MF, i)->CodecPrivate; AudioCodecContext->extradata_size = mkv_GetTrackInfo(MF, i)->CodecPrivateSize; AudioContexts[i].CTX = AudioCodecContext; if (mkv_GetTrackInfo(MF, i)->CompEnabled) { AudioContexts[i].CS = cs_Create(MF, i, ErrorMessage, sizeof(ErrorMessage)); if (AudioContexts[i].CS == NULL) { av_free(AudioCodecContext); AudioContexts[i].CTX = NULL; _snprintf(ErrorMsg, MsgSize, "Can't create decompressor: %s", ErrorMessage); return NULL; } } AVCodec *AudioCodec = avcodec_find_decoder(MatroskaToFFCodecID(mkv_GetTrackInfo(MF, i))); if (AudioCodec == NULL) { av_free(AudioCodecContext); AudioContexts[i].CTX = NULL; _snprintf(ErrorMsg, MsgSize, "Audio codec not found"); return NULL; } if (avcodec_open(AudioCodecContext, AudioCodec) < 0) { av_free(AudioCodecContext); AudioContexts[i].CTX = NULL; _snprintf(ErrorMsg, MsgSize, "Could not open audio codec"); return NULL; } } else { IndexMask &= ~(1 << i); } } // int64_t CurrentPos = _ftelli64(MC.ST.fp); _fseeki64(MC.ST.fp, 0, SEEK_END); int64_t SourceSize = _ftelli64(MC.ST.fp); _fseeki64(MC.ST.fp, CurrentPos, SEEK_SET); FrameIndex *TrackIndices = new FrameIndex(); TrackIndices->Decoder = 1; for (unsigned int i = 0; i < mkv_GetNumTracks(MF); i++) TrackIndices->push_back(FrameInfoVector(mkv_TruncFloat(mkv_GetTrackInfo(MF, i)->TimecodeScale), 1000000, mkv_GetTrackInfo(MF, i)->Type - 1)); ulonglong StartTime, EndTime, FilePos; unsigned int Track, FrameFlags, FrameSize; while (mkv_ReadFrame(MF, 0, &Track, &StartTime, &EndTime, &FilePos, &FrameSize, &FrameFlags) == 0) { // Update progress if (IP) { if ((*IP)(0, _ftelli64(MC.ST.fp), SourceSize, Private)) { _snprintf(ErrorMsg, MsgSize, "Cancelled by user"); delete TrackIndices; return NULL; } } // Only create index entries for video for now to save space if (mkv_GetTrackInfo(MF, Track)->Type == TT_VIDEO) { (*TrackIndices)[Track].push_back(FrameInfo(StartTime, (FrameFlags & FRAME_KF) != 0)); } else if (mkv_GetTrackInfo(MF, Track)->Type == TT_AUDIO && (IndexMask & (1 << Track))) { (*TrackIndices)[Track].push_back(FrameInfo(AudioContexts[Track].CurrentSample, FilePos, FrameSize, (FrameFlags & FRAME_KF) != 0)); ReadFrame(FilePos, FrameSize, AudioContexts[Track].CS, MC, ErrorMsg, MsgSize); int Size = FrameSize; uint8_t *Data = MC.Buffer; AVCodecContext *AudioCodecContext = AudioContexts[Track].CTX; while (Size > 0) { int dbsize = AVCODEC_MAX_AUDIO_FRAME_SIZE*10; int Ret = avcodec_decode_audio2(AudioCodecContext, db, &dbsize, Data, Size); if (Ret < 0) { if (IgnoreDecodeErrors) { (*TrackIndices)[Track].clear(); IndexMask &= ~(1 << Track); break; } else { _snprintf(ErrorMsg, MsgSize, "Audio decoding error"); delete TrackIndices; return NULL; } } if (Ret > 0) { Size -= Ret; Data += Ret; } if (dbsize > 0) AudioContexts[Track].CurrentSample += (dbsize * 8) / (av_get_bits_per_sample_format(AudioCodecContext->sample_fmt) * AudioCodecContext->channels); if (dbsize > 0 && (DumpMask & (1 << Track))) { // Delay writer creation until after an audio frame has been decoded. This ensures that all parameters are known when writing the headers. if (!AudioContexts[Track].W64W) { char ABuf[50]; std::string WN(AudioFile); int Offset = StartTime * mkv_TruncFloat(mkv_GetTrackInfo(MF, Track)->TimecodeScale) / (double)1000000; _snprintf(ABuf, sizeof(ABuf), ".%02d.delay.%d.w64", Track, Offset); WN += ABuf; AudioContexts[Track].W64W = new Wave64Writer(WN.c_str(), av_get_bits_per_sample_format(AudioCodecContext->sample_fmt), AudioCodecContext->channels, AudioCodecContext->sample_rate, AudioFMTIsFloat(AudioCodecContext->sample_fmt)); } AudioContexts[Track].W64W->WriteData(db, dbsize); } } } } SortTrackIndices(TrackIndices); return TrackIndices; } FrameIndex *MakeIndex(const char *SourceFile, int IndexMask, int DumpMask, const char *AudioFile, bool IgnoreDecodeErrors, IndexCallback IP, void *Private, char *ErrorMsg, unsigned MsgSize) { AVFormatContext *FormatContext = NULL; IndexMask |= DumpMask; if (av_open_input_file(&FormatContext, SourceFile, NULL, 0, NULL) != 0) { _snprintf(ErrorMsg, MsgSize, "Can't open '%s'", SourceFile); return NULL; } // Do matroska indexing instead? if (!strcmp(FormatContext->iformat->name, "matroska")) { av_close_input_file(FormatContext); return MakeMatroskaIndex(SourceFile, IndexMask, DumpMask, AudioFile, IgnoreDecodeErrors, IP, Private, ErrorMsg, MsgSize); } if (av_find_stream_info(FormatContext) < 0) { av_close_input_file(FormatContext); _snprintf(ErrorMsg, MsgSize, "Couldn't find stream information"); return NULL; } // Audio stuff int16_t *db; FFAudioContext *AudioContexts; FFIndexMemory IM = FFIndexMemory(FormatContext->nb_streams, db, AudioContexts, FormatContext); for (unsigned int i = 0; i < FormatContext->nb_streams; i++) { if (IndexMask & (1 << i) && FormatContext->streams[i]->codec->codec_type == CODEC_TYPE_AUDIO) { AVCodecContext *AudioCodecContext = FormatContext->streams[i]->codec; AVCodec *AudioCodec = avcodec_find_decoder(AudioCodecContext->codec_id); if (AudioCodec == NULL) { _snprintf(ErrorMsg, MsgSize, "Audio codec not found"); return NULL; } if (avcodec_open(AudioCodecContext, AudioCodec) < 0) { _snprintf(ErrorMsg, MsgSize, "Could not open audio codec"); return NULL; } AudioContexts[i].CTX = AudioCodecContext; } else { IndexMask &= ~(1 << i); } } // FrameIndex *TrackIndices = new FrameIndex(); TrackIndices->Decoder = 0; for (unsigned int i = 0; i < FormatContext->nb_streams; i++) TrackIndices->push_back(FrameInfoVector(FormatContext->streams[i]->time_base.num * 1000, FormatContext->streams[i]->time_base.den, FormatContext->streams[i]->codec->codec_type)); AVPacket Packet; while (av_read_frame(FormatContext, &Packet) >= 0) { // Update progress if (IP) { if ((*IP)(0, FormatContext->pb->pos, FormatContext->file_size, Private)) { _snprintf(ErrorMsg, MsgSize, "Cancelled by user"); delete TrackIndices; return NULL; } } // Only create index entries for video for now to save space if (FormatContext->streams[Packet.stream_index]->codec->codec_type == CODEC_TYPE_VIDEO) { (*TrackIndices)[Packet.stream_index].push_back(FrameInfo(Packet.dts, (Packet.flags & PKT_FLAG_KEY) ? 1 : 0)); } else if (FormatContext->streams[Packet.stream_index]->codec->codec_type == CODEC_TYPE_AUDIO && (IndexMask & (1 << Packet.stream_index))) { (*TrackIndices)[Packet.stream_index].push_back(FrameInfo(Packet.dts, AudioContexts[Packet.stream_index].CurrentSample, (Packet.flags & PKT_FLAG_KEY) ? 1 : 0)); AVCodecContext *AudioCodecContext = FormatContext->streams[Packet.stream_index]->codec; int Size = Packet.size; uint8_t *Data = Packet.data; while (Size > 0) { int dbsize = AVCODEC_MAX_AUDIO_FRAME_SIZE*10; int Ret = avcodec_decode_audio2(AudioCodecContext, db, &dbsize, Data, Size); if (Ret < 0) { if (IgnoreDecodeErrors) { (*TrackIndices)[Packet.stream_index].clear(); IndexMask &= ~(1 << Packet.stream_index); break; } else { _snprintf(ErrorMsg, MsgSize, "Audio decoding error"); delete TrackIndices; return NULL; } } if (Ret > 0) { Size -= Ret; Data += Ret; } if (dbsize > 0) AudioContexts[Packet.stream_index].CurrentSample += (dbsize * 8) / (av_get_bits_per_sample_format(AudioCodecContext->sample_fmt) * AudioCodecContext->channels); if (dbsize > 0 && (DumpMask & (1 << Packet.stream_index))) { // Delay writer creation until after an audio frame has been decoded. This ensures that all parameters are known when writing the headers. if (!AudioContexts[Packet.stream_index].W64W) { char ABuf[50]; std::string WN(AudioFile); int Offset = (Packet.dts * FormatContext->streams[Packet.stream_index]->time_base.num) / (double)(FormatContext->streams[Packet.stream_index]->time_base.den * 1000); _snprintf(ABuf, sizeof(ABuf), ".%02d.delay.%d.w64", Packet.stream_index, Offset); WN += ABuf; AudioContexts[Packet.stream_index].W64W = new Wave64Writer(WN.c_str(), av_get_bits_per_sample_format(AudioCodecContext->sample_fmt), AudioCodecContext->channels, AudioCodecContext->sample_rate, AudioFMTIsFloat(AudioCodecContext->sample_fmt)); } AudioContexts[Packet.stream_index].W64W->WriteData(db, dbsize); } } } av_free_packet(&Packet); } SortTrackIndices(TrackIndices); return TrackIndices; } FrameIndex *ReadIndex(const char *IndexFile, char *ErrorMsg, unsigned MsgSize) { std::ifstream Index(IndexFile, std::ios::in | std::ios::binary); if (!Index.is_open()) { _snprintf(ErrorMsg, MsgSize, "Failed to open '%s' for reading", IndexFile); return NULL; } // Read the index file header IndexHeader IH; Index.read(reinterpret_cast(&IH), sizeof(IH)); if (IH.Id != INDEXID) { _snprintf(ErrorMsg, MsgSize, "'%s' is not a valid index file", IndexFile); return NULL; } if (IH.Version != INDEXVERSION) { _snprintf(ErrorMsg, MsgSize, "'%s' is not the expected index version", IndexFile); return NULL; } FrameIndex *TrackIndices = new FrameIndex(); try { TrackIndices->Decoder = IH.Decoder; for (unsigned int i = 0; i < IH.Tracks; i++) { // Read how many records belong to the current stream int TT; Index.read(reinterpret_cast(&TT), sizeof(TT)); int Num; Index.read(reinterpret_cast(&Num), sizeof(Num)); int Den; Index.read(reinterpret_cast(&Den), sizeof(Den)); size_t Frames; Index.read(reinterpret_cast(&Frames), sizeof(Frames)); TrackIndices->push_back(FrameInfoVector(Num, Den, TT)); FrameInfo FI(0, false); for (size_t j = 0; j < Frames; j++) { Index.read(reinterpret_cast(&FI), sizeof(FrameInfo)); TrackIndices->at(i).push_back(FI); } } } catch (...) { delete TrackIndices; _snprintf(ErrorMsg, MsgSize, "Unknown error while reading index information in '%s'", IndexFile); return NULL; } return TrackIndices; } int FrameInfoVector::WriteTimecodes(const char *TimecodeFile, char *ErrorMsg, unsigned MsgSize) { std::ofstream Timecodes(TimecodeFile, std::ios::out | std::ios::trunc); if (!Timecodes.is_open()) { _snprintf(ErrorMsg, MsgSize, "Failed to open '%s' for writing", TimecodeFile); return 1; } Timecodes << "# timecode format v2\n"; for (iterator Cur=begin(); Cur!=end(); Cur++) Timecodes << (int64_t)((Cur->DTS * TB.Num) / (double)TB.Den) << "\n"; return 0; } int FrameInfoVector::FrameFromDTS(int64_t DTS) { for (int i = 0; i < static_cast(size()); i++) if (at(i).DTS == DTS) return i; return -1; } int FrameInfoVector::ClosestFrameFromDTS(int64_t DTS) { int Frame = 0; int64_t BestDiff = 0xFFFFFFFFFFFFFFLL; // big number for (int i = 0; i < static_cast(size()); i++) { int64_t CurrentDiff = FFABS(at(i).DTS - DTS); if (CurrentDiff < BestDiff) { BestDiff = CurrentDiff; Frame = i; } } return Frame; } int FrameInfoVector::FindClosestKeyFrame(int Frame) { Frame = FFMIN(FFMAX(Frame, 0), size() - 1); for (int i = Frame; i > 0; i--) if (at(i).KeyFrame) return i; return 0; } FrameInfoVector::FrameInfoVector() { this->TT = 0; this->TB.Num = 0; this->TB.Den = 0; } FrameInfoVector::FrameInfoVector(int Num, int Den, int TT) { this->TT = TT; this->TB.Num = Num; this->TB.Den = Den; }