Fix an old bug that would drop the first or second video frame
Greatly improved audio decoding by adding a small internal cache

Originally committed to SVN as r2894.
This commit is contained in:
Fredrik Mellbin 2009-05-03 19:25:54 +00:00
parent 869e5c7594
commit 0a221706d5
8 changed files with 216 additions and 78 deletions

View file

@ -25,7 +25,79 @@
#define _snprintf snprintf
#endif
TAudioBlock::TAudioBlock(int64_t Start, int64_t Samples, uint8_t *SrcData, int64_t SrcBytes) {
this->Start = Start;
this->Samples = Samples;
Data = new uint8_t[SrcBytes];
memcpy(Data, SrcData, SrcBytes);
}
TAudioBlock::~TAudioBlock() {
delete[] Data;
}
TAudioCache::TAudioCache() {
MaxCacheBlocks = 0;
BytesPerSample = 0;
}
TAudioCache::~TAudioCache() {
for (TAudioCache::iterator it=begin(); it != end(); it++)
delete *it;
}
void TAudioCache::Initialize(int BytesPerSample, int MaxCacheBlocks) {
this->BytesPerSample = BytesPerSample;
this->MaxCacheBlocks = MaxCacheBlocks;
}
void TAudioCache::CacheBlock(int64_t Start, int64_t Samples, uint8_t *SrcData) {
if (BytesPerSample > 0) {
for (TAudioCache::iterator it=begin(); it != end(); it++) {
if ((*it)->Start == Start) {
delete *it;
erase(it);
break;
}
}
push_front(new TAudioBlock(Start, Samples, SrcData, Samples * BytesPerSample));
if (size() >= MaxCacheBlocks) {
delete back();
pop_back();
}
}
}
bool TAudioCache::AudioBlockComp(TAudioBlock *A, TAudioBlock *B) {
return A->Start < B->Start;
}
int64_t TAudioCache::FillRequest(int64_t Start, int64_t Samples, uint8_t *Dst) {
// May be better to move used blocks to the front
std::list<TAudioBlock *> UsedBlocks;
for (TAudioCache::iterator it=begin(); it != end(); it++) {
int64_t SrcOffset = FFMAX(0, Start - (*it)->Start);
int64_t DstOffset = FFMAX(0, (*it)->Start - Start);
int64_t CopySamples = FFMIN((*it)->Samples - SrcOffset, Samples - DstOffset);
if (CopySamples > 0) {
memcpy(Dst + DstOffset * BytesPerSample, (*it)->Data + SrcOffset * BytesPerSample, CopySamples * BytesPerSample);
UsedBlocks.push_back(*it);
}
}
UsedBlocks.sort(AudioBlockComp);
int64_t Ret = Start;
for (std::list<TAudioBlock *>::iterator it = UsedBlocks.begin(); it != UsedBlocks.end(); it++) {
if (it == UsedBlocks.begin() || Ret == (*it)->Start)
Ret = (*it)->Start + (*it)->Samples;
else
break;
}
return FFMIN(Ret, Start + Samples);
}
AudioBase::AudioBase() {
CurrentSample = 0;
DecodingBuffer = new uint8_t[AVCODEC_MAX_AUDIO_FRAME_SIZE * 10];
};
@ -88,11 +160,14 @@ FFAudioSource::FFAudioSource(const char *SourceFile, int Track, FrameIndex *Trac
}
// Always try to decode a frame to make sure all required parameters are known
uint8_t DummyBuf[512];
if (GetAudio(DummyBuf, 0, 1, ErrorMsg, MsgSize)) {
int64_t Dummy;
if (DecodeNextAudioBlock(DecodingBuffer, &Dummy, ErrorMsg, MsgSize) < 0) {
Free(true);
throw ErrorMsg;
}
av_seek_frame(FormatContext, AudioTrack, Frames[0].DTS, AVSEEK_FLAG_BACKWARD);
avcodec_flush_buffers(CodecContext);
AP.BitsPerSample = av_get_bits_per_sample_format(CodecContext->sample_fmt);
AP.Channels = CodecContext->channels;;
@ -105,6 +180,8 @@ FFAudioSource::FFAudioSource(const char *SourceFile, int Track, FrameIndex *Trac
_snprintf(ErrorMsg, MsgSize, "Codec returned zero size audio");
throw ErrorMsg;
}
AudioCache.Initialize((AP.Channels *AP.BitsPerSample) / 8, 50);
}
int FFAudioSource::DecodeNextAudioBlock(uint8_t *Buf, int64_t *Count, char *ErrorMsg, unsigned MsgSize) {
@ -112,8 +189,8 @@ int FFAudioSource::DecodeNextAudioBlock(uint8_t *Buf, int64_t *Count, char *Erro
int Ret = -1;
*Count = 0;
AVPacket Packet, TempPacket;
init_null_packet(&Packet);
init_null_packet(&TempPacket);
InitNullPacket(&Packet);
InitNullPacket(&TempPacket);
while (av_read_frame(FormatContext, &Packet) >= 0) {
if (Packet.stream_index == AudioTrack) {
@ -151,59 +228,76 @@ Done:
int FFAudioSource::GetAudio(void *Buf, int64_t Start, int64_t Count, char *ErrorMsg, unsigned MsgSize) {
const int64_t SizeConst = (av_get_bits_per_sample_format(CodecContext->sample_fmt) * CodecContext->channels) / 8;
size_t CurrentAudioBlock = FFMAX((int64_t)FindClosestAudioKeyFrame(Start) - 50, (int64_t)0);
memset(Buf, 0, SizeConst * Count);
AVPacket Packet;
avcodec_flush_buffers(CodecContext);
av_seek_frame(FormatContext, AudioTrack, Frames[CurrentAudioBlock].DTS, AVSEEK_FLAG_BACKWARD);
int PreDecBlocks = 0;
uint8_t *DstBuf = static_cast<uint8_t *>(Buf);
// Establish where we actually are
// Trigger on packet dts difference since groups can otherwise be indistinguishable
int64_t LastDTS = - 1;
while (av_read_frame(FormatContext, &Packet) >= 0) {
if (Packet.stream_index == AudioTrack) {
if (LastDTS < 0) {
LastDTS = Packet.dts;
} else if (LastDTS != Packet.dts) {
for (size_t i = 0; i < Frames.size(); i++)
if (Frames[i].DTS == Packet.dts) {
// The current match was consumed
CurrentAudioBlock = i + 1;
break;
}
// Fill with everything in the cache
int64_t CacheEnd = AudioCache.FillRequest(Start, Count, DstBuf);
// Was everything in the cache?
if (CacheEnd == Start + Count)
return 0;
av_free_packet(&Packet);
break;
size_t CurrentAudioBlock;
// Is seeking required to decode the requested samples?
// if (!(CurrentSample >= Start && CurrentSample <= CacheEnd)) {
if (CurrentSample != CacheEnd) {
PreDecBlocks = 15;
CurrentAudioBlock = FFMAX((int64_t)FindClosestAudioKeyFrame(CacheEnd) - PreDecBlocks - 20, (int64_t)0);
av_seek_frame(FormatContext, AudioTrack, Frames[CurrentAudioBlock].DTS, AVSEEK_FLAG_BACKWARD);
avcodec_flush_buffers(CodecContext);
AVPacket Packet;
InitNullPacket(&Packet);
// Establish where we actually are
// Trigger on packet dts difference since groups can otherwise be indistinguishable
int64_t LastDTS = - 1;
while (av_read_frame(FormatContext, &Packet) >= 0) {
if (Packet.stream_index == AudioTrack) {
if (LastDTS < 0) {
LastDTS = Packet.dts;
} else if (LastDTS != Packet.dts) {
for (size_t i = 0; i < Frames.size(); i++)
if (Frames[i].DTS == Packet.dts) {
// The current match was consumed
CurrentAudioBlock = i + 1;
break;
}
av_free_packet(&Packet);
break;
}
}
}
av_free_packet(&Packet);
av_free_packet(&Packet);
}
} else {
CurrentAudioBlock = FindClosestAudioKeyFrame(CurrentSample);
}
uint8_t *DstBuf = (uint8_t *)Buf;
int64_t RemainingSamples = Count;
int64_t DecodeCount;
do {
int64_t DecodeStart = Frames[CurrentAudioBlock].SampleStart;
int Ret = DecodeNextAudioBlock(DecodingBuffer, &DecodeCount, ErrorMsg, MsgSize);
if (Ret < 0) {
// FIXME
//Env->ThrowError("Bleh, bad audio decoding");
}
// Cache the block if enough blocks before it have been decoded to avoid garbage
if (PreDecBlocks == 0) {
AudioCache.CacheBlock(Frames[CurrentAudioBlock].SampleStart, DecodeCount, DecodingBuffer);
CacheEnd = AudioCache.FillRequest(CacheEnd, Start + Count - CacheEnd, DstBuf + (CacheEnd - Start) * SizeConst);
} else {
PreDecBlocks--;
}
CurrentAudioBlock++;
int64_t OffsetBytes = SizeConst * FFMAX(0, Start - DecodeStart);
int64_t CopyBytes = FFMAX(0, SizeConst * FFMIN(RemainingSamples, DecodeCount - FFMAX(0, Start - DecodeStart)));
memcpy(DstBuf, DecodingBuffer + OffsetBytes, CopyBytes);
DstBuf += CopyBytes;
if (SizeConst)
RemainingSamples -= CopyBytes / SizeConst;
} while (RemainingSamples > 0 && CurrentAudioBlock < Frames.size());
if (CurrentAudioBlock < Frames.size())
CurrentSample = Frames[CurrentAudioBlock].SampleStart;
} while (Start + Count - CacheEnd > 0 && CurrentAudioBlock < Frames.size());
return 0;
}
@ -282,11 +376,12 @@ MatroskaAudioSource::MatroskaAudioSource(const char *SourceFile, int Track, Fram
}
// Always try to decode a frame to make sure all required parameters are known
uint8_t DummyBuf[512];
if (GetAudio(DummyBuf, 0, 1, ErrorMsg, MsgSize)) {
int64_t Dummy;
if (DecodeNextAudioBlock(DecodingBuffer, &Dummy, Frames[0].FilePos, Frames[0].FrameSize, ErrorMsg, MsgSize) < 0) {
Free(true);
throw ErrorMsg;
}
avcodec_flush_buffers(CodecContext);
AP.BitsPerSample = av_get_bits_per_sample_format(CodecContext->sample_fmt);
AP.Channels = CodecContext->channels;;
@ -299,6 +394,8 @@ MatroskaAudioSource::MatroskaAudioSource(const char *SourceFile, int Track, Fram
_snprintf(ErrorMsg, MsgSize, "Codec returned zero size audio");
throw ErrorMsg;
}
AudioCache.Initialize((AP.Channels *AP.BitsPerSample) / 8, 50);
}
MatroskaAudioSource::~MatroskaAudioSource() {
@ -307,34 +404,49 @@ MatroskaAudioSource::~MatroskaAudioSource() {
int MatroskaAudioSource::GetAudio(void *Buf, int64_t Start, int64_t Count, char *ErrorMsg, unsigned MsgSize) {
const int64_t SizeConst = (av_get_bits_per_sample_format(CodecContext->sample_fmt) * CodecContext->channels) / 8;
size_t CurrentAudioBlock = FFMAX((int64_t)FindClosestAudioKeyFrame(Start) - 10, (int64_t)0);
avcodec_flush_buffers(CodecContext);
memset(Buf, 0, SizeConst * Count);
uint8_t *DstBuf = (uint8_t *)Buf;
int64_t RemainingSamples = Count;
int PreDecBlocks = 0;
uint8_t *DstBuf = static_cast<uint8_t *>(Buf);
// Fill with everything in the cache
int64_t CacheEnd = AudioCache.FillRequest(Start, Count, DstBuf);
// Was everything in the cache?
if (CacheEnd == Start + Count)
return 0;
size_t CurrentAudioBlock;
// Is seeking required to decode the requested samples?
// if (!(CurrentSample >= Start && CurrentSample <= CacheEnd)) {
if (CurrentSample != CacheEnd) {
PreDecBlocks = 15;
CurrentAudioBlock = FFMAX((int64_t)FindClosestAudioKeyFrame(CacheEnd) - PreDecBlocks, (int64_t)0);
avcodec_flush_buffers(CodecContext);
} else {
CurrentAudioBlock = FindClosestAudioKeyFrame(CurrentSample);
}
int64_t DecodeCount;
do {
int64_t DecodeStart = Frames[CurrentAudioBlock].SampleStart;
int Ret = DecodeNextAudioBlock(DecodingBuffer, &DecodeCount, Frames[CurrentAudioBlock].FilePos, Frames[CurrentAudioBlock].FrameSize, ErrorMsg, MsgSize);
if (Ret < 0) {
// FIXME
//Env->ThrowError("Bleh, bad audio decoding");
}
// Cache the block if enough blocks before it have been decoded to avoid garbage
if (PreDecBlocks == 0) {
AudioCache.CacheBlock(Frames[CurrentAudioBlock].SampleStart, DecodeCount, DecodingBuffer);
CacheEnd = AudioCache.FillRequest(CacheEnd, Start + Count - CacheEnd, DstBuf + (CacheEnd - Start) * SizeConst);
} else {
PreDecBlocks--;
}
CurrentAudioBlock++;
int64_t OffsetBytes = SizeConst * FFMAX(0, Start - DecodeStart);
int64_t CopyBytes = FFMAX(0, SizeConst * FFMIN(RemainingSamples, DecodeCount - FFMAX(0, Start - DecodeStart)));
memcpy(DstBuf, DecodingBuffer + OffsetBytes, CopyBytes);
DstBuf += CopyBytes;
if (SizeConst)
RemainingSamples -= CopyBytes / SizeConst;
} while (RemainingSamples > 0 && CurrentAudioBlock < Frames.size());
if (CurrentAudioBlock < Frames.size())
CurrentSample = Frames[CurrentAudioBlock].SampleStart;
} while (Start + Count - CacheEnd > 0 && CurrentAudioBlock < Frames.size());
return 0;
}
@ -344,7 +456,7 @@ int MatroskaAudioSource::DecodeNextAudioBlock(uint8_t *Buf, int64_t *Count, uint
int Ret = -1;
*Count = 0;
AVPacket TempPacket;
init_null_packet(&TempPacket);
InitNullPacket(&TempPacket);
// FIXME check return
ReadFrame(FilePos, FrameSize, CS, MC, ErrorMsg, MsgSize);

View file

@ -27,6 +27,8 @@ extern "C" {
}
#include <vector>
#include <list>
#include <memory>
#include "indexing.h"
#include "utils.h"
#include "ffms.h"
@ -42,8 +44,33 @@ extern "C" {
# include "guids.h"
#endif
class TAudioBlock {
public:
int64_t Start;
int64_t Samples;
uint8_t *Data;
TAudioBlock(int64_t Start, int64_t Samples, uint8_t *SrcData, int64_t SrcBytes);
~TAudioBlock();
};
class TAudioCache : protected std::list<TAudioBlock *> {
private:
int MaxCacheBlocks;
int BytesPerSample;
static bool AudioBlockComp(TAudioBlock *A, TAudioBlock *B);
public:
TAudioCache();
~TAudioCache();
void Initialize(int BytesPerSample, int MaxCacheBlocks);
void CacheBlock(int64_t Start, int64_t Samples, uint8_t *SrcData);
int64_t FillRequest(int64_t Start, int64_t Samples, uint8_t *Dst);
};
class AudioBase {
protected:
TAudioCache AudioCache;
int64_t CurrentSample;
uint8_t *DecodingBuffer;
FrameInfoVector Frames;
AVCodecContext *CodecContext;

View file

@ -234,6 +234,8 @@ Note that --enable-w32threads is required for multithreaded decoding to work.
<h2>Changes</h2>
<ul>
<li>2.00 beta 8<ul>
<li>Improved the audio decoding quality a lot by adding a simple cache, no more seeking is done when playing a file linearly and pops and other artifacts should be much more uncommon</li>
<li>Fixed a bug that would most of the time drop frame 0 and sometimes frame 1</li>
<li>Updated Haali's matroska parser code to the latest version</li>
<li>Updated FFmpeg to rev X</li>
</ul></li>

View file

@ -87,8 +87,8 @@ VideoBase::VideoBase() {
PPContext = NULL;
PPMode = NULL;
SWS = NULL;
LastFrameNum = -1;
CurrentFrame = 0;
LastFrameNum = 0;
CurrentFrame = 1;
CodecContext = NULL;
DecodeFrame = avcodec_alloc_frame();
PPFrame = DecodeFrame;
@ -269,7 +269,6 @@ FFVideoSource::FFVideoSource(const char *SourceFile, int Track, FrameIndex *Trac
// Cannot "output" to PPFrame without doing all other initialization
// This is the additional mess required for seekmode=-1 to work in a reasonable way
OutputFrame(DecodeFrame);
LastFrameNum = 0;
// Set AR variables
VP.SARNum = CodecContext->sample_aspect_ratio.num;
@ -282,7 +281,7 @@ FFVideoSource::~FFVideoSource() {
int FFVideoSource::DecodeNextFrame(AVFrame *AFrame, int64_t *AStartTime, char *ErrorMsg, unsigned MsgSize) {
AVPacket Packet;
init_null_packet(&Packet);
InitNullPacket(&Packet);
int FrameFinished = 0;
*AStartTime = -1;
@ -303,7 +302,7 @@ int FFVideoSource::DecodeNextFrame(AVFrame *AFrame, int64_t *AStartTime, char *E
// Flush the last frames
if (CodecContext->has_b_frames) {
AVPacket NullPacket;
init_null_packet(&NullPacket);
InitNullPacket(&NullPacket);
avcodec_decode_video2(CodecContext, AFrame, &FrameFinished, &NullPacket);
}
@ -495,7 +494,6 @@ MatroskaVideoSource::MatroskaVideoSource(const char *SourceFile, int Track,
// Output the already decoded frame so it isn't wasted
OutputFrame(DecodeFrame);
LastFrameNum = 0;
// Set AR variables
VP.SARNum = TI->AV.Video.DisplayWidth * TI->AV.Video.PixelHeight;
@ -516,7 +514,7 @@ int MatroskaVideoSource::DecodeNextFrame(AVFrame *AFrame, int64_t *AFirstStartTi
int FrameFinished = 0;
*AFirstStartTime = -1;
AVPacket Packet;
init_null_packet(&Packet);
InitNullPacket(&Packet);
ulonglong StartTime, EndTime, FilePos;
unsigned int Track, FrameFlags, FrameSize;
@ -543,7 +541,7 @@ int MatroskaVideoSource::DecodeNextFrame(AVFrame *AFrame, int64_t *AFirstStartTi
// Flush the last frames
if (CodecContext->has_b_frames) {
AVPacket NullPacket;
init_null_packet(&NullPacket);
InitNullPacket(&NullPacket);
avcodec_decode_video2(CodecContext, AFrame, &FrameFinished, &NullPacket);
}
@ -735,7 +733,6 @@ HaaliVideoSource::HaaliVideoSource(const char *SourceFile, int Track,
// Output the already decoded frame so it isn't wasted
OutputFrame(DecodeFrame);
LastFrameNum = 0;
// Set AR variables
CComVariant pV;
@ -756,7 +753,7 @@ int HaaliVideoSource::DecodeNextFrame(AVFrame *AFrame, int64_t *AFirstStartTime,
int FrameFinished = 0;
*AFirstStartTime = -1;
AVPacket Packet;
init_null_packet(&Packet);
InitNullPacket(&Packet);
for (;;) {
CComPtr<IMMFrame> pMMF;
@ -789,7 +786,7 @@ int HaaliVideoSource::DecodeNextFrame(AVFrame *AFrame, int64_t *AFirstStartTime,
// Flush the last frames
if (CodecContext->has_b_frames) {
AVPacket NullPacket;
init_null_packet(&NullPacket);
InitNullPacket(&NullPacket);
avcodec_decode_video2(CodecContext, AFrame, &FrameFinished, &NullPacket);
}

View file

@ -332,7 +332,7 @@ static FrameIndex *MakeHaaliIndex(const char *SourceFile, int IndexMask, int Dum
//
AVPacket TempPacket;
init_null_packet(&TempPacket);
InitNullPacket(&TempPacket);
for (;;) {
if (IP) {
@ -489,7 +489,7 @@ static FrameIndex *MakeMatroskaIndex(const char *SourceFile, int IndexMask, int
ulonglong StartTime, EndTime, FilePos;
unsigned int Track, FrameFlags, FrameSize;
AVPacket TempPacket;
init_null_packet(&TempPacket);
InitNullPacket(&TempPacket);
while (mkv_ReadFrame(MF, 0, &Track, &StartTime, &EndTime, &FilePos, &FrameSize, &FrameFlags) == 0) {
// Update progress
@ -629,8 +629,8 @@ FrameIndex *MakeIndex(const char *SourceFile, int IndexMask, int DumpMask, const
FormatContext->streams[i]->codec->codec_type));
AVPacket Packet, TempPacket;
init_null_packet(&Packet);
init_null_packet(&TempPacket);
InitNullPacket(&Packet);
InitNullPacket(&TempPacket);
while (av_read_frame(FormatContext, &Packet) >= 0) {
// Update progress
if (IP) {

View file

@ -25,7 +25,7 @@
#include "utils.h"
#include "ffms.h"
#define INDEXVERSION 18
#define INDEXVERSION 20
#define INDEXID 0x53920873
struct IndexHeader {

View file

@ -131,7 +131,7 @@ bool AudioFMTIsFloat(SampleFormat FMT){
}
}
void init_null_packet(AVPacket *pkt) {
void InitNullPacket(AVPacket *pkt) {
av_init_packet(pkt);
pkt->data = NULL;
pkt->size = 0;

View file

@ -62,7 +62,7 @@ public:
int GetCPUFlags();
int ReadFrame(uint64_t FilePos, unsigned int &FrameSize, CompressedStream *CS, MatroskaReaderContext &Context, char *ErrorMsg, unsigned MsgSize);
bool AudioFMTIsFloat(SampleFormat FMT);
void init_null_packet(AVPacket *pkt);
void InitNullPacket(AVPacket *pkt);
#ifdef HAALISOURCE
unsigned vtSize(VARIANT &vt);
void vtCopy(VARIANT& vt,void *dest);