Aegisub/FFmpegSource/ffmpegsource.cpp
Fredrik Mellbin 02e15b0ac5 FFmpegSource 1.3
Originally committed to SVN as r1421.
2007-07-13 05:15:28 +00:00

1065 lines
32 KiB
C++

#include <windows.h>
#include <stdio.h>
#include <vector>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <io.h>
extern "C" {
#include <ffmpeg\avformat.h>
#include <ffmpeg\avcodec.h>
#include <ffmpeg\swscale.h>
}
#include "MatroskaParser.h"
#include "avisynth.h"
#include "stdiostream.c"
#include "matroskacodecs.c"
class FFBase : public IClip {
protected:
VideoInfo VI;
public:
bool __stdcall GetParity(int n) { return false; }
void __stdcall GetAudio(void* buf, __int64 start, __int64 count, IScriptEnvironment* env) { }
void __stdcall SetCacheHints(int cachehints, int frame_range) { }
PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env) { return NULL; }
const VideoInfo& __stdcall GetVideoInfo() { return VI; }
FFBase() {
memset(&VI, 0, sizeof(VI));
}
};
class FFVideoBase : public FFBase {
private:
SwsContext *SWS;
int ConvertToFormat;
int ConvertFromFormat;
protected:
struct FrameInfo {
int64_t DTS;
bool KeyFrame;
FrameInfo(int64_t _DTS, bool _KeyFrame) : DTS(_DTS), KeyFrame(_KeyFrame) {};
};
std::vector<FrameInfo> FrameToDTS;
int FindClosestKeyFrame(int Frame) {
for (unsigned int i = Frame; i > 0; i--)
if (FrameToDTS[i].KeyFrame)
return i;
return 0;
}
int FrameFromDTS(int64_t DTS) {
for (unsigned int i = 0; i < FrameToDTS.size(); i++)
if (FrameToDTS[i].DTS == DTS)
return i;
return -1;
}
int ClosestFrameFromDTS(int64_t DTS) {
int Frame = 0;
int64_t BestDiff = 0xFFFFFFFFFFFFFF;
for (unsigned int i = 0; i < FrameToDTS.size(); i++) {
int64_t CurrentDiff = FFABS(FrameToDTS[i].DTS - DTS);
if (CurrentDiff < BestDiff) {
BestDiff = CurrentDiff;
Frame = i;
}
}
return Frame;
}
bool LoadFrameInfoFromFile(FILE *CacheFile) {
if (ftell(CacheFile)) {
rewind(CacheFile);
fscanf(CacheFile, "%d\n", &VI.num_frames);
for (int i = 0; i < VI.num_frames; i++) {
int64_t DTSTemp;
int KFTemp;
fscanf(CacheFile, "%lld %d\n", &DTSTemp, &KFTemp);
FrameToDTS.push_back(FrameInfo(DTSTemp, KFTemp != 0));
}
return true;
}
return false;
}
void SetOutputFormat(int CurrentFormat, IScriptEnvironment *Env) {
int Loss;
int BestFormat = avcodec_find_best_pix_fmt((1 << PIX_FMT_YUV420P) | (1 << PIX_FMT_YUYV422) | (1 << PIX_FMT_RGB32) | (1 << PIX_FMT_BGR24), CurrentFormat, 1 /* Required to prevent pointless RGB32 => RGB24 conversion */, &Loss);
switch (BestFormat) {
case PIX_FMT_YUV420P: VI.pixel_type = VideoInfo::CS_I420; break;
case PIX_FMT_YUYV422: VI.pixel_type = VideoInfo::CS_YUY2; break;
case PIX_FMT_RGB32: VI.pixel_type = VideoInfo::CS_BGR32; break;
case PIX_FMT_BGR24: VI.pixel_type = VideoInfo::CS_BGR24; break;
default:
Env->ThrowError("FFmpegSource: No suitable output format found");
}
if (BestFormat != CurrentFormat) {
ConvertFromFormat = CurrentFormat;
ConvertToFormat = BestFormat;
SWS = sws_getContext(VI.width, VI.height, ConvertFromFormat, VI.width, VI.height, ConvertToFormat, SWS_BICUBIC, NULL, NULL, NULL);
}
}
PVideoFrame OutputFrame(AVFrame *Frame, IScriptEnvironment *Env) {
PVideoFrame Dst = Env->NewVideoFrame(VI);
if (ConvertToFormat != PIX_FMT_NONE && VI.pixel_type == VideoInfo::CS_I420) {
uint8_t *DstData[3] = {Dst->GetWritePtr(PLANAR_Y), Dst->GetWritePtr(PLANAR_U), Dst->GetWritePtr(PLANAR_V)};
int DstStride[3] = {Dst->GetPitch(PLANAR_Y), Dst->GetPitch(PLANAR_U), Dst->GetPitch(PLANAR_V)};
sws_scale(SWS, Frame->data, Frame->linesize, 0, VI.height, DstData, DstStride);
} else if (ConvertToFormat != PIX_FMT_NONE) {
if (VI.IsRGB()) {
uint8_t *DstData[1] = {Dst->GetWritePtr() + Dst->GetPitch() * (Dst->GetHeight() - 1)};
int DstStride[1] = {-Dst->GetPitch()};
sws_scale(SWS, Frame->data, Frame->linesize, 0, VI.height, DstData, DstStride);
} else {
uint8_t *DstData[1] = {Dst->GetWritePtr()};
int DstStride[1] = {Dst->GetPitch()};
sws_scale(SWS, Frame->data, Frame->linesize, 0, VI.height, DstData, DstStride);
}
} else if (VI.pixel_type == VideoInfo::CS_I420) {
Env->BitBlt(Dst->GetWritePtr(PLANAR_Y), Dst->GetPitch(PLANAR_Y), Frame->data[0], Frame->linesize[0], Dst->GetRowSize(PLANAR_Y), Dst->GetHeight(PLANAR_Y));
Env->BitBlt(Dst->GetWritePtr(PLANAR_U), Dst->GetPitch(PLANAR_U), Frame->data[1], Frame->linesize[1], Dst->GetRowSize(PLANAR_U), Dst->GetHeight(PLANAR_U));
Env->BitBlt(Dst->GetWritePtr(PLANAR_V), Dst->GetPitch(PLANAR_V), Frame->data[2], Frame->linesize[2], Dst->GetRowSize(PLANAR_V), Dst->GetHeight(PLANAR_V));
} else {
if (VI.IsRGB())
Env->BitBlt(Dst->GetWritePtr() + Dst->GetPitch() * (Dst->GetHeight() - 1), -Dst->GetPitch(), Frame->data[0], Frame->linesize[0], Dst->GetRowSize(), Dst->GetHeight());
else
Env->BitBlt(Dst->GetWritePtr(), Dst->GetPitch(), Frame->data[0], Frame->linesize[0], Dst->GetRowSize(), Dst->GetHeight());
}
return Dst;
}
public:
FFVideoBase() {
SWS = NULL;
ConvertToFormat = PIX_FMT_NONE;
ConvertFromFormat = PIX_FMT_NONE;
}
};
class FFAudioBase : public FFBase {
protected:
struct SampleInfo {
int64_t SampleStart;
int64_t DTS;
int64_t FilePos;
bool KeyFrame;
SampleInfo(int64_t _SampleStart, int64_t _DTS, int64_t _FilePos, bool _KeyFrame) {
DTS = _DTS;
SampleStart = _SampleStart;
FilePos = _FilePos;
KeyFrame = _KeyFrame;
}
};
std::vector<SampleInfo> SI;
int FindClosestAudioKeyFrame(int64_t Sample) {
for (unsigned int i = SI.size() - 1; i > 0; i--)
if (SI[i].SampleStart <= Sample && SI[i].KeyFrame)
return i;
return 0;
}
int SampleFromDTS(int64_t DTS) {
for (unsigned int i = 0; i < SI.size(); i++)
if (SI[i].DTS == DTS)
return i;
return -1;
}
int ClosestSampleFromDTS(int64_t DTS) {
int Index = 0;
int64_t BestDiff = 0xFFFFFFFFFFFFFF;
for (unsigned int i = 0; i < SI.size(); i++) {
int64_t CurrentDiff = FFABS(SI[i].DTS - DTS);
if (CurrentDiff < BestDiff) {
BestDiff = CurrentDiff;
Index = i;
}
}
return Index;
}
};
class FFMatroskaBase {
private:
StdIoStream ST;
unsigned int BufferSize;
CompressedStream *CS;
protected:
AVCodecContext CodecContext;
AVCodec *Codec;
MatroskaFile *MF;
int Track;
char ErrorMessage[256];
uint8_t *Buffer;
FFMatroskaBase(const char *Source, int _Track, unsigned char TrackType, IScriptEnvironment* Env) {
Track = _Track;
BufferSize = 0;
Buffer = NULL;
CS = NULL;
memset(&ST,0,sizeof(ST));
ST.base.read = (int (__cdecl *)(InputStream *,ulonglong,void *,int))StdIoRead;
ST.base.scan = (longlong (__cdecl *)(InputStream *,ulonglong,unsigned int))StdIoScan;
ST.base.getcachesize = (unsigned int (__cdecl *)(InputStream *))StdIoGetCacheSize;
ST.base.geterror = (const char *(__cdecl *)(InputStream *))StdIoGetLastError;
ST.base.memalloc = (void *(__cdecl *)(InputStream *,size_t))StdIoMalloc;
ST.base.memrealloc = (void *(__cdecl *)(InputStream *,void *,size_t))StdIoRealloc;
ST.base.memfree = (void (__cdecl *)(InputStream *,void *)) StdIoFree;
ST.base.progress = (int (__cdecl *)(InputStream *,ulonglong,ulonglong))StdIoProgress;
ST.fp = fopen(Source, "rb");
if (ST.fp == NULL)
Env->ThrowError("FFmpegSource: Can't open '%s': %s", Source, strerror(errno));
setvbuf(ST.fp, NULL, _IOFBF, CACHESIZE);
MF = mkv_OpenEx(&ST.base, 0, 0, ErrorMessage, sizeof(ErrorMessage));
if (MF == NULL) {
fclose(ST.fp);
Env->ThrowError("FFmpegSource: Can't parse Matroska file: %s", ErrorMessage);
}
if (Track < 0)
for (unsigned int i = 0; i < mkv_GetNumTracks(MF); i++)
if (mkv_GetTrackInfo(MF, i)->Type == TrackType) {
Track = i;
break;
}
if (Track < 0)
Env->ThrowError("FFmpegSource: No suitable track found");
if ((unsigned int)Track >= mkv_GetNumTracks(MF))
Env->ThrowError("FFmpegSource: Invalid track number");
TrackInfo *TI = mkv_GetTrackInfo(MF, Track);
if (TI->Type != TrackType)
Env->ThrowError("FFmpegSource: Selected track is not of the right type");
mkv_SetTrackMask(MF, ~(1 << Track));
if (TI->CompEnabled) {
CS = cs_Create(MF, Track, ErrorMessage, sizeof(ErrorMessage));
if (CS == NULL)
Env->ThrowError("FFmpegSource: Can't create decompressor: %s", ErrorMessage);
}
avcodec_get_context_defaults(&CodecContext);
CodecContext.extradata = (uint8_t *)TI->CodecPrivate;
CodecContext.extradata_size = TI->CodecPrivateSize;
Codec = avcodec_find_decoder(MatroskaToFFCodecID(TI));
if (Codec == NULL)
Env->ThrowError("FFmpegSource: Codec not found");
if (avcodec_open(&CodecContext, Codec) < 0)
Env->ThrowError("FFmpegSource: Could not open codec");
}
~FFMatroskaBase() {
free(Buffer);
mkv_Close(MF);
fclose(ST.fp);
avcodec_close(&CodecContext);
}
unsigned int ReadNextFrame(int64_t *FirstStartTime, IScriptEnvironment *Env) {
unsigned int TrackNumber, FrameFlags, FrameSize;
uint64_t EndTime, FilePos, StartTime;
*FirstStartTime = -1;
while (mkv_ReadFrame(MF, 0, &TrackNumber, &StartTime, &EndTime, &FilePos, &FrameSize, &FrameFlags) == 0) {
if (*FirstStartTime < 0)
*FirstStartTime = StartTime;
if (CS) {
char CSBuffer[4096];
int DecompressedFrameSize = 0;
cs_NextFrame(CS, FilePos, FrameSize);
for (;;) {
int ReadBytes = cs_ReadData(CS, CSBuffer, sizeof(CSBuffer));
if (ReadBytes < 0)
Env->ThrowError("FFmpegSource: Error decompressing data: %s", cs_GetLastError(CS));
if (ReadBytes == 0)
return DecompressedFrameSize;
if (BufferSize < DecompressedFrameSize + ReadBytes) {
BufferSize = FrameSize;
Buffer = (uint8_t *)realloc(Buffer, BufferSize);
if (Buffer == NULL)
Env->ThrowError("FFmpegSource: Out of memory");
}
memcpy(Buffer + DecompressedFrameSize, CSBuffer, ReadBytes);
DecompressedFrameSize += ReadBytes;
}
} else {
if (fseek(ST.fp, FilePos, SEEK_SET))
Env->ThrowError("FFmpegSource: fseek(): %s", strerror(errno));
if (BufferSize < FrameSize) {
BufferSize = FrameSize;
Buffer = (uint8_t *)realloc(Buffer, BufferSize);
if (Buffer == NULL)
Env->ThrowError("FFmpegSource: Out of memory");
}
size_t ReadBytes = fread(Buffer, 1, FrameSize, ST.fp);
if (ReadBytes != FrameSize) {
if (ReadBytes == 0) {
if (feof(ST.fp))
Env->ThrowError("FFmpegSource: Unexpected EOF while reading frame");
else
Env->ThrowError("FFmpegSource: Error reading frame: %s", strerror(errno));
} else
Env->ThrowError("FFmpegSource: Short read while reading frame");
return 0;
}
return FrameSize;
}
}
return 0;
}
};
class FFMKASource : public FFAudioBase, private FFMatroskaBase {
private:
int CurrentSample;
int DecodeNextAudioBlock(uint8_t *Buf, int64_t *FirstStartTime, int64_t *Count, IScriptEnvironment* Env);
public:
FFMKASource(const char *Source, int _Track, IScriptEnvironment* Env) : FFMatroskaBase(Source, _Track, TT_AUDIO, Env) {
CurrentSample = 0;
TrackInfo *TI = mkv_GetTrackInfo(MF, Track);
switch (CodecContext.sample_fmt) {
case SAMPLE_FMT_U8: VI.sample_type = SAMPLE_INT8; break;
case SAMPLE_FMT_S16: VI.sample_type = SAMPLE_INT16; break;
case SAMPLE_FMT_S24: VI.sample_type = SAMPLE_INT24; break;
case SAMPLE_FMT_S32: VI.sample_type = SAMPLE_INT32; break;
case SAMPLE_FMT_FLT: VI.sample_type = SAMPLE_FLOAT; break;
default:
Env->ThrowError("FFmpegSource: Unsupported/unknown sample format");
}
VI.nchannels = TI->AV.Audio.Channels;
VI.audio_samples_per_second = mkv_TruncFloat(TI->AV.Audio.SamplingFreq);
unsigned TrackNumber, FrameSize, FrameFlags;
ulonglong StartTime, EndTime, FilePos;
while (mkv_ReadFrame(MF, 0, &TrackNumber, &StartTime, &EndTime, &FilePos, &FrameSize, &FrameFlags) == 0) {
if (!(FrameFlags & FRAME_UNKNOWN_START))
SI.push_back(SampleInfo(VI.num_audio_samples, StartTime, FilePos, (FrameFlags & FRAME_KF) != 0));
if (true) {
switch (CodecContext.codec_id) {
case CODEC_ID_MP3:
case CODEC_ID_MP2:
VI.num_audio_samples += 1152; break;
default:
Env->ThrowError("Only mp2 and mp3 is currently supported in matroska");
}
} else {
}
}
mkv_Seek(MF, SI[0].DTS, MKVF_SEEK_TO_PREV_KEYFRAME);
avcodec_flush_buffers(&CodecContext);
}
void __stdcall GetAudio(void* Buf, __int64 Start, __int64 Count, IScriptEnvironment* Env);
};
int FFMKASource::DecodeNextAudioBlock(uint8_t *Buf, int64_t *FirstStartTime, int64_t *Count, IScriptEnvironment* Env) {
int Ret = -1;
int FrameSize;
int64_t TempStartTime = -1;
*FirstStartTime = -1;
*Count = 0;
while (FrameSize = ReadNextFrame(&TempStartTime, Env)) {
if (*FirstStartTime < 0)
*FirstStartTime = TempStartTime;
uint8_t *Data = (uint8_t *)Buffer;
int Size = FrameSize;
while (Size > 0) {
int TempOutputBufSize = AVCODEC_MAX_AUDIO_FRAME_SIZE;
Ret = avcodec_decode_audio2(&CodecContext, (int16_t *)Buf, &TempOutputBufSize, Data, Size);
if (Ret < 0)
goto Done;
Size -= Ret;
Data += Ret;
Buf += TempOutputBufSize;
*Count += VI.AudioSamplesFromBytes(TempOutputBufSize);
if (Size <= 0)
goto Done;
}
}
Done:
return Ret;
}
void __stdcall FFMKASource::GetAudio(void* Buf, __int64 Start, __int64 Count, IScriptEnvironment* Env) {
bool HasSeeked = false;
int ClosestKF = FindClosestAudioKeyFrame(Start);
memset(Buf, 0, VI.BytesFromAudioSamples(Count));
if (Start < CurrentSample || SI[ClosestKF].SampleStart > CurrentSample) {
mkv_Seek(MF, SI[max(ClosestKF - 10, 0)].DTS, MKVF_SEEK_TO_PREV_KEYFRAME);
avcodec_flush_buffers(&CodecContext);
HasSeeked = true;
}
uint8_t *DstBuf = (uint8_t *)Buf;
int64_t RemainingSamples = Count;
int64_t DecodeCount;
do {
int64_t StartTime;
uint8_t DecodingBuffer[AVCODEC_MAX_AUDIO_FRAME_SIZE];
int64_t DecodeStart = CurrentSample;
int Ret = DecodeNextAudioBlock(DecodingBuffer, &StartTime, &DecodeCount, Env);
if (HasSeeked) {
HasSeeked = false;
int CurrentSampleIndex = SampleFromDTS(StartTime);
if (StartTime < 0 || CurrentSampleIndex < 0 || (CurrentSample = DecodeStart = SI[max(CurrentSampleIndex, 0)].SampleStart) < 0)
Env->ThrowError("FFmpegSource: Sample accurate seeking is not possible in this file");
}
int OffsetBytes = VI.BytesFromAudioSamples(max(0, Start - DecodeStart));
int CopyBytes = max(0, VI.BytesFromAudioSamples(min(RemainingSamples, DecodeCount - max(0, Start - DecodeStart))));
memcpy(DstBuf, DecodingBuffer + OffsetBytes, CopyBytes);
DstBuf += CopyBytes;
CurrentSample += DecodeCount;
RemainingSamples -= VI.AudioSamplesFromBytes(CopyBytes);
} while (RemainingSamples > 0 && DecodeCount > 0);
}
class FFMKVSource : public FFVideoBase, private FFMatroskaBase {
private:
AVFrame *Frame;
int CurrentFrame;
int DecodeNextFrame(AVFrame *Frame, int64_t *FirstStartTime, IScriptEnvironment* Env);
public:
FFMKVSource(const char *Source, int _Track, FILE *Timecodes, bool Cache, FILE *CacheFile, IScriptEnvironment* Env) : FFMatroskaBase(Source, _Track, TT_VIDEO, Env) {
Frame = NULL;
CurrentFrame = 0;
TrackInfo *TI = mkv_GetTrackInfo(MF, Track);
VI.image_type = VideoInfo::IT_TFF;
VI.width = TI->AV.Video.PixelWidth;
VI.height = TI->AV.Video.PixelHeight;
VI.fps_denominator = 1;
VI.fps_numerator = 30;
SetOutputFormat(CodecContext.pix_fmt, Env);
// Needs to be indexed?
if (!(Cache && LoadFrameInfoFromFile(CacheFile))) {
unsigned int TrackNumber, FrameSize, FrameFlags;
uint64_t StartTime, EndTime, FilePos;
while (mkv_ReadFrame(MF, 0, &TrackNumber, &StartTime, &EndTime, &FilePos, &FrameSize, &FrameFlags) == 0) {
FrameToDTS.push_back(FrameInfo(StartTime, (FrameFlags & FRAME_KF) != 0));
VI.num_frames++;
}
if (VI.num_frames == 0)
Env->ThrowError("FFmpegSource: Video track contains no frames");
mkv_Seek(MF, FrameToDTS[0].DTS, MKVF_SEEK_TO_PREV_KEYFRAME);
if (Cache) {
fprintf(CacheFile, "%d\n", VI.num_frames);
for (int i = 0; i < VI.num_frames; i++)
fprintf(CacheFile, "%lld %d\n", FrameToDTS[i].DTS, (int)(FrameToDTS[i].KeyFrame ? 1 : 0));
}
}
if (Timecodes)
for (int i = 0; i < VI.num_frames; i++)
fprintf(Timecodes, "%f\n", (FrameToDTS[i].DTS * mkv_TruncFloat(TI->TimecodeScale)) / (double)(1000000));
Frame = avcodec_alloc_frame();
}
~FFMKVSource() {
av_free(Frame);
}
PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* Env);
};
int FFMKVSource::DecodeNextFrame(AVFrame *Frame, int64_t *FirstStartTime, IScriptEnvironment* Env) {
int FrameFinished = 0;
int Ret = -1;
int FrameSize;
int64_t TempStartTime = -1;
while (FrameSize = ReadNextFrame(&TempStartTime, Env)) {
if (*FirstStartTime < 0)
*FirstStartTime = TempStartTime;
Ret = avcodec_decode_video(&CodecContext, Frame, &FrameFinished, Buffer, FrameSize);
if (FrameFinished)
goto Done;
}
// Flush the last frame
if (CurrentFrame == VI.num_frames - 1)
Ret = avcodec_decode_video(&CodecContext, Frame, &FrameFinished, NULL, 0);
Done:
return Ret;
}
PVideoFrame __stdcall FFMKVSource::GetFrame(int n, IScriptEnvironment* Env) {
bool HasSeeked = false;
if (n < CurrentFrame || FindClosestKeyFrame(n) > CurrentFrame) {
mkv_Seek(MF, FrameToDTS[n].DTS, MKVF_SEEK_TO_PREV_KEYFRAME);
avcodec_flush_buffers(&CodecContext);
HasSeeked = true;
}
do {
int64_t StartTime;
int Ret = DecodeNextFrame(Frame, &StartTime, Env);
if (HasSeeked) {
HasSeeked = false;
if (StartTime < 0 || (CurrentFrame = FrameFromDTS(StartTime)) < 0)
Env->ThrowError("FFmpegSource: Frame accurate seeking is not possible in this file");
}
CurrentFrame++;
} while (CurrentFrame <= n);
return OutputFrame(Frame, Env);
}
class FFAVFormatBase {
protected:
AVFormatContext *FormatContext;
AVCodecContext *CodecContext;
AVCodec *Codec;
int Track;
public:
FFAVFormatBase(const char *Source, int _Track, CodecType TrackType, IScriptEnvironment* Env) {
FormatContext = NULL;
Codec = NULL;
Track = _Track;
if (av_open_input_file(&FormatContext, Source, NULL, 0, NULL) != 0)
Env->ThrowError("FFmpegSource: Couldn't open \"%s\"", Source);
if (av_find_stream_info(FormatContext) < 0)
Env->ThrowError("FFmpegSource: Couldn't find stream information");
if (Track < 0)
for(unsigned int i = 0; i < FormatContext->nb_streams; i++)
if(FormatContext->streams[i]->codec->codec_type == TrackType) {
Track = i;
break;
}
if(Track < -1)
Env->ThrowError("FFmpegSource: No suitable track found");
if (Track >= (int)FormatContext->nb_streams)
Env->ThrowError("FFmpegSource: Invalid track number");
if (FormatContext->streams[Track]->codec->codec_type != TrackType)
Env->ThrowError("FFmpegSource: Selected track is not of the right type");
CodecContext = FormatContext->streams[Track]->codec;
Codec = avcodec_find_decoder(CodecContext->codec_id);
if(Codec == NULL)
Env->ThrowError("FFmpegSource: Codec not found");
if(avcodec_open(CodecContext, Codec) < 0)
Env->ThrowError("FFmpegSource: Could not open codec");
}
~FFAVFormatBase() {
avcodec_close(CodecContext);
av_close_input_file(FormatContext);
}
};
class FFAudioSource : public FFAudioBase, private FFAVFormatBase {
private:
int CurrentSample;
int DecodeNextAudioBlock(uint8_t *Buf, int64_t *DTS, int64_t *Count, IScriptEnvironment* Env);
public:
FFAudioSource(const char *Source, int _Track, IScriptEnvironment* Env) : FFAVFormatBase(Source, _Track, CODEC_TYPE_AUDIO, Env) {
CurrentSample = 0;
switch (CodecContext->sample_fmt) {
case SAMPLE_FMT_U8: VI.sample_type = SAMPLE_INT8; break;
case SAMPLE_FMT_S16: VI.sample_type = SAMPLE_INT16; break;
case SAMPLE_FMT_S24: VI.sample_type = SAMPLE_INT24; break;
case SAMPLE_FMT_S32: VI.sample_type = SAMPLE_INT32; break;
case SAMPLE_FMT_FLT: VI.sample_type = SAMPLE_FLOAT; break;
default:
Env->ThrowError("FFmpegSource: Unsupported/unknown sample format");
}
VI.nchannels = CodecContext->channels;
VI.audio_samples_per_second = CodecContext->sample_rate;
if (CodecContext->frame_size == 0)
Env->ThrowError("Variable frame sizes currently unsupported");
AVPacket Packet;
while (av_read_frame(FormatContext, &Packet) >= 0) {
if (Packet.stream_index == Track) {
SI.push_back(SampleInfo(VI.num_audio_samples, Packet.dts, Packet.pos, (Packet.flags & PKT_FLAG_KEY) != 0));
VI.num_audio_samples += CodecContext->frame_size;
}
av_free_packet(&Packet);
}
if (VI.num_audio_samples == 0)
Env->ThrowError("FFmpegSource: Audio track contains no samples");
av_seek_frame(FormatContext, Track, SI[0].DTS, AVSEEK_FLAG_BACKWARD);
avcodec_flush_buffers(CodecContext);
}
void __stdcall GetAudio(void* Buf, __int64 Start, __int64 Count, IScriptEnvironment* Env);
};
int FFAudioSource::DecodeNextAudioBlock(uint8_t *Buf, int64_t *DTS, int64_t *Count, IScriptEnvironment* Env) {
AVPacket Packet;
int Ret = -1;
*Count = 0;
*DTS = -1;
while (av_read_frame(FormatContext, &Packet) >= 0) {
if (Packet.stream_index == Track) {
if (*DTS < 0)
*DTS = Packet.dts;
uint8_t *Data = (uint8_t *)Packet.data;
int Size = Packet.size;
while (Size > 0) {
int TempOutputBufSize = AVCODEC_MAX_AUDIO_FRAME_SIZE;
Ret = avcodec_decode_audio2(CodecContext, (int16_t *)Buf, &TempOutputBufSize, Data, Size);
if (Ret < 0) {
av_free_packet(&Packet);
goto Done;
}
Size -= Ret;
Data += Ret;
Buf += TempOutputBufSize;
*Count += VI.AudioSamplesFromBytes(TempOutputBufSize);
if (Size <= 0) {
av_free_packet(&Packet);
goto Done;
}
}
}
av_free_packet(&Packet);
}
Done:
return Ret;
}
void __stdcall FFAudioSource::GetAudio(void* Buf, __int64 Start, __int64 Count, IScriptEnvironment* Env) {
bool HasSeeked = false;
int ClosestKF = FindClosestAudioKeyFrame(Start);
memset(Buf, 0, VI.BytesFromAudioSamples(Count));
if (Start < CurrentSample || SI[ClosestKF].SampleStart > CurrentSample) {
av_seek_frame(FormatContext, Track, SI[max(ClosestKF - 10, 0)].DTS, AVSEEK_FLAG_BACKWARD);
avcodec_flush_buffers(CodecContext);
HasSeeked = true;
}
uint8_t *DstBuf = (uint8_t *)Buf;
int64_t RemainingSamples = Count;
int64_t DecodeCount;
do {
int64_t StartTime;
uint8_t DecodingBuffer[AVCODEC_MAX_AUDIO_FRAME_SIZE];
int64_t DecodeStart = CurrentSample;
int Ret = DecodeNextAudioBlock(DecodingBuffer, &StartTime, &DecodeCount, Env);
if (HasSeeked) {
HasSeeked = false;
if (StartTime < 0 || (CurrentSample = DecodeStart = SI[max(ClosestSampleFromDTS(StartTime) - 0, 0)].SampleStart) < 0)
Env->ThrowError("FFmpegSource: Sample accurate seeking is not possible in this file");
}
int OffsetBytes = VI.BytesFromAudioSamples(max(0, Start - DecodeStart));
int CopyBytes = max(0, VI.BytesFromAudioSamples(min(RemainingSamples, DecodeCount - max(0, Start - DecodeStart))));
memcpy(DstBuf, DecodingBuffer + OffsetBytes, CopyBytes);
DstBuf += CopyBytes;
CurrentSample += DecodeCount;
RemainingSamples -= VI.AudioSamplesFromBytes(CopyBytes);
} while (RemainingSamples > 0 && DecodeCount > 0);
}
class FFVideoSource : public FFVideoBase, private FFAVFormatBase {
private:
AVFrame *Frame;
int CurrentFrame;
int SeekMode;
int DecodeNextFrame(AVFrame *Frame, int64_t *DTS);
public:
FFVideoSource(const char *Source, int _Track, int _SeekMode, FILE *Timecodes, bool Cache, FILE *CacheFile, IScriptEnvironment* Env) : FFAVFormatBase(Source, _Track, CODEC_TYPE_VIDEO, Env) {
CurrentFrame = 0;
Frame = NULL;
SeekMode = _SeekMode;
VI.image_type = VideoInfo::IT_TFF;
VI.width = CodecContext->width;
VI.height = CodecContext->height;
VI.fps_denominator = FormatContext->streams[Track]->time_base.num;
VI.fps_numerator = FormatContext->streams[Track]->time_base.den;
VI.num_frames = (int)FormatContext->streams[Track]->duration;
// sanity check framerate
if (VI.fps_denominator > VI.fps_numerator || VI.fps_denominator <= 0 || VI.fps_numerator <= 0) {
VI.fps_denominator = 1;
VI.fps_numerator = 30;
}
SetOutputFormat(CodecContext->pix_fmt, Env);
// Needs to be indexed?
if (!(Cache && LoadFrameInfoFromFile(CacheFile))) {
AVPacket Packet;
VI.num_frames = 0;
while (av_read_frame(FormatContext, &Packet) >= 0) {
if (Packet.stream_index == Track) {
FrameToDTS.push_back(FrameInfo(Packet.dts, (Packet.flags & PKT_FLAG_KEY) != 0));
VI.num_frames++;
}
av_free_packet(&Packet);
}
if (VI.num_frames == 0)
Env->ThrowError("FFmpegSource: Video track contains no frames");
av_seek_frame(FormatContext, Track, FrameToDTS[0].DTS, AVSEEK_FLAG_BACKWARD);
if (Cache) {
fprintf(CacheFile, "%d\n", VI.num_frames);
for (int i = 0; i < VI.num_frames; i++)
fprintf(CacheFile, "%lld %d\n", FrameToDTS[i].DTS, (int)(FrameToDTS[i].KeyFrame ? 1 : 0));
}
}
if (Timecodes)
for (int i = 0; i < VI.num_frames; i++)
fprintf(Timecodes, "%f\n", (FrameToDTS[i].DTS * FormatContext->streams[Track]->time_base.num * 1000) / (double)(FormatContext->streams[Track]->time_base.den));
Frame = avcodec_alloc_frame();
}
~FFVideoSource() {
av_free(Frame);
}
PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* Env);
};
int FFVideoSource::DecodeNextFrame(AVFrame *Frame, int64_t *DTS) {
AVPacket Packet;
int FrameFinished = 0;
int Ret = -1;
*DTS = -1;
while (av_read_frame(FormatContext, &Packet) >= 0) {
if (Packet.stream_index == Track) {
Ret = avcodec_decode_video(CodecContext, Frame, &FrameFinished, Packet.data, Packet.size);
if (*DTS < 0)
*DTS = Packet.dts;
}
av_free_packet(&Packet);
if (FrameFinished)
goto Done;
}
// Flush the last frame
if (CurrentFrame == VI.num_frames - 1)
Ret = avcodec_decode_video(CodecContext, Frame, &FrameFinished, NULL, 0);
Done:
return Ret;
}
PVideoFrame __stdcall FFVideoSource::GetFrame(int n, IScriptEnvironment* Env) {
bool HasSeeked = false;
int ClosestKF = FindClosestKeyFrame(n);
if (SeekMode == 0) {
if (n < CurrentFrame) {
av_seek_frame(FormatContext, Track, FrameToDTS[0].DTS, AVSEEK_FLAG_BACKWARD);
avcodec_flush_buffers(CodecContext);
CurrentFrame = 0;
}
} else {
// 10 frames is used as a margin to prevent excessive seeking since the predicted best keyframe isn't always selected by avformat
if (n < CurrentFrame || ClosestKF > CurrentFrame + 10 || (SeekMode == 3 && n > CurrentFrame + 10)) {
av_seek_frame(FormatContext, Track, (SeekMode == 3) ? FrameToDTS[n].DTS : FrameToDTS[ClosestKF].DTS, AVSEEK_FLAG_BACKWARD);
avcodec_flush_buffers(CodecContext);
HasSeeked = true;
}
}
do {
int64_t DTS;
int Ret = DecodeNextFrame(Frame, &DTS);
if (SeekMode == 0 && Frame->key_frame)
FrameToDTS[CurrentFrame].KeyFrame = true;
if (HasSeeked) {
HasSeeked = false;
// Is the seek destination time known? Does it belong to a frame?
if (DTS < 0 || (CurrentFrame = FrameFromDTS(DTS)) < 0) {
switch (SeekMode) {
case 1:
Env->ThrowError("FFmpegSource: Frame accurate seeking is not possible in this file");
case 2:
case 3:
CurrentFrame = ClosestFrameFromDTS(DTS);
break;
default:
Env->ThrowError("FFmpegSource: Failed assertion");
}
}
}
CurrentFrame++;
} while (CurrentFrame <= n);
return OutputFrame(Frame, Env);
}
class FFAudioRefSource : public FFBase, private FFAVFormatBase {
private:
FILE *AudioCache;
public:
FFAudioRefSource(const char *Source, int _Track, const char *CacheFile, IScriptEnvironment* Env) : FFAVFormatBase(Source, _Track, CODEC_TYPE_AUDIO, Env) {
switch (CodecContext->sample_fmt) {
case SAMPLE_FMT_U8: VI.sample_type = SAMPLE_INT8; break;
case SAMPLE_FMT_S16: VI.sample_type = SAMPLE_INT16; break;
case SAMPLE_FMT_S24: VI.sample_type = SAMPLE_INT24; break;
case SAMPLE_FMT_S32: VI.sample_type = SAMPLE_INT32; break;
case SAMPLE_FMT_FLT: VI.sample_type = SAMPLE_FLOAT; break;
default:
Env->ThrowError("FFmpegSource: Unsupported/unknown sample format");
}
VI.nchannels = CodecContext->channels;
VI.audio_samples_per_second = CodecContext->sample_rate;
AudioCache = fopen(CacheFile, "ab+");
if (AudioCache == NULL)
Env->ThrowError("FFmpegSource: Failed to open the cache file");
_fseeki64(AudioCache, 0, SEEK_END);
int64_t CacheSize = _ftelli64(AudioCache);
if (CacheSize > 0) {
VI.num_audio_samples = VI.AudioSamplesFromBytes(CacheSize);
} else {
AVPacket Packet;
uint8_t DecodingBuffer[AVCODEC_MAX_AUDIO_FRAME_SIZE];
while (av_read_frame(FormatContext, &Packet) >= 0) {
if (Packet.stream_index == Track) {
uint8_t *Data = (uint8_t *)Packet.data;
int Size = Packet.size;
while (Size > 0) {
int TempOutputBufSize = AVCODEC_MAX_AUDIO_FRAME_SIZE;
int Ret = avcodec_decode_audio2(CodecContext, (int16_t *)DecodingBuffer, &TempOutputBufSize, Data, Size);
if (Ret < 0) {
av_free_packet(&Packet);
Env->ThrowError("FFmpegSource: Audio decoding error");
}
Size -= Ret;
Data += Ret;
VI.num_audio_samples += VI.AudioSamplesFromBytes(TempOutputBufSize);
fwrite(DecodingBuffer, 1, TempOutputBufSize, AudioCache);
}
}
av_free_packet(&Packet);
}
}
}
~FFAudioRefSource() {
fclose(AudioCache);
}
void __stdcall GetAudio(void* Buf, __int64 Start, __int64 Count, IScriptEnvironment* Env) {
_fseeki64(AudioCache, VI.BytesFromAudioSamples(Start), SEEK_SET);
fread(Buf, 1, VI.BytesFromAudioSamples(Count), AudioCache);
}
};
AVSValue __cdecl CreateFFVideoSource(AVSValue Args, void* UserData, IScriptEnvironment* Env) {
if (!Args[0].Defined())
Env->ThrowError("FFmpegSource: No source specified");
const char *Source = Args[0].AsString();
int Track = Args[1].AsInt(-1);
int SeekMode = Args[2].AsInt(1);
const char *Timecodes = Args[3].AsString("");
bool Cache = Args[4].AsBool(true);
// Generate default cache filename
char DefaultCacheFilename[1024];
strcpy(DefaultCacheFilename, Source);
strcat(DefaultCacheFilename, ".ffcache");
const char *CacheFilename = Args[5].AsString(DefaultCacheFilename);
if (SeekMode < 0 || SeekMode > 3)
Env->ThrowError("FFmpegSource: Invalid seek mode selected");
FILE *TCFile = NULL;
if (strcmp(Timecodes, "")) {
TCFile = fopen(Timecodes, "w");
if (!TCFile)
Env->ThrowError("FFmpegSource: Failed to open timecode output file for writing");
fprintf(TCFile, "# timecode format v2\n");
}
FILE *CacheFile = NULL;
if (Cache) {
CacheFile = fopen(CacheFilename, "a+");
if (!CacheFile)
Env->ThrowError("FFmpegSource: Failed to open cache file");
fseek(CacheFile, 0, SEEK_END);
}
av_register_all();
AVFormatContext *FormatContext;
if (av_open_input_file(&FormatContext, Source, NULL, 0, NULL) != 0)
Env->ThrowError("FFmpegSource: Couldn't open \"%s\"", Args[0].AsString());
bool IsMatroska = !strcmp(FormatContext->iformat->name, "matroska");
av_close_input_file(FormatContext);
FFBase *Ret;
if (IsMatroska)
Ret = new FFMKVSource(Source, Track, TCFile, Cache, CacheFile, Env);
else
Ret = new FFVideoSource(Source, Track, SeekMode, TCFile, Cache, CacheFile, Env);
if (TCFile)
fclose(TCFile);
if (CacheFile)
fclose(CacheFile);
return Ret;
}
AVSValue __cdecl CreateFFAudioSource(AVSValue Args, void* UserData, IScriptEnvironment* Env) {
if (!Args[0].Defined())
Env->ThrowError("FFmpegSource: No source specified");
const char *Source = Args[0].AsString();
int Track = Args[1].AsInt(-1);
av_register_all();
AVFormatContext *FormatContext;
if (av_open_input_file(&FormatContext, Source, NULL, 0, NULL) != 0)
Env->ThrowError("FFmpegSource: Couldn't open \"%s\"", Source);
bool IsMatroska = !strcmp(FormatContext->iformat->name, "matroska");
av_close_input_file(FormatContext);
if (IsMatroska)
return new FFMKASource(Source, Track, Env);
else
return new FFAudioSource(Source, Track, Env);
}
AVSValue __cdecl CreateFFAudioRefSource(AVSValue Args, void* UserData, IScriptEnvironment* Env) {
if (!Args[0].Defined())
Env->ThrowError("FFmpegSource: No source specified");
const char *Source = Args[0].AsString();
int Track = Args[1].AsInt(-1);
// Generate default cache filename
char DefaultCacheFilename[1024];
strcpy(DefaultCacheFilename, Source);
strcat(DefaultCacheFilename, ".ffracache");
const char *CacheFilename = Args[2].AsString(DefaultCacheFilename);
av_register_all();
return new FFAudioRefSource(Source, Track, CacheFilename, Env);
}
extern "C" __declspec(dllexport) const char* __stdcall AvisynthPluginInit2(IScriptEnvironment* Env) {
Env->AddFunction("FFVideoSource", "[source]s[track]i[seekmode]i[timecodes]s[cache]b[cachefile]s", CreateFFVideoSource, 0);
Env->AddFunction("FFAudioSource", "[source]s[track]i", CreateFFAudioSource, 0);
Env->AddFunction("FFAudioRefSource", "[source]s[track]i[cachefile]s", CreateFFAudioRefSource, 0);
return "FFmpegSource";
};