Issue #113 - reading PCM audio directly from disk. Almost works, but downmixing from non-mono to mono seems to be broken.

Originally committed to SVN as r1528.
2007-08-22 20:58:53 +00:00 · 2007-08-22 20:58:53 +00:00 · 93f6ef6d7a
commit 93f6ef6d7a
parent 646d2b2e84
6 changed files with 386 additions and 2 deletions
--- a/aegisub/Makefile.am
+++ b/aegisub/Makefile.am
@ -121,6 +121,7 @@ aegisub_SOURCES = \
 	audio_provider.cpp \
 	audio_provider_hd.cpp \
 	audio_provider_ram.cpp \
 	audio_provider_pcm.cpp \
 	audio_provider_stream.cpp \
 	audio_spectrum.cpp \
 	avisynth_wrap.cpp \
--- a/aegisub/audio_provider.cpp
+++ b/aegisub/audio_provider.cpp
@ -39,6 +39,7 @@
 #include <wx/wxprec.h>
 #include "audio_provider_ram.h"
 #include "audio_provider_hd.h"
 #include "audio_provider_pcm.h"
 #include "options.h"
 #include "audio_display.h"
@ -186,6 +187,10 @@ AudioProvider *AudioProviderFactory::GetAudioProvider(wxString filename, int cac
 	// Prepare provider
 	AudioProvider *provider = NULL;
 	// Try a PCM provider first
 	provider = CreatePCMAudioProvider(filename);
 	if (provider) return provider;
 	// List of providers
 	wxArrayString list = GetFactoryList(Options.AsText(_T("Audio provider")));
--- a/aegisub/audio_provider.h
+++ b/aegisub/audio_provider.h
@ -58,7 +58,7 @@ private:
 protected:
 	int channels;
-	__int64 num_samples;
+	__int64 num_samples; // for one channel, ie. number of PCM frames
 	int sample_rate;
 	int bytes_per_sample;
--- a/aegisub/audio_provider_pcm.cpp
+++ b/aegisub/audio_provider_pcm.cpp
@ -0,0 +1,304 @@
 // Copyright (c) 2007, Niels Martin Hansen
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
 //
 //   * Redistributions of source code must retain the above copyright notice,
 //     this list of conditions and the following disclaimer.
 //   * Redistributions in binary form must reproduce the above copyright notice,
 //     this list of conditions and the following disclaimer in the documentation
 //     and/or other materials provided with the distribution.
 //   * Neither the name of the Aegisub Group nor the names of its contributors
 //     may be used to endorse or promote products derived from this software
 //     without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 // POSSIBILITY OF SUCH DAMAGE.
 //
 // -----------------------------------------------------------------------------
 //
 // AEGISUB
 //
 // Website: http://aegisub.cellosoft.com
 // Contact: mailto:jiifurusu@gmail.com
 //
 #include <wx/filename.h>
 #include <wx/file.h>
 #include "audio_provider_pcm.h"
 #include "utils.h"
 #include <stdint.h>
 // Copy `count` PCM frames, starting at frame `start`, from the file into `buf`.
 //
 // `buf` must have room for count * bytes_per_sample * channels bytes. Frames
 // are written interleaved, exactly as they are stored in the file. Frames
 // that are not covered by any index point, or that could not be read from
 // disk, are filled with silence.
 void PCMAudioProvider::GetAudio(void *buf, __int64 start, __int64 count)
 {
 	// We'll be seeking in the file so state can become inconsistent
 	wxMutexLocker _fml(filemutex);
 	// Size in bytes of one PCM frame, ie. one sample for every channel.
 	// Index points count frames, so every frame <-> byte conversion has to
 	// use this and not just bytes_per_sample.
 	const long long frame_size = (long long)bytes_per_sample * channels;
 	// 8 bit formats are usually unsigned with bias 127, everything else is signed
 	const int silence = (bytes_per_sample == 1) ? 127 : 0;
 	// Read blocks from the file
 	for (size_t index = 0; count > 0 && index < index_points.size(); index++) {
 		IndexPoint &ip = index_points[index];
 		// Skip blocks that don't contain the first frame we still need
 		if (ip.start_sample > start || ip.start_sample + ip.num_samples <= start) continue;
 		// How many frames we can take from this block at most
 		const long long frames_into_block = start - ip.start_sample;
 		long long frames_can_do = ip.num_samples - frames_into_block;
 		if (frames_can_do > count) frames_can_do = count;
 		const long long bytes_wanted = frames_can_do * frame_size;
 		// Seek to the first wanted frame. The offset must be in whole frames,
 		// otherwise multi-channel files are read from the wrong position.
 		file.Seek(ip.start_byte + frames_into_block * frame_size, wxFromStart);
 		long long bytes_read = file.Read(buf, bytes_wanted);
 		// Treat an error return (negative, or a wrapped-around unsigned value) as "nothing read"
 		if (bytes_read < 0 || bytes_read > bytes_wanted) bytes_read = 0;
 		// Don't hand uninitialised memory to the caller after a short read
 		if (bytes_read < bytes_wanted)
 			memset((char*)buf + bytes_read, silence, bytes_wanted - bytes_read);
 		// Update data
 		buf = (char*)buf + bytes_wanted;
 		start += frames_can_do;
 		count -= frames_can_do;
 	}
 	// If we exhausted all sample sections fill the rest with silence
 	if (count > 0)
 		memset(buf, silence, count * frame_size);
 }
 // RIFF WAV PCM provider
 // Overview of RIFF WAV: <http://www.sonicspot.com/guide/wavefiles.html>
 // Audio provider streaming uncompressed PCM straight out of a RIFF WAVE file.
 // The constructor walks the top-level chunks of the file, takes the sample
 // format from the 'fmt ' chunk and records every 'data' chunk as an index
 // point. It throws a const wxChar* message if the file can't be used.
 class RiffWavPCMAudioProvider : public PCMAudioProvider {
 private:
 	// Header preceding every RIFF chunk
 	struct ChunkHeader {
 		char type[4];
 		uint32_t size; // XXX: Assume we're compiling on little endian
 	};
 	// The outermost chunk of the file: chunk header plus form type
 	struct RIFFChunk {
 		ChunkHeader ch;
 		char format[4];
 	};
 	// Fixed part of the 'fmt ' chunk
 	struct fmtChunk {
 		// Skip the chunk header here, it's processed separately
 		uint16_t compression; // compression format used -- 0x01 = PCM
 		uint16_t channels;
 		uint32_t samplerate;
 		uint32_t avg_bytes_sec; // can't always be trusted
 		uint16_t block_align;
 		uint16_t significant_bits_sample;
 		// Here was supposed to be some more fields but we don't need them
 		// and just skipping by the size of the struct wouldn't be safe
 		// either way, as the fields can depend on the compression.
 	};
 public:
 	// Open `_filename` and index its PCM data.
 	// Throws a const wxChar* describing the problem if the file can't be
 	// opened, isn't a RIFF WAVE file, isn't PCM encoded or is malformed.
 	RiffWavPCMAudioProvider(const wxString &_filename)
 	{
 		filename = _filename;
 		if (!file.Open(_filename, wxFile::read)) throw _T("RIFF PCM WAV audio provider: Unable to open file for reading");
 		// Read header
 		file.Seek(0);
 		RIFFChunk header;
 		if (file.Read(&header, sizeof(header)) < sizeof(header)) throw _T("RIFF PCM WAV audio provider: file is too small to contain a RIFF header");
 		// Check that it's good
 		if (strncmp(header.ch.type, "RIFF", 4)) throw _T("RIFF PCM WAV audio provider: File is not a RIFF file");
 		if (strncmp(header.format, "WAVE", 4)) throw _T("RIFF PCM WAV audio provider: File is not a RIFF WAV file");
 		// Count how much more data we can have in the entire file
 		// The first 4 bytes are already eaten by the header.format field
 		// This counter is unsigned and may wrap around on files with a bogus
 		// RIFF size; the loop below then simply ends at the first short read.
 		uint32_t data_left = header.ch.size - 4;
 		// How far into the file we have processed.
 		// Must be incremented by the riff chunk size fields.
 		// Kept 64 bit so a huge chunk size can't wrap the position around
 		// and make us parse earlier parts of the file again.
 		long long filepos = sizeof(header);
 		bool got_fmt_header = false;
 		// Inherited from AudioProvider
 		num_samples = 0;
 		// Continue reading chunks until out of data
 		while (data_left) {
 			file.Seek(filepos);
 			ChunkHeader ch;
 			if (file.Read(&ch, sizeof(ch)) < sizeof(ch)) break;
 			// Update counters
 			data_left -= sizeof(ch);
 			filepos += sizeof(ch);
 			if (strncmp(ch.type, "fmt ", 4) == 0) {
 				if (got_fmt_header) throw _T("RIFF PCM WAV audio provider: Invalid file, multiple 'fmt ' chunks");
 				got_fmt_header = true;
 				if (ch.size < sizeof(fmtChunk)) throw _T("RIFF PCM WAV audio provider: Invalid file, 'fmt ' chunk is too small");
 				fmtChunk fmt;
 				if (file.Read(&fmt, sizeof(fmt)) < sizeof(fmt)) throw _T("RIFF PCM WAV audio provider: File ended before end of 'fmt ' chunk");
 				if (fmt.compression != 1) throw _T("RIFF PCM WAV audio provider: Can't use file, not PCM encoding");
 				// Both values are used as divisors when indexing 'data' chunks
 				if (fmt.channels == 0) throw _T("RIFF PCM WAV audio provider: Invalid file, 'fmt ' chunk declares zero channels");
 				if (fmt.significant_bits_sample == 0) throw _T("RIFF PCM WAV audio provider: Invalid file, 'fmt ' chunk declares zero bits per sample");
 				// Set stuff inherited from the AudioProvider class
 				sample_rate = fmt.samplerate;
 				channels = fmt.channels;
 				bytes_per_sample = (fmt.significant_bits_sample + 7) / 8; // round up to nearest whole byte
 			}
 			else if (strncmp(ch.type, "data", 4) == 0) {
 				// This won't pick up 'data' chunks inside 'wavl' chunks
 				// since the 'wavl' chunks wrap those.
 				// The sample format must be known before the chunk size can
 				// be converted into a number of frames.
 				if (!got_fmt_header) throw _T("RIFF PCM WAV audio provider: Invalid file, 'data' chunk found before 'fmt ' chunk");
 				long long samples = ch.size / bytes_per_sample;
 				long long frames = samples / channels;
 				IndexPoint ip;
 				ip.start_sample = num_samples;
 				ip.num_samples = frames;
 				ip.start_byte = filepos;
 				index_points.push_back(ip);
 				num_samples += frames;
 			}
 			// Support wavl (wave list) chunks too?
 			// Update counters
 			// Make sure they're word aligned
 			data_left -= (ch.size + 1) & ~1;
 			filepos += ((long long)ch.size + 1) & ~1LL;
 		}
 		// Without a 'fmt ' chunk the inherited format fields were never set
 		if (!got_fmt_header) throw _T("RIFF PCM WAV audio provider: Invalid file, no 'fmt ' chunk found");
 	}
 };
 // Mix down any number of channels to mono
 class DownmixingAudioProvider : public AudioProvider {
 private:
 	AudioProvider *provider;
 	int src_channels;
 public:
 	DownmixingAudioProvider(AudioProvider *source)
 	{
 		filename = source->GetFilename();
 		channels = 1; // target
 		src_channels = source->GetChannels();
 		num_samples = source->GetNumSamples();
 		bytes_per_sample = source->GetBytesPerSample();
 		sample_rate = source->GetSampleRate();
 		// We now own this
 		provider = source;
 		if (!(bytes_per_sample == 1 || bytes_per_sample == 2)) throw _T("Downmixing Audio Provider: Can only downmix 8 and 16 bit audio");
 	}
 	~DownmixingAudioProvider()
 	{
 		delete provider;
 	}
 	void GetAudio(void *buf, __int64 start, __int64 count)
 	{
 		if (count == 0) return;
 		// We can do this ourselves
 		if (start >= num_samples) {
 			if (bytes_per_sample == 1)
 				// 8 bit formats are usually unsigned with bias 127
 				memset(buf, 127, count);
 			else
 				// While everything else is signed
 				memset(buf, 0, count*bytes_per_sample);
 			return;
 		}
 		// So alloc some temporary memory for this
 		// Depending on use, this might be made faster by using
 		// a pre-allocced block of memory...?
 		char *tmp = new char[count*bytes_per_sample*src_channels];
 		provider->GetAudio(tmp, start, count);
 		// Now downmix
 		// Just average the samples over the channels (really bad if they're out of phase!)
 		if (bytes_per_sample == 1) {
 			uint8_t *src = (uint8_t *)tmp;
 			uint8_t *dst = (uint8_t *)buf;
 			while (count > 0) {
 				int sum = 0;
 				for (int c = 0; c < src_channels; c++)
 					sum += *(src++);
 				*(dst++) = (uint8_t)(sum / src_channels);
 				count--;
 			}
 		}
 		else if (bytes_per_sample == 2) {
 			int16_t *src = (int16_t *)tmp;
 			int16_t *dst = (int16_t *)buf;
 			while (count > 0) {
 				int sum = 0;
 				for (int c = 0; c < src_channels; c++)
 					sum += *(src++);
 				*(dst++) = (int16_t)(sum / src_channels);
 				count--;
 			}
 		}
 		// Done downmixing, free the work buffer
 		delete[] tmp;
 	}
 };
 // Try to build a provider that reads PCM audio straight from `filename`.
 // Multi-channel audio is wrapped in a downmixing proxy. Returns 0 if no
 // provider could be created; the caller owns a non-zero result.
 AudioProvider *CreatePCMAudioProvider(const wxString &filename)
 {
 	wxLogDebug(_T("Inside CreatePCMAudioProvider"));
 	AudioProvider *result = 0;
 	// Try Microsoft/IBM RIFF WAV first
 	// XXX: This is going to blow up if built on big endian archs
 	wxLogDebug(_T("Trying to create RIFF WAV PCM provider"));
 	try {
 		result = new RiffWavPCMAudioProvider(filename);
 	}
 	catch (const wxChar *msg) {
 		wxLogWarning(_T("Thrown creating RIFF PCM WAV provider: %s"), msg);
 		result = 0;
 	}
 	catch (...) {
 		result = 0;
 	}
 	const bool is_multichannel = (result != 0) && (result->GetChannels() > 1);
 	if (is_multichannel) {
 		wxLogDebug(_T("Have a PCM provider with non-mono sound"));
 		// Can't feed non-mono audio to the rest of the program.
 		// Create a downmixing proxy and if it fails, don't provide PCM.
 		AudioProvider *mono = 0;
 		try {
 			mono = new DownmixingAudioProvider(result);
 		}
 		catch (...) {
 			wxLogDebug(_T("Failed creating downmixer for PCM"));
 			// The proxy never took ownership, so the source is still ours to free
 			delete result;
 		}
 		result = mono;
 	}
 	wxLogDebug(_T("Returning from CreatePCMAudioProvider: %p"), result);
 	return result;
 }
--- a/aegisub/audio_provider_pcm.h
+++ b/aegisub/audio_provider_pcm.h
@ -0,0 +1,73 @@
 // Copyright (c) 2007, Niels Martin Hansen
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
 //
 //   * Redistributions of source code must retain the above copyright notice,
 //     this list of conditions and the following disclaimer.
 //   * Redistributions in binary form must reproduce the above copyright notice,
 //     this list of conditions and the following disclaimer in the documentation
 //     and/or other materials provided with the distribution.
 //   * Neither the name of the Aegisub Group nor the names of its contributors
 //     may be used to endorse or promote products derived from this software
 //     without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 // POSSIBILITY OF SUCH DAMAGE.
 //
 // -----------------------------------------------------------------------------
 //
 // AEGISUB
 //
 // Website: http://aegisub.cellosoft.com
 // Contact: mailto:jiifurusu@gmail.com
 //
 #pragma once
 ///////////
 // Headers
 #include "audio_provider.h"
 #include <wx/file.h>
 #include <vector>
 /////////////////////////////
 // Audio provider base class
 // Common base for providers that read PCM sample data directly from a file.
 // GetAudio serves requests by looking up the wanted frames in index_points
 // and reading them from `file`; filling in both is left to derived classes.
 class PCMAudioProvider : public AudioProvider {
 protected:
 	// Locked by GetAudio for as long as it seeks and reads in `file`
 	wxMutex filemutex;
 	// The file the audio data are read from
 	wxFile file;
 	// Hold data for an index point,
 	// to support files where audio data are
 	// split into multiple blocks.
 	// Using long longs should be safe on most compilers,
 	// wx defines wxFileOffset as long long when possible
 	struct IndexPoint {
 		long long start_byte; // offset in the file of the first byte of the block
 		long long start_sample; // number of the first PCM frame held by the block
 		long long num_samples; // how many PCM frames the block holds
 	};
 	typedef std::vector<IndexPoint> IndexVector;
 	// GetAudio scans these from first to last
 	IndexVector index_points;
 public:
 	// Copy `count` frames starting at frame `start` into `buf`,
 	// filling whatever no index point covers with silence
 	virtual void GetAudio(void *buf, __int64 start, __int64 count);
 };
 // Construct the right PCM audio provider (if any) for the file.
 // Returns 0 if the file can't be read as PCM. Audio with more than one
 // channel is returned wrapped in a proxy that mixes it down to mono.
 AudioProvider *CreatePCMAudioProvider(const wxString &filename);
--- a/aegisub/changelog.txt
+++ b/aegisub/changelog.txt
@ -55,9 +55,10 @@ Please visit http://aegisub.net to download latest version
  o Moved karaoke syllable text in audio display to the top instead of bottom, since it often covered important information in spectrum mode (jfs)
  o Misc. changes and bugfixes in karaoke mode. Using the syllable splitter should be easier now. (jfs)
  o Fixed loading of Avisynth Scripts as audio. (AMZ)
  o PCM WAV files are now streamed directly from disk instead of being "decoded" into RAM or onto another disk file. Only works for 8 and 16 bit sample depths. (jfs)
 - Changes to Audio Spectrum: (jfs)
  o The calculated FFT data are now cached, so things should be faster.
-    - The maximum size of the cache can be configured. Default is unlimited, which provides the best performance assuming enough memory is available.
+    - The maximum size of the cache can be configured. Default is unlimited, which provides the best performance assuming enough memory is available. (Limit it if you're working with huge audio files!)
  o The quality of the spectrum can be easier configured, better quality requires more CPU and memory.
  o Actual signal power is now more accurately represented.
  o The palette is changed.