Issue #113 - reading PCM audio directly from disk. Almost works, but downsampling from non-mono to mono seems to be broken.

Originally committed to SVN as r1528.
2007-08-22 20:58:53 +00:00 · 2007-08-22 20:58:53 +00:00 · 93f6ef6d7a
commit 93f6ef6d7a
parent 646d2b2e84
6 changed files with 386 additions and 2 deletions
--- a/aegisub/Makefile.am
+++ b/aegisub/Makefile.am
@ -121,6 +121,7 @@ aegisub_SOURCES = \
 	audio_provider.cpp \
 	audio_provider_hd.cpp \
 	audio_provider_ram.cpp \
+	audio_provider_pcm.cpp \
 	audio_provider_stream.cpp \
 	audio_spectrum.cpp \
 	avisynth_wrap.cpp \
--- a/aegisub/audio_provider.cpp
+++ b/aegisub/audio_provider.cpp
@ -39,6 +39,7 @@
 #include <wx/wxprec.h>
 #include "audio_provider_ram.h"
 #include "audio_provider_hd.h"
+#include "audio_provider_pcm.h"
 #include "options.h"
 #include "audio_display.h"

@ -186,6 +187,10 @@ AudioProvider *AudioProviderFactory::GetAudioProvider(wxString filename, int cac
 	// Prepare provider
 	AudioProvider *provider = NULL;

+	// Try a PCM provider first
+	provider = CreatePCMAudioProvider(filename);
+	if (provider) return provider;
+
 	// List of providers
 	wxArrayString list = GetFactoryList(Options.AsText(_T("Audio provider")));

--- a/aegisub/audio_provider.h
+++ b/aegisub/audio_provider.h
@ -58,7 +58,7 @@ private:

 protected:
 	int channels;
-	__int64 num_samples;
+	__int64 num_samples; // for one channel, ie. number of PCM frames
 	int sample_rate;
 	int bytes_per_sample;

--- a/aegisub/audio_provider_pcm.cpp
+++ b/aegisub/audio_provider_pcm.cpp
@ -0,0 +1,304 @@
+// Copyright (c) 2007, Niels Martin Hansen
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of the Aegisub Group nor the names of its contributors
+//     may be used to endorse or promote products derived from this software
+//     without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// -----------------------------------------------------------------------------
+//
+// AEGISUB
+//
+// Website: http://aegisub.cellosoft.com
+// Contact: mailto:jiifurusu@gmail.com
+//
+
+
+#include <wx/filename.h>
+#include <wx/file.h>
+#include "audio_provider_pcm.h"
+#include "utils.h"
+#include <stdint.h>
+
+
+void PCMAudioProvider::GetAudio(void *buf, __int64 start, __int64 count)
+{
+	// We'll be seeking in the file so state can become inconsistent
+	wxMutexLocker _fml(filemutex);
+
+	// Read blocks from the file
+	size_t index = 0;
+	while (count > 0 && index < index_points.size()) {
+		// Check if this index contains the samples we're looking for
+		IndexPoint &ip = index_points[index];
+		if (ip.start_sample <= start && ip.start_sample+ip.num_samples > start) {
+
+			// How many samples we can maximum take from this block
+			long long samples_can_do = ip.num_samples - start + ip.start_sample;
+			if (samples_can_do > count) samples_can_do = count;
+
+			// Read as many samples we can
+			file.Seek(ip.start_byte + (start - ip.start_sample) * bytes_per_sample, wxFromStart);
+			file.Read(buf, samples_can_do * bytes_per_sample * channels);
+
+			// Update data
+			buf = (char*)buf + samples_can_do * bytes_per_sample * channels;
+			start += samples_can_do;
+			count -= samples_can_do;
+
+		}
+
+		index++;
+	}
+
+	// If we exhausted all sample sections zerofill the rest
+	if (count > 0) {
+		if (bytes_per_sample == 1)
+			// 8 bit formats are usually unsigned with bias 127
+			memset(buf, 127, count*channels);
+		else
+			// While everything else is signed
+			memset(buf, 0, count*bytes_per_sample*channels);
+	}
+}
+
+
+// RIFF WAV PCM provider
+// Overview of RIFF WAV: <http://www.sonicspot.com/guide/wavefiles.html>
+
+class  RiffWavPCMAudioProvider : public PCMAudioProvider {
+private:
+	struct ChunkHeader {
+		char type[4];
+		uint32_t size; // XXX: Assume we're compiling on little endian
+	};
+	struct RIFFChunk {
+		ChunkHeader ch;
+		char format[4];
+	};
+	struct fmtChunk {
+		// Skip the chunk header here, it's processed separately
+		uint16_t compression; // compression format used -- 0x01 = PCM
+		uint16_t channels;
+		uint32_t samplerate;
+		uint32_t avg_bytes_sec; // can't always be trusted
+		uint16_t block_align;
+		uint16_t significant_bits_sample;
+		// Here was supposed to be some more fields but we don't need them
+		// and just skipping by the size of the struct wouldn't be safe
+		// either way, as the fields can depend on the compression.
+	};
+
+public:
+	RiffWavPCMAudioProvider(const wxString &_filename)
+	{
+		filename = _filename;
+
+		if (!file.Open(_filename, wxFile::read)) throw _T("RIFF PCM WAV audio provider: Unable to open file for reading");
+
+		// Read header
+		file.Seek(0);
+		RIFFChunk header;
+		if (file.Read(&header, sizeof(header)) < sizeof(header)) throw _T("RIFF PCM WAV audio provider: file is too small to contain a RIFF header");
+
+		// Check that it's good
+		if (strncmp(header.ch.type, "RIFF", 4)) throw _T("RIFF PCM WAV audio provider: File is not a RIFF file");
+		if (strncmp(header.format, "WAVE", 4)) throw _T("RIFF PCM WAV audio provider: File is not a RIFF WAV file");
+
+		// Count how much more data we can have in the entire file
+		// The first 4 bytes are already eaten by the header.format field
+		uint32_t data_left = header.ch.size - 4;
+		// How far into the file we have processed.
+		// Must be incremented by the riff chunk size fields.
+		uint32_t filepos = sizeof(header);
+
+		bool got_fmt_header = false;
+
+		// Inherited from AudioProvider
+		num_samples = 0;
+
+		// Continue reading chunks until out of data
+		while (data_left) {
+			file.Seek(filepos);
+			ChunkHeader ch;
+			if (file.Read(&ch, sizeof(ch)) < sizeof(ch)) break;
+
+			// Update counters
+			data_left -= sizeof(ch);
+			filepos += sizeof(ch);
+
+			if (strncmp(ch.type, "fmt ", 4) == 0) {
+				if (got_fmt_header) throw _T("RIFF PCM WAV audio provider: Invalid file, multiple 'fmt ' chunks");
+				got_fmt_header = true;
+
+				fmtChunk fmt;
+				if (file.Read(&fmt, sizeof(fmt)) < sizeof(fmt)) throw _T("RIFF PCM WAV audio provider: File ended before end of 'fmt ' chunk");
+
+				if (fmt.compression != 1) throw _T("RIFF PCM WAV audio provider: Can't use file, not PCM encoding");
+
+				// Set stuff inherited from the AudioProvider class
+				sample_rate = fmt.samplerate;
+				channels = fmt.channels;
+				bytes_per_sample = (fmt.significant_bits_sample + 7) / 8; // round up to nearest whole byte
+			}
+
+			else if (strncmp(ch.type, "data", 4) == 0) {
+				// This won't pick up 'data' chunks inside 'wavl' chunks
+				// since the 'wavl' chunks wrap those.
+
+				long long samples = ch.size / bytes_per_sample;
+				long long frames = samples / channels;
+
+				IndexPoint ip;
+				ip.start_sample = num_samples;
+				ip.num_samples = frames;
+				ip.start_byte = filepos;
+				index_points.push_back(ip);
+
+				num_samples += frames;
+			}
+
+			// Support wavl (wave list) chunks too?
+
+			// Update counters
+			// Make sure they're word aligned
+			data_left -= (ch.size + 1) & ~1;
+			filepos += (ch.size + 1) & ~1;
+		}
+	}
+};
+
+
+// Mix down any number of channels to mono
+
+class DownmixingAudioProvider : public AudioProvider {
+private:
+	AudioProvider *provider;
+	int src_channels;
+
+public:
+	DownmixingAudioProvider(AudioProvider *source)
+	{
+		filename = source->GetFilename();
+		channels = 1; // target
+		src_channels = source->GetChannels();
+		num_samples = source->GetNumSamples();
+		bytes_per_sample = source->GetBytesPerSample();
+		sample_rate = source->GetSampleRate();
+
+		// We now own this
+		provider = source;
+
+		if (!(bytes_per_sample == 1 || bytes_per_sample == 2)) throw _T("Downmixing Audio Provider: Can only downmix 8 and 16 bit audio");
+	}
+
+	~DownmixingAudioProvider()
+	{
+		delete provider;
+	}
+
+	void GetAudio(void *buf, __int64 start, __int64 count)
+	{
+		if (count == 0) return;
+
+		// We can do this ourselves
+		if (start >= num_samples) {
+			if (bytes_per_sample == 1)
+				// 8 bit formats are usually unsigned with bias 127
+				memset(buf, 127, count);
+			else
+				// While everything else is signed
+				memset(buf, 0, count*bytes_per_sample);
+
+			return;
+		}
+
+		// So alloc some temporary memory for this
+		// Depending on use, this might be made faster by using
+		// a pre-allocced block of memory...?
+		char *tmp = new char[count*bytes_per_sample*src_channels];
+
+		provider->GetAudio(tmp, start, count);
+
+		// Now downmix
+		// Just average the samples over the channels (really bad if they're out of phase!)
+		if (bytes_per_sample == 1) {
+			uint8_t *src = (uint8_t *)tmp;
+			uint8_t *dst = (uint8_t *)buf;
+
+			while (count > 0) {
+				int sum = 0;
+				for (int c = 0; c < src_channels; c++)
+					sum += *(src++);
+				*(dst++) = (uint8_t)(sum / src_channels);
+				count--;
+			}
+		}
+		else if (bytes_per_sample == 2) {
+			int16_t *src = (int16_t *)tmp;
+			int16_t *dst = (int16_t *)buf;
+
+			while (count > 0) {
+				int sum = 0;
+				for (int c = 0; c < src_channels; c++)
+					sum += *(src++);
+				*(dst++) = (int16_t)(sum / src_channels);
+				count--;
+			}
+		}
+
+		// Done downmixing, free the work buffer
+		delete[] tmp;
+	}
+};
+
+
+AudioProvider *CreatePCMAudioProvider(const wxString &filename)
+{
+	AudioProvider *provider = 0;
+	wxLogDebug(_T("Inside CreatePCMAudioProvider"));
+
+	// Try Microsoft/IBM RIFF WAV first
+	// XXX: This is going to blow up if built on big endian archs
+	wxLogDebug(_T("Trying to create RIFF WAV PCM provider"));
+	try { provider = new RiffWavPCMAudioProvider(filename); }
+	catch (const wxChar *e) { wxLogWarning(_T("Thrown creating RIFF PCM WAV provider: %s"), e); provider = 0; }
+	catch (...) { provider = 0; }
+
+	if (provider && provider->GetChannels() > 1) {
+		wxLogDebug(_T("Have a PCM provider with non-mono sound"));
+		// Can't feed non-mono audio to the rest of the program.
+		// Create a downmixing proxy and if it fails, don't provide PCM.
+		try {
+			provider = new DownmixingAudioProvider(provider);
+		}
+		catch (...) {
+			wxLogDebug(_T("Failed creating downmixer for PCM"));
+			delete provider;
+			provider = 0;
+		}
+	}
+
+	wxLogDebug(_T("Returning from CreatePCMAudioProvider: %p"), provider);
+	return provider;
+}
--- a/aegisub/audio_provider_pcm.h
+++ b/aegisub/audio_provider_pcm.h
@ -0,0 +1,73 @@
+// Copyright (c) 2007, Niels Martin Hansen
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of the Aegisub Group nor the names of its contributors
+//     may be used to endorse or promote products derived from this software
+//     without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// -----------------------------------------------------------------------------
+//
+// AEGISUB
+//
+// Website: http://aegisub.cellosoft.com
+// Contact: mailto:jiifurusu@gmail.com
+//
+
+
+#pragma once
+
+
+///////////
+// Headers
+#include "audio_provider.h"
+#include <wx/file.h>
+#include <vector>
+
+
+/////////////////////////////
+// Audio provider base class
+class PCMAudioProvider : public AudioProvider {
+protected:
+	wxMutex filemutex;
+	wxFile file;
+
+	// Hold data for an index point,
+	// to support files where audio data are
+	// split into multiple blocks.
+	// Using long long's should be safe on most compilers,
+	// wx defines wxFileOffset as long long when possible
+	struct IndexPoint {
+		long long start_byte;
+		long long start_sample;
+		long long num_samples;
+	};
+	typedef std::vector<IndexPoint> IndexVector;
+	IndexVector index_points;
+
+public:
+	virtual void GetAudio(void *buf, __int64 start, __int64 count);
+};
+
+// Construct the right PCM audio provider (if any) for the file
+AudioProvider *CreatePCMAudioProvider(const wxString &filename);
+
--- a/aegisub/changelog.txt
+++ b/aegisub/changelog.txt
@ -55,9 +55,10 @@ Please visit http://aegisub.net to download latest version
  o Moved karaoke syllable text in audio display to the top instead of bottom, since it often covered important information in spectrum mode (jfs)
  o Misc. changes and bugfixes in karaoke mode. Using the syllable splitter should be easier now. (jfs)
  o Fixed loading of Avisynth Scripts as audio. (AMZ)
+  o PCM WAV files are now streamed directly from disk instead of being "decoded" into RAM or onto another disk file. Only works for 8 and 16 bit sampledepths. (jfs)
 - Changes to Audio Spectrum: (jfs)
  o The calculated FFT data are now cached, so things should be faster.
-    - The maximum size of the cache can be configured. Default is unlimited, which provides the best performance assuming enough memory is available.
+    - The maximum size of the cache can be configured. Default is unlimited, which provides the best performance assuming enough memory is available. (Limit it if you're working with huge audio files!)
  o The quality of the spectrum can be easier configured, better quality requires more CPU and memory.
  o Actual signal power is now more accurately represented.
  o The palette is changed.