diff --git a/libaegisub/audio/provider.cpp b/libaegisub/audio/provider.cpp
index 961e5bf89..0abb67eeb 100644
--- a/libaegisub/audio/provider.cpp
+++ b/libaegisub/audio/provider.cpp
@@ -21,13 +21,107 @@
 #include "libaegisub/log.h"
 #include "libaegisub/util.h"
 
-namespace agi {
-void AudioProvider::GetAudioWithVolume(void *buf, int64_t start, int64_t count, double volume) const {
-	GetAudio(buf, start, count);
+namespace {
 
+/// Views a float/double sample buffer as int16 samples (scaled by 32768, saturated).
+template<typename Source>
+class ConvertFloatToInt16 {
+	const Source* src; // borrowed; must stay valid while this view is used
+public:
+	explicit ConvertFloatToInt16(const Source* src) :src(src) {}
+	int16_t operator[](size_t idx) const {
+		const Source expanded = src[idx] * 32768;
+		if (expanded != expanded) return 0; // NaN: converting it to an integer type is undefined
+		return static_cast<int16_t>(expanded < -32768 ? Source(-32768) : expanded > 32767 ? Source(32767) : expanded);
+	}
+};
+
+class ConvertIntToInt16 { // signed zero-bias PCM; 8-bit (unsigned, bias 128) uses ConvertUInt8ToInt16
+	const char* src; // borrowed sample bytes; the last two bytes of a sample are read as its int16 value
+	int bytes_per_sample;
+public:
+	ConvertIntToInt16(const void* src, int bytes_per_sample) :src(static_cast<const char*>(src)), bytes_per_sample(bytes_per_sample) {}
+	int16_t operator[](size_t idx) const {
+		int16_t top; // copied bytewise: with e.g. 3-byte samples these bytes are not int16-aligned
+		memcpy(&top, src + (idx + 1) * bytes_per_sample - sizeof(top), sizeof(top));
+		return top;
+	}
+};
+class ConvertUInt8ToInt16 { // views unsigned 8-bit PCM (bias 128) as int16 samples
+	const uint8_t* src; // borrowed; must stay valid while this view is used
+public:
+	explicit ConvertUInt8ToInt16(const uint8_t* src) :src(src) {}
+	int16_t operator[](size_t idx) const {
+		return static_cast<int16_t>((src[idx] - 128) * 256); // multiply: << on a negative value is undefined before C++20
+	}
+};
+
+/// Wraps an interleaved sample view and yields one averaged sample per frame.
+template<typename Source>
+class DownmixToMono {
+	Source src;
+	int channels;
+public:
+	DownmixToMono(Source src, int channels) :src(src), channels(channels) {}
+	int16_t operator[](size_t idx) const {
+		const size_t base = idx * channels; // index of the frame's first channel
+		int total = 0;
+		for (int ch = 0; ch < channels; ++ch) total += src[base + ch];
+		return total / channels; // plain average of the frame's channels
+	}
+};
+}
+
+namespace agi {
+/// Copies count frames from samples into out, averaging each frame's channels.
+template<typename Samples>
+static void DownmixInto(int16_t* out, int64_t count, Samples samples, int channels) {
+	const DownmixToMono<Samples> mono(samples, channels);
+	for (int64_t i = 0; i < count; ++i)
+		out[i] = mono[i];
+}
+
+/// Decodes count frames starting at frame start and writes them to buf as 16-bit
+/// signed mono, whatever sample format and channel count FillBuffer produces.
+///
+/// @param buf   Destination; must have room for count int16_t samples
+/// @param start First frame to decode, passed through to FillBuffer
+/// @param count Number of frames to decode
+///
+/// Float samples are scaled and saturated, 8-bit samples are treated as unsigned
+/// with a bias of 128, wider integer samples keep their last two bytes, and the
+/// channels of each frame are averaged. Exceptions from FillBuffer propagate.
+void AudioProvider::FillBufferInt16Mono(int16_t* buf, int64_t start, int64_t count) const {
+	// Already in the target format: decode straight into the caller's buffer
+	if (!float_samples && bytes_per_sample == 2 && channels == 1) {
+		FillBuffer(buf, start, count);
+		return;
+	}
+
+	// Scratch space in the source format. A vector (rather than malloc/free) so
+	// that it is released when FillBuffer throws, and so that an allocation
+	// failure surfaces as an exception instead of a write through a null pointer.
+	std::vector<char> raw(static_cast<size_t>(bytes_per_sample * count * channels));
+	FillBuffer(raw.data(), start, count);
+
+	// Mono input takes the same path: averaging a single channel is the identity.
+	if (float_samples) {
+		if (bytes_per_sample == sizeof(float))
+			DownmixInto(buf, count, ConvertFloatToInt16<float>(reinterpret_cast<float*>(raw.data())), channels);
+		else if (bytes_per_sample == sizeof(double))
+			DownmixInto(buf, count, ConvertFloatToInt16<double>(reinterpret_cast<double*>(raw.data())), channels);
+		else // no converter for this float width: output silence rather than stale memory
+			memset(buf, 0, sizeof(int16_t) * count);
+	}
+	else if (bytes_per_sample == sizeof(uint8_t))
+		DownmixInto(buf, count, ConvertUInt8ToInt16(reinterpret_cast<uint8_t*>(raw.data())), channels);
+	else
+		DownmixInto(buf, count, ConvertIntToInt16(raw.data(), bytes_per_sample), channels);
+}
+
+void AudioProvider::GetInt16MonoAudioWithVolume(int16_t *buf, int64_t start, int64_t count, double volume) const {
+	GetInt16MonoAudio(buf, start, count);
 	if (volume == 1.0) return;
-	if (bytes_per_sample != 2)
-		throw agi::InternalError("GetAudioWithVolume called on unconverted audio stream");
 
 	auto buffer = static_cast<int16_t *>(buf);
 	for (size_t i = 0; i < (size_t)count; ++i)
@@ -75,6 +169,39 @@ void AudioProvider::GetAudio(void *buf, int64_t start, int64_t count) const {
 	}
 }
 
+/// Reads count frames starting at start as 16-bit mono. Parts of the request that
+/// lie outside [0, num_samples), or that fail to decode, are filled with silence.
+void AudioProvider::GetInt16MonoAudio(int16_t* buf, int64_t start, int64_t count) const {
+	if (start < 0) { // request begins before the stream: zero the lead-in, then skip past it
+		memset(buf, 0, sizeof(int16_t) * std::min(-start, count));
+		buf -= start;
+		count += start;
+		start = 0;
+	}
+
+	if (start + count > num_samples) { // request runs past the end: zero the overhang
+		const int64_t tail = std::min(count, start + count - num_samples);
+		count -= tail;
+		memset(buf + count, 0, sizeof(int16_t) * tail);
+	}
+
+	if (count <= 0) return;
+
+	try {
+		FillBufferInt16Mono(buf, start, count);
+	}
+	catch (AudioDecodeError const& e) {
+		// There is no channel for reporting errors to the caller from here,
+		// so the failure is logged and the affected range becomes silence
+		LOG_E("audio_provider") << e.GetMessage();
+		memset(buf, 0, sizeof(int16_t) * count);
+	}
+	catch (...) {
+		LOG_E("audio_provider") << "Unknown audio decoding error";
+		memset(buf, 0, sizeof(int16_t) * count);
+	}
+}
+
 namespace {
 class writer {
 	io::Save outfile;
@@ -114,7 +241,7 @@ void SaveAudioClip(AudioProvider const& provider, fs::path const& path, int star
 
 	out.write("WAVEfmt ");
 	out.write<int32_t>(16); // Size of chunk
-	out.write<int16_t>(1);  // compression format (PCM)
+	out.write<int16_t>(provider.AreSamplesFloat() ? 3 : 1);  // compression format (1: WAVE_FORMAT_PCM, 3: WAVE_FORMAT_IEEE_FLOAT)
 	out.write<int16_t>(provider.GetChannels());
 	out.write<int32_t>(provider.GetSampleRate());
 	out.write<int32_t>(provider.GetSampleRate() * provider.GetChannels() * provider.GetBytesPerSample());
@@ -134,4 +261,4 @@ void SaveAudioClip(AudioProvider const& provider, fs::path const& path, int star
 		out.write(buf);
 	}
 }
-}
+}
\ No newline at end of file
diff --git a/libaegisub/audio/provider_convert.cpp b/libaegisub/audio/provider_convert.cpp
index b45d8a852..21603c28a 100644
--- a/libaegisub/audio/provider_convert.cpp
+++ b/libaegisub/audio/provider_convert.cpp
@@ -22,119 +22,19 @@
 #include <limits>
 
 using namespace agi;
-
-/// Anything integral -> 16 bit signed machine-endian audio converter
 namespace {
-template<class Target>
-class BitdepthConvertAudioProvider final : public AudioProviderWrapper {
-	int src_bytes_per_sample;
-	mutable std::vector<uint8_t> src_buf;
-
+class ConvertAudioProvider final : public AudioProviderWrapper {
 public:
-	BitdepthConvertAudioProvider(std::unique_ptr<AudioProvider> src) : AudioProviderWrapper(std::move(src)) {
-		if (bytes_per_sample > 8)
-			throw AudioProviderError("Audio format converter: audio with bitdepths greater than 64 bits/sample is currently unsupported");
-
-		src_bytes_per_sample = bytes_per_sample;
-		bytes_per_sample = sizeof(Target);
-	}
-
-	void FillBuffer(void *buf, int64_t start, int64_t count64) const override {
-		auto count = static_cast<size_t>(count64);
-		assert(count == count64);
-
-		src_buf.resize(count * src_bytes_per_sample * channels);
-		source->GetAudio(src_buf.data(), start, count);
-
-		auto dest = static_cast<int16_t*>(buf);
-
-		for (int64_t i = 0; i < count * channels; ++i) {
-			int64_t sample = 0;
-
-			// 8 bits per sample is assumed to be unsigned with a bias of 127,
-			// while everything else is assumed to be signed with zero bias
-			if (src_bytes_per_sample == 1)
-				sample = src_buf[i] - 128;
-			else {
-				for (int j = src_bytes_per_sample; j > 0; --j) {
-					sample <<= 8;
-					sample += src_buf[i * src_bytes_per_sample + j - 1];
-				}
-			}
-
-			if (static_cast<size_t>(src_bytes_per_sample) > sizeof(Target))
-				sample /= 1LL << (src_bytes_per_sample - sizeof(Target)) * 8;
-			else if (static_cast<size_t>(src_bytes_per_sample) < sizeof(Target))
-				sample *=  1LL << (sizeof(Target) - src_bytes_per_sample ) * 8;
-
-			dest[i] = static_cast<Target>(sample);
-		}
-	}
-};
-
-/// Floating point -> 16 bit signed machine-endian audio converter
-template<class Source, class Target>
-class FloatConvertAudioProvider final : public AudioProviderWrapper {
-	mutable std::vector<Source> src_buf;
-
-public:
-	FloatConvertAudioProvider(std::unique_ptr<AudioProvider> src) : AudioProviderWrapper(std::move(src)) {
-		bytes_per_sample = sizeof(Target);
+	ConvertAudioProvider(std::unique_ptr<AudioProvider> src) : AudioProviderWrapper(std::move(src)) {
 		float_samples = false;
-	}
-
-	void FillBuffer(void *buf, int64_t start, int64_t count64) const override {
-		auto count = static_cast<size_t>(count64);
-		assert(count == count64);
-
-		src_buf.resize(count * channels);
-		source->GetAudio(&src_buf[0], start, count);
-
-		auto dest = static_cast<Target*>(buf);
-
-		for (size_t i = 0; i < static_cast<size_t>(count * channels); ++i) {
-			Source expanded;
-			if (src_buf[i] < 0)
-				expanded = static_cast<Target>(-src_buf[i] * std::numeric_limits<Target>::min());
-			else
-				expanded = static_cast<Target>(src_buf[i] * std::numeric_limits<Target>::max());
-
-			dest[i] = expanded < std::numeric_limits<Target>::min() ? std::numeric_limits<Target>::min() :
-			          expanded > std::numeric_limits<Target>::max() ? std::numeric_limits<Target>::max() :
-			                                                          static_cast<Target>(expanded);
-		}
-	}
-};
-
-/// Non-mono 16-bit signed machine-endian -> mono 16-bit signed machine endian converter
-class DownmixAudioProvider final : public AudioProviderWrapper {
-	int src_channels;
-	mutable std::vector<int16_t> src_buf;
-
-public:
-	DownmixAudioProvider(std::unique_ptr<AudioProvider> src) : AudioProviderWrapper(std::move(src)) {
-		src_channels = channels;
 		channels = 1;
+		bytes_per_sample = sizeof(int16_t);
 	}
 
-	void FillBuffer(void *buf, int64_t start, int64_t count64) const override {
-		auto count = static_cast<size_t>(count64);
-		assert(count == count64);
-
-		src_buf.resize(count * src_channels);
-		source->GetAudio(&src_buf[0], start, count);
-
-		auto dst = static_cast<int16_t*>(buf);
-		// Just average the channels together
-		while (count-- > 0) {
-			int sum = 0;
-			for (int c = 0; c < src_channels; ++c)
-				sum += src_buf[count * src_channels + c];
-			dst[count] = static_cast<int16_t>(sum / src_channels);
-		}
+	void FillBuffer(void *buf, int64_t start, int64_t count) const override {
+		source->GetInt16MonoAudio(reinterpret_cast<int16_t*>(buf), start, count);
 	}
 };
-
 /// Sample doubler with linear interpolation for the samples provider
 /// Requires 16-bit mono input
 class SampleDoublingAudioProvider final : public AudioProviderWrapper {
@@ -177,26 +77,23 @@ std::unique_ptr<AudioProvider> CreateConvertAudioProvider(std::unique_ptr<AudioP
 	// Ensure 16-bit audio with proper endianness
 	if (provider->AreSamplesFloat()) {
 		LOG_D("audio_provider") << "Converting float to S16";
-		if (provider->GetBytesPerSample() == sizeof(float))
-			provider = agi::make_unique<FloatConvertAudioProvider<float, int16_t>>(std::move(provider));
-		else
-			provider = agi::make_unique<FloatConvertAudioProvider<double, int16_t>>(std::move(provider));
 	}
 	if (provider->GetBytesPerSample() != 2) {
-		LOG_D("audio_provider") << "Converting " << provider->GetBytesPerSample() << " bytes per sample or wrong endian to S16";
-		provider = agi::make_unique<BitdepthConvertAudioProvider<int16_t>>(std::move(provider));
+		LOG_D("audio_provider") << "Converting " << provider->GetBytesPerSample() << " bytes per sample to S16";
 	}
 
 	// We currently only support mono audio
 	if (provider->GetChannels() != 1) {
 		LOG_D("audio_provider") << "Downmixing to mono from " << provider->GetChannels() << " channels";
-		provider = agi::make_unique<DownmixAudioProvider>(std::move(provider));
 	}
 
 	// Some players don't like low sample rate audio
-	while (provider->GetSampleRate() < 32000) {
-		LOG_D("audio_provider") << "Doubling sample rate";
-		provider = agi::make_unique<SampleDoublingAudioProvider>(std::move(provider));
+	if (provider->GetSampleRate() < 32000) {
+		provider = agi::make_unique<ConvertAudioProvider>(std::move(provider));
+		while (provider->GetSampleRate() < 32000) {
+			LOG_D("audio_provider") << "Doubling sample rate";
+			provider = agi::make_unique<SampleDoublingAudioProvider>(std::move(provider));
+		}
 	}
 
 	return provider;
diff --git a/libaegisub/audio/provider_hd.cpp b/libaegisub/audio/provider_hd.cpp
index 19e33eeed..2969b244a 100644
--- a/libaegisub/audio/provider_hd.cpp
+++ b/libaegisub/audio/provider_hd.cpp
@@ -43,15 +43,15 @@ class HDAudioProvider final : public AudioProviderWrapper {
 		}
 
 		if (count > 0) {
-			start *= bytes_per_sample;
-			count *= bytes_per_sample;
+			start *= bytes_per_sample * channels;
+			count *= bytes_per_sample * channels;
 			memcpy(buf, file.read(start, count), count);
 		}
 	}
 
 	fs::path CacheFilename(fs::path const& dir) {
 		// Check free space
-		if ((uint64_t)num_samples * bytes_per_sample > fs::FreeSpace(dir))
+		if ((uint64_t)num_samples * bytes_per_sample * channels > fs::FreeSpace(dir))
 			throw AudioProviderError("Not enough free disk space in " + dir.string() + " to cache the audio");
 
 		return format("audio-%lld-%lld", time(nullptr),
@@ -61,7 +61,7 @@ class HDAudioProvider final : public AudioProviderWrapper {
 public:
 	HDAudioProvider(std::unique_ptr<AudioProvider> src, agi::fs::path const& dir)
 	: AudioProviderWrapper(std::move(src))
-	, file(dir / CacheFilename(dir), num_samples * bytes_per_sample)
+	, file(dir / CacheFilename(dir), num_samples * bytes_per_sample * channels)
 	{
 		decoded_samples = 0;
 		decoder = std::thread([&] {
diff --git a/libaegisub/audio/provider_lock.cpp b/libaegisub/audio/provider_lock.cpp
index eb397e410..e405487a1 100644
--- a/libaegisub/audio/provider_lock.cpp
+++ b/libaegisub/audio/provider_lock.cpp
@@ -29,6 +29,11 @@ class LockAudioProvider final : public agi::AudioProviderWrapper {
 		source->GetAudio(buf, start, count);
 	}
 
+	void FillBufferInt16Mono(int16_t *buf, int64_t start, int64_t count) const override {
+		std::lock_guard<std::mutex> guard(mutex); // held until the source has filled buf
+		source->GetInt16MonoAudio(buf, start, count);
+	}
+
 public:
 	LockAudioProvider(std::unique_ptr<AudioProvider> src)
 	: AudioProviderWrapper(std::move(src))
diff --git a/libaegisub/audio/provider_ram.cpp b/libaegisub/audio/provider_ram.cpp
index 0c1da546c..e708b0112 100644
--- a/libaegisub/audio/provider_ram.cpp
+++ b/libaegisub/audio/provider_ram.cpp
@@ -46,14 +46,14 @@ public:
 		decoded_samples = 0;
 
 		try {
-			blockcache.resize((source->GetNumSamples() * source->GetBytesPerSample() + CacheBlockSize - 1) >> CacheBits);
+			blockcache.resize((num_samples * bytes_per_sample * channels + CacheBlockSize - 1) >> CacheBits);
 		}
 		catch (std::bad_alloc const&) {
 			throw AudioProviderError("Not enough memory available to cache in RAM");
 		}
 
 		decoder = std::thread([&] {
-			int64_t readsize = CacheBlockSize / source->GetBytesPerSample();
+			int64_t readsize = CacheBlockSize / bytes_per_sample / channels;
 			for (size_t i = 0; i < blockcache.size(); i++) {
 				if (cancelled) break;
 				auto actual_read = std::min<int64_t>(readsize, num_samples - i * readsize);
@@ -71,20 +71,22 @@ public:
 
 void RAMAudioProvider::FillBuffer(void *buf, int64_t start, int64_t count) const {
 	auto charbuf = static_cast<char *>(buf);
-	for (int64_t bytes_remaining = count * bytes_per_sample; bytes_remaining; ) {
+	for (int64_t bytes_remaining = count * bytes_per_sample * channels; bytes_remaining; ) {
 		if (start >= decoded_samples) {
 			memset(charbuf, 0, bytes_remaining);
 			break;
 		}
 
-		const int i = (start * bytes_per_sample) >> CacheBits;
-		const int start_offset = (start * bytes_per_sample) & (CacheBlockSize-1);
-		const int read_size = std::min<int>(bytes_remaining, CacheBlockSize - start_offset);
+		const int64_t samples_per_block = CacheBlockSize / bytes_per_sample / channels;
+
+		const size_t i = start / samples_per_block;
+		const int start_offset = (start % samples_per_block) * bytes_per_sample * channels;
+		const int read_size = std::min<int>(bytes_remaining, samples_per_block * bytes_per_sample * channels - start_offset);
 
 		memcpy(charbuf, &blockcache[i][start_offset], read_size);
 		charbuf += read_size;
 		bytes_remaining -= read_size;
-		start += read_size / bytes_per_sample;
+		start += read_size / bytes_per_sample / channels;
 	}
 }
 }
@@ -93,4 +95,4 @@ namespace agi {
 std::unique_ptr<AudioProvider> CreateRAMAudioProvider(std::unique_ptr<AudioProvider> src) {
 	return agi::make_unique<RAMAudioProvider>(std::move(src));
 }
-}
+}
\ No newline at end of file
diff --git a/libaegisub/include/libaegisub/audio/provider.h b/libaegisub/include/libaegisub/audio/provider.h
index 70460a723..741979b12 100644
--- a/libaegisub/include/libaegisub/audio/provider.h
+++ b/libaegisub/include/libaegisub/audio/provider.h
@@ -20,8 +20,8 @@
 #include <libaegisub/fs_fwd.h>
 
 #include <atomic>
-#include <memory>
 #include <vector>
+#include <memory>
 
 namespace agi {
 class AudioProvider {
@@ -37,6 +37,7 @@ protected:
 	bool float_samples = false;
 
 	virtual void FillBuffer(void *buf, int64_t start, int64_t count) const = 0;
+	virtual void FillBufferInt16Mono(int16_t* buf, int64_t start, int64_t count) const;
 
 	void ZeroFill(void *buf, int64_t count) const;
 
@@ -44,7 +45,8 @@ public:
 	virtual ~AudioProvider() = default;
 
 	void GetAudio(void *buf, int64_t start, int64_t count) const;
-	void GetAudioWithVolume(void *buf, int64_t start, int64_t count, double volume) const;
+	void GetInt16MonoAudio(int16_t* buf, int64_t start, int64_t count) const;
+	void GetInt16MonoAudioWithVolume(int16_t *buf, int64_t start, int64_t count, double volume) const;
 
 	int64_t GetNumSamples()     const { return num_samples; }
 	int64_t GetDecodedSamples() const { return decoded_samples; }
diff --git a/meson.build b/meson.build
index 2e3aabcfe..b0595359f 100644
--- a/meson.build
+++ b/meson.build
@@ -7,7 +7,7 @@ project('Aegisub', ['c', 'cpp'],
 cmake = import('cmake')
 
 if host_machine.system() == 'windows'
-    add_project_arguments('-DNOMINMAX', '-D_WIN32_WINNT=0x0601', language: 'cpp')
+    add_project_arguments('-DNOMINMAX', language: 'cpp')
 
     if not get_option('csri').disabled()
         add_global_arguments('-DCSRI_NO_EXPORT', language: 'c')
@@ -266,20 +266,44 @@ if get_option('vapoursynth').enabled()
     dep_avail += 'VapourSynth'
 endif
 
-if host_machine.system() == 'windows' and not get_option('directsound').disabled()
-    dsound_dep = cc.find_library('dsound', required: get_option('directsound'))
-    winmm_dep = cc.find_library('winmm', required: get_option('directsound'))
-    ole32_dep = cc.find_library('ole32', required: get_option('directsound'))
-    have_dsound_h = cc.has_header('dsound.h')
-    if not have_dsound_h and get_option('directsound').enabled()
-        error('DirectSound enabled but dsound.h not found')
+if host_machine.system() == 'windows'
+    if not get_option('directsound').disabled()  # DirectSound detection; skipped when the feature is disabled
+        dsound_dep = cc.find_library('dsound', required: get_option('directsound'))
+        winmm_dep = cc.find_library('winmm', required: get_option('directsound'))
+        ole32_dep = cc.find_library('ole32', required: get_option('directsound'))
+        have_dsound_h = cc.has_header('dsound.h')
+        if not have_dsound_h and get_option('directsound').enabled()
+            error('DirectSound enabled but dsound.h not found')
+        endif
+
+        dxguid_dep = cc.find_library('dxguid', required: true)  # NOTE(review): fatal when missing, even with directsound=auto
+        if dsound_dep.found() and winmm_dep.found() and ole32_dep.found() and dxguid_dep.found() and have_dsound_h
+            deps += [dsound_dep, winmm_dep, ole32_dep, dxguid_dep]
+            conf.set('WITH_DIRECTSOUND', 1)
+            dep_avail += 'DirectSound'
+        endif
+    endif
 
-    dxguid_dep = cc.find_library('dxguid', required: true)
-    if dsound_dep.found() and winmm_dep.found() and ole32_dep.found() and dxguid_dep.found() and have_dsound_h
-        deps += [dsound_dep, winmm_dep, ole32_dep, dxguid_dep]
-        conf.set('WITH_DIRECTSOUND', 1)
-        dep_avail += 'DirectSound'
+    if not get_option('xaudio2').disabled()
+        have_xaudio_h = cc.has_header('xaudio2.h')
+        if not have_xaudio_h and get_option('xaudio2').enabled()
+            error('xaudio2 enabled but xaudio2.h not found')
+        endif
+        # 'required' follows the feature option, so a missing library is only fatal for xaudio2=enabled.
+        xaudio2_dep = cc.find_library('xaudio2', required: get_option('xaudio2'))
+        if have_xaudio_h and xaudio2_dep.found()
+            deps += [xaudio2_dep]
+            conf.set('WITH_XAUDIO2', 1)
+            dep_avail += 'XAudio2'
+            # XAudio2 needs Windows 8 or newer, so target that API level (0x0602).
+            add_project_arguments('-D_WIN32_WINNT=0x0602', language: 'cpp')
+        else
+            # XAudio2 was not found: keep targeting Windows 7 (0x0601) for compatibility.
+            add_project_arguments('-D_WIN32_WINNT=0x0601', language: 'cpp')
+        endif
+    else
+        # XAudio2 is disabled: keep targeting Windows 7 (0x0601) for compatibility.
+        add_project_arguments('-D_WIN32_WINNT=0x0601', language: 'cpp')
     endif
 endif
 
diff --git a/meson_options.txt b/meson_options.txt
index 8be9ca584..5fde15e8a 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -3,7 +3,8 @@ option('openal', type: 'feature', description: 'OpenAL audio output')
 option('libpulse', type: 'feature', description: 'PulseAudio audio output')
 option('portaudio', type: 'feature', description: 'PortAudio audio output')
 option('directsound', type: 'feature', description: 'DirectSound audio output')
-option('default_audio_output', type: 'combo', choices: ['auto', 'ALSA', 'OpenAL', 'PulseAudio', 'PortAudio', 'DirectSound'], description: 'Default audio output')
+option('xaudio2', type: 'feature', description: 'XAudio2 audio output')
+option('default_audio_output', type: 'combo', choices: ['auto', 'ALSA', 'OpenAL', 'PulseAudio', 'PortAudio', 'DirectSound', 'XAudio2'], description: 'Default audio output')
 
 option('ffms2', type: 'feature', description: 'FFMS2 video source')
 option('avisynth', type: 'feature', description: 'AviSynth video source')
diff --git a/src/audio_player.cpp b/src/audio_player.cpp
index f5a8327ca..c46ba0a40 100644
--- a/src/audio_player.cpp
+++ b/src/audio_player.cpp
@@ -43,6 +43,7 @@
 std::unique_ptr<AudioPlayer> CreateAlsaPlayer(agi::AudioProvider *providers, wxWindow *window);
 std::unique_ptr<AudioPlayer> CreateDirectSoundPlayer(agi::AudioProvider *providers, wxWindow *window);
 std::unique_ptr<AudioPlayer> CreateDirectSound2Player(agi::AudioProvider *providers, wxWindow *window);
+std::unique_ptr<AudioPlayer> CreateXAudio2Player(agi::AudioProvider *providers, wxWindow *window);
 std::unique_ptr<AudioPlayer> CreateOpenALPlayer(agi::AudioProvider *providers, wxWindow *window);
 std::unique_ptr<AudioPlayer> CreatePortAudioPlayer(agi::AudioProvider *providers, wxWindow *window);
 std::unique_ptr<AudioPlayer> CreatePulseAudioPlayer(agi::AudioProvider *providers, wxWindow *window);
@@ -63,6 +64,9 @@ namespace {
 		{"DirectSound-old", CreateDirectSoundPlayer, false},
 		{"DirectSound", CreateDirectSound2Player, false},
 #endif
+#ifdef WITH_XAUDIO2
+		{"Xaudio2", CreateXAudio2Player, false},
+#endif
 #ifdef WITH_OPENAL
 		{"OpenAL", CreateOpenALPlayer, false},
 #endif
diff --git a/src/audio_player_alsa.cpp b/src/audio_player_alsa.cpp
index 5a1705622..5a98d9ed7 100644
--- a/src/audio_player_alsa.cpp
+++ b/src/audio_player_alsa.cpp
@@ -127,7 +127,7 @@ void AlsaPlayer::PlaybackThread()
 
 do_setup:
 	snd_pcm_format_t pcm_format;
-	switch (provider->GetBytesPerSample())
+	switch (/*provider->GetBytesPerSample()*/ sizeof(int16_t))
 	{
 	case 1:
 		LOG_D("audio/player/alsa") << "format U8";
@@ -143,7 +143,7 @@ do_setup:
 	if (snd_pcm_set_params(pcm,
 	                       pcm_format,
 	                       SND_PCM_ACCESS_RW_INTERLEAVED,
-	                       provider->GetChannels(),
+	                       /*provider->GetChannels()*/ 1,
 	                       provider->GetSampleRate(),
 	                       1, // allow resample
 	                       100*1000 // 100 milliseconds latency
@@ -151,7 +151,8 @@ do_setup:
 		return;
 	LOG_D("audio/player/alsa") << "set pcm params";
 
-	size_t framesize = provider->GetChannels() * provider->GetBytesPerSample();
+	//size_t framesize = provider->GetChannels() * provider->GetBytesPerSample();
+	size_t framesize = sizeof(int16_t);
 
 	while (true)
 	{
@@ -175,7 +176,7 @@ do_setup:
 		{
 			auto avail = std::min(snd_pcm_avail(pcm), (snd_pcm_sframes_t)(end_position-position));
 			decode_buffer.resize(avail * framesize);
-			provider->GetAudioWithVolume(decode_buffer.data(), position, avail, volume);
+			provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(decode_buffer.data()), position, avail, volume);
 
 			snd_pcm_sframes_t written = 0;
 			while (written <= 0)
@@ -235,7 +236,7 @@ do_setup:
 
 			{
 				decode_buffer.resize(avail * framesize);
-				provider->GetAudioWithVolume(decode_buffer.data(), position, avail, volume);
+				provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(decode_buffer.data()), position, avail, volume);
 				snd_pcm_sframes_t written = 0;
 				while (written <= 0)
 				{
@@ -352,4 +353,4 @@ std::unique_ptr<AudioPlayer> CreateAlsaPlayer(agi::AudioProvider *provider, wxWi
 	return agi::make_unique<AlsaPlayer>(provider);
 }
 
-#endif // WITH_ALSA
+#endif // WITH_ALSA
\ No newline at end of file
diff --git a/src/audio_player_dsound.cpp b/src/audio_player_dsound.cpp
index 01b47b354..8b50492a0 100644
--- a/src/audio_player_dsound.cpp
+++ b/src/audio_player_dsound.cpp
@@ -45,6 +45,7 @@
 
 #include <mmsystem.h>
 #include <dsound.h>
+#include <cguid.h>
 
 namespace {
 class DirectSoundPlayer;
@@ -111,8 +112,10 @@ DirectSoundPlayer::DirectSoundPlayer(agi::AudioProvider *provider, wxWindow *par
 	WAVEFORMATEX waveFormat;
 	waveFormat.wFormatTag = WAVE_FORMAT_PCM;
 	waveFormat.nSamplesPerSec = provider->GetSampleRate();
-	waveFormat.nChannels = provider->GetChannels();
-	waveFormat.wBitsPerSample = provider->GetBytesPerSample() * 8;
+	//waveFormat.nChannels = provider->GetChannels();
+	//waveFormat.wBitsPerSample = provider->GetBytesPerSample() * 8;
+	waveFormat.nChannels = 1;
+	waveFormat.wBitsPerSample = sizeof(int16_t) * 8;
 	waveFormat.nBlockAlign = waveFormat.nChannels * waveFormat.wBitsPerSample / 8;
 	waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign;
 	waveFormat.cbSize = sizeof(waveFormat);
@@ -160,7 +163,7 @@ bool DirectSoundPlayer::FillBuffer(bool fill) {
 	HRESULT res;
 	void *ptr1, *ptr2;
 	unsigned long int size1, size2;
-	int bytesps = provider->GetBytesPerSample();
+	int bytesps = /*provider->GetBytesPerSample()*/ sizeof(int16_t);
 
 	// To write length
 	int toWrite = 0;
@@ -223,8 +226,8 @@ RetryLock:
 	LOG_D_IF(!count1 && !count2, "audio/player/dsound1") << "DS fill: nothing";
 
 	// Get source wave
-	if (count1) provider->GetAudioWithVolume(ptr1, playPos, count1, volume);
-	if (count2) provider->GetAudioWithVolume(ptr2, playPos+count1, count2, volume);
+	if (count1) provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(ptr1), playPos, count1, volume);
+	if (count2) provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(ptr2), playPos+count1, count2, volume);
 	playPos += count1+count2;
 
 	buffer->Unlock(ptr1,count1*bytesps,ptr2,count2*bytesps);
@@ -254,7 +257,7 @@ void DirectSoundPlayer::Play(int64_t start,int64_t count) {
 	FillBuffer(true);
 
 	DWORD play_flag = 0;
-	if (count*provider->GetBytesPerSample() > bufSize) {
+	if (count*/*provider->GetBytesPerSample()*/sizeof(int16_t) > bufSize) {
 		// Start thread
 		thread = new DirectSoundPlayerThread(this);
 		thread->Create();
@@ -371,4 +374,4 @@ std::unique_ptr<AudioPlayer> CreateDirectSoundPlayer(agi::AudioProvider *provide
 	return agi::make_unique<DirectSoundPlayer>(provider, parent);
 }
 
-#endif // WITH_DIRECTSOUND
+#endif // WITH_DIRECTSOUND
\ No newline at end of file
diff --git a/src/audio_player_dsound2.cpp b/src/audio_player_dsound2.cpp
index dd7bf8680..ad9149803 100644
--- a/src/audio_player_dsound2.cpp
+++ b/src/audio_player_dsound2.cpp
@@ -317,13 +317,14 @@ void DirectSoundPlayer2Thread::Run()
 
 	// Describe the wave format
 	WAVEFORMATEX waveFormat;
-	waveFormat.wFormatTag = WAVE_FORMAT_PCM;
 	waveFormat.nSamplesPerSec = provider->GetSampleRate();
+	waveFormat.cbSize = 0;
+	waveFormat.wFormatTag = provider->AreSamplesFloat() ? 3 : WAVE_FORMAT_PCM; // Eh fuck it.
 	waveFormat.nChannels = provider->GetChannels();
 	waveFormat.wBitsPerSample = provider->GetBytesPerSample() * 8;
 	waveFormat.nBlockAlign = waveFormat.nChannels * waveFormat.wBitsPerSample / 8;
 	waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign;
-	waveFormat.cbSize = sizeof(waveFormat);
+	//waveFormat.cbSize = sizeof(waveFormat);
 
 	// And the buffer itself
 	int aim = waveFormat.nAvgBytesPerSec * (wanted_latency*buffer_length)/1000;
@@ -332,7 +333,7 @@ void DirectSoundPlayer2Thread::Run()
 	DWORD bufSize = mid(min,aim,max); // size of entire playback buffer
 	DSBUFFERDESC desc;
 	desc.dwSize = sizeof(DSBUFFERDESC);
-	desc.dwFlags = DSBCAPS_GETCURRENTPOSITION2 | DSBCAPS_GLOBALFOCUS;
+	desc.dwFlags = DSBCAPS_CTRLVOLUME | DSBCAPS_GETCURRENTPOSITION2 | DSBCAPS_GLOBALFOCUS;
 	desc.dwBufferBytes = bufSize;
 	desc.dwReserved = 0;
 	desc.lpwfxFormat = &waveFormat;
@@ -461,6 +462,15 @@ stop_playback:
 			goto do_fill_buffer;
 
 		case WAIT_OBJECT_0+3:
+			{
+				LONG invert_volume = (LONG)((this->volume - 1.0) * 5000.0); // Hrmm weirdly it's half?
+				// Clamped by hand: using min/max here failed to compile for the original author.
+				if (invert_volume > DSBVOLUME_MAX)
+					invert_volume = DSBVOLUME_MAX;
+				else if (invert_volume < DSBVOLUME_MIN / 2)
+					invert_volume = DSBVOLUME_MIN / 2;
+				bfr->SetVolume(invert_volume);
+			}
 			// Change volume
 			// We aren't thread safe right now, filling the buffers grabs volume directly
 			// from the field set by the controlling thread, but it shouldn't be a major
@@ -608,7 +618,7 @@ DWORD DirectSoundPlayer2Thread::FillAndUnlockBuffers(void *buf1, DWORD buf1sz, v
 			buf2sz = 0;
 		}
 
-		provider->GetAudioWithVolume(buf1, input_frame, buf1szf, volume);
+		provider->GetAudio(buf1, input_frame, buf1szf);
 
 		input_frame += buf1szf;
 	}
@@ -621,7 +631,7 @@ DWORD DirectSoundPlayer2Thread::FillAndUnlockBuffers(void *buf1, DWORD buf1sz, v
 			buf2sz = buf2szf * bytes_per_frame;
 		}
 
-		provider->GetAudioWithVolume(buf2, input_frame, buf2szf, volume);
+		provider->GetAudio(buf2, input_frame, buf2szf);
 
 		input_frame += buf2szf;
 	}
@@ -932,4 +942,4 @@ std::unique_ptr<AudioPlayer> CreateDirectSound2Player(agi::AudioProvider *provid
 	return agi::make_unique<DirectSoundPlayer2>(provider, parent);
 }
 
-#endif // WITH_DIRECTSOUND
+#endif // WITH_DIRECTSOUND
\ No newline at end of file
diff --git a/src/audio_player_openal.cpp b/src/audio_player_openal.cpp
index b0f8372bd..d95d804ce 100644
--- a/src/audio_player_openal.cpp
+++ b/src/audio_player_openal.cpp
@@ -125,7 +125,7 @@ public:
 OpenALPlayer::OpenALPlayer(agi::AudioProvider *provider)
 : AudioPlayer(provider)
 , samplerate(provider->GetSampleRate())
-, bpf(provider->GetChannels() * provider->GetBytesPerSample())
+, bpf(/*provider->GetChannels() * provider->GetBytesPerSample()*/sizeof(int16_t))
 {
 	device = alcOpenDevice(nullptr);
 	if (!device) throw AudioPlayerOpenError("Failed opening default OpenAL device");
@@ -241,7 +241,7 @@ void OpenALPlayer::FillBuffers(ALsizei count)
 
 		if (fill_len > 0)
 			// Get fill_len frames of audio
-			provider->GetAudioWithVolume(&decode_buffer[0], cur_frame, fill_len, volume);
+			provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(decode_buffer.data()), cur_frame, fill_len, volume);
 		if ((size_t)fill_len * bpf < decode_buffer.size())
 			// And zerofill the rest
 			memset(&decode_buffer[fill_len * bpf], 0, decode_buffer.size() - fill_len * bpf);
@@ -308,4 +308,4 @@ std::unique_ptr<AudioPlayer> CreateOpenALPlayer(agi::AudioProvider *provider, wx
 	return agi::make_unique<OpenALPlayer>(provider);
 }
 
-#endif // WITH_OPENAL
+#endif // WITH_OPENAL
\ No newline at end of file
diff --git a/src/audio_player_oss.cpp b/src/audio_player_oss.cpp
index 93950baef..0934ce197 100644
--- a/src/audio_player_oss.cpp
+++ b/src/audio_player_oss.cpp
@@ -131,7 +131,7 @@ public:
 
         while (!TestDestroy() && parent->cur_frame < parent->end_frame) {
             int rsize = std::min(wsize, parent->end_frame - parent->cur_frame);
-            parent->provider->GetAudioWithVolume(buf, parent->cur_frame,
+            parent->provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(buf), parent->cur_frame,
                                                  rsize, parent->volume);
             int written = ::write(parent->dspdev, buf, rsize * parent->bpf);
             parent->cur_frame += written / parent->bpf;
@@ -146,7 +146,7 @@ public:
 
 void OSSPlayer::OpenStream()
 {
-    bpf = provider->GetChannels() * provider->GetBytesPerSample();
+    bpf = /*provider->GetChannels() * provider->GetBytesPerSample()*/sizeof(int16_t);
 
     // Open device
     wxString device = to_wx(OPT_GET("Player/Audio/OSS/Device")->GetString());
@@ -162,14 +162,14 @@ void OSSPlayer::OpenStream()
 #endif
 
     // Set number of channels
-    int channels = provider->GetChannels();
+    int channels = /*provider->GetChannels()*/1;
     if (ioctl(dspdev, SNDCTL_DSP_CHANNELS, &channels) < 0) {
         throw AudioPlayerOpenError("OSS player: setting channels failed");
     }
 
     // Set sample format
     int sample_format;
-    switch (provider->GetBytesPerSample()) {
+    switch (/*provider->GetBytesPerSample()*/sizeof(int16_t)) {
         case 1:
             sample_format = AFMT_S8;
             break;
@@ -283,4 +283,4 @@ std::unique_ptr<AudioPlayer> CreateOSSPlayer(agi::AudioProvider *provider, wxWin
     return agi::make_unique<OSSPlayer>(provider);
 }
 
-#endif // WITH_OSS
+#endif // WITH_OSS
\ No newline at end of file
diff --git a/src/audio_player_portaudio.cpp b/src/audio_player_portaudio.cpp
index 7a5babcdc..513b8b28b 100644
--- a/src/audio_player_portaudio.cpp
+++ b/src/audio_player_portaudio.cpp
@@ -140,7 +140,7 @@ void PortAudioPlayer::OpenStream() {
 		const PaDeviceInfo *device_info = Pa_GetDeviceInfo((*device_ids)[i]);
 		PaStreamParameters pa_output_p;
 		pa_output_p.device = (*device_ids)[i];
-		pa_output_p.channelCount = provider->GetChannels();
+		pa_output_p.channelCount = /*provider->GetChannels()*/ 1;
 		pa_output_p.sampleFormat = paInt16;
 		pa_output_p.suggestedLatency = device_info->defaultLowOutputLatency;
 		pa_output_p.hostApiSpecificStreamInfo = nullptr;
@@ -222,7 +222,7 @@ int PortAudioPlayer::paCallback(const void *inputBuffer, void *outputBuffer,
 
 	// Play something
 	if (lenAvailable > 0) {
-		player->provider->GetAudioWithVolume(outputBuffer, player->current, lenAvailable, player->GetVolume());
+		player->provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(outputBuffer), player->current, lenAvailable, player->GetVolume());
 
 		// Set play position
 		player->current += lenAvailable;
@@ -283,4 +283,4 @@ std::unique_ptr<AudioPlayer> CreatePortAudioPlayer(agi::AudioProvider *provider,
 	return agi::make_unique<PortAudioPlayer>(provider);
 }
 
-#endif // WITH_PORTAUDIO
+#endif // WITH_PORTAUDIO
\ No newline at end of file
diff --git a/src/audio_player_pulse.cpp b/src/audio_player_pulse.cpp
index 7174356bd..09882cb74 100644
--- a/src/audio_player_pulse.cpp
+++ b/src/audio_player_pulse.cpp
@@ -133,11 +133,11 @@ PulseAudioPlayer::PulseAudioPlayer(agi::AudioProvider *provider) : AudioPlayer(p
 	}
 
 	// Set up stream
-	bpf = provider->GetChannels() * provider->GetBytesPerSample();
+	bpf = /*provider->GetChannels() * provider->GetBytesPerSample()*/sizeof(int16_t);
 	pa_sample_spec ss;
 	ss.format = PA_SAMPLE_S16LE; // FIXME
 	ss.rate = provider->GetSampleRate();
-	ss.channels = provider->GetChannels();
+	ss.channels = /*provider->GetChannels()*/1;
 	pa_channel_map map;
 	pa_channel_map_init_auto(&map, ss.channels, PA_CHANNEL_MAP_DEFAULT);
 
@@ -308,7 +308,7 @@ void PulseAudioPlayer::pa_stream_write(pa_stream *p, size_t length, PulseAudioPl
 	unsigned long maxframes = thread->end_frame - thread->cur_frame;
 	if (frames > maxframes) frames = maxframes;
 	void *buf = malloc(frames * bpf);
-	thread->provider->GetAudioWithVolume(buf, thread->cur_frame, frames, thread->volume);
+	thread->provider->GetInt16MonoAudioWithVolume(reinterpret_cast<int16_t*>(buf), thread->cur_frame, frames, thread->volume);
 	::pa_stream_write(p, buf, frames*bpf, free, 0, PA_SEEK_RELATIVE);
 	thread->cur_frame += frames;
 }
@@ -324,4 +324,4 @@ void PulseAudioPlayer::pa_stream_notify(pa_stream *p, PulseAudioPlayer *thread)
 std::unique_ptr<AudioPlayer> CreatePulseAudioPlayer(agi::AudioProvider *provider, wxWindow *) {
 	return agi::make_unique<PulseAudioPlayer>(provider);
 }
-#endif // WITH_LIBPULSE
+#endif // WITH_LIBPULSE
\ No newline at end of file
diff --git a/src/audio_player_xaudio2.cpp b/src/audio_player_xaudio2.cpp
new file mode 100644
index 000000000..a7cbf0f28
--- /dev/null
+++ b/src/audio_player_xaudio2.cpp
@@ -0,0 +1,694 @@
+// Copyright (c) 2019, Qirui Wang
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of the Aegisub Group nor the names of its contributors
+//     may be used to endorse or promote products derived from this software
+//     without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Aegisub Project http://www.aegisub.org/
+
+#ifdef WITH_XAUDIO2
+#include "include/aegisub/audio_player.h"
+
+#include "options.h"
+
+#include <libaegisub/audio/provider.h>
+#include <libaegisub/scoped_ptr.h>
+#include <libaegisub/log.h>
+#include <libaegisub/make_unique.h>
+
+#ifndef XAUDIO2_REDIST
+#include <xaudio2.h>
+#else
+#include <xaudio2redist.h>
+#endif
+
+namespace {
+class XAudio2Thread;
+
+/// @class XAudio2Player
+/// @brief XAudio2-based audio player
+///
+/// The core design idea is to have a playback thread that performs all playback operations, and use the player object as a proxy to send commands to the playback thread.
+class XAudio2Player final : public AudioPlayer {
+	/// The playback thread
+	std::unique_ptr<XAudio2Thread> thread;
+
+	/// Desired length in milliseconds to write ahead of the playback cursor
+	int WantedLatency;
+
+	/// Multiplier for WantedLatency to get total buffer length
+	int BufferLength;
+
+	/// @brief Tell whether playback thread is alive, discarding the thread object if it has died
+	/// @return True if there is a playback thread and it's ready
+	bool IsThreadAlive();
+
+public:
+	/// @brief Constructor, reads the latency options and starts the playback thread
+	XAudio2Player(agi::AudioProvider* provider);
+	/// @brief Destructor
+	~XAudio2Player() = default;
+
+	/// @brief Start playback
+	/// @param start First audio frame to play
+	/// @param count Number of audio frames to play
+	void Play(int64_t start, int64_t count);
+
+	/// @brief Stop audio playback
+	/// Does nothing if there is no live playback thread
+	void Stop();
+
+	/// @brief Tell whether playback is active
+	/// @return True if audio is playing back
+	bool IsPlaying();
+
+	/// @brief Get playback end position
+	/// @return Audio frame index
+	///
+	/// Returns 0 if there is no playback thread or the thread reported an error
+	int64_t GetEndPosition();
+	/// @brief Get approximate playback position
+	/// @return Index of audio frame user is currently hearing
+	///
+	/// Returns 0 if playback is stopped or there is no playback thread
+	int64_t GetCurrentPosition();
+
+	/// @brief Change playback end position
+	/// @param pos New end position
+	void SetEndPosition(int64_t pos);
+
+	/// @brief Change playback volume
+	/// @param vol Amplification factor
+	void SetVolume(double vol);
+};
+
+/// @brief RAII support class to init and de-init the COM library
+struct COMInitialization {
+
+	/// Flag set if an inited COM library is managed
+	bool inited = false;
+
+	/// @brief Destructor, de-inits COM if it is inited
+	~COMInitialization() {
+		if (inited) CoUninitialize();
+	}
+
+	/// @brief Initialise the COM library as single-threaded apartment if isn't already inited by us
+	bool Init() {
+		if (!inited && SUCCEEDED(CoInitialize(nullptr)))
+			inited = true;
+		return inited;
+	}
+};
+
+struct ReleaseCOMObject {
+	void operator()(IUnknown* obj) {
+		if (obj) obj->Release();
+	}
+};
+
+/// @brief RAII wrapper around Win32 HANDLE type
+struct Win32KernelHandle final : public agi::scoped_holder<HANDLE, BOOL(__stdcall*)(HANDLE)> {
+	/// @brief Create with a managed handle
+	/// @param handle Win32 handle to manage
+	Win32KernelHandle(HANDLE handle = 0) :scoped_holder(handle, CloseHandle) {}
+
+	Win32KernelHandle& operator=(HANDLE new_handle) {
+		scoped_holder::operator=(new_handle);
+		return *this;
+	}
+};
+
+/// @class XAudio2Thread
+/// @brief Playback thread class for XAudio2Player
+///
+/// Not based on wxThread, but uses Win32 threads directly
+class XAudio2Thread :public IXAudio2VoiceCallback {
+	/// @brief Win32 thread entry point
+	/// @param parameter Pointer to our thread object
+	/// @return Thread return value, always 0 here
+	static unsigned int __stdcall ThreadProc(void* parameter);
+	/// @brief Thread entry point
+	void Run();
+
+	/// @brief Check for error state and throw exception if one occurred
+	void CheckError();
+
+	/// Win32 handle to the thread
+	Win32KernelHandle thread_handle;
+
+	/// Event object, world to thread, set to start playback
+	Win32KernelHandle event_start_playback;
+
+	/// Event object, world to thread, set to stop playback
+	Win32KernelHandle event_stop_playback;
+
+	/// Event object, world to thread, set if playback end time was updated
+	Win32KernelHandle event_update_end_time;
+
+	/// Event object, world to thread, set if the volume was changed
+	Win32KernelHandle event_set_volume;
+
+	/// Event object, XAudio2 callback to thread, set by OnBufferEnd when a queued buffer is free to be refilled
+	Win32KernelHandle event_buffer_end;
+
+	/// Event object, world to thread, set if the thread should end as soon as possible
+	Win32KernelHandle event_kill_self;
+
+	/// Event object, thread to world, set when the thread has entered its main loop
+	Win32KernelHandle thread_running;
+
+	/// Event object, thread to world, set when playback is ongoing
+	Win32KernelHandle is_playing;
+
+	/// Event object, thread to world, set if an error state has occurred (implies thread is dying)
+	Win32KernelHandle error_happened;
+
+	/// Statically allocated error message text describing reason for error_happened being set
+	const char* error_message = nullptr;
+
+	/// Playback volume, 1.0 is "unchanged"
+	double volume = 1.0;
+
+	/// Audio frame to start playback at
+	int64_t start_frame = 0;
+
+	/// Audio frame to end playback at
+	int64_t end_frame = 0;
+
+	/// Desired length in milliseconds to write ahead of the playback cursor
+	int wanted_latency;
+
+	/// Multiplier for WantedLatency to get total buffer length
+	int buffer_length;
+
+	/// System millisecond timestamp of last playback start, used to calculate playback position
+	ULONGLONG last_playback_restart;
+
+	/// Audio provider to take sample data from
+	agi::AudioProvider* provider;
+
+	/// Buffer occupied indicator
+	std::vector<bool> buffer_occupied;
+
+public:
+	/// @brief Constructor, creates and starts playback thread
+	/// @param provider       Audio provider to take sample data from
+	/// @param WantedLatency Desired length in milliseconds to write ahead of the playback cursor
+	/// @param BufferLength  Multiplier for WantedLatency to get total buffer length
+	XAudio2Thread(agi::AudioProvider* provider, int WantedLatency, int BufferLength);
+	/// @brief Destructor, waits for thread to have died
+	~XAudio2Thread();
+
+	// IXAudio2VoiceCallback
+	void STDMETHODCALLTYPE OnVoiceProcessingPassStart(UINT32 BytesRequired) override {}
+	void STDMETHODCALLTYPE OnVoiceProcessingPassEnd() override {}
+	void STDMETHODCALLTYPE OnStreamEnd() override {}
+	void STDMETHODCALLTYPE OnBufferStart(void* pBufferContext) override {}
+	void STDMETHODCALLTYPE OnBufferEnd(void* pBufferContext) override {
+		intptr_t i = reinterpret_cast<intptr_t>(pBufferContext);
+		buffer_occupied[i] = false;
+		SetEvent(event_buffer_end);
+	}
+	void STDMETHODCALLTYPE OnLoopEnd(void* pBufferContext) override {}
+	void STDMETHODCALLTYPE OnVoiceError(void* pBufferContext, HRESULT Error) override {}
+
+	/// @brief Start audio playback
+	/// @param start Audio frame to start playback at
+	/// @param count Number of audio frames to play
+	void Play(int64_t start, int64_t count);
+
+	/// @brief Stop audio playback
+	void Stop();
+
+	/// @brief Change audio playback end point
+	/// @param new_end_frame New last audio frame to play
+	///
+	/// Playback stops instantly if new_end_frame is before the current playback position
+	void SetEndFrame(int64_t new_end_frame);
+
+	/// @brief Change audio playback volume
+	/// @param new_volume New playback amplification factor, 1.0 is "unchanged"
+	void SetVolume(double new_volume);
+
+	/// @brief Tell whether audio playback is active
+	/// @return True if audio is being played back, false if it is not
+	bool IsPlaying();
+
+	/// @brief Get approximate current audio frame being heard by the user
+	/// @return Audio frame index
+	///
+	/// Returns 0 if not playing
+	int64_t GetCurrentFrame();
+
+	/// @brief Get audio playback end point
+	/// @return Audio frame index
+	int64_t GetEndFrame();
+
+	/// @brief Tell whether playback thread has died
+	/// @return True if thread is no longer running
+	bool IsDead();
+};
+
+unsigned int __stdcall XAudio2Thread::ThreadProc(void* parameter) {
+	static_cast<XAudio2Thread*>(parameter)->Run();
+	return 0;
+}
+
+/// Macro used to set error_message, error_happened and end the thread
+#define REPORT_ERROR(msg) \
+{ \
+	ResetEvent(is_playing); \
+	error_message = "XAudio2Thread: " msg; \
+	SetEvent(error_happened); \
+	return; \
+}
+
+/// @brief Thread entry point: sets up XAudio2, then services control events until told to die
+void XAudio2Thread::Run() {
+	COMInitialization COM_library;
+	if (!COM_library.Init()) {
+		REPORT_ERROR("Could not initialise COM")
+	}
+	IXAudio2* pXAudio2;
+	IXAudio2SourceVoice* pSourceVoice;
+	HRESULT hr;
+	// Sample buffers are declared before the engine owner so they outlive the voices reading them
+	std::vector<std::vector<BYTE> > buff(buffer_length);
+	if (FAILED(hr = XAudio2Create(&pXAudio2, 0, XAUDIO2_DEFAULT_PROCESSOR))) {
+		REPORT_ERROR("Failed initializing XAudio2")
+	}
+	// Releases the engine on every exit path, including the early returns in REPORT_ERROR
+	std::unique_ptr<IXAudio2, ReleaseCOMObject> xaudio2_owner(pXAudio2);
+	IXAudio2MasteringVoice* pMasterVoice = NULL;
+	if (FAILED(hr = pXAudio2->CreateMasteringVoice(&pMasterVoice))) {
+		REPORT_ERROR("Failed initializing XAudio2 MasteringVoice")
+	}
+
+	// Describe the wave format
+	WAVEFORMATEX wfx;
+	wfx.nSamplesPerSec = provider->GetSampleRate();
+	wfx.cbSize = 0;
+	bool original = true;
+	wfx.wFormatTag = provider->AreSamplesFloat() ? WAVE_FORMAT_IEEE_FLOAT : WAVE_FORMAT_PCM;
+	wfx.nChannels = provider->GetChannels();
+	wfx.wBitsPerSample = provider->GetBytesPerSample() * 8;
+	wfx.nBlockAlign = wfx.nChannels * wfx.wBitsPerSample / 8;
+	wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign;
+
+	if (FAILED(hr = pXAudio2->CreateSourceVoice(&pSourceVoice, &wfx, 0, 2, this))) {
+		if (hr == XAUDIO2_E_INVALID_CALL) {
+			// Retry with 16bit mono
+			original = false;
+			wfx.wFormatTag = WAVE_FORMAT_PCM;
+			wfx.nChannels = 1;
+			wfx.wBitsPerSample = sizeof(int16_t) * 8;
+			wfx.nBlockAlign = wfx.nChannels * wfx.wBitsPerSample / 8;
+			wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign;
+			if (FAILED(hr = pXAudio2->CreateSourceVoice(&pSourceVoice, &wfx, 0, 2, this))) {
+				REPORT_ERROR("Failed initializing XAudio2 SourceVoice")
+			}
+		}
+		else {
+			REPORT_ERROR("Failed initializing XAudio2 SourceVoice")
+		}
+	}
+
+	// Now we're ready to roll!
+	SetEvent(thread_running);
+	bool running = true;
+
+	HANDLE events_to_wait[] = {
+		event_start_playback,
+		event_stop_playback,
+		event_update_end_time,
+		event_set_volume,
+		event_buffer_end,
+		event_kill_self
+	};
+
+	int64_t next_input_frame = 0;
+	bool playback_should_be_running = false;
+	const int wanted_frames = wanted_latency * wfx.nSamplesPerSec / 1000;
+	const DWORD wanted_latency_bytes = wanted_frames * wfx.nBlockAlign;
+	for (auto& i : buff)
+		i.resize(wanted_latency_bytes);
+
+	while (running) {
+		DWORD wait_result = WaitForMultipleObjects(sizeof(events_to_wait) / sizeof(HANDLE), events_to_wait, FALSE, INFINITE);
+
+		switch (wait_result) {
+		case WAIT_OBJECT_0 + 0:
+			// Start or restart playback
+			pSourceVoice->Stop();
+			pSourceVoice->FlushSourceBuffers();
+
+			next_input_frame = start_frame;
+			playback_should_be_running = true;
+			pSourceVoice->Start();
+			SetEvent(is_playing);
+			goto do_fill_buffer;
+
+		case WAIT_OBJECT_0 + 1:
+		stop_playback:
+			// Stop playing
+			ResetEvent(is_playing);
+			pSourceVoice->Stop();
+			pSourceVoice->FlushSourceBuffers();
+			playback_should_be_running = false;
+			break;
+
+		case WAIT_OBJECT_0 + 2:
+			// Set end frame
+			if (end_frame <= next_input_frame)
+				goto stop_playback;
+			goto do_fill_buffer;
+
+		case WAIT_OBJECT_0 + 3:
+			// Change volume
+			pSourceVoice->SetVolume(volume);
+			break;
+
+		case WAIT_OBJECT_0 + 4:
+			// Buffer end
+		do_fill_buffer:
+			// Time to fill more into buffer
+			if (!playback_should_be_running)
+				break;
+
+			for (int i = 0; i < buffer_length; ++i) {
+				if (!buffer_occupied[i]) {
+					const int64_t fill_len = std::min<int64_t>(end_frame - next_input_frame, wanted_frames); // compare in 64 bits, no truncation
+					if (fill_len <= 0)
+						break;
+					buffer_occupied[i] = true;
+					if (original)
+						provider->GetAudio(buff[i].data(), next_input_frame, fill_len);
+					else
+						provider->GetInt16MonoAudio(reinterpret_cast<int16_t*>(buff[i].data()), next_input_frame, fill_len);
+					next_input_frame += fill_len;
+					XAUDIO2_BUFFER xbf;
+					xbf.Flags = next_input_frame == end_frame ? XAUDIO2_END_OF_STREAM : 0; // next_input_frame already includes fill_len
+					xbf.AudioBytes = static_cast<UINT32>(fill_len * wfx.nBlockAlign);
+					xbf.pAudioData = buff[i].data();
+					xbf.PlayBegin = 0;
+					xbf.PlayLength = 0;
+					xbf.LoopBegin = 0;
+					xbf.LoopLength = 0;
+					xbf.LoopCount = 0;
+					xbf.pContext = reinterpret_cast<void*>(static_cast<intptr_t>(i));
+					if (FAILED(hr = pSourceVoice->SubmitSourceBuffer(&xbf))) {
+						REPORT_ERROR("Failed initializing Submit Buffer")
+					}
+				}
+			}
+			break;
+
+		case WAIT_OBJECT_0 + 5:
+			// Perform suicide; xaudio2_owner releases the engine once the loop has exited
+			running = false;
+			ResetEvent(is_playing);
+			playback_should_be_running = false;
+			break;
+
+		default:
+			REPORT_ERROR("Something bad happened while waiting on events in playback loop, either the wait failed or an event object was abandoned.")
+		}
+	}
+}
+
+#undef REPORT_ERROR
+
+void XAudio2Thread::CheckError()
+{
+	try {
+		switch (WaitForSingleObject(error_happened, 0))
+		{
+		case WAIT_OBJECT_0:
+			throw error_message;
+
+		case WAIT_ABANDONED:
+			throw "The XAudio2Thread error signal event was abandoned, somehow. This should not happen.";
+
+		case WAIT_FAILED:
+			throw "Failed checking state of XAudio2Thread error signal event.";
+
+		case WAIT_TIMEOUT:
+		default:
+			return;
+		}
+	}
+	catch (...) {
+		ResetEvent(is_playing);
+		ResetEvent(thread_running);
+		throw;
+	}
+}
+
+XAudio2Thread::XAudio2Thread(agi::AudioProvider* provider, int WantedLatency, int BufferLength)
+	: event_start_playback(CreateEvent(0, FALSE, FALSE, 0))
+	, event_stop_playback(CreateEvent(0, FALSE, FALSE, 0))
+	, event_update_end_time(CreateEvent(0, FALSE, FALSE, 0))
+	, event_set_volume(CreateEvent(0, FALSE, FALSE, 0))
+	, event_buffer_end(CreateEvent(0, FALSE, FALSE, 0))
+	, event_kill_self(CreateEvent(0, FALSE, FALSE, 0))
+	, thread_running(CreateEvent(0, TRUE, FALSE, 0))
+	, is_playing(CreateEvent(0, TRUE, FALSE, 0))
+	, error_happened(CreateEvent(0, FALSE, FALSE, 0))
+	, wanted_latency(WantedLatency)
+	, buffer_length(BufferLength < XAUDIO2_MAX_QUEUED_BUFFERS ? BufferLength : XAUDIO2_MAX_QUEUED_BUFFERS)
+	, provider(provider)
+	, buffer_occupied(BufferLength)
+{
+	if (!(thread_handle = (HANDLE)_beginthreadex(0, 0, ThreadProc, this, 0, 0))) {
+		throw AudioPlayerOpenError("Failed creating playback thread in XAudio2Player. This is bad.");
+	}
+
+	HANDLE running_or_error[] = { thread_running, error_happened };
+	switch (WaitForMultipleObjects(2, running_or_error, FALSE, INFINITE)) {
+	case WAIT_OBJECT_0:
+		// running, all good
+		return;
+
+	case WAIT_OBJECT_0 + 1:
+		// error happened, we fail
+		throw AudioPlayerOpenError(error_message ? error_message : "Failed wait for thread start or thread error in XAudio2Player. This is bad.");
+
+	default:
+		throw AudioPlayerOpenError("Failed wait for thread start or thread error in XAudio2Player. This is bad.");
+	}
+}
+
+XAudio2Thread::~XAudio2Thread() {
+	SetEvent(event_kill_self);
+	WaitForSingleObject(thread_handle, INFINITE);
+}
+
+void XAudio2Thread::Play(int64_t start, int64_t count)
+{
+	CheckError();
+
+	start_frame = start;
+	end_frame = start + count;
+	SetEvent(event_start_playback);
+
+	last_playback_restart = GetTickCount64();
+
+	// Block until playback actually begins to avoid race conditions with
+	// checking if playback is in progress
+	HANDLE events_to_wait[] = { is_playing, error_happened };
+	switch (WaitForMultipleObjects(2, events_to_wait, FALSE, INFINITE)) {
+	case WAIT_OBJECT_0 + 0: // Playing
+		LOG_D("audio/player/xaudio2") << "Playback begun";
+		break;
+	case WAIT_OBJECT_0 + 1: // Error
+		throw error_message;
+	default:
+		throw agi::InternalError("Unexpected result from WaitForMultipleObjects in XAudio2Thread::Play");
+	}
+}
+
+void XAudio2Thread::Stop() {
+	CheckError();
+
+	SetEvent(event_stop_playback);
+}
+
+void XAudio2Thread::SetEndFrame(int64_t new_end_frame) {
+	CheckError();
+
+	end_frame = new_end_frame;
+	SetEvent(event_update_end_time);
+}
+
+void XAudio2Thread::SetVolume(double new_volume) {
+	CheckError();
+
+	volume = new_volume;
+	SetEvent(event_set_volume);
+}
+
+/// @brief Tell whether audio playback is active
+/// @return True if the is_playing event is currently signalled
+///
+/// Throws a C string if an error was flagged or the event could not be polled.
+bool XAudio2Thread::IsPlaying() {
+	CheckError();
+
+	// Zero timeout: just sample the event state without blocking
+	const DWORD state = WaitForSingleObject(is_playing, 0);
+
+	if (state == WAIT_ABANDONED)
+		throw "The XAudio2Thread playback state event was abandoned, somehow. This should not happen.";
+
+	if (state == WAIT_FAILED)
+		throw "Failed checking state of XAudio2Thread playback state event.";
+
+	// WAIT_TIMEOUT and anything unexpected both mean "not playing"
+	return state == WAIT_OBJECT_0;
+}
+
+int64_t XAudio2Thread::GetCurrentFrame() {
+	CheckError();
+	if (!IsPlaying()) return 0;
+	ULONGLONG milliseconds_elapsed = GetTickCount64() - last_playback_restart;
+	return start_frame + milliseconds_elapsed * provider->GetSampleRate() / 1000;
+}
+
+int64_t XAudio2Thread::GetEndFrame() {
+	CheckError();
+	return end_frame;
+}
+
+/// @brief Report whether the playback thread is no longer usable
+///
+/// thread_running is polled with a zero timeout, so this never blocks.
+/// Any outcome other than the event being signalled counts as dead.
+bool XAudio2Thread::IsDead() {
+	const DWORD poll_result = WaitForSingleObject(thread_running, 0);
+	const bool still_running = (poll_result == WAIT_OBJECT_0);
+	return !still_running;
+}
+
+XAudio2Player::XAudio2Player(agi::AudioProvider* provider) :AudioPlayer(provider) {
+	// The buffer will hold BufferLength times WantedLatency milliseconds of audio
+	WantedLatency = OPT_GET("Player/Audio/DirectSound/Buffer Latency")->GetInt();
+	BufferLength = OPT_GET("Player/Audio/DirectSound/Buffer Length")->GetInt();
+
+	// sanity checking
+	if (WantedLatency <= 0)
+		WantedLatency = 100;
+	if (BufferLength <= 0)
+		BufferLength = 5;
+
+	try {
+		thread = agi::make_unique<XAudio2Thread>(provider, WantedLatency, BufferLength);
+	}
+	catch (const char* msg) {
+		LOG_E("audio/player/xaudio2") << msg;
+		throw AudioPlayerOpenError(msg);
+	}
+}
+
+bool XAudio2Player::IsThreadAlive() {
+	// Discard a thread object whose worker has died so callers only ever see a usable one
+	const bool worker_died = thread && thread->IsDead();
+	if (worker_died) thread.reset();
+	return thread != nullptr;
+}
+
+void XAudio2Player::Play(int64_t start, int64_t count) {
+	try {
+		if (IsThreadAlive()) thread->Play(start, count); // thread is null once a dead worker has been discarded
+	}
+	catch (const char* msg) {
+		LOG_E("audio/player/xaudio2") << msg;
+	}
+}
+
+void XAudio2Player::Stop() {
+	try {
+		if (IsThreadAlive()) thread->Stop();
+	}
+	catch (const char* msg) {
+		LOG_E("audio/player/xaudio2") << msg;
+	}
+}
+
+bool XAudio2Player::IsPlaying() {
+	try {
+		if (!IsThreadAlive()) return false;
+		return thread->IsPlaying();
+	}
+	catch (const char* msg) {
+		LOG_E("audio/player/xaudio2") << msg;
+		return false;
+	}
+}
+
+int64_t XAudio2Player::GetEndPosition() {
+	try {
+		if (!IsThreadAlive()) return 0;
+		return thread->GetEndFrame();
+	}
+	catch (const char* msg) {
+		LOG_E("audio/player/xaudio2") << msg;
+		return 0;
+	}
+}
+
+int64_t XAudio2Player::GetCurrentPosition() {
+	try {
+		if (!IsThreadAlive()) return 0;
+		return thread->GetCurrentFrame();
+	}
+	catch (const char* msg) {
+		LOG_E("audio/player/xaudio2") << msg;
+		return 0;
+	}
+}
+
+void XAudio2Player::SetEndPosition(int64_t pos) {
+	try {
+		if (IsThreadAlive()) thread->SetEndFrame(pos);
+	}
+	catch (const char* msg) {
+		LOG_E("audio/player/xaudio2") << msg;
+	}
+}
+
+void XAudio2Player::SetVolume(double vol) {
+	try {
+		if (IsThreadAlive()) thread->SetVolume(vol);
+	}
+	catch (const char* msg) {
+		LOG_E("audio/player/xaudio2") << msg;
+	}
+}
+}
+
+std::unique_ptr<AudioPlayer> CreateXAudio2Player(agi::AudioProvider* provider, wxWindow*) {
+	return agi::make_unique<XAudio2Player>(provider);
+}
+
+#endif // WITH_XAUDIO2
diff --git a/src/audio_renderer_spectrum.cpp b/src/audio_renderer_spectrum.cpp
index 217090a6f..7190c448a 100644
--- a/src/audio_renderer_spectrum.cpp
+++ b/src/audio_renderer_spectrum.cpp
@@ -208,8 +208,8 @@ void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block)
 	assert(block);
 
 	int64_t first_sample = (((int64_t)block_index) << derivation_dist) - ((int64_t)1 << derivation_size);
-	provider->GetAudio(&audio_scratch[0], first_sample, 2 << derivation_size);
-
+	provider->GetInt16MonoAudio(audio_scratch.data(), first_sample, 2 << derivation_size);
+	
 	// Because the FFTs used here are unnormalized DFTs, we have to compensate
 	// the possible length difference between derivation_size used in the
 	// calculations and its user-provided counterpart. Thus, the display is
diff --git a/src/audio_renderer_waveform.cpp b/src/audio_renderer_waveform.cpp
index d5bb802fb..789dca024 100644
--- a/src/audio_renderer_waveform.cpp
+++ b/src/audio_renderer_waveform.cpp
@@ -88,7 +88,7 @@ void AudioWaveformRenderer::Render(wxBitmap &bmp, int start, AudioRenderingStyle
 
 	for (int x = 0; x < rect.width; ++x)
 	{
-		provider->GetAudio(audio_buffer.get(), (int64_t)cur_sample, (int64_t)pixel_samples);
+		provider->GetInt16MonoAudio(reinterpret_cast<int16_t*>(audio_buffer.get()), (int64_t)cur_sample, (int64_t)pixel_samples);
 		cur_sample += pixel_samples;
 
 		int peak_min = 0, peak_max = 0;
diff --git a/src/meson.build b/src/meson.build
index 22999a2c3..c4ca4fa1d 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -233,7 +233,7 @@ opt_src = [
     ['OSS', 'audio_player_oss.cpp'],
     ['DirectSound', ['audio_player_dsound.cpp',
                      'audio_player_dsound2.cpp']],
-
+    ['XAudio2', 'audio_player_xaudio2.cpp'],
     ['FFMS2', ['audio_provider_ffmpegsource.cpp',
                'video_provider_ffmpegsource.cpp',
                'ffmpegsource_common.cpp']],
diff --git a/src/res/res.rc b/src/res/res.rc
index 26f268165..0fdf8cb21 100644
--- a/src/res/res.rc
+++ b/src/res/res.rc
@@ -39,8 +39,13 @@ eyedropper_cursor CURSOR "../bitmaps/windows/eyedropper.cur"
 #endif
 
 VS_VERSION_INFO VERSIONINFO
+#ifdef TAGGED_RELEASE
 FILEVERSION    	RESOURCE_BASE_VERSION, BUILD_GIT_VERSION_NUMBER
 PRODUCTVERSION 	RESOURCE_BASE_VERSION, 0
+#else
+FILEVERSION    	BUILD_GIT_VERSION_NUMBER, BUILD_GIT_VERSION_NUMBER
+PRODUCTVERSION 	BUILD_GIT_VERSION_NUMBER, 0
+#endif
 FILEFLAGSMASK  	VS_FFI_FILEFLAGSMASK
 FILEFLAGS      	(AGI_RC_FLAG_DEBUG|AGI_RC_FLAG_PRERELEASE)
 FILEOS         	VOS__WINDOWS32
diff --git a/tests/tests/audio.cpp b/tests/tests/audio.cpp
index 23255c042..300834e5a 100644
--- a/tests/tests/audio.cpp
+++ b/tests/tests/audio.cpp
@@ -172,21 +172,21 @@ TEST(lagi_audio, save_audio_clip_out_of_audio_range) {
 
 TEST(lagi_audio, get_with_volume) {
 	TestAudioProvider<> provider;
-	uint16_t buff[4];
+	int16_t buff[4];
 
-	provider.GetAudioWithVolume(buff, 0, 4, 1.0);
+	provider.GetInt16MonoAudioWithVolume(buff, 0, 4, 1.0);
 	EXPECT_EQ(0, buff[0]);
 	EXPECT_EQ(1, buff[1]);
 	EXPECT_EQ(2, buff[2]);
 	EXPECT_EQ(3, buff[3]);
 
-	provider.GetAudioWithVolume(buff, 0, 4, 0.0);
+	provider.GetInt16MonoAudioWithVolume(buff, 0, 4, 0.0);
 	EXPECT_EQ(0, buff[0]);
 	EXPECT_EQ(0, buff[1]);
 	EXPECT_EQ(0, buff[2]);
 	EXPECT_EQ(0, buff[3]);
 
-	provider.GetAudioWithVolume(buff, 0, 4, 2.0);
+	provider.GetInt16MonoAudioWithVolume(buff, 0, 4, 2.0);
 	EXPECT_EQ(0, buff[0]);
 	EXPECT_EQ(2, buff[1]);
 	EXPECT_EQ(4, buff[2]);
@@ -195,8 +195,8 @@ TEST(lagi_audio, get_with_volume) {
 
 TEST(lagi_audio, volume_should_clamp_rather_than_wrap) {
 	TestAudioProvider<> provider;
-	uint16_t buff[1];
-	provider.GetAudioWithVolume(buff, 30000, 1, 2.0);
+	int16_t buff[1];
+	provider.GetInt16MonoAudioWithVolume(buff, 30000, 1, 2.0);
 	EXPECT_EQ(SHRT_MAX, buff[0]);
 }
 
@@ -232,7 +232,7 @@ TEST(lagi_audio, convert_8bit) {
 	auto provider = agi::CreateConvertAudioProvider(agi::make_unique<TestAudioProvider<uint8_t>>());
 
 	int16_t data[256];
-	provider->GetAudio(data, 0, 256);
+	provider->GetInt16MonoAudio(data, 0, 256);
 	for (int i = 0; i < 256; ++i)
 		ASSERT_EQ((i - 128) * 256, data[i]);
 }
@@ -243,13 +243,13 @@ TEST(lagi_audio, convert_32bit) {
 	auto provider = agi::CreateConvertAudioProvider(std::move(src));
 
 	int16_t sample;
-	provider->GetAudio(&sample, 0, 1);
+	provider->GetInt16MonoAudio(&sample, 0, 1);
 	EXPECT_EQ(SHRT_MIN, sample);
 
-	provider->GetAudio(&sample, 1LL << 31, 1);
+	provider->GetInt16MonoAudio(&sample, 1LL << 31, 1);
 	EXPECT_EQ(0, sample);
 
-	provider->GetAudio(&sample, (1LL << 32) - 1, 1);
+	provider->GetInt16MonoAudio(&sample, (1LL << 32) - 1, 1);
 	EXPECT_EQ(SHRT_MAX, sample);
 }
 
@@ -310,10 +310,10 @@ TEST(lagi_audio, stereo_downmix) {
 	};
 
 	auto provider = agi::CreateConvertAudioProvider(agi::make_unique<AudioProvider>());
-	EXPECT_EQ(1, provider->GetChannels());
+	EXPECT_EQ(2, provider->GetChannels());
 
 	int16_t samples[100];
-	provider->GetAudio(samples, 0, 100);
+	provider->GetInt16MonoAudio(samples, 0, 100);
 	for (int i = 0; i < 100; ++i)
 		EXPECT_EQ(i, samples[i]);
 }
@@ -333,27 +333,27 @@ struct FloatAudioProvider : agi::AudioProvider {
 		auto out = static_cast<Float *>(buf);
 		for (int64_t end = start + count; start < end; ++start) {
 			auto shifted = start + SHRT_MIN;
-			*out++ = (Float)(1.0 * shifted / (shifted < 0 ? -SHRT_MIN : SHRT_MAX));
+			*out++ = (Float)(shifted) / (-SHRT_MIN);
 		}
 	}
 };
 
 TEST(lagi_audio, float_conversion) {
 	auto provider = agi::CreateConvertAudioProvider(agi::make_unique<FloatAudioProvider<float>>());
-	EXPECT_FALSE(provider->AreSamplesFloat());
+	EXPECT_TRUE(provider->AreSamplesFloat());
 
 	int16_t samples[1 << 16];
-	provider->GetAudio(samples, 0, 1 << 16);
+	provider->GetInt16MonoAudio(samples, 0, 1 << 16);
 	for (int i = 0; i < (1 << 16); ++i)
 		ASSERT_EQ(i + SHRT_MIN, samples[i]);
 }
 
 TEST(lagi_audio, double_conversion) {
 	auto provider = agi::CreateConvertAudioProvider(agi::make_unique<FloatAudioProvider<double>>());
-	EXPECT_FALSE(provider->AreSamplesFloat());
+	EXPECT_TRUE(provider->AreSamplesFloat());
 
 	int16_t samples[1 << 16];
-	provider->GetAudio(samples, 0, 1 << 16);
+	provider->GetInt16MonoAudio(samples, 0, 1 << 16);
 	for (int i = 0; i < (1 << 16); ++i)
 		ASSERT_EQ(i + SHRT_MIN, samples[i]);
 }
@@ -551,4 +551,4 @@ TEST(lagi_audio, wave64_truncated) {
 	}
 
 	agi::fs::Remove(path);
-}
+}
\ No newline at end of file