Aegisub/aegisub/audio_spectrum.cpp
Niels Martin Hansen a6773bf842 Additional sanity check in spectrum code
Changed default settings for spectrum (no reason to throw away 32 perfectly good bands!)

Originally committed to SVN as r735.
2007-01-07 07:28:39 +00:00

333 lines
10 KiB
C++

// Copyright (c) 2005, 2006, Rodrigo Braz Monteiro
// Copyright (c) 2006, 2007, Niels Martin Hansen
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of the Aegisub Group nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// -----------------------------------------------------------------------------
//
// AEGISUB
//
// Website: http://aegisub.cellosoft.com
// Contact: mailto:zeratul@cellosoft.com
//
#include <assert.h>
#include "audio_spectrum.h"
#include "fft.h"
#include "colorspace.h"
#include "options.h"
// Audio spectrum FFT data cache
// Shared blank line; the GetLine() implementations in this file return it for lines outside their range.
AudioSpectrumCache::CacheLine AudioSpectrumCache::null_line;
// Number of power values held in every cache line; assigned through SetLineLength().
unsigned long AudioSpectrumCache::line_length;
// Set the number of power values stored per cache line (shared by all caches)
// and resize the shared blank line to the same length, zero-filling any new entries.
void AudioSpectrumCache::SetLineLength(unsigned long new_length)
{
	AudioSpectrumCache::line_length = new_length;
	AudioSpectrumCache::null_line.resize(new_length, 0);
}
// Bottom level FFT cache, holds actual power data itself
class FinalSpectrumCache : public AudioSpectrumCache {
private:
	std::vector<CacheLine> data; // one power spectrum per cached line
	unsigned long start, length; // first line held, and number of lines held

public:
	// Return the power data for line i, or the shared blank line when i lies
	// outside [start, start+length).
	CacheLine& GetLine(unsigned long i)
	{
		// This check ought to be redundant
		if (i >= start && i-start < length)
			return data[i - start];
		else
			return null_line;
	}

	// Compute and store the power spectrum of `_length` consecutive lines,
	// beginning at line `_start`, reading the audio from `provider`.
	FinalSpectrumCache(AudioProvider *provider, unsigned long _start, unsigned long _length)
	{
		start = _start;
		length = _length;

		assert(length > 2);

		// First fill the data vector with blanks; it holds exactly `length` lines
		data.resize(length, null_line);

		// line_length is half of the number of samples used to calculate a line, since half of the output from
		// a Fourier transform of real data is redundant, and not interesting for the purpose of creating
		// a frequency/power spectrum.
		const unsigned long samples_per_line = line_length*2;
		if (samples_per_line == 0)
			return; // nothing to transform, and the buffers below would be empty

		// Start sample number of the next line calculated.
		// Widen before multiplying: where unsigned long is 32 bits wide the product
		// would otherwise wrap around before being stored in the 64 bit variable.
		__int64 sample = static_cast<__int64>(start) * line_length * 2;

		// Work buffers are vectors so they are released even if the provider or the FFT throws
		std::vector<short> raw_sample_data(samples_per_line); // raw sample data
		std::vector<float> sample_data(samples_per_line);     // same samples converted to float
		// Real and imaginary components of the output
		std::vector<float> out_r(samples_per_line);
		std::vector<float> out_i(samples_per_line);

		FFT fft; // TODO: use FFTW instead? A wavelet?

		for (unsigned long i = 0; i < length; ++i) {
			provider->GetAudio(&raw_sample_data[0], sample, samples_per_line);

			for (size_t j = 0; j < samples_per_line; ++j)
				sample_data[j] = (float)raw_sample_data[j];

			fft.Transform(samples_per_line, &sample_data[0], &out_r[0], &out_i[0]);

			// Only the first line_length outputs are kept; store their magnitudes
			CacheLine &line = data[i];
			for (size_t j = 0; j < line_length; ++j) {
				line[j] = sqrt(out_r[j]*out_r[j] + out_i[j]*out_i[j]);
			}

			sample += samples_per_line;
		}
	}

	virtual ~FinalSpectrumCache()
	{
	}
};
// Non-bottom-level cache, refers to other caches to do the work
// Sub-caches are created lazily, the first time a line inside them is requested.
class IntermediateSpectrumCache : public AudioSpectrumCache {
private:
	std::vector<AudioSpectrumCache*> sub_caches; // owned; an entry stays null until first needed
	unsigned long start, length, subcache_length; // first line, line count, lines per sub-cache
	bool subcaches_are_final; // true when sub-caches are FinalSpectrumCache objects
	int depth; // nesting level, passed on (+1) to nested intermediate caches
	AudioProvider *provider; // audio source handed to the sub-caches

	// Not copyable: the sub-caches are owned through raw pointers, so a copy
	// would delete them twice. Declared private and left undefined on purpose.
	IntermediateSpectrumCache(const IntermediateSpectrumCache&);
	IntermediateSpectrumCache& operator=(const IntermediateSpectrumCache&);

public:
	// Return the power data for line i, creating the sub-cache that holds it if
	// necessary. Lines outside the covered range yield the shared blank line.
	CacheLine &GetLine(unsigned long i)
	{
		// subcache_length is zero only for an empty cache; bail out before dividing by it.
		// Note the range check deliberately still accepts i == start+length, as it always has.
		if (subcache_length == 0 || i < start || i-start > length)
			return null_line;

		// Determine which sub-cache this line resides in
		size_t subcache = (i-start) / subcache_length;
		assert(subcache < sub_caches.size());
		if (subcache >= sub_caches.size())
			return null_line; // asserts vanish in release builds; never index out of bounds

		if (!sub_caches[subcache]) {
			const unsigned long sub_start = start + subcache*subcache_length;
			if (subcaches_are_final) {
				sub_caches[subcache] = new FinalSpectrumCache(provider, sub_start, subcache_length);
			} else {
				sub_caches[subcache] = new IntermediateSpectrumCache(provider, sub_start, subcache_length, depth+1);
			}
		}

		return sub_caches[subcache]->GetLine(i);
	}

	// Set up a cache covering `_length` lines starting at line `_start`.
	// No audio is read here; the work is deferred to GetLine().
	IntermediateSpectrumCache(AudioProvider *_provider, unsigned long _start, unsigned long _length, int _depth)
	{
		provider = _provider;
		start = _start;
		length = _length;
		depth = _depth;

		if (length == 0) {
			// An empty range would make the division below divide by zero
			// (num_subcaches would stay at 1). Keep the cache empty instead.
			subcache_length = 0;
			subcaches_are_final = true;
			return;
		}

		// FIXME: this calculation probably needs tweaking
		// Doubles the number of sub-caches for every factor of 16 in length
		int num_subcaches = 1;
		unsigned long tmp = length;
		while (tmp > 0) {
			tmp /= 16;
			num_subcaches *= 2;
		}
		// length >= 1 here, so the loop ran at least once and num_subcaches >= 2
		subcache_length = length / (num_subcaches-1);

		subcaches_are_final = num_subcaches <= 4;

		sub_caches.resize(num_subcaches, 0);
	}

	// Destroy every sub-cache that was created
	virtual ~IntermediateSpectrumCache()
	{
		for (size_t i = 0; i < sub_caches.size(); ++i)
			delete sub_caches[i]; // deleting a null pointer is a no-op
	}
};
// AudioSpectrum
// Prepare spectrum rendering for `_provider`, with `_line_length` power values
// per line: set up the (lazily filled) cache, the band limits and the two palettes.
AudioSpectrum::AudioSpectrum(AudioProvider *_provider, unsigned long _line_length)
{
	provider = _provider;
	line_length = _line_length;

	// Number of lines the audio is divided into: sample count / line_length / 2
	const __int64 total_lines = provider->GetNumSamples() / line_length / 2;
	//assert (total_lines < (1<<31)); // hope it fits into 32 bits...
	num_lines = (unsigned long)total_lines;

	// The cache classes share one line length, so set it before creating the cache
	AudioSpectrumCache::SetLineLength(line_length);
	cache = new IntermediateSpectrumCache(provider, 0, num_lines, 0);

	power_scale = 1;

	// Band limits used by RenderRange; the lower cutoff comes from the options
	minband = Options.AsInt(_T("Audio Spectrum Cutoff"));
	maxband = line_length - minband * 2/3; // TODO: make this customisable?

	// Generate colour maps: 256 RGB triplets each, indexed by intensity.
	// Palette for unselected audio
	for (int level = 0; level < 256; ++level) {
		unsigned char *rgb = colours_normal + level*3;
		hsl_to_rgb(170 + level * 2/3, 128 + level/2, level, rgb+0, rgb+1, rgb+2);
	}
	// Palette for selected audio; differs only in the third (lightness) argument
	for (int level = 0; level < 256; ++level) {
		unsigned char *rgb = colours_selected + level*3;
		hsl_to_rgb(170 + level * 2/3, 128 + level/2, level*3/4+64, rgb+0, rgb+1, rgb+2);
	}
}
// Release the cache object held by this spectrum.
AudioSpectrum::~AudioSpectrum()
{
	delete this->cache;
}
// Clamp a frequency band index into the valid range [0, last_band].
static inline int ClampSpectrumBand(int band, int last_band)
{
	if (band < 0) return 0;
	if (band > last_band) return last_band;
	return band;
}

// Clamp intensity to 0..255 and copy the matching palette RGB triplet to pixel number `pixel` of img.
static inline void WriteSpectrumPixel(unsigned char *img, const unsigned char *palette, int pixel, int intensity)
{
	if (intensity < 0) intensity = 0;
	if (intensity > 255) intensity = 255;
	img[pixel*3 + 0] = palette[intensity*3+0];
	img[pixel*3 + 1] = palette[intensity*3+1];
	img[pixel*3 + 2] = palette[intensity*3+2];
}

// Render the spectrum of the samples range_start..range_end into img.
//   selected  - use the "selected" palette instead of the normal one
//   img       - destination buffer, written as 3 bytes per pixel, bottom row first
//   imgleft   - first destination column
//   imgwidth  - number of columns the range is stretched or squeezed into
//   imgpitch  - pixels per image row; columns are capped at imgpitch-1
//   imgheight - number of rows; the band range is mapped onto this height
void AudioSpectrum::RenderRange(__int64 range_start, __int64 range_end, bool selected, unsigned char *img, int imgleft, int imgwidth, int imgpitch, int imgheight)
{
	// Without any bands there is nothing to draw, and the divisions below would be by zero
	if (line_length == 0)
		return;

	unsigned long first_line = (unsigned long)(range_start / line_length / 2);
	unsigned long last_line = (unsigned long)(range_end / line_length / 2);
	// Only used as a divisor inside the loop, which does not run when last_line < first_line
	const unsigned long lines_to_render = last_line - first_line + 1;

	// Scaled power of the line being drawn; a vector so it is released on every exit path
	std::vector<float> power(line_length);
	// Highest valid index into power
	const int last_band = static_cast<int>(line_length) - 1;

	// NOTE(review): this is never updated below, so the skip check in the loop only
	// triggers for negative columns. Kept as-is to preserve the rendered output.
	int last_imgcol_rendered = -1;

	const unsigned char *palette = selected ? colours_selected : colours_normal;

	// Some scaling constants
	const int maxpower = (1 << (16 - 1))*256;

	const double upscale = power_scale * 16384 / line_length;
	const double onethirdmaxpower = maxpower / 3, twothirdmaxpower = maxpower * 2/3;
	const double logoverscale = log(maxpower*upscale - twothirdmaxpower);

	for (unsigned long i = first_line; i <= last_line; ++i) {
		// Handle horizontal compression and don't unneededly re-render columns
		int imgcol = imgleft + imgwidth * (i - first_line) / lines_to_render;
		if (imgcol <= last_imgcol_rendered)
			continue;

		AudioSpectrumCache::CacheLine &line = cache->GetLine(i);

		// Calculate the signal power over frequency
		// "Compressed" scale
		for (unsigned int j = 0; j < line_length; j++) {
			// First do a simple linear scale power calculation
			power[j] = line[j] * upscale;
			// Values above two thirds of maxpower are squeezed logarithmically into the top third
			if (power[j] > maxpower * 2/3) {
				double p = power[j] - twothirdmaxpower;
				p = log(p) * onethirdmaxpower / logoverscale;
				power[j] = p + twothirdmaxpower;
			}
		}

		// Handle horizontal expansion
		int next_line_imgcol = imgleft + imgwidth * (i - first_line + 1) / lines_to_render;
		if (next_line_imgcol >= imgpitch)
			next_line_imgcol = imgpitch-1;

		for (int x = imgcol; x <= next_line_imgcol; ++x) {

			// Decide which rendering algo to use
			if (maxband - minband > imgheight) {
				// more than one frequency sample per pixel (vertically compress data)
				// pick the largest value per pixel for display

				// Iterate over pixels, picking a range of samples for each.
				// The computed range can reach past the last band (for the top rows
				// it exceeds line_length-1), so both ends are clamped into power.
				for (int y = 0; y < imgheight; ++y) {
					int sample1 = ClampSpectrumBand(maxband * y/imgheight + minband, last_band);
					int sample2 = ClampSpectrumBand(maxband * (y+1)/imgheight + minband, last_band);
					float maxval = 0;
					for (int samp = sample1; samp <= sample2; samp++) {
						if (power[samp] > maxval) maxval = power[samp];
					}
					int intensity = int(256 * maxval / maxpower);
					WriteSpectrumPixel(img, palette, (imgheight-y-1)*imgpitch+x, intensity);
				}
			}
			else {
				// less than one frequency sample per pixel (vertically expand data)
				// interpolate between pixels
				// can also happen with exactly one sample per pixel, but how often is that?

				// Iterate over pixels, picking the nearest power values.
				// Indices are clamped for the same reason as above.
				for (int y = 0; y < imgheight; ++y) {
					float ideal = (float)(y+1.)/imgheight * maxband;
					float sample1 = power[ClampSpectrumBand((int)floor(ideal)+minband, last_band)];
					float sample2 = power[ClampSpectrumBand((int)ceil(ideal)+minband, last_band)];
					float frac = ideal - floor(ideal);
					int intensity = int(((1-frac)*sample1 + frac*sample2) / maxpower * 256);
					WriteSpectrumPixel(img, palette, (imgheight-y-1)*imgpitch+x, intensity);
				}
			}
		}
	}
}
void AudioSpectrum::SetScaling(float _power_scale)
{
power_scale = _power_scale;
}