forked from mia/Aegisub
New spectrum rendering code: threading has been scrapped and FFT results are now cached instead. The visual appearance of the selection in the spectrum was also improved.
Originally committed to SVN as r731.
This commit is contained in:
parent
d0a7745d41
commit
9babda0720
5 changed files with 465 additions and 240 deletions
|
@ -66,6 +66,8 @@ AudioDisplay::AudioDisplay(wxWindow *parent,VideoDisplay *display)
|
|||
video = NULL;
|
||||
origImage = NULL;
|
||||
spectrumDisplay = NULL;
|
||||
spectrumDisplaySelected = NULL;
|
||||
spectrumRenderer = NULL;
|
||||
ScrollBar = NULL;
|
||||
dialogue = NULL;
|
||||
karaoke = NULL;
|
||||
|
@ -112,7 +114,9 @@ AudioDisplay::~AudioDisplay() {
|
|||
delete provider;
|
||||
delete player;
|
||||
delete origImage;
|
||||
delete spectrumRenderer;
|
||||
delete spectrumDisplay;
|
||||
delete spectrumDisplaySelected;
|
||||
delete peak;
|
||||
delete min;
|
||||
}
|
||||
|
@ -214,11 +218,6 @@ void AudioDisplay::UpdateImage(bool weak) {
|
|||
// Draw spectrum
|
||||
if (spectrum) {
|
||||
DrawSpectrum(dc,weak);
|
||||
|
||||
// Invert the selection, if any
|
||||
if (hasSel && selStart < selEnd && Options.AsBool(_T("Audio Spectrum invert selection"))) {
|
||||
dc.Blit(selStart, 0, selEnd-selStart, h, &dc, selStart, 0, wxSRC_INVERT);
|
||||
}
|
||||
}
|
||||
|
||||
// Draw seconds boundaries
|
||||
|
@ -374,13 +373,14 @@ void AudioDisplay::UpdateImage(bool weak) {
|
|||
}
|
||||
}
|
||||
catch (...) {
|
||||
// FIXME?
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Modified text
|
||||
if (NeedCommit) {
|
||||
dc.SetFont(wxFont(9,wxDEFAULT,wxFONTSTYLE_NORMAL,wxFONTWEIGHT_BOLD,false,_T("Verdana")));
|
||||
dc.SetFont(wxFont(9,wxDEFAULT,wxFONTSTYLE_NORMAL,wxFONTWEIGHT_BOLD,false,_T("Verdana"))); // FIXME: hardcoded font name
|
||||
dc.SetTextForeground(wxColour(255,0,0));
|
||||
if (selStart <= selEnd) {
|
||||
dc.DrawText(_T("Modified"),4,4);
|
||||
|
@ -409,7 +409,7 @@ void AudioDisplay::UpdateImage(bool weak) {
|
|||
dc.SetPen(wxSystemSettings::GetColour(wxSYS_COLOUR_BTNTEXT));
|
||||
dc.SetTextForeground(wxSystemSettings::GetColour(wxSYS_COLOUR_BTNTEXT));
|
||||
wxFont scaleFont;
|
||||
scaleFont.SetFaceName(_T("Tahoma"));
|
||||
scaleFont.SetFaceName(_T("Tahoma")); // FIXME: hardcoded font name
|
||||
scaleFont.SetPointSize(8);
|
||||
dc.SetFont(scaleFont);
|
||||
|
||||
|
@ -503,259 +503,61 @@ void AudioDisplay::DrawWaveform(wxDC &dc,bool weak) {
|
|||
}
|
||||
|
||||
|
||||
static int spectrumColorMap[256];
|
||||
static unsigned short spectrumColorMap16[256];
|
||||
static bool colorMapsGenerated = false;
|
||||
|
||||
|
||||
//////////////////////////////////////
|
||||
// Spectrum analyser rendering thread
|
||||
class SpectrumRendererThread : public wxThread {
|
||||
public:
|
||||
SpectrumRendererThread() : wxThread(wxTHREAD_JOINABLE) {
|
||||
if (Create() != wxTHREAD_NO_ERROR)
|
||||
throw _T("Error creating Spectrum rendering thread.");
|
||||
}
|
||||
|
||||
int *data; // image data to write to (shared)
|
||||
int window; // 1 << Options.AsInt(_T("Audio Spectrum Window"))
|
||||
int firstbar, lastbar; // first and last vertical bar to draw
|
||||
int w, h; // width and height of canvas
|
||||
int cutoff; // cutoff frequency
|
||||
float *base_in; // audio sample data (shared)
|
||||
int samples; // number of samples per column
|
||||
int depth; // display bit depth
|
||||
float scale; // vertical scale of display, exponential, min=0, mid=1, max=8
|
||||
|
||||
protected:
|
||||
wxThread::ExitCode Entry() {
|
||||
// Pointers to image data
|
||||
int *write_ptr = data;
|
||||
unsigned short *write_ptr16 = (unsigned short *)data;
|
||||
|
||||
// FFT output data
|
||||
float *out_r = new float[window]; // real part
|
||||
float *out_i = new float[window]; // imaginary part
|
||||
float *power = new float[window]; // calculated signal power
|
||||
|
||||
// Prepare constants
|
||||
const int halfwindow = window/2;
|
||||
//const int posThres = MAX(1,int(double(halfwindow-cutoff)/double(h)*0.5/scale + 0.5));
|
||||
const int maxband = (halfwindow-cutoff) * 2/3;
|
||||
const float mult = float(h)/float(halfwindow-cutoff)/255.f;
|
||||
|
||||
// Calculation loop
|
||||
for (int i = firstbar; i < lastbar; i++) {
|
||||
__int64 curStart = i*samples-(window/2);
|
||||
if (curStart < 0) curStart = 0;
|
||||
|
||||
// Position input
|
||||
float *in = base_in + curStart;
|
||||
|
||||
// Perform the FFT
|
||||
FFT fft;
|
||||
fft.Transform(window,in,out_r,out_i);
|
||||
|
||||
// Position pointer
|
||||
write_ptr = data+i+h*w;
|
||||
write_ptr16 = ((unsigned short*)data)+(i+h*w);
|
||||
|
||||
// The maximum power output from the FFT
|
||||
// Derived by maximising the result from the DFT function:
|
||||
// f[u] = sum(x=0,N-1)[ f(x) * exp(-2 * pi * i * u * x) ]
|
||||
// Where N is the number of samples transformed.
|
||||
// = N * 2^(B-1) * exp(-2 * pi * i * u * x)
|
||||
// Maximising by f(x) constant at maximum sample value.
|
||||
// B is bit-depth of the samples, so 2^(B-1) is the maximum sample value.
|
||||
// = N * 2^(B-1) * [ cos(-2*pi*u*x) + i sin(-2*pi*u*x) ]
|
||||
// Expanding using Euler's formula.
|
||||
// = N * 2^(B-1) * [ cos(2*pi*u*x) - i sin(2*pi*u*x) ]
|
||||
// cos(-x) = cos(x) and sin(-x) = -sin(x)
|
||||
// = N * 2^(B-1) * cos(2*pi*u*x) - N * 2^(B-1) * i sin(2*pi*u*x) [A]
|
||||
// Expand the bracket.
|
||||
// Now determine the maximum magnitude of [A], letting u be constant and x variable.
|
||||
// | N * 2^(B-1) * cos(2*pi*u*x) - N * 2^(B-1) * i sin(2*pi*u*x) |
|
||||
// = sqrt( [N * 2^(B-1) * cos(2*pi*u*x)]^2 + [N * 2^(B-1) * sin(2*pi*u*x)]^2 )
|
||||
// = sqrt( N^2 * 4^(B-1) * cos^2(2*pi*u*x) + N^2 * 4^(B-1) * sin^2(2*pi*u*x) )
|
||||
// = sqrt( N^2 * 4^(B-1) * [ cos^2(2*pi*u*x) + sin^2(2*pi*u*x) ] )
|
||||
// = sqrt( N^2 * 4^(B-1) )
|
||||
// It's known that sin^2(x) + cos^2(x) = 1.
|
||||
// = N * 2^(B-1)
|
||||
|
||||
int maxpower = (1 << (16 - 1))*256;
|
||||
|
||||
// Calculate the signal power over frequency
|
||||
#if 0
|
||||
// Logarithmic scale
|
||||
for (int j = 0; j < window; j++) {
|
||||
float t = out_r[j]*out_r[j] + out_i[j]*out_i[j];
|
||||
if (t < 1)
|
||||
power[j] = 0;
|
||||
else
|
||||
power[j] = 10. * log10(t) * 64; // try changing the constant 64 if playing with this
|
||||
}
|
||||
maxpower = 10 * log10((float)maxpower);
|
||||
#elif 1
|
||||
// "Compressed" scale
|
||||
double onethirdmaxpower = maxpower / 3, twothirdmaxpower = maxpower * 2/3;
|
||||
double logoverscale = log(maxpower*8*scale - twothirdmaxpower);
|
||||
for (int j = 0; j < window; j++) {
|
||||
// First do a simple linear scale power calculation -- 8 gives a reasonable default scaling
|
||||
power[j] = sqrt(out_r[j]*out_r[j] + out_i[j]*out_i[j]) * 8 * scale;
|
||||
if (power[j] > maxpower * 2/3) {
|
||||
double p = power[j] - twothirdmaxpower;
|
||||
p = log(p) * onethirdmaxpower / logoverscale;
|
||||
power[j] = p + twothirdmaxpower;
|
||||
}
|
||||
}
|
||||
#else
|
||||
// Linear scale
|
||||
for (int j = 0; j < window; j++) {
|
||||
power[j] = sqrt(out_r[j]*out_r[j] + out_i[j]*out_i[j]);
|
||||
}
|
||||
#endif
|
||||
|
||||
#define WRITE_PIXEL \
|
||||
if (intensity > 255) intensity = 255; \
|
||||
if (intensity < 0) intensity = 0; \
|
||||
if (depth == 32) { \
|
||||
write_ptr -= w; \
|
||||
*write_ptr = spectrumColorMap[intensity]; \
|
||||
} \
|
||||
else if (depth == 16) { \
|
||||
write_ptr16 -= w; \
|
||||
*write_ptr16 = spectrumColorMap16[intensity]; \
|
||||
}
|
||||
|
||||
// Decide which rendering algo to use
|
||||
if (halfwindow-cutoff > h) {
|
||||
// more than one frequency sample per pixel (vertically compress data)
|
||||
// pick the largest value per pixel for display
|
||||
|
||||
// Iterate over pixels, picking a range of samples for each
|
||||
for (int j = 0; j < h; j++) {
|
||||
int sample1 = maxband * j/h + cutoff;
|
||||
int sample2 = maxband * (j+1)/h + cutoff;
|
||||
float maxval = 0;
|
||||
for (int samp = sample1; samp <= sample2; samp++) {
|
||||
if (power[samp] > maxval) maxval = power[samp];
|
||||
}
|
||||
int intensity = int(256 * maxval / maxpower);
|
||||
WRITE_PIXEL
|
||||
}
|
||||
}
|
||||
else {
|
||||
// less than one frequency sample per pixel (vertically expand data)
|
||||
// interpolate between pixels
|
||||
// can also happen with exactly one sample per pixel, but how often is that?
|
||||
|
||||
// Iterate over pixels, picking the nearest power values
|
||||
for (int j = 0; j < h; j++) {
|
||||
float ideal = (float)(j+1.)/h * maxband;
|
||||
float sample1 = power[(int)floor(ideal)+cutoff];
|
||||
float sample2 = power[(int)ceil(ideal)+cutoff];
|
||||
float frac = ideal - floor(ideal);
|
||||
int intensity = int(((1-frac)*sample1 + frac*sample2) / maxpower * 256);
|
||||
WRITE_PIXEL
|
||||
}
|
||||
}
|
||||
|
||||
#undef WRITE_PIXEL
|
||||
}
|
||||
|
||||
delete out_r;
|
||||
delete out_i;
|
||||
delete power;
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////
|
||||
// Draw spectrum analyzer
|
||||
void AudioDisplay::DrawSpectrum(wxDC &finaldc,bool weak) {
|
||||
// Spectrum bitmap
|
||||
if (!weak || !spectrumDisplay || spectrumDisplay->GetWidth() != w || spectrumDisplay->GetHeight() != h) {
|
||||
if (spectrumDisplay) {
|
||||
delete spectrumDisplay;
|
||||
delete spectrumDisplaySelected;
|
||||
spectrumDisplay = 0;
|
||||
spectrumDisplaySelected = 0;
|
||||
}
|
||||
//spectrumDisplay = new wxBitmap(w,h);
|
||||
weak = false;
|
||||
}
|
||||
|
||||
if (!weak) {
|
||||
// Generate colors
|
||||
if (!colorMapsGenerated) {
|
||||
unsigned char r,g,b;
|
||||
for (int i=0;i<256;i++) {
|
||||
//hsv_to_rgb(255 - i, 255 - i * 3/10, 255*3/10 + i * 7/10, &r, &g, &b);
|
||||
hsl_to_rgb(170 + i * 2/3, 128 + i/2, i, &r, &g, &b);
|
||||
spectrumColorMap[i] = b | (g<<8) | (r<<16);
|
||||
spectrumColorMap16[i] = ((r>>3)<<11) | ((g>>2)<<5) | b>>3;
|
||||
}
|
||||
colorMapsGenerated = true;
|
||||
}
|
||||
int depth = wxDisplayDepth();
|
||||
unsigned char *img = (unsigned char *)malloc(h*w*3); // wxImage requires using malloc
|
||||
|
||||
// Prepare arrays
|
||||
int cutOff = Options.AsInt(_T("Audio Spectrum Cutoff"));
|
||||
int window = 1 << Options.AsInt(_T("Audio Spectrum Window"));
|
||||
int totalLen = w*samples+window;
|
||||
float *raw_float = new float[totalLen];
|
||||
short *raw_int = new short[totalLen];
|
||||
float *in = raw_float;
|
||||
if (!spectrumRenderer)
|
||||
spectrumRenderer = new AudioSpectrum(provider, 1<<Options.AsInt(_T("Audio Spectrum Window")));
|
||||
|
||||
// Fill input
|
||||
__int64 start = Position*samples;
|
||||
provider->GetAudio(raw_int,start,totalLen);
|
||||
for (int j=0;j<totalLen;j++) {
|
||||
raw_float[j] = (float)raw_int[j];
|
||||
}
|
||||
delete raw_int;
|
||||
spectrumRenderer->SetScaling(scale);
|
||||
|
||||
// For image data
|
||||
int *data = new int[w*h*depth/32];
|
||||
// Use a slightly slower, but simple way
|
||||
// Always draw the spectrum for the entire width
|
||||
// Hack: without those divs by 2 the display is horizontally compressed
|
||||
spectrumRenderer->RenderRange(Position*samples, (Position+w)*samples, false, img, 0, w, w, h);
|
||||
|
||||
////// START OF PARALLELISED CODE //////
|
||||
const int cpu_count = MAX(wxThread::GetCPUCount(), 1);
|
||||
std::vector<SpectrumRendererThread*> threads(cpu_count);
|
||||
for (int i = 0; i < cpu_count; i++) {
|
||||
// Ugh, way too much data to copy in
|
||||
threads[i] = new SpectrumRendererThread();
|
||||
threads[i]->data = data;
|
||||
threads[i]->window = window;
|
||||
threads[i]->firstbar = i * w/cpu_count;
|
||||
threads[i]->lastbar = (i+1) * w/cpu_count;
|
||||
threads[i]->w = w;
|
||||
threads[i]->h = h;
|
||||
threads[i]->cutoff = cutOff;
|
||||
threads[i]->base_in = raw_float;
|
||||
threads[i]->samples = samples;
|
||||
threads[i]->depth = depth;
|
||||
threads[i]->scale = scale;
|
||||
threads[i]->Run();
|
||||
}
|
||||
// Threads started, wait for them to end
|
||||
for (int i = 0; i < cpu_count; i++) {
|
||||
threads[i]->Wait();
|
||||
delete threads[i];
|
||||
}
|
||||
// The spectrum bitmap will have been deleted above already, so just make a new one
|
||||
wxImage imgobj(w, h, img, false);
|
||||
spectrumDisplay = new wxBitmap(imgobj);
|
||||
}
|
||||
|
||||
// Clear memory
|
||||
delete raw_float;
|
||||
|
||||
// Create image FIXME *BREAKS ON NON-WIN32* (see wx docs)
|
||||
spectrumDisplay = new wxBitmap((const char*)data,w,h,depth);
|
||||
if (hasSel && selStartCap < selEndCap &&
|
||||
((selStartCap > Position && selStartCap < Position+w) ||
|
||||
(selEndCap > Position && selEndCap < Position+w) ||
|
||||
(selStartCap < Position && selEndCap > Position+w)) &&
|
||||
!spectrumDisplaySelected) {
|
||||
// There is a visible selection and we don't have a rendered one
|
||||
// This should be done regardless whether we're "weak" or not
|
||||
// Assume a few things were already set up when things were first rendered though
|
||||
unsigned char *img = (unsigned char *)malloc(h*w*3);
|
||||
spectrumRenderer->RenderRange(Position*samples, (Position+w)*samples, true, img, 0, w, w, h);
|
||||
wxImage imgobj(w, h, img, false);
|
||||
spectrumDisplaySelected = new wxBitmap(imgobj);
|
||||
}
|
||||
|
||||
// Draw
|
||||
wxMemoryDC dc;
|
||||
dc.SelectObject(*spectrumDisplay);
|
||||
finaldc.Blit(0,0,w,h,&dc,0,0);
|
||||
}
|
||||
|
||||
if (hasSel && spectrumDisplaySelected && selStartCap < selEndCap) {
|
||||
dc.SelectObject(*spectrumDisplaySelected);
|
||||
finaldc.Blit(selStart, 0, selEnd-selStart, h, &dc, selStart, 0);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////
|
||||
// Get selection position
|
||||
|
@ -939,8 +741,10 @@ void AudioDisplay::SetFile(wxString file, VideoProvider *vprovider) {
|
|||
if (player) player->CloseStream();
|
||||
delete provider;
|
||||
delete player;
|
||||
delete spectrumRenderer;
|
||||
provider = NULL;
|
||||
player = NULL;
|
||||
spectrumRenderer = NULL;
|
||||
Reset();
|
||||
|
||||
loaded = false;
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
#include <wx/wxprec.h>
|
||||
#include "audio_provider.h"
|
||||
#include "audio_player.h"
|
||||
#include "audio_spectrum.h"
|
||||
|
||||
|
||||
//////////////
|
||||
|
@ -67,8 +68,11 @@ private:
|
|||
AssDialogue *dialogue;
|
||||
VideoDisplay *video;
|
||||
|
||||
AudioSpectrum *spectrumRenderer;
|
||||
|
||||
wxBitmap *origImage;
|
||||
wxBitmap *spectrumDisplay;
|
||||
wxBitmap *spectrumDisplaySelected;
|
||||
__int64 PositionSample;
|
||||
float scale;
|
||||
int samples;
|
||||
|
@ -106,7 +110,7 @@ private:
|
|||
int scrubLastRate;
|
||||
|
||||
void OnPaint(wxPaintEvent &event);
|
||||
void OnMouseEvent(wxMouseEvent &event);
|
||||
void OnMouseEvent(wxMouseEvent &event);
|
||||
void OnSize(wxSizeEvent &event);
|
||||
void OnUpdateTimer(wxTimerEvent &event);
|
||||
void OnKeyDown(wxKeyEvent &event);
|
||||
|
|
327
aegisub/audio_spectrum.cpp
Normal file
327
aegisub/audio_spectrum.cpp
Normal file
|
@ -0,0 +1,327 @@
|
|||
// Copyright (c) 2005, 2006, Rodrigo Braz Monteiro
|
||||
// Copyright (c) 2006, 2007, Niels Martin Hansen
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of the Aegisub Group nor the names of its contributors
|
||||
// may be used to endorse or promote products derived from this software
|
||||
// without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// AEGISUB
|
||||
//
|
||||
// Website: http://aegisub.cellosoft.com
|
||||
// Contact: mailto:zeratul@cellosoft.com
|
||||
//
|
||||
|
||||
#include <assert.h>
|
||||
#include "audio_spectrum.h"
|
||||
#include "fft.h"
|
||||
#include "colorspace.h"
|
||||
#include "options.h"
|
||||
|
||||
|
||||
// Audio spectrum FFT data cache
|
||||
|
||||
AudioSpectrumCache::CacheLine AudioSpectrumCache::null_line;
|
||||
unsigned long AudioSpectrumCache::line_length;
|
||||
|
||||
void AudioSpectrumCache::SetLineLength(unsigned long new_length)
|
||||
{
|
||||
line_length = new_length;
|
||||
null_line.resize(new_length, 0);
|
||||
}
|
||||
|
||||
|
||||
// Bottom level FFT cache, holds actual power data itself
|
||||
|
||||
class FinalSpectrumCache : public AudioSpectrumCache {
|
||||
private:
|
||||
std::vector<CacheLine> data;
|
||||
unsigned long start, length; // start and end of range
|
||||
|
||||
public:
|
||||
CacheLine& GetLine(unsigned long i)
|
||||
{
|
||||
// This check ought to be redundant
|
||||
if (i >= start && i-start < length)
|
||||
return data[i - start];
|
||||
else
|
||||
return null_line;
|
||||
}
|
||||
|
||||
FinalSpectrumCache(AudioProvider *provider, unsigned long _start, unsigned long _length)
|
||||
{
|
||||
start = _start;
|
||||
length = _length;
|
||||
|
||||
assert(length > 2);
|
||||
|
||||
// First fill the data vector with blanks
|
||||
// Both start and end are included in the range stored, so we have end-start+1 elements
|
||||
data.resize(length, null_line);
|
||||
|
||||
// Start sample number of the next line calculated
|
||||
// line_length is half of the number of samples used to calculate a line, since half of the output from
|
||||
// a Fourier transform of real data is redundant, and not interesting for the purpose of creating
|
||||
// a frequenmcy/power spectrum.
|
||||
__int64 sample = start * line_length*2;
|
||||
|
||||
// Raw sample data
|
||||
short *raw_sample_data = new short[line_length*2];
|
||||
float *sample_data = new float[line_length*2];
|
||||
// Real and imaginary components of the output
|
||||
float *out_r = new float[line_length*2];
|
||||
float *out_i = new float[line_length*2];
|
||||
|
||||
FFT fft; // TODO: use FFTW instead?
|
||||
|
||||
for (unsigned long i = 0; i < length; ++i) {
|
||||
provider->GetAudio(raw_sample_data, sample, line_length*2);
|
||||
for (size_t j = 0; j < line_length; ++j) {
|
||||
sample_data[j*2] = (float)raw_sample_data[j*2];
|
||||
sample_data[j*2+1] = (float)raw_sample_data[j*2+1];
|
||||
}
|
||||
|
||||
fft.Transform(line_length*2, sample_data, out_r, out_i);
|
||||
|
||||
CacheLine &line = data[i];
|
||||
for (size_t j = 0; j < line_length; ++j) {
|
||||
line[j] = sqrt(out_r[j]*out_r[j] + out_i[j]*out_i[j]);
|
||||
}
|
||||
|
||||
sample += line_length*2;
|
||||
}
|
||||
|
||||
delete[] raw_sample_data;
|
||||
delete[] sample_data;
|
||||
delete[] out_r;
|
||||
delete[] out_i;
|
||||
}
|
||||
|
||||
virtual ~FinalSpectrumCache()
|
||||
{
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
// Non-bottom-level cache, refers to other caches to do the work
|
||||
|
||||
class IntermediateSpectrumCache : public AudioSpectrumCache {
|
||||
private:
|
||||
std::vector<AudioSpectrumCache*> sub_caches;
|
||||
unsigned long start, length, subcache_length;
|
||||
bool subcaches_are_final;
|
||||
int depth;
|
||||
AudioProvider *provider;
|
||||
|
||||
public:
|
||||
CacheLine &GetLine(unsigned long i)
|
||||
{
|
||||
if (i >= start && i-start <= length) {
|
||||
// Determine which sub-cache this line resides in
|
||||
int subcache = (i-start) / subcache_length;
|
||||
assert(subcache >= 0 && subcache < sub_caches.size());
|
||||
|
||||
if (!sub_caches[subcache]) {
|
||||
if (subcaches_are_final) {
|
||||
sub_caches[subcache] = new FinalSpectrumCache(provider, start+subcache*subcache_length, subcache_length);
|
||||
} else {
|
||||
sub_caches[subcache] = new IntermediateSpectrumCache(provider, start+subcache*subcache_length, subcache_length, depth+1);
|
||||
}
|
||||
}
|
||||
|
||||
return sub_caches[subcache]->GetLine(i);
|
||||
} else {
|
||||
return null_line;
|
||||
}
|
||||
}
|
||||
|
||||
IntermediateSpectrumCache(AudioProvider *_provider, unsigned long _start, unsigned long _length, int _depth)
|
||||
{
|
||||
provider = _provider;
|
||||
start = _start;
|
||||
length = _length;
|
||||
depth = _depth;
|
||||
|
||||
// FIXME: this calculation probably needs tweaking
|
||||
int num_subcaches = 1;
|
||||
unsigned long tmp = length;
|
||||
while (tmp > 0) {
|
||||
tmp /= 16;
|
||||
num_subcaches *= 2;
|
||||
}
|
||||
subcache_length = length / (num_subcaches-1);
|
||||
|
||||
subcaches_are_final = num_subcaches <= 4;
|
||||
|
||||
sub_caches.resize(num_subcaches, 0);
|
||||
}
|
||||
|
||||
virtual ~IntermediateSpectrumCache()
|
||||
{
|
||||
for (size_t i = 0; i < sub_caches.size(); ++i)
|
||||
if (sub_caches[i])
|
||||
delete sub_caches[i];
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
// AudioSpectrum
|
||||
|
||||
// Set up a spectrum renderer for the given provider.
// _line_length is the number of frequency components per line; each line is
// derived from _line_length*2 samples. This creates the (initially empty)
// top-level cache and fills the normal and selected colour palettes.
AudioSpectrum::AudioSpectrum(AudioProvider *_provider, unsigned long _line_length)
{
	provider = _provider;
	line_length = _line_length;

	// Number of whole lines in the audio
	__int64 total_lines = provider->GetNumSamples() / line_length / 2;
	//assert (total_lines < (1<<31)); // hope it fits into 32 bits...
	num_lines = (unsigned long)total_lines;

	// The shared line length must be set before any cache is built
	AudioSpectrumCache::SetLineLength(line_length);
	cache = new IntermediateSpectrumCache(provider, 0, num_lines, 0);

	power_scale = 1;
	minband = Options.AsInt(_T("Audio Spectrum Cutoff"));
	// NOTE(review): this evaluates as line_length - (minband*2/3), not
	// (line_length - minband)*2/3 -- confirm which one is intended.
	maxband = line_length - minband * 2/3; // TODO: make this customisable?

	// Generate colour maps: 256 RGB triplets per palette.
	// Both palettes share hue and saturation; the selected one uses a
	// different third component (i*3/4+64 instead of i).
	for (int i = 0; i < 256; i++) {
		unsigned char *normal_entry = colours_normal + i*3;
		unsigned char *selected_entry = colours_selected + i*3;
		hsl_to_rgb(170 + i * 2/3, 128 + i/2, i, normal_entry+0, normal_entry+1, normal_entry+2);
		hsl_to_rgb(170 + i * 2/3, 128 + i/2, i*3/4+64, selected_entry+0, selected_entry+1, selected_entry+2);
	}
}
|
||||
|
||||
|
||||
// Release the top-level cache created by the constructor.
AudioSpectrum::~AudioSpectrum()
{
	delete cache;
}
|
||||
|
||||
|
||||
// Convert a scaled power value to a palette index, clamped to 0..255.
// Anything not greater than zero (including NaN) maps to 0.
static unsigned char SpectrumIntensity(float value)
{
	if (!(value > 0.f)) return 0;
	if (value >= 255.f) return 255;
	return (unsigned char)value;
}

// Clamp a frequency bin number to the valid range [0, bins-1]; bins must be non-zero.
static unsigned long SpectrumClampBin(long bin, unsigned long bins)
{
	if (bin < 0) return 0;
	if ((unsigned long)bin >= bins) return bins - 1;
	return (unsigned long)bin;
}

// Render the spectrum of the sample range [range_start, range_end] into img.
//
// range_start, range_end: sample numbers; both are divided by line_length*2
//     to find the first and last cache line to draw.
// selected: use the "selected" palette instead of the normal one.
// img: pixel buffer with 3 bytes per pixel and imgpitch pixels per row.
// imgleft, imgwidth: leftmost column and width (in pixels) of the area the
//     lines are spread across.
// imgheight: number of rows; bin minband is drawn on the bottom row.
//
// Palette indices and frequency bin numbers are clamped to their valid
// ranges, so out-of-range values can never index outside the palette or the
// power array.
void AudioSpectrum::RenderRange(__int64 range_start, __int64 range_end, bool selected, unsigned char *img, int imgleft, int imgwidth, int imgpitch, int imgheight)
{
	// Nothing to draw into, or nothing to draw from
	if (!img || imgwidth <= 0 || imgheight <= 0 || imgpitch <= 0 || line_length == 0)
		return;

	const unsigned long first_line = (unsigned long)(range_start / line_length / 2);
	const unsigned long last_line = (unsigned long)(range_end / line_length / 2);
	if (last_line < first_line)
		return;
	const unsigned long line_count = last_line - first_line + 1;

	const unsigned char *palette = selected ? colours_selected : colours_normal;

	// These values do not depend on the line being drawn
	const int maxpower = (1 << (16 - 1))*256;
	const double onethirdmaxpower = maxpower / 3, twothirdmaxpower = maxpower * 2/3;
	const double logoverscale = log(maxpower*8*power_scale - twothirdmaxpower);
	// More than one frequency sample per pixel means the data must be vertically compressed
	const bool compress = maxband - minband > imgheight;

	std::vector<float> power(line_length);            // scaled power of the current line
	std::vector<unsigned char> column(imgheight);     // palette index per row for the current line

	int last_imgcol_rendered = -1;

	for (unsigned long n = 0; n < line_count; ++n) {
		// Handle horizontal compression and don't unneededly re-render columns
		int imgcol = imgleft + (int)((unsigned long)imgwidth * n / line_count);
		if (imgcol <= last_imgcol_rendered)
			continue;
		last_imgcol_rendered = imgcol;

		AudioSpectrumCache::CacheLine &line = cache->GetLine(first_line + n);

		// Calculate the signal power over frequency
		// "Compressed" scale
		for (unsigned long j = 0; j < line_length; j++) {
			// Treat bins missing from a short cache line as silent
			float raw = j < line.size() ? line[j] : 0.f;
			// First do a simple linear scale power calculation -- 8 gives a reasonable default scaling
			power[j] = raw * 8 * power_scale;
			if (power[j] > maxpower * 2/3) {
				double p = power[j] - twothirdmaxpower;
				p = log(p) * onethirdmaxpower / logoverscale;
				power[j] = (float)(p + twothirdmaxpower);
			}
		}

		// Work out the palette index of every row once per line
		if (compress) {
			// more than one frequency sample per pixel (vertically compress data)
			// pick the largest value per pixel for display
			for (int y = 0; y < imgheight; ++y) {
				unsigned long sample1 = SpectrumClampBin(maxband * y/imgheight + minband, line_length);
				unsigned long sample2 = SpectrumClampBin(maxband * (y+1)/imgheight + minband, line_length);
				float maxval = 0;
				for (unsigned long samp = sample1; samp <= sample2; samp++) {
					if (power[samp] > maxval) maxval = power[samp];
				}
				column[y] = SpectrumIntensity(256 * maxval / maxpower);
			}
		}
		else {
			// less than one frequency sample per pixel (vertically expand data)
			// interpolate between pixels
			// can also happen with exactly one sample per pixel, but how often is that?
			for (int y = 0; y < imgheight; ++y) {
				float ideal = (float)(y+1.)/imgheight * maxband;
				float sample1 = power[SpectrumClampBin((long)floor(ideal)+minband, line_length)];
				float sample2 = power[SpectrumClampBin((long)ceil(ideal)+minband, line_length)];
				float frac = ideal - floor(ideal);
				column[y] = SpectrumIntensity(((1-frac)*sample1 + frac*sample2) / maxpower * 256);
			}
		}

		// This line fills every column up to where the next line starts
		int next_line_imgcol = imgleft + (int)((unsigned long)imgwidth * (n + 1) / line_count);
		if (next_line_imgcol >= imgpitch)
			next_line_imgcol = imgpitch-1;

		for (int x = imgcol < 0 ? 0 : imgcol; x <= next_line_imgcol; ++x) {
			for (int y = 0; y < imgheight; ++y) {
				const unsigned char *colour = palette + column[y]*3;
				// Rows are flipped vertically: y == 0 lands on the bottom row
				unsigned char *pixel = img + ((size_t)(imgheight-y-1)*imgpitch + x)*3;
				pixel[0] = colour[0];
				pixel[1] = colour[1];
				pixel[2] = colour[2];
			}
		}
	}
}
|
||||
|
||||
|
||||
void AudioSpectrum::SetScaling(float _power_scale)
|
||||
{
|
||||
power_scale = _power_scale;
|
||||
}
|
||||
|
89
aegisub/audio_spectrum.h
Normal file
89
aegisub/audio_spectrum.h
Normal file
|
@ -0,0 +1,89 @@
|
|||
// Copyright (c) 2005, 2006, Rodrigo Braz Monteiro
|
||||
// Copyright (c) 2006, 2007, Niels Martin Hansen
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of the Aegisub Group nor the names of its contributors
|
||||
// may be used to endorse or promote products derived from this software
|
||||
// without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// AEGISUB
|
||||
//
|
||||
// Website: http://aegisub.cellosoft.com
|
||||
// Contact: mailto:zeratul@cellosoft.com
|
||||
//
|
||||
|
||||
#ifndef AUDIO_SPECTRUM_H
|
||||
#define AUDIO_SPECTRUM_H
|
||||
|
||||
#include <wx/wxprec.h>
|
||||
#include <vector>
|
||||
#include "audio_provider.h"
|
||||
|
||||
|
||||
// Spectrum cache basically caches the raw result of FFT.
// Abstract base class: concrete caches implement GetLine.
class AudioSpectrumCache {
public:
	// One line of cached data
	typedef std::vector<float> CacheLine;

	virtual ~AudioSpectrumCache() { }

	// Fetch cache line number i
	virtual CacheLine& GetLine(unsigned long i) = 0;

	// Set the line length shared by all caches
	static void SetLineLength(unsigned long new_length);

protected:
	// Line length shared by all caches, as set by SetLineLength
	static unsigned long line_length;
	// Shared line available to subclasses
	static CacheLine null_line;
};
|
||||
|
||||
|
||||
class AudioSpectrum {
|
||||
private:
|
||||
// Data provider
|
||||
AudioSpectrumCache *cache;
|
||||
|
||||
// Colour pallettes
|
||||
unsigned char colours_normal[256*3];
|
||||
unsigned char colours_selected[256*3];
|
||||
|
||||
AudioProvider *provider;
|
||||
|
||||
unsigned long line_length; // number of frequency components per line (half of number of samples)
|
||||
unsigned long num_lines; // number of lines needed for the audio
|
||||
float power_scale; // amplification of displayed power
|
||||
int minband; // smallest frequency band displayed
|
||||
int maxband; // largest frequency band displayed
|
||||
|
||||
public:
|
||||
AudioSpectrum(AudioProvider *_provider, unsigned long _line_length);
|
||||
~AudioSpectrum();
|
||||
|
||||
void RenderRange(__int64 range_start, __int64 range_end, bool selected, unsigned char *img, int imgleft, int imgwidth, int imgpitch, int imgheight);
|
||||
|
||||
void SetScaling(float _power_scale);
|
||||
};
|
||||
|
||||
|
||||
#endif
|
|
@ -6,14 +6,15 @@ Please visit http://aegisub.net to download latest version
|
|||
- New Aegisub logo. (AMZ)
|
||||
- Automation 4 has replaced Automation 3, see the help file for more details (jfs)
|
||||
o Automation 4 Lua uses Lua 5.1 instead of 5.0, meaning some new language features
|
||||
o It is now possible to write macros that manipulates subtitles directly
|
||||
o It is now possible to write macros that manipulate subtitles directly
|
||||
o Scripts have full access to the entire subtitle file, not just the "Events" section
|
||||
- Support reading SSA/ASS files with intermixed V4 and V4+ Styles sections (jfs)
|
||||
- Fixed loading of sections with unexpected cases. (AMZ)
|
||||
- Changes to Audio Spectrum: (jfs)
|
||||
o Calculation/drawing code is now multithreaded, meaning it runs faster on SMP (eg. dual-core) systems
|
||||
o The calculated FFT data are now cached, so things should be faster
|
||||
o Actual signal power is now more accurately represented
|
||||
o The palette is changed
|
||||
o Rendering now more accurately represents actual signal power
|
||||
o The selection is no longer shown by ugly reverse colour but with a different palette instead
|
||||
o Use vertical zoom slider to amplify/dampen displayed signal strength (useful for better visualisation of quiet sections, or easier picking out the dominating frequencies in noisy sections)
|
||||
- Plain-text export (jfs)
|
||||
- The style of the current line is automatically selected when opening the Style Manager (jfs)
|
||||
|
|
Loading…
Reference in a new issue