diff --git a/aegisub/audio_display.cpp b/aegisub/audio_display.cpp index e49ba3c74..8cdc8a8d0 100644 --- a/aegisub/audio_display.cpp +++ b/aegisub/audio_display.cpp @@ -66,6 +66,8 @@ AudioDisplay::AudioDisplay(wxWindow *parent,VideoDisplay *display) video = NULL; origImage = NULL; spectrumDisplay = NULL; + spectrumDisplaySelected = NULL; + spectrumRenderer = NULL; ScrollBar = NULL; dialogue = NULL; karaoke = NULL; @@ -112,7 +114,9 @@ AudioDisplay::~AudioDisplay() { delete provider; delete player; delete origImage; + delete spectrumRenderer; delete spectrumDisplay; + delete spectrumDisplaySelected; delete peak; delete min; } @@ -214,11 +218,6 @@ void AudioDisplay::UpdateImage(bool weak) { // Draw spectrum if (spectrum) { DrawSpectrum(dc,weak); - - // Invert the selection, if any - if (hasSel && selStart < selEnd && Options.AsBool(_T("Audio Spectrum invert selection"))) { - dc.Blit(selStart, 0, selEnd-selStart, h, &dc, selStart, 0, wxSRC_INVERT); - } } // Draw seconds boundaries @@ -374,13 +373,14 @@ void AudioDisplay::UpdateImage(bool weak) { } } catch (...) { + // FIXME? 
} } } // Modified text if (NeedCommit) { - dc.SetFont(wxFont(9,wxDEFAULT,wxFONTSTYLE_NORMAL,wxFONTWEIGHT_BOLD,false,_T("Verdana"))); + dc.SetFont(wxFont(9,wxDEFAULT,wxFONTSTYLE_NORMAL,wxFONTWEIGHT_BOLD,false,_T("Verdana"))); // FIXME: hardcoded font name dc.SetTextForeground(wxColour(255,0,0)); if (selStart <= selEnd) { dc.DrawText(_T("Modified"),4,4); @@ -409,7 +409,7 @@ void AudioDisplay::UpdateImage(bool weak) { dc.SetPen(wxSystemSettings::GetColour(wxSYS_COLOUR_BTNTEXT)); dc.SetTextForeground(wxSystemSettings::GetColour(wxSYS_COLOUR_BTNTEXT)); wxFont scaleFont; - scaleFont.SetFaceName(_T("Tahoma")); + scaleFont.SetFaceName(_T("Tahoma")); // FIXME: hardcoded font name scaleFont.SetPointSize(8); dc.SetFont(scaleFont); @@ -503,259 +503,61 @@ void AudioDisplay::DrawWaveform(wxDC &dc,bool weak) { } -static int spectrumColorMap[256]; -static unsigned short spectrumColorMap16[256]; -static bool colorMapsGenerated = false; - - -////////////////////////////////////// -// Spectrum analyser rendering thread -class SpectrumRendererThread : public wxThread { -public: - SpectrumRendererThread() : wxThread(wxTHREAD_JOINABLE) { - if (Create() != wxTHREAD_NO_ERROR) - throw _T("Error creating Spectrum rendering thread."); - } - - int *data; // image data to write to (shared) - int window; // 1 << Options.AsInt(_T("Audio Spectrum Window")) - int firstbar, lastbar; // first and last vertical bar to draw - int w, h; // width and height of canvas - int cutoff; // cutoff frequency - float *base_in; // audio sample data (shared) - int samples; // number of samples per column - int depth; // display bit depth - float scale; // vertical scale of display, exponential, min=0, mid=1, max=8 - -protected: - wxThread::ExitCode Entry() { - // Pointers to image data - int *write_ptr = data; - unsigned short *write_ptr16 = (unsigned short *)data; - - // FFT output data - float *out_r = new float[window]; // real part - float *out_i = new float[window]; // imaginary part - float *power = new 
float[window]; // calculated signal power - - // Prepare constants - const int halfwindow = window/2; - //const int posThres = MAX(1,int(double(halfwindow-cutoff)/double(h)*0.5/scale + 0.5)); - const int maxband = (halfwindow-cutoff) * 2/3; - const float mult = float(h)/float(halfwindow-cutoff)/255.f; - - // Calculation loop - for (int i = firstbar; i < lastbar; i++) { - __int64 curStart = i*samples-(window/2); - if (curStart < 0) curStart = 0; - - // Position input - float *in = base_in + curStart; - - // Perform the FFT - FFT fft; - fft.Transform(window,in,out_r,out_i); - - // Position pointer - write_ptr = data+i+h*w; - write_ptr16 = ((unsigned short*)data)+(i+h*w); - - // The maximum power output from the FFT - // Derived by maximising the result from the DFT function: - // f[u] = sum(x=0,N-1)[ f(x) * exp(-2 * pi * i * u * x) ] - // Where N is the number of samples transformed. - // = N * 2^(B-1) * exp(-2 * pi * i * u * x) - // Maximising by f(x) constant at maximum sample value. - // B is bit-depth of the samples, so 2^(B-1) is the maximum sample value. - // = N * 2^(B-1) * [ cos(-2*pi*u*x) + i sin(-2*pi*u*x) ] - // Expanding using Euler's formula. - // = N * 2^(B-1) * [ cos(2*pi*u*x) - i sin(2*pi*u*x) ] - // cos(-x) = cos(x) and sin(-x) = -sin(x) - // = N * 2^(B-1) * cos(2*pi*u*x) - N * 2^(B-1) * i sin(2*pi*u*x) [A] - // Expand the bracket. - // Now determine the maximum magnitude of [A], letting u be constant and x variable. - // | N * 2^(B-1) * cos(2*pi*u*x) - N * 2^(B-1) * i sin(2*pi*u*x) | - // = sqrt( [N * 2^(B-1) * cos(2*pi*u*x)]^2 + [N * 2^(B-1) * sin(2*pi*u*x)]^2 ) - // = sqrt( N^2 * 4^(B-1) * cos^2(2*pi*u*x) + N^2 * 4^(B-1) * sin^2(2*pi*u*x) ) - // = sqrt( N^2 * 4^(B-1) * [ cos^2(2*pi*u*x) + sin^2(2*pi*u*x) ] ) - // = sqrt( N^2 * 4^(B-1) ) - // It's known that sin^2(x) + cos^2(x) = 1. 
- // = N * 2^(B-1) - - int maxpower = (1 << (16 - 1))*256; - - // Calculate the signal power over frequency -#if 0 - // Logarithmic scale - for (int j = 0; j < window; j++) { - float t = out_r[j]*out_r[j] + out_i[j]*out_i[j]; - if (t < 1) - power[j] = 0; - else - power[j] = 10. * log10(t) * 64; // try changing the constant 64 if playing with this - } - maxpower = 10 * log10((float)maxpower); -#elif 1 - // "Compressed" scale - double onethirdmaxpower = maxpower / 3, twothirdmaxpower = maxpower * 2/3; - double logoverscale = log(maxpower*8*scale - twothirdmaxpower); - for (int j = 0; j < window; j++) { - // First do a simple linear scale power calculation -- 8 gives a reasonable default scaling - power[j] = sqrt(out_r[j]*out_r[j] + out_i[j]*out_i[j]) * 8 * scale; - if (power[j] > maxpower * 2/3) { - double p = power[j] - twothirdmaxpower; - p = log(p) * onethirdmaxpower / logoverscale; - power[j] = p + twothirdmaxpower; - } - } -#else - // Linear scale - for (int j = 0; j < window; j++) { - power[j] = sqrt(out_r[j]*out_r[j] + out_i[j]*out_i[j]); - } -#endif - -#define WRITE_PIXEL \ - if (intensity > 255) intensity = 255; \ - if (intensity < 0) intensity = 0; \ - if (depth == 32) { \ - write_ptr -= w; \ - *write_ptr = spectrumColorMap[intensity]; \ - } \ - else if (depth == 16) { \ - write_ptr16 -= w; \ - *write_ptr16 = spectrumColorMap16[intensity]; \ - } - - // Decide which rendering algo to use - if (halfwindow-cutoff > h) { - // more than one frequency sample per pixel (vertically compress data) - // pick the largest value per pixel for display - - // Iterate over pixels, picking a range of samples for each - for (int j = 0; j < h; j++) { - int sample1 = maxband * j/h + cutoff; - int sample2 = maxband * (j+1)/h + cutoff; - float maxval = 0; - for (int samp = sample1; samp <= sample2; samp++) { - if (power[samp] > maxval) maxval = power[samp]; - } - int intensity = int(256 * maxval / maxpower); - WRITE_PIXEL - } - } - else { - // less than one frequency sample per 
pixel (vertically expand data) - // interpolate between pixels - // can also happen with exactly one sample per pixel, but how often is that? - - // Iterate over pixels, picking the nearest power values - for (int j = 0; j < h; j++) { - float ideal = (float)(j+1.)/h * maxband; - float sample1 = power[(int)floor(ideal)+cutoff]; - float sample2 = power[(int)ceil(ideal)+cutoff]; - float frac = ideal - floor(ideal); - int intensity = int(((1-frac)*sample1 + frac*sample2) / maxpower * 256); - WRITE_PIXEL - } - } - -#undef WRITE_PIXEL - } - - delete out_r; - delete out_i; - delete power; - - return 0; - } -}; - - ////////////////////////// // Draw spectrum analyzer void AudioDisplay::DrawSpectrum(wxDC &finaldc,bool weak) { - // Spectrum bitmap if (!weak || !spectrumDisplay || spectrumDisplay->GetWidth() != w || spectrumDisplay->GetHeight() != h) { if (spectrumDisplay) { delete spectrumDisplay; + delete spectrumDisplaySelected; spectrumDisplay = 0; + spectrumDisplaySelected = 0; } - //spectrumDisplay = new wxBitmap(w,h); weak = false; } if (!weak) { - // Generate colors - if (!colorMapsGenerated) { - unsigned char r,g,b; - for (int i=0;i<256;i++) { - //hsv_to_rgb(255 - i, 255 - i * 3/10, 255*3/10 + i * 7/10, &r, &g, &b); - hsl_to_rgb(170 + i * 2/3, 128 + i/2, i, &r, &g, &b); - spectrumColorMap[i] = b | (g<<8) | (r<<16); - spectrumColorMap16[i] = ((r>>3)<<11) | ((g>>2)<<5) | b>>3; - } - colorMapsGenerated = true; - } - int depth = wxDisplayDepth(); + unsigned char *img = (unsigned char *)malloc(h*w*3); // wxImage requires using malloc - // Prepare arrays - int cutOff = Options.AsInt(_T("Audio Spectrum Cutoff")); - int window = 1 << Options.AsInt(_T("Audio Spectrum Window")); - int totalLen = w*samples+window; - float *raw_float = new float[totalLen]; - short *raw_int = new short[totalLen]; - float *in = raw_float; + if (!spectrumRenderer) + spectrumRenderer = new AudioSpectrum(provider, 1<GetAudio(raw_int,start,totalLen); - for (int j=0;jSetScaling(scale); - // For image 
data - int *data = new int[w*h*depth/32]; + // Use a slightly slower, but simple way + // Always draw the spectrum for the entire width + // Hack: without those divs by 2 the display is horizontally compressed + spectrumRenderer->RenderRange(Position*samples, (Position+w)*samples, false, img, 0, w, w, h); - ////// START OF PARALLELISED CODE ////// - const int cpu_count = MAX(wxThread::GetCPUCount(), 1); - std::vector threads(cpu_count); - for (int i = 0; i < cpu_count; i++) { - // Ugh, way too much data to copy in - threads[i] = new SpectrumRendererThread(); - threads[i]->data = data; - threads[i]->window = window; - threads[i]->firstbar = i * w/cpu_count; - threads[i]->lastbar = (i+1) * w/cpu_count; - threads[i]->w = w; - threads[i]->h = h; - threads[i]->cutoff = cutOff; - threads[i]->base_in = raw_float; - threads[i]->samples = samples; - threads[i]->depth = depth; - threads[i]->scale = scale; - threads[i]->Run(); - } - // Threads started, wait for them to end - for (int i = 0; i < cpu_count; i++) { - threads[i]->Wait(); - delete threads[i]; - } + // The spectrum bitmap will have been deleted above already, so just make a new one + wxImage imgobj(w, h, img, false); + spectrumDisplay = new wxBitmap(imgobj); + } - // Clear memory - delete raw_float; - - // Create image FIXME *BREAKS ON NON-WIN32* (see wx docs) - spectrumDisplay = new wxBitmap((const char*)data,w,h,depth); + if (hasSel && selStartCap < selEndCap && + ((selStartCap > Position && selStartCap < Position+w) || + (selEndCap > Position && selEndCap < Position+w) || + (selStartCap < Position && selEndCap > Position+w)) && + !spectrumDisplaySelected) { + // There is a visible selection and we don't have a rendered one + // This should be done regardless whether we're "weak" or not + // Assume a few things were already set up when things were first rendered though + unsigned char *img = (unsigned char *)malloc(h*w*3); + spectrumRenderer->RenderRange(Position*samples, (Position+w)*samples, true, img, 0, w, w, 
h); + wxImage imgobj(w, h, img, false); + spectrumDisplaySelected = new wxBitmap(imgobj); } // Draw wxMemoryDC dc; dc.SelectObject(*spectrumDisplay); finaldc.Blit(0,0,w,h,&dc,0,0); -} + if (hasSel && spectrumDisplaySelected && selStartCap < selEndCap) { + dc.SelectObject(*spectrumDisplaySelected); + finaldc.Blit(selStart, 0, selEnd-selStart, h, &dc, selStart, 0); + } +} ////////////////////////// // Get selection position @@ -939,8 +741,10 @@ void AudioDisplay::SetFile(wxString file, VideoProvider *vprovider) { if (player) player->CloseStream(); delete provider; delete player; + delete spectrumRenderer; provider = NULL; player = NULL; + spectrumRenderer = NULL; Reset(); loaded = false; diff --git a/aegisub/audio_display.h b/aegisub/audio_display.h index 671b2afed..891608b08 100644 --- a/aegisub/audio_display.h +++ b/aegisub/audio_display.h @@ -43,6 +43,7 @@ #include #include "audio_provider.h" #include "audio_player.h" +#include "audio_spectrum.h" ////////////// @@ -67,8 +68,11 @@ private: AssDialogue *dialogue; VideoDisplay *video; + AudioSpectrum *spectrumRenderer; + wxBitmap *origImage; wxBitmap *spectrumDisplay; + wxBitmap *spectrumDisplaySelected; __int64 PositionSample; float scale; int samples; @@ -106,7 +110,7 @@ private: int scrubLastRate; void OnPaint(wxPaintEvent &event); - void OnMouseEvent(wxMouseEvent &event); + void OnMouseEvent(wxMouseEvent &event); void OnSize(wxSizeEvent &event); void OnUpdateTimer(wxTimerEvent &event); void OnKeyDown(wxKeyEvent &event); diff --git a/aegisub/audio_spectrum.cpp b/aegisub/audio_spectrum.cpp new file mode 100644 index 000000000..33c4a0439 --- /dev/null +++ b/aegisub/audio_spectrum.cpp @@ -0,0 +1,327 @@ +// Copyright (c) 2005, 2006, Rodrigo Braz Monteiro +// Copyright (c) 2006, 2007, Niels Martin Hansen +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of the Aegisub Group nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+// +// ----------------------------------------------------------------------------- +// +// AEGISUB +// +// Website: http://aegisub.cellosoft.com +// Contact: mailto:zeratul@cellosoft.com +// + +#include +#include "audio_spectrum.h" +#include "fft.h" +#include "colorspace.h" +#include "options.h" + + +// Audio spectrum FFT data cache + +AudioSpectrumCache::CacheLine AudioSpectrumCache::null_line; +unsigned long AudioSpectrumCache::line_length; + +void AudioSpectrumCache::SetLineLength(unsigned long new_length) +{ + line_length = new_length; + null_line.resize(new_length, 0); +} + + +// Bottom level FFT cache, holds actual power data itself + +class FinalSpectrumCache : public AudioSpectrumCache { +private: + std::vector data; + unsigned long start, length; // start and end of range + +public: + CacheLine& GetLine(unsigned long i) + { + // This check ought to be redundant + if (i >= start && i-start < length) + return data[i - start]; + else + return null_line; + } + + FinalSpectrumCache(AudioProvider *provider, unsigned long _start, unsigned long _length) + { + start = _start; + length = _length; + + assert(length > 2); + + // First fill the data vector with blanks + // Both start and end are included in the range stored, so we have end-start+1 elements + data.resize(length, null_line); + + // Start sample number of the next line calculated + // line_length is half of the number of samples used to calculate a line, since half of the output from + // a Fourier transform of real data is redundant, and not interesting for the purpose of creating + // a frequency/power spectrum. + __int64 sample = start * line_length*2; + + // Raw sample data + short *raw_sample_data = new short[line_length*2]; + float *sample_data = new float[line_length*2]; + // Real and imaginary components of the output + float *out_r = new float[line_length*2]; + float *out_i = new float[line_length*2]; + + FFT fft; // TODO: use FFTW instead? 
+ + for (unsigned long i = 0; i < length; ++i) { + provider->GetAudio(raw_sample_data, sample, line_length*2); + for (size_t j = 0; j < line_length; ++j) { + sample_data[j*2] = (float)raw_sample_data[j*2]; + sample_data[j*2+1] = (float)raw_sample_data[j*2+1]; + } + + fft.Transform(line_length*2, sample_data, out_r, out_i); + + CacheLine &line = data[i]; + for (size_t j = 0; j < line_length; ++j) { + line[j] = sqrt(out_r[j]*out_r[j] + out_i[j]*out_i[j]); + } + + sample += line_length*2; + } + + delete[] raw_sample_data; + delete[] sample_data; + delete[] out_r; + delete[] out_i; + } + + virtual ~FinalSpectrumCache() + { + } + +}; + + +// Non-bottom-level cache, refers to other caches to do the work + +class IntermediateSpectrumCache : public AudioSpectrumCache { +private: + std::vector sub_caches; + unsigned long start, length, subcache_length; + bool subcaches_are_final; + int depth; + AudioProvider *provider; + +public: + CacheLine &GetLine(unsigned long i) + { + if (i >= start && i-start <= length) { + // Determine which sub-cache this line resides in + int subcache = (i-start) / subcache_length; + assert(subcache >= 0 && subcache < sub_caches.size()); + + if (!sub_caches[subcache]) { + if (subcaches_are_final) { + sub_caches[subcache] = new FinalSpectrumCache(provider, start+subcache*subcache_length, subcache_length); + } else { + sub_caches[subcache] = new IntermediateSpectrumCache(provider, start+subcache*subcache_length, subcache_length, depth+1); + } + } + + return sub_caches[subcache]->GetLine(i); + } else { + return null_line; + } + } + + IntermediateSpectrumCache(AudioProvider *_provider, unsigned long _start, unsigned long _length, int _depth) + { + provider = _provider; + start = _start; + length = _length; + depth = _depth; + + // FIXME: this calculation probably needs tweaking + int num_subcaches = 1; + unsigned long tmp = length; + while (tmp > 0) { + tmp /= 16; + num_subcaches *= 2; + } + subcache_length = length / (num_subcaches-1); + + 
subcaches_are_final = num_subcaches <= 4; + + sub_caches.resize(num_subcaches, 0); + } + + virtual ~IntermediateSpectrumCache() + { + for (size_t i = 0; i < sub_caches.size(); ++i) + if (sub_caches[i]) + delete sub_caches[i]; + } + +}; + + +// AudioSpectrum + +AudioSpectrum::AudioSpectrum(AudioProvider *_provider, unsigned long _line_length) +{ + provider = _provider; + line_length = _line_length; + + __int64 _num_lines = provider->GetNumSamples() / line_length / 2; + //assert (_num_lines < (1<<31)); // hope it fits into 32 bits... + num_lines = (unsigned long)_num_lines; + + AudioSpectrumCache::SetLineLength(line_length); + cache = new IntermediateSpectrumCache(provider, 0, num_lines, 0); + + power_scale = 1; + minband = Options.AsInt(_T("Audio Spectrum Cutoff")); + maxband = line_length - minband * 2/3; // TODO: make this customisable? + + // Generate colour maps + unsigned char *palptr = colours_normal; + for (int i = 0; i < 256; i++) { + hsl_to_rgb(170 + i * 2/3, 128 + i/2, i, palptr+0, palptr+1, palptr+2); + palptr += 3; + } + palptr = colours_selected; + for (int i = 0; i < 256; i++) { + hsl_to_rgb(170 + i * 2/3, 128 + i/2, i*3/4+64, palptr+0, palptr+1, palptr+2); + palptr += 3; + } +} + + +AudioSpectrum::~AudioSpectrum() +{ + delete cache; +} + + +void AudioSpectrum::RenderRange(__int64 range_start, __int64 range_end, bool selected, unsigned char *img, int imgleft, int imgwidth, int imgpitch, int imgheight) +{ + unsigned long first_line = (unsigned long)(range_start / line_length / 2); + unsigned long last_line = (unsigned long)(range_end / line_length / 2); + unsigned long lines_to_render = last_line - first_line + 1; + + float *power = new float[line_length]; + + int last_imgcol_rendered = -1; + + unsigned char *palette; + if (selected) + palette = colours_selected; + else + palette = colours_normal; + + for (unsigned long i = first_line; i <= last_line; ++i) { + // Handle horizontal compression and don't unneededly re-render columns + int imgcol = imgleft 
+ imgwidth * (i - first_line) / (last_line - first_line + 1); + if (imgcol <= last_imgcol_rendered) + continue; + + AudioSpectrumCache::CacheLine &line = cache->GetLine(i); + + int maxpower = (1 << (16 - 1))*256; + + // Calculate the signal power over frequency + // "Compressed" scale + double onethirdmaxpower = maxpower / 3, twothirdmaxpower = maxpower * 2/3; + double logoverscale = log(maxpower*8*power_scale - twothirdmaxpower); + for (int j = 0; j < line_length; j++) { + // First do a simple linear scale power calculation -- 8 gives a reasonable default scaling + power[j] = line[j] * 8 * power_scale; + if (power[j] > maxpower * 2/3) { + double p = power[j] - twothirdmaxpower; + p = log(p) * onethirdmaxpower / logoverscale; + power[j] = p + twothirdmaxpower; + } + } + +#define WRITE_PIXEL \ + img[((imgheight-y-1)*imgpitch+x)*3 + 0] = palette[intensity*3+0]; \ + img[((imgheight-y-1)*imgpitch+x)*3 + 1] = palette[intensity*3+1]; \ + img[((imgheight-y-1)*imgpitch+x)*3 + 2] = palette[intensity*3+2]; + + int next_line_imgcol = imgleft + imgwidth * (i - first_line + 1) / (last_line - first_line + 1); + if (next_line_imgcol >= imgpitch) + next_line_imgcol = imgpitch-1; + + for (int x = imgcol; x <= next_line_imgcol; ++x) { + + // Decide which rendering algo to use + if (maxband - minband > imgheight) { + // more than one frequency sample per pixel (vertically compress data) + // pick the largest value per pixel for display + + // Iterate over pixels, picking a range of samples for each + for (int y = 0; y < imgheight; ++y) { + int sample1 = maxband * y/imgheight + minband; + int sample2 = maxband * (y+1)/imgheight + minband; + float maxval = 0; + for (int samp = sample1; samp <= sample2; samp++) { + if (power[samp] > maxval) maxval = power[samp]; + } + int intensity = int(256 * maxval / maxpower); + WRITE_PIXEL + } + } + else { + // less than one frequency sample per pixel (vertically expand data) + // interpolate between pixels + // can also happen with exactly one 
sample per pixel, but how often is that? + + // Iterate over pixels, picking the nearest power values + for (int y = 0; y < imgheight; ++y) { + float ideal = (float)(y+1.)/imgheight * maxband; + float sample1 = power[(int)floor(ideal)+minband]; + float sample2 = power[(int)ceil(ideal)+minband]; + float frac = ideal - floor(ideal); + int intensity = int(((1-frac)*sample1 + frac*sample2) / maxpower * 256); + WRITE_PIXEL + } + } + } + +#undef WRITE_PIXEL + + } + + delete[] power; +} + + +void AudioSpectrum::SetScaling(float _power_scale) +{ + power_scale = _power_scale; +} + diff --git a/aegisub/audio_spectrum.h b/aegisub/audio_spectrum.h new file mode 100644 index 000000000..6f35873c8 --- /dev/null +++ b/aegisub/audio_spectrum.h @@ -0,0 +1,89 @@ +// Copyright (c) 2005, 2006, Rodrigo Braz Monteiro +// Copyright (c) 2006, 2007, Niels Martin Hansen +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of the Aegisub Group nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// ----------------------------------------------------------------------------- +// +// AEGISUB +// +// Website: http://aegisub.cellosoft.com +// Contact: mailto:zeratul@cellosoft.com +// + +#ifndef AUDIO_SPECTRUM_H +#define AUDIO_SPECTRUM_H + +#include +#include +#include "audio_provider.h" + + +// Spectrum cache basically caches the raw result of FFT +class AudioSpectrumCache { +public: + typedef std::vector CacheLine; + + virtual CacheLine& GetLine(unsigned long i) = 0; + + static void SetLineLength(unsigned long new_length); + + virtual ~AudioSpectrumCache() {}; + +protected: + static CacheLine null_line; + static unsigned long line_length; +}; + + +class AudioSpectrum { +private: + // Data provider + AudioSpectrumCache *cache; + + // Colour palettes + unsigned char colours_normal[256*3]; + unsigned char colours_selected[256*3]; + + AudioProvider *provider; + + unsigned long line_length; // number of frequency components per line (half of number of samples) + unsigned long num_lines; // number of lines needed for the audio + float power_scale; // amplification of displayed power + int minband; // smallest frequency band displayed + int maxband; // largest frequency band displayed + +public: + AudioSpectrum(AudioProvider *_provider, unsigned long _line_length); + ~AudioSpectrum(); + + void RenderRange(__int64 range_start, __int64 range_end, bool selected, unsigned char *img, int imgleft, int imgwidth, int imgpitch, int 
imgheight); + + void SetScaling(float _power_scale); +}; + + +#endif diff --git a/aegisub/changelog.txt b/aegisub/changelog.txt index 190b50707..d675bd462 100644 --- a/aegisub/changelog.txt +++ b/aegisub/changelog.txt @@ -6,14 +6,15 @@ Please visit http://aegisub.net to download latest version - New Aegisub logo. (AMZ) - Automation 4 has replaced Automation 3, see the help file for more details (jfs) o Automation 4 Lua uses Lua 5.1 instead of 5.0, meaning some new language features - o It is now possible to write macros that manipulates subtitles directly + o It is now possible to write macros that manipulate subtitles directly o Scripts have full access to the entire subtitle file, not just the "Events" section - Support reading SSA/ASS files with intermixed V4 and V4+ Styles sections (jfs) - Fixed loading of sections with unexpected cases. (AMZ) - Changes to Audio Spectrum: (jfs) - o Calculation/drawing code is now multithreaded, meaning it runs faster on SMP (eg. dual-core) systems + o The calculated FFT data are now cached, so things should be faster + o Actual signal power is now more accurately represented o The palette is changed - o Rendering now more accurately represents actual signal power + o The selection is no longer shown by ugly reverse colour but with a different palette instead o Use vertical zoom slider to amplify/dampen displayed signal strength (useful for better visualisation of quiet sections, or easier picking out the dominating frequencies in noisy sections) - Plain-text export (jfs) - The style of the current line is automatically selected when opening the Style Manager (jfs)