Rewrote audio spectrum rendering algo

Originally committed to SVN as r530.
This commit is contained in:
Niels Martin Hansen 2006-08-27 22:29:14 +00:00
parent 29961a029e
commit df964c78cd
2 changed files with 90 additions and 19 deletions

View file

@ -232,7 +232,7 @@ void AudioDisplay::UpdateImage(bool weak) {
// Draw keyframes // Draw keyframes
if (video->loaded && draw_boundary_lines) { if (video->loaded && draw_boundary_lines) {
int nKeys = video->KeyFrames.Count(); int nKeys = (int)video->KeyFrames.Count();
dc.SetPen(wxPen(wxColour(255,0,255),1)); dc.SetPen(wxPen(wxColour(255,0,255),1));
// Get min and max frames to care about // Get min and max frames to care about
@ -514,8 +514,9 @@ protected:
unsigned short *write_ptr16 = (unsigned short *)data; unsigned short *write_ptr16 = (unsigned short *)data;
// FFT output data // FFT output data
float *out_r = new float[window]; float *out_r = new float[window]; // real part
float *out_i = new float[window]; float *out_i = new float[window]; // imaginary part
float *power = new float[window]; // calculated signal power
// Prepare constants // Prepare constants
const int halfwindow = window/2; const int halfwindow = window/2;
@ -534,18 +535,84 @@ protected:
FFT fft; FFT fft;
fft.Transform(window,in,out_r,out_i); fft.Transform(window,in,out_r,out_i);
// Position pointer
write_ptr = data+i+h*w;
write_ptr16 = ((unsigned short*)data)+(i+h*w);
// Calculate the signal power over frequency
for (int j = 0; j < window; j++) {
power[j] = sqrt(out_r[j]*out_r[j] + out_i[j]*out_i[j]);
}
// According to the formula at http://en.wikipedia.org/wiki/Fast_Fourier_transform:
// X_k = SUM ( n=0, N-1, x_n * e^(-2*pi*i / N * n * k) )
// The maximum output value for our case (real-valued-only input, range -16384 to +16383, N=1024)
// must be:
// O(X_k) = O( SUM ( n=0, 1023, 16383 * exp(-2*pi*i / 1024 * 1023 * 1023) ) )
// = 1024 * 16383 * exp(-pi*i / 512 * 1023 * 1023)
// ~= 16777216 * exp(-2*pi * i * 1024)
// Since exp(ix) = cos(x) + i * sin(x), |a * exp(i*b)| = a for all real a and b, max
// power will be 16777216.
// More generally, in this context, it will be:
// samples * 2^(audio_bit_depth-1)
// Currently 16 bit audio is assumed, meaning samples*16384
// But scale this by a user-controlled amount (vertical zoom) -- scale is from 0 to 8
int maxpower = window*16384 / (16*256*scale);
#define WRITE_PIXEL \
if (intensity > 255) intensity = 255; \
if (intensity < 0) intensity = 0; \
if (depth == 32) { \
write_ptr -= w; \
*write_ptr = spectrumColorMap[intensity]; \
} \
else if (depth == 16) { \
write_ptr16 -= w; \
*write_ptr16 = spectrumColorMap16[intensity]; \
}
// Decide which rendering algo to use
if (halfwindow-cutoff > h) {
// more than one frequency sample per pixel (vertically compress data)
// pick the largest value per pixel for display
// Iterate over pixels, picking a range of samples for each
for (int j = 0; j < h; j++) {
int sample1 = (halfwindow-cutoff) * j/h + cutoff;
int sample2 = (halfwindow-cutoff) * (j+1)/h + cutoff;
float maxval = 0;
for (int samp = sample1; samp <= sample2; samp++) {
if (power[samp] > maxval) maxval = power[samp];
}
int intensity = int(maxval / maxpower);
WRITE_PIXEL
}
}
else {
// less than one frequency sample per pixel (vertically expand data)
// interpolate between pixels
// can also happen with exactly one sample per pixel, but how often is that?
// Iterate over pixels, picking the nearest power values
for (int j = 0; j < h; j++) {
float ideal = (float)(j+1.)/h * (halfwindow-cutoff);
float sample1 = power[(int)floor(ideal)+cutoff];
float sample2 = power[(int)ceil(ideal)+cutoff];
float frac = ideal - floor(ideal);
int intensity = int(((1-frac)*sample1 + frac*sample2) / maxpower * 255);
WRITE_PIXEL
}
}
#undef WRITE_PIXEL
// Draw bar // Draw bar
float accum = 0; /*float accum = 0;
int accumPos = posThres; int accumPos = posThres;
int y = h; int y = h;
int intensity; int intensity;
float t1,t2; float t1,t2;
// Position pointer
write_ptr = data+i+h*w;
write_ptr16 = ((unsigned short*)data)+(i+h*w);
// Draw loop
for (int j=cutoff;j<halfwindow;j++) { for (int j=cutoff;j<halfwindow;j++) {
// Calculate magnitude and add to accumulator // Calculate magnitude and add to accumulator
t1 = out_r[j]; t1 = out_r[j];
@ -577,11 +644,12 @@ protected:
accumPos = posThres; accumPos = posThres;
accum = 0; accum = 0;
} }
} }*/
} }
delete out_r; delete out_r;
delete out_i; delete out_i;
delete power;
return 0; return 0;
} }
@ -795,7 +863,7 @@ void AudioDisplay::GetDialoguePos(__int64 &selStart,__int64 &selEnd, bool cap) {
void AudioDisplay::GetKaraokePos(__int64 &karStart,__int64 &karEnd, bool cap) { void AudioDisplay::GetKaraokePos(__int64 &karStart,__int64 &karEnd, bool cap) {
try { try {
// Wrap around // Wrap around
int nsyls = karaoke->syllables.size(); int nsyls = (int)karaoke->syllables.size();
if (karaoke->curSyllable == -1) { if (karaoke->curSyllable == -1) {
karaoke->SetSyllable(nsyls-1); karaoke->SetSyllable(nsyls-1);
} }
@ -1178,7 +1246,7 @@ void AudioDisplay::SetDialogue(SubtitlesGrid *_grid,AssDialogue *diag,int n) {
NeedCommit = karaoke->LoadFromDialogue(dialogue); NeedCommit = karaoke->LoadFromDialogue(dialogue);
// Reset karaoke pos // Reset karaoke pos
if (karaoke->curSyllable == -1) karaoke->SetSyllable(karaoke->syllables.size()-1); if (karaoke->curSyllable == -1) karaoke->SetSyllable((int)karaoke->syllables.size()-1);
else karaoke->SetSyllable(0); else karaoke->SetSyllable(0);
} }
@ -1220,7 +1288,7 @@ void AudioDisplay::CommitChanges () {
// Update dialogues // Update dialogues
blockUpdate = true; blockUpdate = true;
wxArrayInt sel = grid->GetSelection(); wxArrayInt sel = grid->GetSelection();
int sels = sel.Count(); int sels = (int)sel.Count();
AssDialogue *curDiag; AssDialogue *curDiag;
for (int i=-1;i<sels;i++) { for (int i=-1;i<sels;i++) {
if (i == -1) curDiag = dialogue; if (i == -1) curDiag = dialogue;
@ -1644,7 +1712,7 @@ void AudioDisplay::OnMouseEvent(wxMouseEvent& event) {
defCursor = false; defCursor = false;
if (event.LeftIsDown()) { if (event.LeftIsDown()) {
hold = 4; hold = 4;
holdSyl = i; holdSyl = (int)i;
gotGrab = true; gotGrab = true;
} }
break; break;
@ -1724,7 +1792,7 @@ void AudioDisplay::OnMouseEvent(wxMouseEvent& event) {
int curpos,len,pos,nkar; int curpos,len,pos,nkar;
KaraokeSyllable *curSyl=NULL,*nextSyl=NULL; KaraokeSyllable *curSyl=NULL,*nextSyl=NULL;
curSyl = &karaoke->syllables.at(holdSyl); curSyl = &karaoke->syllables.at(holdSyl);
nkar = karaoke->syllables.size(); nkar = (int)karaoke->syllables.size();
if (holdSyl < nkar-1) { if (holdSyl < nkar-1) {
nextSyl = &karaoke->syllables.at(holdSyl+1); nextSyl = &karaoke->syllables.at(holdSyl+1);
} }
@ -2096,7 +2164,7 @@ void AudioDisplay::Next() {
CommitChanges(); CommitChanges();
} }
else if (result == wxCANCEL) { else if (result == wxCANCEL) {
karaoke->curSyllable = karaoke->syllables.size()-1; karaoke->curSyllable = (int)karaoke->syllables.size()-1;
return; return;
} }
} }
@ -2177,7 +2245,7 @@ int AudioDisplay::GetSyllableAtX(int x) {
sylstart = karaoke->syllables.at(i).position*10 + curStartMS; sylstart = karaoke->syllables.at(i).position*10 + curStartMS;
sylend = karaoke->syllables.at(i).length*10 + sylstart; sylend = karaoke->syllables.at(i).length*10 + sylstart;
if (ms >= sylstart && ms < sylend) { if (ms >= sylstart && ms < sylend) {
return i; return (int)i;
} }
} }
return -1; return -1;

View file

@ -5,8 +5,11 @@ Please visit http://aegisub.net to download latest version
- Support reading SSA/ASS files with intermixed V4 and V4+ Styles sections (jfs) - Support reading SSA/ASS files with intermixed V4 and V4+ Styles sections (jfs)
- Fixed loading of sections with unexpected cases. (AMZ) - Fixed loading of sections with unexpected cases. (AMZ)
- Made Audio Spectrum calculation/drawing code use multithreading, resulting in a significant speedup on SMP (eg. dual-core) systems (jfs) - Changes to Audio Spectrum: (jfs)
- Changed palette for audio spectrum (jfs) o Calculation/drawing code is now multithreaded, meaning it runs faster on SMP (eg. dual-core) systems
o The palette is changed
o Rendering now more accurately represents actual signal power
o Use the vertical zoom slider to amplify/dampen the displayed signal strength (useful for better visualisation of quiet sections, or for more easily picking out the dominant frequencies in noisy sections)
= 1.10 beta - 2006.08.07 =========================== = 1.10 beta - 2006.08.07 ===========================