forked from mia/Aegisub
Go back to the classic pixmix/pixmix2 architecture, which probably makes some parts clearer. Add more, and more accurate, documentation of the Draw code. The alpha calculation is probably more correct (or at least no less correct, it seems).
Originally committed to SVN as r1543.
This commit is contained in:
parent
35f2b7caa2
commit
268b6c45af
1 changed file with 61 additions and 56 deletions
|
@ -773,8 +773,21 @@ bool Rasterizer::Rasterize(int xsub, int ysub, bool fBlur)
|
||||||
|
|
||||||
static __forceinline void pixmix(DWORD *dst, DWORD color, DWORD alpha)
|
static __forceinline void pixmix(DWORD *dst, DWORD color, DWORD alpha)
|
||||||
{
|
{
|
||||||
int a = (((alpha)*(color>>24))>>12)&0xff;
|
int a = (((alpha)*(color>>24))>>6)&0xff;
|
||||||
|
// Make sure both a and ia are in range 1..256 for the >>8 operations below to be correct
|
||||||
int ia = 256-a;
|
int ia = 256-a;
|
||||||
|
a+=1;
|
||||||
|
|
||||||
|
*dst = ((((*dst&0x00ff00ff)*ia + (color&0x00ff00ff)*a)&0xff00ff00)>>8)
|
||||||
|
| ((((*dst&0x0000ff00)*ia + (color&0x0000ff00)*a)&0x00ff0000)>>8)
|
||||||
|
| ((((*dst>>8)&0x00ff0000)*ia)&0xff000000);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline void pixmix2(DWORD *dst, DWORD color, DWORD shapealpha, DWORD clipalpha)
|
||||||
|
{
|
||||||
|
int a = (((shapealpha)*(clipalpha)*(color>>24))>>12)&0xff;
|
||||||
|
int ia = 256-a;
|
||||||
|
a+=1;
|
||||||
|
|
||||||
*dst = ((((*dst&0x00ff00ff)*ia + (color&0x00ff00ff)*a)&0xff00ff00)>>8)
|
*dst = ((((*dst&0x00ff00ff)*ia + (color&0x00ff00ff)*a)&0xff00ff00)>>8)
|
||||||
| ((((*dst&0x0000ff00)*ia + (color&0x0000ff00)*a)&0x00ff0000)>>8)
|
| ((((*dst&0x0000ff00)*ia + (color&0x0000ff00)*a)&0x00ff0000)>>8)
|
||||||
|
@ -786,11 +799,30 @@ static __forceinline void pixmix(DWORD *dst, DWORD color, DWORD alpha)
|
||||||
|
|
||||||
static __forceinline void pixmix_sse2(DWORD* dst, DWORD color, DWORD alpha)
|
static __forceinline void pixmix_sse2(DWORD* dst, DWORD color, DWORD alpha)
|
||||||
{
|
{
|
||||||
alpha = ((alpha * (color>>24)) >> 12) & 0xff;
|
alpha = (((alpha) * (color>>24)) >> 6) & 0xff;
|
||||||
color &= 0xffffff;
|
color &= 0xffffff;
|
||||||
|
|
||||||
__m128i zero = _mm_setzero_si128();
|
__m128i zero = _mm_setzero_si128();
|
||||||
__m128i a = _mm_set1_epi32((alpha << 16) | (0x100 - alpha));
|
__m128i a = _mm_set1_epi32(((alpha+1) << 16) | (0x100 - alpha));
|
||||||
|
__m128i d = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*dst), zero);
|
||||||
|
__m128i s = _mm_unpacklo_epi8(_mm_cvtsi32_si128(color), zero);
|
||||||
|
__m128i r = _mm_unpacklo_epi16(d, s);
|
||||||
|
|
||||||
|
r = _mm_madd_epi16(r, a);
|
||||||
|
r = _mm_srli_epi32(r, 8);
|
||||||
|
r = _mm_packs_epi32(r, r);
|
||||||
|
r = _mm_packus_epi16(r, r);
|
||||||
|
|
||||||
|
*dst = (DWORD)_mm_cvtsi128_si32(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline void pixmix2_sse2(DWORD* dst, DWORD color, DWORD shapealpha, DWORD clipalpha)
|
||||||
|
{
|
||||||
|
int alpha = (((shapealpha)*(clipalpha)*(color>>24))>>12)&0xff;
|
||||||
|
color &= 0xffffff;
|
||||||
|
|
||||||
|
__m128i zero = _mm_setzero_si128();
|
||||||
|
__m128i a = _mm_set1_epi32(((alpha+1) << 16) | (0x100 - alpha));
|
||||||
__m128i d = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*dst), zero);
|
__m128i d = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*dst), zero);
|
||||||
__m128i s = _mm_unpacklo_epi8(_mm_cvtsi32_si128(color), zero);
|
__m128i s = _mm_unpacklo_epi8(_mm_cvtsi32_si128(color), zero);
|
||||||
__m128i r = _mm_unpacklo_epi16(d, s);
|
__m128i r = _mm_unpacklo_epi16(d, s);
|
||||||
|
@ -813,7 +845,8 @@ static const __int64 _00ff00ff00ff00ff = 0x00ff00ff00ff00ffi64;
|
||||||
// clipRect is a rectangular clip region to render inside.
|
// clipRect is a rectangular clip region to render inside.
|
||||||
// pAlphaMask is an alpha clipping mask.
|
// pAlphaMask is an alpha clipping mask.
|
||||||
// xsub and ysub ???
|
// xsub and ysub ???
|
||||||
// switchpts seems to be an array of interlaced colour switching coordinates/colours to switch to.
|
// switchpts seems to be an array of fill colours interlaced with coordinates.
|
||||||
|
// switchpts[i*2] contains a colour and switchpts[i*2+1] contains the coordinate to use that colour from
|
||||||
// fBody tells whether to render the body of the subs.
|
// fBody tells whether to render the body of the subs.
|
||||||
// fBorder tells whether to render the border of the subs.
|
// fBorder tells whether to render the border of the subs.
|
||||||
CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int xsub, int ysub, const long* switchpts, bool fBody, bool fBorder)
|
CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int xsub, int ysub, const long* switchpts, bool fBody, bool fBorder)
|
||||||
|
@ -853,13 +886,16 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
|
||||||
|
|
||||||
// The alpha bitmap of the subtitles?
|
// The alpha bitmap of the subtitles?
|
||||||
const byte* src = mpOverlayBuffer + 2*(mOverlayWidth * yo + xo);
|
const byte* src = mpOverlayBuffer + 2*(mOverlayWidth * yo + xo);
|
||||||
|
// s points to what the "body" to use is
|
||||||
|
// If we're rendering body fill and border, src+1 points to the array of
|
||||||
|
// widened regions which contain both border and fill in one.
|
||||||
const byte* s = fBorder ? (src+1) : src;
|
const byte* s = fBorder ? (src+1) : src;
|
||||||
// The complex "vector clip mask" I think.
|
// The complex "vector clip mask" I think.
|
||||||
const byte* am = pAlphaMask + spd.w * y + x;
|
const byte* am = pAlphaMask + spd.w * y + x;
|
||||||
// How would this differ from src?
|
// How would this differ from src?
|
||||||
unsigned long* dst = (unsigned long *)((char *)spd.bits + spd.pitch * y) + x;
|
unsigned long* dst = (unsigned long *)((char *)spd.bits + spd.pitch * y) + x;
|
||||||
|
|
||||||
// ??? What is switchpts ?
|
// Grab the first colour
|
||||||
unsigned long color = switchpts[0];
|
unsigned long color = switchpts[0];
|
||||||
|
|
||||||
// CPUID from VDub
|
// CPUID from VDub
|
||||||
|
@ -871,23 +907,24 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
|
||||||
// Basic case of no complex clipping mask
|
// Basic case of no complex clipping mask
|
||||||
if(!pAlphaMask)
|
if(!pAlphaMask)
|
||||||
{
|
{
|
||||||
// Again, what is switchpts?
|
// If the first colour switching coordinate is at "infinite" we're
|
||||||
|
// never switching and can use some simpler code.
|
||||||
|
// ??? Is this optimisation really worth the extra readability issues it adds?
|
||||||
if(switchpts[1] == 0xffffffff)
|
if(switchpts[1] == 0xffffffff)
|
||||||
{
|
{
|
||||||
// Are we rendering the fill or a border/shadow? I think...
|
// fBody is true if we're rendering a fill or a shadow.
|
||||||
if(fBody)
|
if(fBody)
|
||||||
{
|
{
|
||||||
// Run over every pixel, overlaying the subtitles with the fill colour
|
// Run over every pixel, overlaying the subtitles with the fill colour
|
||||||
if(fSSE2)
|
if(fSSE2)
|
||||||
for(int wt=0; wt<w; ++wt)
|
for(int wt=0; wt<w; ++wt)
|
||||||
// Why s[wt*2] and not s[wt] ?
|
|
||||||
// The <<6 is due to pixmix expecting the alpha parameter to be
|
// The <<6 is due to pixmix expecting the alpha parameter to be
|
||||||
// the multiplication of two 6-bit unsigned numbers but we
|
// the multiplication of two 6-bit unsigned numbers but we
|
||||||
// only have one here. (No alpha mask.)
|
// only have one here. (No alpha mask.)
|
||||||
pixmix_sse2(&dst[wt], color, s[wt*2]<<6);
|
pixmix_sse2(&dst[wt], color, s[wt*2]);
|
||||||
else
|
else
|
||||||
for(int wt=0; wt<w; ++wt)
|
for(int wt=0; wt<w; ++wt)
|
||||||
pixmix(&dst[wt], color, s[wt*2]<<6);
|
pixmix(&dst[wt], color, s[wt*2]);
|
||||||
}
|
}
|
||||||
// Not body, ie. something else (border, shadow, I guess)
|
// Not body, ie. something else (border, shadow, I guess)
|
||||||
else
|
else
|
||||||
|
@ -902,10 +939,10 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
|
||||||
// created by CreateWidenedRegion, and thus contains
|
// created by CreateWidenedRegion, and thus contains
|
||||||
// both the fill and the border, so subtracting the fill
|
// both the fill and the border, so subtracting the fill
|
||||||
// from that is always safe.
|
// from that is always safe.
|
||||||
pixmix_sse2(&dst[wt], color, (src[wt*2+1] - src[wt*2])<<6);
|
pixmix_sse2(&dst[wt], color, src[wt*2+1] - src[wt*2]);
|
||||||
else
|
else
|
||||||
for(int wt=0; wt<w; ++wt)
|
for(int wt=0; wt<w; ++wt)
|
||||||
pixmix(&dst[wt], color, (src[wt*2+1] - src[wt*2])<<6);
|
pixmix(&dst[wt], color, src[wt*2+1] - src[wt*2]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// not (switchpts[1] == 0xffffffff)
|
// not (switchpts[1] == 0xffffffff)
|
||||||
|
@ -923,13 +960,13 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
|
||||||
// So if we have passed the switchpoint (?) switch to another colour
|
// So if we have passed the switchpoint (?) switch to another colour
|
||||||
// (So switchpts stores both colours *and* coordinates?)
|
// (So switchpts stores both colours *and* coordinates?)
|
||||||
if(wt+xo >= sw[1]) {while(wt+xo >= sw[1]) sw += 2; color = sw[-2];}
|
if(wt+xo >= sw[1]) {while(wt+xo >= sw[1]) sw += 2; color = sw[-2];}
|
||||||
pixmix_sse2(&dst[wt], color, s[wt*2]<<6);
|
pixmix_sse2(&dst[wt], color, s[wt*2]);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
for(int wt=0; wt<w; ++wt)
|
for(int wt=0; wt<w; ++wt)
|
||||||
{
|
{
|
||||||
if(wt+xo >= sw[1]) {while(wt+xo >= sw[1]) sw += 2; color = sw[-2];}
|
if(wt+xo >= sw[1]) {while(wt+xo >= sw[1]) sw += 2; color = sw[-2];}
|
||||||
pixmix(&dst[wt], color, s[wt*2]<<6);
|
pixmix(&dst[wt], color, s[wt*2]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Not body
|
// Not body
|
||||||
|
@ -939,13 +976,13 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
|
||||||
for(int wt=0; wt<w; ++wt)
|
for(int wt=0; wt<w; ++wt)
|
||||||
{
|
{
|
||||||
if(wt+xo >= sw[1]) {while(wt+xo >= sw[1]) sw += 2; color = sw[-2];}
|
if(wt+xo >= sw[1]) {while(wt+xo >= sw[1]) sw += 2; color = sw[-2];}
|
||||||
pixmix_sse2(&dst[wt], color, (src[wt*2+1] - src[wt*2])<<6);
|
pixmix_sse2(&dst[wt], color, src[wt*2+1] - src[wt*2]);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
for(int wt=0; wt<w; ++wt)
|
for(int wt=0; wt<w; ++wt)
|
||||||
{
|
{
|
||||||
if(wt+xo >= sw[1]) {while(wt+xo >= sw[1]) sw += 2; color = sw[-2];}
|
if(wt+xo >= sw[1]) {while(wt+xo >= sw[1]) sw += 2; color = sw[-2];}
|
||||||
pixmix(&dst[wt], color, (src[wt*2+1] - src[wt*2])<<6);
|
pixmix(&dst[wt], color, src[wt*2+1] - src[wt*2]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -957,12 +994,6 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
|
||||||
{
|
{
|
||||||
if(fBody)
|
if(fBody)
|
||||||
{
|
{
|
||||||
/*const byte* s = fBorder?(src+1):src;
|
|
||||||
|
|
||||||
for(int wt=0; wt<w; ++wt)
|
|
||||||
{
|
|
||||||
pixmix2(s[wt*2]);
|
|
||||||
}*/
|
|
||||||
if(fSSE2)
|
if(fSSE2)
|
||||||
for(int wt=0; wt<w; ++wt)
|
for(int wt=0; wt<w; ++wt)
|
||||||
// Both s and am contain 6-bit bitmaps of two different
|
// Both s and am contain 6-bit bitmaps of two different
|
||||||
|
@ -970,23 +1001,19 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
|
||||||
// clipping mask.
|
// clipping mask.
|
||||||
// Multiplying them together yields a 12-bit number.
|
// Multiplying them together yields a 12-bit number.
|
||||||
// I think some imprecision is introduced here??
|
// I think some imprecision is introduced here??
|
||||||
pixmix_sse2(&dst[wt], color, s[wt*2] * am[wt]);
|
pixmix2_sse2(&dst[wt], color, s[wt*2], am[wt]);
|
||||||
else
|
else
|
||||||
for(int wt=0; wt<w; ++wt)
|
for(int wt=0; wt<w; ++wt)
|
||||||
pixmix(&dst[wt], color, s[wt*2] * am[wt]);
|
pixmix2(&dst[wt], color, s[wt*2], am[wt]);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/*for(int wt=0; wt<w; ++wt)
|
|
||||||
{
|
|
||||||
pixmix2(src[wt*2+1]-src[wt*2]);
|
|
||||||
}*/
|
|
||||||
if(fSSE2)
|
if(fSSE2)
|
||||||
for(int wt=0; wt<w; ++wt)
|
for(int wt=0; wt<w; ++wt)
|
||||||
pixmix_sse2(&dst[wt], color, (src[wt*2+1] - src[wt*2]) * am[wt]);
|
pixmix2_sse2(&dst[wt], color, src[wt*2+1] - src[wt*2], am[wt]);
|
||||||
else
|
else
|
||||||
for(int wt=0; wt<w; ++wt)
|
for(int wt=0; wt<w; ++wt)
|
||||||
pixmix(&dst[wt], color, (src[wt*2+1] - src[wt*2]) * am[wt]);
|
pixmix2(&dst[wt], color, src[wt*2+1] - src[wt*2], am[wt]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -995,18 +1022,6 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
|
||||||
|
|
||||||
if(fBody)
|
if(fBody)
|
||||||
{
|
{
|
||||||
/*const byte* s = fBorder?(src+1):src;
|
|
||||||
|
|
||||||
for(int wt=0; wt<w; ++wt)
|
|
||||||
{
|
|
||||||
if(wt+xo >= sw[1])
|
|
||||||
{
|
|
||||||
while(wt+xo >= sw[1]) sw += 2;
|
|
||||||
color = sw[-2];
|
|
||||||
}
|
|
||||||
|
|
||||||
pixmix2(s[wt*2]);
|
|
||||||
}*/
|
|
||||||
if(fSSE2)
|
if(fSSE2)
|
||||||
for(int wt=0; wt<w; ++wt)
|
for(int wt=0; wt<w; ++wt)
|
||||||
{
|
{
|
||||||
|
@ -1014,7 +1029,7 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
|
||||||
while(wt+xo >= sw[1])
|
while(wt+xo >= sw[1])
|
||||||
sw += 2; color = sw[-2];
|
sw += 2; color = sw[-2];
|
||||||
}
|
}
|
||||||
pixmix_sse2(&dst[wt], color, s[wt*2] * am[wt]);
|
pixmix2_sse2(&dst[wt], color, s[wt*2], am[wt]);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
for(int wt=0; wt<w; ++wt)
|
for(int wt=0; wt<w; ++wt)
|
||||||
|
@ -1023,21 +1038,11 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
|
||||||
while(wt+xo >= sw[1])
|
while(wt+xo >= sw[1])
|
||||||
sw += 2; color = sw[-2];
|
sw += 2; color = sw[-2];
|
||||||
}
|
}
|
||||||
pixmix(&dst[wt], color, s[wt*2] * am[wt]);
|
pixmix2(&dst[wt], color, s[wt*2], am[wt]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/*for(int wt=0; wt<w; ++wt)
|
|
||||||
{
|
|
||||||
if(wt+xo >= sw[1])
|
|
||||||
{
|
|
||||||
while(wt+xo >= sw[1]) sw += 2;
|
|
||||||
color = sw[-2];
|
|
||||||
}
|
|
||||||
|
|
||||||
pixmix2(src[wt*2+1]-src[wt*2]);
|
|
||||||
}*/
|
|
||||||
if(fSSE2)
|
if(fSSE2)
|
||||||
for(int wt=0; wt<w; ++wt)
|
for(int wt=0; wt<w; ++wt)
|
||||||
{
|
{
|
||||||
|
@ -1045,7 +1050,7 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
|
||||||
while(wt+xo >= sw[1])
|
while(wt+xo >= sw[1])
|
||||||
sw += 2; color = sw[-2];
|
sw += 2; color = sw[-2];
|
||||||
}
|
}
|
||||||
pixmix_sse2(&dst[wt], color, (src[wt*2+1] - src[wt*2]) * am[wt]);
|
pixmix2_sse2(&dst[wt], color, src[wt*2+1] - src[wt*2], am[wt]);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
for(int wt=0; wt<w; ++wt)
|
for(int wt=0; wt<w; ++wt)
|
||||||
|
@ -1054,7 +1059,7 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
|
||||||
while(wt+xo >= sw[1])
|
while(wt+xo >= sw[1])
|
||||||
sw += 2; color = sw[-2];
|
sw += 2; color = sw[-2];
|
||||||
}
|
}
|
||||||
pixmix(&dst[wt], color, (src[wt*2+1] - src[wt*2]) * am[wt]);
|
pixmix2(&dst[wt], color, src[wt*2+1] - src[wt*2], am[wt]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue