From 316f1afb6e0a11d83dd461018fc82aac0859f9b8 Mon Sep 17 00:00:00 2001
From: Niels Martin Hansen
Date: Thu, 24 Jul 2008 00:10:43 +0000
Subject: [PATCH] Optimise safe_subtract to avoid conditionals.

Originally committed to SVN as r2283.
---
 vsfilter/subtitles/Rasterizer.cpp | 35 ++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/vsfilter/subtitles/Rasterizer.cpp b/vsfilter/subtitles/Rasterizer.cpp
index cc5270e71..7329c5ef6 100644
--- a/vsfilter/subtitles/Rasterizer.cpp
+++ b/vsfilter/subtitles/Rasterizer.cpp
@@ -861,11 +861,18 @@ static __forceinline void pixmix2_sse2(DWORD* dst, DWORD color, DWORD shapealpha
 	*dst = (DWORD)_mm_cvtsi128_si32(r);
 }
 
+#include <mmintrin.h>
+
 // Calculate a-b but without risk of underflow
-template<class T>
-static __forceinline T safe_subtract(T a, T b)
+static __forceinline DWORD safe_subtract(DWORD a, DWORD b)
 {
-	return (b > a) ? 0 : a - b;
+	// What a waste of bits...
+	__m64 ap = _mm_cvtsi32_si64(a);
+	__m64 bp = _mm_cvtsi32_si64(b);
+	__m64 rp = _mm_subs_pu16(ap, bp);
+	return (DWORD)_mm_cvtsi64_si32(rp);
+	// Don't need an EMMS because nothing in Draw() depends on FPU
+	// and we EMMS at the end of Draw().
 }
 
 // For CPUID usage in Rasterizer::Draw
@@ -959,19 +966,19 @@ CRect Rasterizer::Draw(SubPicDesc& spd, CRect& clipRect, byte* pAlphaMask, int x
 for(int wt=0; wt