264 lines
6.9 KiB
NASM
264 lines
6.9 KiB
NASM
|
; Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al.
|
||
|
; http://www.avisynth.org
|
||
|
;
|
||
|
; This program is free software; you can redistribute it and/or modify
|
||
|
; it under the terms of the GNU General Public License as published by
|
||
|
; the Free Software Foundation; either version 2 of the License, or
|
||
|
; (at your option) any later version.
|
||
|
;
|
||
|
; This program is distributed in the hope that it will be useful,
|
||
|
; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
; GNU General Public License for more details.
|
||
|
;
|
||
|
; You should have received a copy of the GNU General Public License
|
||
|
; along with this program; if not, write to the Free Software
|
||
|
; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
|
||
|
; http://www.gnu.org/copyleft/gpl.html .
|
||
|
;
|
||
|
; Linking Avisynth statically or dynamically with other modules is making a
|
||
|
; combined work based on Avisynth. Thus, the terms and conditions of the GNU
|
||
|
; General Public License cover the whole combination.
|
||
|
;
|
||
|
; As a special exception, the copyright holders of Avisynth give you
|
||
|
; permission to link Avisynth with independent modules that communicate with
|
||
|
; Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
|
||
|
; terms of these independent modules, and to copy and distribute the
|
||
|
; resulting combined work under terms of your choice, provided that
|
||
|
; every copy of the combined work is accompanied by a complete copy of
|
||
|
; the source code of Avisynth (the version of Avisynth used to produce the
|
||
|
; combined work), being distributed under the terms of the GNU General
|
||
|
; Public License plus this exception. An independent module is a module
|
||
|
; which is not derived from or based on Avisynth, such as 3rd-party filters,
|
||
|
; import and export plugins, or graphical user interfaces.
|
||
|
|
||
|
.586
.mmx
.model flat

; The segment is given 'page' alignment so that the 'align 32' directive
; used further down (in front of the conversion loop) is permitted.

_TEXT64 segment page public use32 'CODE'

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        align   8

; Two coefficient tables with an identical layout of nine quadwords each.
; The conversion code keeps the address of one table in edx and reaches the
; individual entries through the ofs_* displacements defined after the tables.

; Table selected when the rec709 argument is false.
yuv2rgb_constants:

x0000_0000_0010_0010    dq      00000000000100010h      ; 16 in the two low words (subtracted from luma)
x0080_0080_0080_0080    dq      00080008000800080h      ; 128 in every word (subtracted from chroma)
x00FF_00FF_00FF_00FF    dq      000FF00FF00FF00FFh      ; keeps the low byte of every word
x00002000_00002000      dq      00000200000002000h      ; rounding term added before the shift by 14
xFF000000_FF000000      dq      0FF000000FF000000h      ; alpha byte set for two RGB32 pixels
cy                      dq      000004A8500004A85h      ; luma multiplier
crv                     dq      03313000033130000h      ; V multiplier for red
cgu_cgv                 dq      0E5FCF377E5FCF377h      ; U (low word) and V (high word) multipliers for green
cbu                     dq      00000408D0000408Dh      ; U multiplier for blue

; Table selected when the rec709 argument is true.  Same entry order as above;
; only the three chroma multipliers differ.
yuv2rgb_constants_rec709:

                        dq      00000000000100010h
                        dq      00080008000800080h
                        dq      000FF00FF00FF00FFh
                        dq      00000200000002000h
                        dq      0FF000000FF000000h
                        dq      000004A8500004A85h
                        dq      03960000039600000h
                        dq      0EEF5F930EEF5F930h
                        dq      00000439B0000439Bh

; Byte displacement of each entry from the start of either table
; (entry index times the 8-byte size of a dq).
ofs_x0000_0000_0010_0010 = 0*8
ofs_x0080_0080_0080_0080 = 1*8
ofs_x00FF_00FF_00FF_00FF = 2*8
ofs_x00002000_00002000 = 3*8
ofs_xFF000000_FF000000 = 4*8
ofs_cy = 5*8
ofs_crv = 6*8
ofs_cgu_cgv = 7*8
ofs_cbu = 8*8
|
||
|
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
|
||
|
GET_Y   MACRO   mma,uyvy
        ;; Reduce every 16-bit word of MMX register `mma` to one of its bytes.
        ;;   uyvy == 0 : keep the low byte  (mask with 00FFh per word)
        ;;   uyvy != 0 : keep the high byte (logical shift right by 8)
        ;; Expects edx to hold the address of the constant table.
        IFE     uyvy
        pand    mma, qword ptr [edx+ofs_x00FF_00FF_00FF_00FF]
        ELSE
        psrlw   mma, 8
        ENDIF
        ENDM
|
||
|
|
||
|
GET_UV  MACRO   mma,uyvy
        ;; Counterpart of GET_Y: for the same `uyvy` value it keeps the
        ;; opposite byte of every 16-bit word of `mma`.
        ;;   1-uyvy == 0 : keep the low byte  (mask with 00FFh per word)
        ;;   otherwise   : keep the high byte (logical shift right by 8)
        ;; Expects edx to hold the address of the constant table.
        IFE     1-uyvy
        pand    mma, qword ptr [edx+ofs_x00FF_00FF_00FF_00FF]
        ELSE
        psrlw   mma, 8
        ENDIF
        ENDM
|
||
|
|
||
|
YUV2RGB_INNER_LOOP MACRO uyvy,rgb32,no_next_pixel

;; Converts one group of 8 source bytes (two dwords, four pixels) to RGB.
;;
;; Macro arguments
;;   uyvy           forwarded to GET_Y / GET_UV to pick the luma/chroma bytes
;;   rgb32          0: emit 12 bytes (3 per pixel); 1: emit 16 bytes with alpha set
;;   no_next_pixel  0: the dword at [esi+8] supplies the chroma of the following
;;                     pixel pair, and "cmp esi,ecx" is issued so that the code
;;                     after the macro can branch on it
;;                  1: the dword at [esi+4] is used instead (nothing is read
;;                     beyond the 8 bytes) and no compare is emitted
;;
;; Registers
;;   esi  source pointer, advanced by 8
;;   edi  destination pointer, advanced by 12+4*rgb32; stores go below the new edi
;;   edx  address of the constant table
;;   ecx  compared against esi when no_next_pixel is 0
;;   mm0-mm7 are overwritten
;;
;; Two source dwords are processed side by side, four MMX registers each.
;; mm0-mm3 serve the dword at [esi]; mm4-mm7 serve the dword at [esi+4] and
;; those lines are indented by one extra space.  Lines marked *** touch a
;; register of the other half.  The instruction order is deliberate.

        movd      mm0, dword ptr [esi]
         movd      mm5, dword ptr [esi+4]
        movq      mm1, mm0
        GET_Y     mm0, uyvy                     ;; mm0 = __________Y1__Y0
         movq      mm4, mm5
        GET_UV    mm1, uyvy                     ;; mm1 = __________V0__U0
         GET_Y     mm4, uyvy
        movq      mm2, mm5                      ;; *** copy instead of reloading [esi+4]
         GET_UV    mm5, uyvy
        psubw     mm0, qword ptr [edx+ofs_x0000_0000_0010_0010]
         movd      mm6, dword ptr [esi+8-4*(no_next_pixel)]
        GET_UV    mm2, uyvy                     ;; mm2 = __________V2__U2
         psubw     mm4, qword ptr [edx+ofs_x0000_0000_0010_0010]
        paddw     mm2, mm1                      ;; chroma of this pair + chroma of the next pair
         GET_UV    mm6, uyvy
        psubw     mm1, qword ptr [edx+ofs_x0080_0080_0080_0080]
         paddw     mm6, mm5
        psllq     mm2, 32                       ;; move the summed chroma to the upper dword
         psubw     mm5, qword ptr [edx+ofs_x0080_0080_0080_0080]
        punpcklwd mm0, mm2                      ;; mm0 = ______Y1______Y0
         psllq     mm6, 32
        pmaddwd   mm0, qword ptr [edx+ofs_cy]
         punpcklwd mm4, mm6
        paddw     mm1, mm1
         pmaddwd   mm4, qword ptr [edx+ofs_cy]
         paddw     mm5, mm5
        paddw     mm1, mm2                      ;; mm1 = __V1__U1__V0__U0 * 2
        paddd     mm0, qword ptr [edx+ofs_x00002000_00002000]
         paddw     mm5, mm6
        movq      mm2, mm1
         paddd     mm4, qword ptr [edx+ofs_x00002000_00002000]
        movq      mm3, mm1
         movq      mm6, mm5
        pmaddwd   mm1, qword ptr [edx+ofs_crv]
         movq      mm7, mm5
        paddd     mm1, mm0
         pmaddwd   mm5, qword ptr [edx+ofs_crv]
        psrad     mm1, 14                       ;; mm1 = RRRRRRRRrrrrrrrr
         paddd     mm5, mm4
        pmaddwd   mm2, qword ptr [edx+ofs_cgu_cgv]
         psrad     mm5, 14
        paddd     mm2, mm0
         pmaddwd   mm6, qword ptr [edx+ofs_cgu_cgv]
        psrad     mm2, 14                       ;; mm2 = GGGGGGGGgggggggg
         paddd     mm6, mm4
        pmaddwd   mm3, qword ptr [edx+ofs_cbu]
         psrad     mm6, 14
        paddd     mm3, mm0
         pmaddwd   mm7, qword ptr [edx+ofs_cbu]

        ;; Pointer bookkeeping is done here, between the MMX instructions.
        add       esi, 8
        add       edi, 12+4*rgb32
    IFE no_next_pixel
        cmp       esi, ecx                      ;; result is consumed by the jump that follows the macro
    ENDIF

        psrad     mm3, 14                       ;; mm3 = BBBBBBBBbbbbbbbb
         paddd     mm7, mm4
        pxor      mm0, mm0
         psrad     mm7, 14
        packssdw  mm3, mm2                      ;; mm3 = GGGGggggBBBBbbbb
         packssdw  mm7, mm6
        packssdw  mm1, mm0                      ;; mm1 = ________RRRRrrrr
         packssdw  mm5, mm0                     ;; *** reuses the zero in mm0 instead of clearing mm4
        movq      mm2, mm3
         movq      mm6, mm7
        punpcklwd mm2, mm1                      ;; mm2 = RRRRBBBBrrrrbbbb
         punpcklwd mm6, mm5
        punpckhwd mm3, mm1                      ;; mm3 = ____GGGG____gggg
         punpckhwd mm7, mm5
        movq      mm0, mm2
         movq      mm4, mm6
        punpcklwd mm0, mm3                      ;; mm0 = ____rrrrggggbbbb
         punpcklwd mm4, mm7
    IFE rgb32
        psllq     mm0, 16                       ;; 24-bit output only: pre-shift the first pixel by one word
         psllq     mm4, 16
    ENDIF
        punpckhwd mm2, mm3                      ;; mm2 = ____RRRRGGGGBBBB
         punpckhwd mm6, mm7
        packuswb  mm0, mm2                      ;; mm0 = __RRGGBB__rrggbb (rgb32 layout)
         packuswb  mm4, mm6

    IF rgb32
        ;; 32-bit output: set the alpha bytes and store each quadword as is.
        por       mm0, qword ptr [edx+ofs_xFF000000_FF000000]
         por       mm4, qword ptr [edx+ofs_xFF000000_FF000000]
        movq      qword ptr [edi-16], mm0
         movq      qword ptr [edi-8], mm4
    ELSE
        ;; 24-bit output: because of the 16-bit pre-shift above, mm0 and mm4
        ;; each hold __RRGGBBrrggbb__ here; squeeze both into 12 bytes.
        psrlq     mm0, 8
         psllq     mm4, 8
        movd      dword ptr [edi-12], mm0
        psrlq     mm0, 32
         por       mm4, mm0
         movd      dword ptr [edi-8], mm4
         psrlq     mm4, 32
         movd      dword ptr [edi-4], mm4
    ENDIF

        ENDM
|
||
|
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
|
||
|
YUV2RGB_PROC MACRO procname,uyvy,rgb32

        PUBLIC  C _&procname

;; Emits one exported conversion routine.
;;
;;void __cdecl procname(
;;      [esp+ 4] const BYTE* src,
;;      [esp+ 8] BYTE* dst,
;;      [esp+12] const BYTE* src_end,
;;      [esp+16] int src_pitch,
;;      [esp+20] int row_size,
;;      [esp+24] bool rec709);
;;
;; Source rows are visited from the last one (src_end - src_pitch) back to
;; src, while dst is written strictly forward with no gap between rows, so
;; the row order of the output is the reverse of the source.
;;
;; Each row is consumed in groups of 8 source bytes, and every group yields
;; 12+4*rgb32 destination bytes.  row_size is expected to be a positive
;; multiple of 8: the row pointer is only restored correctly in that case.
;; All groups but the last one peek at the following 4 source bytes; the last
;; group of a row does not.
;;
;; Register use inside the routine:
;;      eax = src_pitch                 ecx = address of the last group in the row
;;      ebx = row_size                  edx = address of the selected constant table
;;      esi = source pointer            edi = destination pointer
;; esi, edi and ebx are saved and restored; eax, ecx, edx, the flags and
;; mm0-mm7 are not.  emms is executed before returning.

_&procname PROC

        push    esi
        push    edi
        push    ebx

        ;; The "+12" in every displacement skips the three registers pushed above.
        mov     eax, [esp+16+12]                ;; src_pitch
        mov     esi, [esp+12+12]                ;; src_end: read source bottom-up
        mov     edi, [esp+8+12]                 ;; dst
        mov     ebx, [esp+20+12]                ;; row_size
        mov     edx, offset yuv2rgb_constants
        test    byte ptr [esp+24+12], 1         ;; rec709?
        jz      loop0
        mov     edx, offset yuv2rgb_constants_rec709

loop0:
        sub     esi, eax                        ;; step back to the start of the previous row
        lea     ecx, [esi+ebx-8]                ;; ecx -> last 8-byte group of this row

        ;; A row that consists of a single group must not enter loop1: that
        ;; loop runs its body before testing, so it would read the dword past
        ;; the row, convert a second group beyond the row and write the extra
        ;; pixels to dst.
        cmp     esi, ecx
        jae     last_group

        align   32
loop1:
        YUV2RGB_INNER_LOOP uyvy,rgb32,0         ;; ends with "cmp esi,ecx"
        jb      loop1

last_group:
        YUV2RGB_INNER_LOOP uyvy,rgb32,1         ;; final group: no look-ahead read

        sub     esi, ebx                        ;; back to the start of the row just converted
        cmp     esi, [esp+4+12]                 ;; still above src?
        ja      loop0

        emms
        pop     ebx
        pop     edi
        pop     esi
        retn

_&procname ENDP

        ENDM
|
||
|
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
|
||
|
; Exported routines generated from YUV2RGB_PROC (arguments: name, uyvy, rgb32).
; Both use uyvy = 0; they differ only in the output pixel size.

        YUV2RGB_PROC    mmx_YUY2toRGB24,0,0
        YUV2RGB_PROC    mmx_YUY2toRGB32,0,1

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        END
|