
	%include 'helper.asm'

	BITS	32
	
	GLOBAL	screen_blit_T1
	GLOBAL	screen_blit_T2
	GLOBAL	screen_blit_T2_MMX
	GLOBAL	screen_blit_T3
	GLOBAL	screen_blit_T3_MMX
	
	SECTION	.text

;-----------------------------------------------------------------------
; screen_blit_T1(dst,dw,dx,dy,src,sw,sx,sy,w,h)
;
; Copies a w x h rectangle of 16-bit pixels from src to dst, unscaled.
;
; Arguments are dwords on the stack, read as [esp + ARGS + n] once the
; registers below have been saved (BEGIN/PUSHL/ARGS come from helper.asm;
; presumably PUSHL keeps ARGS in step with the pushes - confirm there):
;   +0  dst	destination base address
;   +4  dw	destination bytes per row
;   +8  dx	destination x, in pixels
;   +12 dy	destination y, in rows
;   +16 src	source base address
;   +20 sw	source bytes per row
;   +24 sx	source x, in pixels
;   +28 sy	source y, in rows
;   +32 w	width in pixels
;   +36 h	height in rows
;
; The destination start is additionally displaced by a fixed 320 bytes
; and 120 rows.
; NOTE(review): looks like centring a 320x240 image on a 640x480 16-bit
; screen - confirm against the callers.
;
; Nothing is copied when w <= 0 or h <= 0.
; eax, ebx, ecx, edx, esi and edi are preserved; DF is left cleared.
;-----------------------------------------------------------------------
%define	BLIT_T1_XOFS_BYTES	320
%define	BLIT_T1_YOFS_ROWS	120

screen_blit_T1:
	BEGIN
	PUSHL	eax
	PUSHL	ebx
	PUSHL	ecx
	PUSHL	edx
	PUSHL	esi
	PUSHL	edi

	cld					; movs* must step upwards

	; edi = dst + dx*2 + XOFS + dw * (dy + YOFS)
	mov	edi, [esp + ARGS]		;dst
	mov	eax, [esp + ARGS + 8]		;dx
	mov	ebx, [esp + ARGS + 12]		;dy
	lea	edi, [edi + eax * 2 + BLIT_T1_XOFS_BYTES]
	add	ebx, BLIT_T1_YOFS_ROWS
	mov	eax, [esp + ARGS + 4]		;dw
	mul	ebx				; clobbers edx, which is loaded later
	add	edi, eax

	; esi = src + sx*2 + sw * sy
	mov	esi, [esp + ARGS + 16]		;src
	mov	eax, [esp + ARGS + 24]		;sx
	mov	ebx, [esp + ARGS + 28]		;sy
	lea	esi, [esi + eax * 2]
	mov	eax, [esp + ARGS + 20]		;sw
	mul	ebx
	add	esi, eax

	; Bytes to skip after each copied row:
	;   ebx = sw - w*2 (source), edx = dw - w*2 (destination)
	mov	eax, [esp + ARGS + 32]		;w
	mov	ebx, [esp + ARGS + 20]		;sw
	neg	eax
	mov	edx, [esp + ARGS + 4]		;dw
	lea	ebx, [ebx + eax * 2]
	lea	edx, [edx + eax * 2]

	mov	eax, [esp + ARGS + 32]		;w  (kept in eax for every row)
	mov	ecx, [esp + ARGS + 36]		;h

	; An empty rectangle must not enter the loop: a zero row count
	; would wrap around to 2^32 iterations.
	; 'near' keeps the jump encodable whatever padding AlignFunc emits.
	test	eax, eax
	jle	near .done
	test	ecx, ecx
	jle	near .done

	AlignFunc

.row:
	push	ecx				; rows remaining
	mov	ecx, eax
	shr	ecx, 1				; two pixels per dword
	rep	movsd
	test	al, 1				; odd width: one pixel is left over
	jz	.even
	movsw					; so that exactly w*2 bytes are consumed,
						; matching the row skips in ebx/edx
.even:
	add	edi, edx
	add	esi, ebx
	pop	ecx
	dec	ecx
	jnz	.row

.done:
	pop	edi
	pop	esi
	pop	edx
	pop	ecx
	pop	ebx
	pop	eax
	ret

	AlignFunc

;-----------------------------------------------------------------------
; screen_blit_T2(dst,dw,dx,dy,src,sw,sx,sy,w,h)
;
; Copies a w x h rectangle of 16-bit pixels from src to dst, writing
; every source pixel twice horizontally. The destination pointer moves
; on by 2*dw bytes per source row, and this routine writes only at the
; start of each such step.
;
; Arguments are dwords on the stack, read as [esp + ARGS + n] once the
; registers below have been saved (BEGIN/PUSHL/ARGS come from helper.asm;
; presumably PUSHL keeps ARGS in step with the pushes - confirm there):
;   +0  dst	destination base address
;   +4  dw	destination row size; start offset and row step use dw*2 bytes
;   +8  dx	added to dst as dx*2 bytes
;   +12 dy	destination start is dy * dw * 2 bytes down
;   +16 src	source base address
;   +20 sw	source bytes per row
;   +24 sx	source x, in pixels
;   +28 sy	source y, in rows
;   +32 w	width in source pixels
;   +36 h	height in source rows
;
; Nothing is written when w <= 0 or h <= 0.
; eax, ebx, ecx, edx, esi and edi are preserved; DF is left cleared.
;-----------------------------------------------------------------------
screen_blit_T2:
	BEGIN
	PUSHL	eax
	PUSHL	ebx
	PUSHL	ecx
	PUSHL	edx
	PUSHL	esi
	PUSHL	edi

	cld

	; edi = dst + dx*2 + (dw * dy)*2
	mov	edi, [esp + ARGS]		;dst
	mov	eax, [esp + ARGS + 8]		;dx
	mov	ebx, [esp + ARGS + 12]		;dy
	lea	edi, [edi + eax * 2]
	mov	eax, [esp + ARGS + 4]		;dw
	mul	ebx				; clobbers edx, which is loaded later
	lea	edi, [edi + eax * 2]

	; esi = src + sx*2 + sw * sy
	mov	esi, [esp + ARGS + 16]		;src
	mov	eax, [esp + ARGS + 24]		;sx
	mov	ebx, [esp + ARGS + 28]		;sy
	lea	esi, [esi + eax * 2]
	mov	eax, [esp + ARGS + 20]		;sw
	mul	ebx
	add	esi, eax

	; Bytes to skip after each row:
	;   ebx = sw - w*2   (source)
	;   edx = dw*2 - w*4 (destination: 4 bytes written per source pixel)
	mov	eax, [esp + ARGS + 32]		;w
	mov	ebx, [esp + ARGS + 20]		;sw
	neg	eax
	mov	edx, [esp + ARGS + 4]		;dw
	shl	edx, 1
	lea	ebx, [ebx + eax * 2]
	lea	edx, [edx + eax * 4]

	mov	eax, [esp + ARGS + 32]		;w
	mov	ecx, [esp + ARGS + 36]		;h

	; An empty rectangle must not enter the loops: a zero count would
	; wrap around to 2^32 iterations.
	; 'near' keeps the jump encodable whatever padding AlignFunc emits.
	test	eax, eax
	jle	near .done
	test	ecx, ecx
	jle	near .done

	AlignFunc
.row:
	push	ecx				; rows remaining
	push	eax				; w; ax is reused as the pixel temp
	mov	ecx, eax			; pixels remaining in this row

	AlignFunc
.pixel:
	mov	ax, word [esi]
	add	esi, 2
	mov	word [edi], ax
	mov	word [edi + 2], ax
	add	edi, 4
	dec	ecx
	jnz	.pixel

	add	edi, edx
	add	esi, ebx
	pop	eax
	pop	ecx
	dec	ecx
	jnz	.row

.done:
	pop	edi
	pop	esi
	pop	edx
	pop	ecx
	pop	ebx
	pop	eax

	ret

	AlignFunc
;-----------------------------------------------------------------------
; screen_blit_T2_MMX(dst,dw,dx,dy,src,sw,sx,sy,w,h)
;
; MMX variant of the horizontally doubling blit: every 16-bit source
; pixel is written twice horizontally. Pixels are handled four at a
; time (8 bytes read, 16 bytes written); the w & 3 pixels left over at
; the end of a row are handled one at a time, so that each row consumes
; exactly the w pixels the row skips are computed for. The destination
; pointer moves on by 2*dw bytes per source row.
;
; Arguments are dwords on the stack, read as [esp + ARGS + n] once the
; registers below have been saved (BEGIN/PUSHL/ARGS come from helper.asm;
; presumably PUSHL keeps ARGS in step with the pushes - confirm there):
;   +0  dst	destination base address
;   +4  dw	destination row size; start offset and row step use dw*2 bytes
;   +8  dx	added to dst as dx*2 bytes
;   +12 dy	destination start is dy * dw * 2 bytes down
;   +16 src	source base address
;   +20 sw	source bytes per row
;   +24 sx	source x, in pixels
;   +28 sy	source y, in rows
;   +32 w	width in source pixels
;   +36 h	height in source rows
;
; Nothing is written when w <= 0 or h <= 0.
; eax, ebx, ecx, edx, esi and edi are preserved; DF is left cleared.
; mm0-mm2 are overwritten and emms is executed before returning,
; except on the empty-rectangle exit, which touches no MMX state.
;-----------------------------------------------------------------------
screen_blit_T2_MMX:
	BEGIN
	PUSHL	eax
	PUSHL	ebx
	PUSHL	ecx
	PUSHL	edx
	PUSHL	esi
	PUSHL	edi

	cld

	; edi = dst + dx*2 + (dw * dy)*2
	mov	edi, [esp + ARGS]		;dst
	mov	eax, [esp + ARGS + 8]		;dx
	mov	ebx, [esp + ARGS + 12]		;dy
	lea	edi, [edi + eax * 2]
	mov	eax, [esp + ARGS + 4]		;dw
	mul	ebx				; clobbers edx, which is loaded later
	lea	edi, [edi + eax * 2]

	; esi = src + sx*2 + sw * sy
	mov	esi, [esp + ARGS + 16]		;src
	mov	eax, [esp + ARGS + 24]		;sx
	mov	ebx, [esp + ARGS + 28]		;sy
	lea	esi, [esi + eax * 2]
	mov	eax, [esp + ARGS + 20]		;sw
	mul	ebx
	add	esi, eax

	; Bytes to skip after each row:
	;   ebx = sw - w*2   (source)
	;   edx = dw*2 - w*4 (destination: 4 bytes written per source pixel)
	mov	eax, [esp + ARGS + 32]		;w
	mov	ebx, [esp + ARGS + 20]		;sw
	neg	eax
	mov	edx, [esp + ARGS + 4]		;dw
	shl	edx, 1
	lea	ebx, [ebx + eax * 2]
	lea	edx, [edx + eax * 4]

	mov	eax, [esp + ARGS + 32]		;w
	mov	ecx, [esp + ARGS + 36]		;h

	; An empty rectangle must not enter the loops: a zero count would
	; wrap around to 2^32 iterations.
	; 'near' keeps the long jumps encodable whatever padding AlignFunc emits.
	test	eax, eax
	jle	near .done
	test	ecx, ecx
	jle	near .done

	AlignFunc
.row:
	push	ecx				; rows remaining
	push	eax				; w, re-read from [esp] for the tail
	mov	ecx, eax
	shr	ecx, 2				; groups of four pixels
	jz	.tail				; w < 4: scalar tail only

	AlignFunc
.quad:
	movq	mm0, qword [esi]		; p0 p1 p2 p3
	add	esi, 8
	movq	mm1, mm0
	movq	mm2, mm0
	punpcklwd	mm1, mm0		; p0 p0 p1 p1
	punpckhwd	mm2, mm0		; p2 p2 p3 p3
	movq	qword [edi  ], mm1
	movq	qword [edi+8], mm2
	add	edi, 16
	dec	ecx
	jnz	.quad

.tail:
	mov	ecx, [esp]			; w, pushed at the top of .row
	and	ecx, 3				; pixels the quad loop did not cover
	jz	.row_done
.pixel:
	mov	ax, word [esi]
	add	esi, 2
	mov	word [edi], ax
	mov	word [edi + 2], ax
	add	edi, 4
	dec	ecx
	jnz	.pixel

.row_done:
	add	edi, edx
	add	esi, ebx
	pop	eax
	pop	ecx
	dec	ecx
	jnz	near .row

	emms					; release the MMX state

.done:
	pop	edi
	pop	esi
	pop	edx
	pop	ecx
	pop	ebx
	pop	eax

	ret

	AlignFunc
;-----------------------------------------------------------------------
; screen_blit_T3(dst,dw,dx,dy,src,sw,sx,sy,w,h)
;
; Copies a w x h rectangle of 16-bit pixels from src to dst, writing
; every source pixel as a 2x2 block: twice horizontally at the current
; destination position and twice again BLIT_T3_NEXT_ROW bytes further on.
; The destination pointer moves on by 2*dw bytes per source row.
;
; NOTE(review): the second copy uses a fixed 1280-byte offset rather
; than dw. Presumably that is the byte pitch of a 640-pixel 16-bit
; screen and callers always pass dw == 1280 - confirm before reusing
; this routine with a different destination pitch.
;
; Arguments are dwords on the stack, read as [esp + ARGS + n] once the
; registers below have been saved (BEGIN/PUSHL/ARGS come from helper.asm;
; presumably PUSHL keeps ARGS in step with the pushes - confirm there):
;   +0  dst	destination base address
;   +4  dw	destination row size; start offset and row step use dw*2 bytes
;   +8  dx	added to dst as dx*2 bytes
;   +12 dy	destination start is dy * dw * 2 bytes down
;   +16 src	source base address
;   +20 sw	source bytes per row
;   +24 sx	source x, in pixels
;   +28 sy	source y, in rows
;   +32 w	width in source pixels
;   +36 h	height in source rows
;
; Nothing is written when w <= 0 or h <= 0.
; eax, ebx, ecx, edx, esi and edi are preserved; DF is left cleared.
;-----------------------------------------------------------------------
%define	BLIT_T3_NEXT_ROW	1280

screen_blit_T3:
	BEGIN
	PUSHL	eax
	PUSHL	ebx
	PUSHL	ecx
	PUSHL	edx
	PUSHL	esi
	PUSHL	edi

	cld

	; edi = dst + dx*2 + (dw * dy)*2
	mov	edi, [esp + ARGS]		;dst
	mov	eax, [esp + ARGS + 8]		;dx
	mov	ebx, [esp + ARGS + 12]		;dy
	lea	edi, [edi + eax * 2]
	mov	eax, [esp + ARGS + 4]		;dw
	mul	ebx				; clobbers edx, which is loaded later
	lea	edi, [edi + eax * 2]

	; esi = src + sx*2 + sw * sy
	mov	esi, [esp + ARGS + 16]		;src
	mov	eax, [esp + ARGS + 24]		;sx
	mov	ebx, [esp + ARGS + 28]		;sy
	lea	esi, [esi + eax * 2]
	mov	eax, [esp + ARGS + 20]		;sw
	mul	ebx
	add	esi, eax

	; Bytes to skip after each row:
	;   ebx = sw - w*2   (source)
	;   edx = dw*2 - w*4 (destination: 4 bytes written per source pixel)
	mov	eax, [esp + ARGS + 32]		;w
	mov	ebx, [esp + ARGS + 20]		;sw
	neg	eax
	mov	edx, [esp + ARGS + 4]		;dw
	shl	edx, 1
	lea	ebx, [ebx + eax * 2]
	lea	edx, [edx + eax * 4]

	mov	eax, [esp + ARGS + 32]		;w
	mov	ecx, [esp + ARGS + 36]		;h

	; An empty rectangle must not enter the loops: a zero count would
	; wrap around to 2^32 iterations.
	; 'near' keeps the jump encodable whatever padding AlignFunc emits.
	test	eax, eax
	jle	near .done
	test	ecx, ecx
	jle	near .done

	AlignFunc
.row:
	push	ecx				; rows remaining
	push	eax				; w; ax is reused as the pixel temp
	mov	ecx, eax			; pixels remaining in this row

	AlignFunc
.pixel:
	mov	ax, word [esi]
	add	esi, 2
	mov	word [edi], ax
	mov	word [edi + 2], ax
	mov	word [edi + BLIT_T3_NEXT_ROW], ax
	mov	word [edi + 2 + BLIT_T3_NEXT_ROW], ax
	add	edi, 4
	dec	ecx
	jnz	.pixel

	add	edi, edx
	add	esi, ebx
	pop	eax
	pop	ecx
	dec	ecx
	jnz	.row

.done:
	pop	edi
	pop	esi
	pop	edx
	pop	ecx
	pop	ebx
	pop	eax

	ret
	AlignFunc

	AlignFunc
;-----------------------------------------------------------------------
; screen_blit_T3_MMX(dst,dw,dx,dy,src,sw,sx,sy,w,h)
;
; MMX variant of the 2x2 doubling blit: every 16-bit source pixel is
; written twice horizontally at the current destination position and
; twice again BLIT_T3MMX_NEXT_ROW bytes further on. Pixels are handled
; four at a time (8 bytes read, 2 x 16 bytes written); the w & 3 pixels
; left over at the end of a row are handled one at a time, so that each
; row consumes exactly the w pixels the row skips are computed for.
; The destination pointer moves on by 2*dw bytes per source row.
;
; NOTE(review): the second copy uses a fixed 1280-byte offset rather
; than dw. Presumably that is the byte pitch of a 640-pixel 16-bit
; screen and callers always pass dw == 1280 - confirm before reusing
; this routine with a different destination pitch.
;
; Arguments are dwords on the stack, read as [esp + ARGS + n] once the
; registers below have been saved (BEGIN/PUSHL/ARGS come from helper.asm;
; presumably PUSHL keeps ARGS in step with the pushes - confirm there):
;   +0  dst	destination base address
;   +4  dw	destination row size; start offset and row step use dw*2 bytes
;   +8  dx	added to dst as dx*2 bytes
;   +12 dy	destination start is dy * dw * 2 bytes down
;   +16 src	source base address
;   +20 sw	source bytes per row
;   +24 sx	source x, in pixels
;   +28 sy	source y, in rows
;   +32 w	width in source pixels
;   +36 h	height in source rows
;
; Nothing is written when w <= 0 or h <= 0.
; eax, ebx, ecx, edx, esi and edi are preserved; DF is left cleared.
; mm0-mm2 are overwritten and emms is executed before returning,
; except on the empty-rectangle exit, which touches no MMX state.
;-----------------------------------------------------------------------
%define	BLIT_T3MMX_NEXT_ROW	1280

screen_blit_T3_MMX:
	BEGIN
	PUSHL	eax
	PUSHL	ebx
	PUSHL	ecx
	PUSHL	edx
	PUSHL	esi
	PUSHL	edi

	cld

	; edi = dst + dx*2 + (dw * dy)*2
	mov	edi, [esp + ARGS]		;dst
	mov	eax, [esp + ARGS + 8]		;dx
	mov	ebx, [esp + ARGS + 12]		;dy
	lea	edi, [edi + eax * 2]
	mov	eax, [esp + ARGS + 4]		;dw
	mul	ebx				; clobbers edx, which is loaded later
	lea	edi, [edi + eax * 2]

	; esi = src + sx*2 + sw * sy
	mov	esi, [esp + ARGS + 16]		;src
	mov	eax, [esp + ARGS + 24]		;sx
	mov	ebx, [esp + ARGS + 28]		;sy
	lea	esi, [esi + eax * 2]
	mov	eax, [esp + ARGS + 20]		;sw
	mul	ebx
	add	esi, eax

	; Bytes to skip after each row:
	;   ebx = sw - w*2   (source)
	;   edx = dw*2 - w*4 (destination: 4 bytes written per source pixel)
	mov	eax, [esp + ARGS + 32]		;w
	mov	ebx, [esp + ARGS + 20]		;sw
	neg	eax
	mov	edx, [esp + ARGS + 4]		;dw
	shl	edx, 1
	lea	ebx, [ebx + eax * 2]
	lea	edx, [edx + eax * 4]

	mov	eax, [esp + ARGS + 32]		;w
	mov	ecx, [esp + ARGS + 36]		;h

	; An empty rectangle must not enter the loops: a zero count would
	; wrap around to 2^32 iterations.
	; 'near' keeps the long jumps encodable whatever padding AlignFunc emits.
	test	eax, eax
	jle	near .done
	test	ecx, ecx
	jle	near .done

	AlignFunc
.row:
	push	ecx				; rows remaining
	push	eax				; w, re-read from [esp] for the tail
	mov	ecx, eax
	shr	ecx, 2				; groups of four pixels
	jz	.tail				; w < 4: scalar tail only

	AlignFunc
.quad:
	movq	mm0, qword [esi]		; p0 p1 p2 p3
	add	esi, 8
	movq	mm1, mm0
	movq	mm2, mm0
	punpcklwd	mm1, mm0		; p0 p0 p1 p1
	punpckhwd	mm2, mm0		; p2 p2 p3 p3
	movq	qword [edi  ], mm1
	movq	qword [edi+8], mm2
	movq	qword [edi+BLIT_T3MMX_NEXT_ROW+0], mm1
	movq	qword [edi+BLIT_T3MMX_NEXT_ROW+8], mm2
	add	edi, 16
	dec	ecx
	jnz	.quad

.tail:
	mov	ecx, [esp]			; w, pushed at the top of .row
	and	ecx, 3				; pixels the quad loop did not cover
	jz	.row_done
.pixel:
	mov	ax, word [esi]
	add	esi, 2
	mov	word [edi], ax
	mov	word [edi + 2], ax
	mov	word [edi + BLIT_T3MMX_NEXT_ROW], ax
	mov	word [edi + 2 + BLIT_T3MMX_NEXT_ROW], ax
	add	edi, 4
	dec	ecx
	jnz	.pixel

.row_done:
	add	edi, edx
	add	esi, ebx
	pop	eax
	pop	ecx
	dec	ecx
	jnz	near .row

	emms					; release the MMX state

.done:
	pop	edi
	pop	esi
	pop	edx
	pop	ecx
	pop	ebx
	pop	eax

	ret
