
#include "CMameScreen.h"
#include "CMameView.h"

#include <Application.h>

#include <stdio.h>
#include <string.h>
//#include "string_asm.h"




/*
 * fast_memmove - copy exactly 'count' bytes from 's' to 'd'.
 *
 *   d      destination buffer
 *   s      source buffer
 *   count  number of bytes to copy
 *
 * The two regions may overlap: the copy runs forward when the destination
 * lies below the source and backward otherwise.
 *
 * On __INTEL__ builds this is a hand-written string-move sequence (one
 * optional byte, one optional word, then 'rep movsl' for the remaining
 * longwords).  Every other build defers to the C library's memmove(), which
 * copies the full byte count (including a tail that is not a multiple of 8),
 * is overlap-safe in both directions and has no alignment requirements.
 */
static __inline__ void fast_memmove(void *d, const void *s, size_t count)
{
#ifdef __INTEL__
  int d0, d1, d2, d3;
    if (d < s) {
	/* Destination below source: ascending copy (direction flag cleared). */
__asm__ __volatile__ (
	"cld\n\t"
	"shrl $1,%%ecx\n\t"
	"jnc 1f\n\t"
	"movsb\n"
	"1:\tshrl $1,%%ecx\n\t"
	"jnc 2f\n\t"
	"movsw\n"
	"2:\trep\n\t"
	"movsl"
	: "=&c" (d0), "=&D" (d1), "=&S" (d2)
	:"0"(count),"1"((long)d),"2"((long)s)
	:"memory");
    } else {
	/*
	 * Destination at or above source: descending copy.  ESI/EDI start at
	 * the last longword of each region (base + count - 4); the odd byte
	 * and odd word are moved by hand before 'rep movsl' walks downward.
	 * The direction flag is restored with 'cld' before leaving.
	 */
__asm__ __volatile__ (
	"std\n\t"
	"shrl $1,%%ecx\n\t"
	"jnc 1f\n\t"
	"movb 3(%%esi),%%al\n\t"
	"movb %%al,3(%%edi)\n\t"
	"decl %%esi\n\t"
	"decl %%edi\n"
	"1:\tshrl $1,%%ecx\n\t"
	"jnc 2f\n\t"
	"movw 2(%%esi),%%ax\n\t"
	"movw %%ax,2(%%edi)\n\t"
	"decl %%esi\n\t"
	"decl %%edi\n\t"
	"decl %%esi\n\t"
	"decl %%edi\n"
	"2:\trep\n\t"
	"movsl\n\t"
	"cld"
	: "=&c" (d0), "=&D" (d1), "=&S" (d2), "=&a" (d3)
	:"0"(count),"1"(count-4+(long)d),"2"(count-4+(long)s)
	:"memory");
    }
#else
	/* Portable path: byte-exact and overlap-safe. */
	memmove(d, s, count);
#endif
}

/*
 * Builds the full-screen MAME window.
 *
 *   outRet        receives the status reported by the BWindowScreen base
 *                 constructor
 *   inFrame       requested emulated screen rectangle; stored in
 *                 mRequestedSize and used to size the delta buffer
 *   inColorDepth  requested colour depth; 8 selects the 8-bit spaces, any
 *                 other value is treated as two bytes per pixel
 *
 * The window is created in B_8_BIT_640x480; the real space is chosen later
 * in ScreenConnected().  A CMameView covering the window bounds is added
 * and given focus, then the window is shown.
 */
CMameScreen::CMameScreen(
	status_t		*outRet,
	BRect			inFrame,
	int				inColorDepth) : BWindowScreen("MAME", B_8_BIT_640x480, outRet)
{
	int32	deltaSize;
	
	mView = new CMameView(Bounds());

	AddChild(mView);
	mView->MakeFocus();

	mRequestedSize = inFrame;
	mRequestedDepth = inColorDepth;

	// Nothing is drawable until ScreenConnected(true) has run, so give the
	// frame-buffer members defined values instead of leaving them
	// uninitialised (Update8BitDirect reads mLineLength before it checks
	// mConnected).
	mConnected = false; 
	mFrameBuf = NULL;
	mLineLength = 0;
printf("CMameScreen::ctor - Disconnected\n");

	// One byte per requested pixel (+1), doubled for non-8-bit depths.
	deltaSize = (inFrame.Width()+1)*(inFrame.Height()+1)+1;
	if (inColorDepth != 8)
		deltaSize *= 2;
	mDelta = new uchar[deltaSize];
	// The pixel-doubling blitters compare each source word against mDelta
	// before writing, so the buffer must start from a known state; zero
	// matches the cleared (all-zero) frame buffer.
	memset(mDelta, 0, deltaSize);
		
	Show();
}

// Releases the delta buffer that the constructor allocated with new[].
CMameScreen::~CMameScreen()
{
	delete [] mDelta;
}

// Requests shutdown by posting B_QUIT_REQUESTED to this window; the
// call only posts the message and returns.
void
CMameScreen::ShutDown()
{
	PostMessage(B_QUIT_REQUESTED);
}

// Stores one RGB triple in the cached palette (mPalette).  Only the cache
// is changed here; SetPalette() is what hands the table to the screen.
//
//   inIdx                     palette slot to overwrite
//   inRed, inGreen, inBlue    colour components for that slot
//
// NOTE(review): inIdx is not range-checked and mPalette's size is declared
// outside this file - confirm callers stay within it.
void
CMameScreen::SetPenAtIdx(
	int32			inIdx,
	uint8			inRed,
	uint8			inGreen,
	uint8			inBlue)
{
	mPalette[inIdx].blue  = inBlue;
	mPalette[inIdx].green = inGreen;
	mPalette[inIdx].red   = inRed;
}

// Pushes the cached palette (mPalette) to the screen via SetColorList().
// The window is locked around the call.  If the lock cannot be obtained
// the update is skipped, so that neither SetColorList() nor Unlock() is
// invoked on a looper this thread does not hold.
void
CMameScreen::SetPalette()
{
	if (!Lock())
		return;

	SetColorList(mPalette);
	Unlock();
}

// Copies an 8-bit source bitmap into the frame buffer.
//
//   inSource        first byte of the source bitmap
//   inLineNumber    row count minus one (it is incremented before use)
//   inScreenWidth   source width; 0 means "use the requested width"
//   inVisibleWidth  bytes copied per row; 0 means "use the requested width"
//   inDouble        true: hand off to PixDblBlit8Bit (pixel doubling)
//                   false: straight row-by-row copy
//
// Nothing is drawn while the screen is not connected.
void
CMameScreen::Update8BitDirect(
	uint8			*inSource,
	int32			inLineNumber,
	int32			inScreenWidth,
	int32			inVisibleWidth,
	bool			inDouble)
{
	// Zero widths fall back to the requested size (the original author
	// notes this is needed for vector games).
	if (inVisibleWidth == 0)
		inVisibleWidth = mRequestedSize.right + 1;
	if (inScreenWidth == 0)
		inScreenWidth = mRequestedSize.right + 1;

	// A source width that is not a multiple of eight is bumped up to a
	// multiple of sixteen.
	if ((inScreenWidth % 8) != 0)
		inScreenWidth = (inScreenWidth + 16) & ~15;

	inLineNumber++;

	if (!mConnected)
		return;

	// Both paths treat the source pitch as the (rounded) width plus 16.
	if (inDouble) {
		PixDblBlit8Bit(inSource, inScreenWidth + 16, inVisibleWidth,
			inLineNumber, mFrameBuf, mLineLength);
		return;
	}

	// Row pointers are stepped in 8-byte (double) units, so both pitches
	// are expressed as a count of doubles.
	const uint32 dstPitch = mLineLength >> 3;
	const int32 srcPitch = (inScreenWidth + 16) / 8;
	const uint32 rowCount = inLineNumber;

	double *dstRow = (double *)mFrameBuf;
	double *srcRow = (double *)inSource;

	for (uint32 row = 0; row < rowCount; ++row) {
		fast_memmove(dstRow, srcRow, inVisibleWidth);
		dstRow += dstPitch;
		srcRow += srcPitch;
	}
}
	
// Pixel-doubled update with scanlines: forwards the source bitmap to
// PixDblBlit8BitWScan when the screen is connected, and does nothing
// otherwise.
//
//   inSource        first byte of the source bitmap
//   inLineNumber    passed through unchanged as the source row count
//   inScreenWidth   source width; the source pitch passed on is this + 16
//   inVisibleWidth  number of source pixels drawn per row
void
CMameScreen::Update8BitDirectWScan(
	uint8			*inSource,
	int32			inLineNumber,
	int32			inScreenWidth,
	int32			inVisibleWidth)
{
	if (!mConnected)
		return;

	PixDblBlit8BitWScan(inSource, inScreenWidth + 16, inVisibleWidth,
		inLineNumber, mFrameBuf, mLineLength);
}

/*
 * Pixel-doubling 8-bit blit with delta suppression.
 *
 *   inSrcBitmap     first source row
 *   inSrcRowBytes   byte pitch between source rows
 *   inSrcWidth      source pixels drawn per row (consumed four at a time;
 *                   a remainder of fewer than four pixels is not drawn)
 *   inSrcHeight     number of source rows
 *   outDstBitmap    first destination row
 *   outDstRowBytes  byte pitch between destination rows
 *
 * Every source pixel becomes a 2x2 block: each group of four source bytes
 * is expanded to eight bytes and stored to two consecutive destination
 * rows.  mDelta caches the source words that were last drawn; a group whose
 * word equals the cached one is skipped.  The delta cursor is not reset per
 * row, so the cache is packed at one word per four visible pixels.
 *
 * NOTE(review): the code steps 'unsigned long' pointers per four pixels,
 * i.e. it assumes a 4-byte unsigned long - confirm for the target ABI.
 */
void
CMameScreen::PixDblBlit8Bit(
	const void		*inSrcBitmap,
	int32			inSrcRowBytes,
	int32			inSrcWidth,
	int32			inSrcHeight,
	void			*outDstBitmap,
	int32			outDstRowBytes)
{
	/*	ensure alignment that we depend on	*/
	//ASSERT(!(((long)inSrcBitmap)&3) && !(((long)outDstBitmap)&7));
	double temp[1];
	double temp2;

	unsigned long * dp=(unsigned long *)mDelta;
	while (--inSrcHeight >= 0)
	{
		unsigned long * src = ((unsigned long *)inSrcBitmap);
		double * dst1 = ((double *)outDstBitmap);
		double * dst2 = dst1+(outDstRowBytes>>3);	/* the row below dst1 */
		int w = inSrcWidth;
		while ((w-=4) >= 0)
		{

		/* This loop might be optimizable by assembling the */
		/* "temp" double in another way; check profiling and */
		/* disassembly to make sure. I think this is close to */
		/* optimal, though. */

		/* If we're in little-endian mode, the bytes go the other way */
		if (*src!=*dp)
			{
			/* Remember what is about to be drawn. */
			*dp++=*src;
#if __INTEL__

			/* Fill temp front to back, each source byte written twice. */
			unsigned long pixx = *src++;
			unsigned char * mid = (unsigned char *)&temp[0];
			*(mid++) = pixx;
			*(mid++) = pixx;
			pixx >>= 8;
			*(mid++) = pixx;
			*(mid++) = pixx;
			pixx >>= 8;
			*(mid++) = pixx;
			*(mid++) = pixx;
			pixx >>= 8;
			*(mid++) = pixx;
			*(mid++) = pixx;
			

#else
			/* Fill temp back to front, each source byte written twice. */
			/* Post-increment: expand the same word that was just       */
			/* compared and cached, then step to the next one.          */
			unsigned long pixx = *src++;
			unsigned char * mid = (unsigned char *)&temp[1];
			*(--mid) = pixx;
			*(--mid) = pixx;
			pixx >>= 8;
			*(--mid) = pixx;
			*(--mid) = pixx;
			pixx >>= 8;
			*(--mid) = pixx;
			*(--mid) = pixx;
			pixx >>= 8;
			*(--mid) = pixx;
			*(--mid) = pixx;
#endif

		/* This is the kick-ass magic part! Don't touch! */
		/* (one 8-byte store per destination row) */

			temp2 = temp[0];
			*(dst1++) = temp2;
			*(dst2++) = temp2;

				}
			else
				{
				/* Unchanged since the last blit: skip it. */
				dst1++;dst2++;src++;dp++;
				}

		} 
		/* One source row down, two destination rows down. */
		inSrcBitmap = ((char *)inSrcBitmap)+inSrcRowBytes;
		outDstBitmap = ((char *)outDstBitmap)+(outDstRowBytes<<1);//2*outDstRowBytes;
	}
}

/*
 * Pixel-doubling 8-bit blit with scanlines and delta suppression.
 *
 *   inSrcBitmap     first source row
 *   inSrcRowBytes   byte pitch between source rows
 *   inSrcWidth      source pixels drawn per row (consumed four at a time;
 *                   a remainder of fewer than four pixels is not drawn)
 *   inSrcHeight     number of source rows
 *   outDstBitmap    first destination row
 *   outDstRowBytes  byte pitch between destination rows
 *
 * Each group of four source bytes is expanded to eight bytes (every pixel
 * doubled horizontally) and stored to the upper of two destination rows;
 * the lower row receives 0.0, i.e. eight zero bytes.  mDelta caches the
 * source words that were last drawn; a group whose word equals the cached
 * one is skipped.  The delta cursor is not reset per row, so the cache is
 * packed at one word per four visible pixels.
 *
 * NOTE(review): the code steps 'unsigned long' pointers per four pixels,
 * i.e. it assumes a 4-byte unsigned long - confirm for the target ABI.
 */
void
CMameScreen::PixDblBlit8BitWScan(
	const void		*inSrcBitmap,
	int32			inSrcRowBytes,
	int32			inSrcWidth,
	int32			inSrcHeight,
	void			*outDstBitmap,
	int32			outDstRowBytes)
{
	/*	ensure alignment that we depend on	*/
	//ASSERT(!(((long)inSrcBitmap)&3) && !(((long)outDstBitmap)&7));
	double temp[1];
	double temp2;

	unsigned long * dp=(unsigned long *)mDelta;
	while (--inSrcHeight >= 0)
	{
		unsigned long * src = ((unsigned long *)inSrcBitmap);
		double * dst1 = ((double *)outDstBitmap);
		double * dst2 = dst1+(outDstRowBytes>>3);	/* the row below dst1 */
		int w = inSrcWidth;
		while ((w-=4) >= 0)
		{

		/* This loop might be optimizable by assembling the */
		/* "temp" double in another way; check profiling and */
		/* disassembly to make sure. I think this is close to */
		/* optimal, though. */

		/* If we're in little-endian mode, the bytes go the other way */
		if (*src!=*dp)
			{
			/* Remember what is about to be drawn. */
			*dp++=*src;
#if __INTEL__

			/* Fill temp front to back, each source byte written twice. */
			unsigned long pixx = *src++;
			unsigned char * mid = (unsigned char *)&temp[0];
			*(mid++) = pixx;
			*(mid++) = pixx;
			pixx >>= 8;
			*(mid++) = pixx;
			*(mid++) = pixx;
			pixx >>= 8;
			*(mid++) = pixx;
			*(mid++) = pixx;
			pixx >>= 8;
			*(mid++) = pixx;
			*(mid++) = pixx;
			

#else
			/* Fill temp back to front, each source byte written twice. */
			/* Post-increment: expand the same word that was just       */
			/* compared and cached, then step to the next one.          */
			unsigned long pixx = *src++;
			unsigned char * mid = (unsigned char *)&temp[1];
			*(--mid) = pixx;
			*(--mid) = pixx;
			pixx >>= 8;
			*(--mid) = pixx;
			*(--mid) = pixx;
			pixx >>= 8;
			*(--mid) = pixx;
			*(--mid) = pixx;
			pixx >>= 8;
			*(--mid) = pixx;
			*(--mid) = pixx;
#endif

		/* This is the kick-ass magic part! Don't touch! */
		/* (upper row gets the pixels, lower row is blanked) */

			temp2 = temp[0];
			*(dst1++) = temp2;
			*(dst2++) = 0.0;
			//dst2++;

				}
			else
				{
				/* Unchanged since the last blit: skip it. */
				dst1++;dst2++;src++;dp++;
				}

		} 
		/* One source row down, two destination rows down. */
		inSrcBitmap = ((char *)inSrcBitmap)+inSrcRowBytes;
		outDstBitmap = ((char *)outDstBitmap)+(outDstRowBytes<<1);//2*outDstRowBytes;
	}
}

// Quit hook: hides the window, marks the screen as disconnected so the
// update methods stop drawing, calls WaitToQuit(), and then returns true
// to accept the quit.
bool
CMameScreen::QuitRequested()
{
	Hide();
	// The Update* methods check this flag before touching the frame buffer.
	mConnected = false;
printf("CMameScreen::QuitRequested - Disconnected\n");
	
	// NOTE(review): WaitToQuit() is declared outside this file; presumably
	// it blocks until shutdown may proceed - confirm.
	WaitToQuit();
	
	return true;
}

void
CMameScreen::ScreenConnected(
	bool			inConnected)
{
	float offset_y=0;
	float offset_x=0;
	
	BRect view_rect(0,0,CardInfo()->width,CardInfo()->height);
	
	int width =  (int)mRequestedSize.right;
	int height = (int)mRequestedSize.bottom;
	
	if (inConnected) { //screen gets a connection 
		puts("ScreenConnected(true)");
	
		if (mRequestedDepth==8) {
			if (width <= 640 && height <= 480) {
				puts("B_8_BIT_640x480");
				SetSpace(B_8_BIT_640x480);
			} else if (width <= 800 && height <=600) {
				puts("B_8_BIT_800x600");
				SetSpace(B_8_BIT_800x600);
			}
			else if (width <= 1024 && height <=768) {
				puts("B_8_BIT_1024x768");
				SetSpace(B_8_BIT_1024x768);
			} else {
				puts("B_8_BIT_1280x1024");
				SetSpace(B_8_BIT_1280x1024);
			}
		} else {
			if (width <= 640 && height <= 480) {
				puts("B_16_BIT_640x480");
				SetSpace(B_16_BIT_640x480);
			}
			else if (width <= 800 && height <=600) {
				puts("B_16_BIT_800x600");
				SetSpace(B_16_BIT_800x600);
			}
			else if (width <= 1024 && height <=768) {
				puts("B_16_BIT_1024x768");
				SetSpace(B_16_BIT_1024x768);
			}
			else {
				puts("B_16_BIT_1280x1024");
				SetSpace(B_16_BIT_1280x1024);
			}
		}
		view_rect.Set(0,0,CardInfo()->width,CardInfo()->height);
	
		//get pointer to frame buffer and bytes per row
		mFrameBuf=(uint8*)(CardInfo()->frame_buffer);
		mLineLength=FrameBufferInfo()->bytes_per_row;
	
		// clean the framebuffer
	    memset(mFrameBuf,0,(long)(view_rect.bottom*mLineLength));
	
		//now center the image
		offset_x=(view_rect.right-mRequestedSize.right)/2;
		offset_y=(view_rect.bottom-mRequestedSize.bottom)/2;
		mFrameBuf=mFrameBuf+(int)offset_x+(int)offset_y*mLineLength;
	
		SetColorList(mPalette);
	
		mConnected=true;   
printf("CMameScreen::ScreenConnected - Connected\n");
	}
	else {
		mConnected=false;
printf("CMameScreen::ScreenConnected - Disconnected\n");
	}
}
