// Function-wrapped Watcom pragmas
// by Jonathon Fowler (jonof@edgenetwk.com)
//
// These functions represent some of the longer-winded pragmas
// from the original pragmas.h, wrapped into functions for easier
// use since their many jumps and whatnot make it harder to write
// macro-inline versions. I'll eventually convert these to
// macro-inline equivalents.		--Jonathon

//#include "pragmas.h"

// File-scope global with external linkage; nothing in the visible part of
// this file reads or writes it.
// NOTE(review): presumably the secondary result slot used by the
// divide/modulo pragmas in pragmas.h -- confirm against its users.
long dmval;

// Shorthand for a GCC extended-asm statement. __volatile__ keeps the
// compiler from deleting or moving the statement.
#define ASM __asm__ __volatile__


// Multiplies a by b as signed 32-bit values, shifts the 64-bit product
// right (arithmetically) by c bits and returns the result, saturated to
// the signed 32-bit range.
//
//   a, b  factors; only their low 32 bits are used
//   c     shift count; the CPU uses only the low five bits of cl (0..31)
//
// Returns the low 32 bits of the shifted product when it fits in a signed
// 32-bit integer, otherwise 0x7fffffff for a positive product or
// 0x80000000 for a negative one.
//
// The asm works on 32-bit registers, so on a target where long is wider
// than 32 bits the result comes back zero-extended in the low 32 bits.
long boundmulscale(long a, long b, long c)
{
	// ebx is overwritten inside the asm, so b is an in/out operand: GCC
	// requires that input-only operands are left unmodified.
	__asm__ __volatile__ (
		"imull %%ebx\n\t"		// edx:eax = eax * ebx (signed)
		"movl %%edx, %%ebx\n\t"		// mov ebx, edx   (keep product sign)
		"shrdl %%cl, %%edx, %%eax\n\t"	// shrd eax, edx, cl
		"sarl %%cl, %%edx\n\t"		// sar edx, cl
		"xorl %%eax, %%edx\n\t"		// xor edx, eax
		"js 0f\n\t"			// js checkit     (sign bits differ)
		"xorl %%eax, %%edx\n\t"		// xor edx, eax   (restore edx)
		"jz 1f\n\t"			// jz skipboundit (high word is 0)
		"cmpl $0xffffffff, %%edx\n\t"	// cmp edx, 0xffffffff
		"je 1f\n\t"			// je skipboundit (high word is -1)
		"0:\n\t"			// checkit:
		"movl %%ebx, %%eax\n\t"		// mov eax, ebx
		"sarl $31, %%eax\n\t"		// sar eax, 31    (0 or -1)
		"xorl $0x7fffffff, %%eax\n\t"	// xor eax, 0x7fffffff
		"1:"				// skipboundit:
		: "+a" (a), "+b" (b)		// in/out eax ebx
		: "c" (c)			// input ecx
		: "edx", "cc"
	);
	return a;
}


// Fills c bytes starting at D with the pattern held in a.
//
//   D  destination; written upward from this address
//   c  number of bytes to store (compared as an unsigned value)
//   a  fill pattern: byte stores take its low 8 bits, word stores its low
//      16 bits and dword stores its low 32 bits, so a uniform fill needs
//      the byte replicated in all four positions
//
// For four or more bytes the destination is first brought to a 4-byte
// boundary with up to three bytes, then filled with whole dwords, then
// the remaining 0-3 bytes are stored. No cld is issued, so the direction
// flag must be clear on entry.
void clearbufbyte(void *D, long c, long a)
{
	// edi and ecx are advanced by the string stores, so they are in/out
	// operands: GCC requires that input-only operands are left unmodified.
	__asm__ __volatile__ (
		"cmpl $4, %%ecx\n\t"
		"jae 1f\n\t"			// jae intcopy (4 or more bytes)
		"testb $1, %%cl\n\t"
		"jz 0f\n\t"			// jz preskip
		"stosb\n\t"			// odd count: one byte first
		"0:\n\t"			// preskip:
		"shrl $1, %%ecx\n\t"
		"rep\n\t"
		"stosw\n\t"			// then zero or one word
		"jmp 5f\n\t"			// jmp endit
		"1:\n\t"			// intcopy:
		"testl $1, %%edi\n\t"
		"jz 2f\n\t"			// jz skip1
		"stosb\n\t"			// reach a 2-byte boundary
		"decl %%ecx\n\t"
		"2:\n\t"			// skip1:
		"testl $2, %%edi\n\t"
		"jz 3f\n\t"			// jz skip2
		"stosw\n\t"			// reach a 4-byte boundary
		"subl $2, %%ecx\n\t"
		"3:\n\t"			// skip2:
		"movl %%ecx, %%ebx\n\t"		// keep the count for the tail tests
		"shrl $2, %%ecx\n\t"
		"rep\n\t"
		"stosl\n\t"			// whole dwords
		"testb $2, %%bl\n\t"
		"jz 4f\n\t"			// jz skip3
		"stosw\n\t"			// trailing word
		"4:\n\t"			// skip3:
		"testb $1, %%bl\n\t"
		"jz 5f\n\t"			// jz endit
		"stosb\n\t"			// trailing byte
		"5:"				// endit:
		: "+D" (D), "+c" (c)		// in/out edi ecx
		: "a" (a)			// input eax
		: "ebx", "memory", "cc"
	);
}

// Copies c bytes from S to D in ascending address order.
//
//   S  source
//   D  destination
//   c  number of bytes to copy (compared as an unsigned value)
//
// For four or more bytes the destination is first brought to a 4-byte
// boundary with up to three bytes, then whole dwords are moved, then the
// remaining 0-3 bytes. No cld is issued, so the direction flag must be
// clear on entry.
void copybufbyte(void *S, void *D, long c)
{
	// esi, edi and ecx are advanced by the string moves, so they are
	// in/out operands: GCC requires that input-only operands are left
	// unmodified.
	__asm__ __volatile__ (
		"cmpl $4, %%ecx\n\t"		// cmp ecx, 4
		"jae 1f\n\t"			// jae intcopy (4 or more bytes)
		"testb $1, %%cl\n\t"		// test cl, 1
		"jz 0f\n\t"			// jz preskip
		"movsb\n\t"			// odd count: one byte first
		"0:\n\t"			// preskip:
		"shrl $1, %%ecx\n\t"		// shr ecx, 1
		"rep\n\t"
		"movsw\n\t"			// then zero or one word
		"jmp 5f\n\t"			// jmp endit
		"1:\n\t"			// intcopy:
		"testl $1, %%edi\n\t"		// test edi, 1
		"jz 2f\n\t"			// jz skip1
		"movsb\n\t"			// destination to a 2-byte boundary
		"decl %%ecx\n\t"
		"2:\n\t"			// skip1:
		"testl $2, %%edi\n\t"		// test edi, 2
		"jz 3f\n\t"			// jz skip2
		"movsw\n\t"			// destination to a 4-byte boundary
		"subl $2, %%ecx\n\t"		// sub ecx, 2
		"3:\n\t"			// skip2:
		"movl %%ecx, %%ebx\n\t"		// mov ebx, ecx (count for tail tests)
		"shrl $2, %%ecx\n\t"		// shr ecx, 2
		"rep\n\t"
		"movsl\n\t"			// whole dwords
		"testb $2, %%bl\n\t"		// test bl, 2
		"jz 4f\n\t"			// jz skip3
		"movsw\n\t"			// trailing word
		"4:\n\t"			// skip3:
		"testb $1, %%bl\n\t"		// test bl, 1
		"jz 5f\n\t"			// jz endit
		"movsb\n\t"			// trailing byte
		"5:"				// endit:
		: "+S" (S), "+D" (D), "+c" (c)	// in/out esi edi ecx
		:
		: "ebx", "memory", "cc"
	);
}

// Copies c bytes in reversed order: bytes are read downward starting at S
// and written upward starting at D, so D[i] receives S[-i].
//
//   S  address of the first byte to read, i.e. the highest source address
//   D  destination; written upward from this address
//   c  number of bytes to copy
//
// One byte and one byte-swapped word handle the low two bits of the
// count; the rest is moved as byte-swapped dwords.
void copybufreverse(void *S, void *D, long c)
{
	// %0 is S (esi) and %1 is D (edi). The pointers are named through
	// operands so the assembler uses the target's full pointer width, and
	// they are in/out together with ecx because the asm advances them:
	// GCC requires that input-only operands are left unmodified.
	__asm__ __volatile__ (
		"shrl $1, %%ecx\n\t"
		"jnc 0f\n\t"			// jnc skipit1
		"movb (%0), %%al\n\t"		// odd count: move one byte
		"dec %0\n\t"
		"movb %%al, (%1)\n\t"
		"inc %1\n\t"
		"0:\n\t"			// skipit1:
		"shrl $1, %%ecx\n\t"
		"jnc 1f\n\t"			// jnc skipit2
		"movw -1(%0), %%ax\n\t"		// two bytes ending at the source pointer
		"sub $2, %0\n\t"
		"rorw $8, %%ax\n\t"		// swap the two bytes
		"movw %%ax, (%1)\n\t"
		"add $2, %1\n\t"
		"1:\n\t"			// skipit2:
		"testl %%ecx, %%ecx\n\t"	// ecx = remaining dword count
		"jz 3f\n\t"			// jz endloop
		"2:\n\t"			// begloop:
		"movl -3(%0), %%eax\n\t"	// four bytes ending at the source pointer
		"sub $4, %0\n\t"
		"bswapl %%eax\n\t"		// reverse their order
		"movl %%eax, (%1)\n\t"
		"add $4, %1\n\t"
		"decl %%ecx\n\t"
		"jnz 2b\n\t"			// jnz begloop
		"3:"				// endloop:
		: "+S" (S), "+D" (D), "+c" (c)	// in/out esi edi ecx
		:
		: "eax", "memory", "cc"
	);
}

// Writes c consecutive 32-bit values to the buffer at address a. Element
// i receives (d + i*S) >> 16 using 32-bit wrap-around addition and an
// arithmetic shift.
//
//   a  address of the output buffer (32-bit elements), passed as an integer
//   c  number of elements to write; 0 writes nothing
//   d  starting value; only the low 32 bits are used
//   S  step added per element; only the low 32 bits are used
//
// The loop emits two elements per iteration; an odd count is finished by
// the single-element tail.
void qinterpolatedown16(long a, long c, long d, long S)
{
	// %0 is a (eax), named through an operand so memory accesses use the
	// target's full pointer width. eax, ecx and edx change inside the asm
	// and are therefore in/out operands: GCC requires that input-only
	// operands are left unmodified.
	__asm__ __volatile__ (
		"testl %%ecx, %%ecx\n\t"
		"jz 2f\n\t"			// zero count: store nothing
		"movl %%ecx, %%ebx\n\t"		// keep the count for the odd test
		"shrl $1, %%ecx\n\t"		// ecx = number of pairs
		"jz 1f\n\t"			// jz skipbegcalc (count is 1)
		"0:\n\t"			// begqcalc:
		"leal (%%edx,%%esi), %%edi\n\t"	// edi = value for the second element
		"sarl $16, %%edx\n\t"
		"movl %%edx, (%0)\n\t"
		"leal (%%edi,%%esi), %%edx\n\t"	// edx = value for the next pair
		"sarl $16, %%edi\n\t"
		"movl %%edi, 4(%0)\n\t"
		"add $8, %0\n\t"
		"decl %%ecx\n\t"
		"jnz 0b\n\t"			// jnz begqcalc
		"testl $1, %%ebx\n\t"
		"jz 2f\n\t"			// jz skipbegqcalc2 (even count: done)
		"1:\n\t"			// skipbegcalc:
		"sarl $16, %%edx\n\t"
		"movl %%edx, (%0)\n\t"		// last element of an odd count
		"2:"				// skipbegqcalc2:
		: "+a" (a), "+c" (c), "+d" (d)	// in/out eax ecx edx
		: "S" (S)			// input esi
		: "ebx", "edi", "memory", "cc"
	);
}

// Writes c consecutive 16-bit values to the buffer at address a. Element
// i receives the low 16 bits of (d + i*S) >> 16 using 32-bit wrap-around
// addition.
//
//   a  address of the output buffer (16-bit elements), passed as an integer
//   c  number of elements to write; 0 writes nothing
//   d  starting value; only the low 32 bits are used
//   S  step added per element; only the low 32 bits are used
//
// If the buffer is not 4-byte aligned one element is stored first; the
// main loop then packs two elements into each 32-bit store (first element
// in the low half, as on little-endian x86) and a final single store
// finishes an odd remainder.
void qinterpolatedown16short(long a, long c, long d, long S)
{
	// %0 is a (eax), named through an operand so memory accesses use the
	// target's full pointer width. eax, ecx and edx change inside the asm
	// and are therefore in/out operands: GCC requires that input-only
	// operands are left unmodified.
	__asm__ __volatile__ (
		"testl %%ecx, %%ecx\n\t"
		"jz 3f\n\t"			// jz endit (zero count)
		"testb $2, %%al\n\t"
		"jz 0f\n\t"			// jz skipalignit
		"movl %%edx, %%ebx\n\t"
		"sarl $16, %%ebx\n\t"
		"movw %%bx, (%0)\n\t"		// one element to reach 4-byte alignment
		"addl %%esi, %%edx\n\t"
		"add $2, %0\n\t"
		"decl %%ecx\n\t"
		"jz 3f\n\t"			// jz endit
		"0:\n\t"			// skipalignit:
		"subl $2, %%ecx\n\t"
		"jc 2f\n\t"			// jc finishit (only one element left)
		"1:\n\t"			// begqcalc:
		"movl %%edx, %%ebx\n\t"
		"addl %%esi, %%edx\n\t"
		// Logical shift: the upper half of ebx must be zero so that
		// adding edi below does not disturb the second element when the
		// first value is negative.
		"shrl $16, %%ebx\n\t"		// low half = first element
		"movl %%edx, %%edi\n\t"
		"andl $0xffff0000, %%edi\n\t"	// high half = second element
		"addl %%esi, %%edx\n\t"
		"addl %%edi, %%ebx\n\t"
		"movl %%ebx, (%0)\n\t"		// store both elements at once
		"add $4, %0\n\t"
		"subl $2, %%ecx\n\t"
		"jnc 1b\n\t"			// jnc begqcalc
		"testb $1, %%cl\n\t"		// ecx is -1 if one element remains
		"jz 3f\n\t"			// jz endit
		"2:\n\t"			// finishit:
		"movl %%edx, %%ebx\n\t"
		"sarl $16, %%ebx\n\t"
		"movw %%bx, (%0)\n\t"		// final odd element
		"3:"				// endit:
		: "+a" (a), "+c" (c), "+d" (d)	// in/out eax ecx edx
		: "S" (S)			// input esi
		: "ebx", "edi", "memory", "cc"
	);
}

/*
#pragma aux redblueblit = \
	"xor ecx, ecx",\
	"begblit: mov eax, dword ptr [edx+ecx]",\
	"shl eax, 4",\
	"add eax, dword ptr [ebx+ecx]",\
	"mov dword ptr [ecx+0xa0000], eax",\
	"add ecx, 4",\
	"cmp ecx, esi",\
	"jb begblit",\
	parm [ebx][edx][esi]\
	modify exact [eax ecx]\
*/
