// Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32\common\x86\atomics.cia
//
//

#include
#include

/*
Versions needed:
	WINS/WINSCW		Use X86 locked operations. Assume Pentium or above CPU (CMPXCHG8B available)
	X86				For Pentium and above use locked operations
					For 486 use locked operations for 8, 16, 32 bit. For 64 bit must disable interrupts.
					NOTE: 486 not supported at the moment
	ARMv4/ARMv5		Must disable interrupts.
	ARMv6			LDREX/STREX for 8, 16, 32 bit. For 64 bit must disable interrupts (maybe).
	ARMv6K/ARMv7	LDREXB/LDREXH/LDREX/LDREXD

Need both kernel side and user side versions
*/

#if defined(__SMP__) || !defined(__EPOC32__)
#define	__BARRIERS_NEEDED__
#define	__LOCK__	"lock "
#else
#define	__LOCK__
#endif


extern "C" {

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint32
#define	__TIntX__		TInt32
#define	__fname__(x)	x##32
#define	__redir__(x)	asm("jmp _"#x "32")
#define	__A_REG__		"eax"
#define	__C_REG__		"ecx"
#define	__D_REG__		"edx"
#include "atomic_skeleton.h"

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint16
#define	__TIntX__		TInt16
#define	__fname__(x)	x##16
#define	__redir__(x)	asm("jmp _"#x "16")
#define	__A_REG__		"ax"
#define	__C_REG__		"cx"
#define	__D_REG__		"dx"
#include "atomic_skeleton.h"

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint8
#define	__TIntX__		TInt8
#define	__fname__(x)	x##8
#define	__redir__(x)	asm("jmp _"#x "8")
#define	__A_REG__		"al"
#define	__C_REG__		"cl"
#define	__D_REG__		"dl"
#include "atomic_skeleton.h"

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__

/** Full memory barrier for explicit memory accesses
*/
EXPORT_C __NAKED__ void __e32_memory_barrier()
	{
#ifdef __BARRIERS_NEEDED__
	asm("lock add dword ptr [esp], 0 ");
#endif
	asm("ret ");
	}


/** Barrier guaranteeing completion as well as ordering
*/
EXPORT_C __NAKED__ void __e32_io_completion_barrier()
	{
	asm("push ebx ");
	asm("cpuid ");
	asm("pop ebx ");
	asm("ret ");
	}
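
/* Usage sketch (illustrative only; iData, iReady and ComputeValue are hypothetical,
   with iReady assumed to be a volatile TUint32 flag):

	iData = ComputeValue();		// write the payload
	__e32_memory_barrier();		// full fence: payload ordered before the flag
	iReady = 1;			// publish the flag

   __e32_io_completion_barrier() additionally waits for outstanding accesses to
   complete (CPUID is a serialising instruction), which matters for memory-mapped
   I/O rather than for ordinary memory ordering.
*/
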

/** Find the most significant 1 in a 32 bit word

	@param v	The word to be scanned
	@return		The bit number of the most significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ms1_32(TUint32 /*v*/)
	{
	asm("bsr eax, [esp+4] ");
	asm("jnz short 1f ");
	asm("mov eax, 0xffffffff ");
	asm("1: ");
	asm("ret ");
	}


/** Find the least significant 1 in a 32 bit word

	@param v	The word to be scanned
	@return		The bit number of the least significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ls1_32(TUint32 /*v*/)
	{
	asm("bsf eax, [esp+4] ");
	asm("jnz short 1f ");
	asm("mov eax, 0xffffffff ");
	asm("1: ");
	asm("ret ");
	}


/** Count the number of 1's in a 32 bit word

	@param v	The word to be scanned
	@return		The number of 1's
*/
EXPORT_C __NAKED__ TInt __e32_bit_count_32(TUint32 /*v*/)
	{
	asm("mov eax, [esp+4] ");
	asm("mov edx, eax ");
	asm("and eax, 0xaaaaaaaa ");
	asm("and edx, 0x55555555 ");	/* edx = even bits of arg */
	asm("shr eax, 1 ");				/* eax = odd bits of arg shifted into even bits */
	asm("add eax, edx ");			/* eax = 16 groups of 2 bit counts */
	asm("mov edx, eax ");
	asm("and eax, 0xcccccccc ");
	asm("and edx, 0x33333333 ");	/* even groups of 2 */
	asm("shr eax, 2 ");				/* odd groups of 2 shifted to even positions */
	asm("add eax, edx ");			/* 8 groups of 4 bit counts */
	asm("mov edx, eax ");
	asm("shr eax, 4 ");
	asm("add eax, edx ");			/* even nibbles = sum of 8 bits, odd nibbles garbage */
	asm("and eax, 0x0f0f0f0f ");	/* eliminate garbage nibbles */
	asm("add al, ah ");				/* AL = bit count of lower 16 bits */
	asm("mov dl, al ");
	asm("shr eax, 16 ");
	asm("add al, ah ");				/* AL = bit count of upper 16 bits */
	asm("xor ah, ah ");				/* top 24 bits of EAX now zero */
	asm("add al, dl ");				/* AL = bit count of entire 32 bits */
	asm("ret ");
	}


/** Find the most significant 1 in a 64 bit word

	@param v	The word to be scanned
	@return		The bit number of the most significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ms1_64(TUint64 /*v*/)
	{
	asm("bsr eax, [esp+8] ");
	asm("jnz short 2f ");
	asm("bsr eax, [esp+4] ");
	asm("jnz short 1f ");
	asm("mov eax, 0xffffffff ");
	asm("2: ");
	asm("or eax, 32 ");
	asm("1: ");
	asm("ret ");
	}


/** Find the least significant 1 in a 64 bit word

	@param v	The word to be scanned
	@return		The bit number of the least significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ls1_64(TUint64 /*v*/)
	{
	asm("bsf eax, [esp+4] ");
	asm("jnz short 1f ");
	asm("bsf eax, [esp+8] ");
	asm("jnz short 2f ");
	asm("mov eax, 0xffffffff ");
	asm("2: ");
	asm("or eax, 32 ");
	asm("1: ");
	asm("ret ");
	}
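
/* __e32_bit_count_32 above and __e32_bit_count_64 below share the same parallel
   ("SWAR") reduction: pair bits, then 2-bit groups, then nibbles. A C sketch of
   the 32 bit version, offered only as a readable equivalent of the assembler
   (BitCount32Sketch is a hypothetical name):

	TInt BitCount32Sketch(TUint32 v)
		{
		v = (v & 0x55555555u) + ((v >> 1) & 0x55555555u);	// 16 x 2-bit counts
		v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);	// 8 x 4-bit counts
		v = (v + (v >> 4)) & 0x0f0f0f0fu;					// 4 x byte counts
		v += v >> 16;										// fold high half into low
		v += v >> 8;										// fold bytes
		return (TInt)(v & 0xff);
		}
*/
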

/** Count the number of 1's in a 64 bit word

	@param v	The word to be scanned
	@return		The number of 1's
*/
EXPORT_C __NAKED__ TInt __e32_bit_count_64(TUint64 /*v*/)
	{
	asm("mov eax, [esp+4] ");
	asm("mov edx, [esp+8] ");

	asm("mov ecx, eax ");
	asm("and eax, 0xaaaaaaaa ");
	asm("and ecx, 0x55555555 ");
	asm("shr eax, 1 ");
	asm("add eax, ecx ");
	asm("mov ecx, eax ");
	asm("and eax, 0xcccccccc ");
	asm("and ecx, 0x33333333 ");
	asm("shr eax, 2 ");
	asm("add ecx, eax ");

	asm("mov eax, edx ");
	asm("and eax, 0xaaaaaaaa ");
	asm("and edx, 0x55555555 ");
	asm("shr eax, 1 ");
	asm("add eax, edx ");
	asm("mov edx, eax ");
	asm("and eax, 0xcccccccc ");
	asm("and edx, 0x33333333 ");
	asm("shr eax, 2 ");
	asm("add eax, edx ");

	asm("add eax, ecx ");
	asm("mov edx, eax ");
	asm("and eax, 0xf0f0f0f0 ");
	asm("and edx, 0x0f0f0f0f ");
	asm("shr eax, 4 ");
	asm("add eax, edx ");
	asm("add al, ah ");
	asm("mov dl, al ");
	asm("shr eax, 16 ");
	asm("add al, ah ");
	asm("xor ah, ah ");
	asm("add al, dl ");
	asm("ret ");
	}


/** Read a 64 bit word with acquire semantics

	@param a	Address of word to be read - must be a multiple of 8
	@return		The value read
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_load_acq64(const volatile TAny* /*a*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");
	asm("mov eax, 0x0badbeef ");
	asm("mov edx, eax ");
	asm("mov ebx, eax ");
	asm("mov ecx, eax ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** Write a 64 bit word with release semantics

	@param a	Address of word to be written - must be a multiple of 8
	@param v	The value to be written
	@return		The value written
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_store_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");
	asm("mov ebx, [esp+16] ");
	asm("mov ecx, [esp+20] ");
	asm("mov eax, [edi] ");
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("jne short 1b ");
	asm("mov eax, ebx ");
	asm("mov edx, ecx ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** Write a 64 bit word with full barrier semantics

	@param a	Address of word to be written - must be a multiple of 8
	@param v	The value to be written
	@return		The value written
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_store_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_store_rel64 ");
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Relaxed ordering.

	@param a	Address of word to be written - must be a multiple of 8
	@param v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_swp_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_swp_ord64 ");
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Acquire semantics.

	@param a	Address of word to be written - must be a multiple of 8
	@param v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_swp_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_swp_ord64 ");
	}
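
/* Note: on IA-32 every LOCKed read-modify-write instruction acts as a full
   two-way barrier, so the relaxed, acquire and release entry points in this
   file simply jump to the corresponding _ord64 (full barrier) implementation.
   __e32_atomic_load_acq64 above exploits the same property: LOCK CMPXCHG8B with
   a dummy comparand (0x0badbeef in all four registers) either swaps the value
   with itself or fails the compare, and in both cases leaves an atomic snapshot
   of the 64 bit memory in EDX:EAX.
*/
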

/** Write a 64 bit word to memory and return the original value of the memory.
	Release semantics.

	@param a	Address of word to be written - must be a multiple of 8
	@param v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_swp_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_swp_ord64 ");
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Full barrier semantics.

	@param a	Address of word to be written - must be a multiple of 8
	@param v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_swp_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");
	asm("mov ebx, [esp+16] ");
	asm("mov ecx, [esp+20] ");
	asm("mov eax, [edi] ");
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit compare and swap, relaxed ordering.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param a	Address of word to be written - must be a multiple of 8
	@param q	Address of location containing expected value
	@param v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool __e32_atomic_cas_rlx64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_cas_ord64 ");
	}


/** 64 bit compare and swap, acquire semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param a	Address of word to be written - must be a multiple of 8
	@param q	Address of location containing expected value
	@param v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool __e32_atomic_cas_acq64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_cas_ord64 ");
	}


/** 64 bit compare and swap, release semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param a	Address of word to be written - must be a multiple of 8
	@param q	Address of location containing expected value
	@param v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool __e32_atomic_cas_rel64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_cas_ord64 ");
	}
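
/* Usage sketch (illustrative only; ClampedIncrement, aCount and aLimit are
   hypothetical). On failure the CAS writes the value it actually saw back into
   *q, so the retry loop does not need to re-read the memory itself:

	TUint64 ClampedIncrement(volatile TUint64* aCount, TUint64 aLimit)
		{
		TUint64 old = *aCount;
		TUint64 newv;
		do	{
			newv = (old < aLimit) ? old + 1 : old;		// desired new value
			} while (!__e32_atomic_cas_ord64(aCount, &old, newv));
		return newv;
		}
*/
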

/** 64 bit compare and swap, full barrier semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param a	Address of word to be written - must be a multiple of 8
	@param q	Address of location containing expected value
	@param v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool __e32_atomic_cas_ord64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("push esi ");
	asm("mov edi, [esp+16] ");			// edi = a
	asm("mov esi, [esp+20] ");			// esi = q
	asm("mov ebx, [esp+24] ");			// ecx:ebx = v
	asm("mov ecx, [esp+28] ");
	asm("mov eax, [esi] ");				// edx:eax = *q
	asm("mov edx, [esi+4] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==*q) *a=v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 2f ");
	asm("mov eax, 1 ");
	asm("pop esi ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	asm("2: ");
	asm("mov [esi], eax ");				// *q = edx:eax
	asm("mov [esi+4], edx ");
	asm("xor eax, eax ");
	asm("pop esi ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit atomic add, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_add_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_add_ord64 ");
	}


/** 64 bit atomic add, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_add_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_add_ord64 ");
	}


/** 64 bit atomic add, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_add_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_add_ord64 ");
	}
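
/* Usage sketch (illustrative only; TheByteCount and AccountBytes are hypothetical):
   the add family returns the value seen before the addition, so a shared 64 bit
   statistics counter needs no external lock:

	volatile TUint64 TheByteCount = 0;	// must be 8 byte aligned, as noted above

	void AccountBytes(TUint64 aBytes)
		{
		__e32_atomic_add_rlx64(&TheByteCount, aBytes);	// previous value not needed here
		}
*/
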

/** 64 bit atomic add, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_add_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("add ebx, [esp+16] ");			// ecx:ebx = oldv + v
	asm("adc ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv+v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit atomic bitwise logical AND, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_and_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_and_ord64 ");
	}


/** 64 bit atomic bitwise logical AND, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_and_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_and_ord64 ");
	}


/** 64 bit atomic bitwise logical AND, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_and_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_and_ord64 ");
	}


/** 64 bit atomic bitwise logical AND, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_and_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("and ebx, [esp+16] ");			// ecx:ebx = oldv & v
	asm("and ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv&v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
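
/* Usage sketch (illustrative only; KFlagShuttingDown and RequestShutdown are
   hypothetical): because the AND/OR families return the previous value, a 64 bit
   flag word can be updated and tested in one step:

	const TUint64 KFlagShuttingDown = 1;

	TBool RequestShutdown(volatile TUint64* aFlags)
		{
		TUint64 old = __e32_atomic_ior_ord64(aFlags, KFlagShuttingDown);
		return !(old & KFlagShuttingDown);	// true only for the first caller
		}

	// The flag is cleared again with __e32_atomic_and_ord64(aFlags, ~KFlagShuttingDown).
*/
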

/** 64 bit atomic bitwise logical inclusive OR, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_ior_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_ior_ord64 ");
	}


/** 64 bit atomic bitwise logical inclusive OR, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_ior_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_ior_ord64 ");
	}


/** 64 bit atomic bitwise logical inclusive OR, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_ior_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_ior_ord64 ");
	}


/** 64 bit atomic bitwise logical inclusive OR, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_ior_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("or ebx, [esp+16] ");			// ecx:ebx = oldv | v
	asm("or ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv|v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit atomic bitwise logical exclusive OR, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_xor_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_xor_ord64 ");
	}


/** 64 bit atomic bitwise logical exclusive OR, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_xor_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_xor_ord64 ");
	}
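
/* Usage sketch (illustrative only; TheFlags and KFlagBlink are hypothetical):
   the XOR family toggles bits and reports the state before the toggle:

	TUint64 old = __e32_atomic_xor_ord64(&TheFlags, KFlagBlink);	// flip the bit
	TBool wasSet = (old & KFlagBlink) != 0;							// state before the flip
*/
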

/** 64 bit atomic bitwise logical exclusive OR, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_xor_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_xor_ord64 ");
	}


/** 64 bit atomic bitwise logical exclusive OR, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_xor_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("xor ebx, [esp+16] ");			// ecx:ebx = oldv ^ v
	asm("xor ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv^v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit atomic bitwise universal function, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param u	The value to be ANDed with *a
	@param v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_axo_rlx64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_axo_ord64 ");
	}


/** 64 bit atomic bitwise universal function, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param u	The value to be ANDed with *a
	@param v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_axo_acq64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_axo_ord64 ");
	}


/** 64 bit atomic bitwise universal function, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param u	The value to be ANDed with *a
	@param v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_axo_rel64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_axo_ord64 ");
	}
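
/* Usage sketch (illustrative only; SetField64, aWord, aMask and aBits are
   hypothetical): the and-xor form can replace an arbitrary masked field in one
   atomic step, since (oldv & ~mask) ^ (bits & mask) == (oldv & ~mask) | (bits & mask):

	// Atomically set the bits selected by aMask to the corresponding bits of aBits.
	TUint64 SetField64(volatile TUint64* aWord, TUint64 aMask, TUint64 aBits)
		{
		return __e32_atomic_axo_ord64(aWord, ~aMask, aBits & aMask);	// returns previous value
		}
*/
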

/** 64 bit atomic bitwise universal function, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param a	Address of word to be updated - must be a multiple of 8
	@param u	The value to be ANDed with *a
	@param v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_axo_ord64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("and ebx, [esp+16] ");			// ecx:ebx = oldv & u
	asm("and ecx, [esp+20] ");
	asm("xor ebx, [esp+24] ");			// ecx:ebx = (oldv & u) ^ v
	asm("xor ecx, [esp+28] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=(oldv&u)^v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit threshold and add, unsigned, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param a	Address of data to be updated - must be naturally aligned
	@param t	The threshold to compare *a to (unsigned compare)
	@param u	The value to be added to *a if it is originally >= t
	@param v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_tau_rlx64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_tau_ord64 ");
	}


/** 64 bit threshold and add, unsigned, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param a	Address of data to be updated - must be naturally aligned
	@param t	The threshold to compare *a to (unsigned compare)
	@param u	The value to be added to *a if it is originally >= t
	@param v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_tau_acq64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_tau_ord64 ");
	}


/** 64 bit threshold and add, unsigned, release semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param a	Address of data to be updated - must be naturally aligned
	@param t	The threshold to compare *a to (unsigned compare)
	@param u	The value to be added to *a if it is originally >= t
	@param v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_tau_rel64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_tau_ord64 ");
	}
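
/* Usage sketch (illustrative only; TryTakeToken and aTokens are hypothetical):
   threshold-and-add makes a natural token counter - take one unit only if at
   least one is available, otherwise leave the count unchanged:

	TBool TryTakeToken(volatile TUint64* aTokens)
		{
		// if (*aTokens >= 1) *aTokens += (TUint64)-1 (i.e. -1), else *aTokens += 0
		TUint64 old = __e32_atomic_tau_ord64(aTokens, 1, (TUint64)-1, 0);
		return old != 0;	// true if a token was consumed
		}
*/
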

/** 64 bit threshold and add, unsigned, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param a	Address of data to be updated - must be naturally aligned
	@param t	The threshold to compare *a to (unsigned compare)
	@param u	The value to be added to *a if it is originally >= t
	@param v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64 __e32_atomic_tau_ord64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, edx ");
	asm("cmp eax, [esp+16] ");			// eax - t.low, CF=borrow
	asm("sbb ebx, [esp+20] ");			// CF = borrow from (oldv - t)
	asm("jnc short 2f ");				// no borrow means oldv>=t so use u
	asm("mov ebx, [esp+32] ");			// ecx:ebx = v
	asm("mov ecx, [esp+36] ");
	asm("jmp short 3f ");
	asm("2: ");
	asm("mov ebx, [esp+24] ");			// ecx:ebx = u
	asm("mov ecx, [esp+28] ");
	asm("3: ");
	asm("add ebx, eax ");				// ecx:ebx = oldv + u or v
	asm("adc ecx, edx ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit threshold and add, signed, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param a	Address of data to be updated - must be naturally aligned
	@param t	The threshold to compare *a to (signed compare)
	@param u	The value to be added to *a if it is originally >= t
	@param v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64 __e32_atomic_tas_rlx64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("jmp ___e32_atomic_tas_ord64 ");
	}


/** 64 bit threshold and add, signed, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param a	Address of data to be updated - must be naturally aligned
	@param t	The threshold to compare *a to (signed compare)
	@param u	The value to be added to *a if it is originally >= t
	@param v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64 __e32_atomic_tas_acq64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("jmp ___e32_atomic_tas_ord64 ");
	}
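
/* Note: the signed __e32_atomic_tas_ord64 implementation below differs from the
   unsigned __e32_atomic_tau_ord64 above only in the threshold test - the
   unsigned version branches on the carry flag (JNC) after the 64 bit subtract,
   while the signed version branches on sign-equals-overflow (JGE), i.e. a
   signed oldv >= t comparison.
*/
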

/** 64 bit threshold and add, signed, release semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param a	Address of data to be updated - must be naturally aligned
	@param t	The threshold to compare *a to (signed compare)
	@param u	The value to be added to *a if it is originally >= t
	@param v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64 __e32_atomic_tas_rel64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("jmp ___e32_atomic_tas_ord64 ");
	}


/** 64 bit threshold and add, signed, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param a	Address of data to be updated - must be naturally aligned
	@param t	The threshold to compare *a to (signed compare)
	@param u	The value to be added to *a if it is originally >= t
	@param v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64 __e32_atomic_tas_ord64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, edx ");
	asm("cmp eax, [esp+16] ");			// eax - t.low, CF=borrow
	asm("sbb ebx, [esp+20] ");			// SF=sign, OF=overflow from (oldv - t)
	asm("jge short 2f ");				// SF==OF (GE condition) means oldv>=t so use u
	asm("mov ebx, [esp+32] ");			// ecx:ebx = v
	asm("mov ecx, [esp+36] ");
	asm("jmp short 3f ");
	asm("2: ");
	asm("mov ebx, [esp+24] ");			// ecx:ebx = u
	asm("mov ecx, [esp+28] ");
	asm("3: ");
	asm("add ebx, eax ");				// ecx:ebx = oldv + u or v
	asm("adc ecx, edx ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}

} // extern "C"