// Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32\common\x86\atomics.cia
//
//

#include <e32atomics.h>
#include <cpudefs.h>

/*
Versions needed:
	WINS/WINSCW		Use X86 locked operations. Assume Pentium or above CPU (CMPXCHG8B available)
	X86				For Pentium and above use locked operations
					For 486 use locked operations for 8, 16, 32 bit. For 64 bit must disable interrupts.
					NOTE: 486 not supported at the moment
	ARMv4/ARMv5		Must disable interrupts.
	ARMv6			LDREX/STREX for 8, 16, 32 bit. For 64 bit must disable interrupts (maybe).
	ARMv6K/ARMv7	LDREXB/LDREXH/LDREX/LDREXD

Need both kernel side and user side versions
*/

#if	defined(__SMP__) || !defined(__EPOC32__)
#define	__BARRIERS_NEEDED__
#define	__LOCK__	"lock "
#else
#define	__LOCK__
#endif


extern "C" {

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint32
#define	__TIntX__		TInt32
#define	__fname__(x)	x##32
#define	__redir__(x)	asm("jmp _"#x "32")
#define	__A_REG__		"eax"
#define	__C_REG__		"ecx"
#define	__D_REG__		"edx"
#include "atomic_skeleton.h"

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint16
#define	__TIntX__		TInt16
#define	__fname__(x)	x##16
#define	__redir__(x)	asm("jmp _"#x "16")
#define	__A_REG__		"ax"
#define	__C_REG__		"cx"
#define	__D_REG__		"dx"
#include "atomic_skeleton.h"

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint8
#define	__TIntX__		TInt8
#define	__fname__(x)	x##8
#define	__redir__(x)	asm("jmp _"#x "8")
#define	__A_REG__		"al"
#define	__C_REG__		"cl"
#define	__D_REG__		"dl"
#include "atomic_skeleton.h"

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
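
/*
Illustrative note (not part of the original source): atomic_skeleton.h is included
three times above, once per operand size, and appears to use the macros defined
immediately before each inclusion to stamp out the 8, 16 and 32 bit entry points;
the 64 bit operations are written out by hand below because they need CMPXCHG8B.
For example, with the 32 bit definitions in effect:

	__fname__(__e32_atomic_add_ord)		// expands to __e32_atomic_add_ord32
	__redir__(__e32_atomic_add_rlx)		// expands to asm("jmp ___e32_atomic_add_rlx32")
	__A_REG__							// expands to "eax", the accumulator of matching width
*/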

/** Full memory barrier for explicit memory accesses

*/
EXPORT_C __NAKED__ void __e32_memory_barrier()
	{
#ifdef __BARRIERS_NEEDED__
	asm("lock add dword ptr [esp], 0 ");
#endif
	asm("ret ");
	}
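
/*
Usage sketch (assumption, not part of the original source): a locked read-modify-write
of the stack top acts as a full fence on IA-32, so ordinary accesses can be ordered
around it, e.g. publishing a payload before a ready flag (payload, readyFlag and
ComputeValue() are hypothetical names):

	payload = ComputeValue();		// ordinary store
	__e32_memory_barrier();			// payload globally visible before the flag
	readyFlag = 1;					// ordinary store

On a non-SMP EPOC32 build __BARRIERS_NEEDED__ is not defined and the function
collapses to a bare RET.
*/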


/** Barrier guaranteeing completion as well as ordering

*/
EXPORT_C __NAKED__ void __e32_io_completion_barrier()
	{
	asm("push ebx ");
	asm("cpuid ");			// CPUID is a serializing instruction; it clobbers EBX (callee-saved), so preserve it
	asm("pop ebx ");
	asm("ret ");
	}


/** Find the most significant 1 in a 32 bit word

	@param	v	The word to be scanned
	@return		The bit number of the most significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ms1_32(TUint32 /*v*/)
	{
	asm("bsr eax, [esp+4] ");
	asm("jnz short 1f ");
	asm("mov eax, 0xffffffff ");
	asm("1: ");
	asm("ret ");
	}


/** Find the least significant 1 in a 32 bit word

	@param	v	The word to be scanned
	@return		The bit number of the least significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ls1_32(TUint32 /*v*/)
	{
	asm("bsf eax, [esp+4] ");
	asm("jnz short 1f ");
	asm("mov eax, 0xffffffff ");
	asm("1: ");
	asm("ret ");
	}


/** Count the number of 1's in a 32 bit word

	@param	v	The word to be scanned
	@return		The number of 1's
*/
EXPORT_C __NAKED__ TInt __e32_bit_count_32(TUint32 /*v*/)
	{
	asm("mov eax, [esp+4] ");
	asm("mov edx, eax ");
	asm("and eax, 0xaaaaaaaa ");
	asm("and edx, 0x55555555 ");	/* edx = even bits of arg */
	asm("shr eax, 1 ");				/* eax = odd bits of arg shifted into even bits */
	asm("add eax, edx ");			/* eax = 16 groups of 2 bit counts */
	asm("mov edx, eax ");
	asm("and eax, 0xcccccccc ");
	asm("and edx, 0x33333333 ");	/* even groups of 2 */
	asm("shr eax, 2 ");				/* odd groups of 2 shifted to even positions */
	asm("add eax, edx ");			/* 8 groups of 4 bit counts */
	asm("mov edx, eax ");
	asm("shr eax, 4 ");
	asm("add eax, edx ");			/* even nibbles = sum of 8 bits, odd nibbles garbage */
	asm("and eax, 0x0f0f0f0f ");	/* eliminate garbage nibbles */
	asm("add al, ah ");				/* AL = bit count of lower 16 bits */
	asm("mov dl, al ");
	asm("shr eax, 16 ");
	asm("add al, ah ");				/* AL = bit count of upper 16 bits */
	asm("xor ah, ah ");				/* top 24 bits of EAX now zero */
	asm("add al, dl ");				/* AL = bit count of entire 32 bits */
	asm("ret ");
	}
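
/*
Reference sketch (not part of the original source): the same parallel bit count the
assembler above performs, written as portable C for clarity. Each step halves the
number of partial sums and doubles their width, then the four byte counts are added.

	TInt BitCount32Reference(TUint32 v)		// hypothetical helper, illustration only
		{
		v = (v & 0x55555555u) + ((v >> 1) & 0x55555555u);	// 16 x 2-bit sums
		v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);	// 8 x 4-bit sums
		v = (v + (v >> 4)) & 0x0f0f0f0fu;					// 4 x 8-bit sums
		return (TInt)(((v >> 24) + (v >> 16) + (v >> 8) + v) & 0xffu);	// add the four bytes
		}
*/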


/** Find the most significant 1 in a 64 bit word

	@param	v	The word to be scanned
	@return		The bit number of the most significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ms1_64(TUint64 /*v*/)
	{
	asm("bsr eax, [esp+8] ");		// scan the high word first; ZF=1 if it is zero
	asm("jnz short 2f ");			// nonzero high word: result = bit number + 32
	asm("bsr eax, [esp+4] ");		// high word zero: scan the low word
	asm("jnz short 1f ");
	asm("mov eax, 0xffffffff ");	// v == 0: return -1 (the OR below leaves -1 unchanged)
	asm("2: ");
	asm("or eax, 32 ");
	asm("1: ");
	asm("ret ");
	}
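
/*
Reference sketch (not part of the original source): equivalent logic in C. BSR sets ZF
when its source is zero, so the code above only falls through to the low word when the
high word is zero, and the "OR EAX,32" at label 2 is harmless in the v==0 path because
0xffffffff | 32 is still 0xffffffff.

	TInt FindMs164Reference(TUint64 v)		// hypothetical helper, illustration only
		{
		TUint32 hi = (TUint32)(v >> 32);
		if (hi)
			return 32 + __e32_find_ms1_32(hi);
		return __e32_find_ms1_32((TUint32)v);	// also returns -1 when v == 0
		}
*/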


/** Find the least significant 1 in a 64 bit word

	@param	v	The word to be scanned
	@return		The bit number of the least significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ls1_64(TUint64 /*v*/)
	{
	asm("bsf eax, [esp+4] ");		// scan the low word first; ZF=1 if it is zero
	asm("jnz short 1f ");
	asm("bsf eax, [esp+8] ");		// low word zero: scan the high word
	asm("jnz short 2f ");			// nonzero high word: result = bit number + 32
	asm("mov eax, 0xffffffff ");	// v == 0: return -1 (the OR below leaves -1 unchanged)
	asm("2: ");
	asm("or eax, 32 ");
	asm("1: ");
	asm("ret ");
	}


/** Count the number of 1's in a 64 bit word

	@param	v	The word to be scanned
	@return		The number of 1's
*/
EXPORT_C __NAKED__ TInt __e32_bit_count_64(TUint64 /*v*/)
	{
	asm("mov eax, [esp+4] ");		// eax = low word, edx = high word
	asm("mov edx, [esp+8] ");

	asm("mov ecx, eax ");			// reduce the low word to 8 groups of 4 bit counts in ecx
	asm("and eax, 0xaaaaaaaa ");
	asm("and ecx, 0x55555555 ");
	asm("shr eax, 1 ");
	asm("add eax, ecx ");
	asm("mov ecx, eax ");
	asm("and eax, 0xcccccccc ");
	asm("and ecx, 0x33333333 ");
	asm("shr eax, 2 ");
	asm("add ecx, eax ");

	asm("mov eax, edx ");			// likewise reduce the high word to 8 groups of 4 bit counts in eax
	asm("and eax, 0xaaaaaaaa ");
	asm("and edx, 0x55555555 ");
	asm("shr eax, 1 ");
	asm("add eax, edx ");
	asm("mov edx, eax ");
	asm("and eax, 0xcccccccc ");
	asm("and edx, 0x33333333 ");
	asm("shr eax, 2 ");
	asm("add eax, edx ");

	asm("add eax, ecx ");			// combine both halves and finish as in the 32 bit version
	asm("mov edx, eax ");
	asm("and eax, 0xf0f0f0f0 ");
	asm("and edx, 0x0f0f0f0f ");
	asm("shr eax, 4 ");
	asm("add eax, edx ");
	asm("add al, ah ");
	asm("mov dl, al ");
	asm("shr eax, 16 ");
	asm("add al, ah ");
	asm("xor ah, ah ");
	asm("add al, dl ");
	asm("ret ");
	}




/** Read a 64 bit word with acquire semantics

	@param	a	Address of word to be read - must be a multiple of 8
	@return		The value read
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_load_acq64(const volatile TAny* /*a*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");
	asm("mov eax, 0x0badbeef ");		// arbitrary comparand: either it already equals *a, or CMPXCHG8B loads *a into edx:eax
	asm("mov edx, eax ");
	asm("mov ebx, eax ");
	asm("mov ecx, eax ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
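
/*
Note (not part of the original source): CMPXCHG8B loads the current 64 bit value into
EDX:EAX when the compare fails, and rewrites the same value when it happens to succeed,
so executing it with an arbitrary comparand is a way to get an atomic 64 bit read on
IA-32. Roughly, in terms of the functions defined in this file:

	TUint64 LoadAcq64Sketch(volatile TAny* a)	// hypothetical helper, illustration only
		{
		TUint64 guess = 0x0badbeef;
		// atomic: if (*a == guess) *a = guess; else guess = *a;
		__e32_atomic_cas_ord64(a, &guess, guess);
		return guess;							// guess now holds the value that was in *a
		}
*/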


/** Write a 64 bit word with release semantics

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The value written
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_store_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");
	asm("mov ebx, [esp+16] ");
	asm("mov ecx, [esp+20] ");
	asm("mov eax, [edi] ");
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// retry until the 64 bit store of ecx:ebx lands atomically
	asm("jne short 1b ");
	asm("mov eax, ebx ");
	asm("mov edx, ecx ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
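
/*
Note (not part of the original source): the store is implemented as a CMPXCHG8B retry
loop rather than a plain store so the full 64 bits are written in one atomic step.
In C terms:

	TUint64 StoreRel64Sketch(volatile TAny* a, TUint64 v)	// hypothetical helper, illustration only
		{
		TUint64 old = *(volatile TUint64*)a;		// initial guess only; need not be atomic
		while (!__e32_atomic_cas_rel64(a, &old, v))
			{}										// old has been refreshed with the current *a
		return v;
		}
*/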


/** Write a 64 bit word with full barrier semantics

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The value written
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_store_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_store_rel64 ");
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Relaxed ordering.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_swp_ord64 ");
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Acquire semantics.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_swp_ord64 ");
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Release semantics.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_swp_ord64 ");
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Full barrier semantics.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");
	asm("mov ebx, [esp+16] ");
	asm("mov ecx, [esp+20] ");
	asm("mov eax, [edi] ");
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit compare and swap, relaxed ordering.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_rlx64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_cas_ord64 ");
	}


/** 64 bit compare and swap, acquire semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_acq64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_cas_ord64 ");
	}


/** 64 bit compare and swap, release semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_rel64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_cas_ord64 ");
	}


/** 64 bit compare and swap, full barrier semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_ord64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("push esi ");
	asm("mov edi, [esp+16] ");			// edi = a
	asm("mov esi, [esp+20] ");			// esi = q
	asm("mov ebx, [esp+24] ");			// ecx:ebx = v
	asm("mov ecx, [esp+28] ");
	asm("mov eax, [esi] ");				// edx:eax = *q
	asm("mov edx, [esi+4] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==*q) *a=v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 2f ");
	asm("mov eax, 1 ");
	asm("pop esi ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	asm("2: ");
	asm("mov [esi], eax ");				// *q = edx:eax
	asm("mov [esi+4], edx ");
	asm("xor eax, eax ");
	asm("pop esi ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
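
/*
Note (not part of the original source): this is the primitive the other 64 bit
operations in this file are built on. On failure the caller's expected value is
overwritten with what was actually found, so a typical retry loop does not need to
re-read *a itself, e.g.:

	void AtomicIncrement64Sketch(volatile TAny* a)	// hypothetical helper, illustration only
		{
		TUint64 old = __e32_atomic_load_acq64(a);
		while (!__e32_atomic_cas_ord64(a, &old, old + 1))
			{}									// old now holds the latest value of *a; try again
		}
*/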


/** 64 bit atomic add, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_add_ord64 ");
	}


/** 64 bit atomic add, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_add_ord64 ");
	}


/** 64 bit atomic add, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_add_ord64 ");
	}


/** 64 bit atomic add, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("add ebx, [esp+16] ");			// ecx:ebx = oldv + v
	asm("adc ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv+v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
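
/*
Note (not part of the original source): add, and, ior, xor and axo below all follow
the same shape: read EDX:EAX once, compute the new value into ECX:EBX, then let
CMPXCHG8B publish it, retrying from the value it returns if another CPU got in first.
As C, using the CAS defined above:

	TUint64 FetchAdd64Sketch(volatile TAny* a, TUint64 v)	// hypothetical helper, illustration only
		{
		TUint64 old = *(volatile TUint64*)a;				// initial guess only
		while (!__e32_atomic_cas_ord64(a, &old, old + v))	// and/ior/xor replace "+" with &, |, ^
			{}
		return old;											// original value, as the real functions return
		}
*/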


/** 64 bit atomic bitwise logical AND, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_and_ord64 ");
	}


/** 64 bit atomic bitwise logical AND, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_and_ord64 ");
	}


/** 64 bit atomic bitwise logical AND, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_and_ord64 ");
	}


/** 64 bit atomic bitwise logical AND, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("and ebx, [esp+16] ");			// ecx:ebx = oldv & v
	asm("and ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv&v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit atomic bitwise logical inclusive OR, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_ior_ord64 ");
	}


/** 64 bit atomic bitwise logical inclusive OR, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_ior_ord64 ");
	}


/** 64 bit atomic bitwise logical inclusive OR, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_ior_ord64 ");
	}


/** 64 bit atomic bitwise logical inclusive OR, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("or ebx, [esp+16] ");			// ecx:ebx = oldv | v
	asm("or ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv|v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit atomic bitwise logical exclusive OR, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_xor_ord64 ");
	}


/** 64 bit atomic bitwise logical exclusive OR, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_xor_ord64 ");
	}


/** 64 bit atomic bitwise logical exclusive OR, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_xor_ord64 ");
	}


/** 64 bit atomic bitwise logical exclusive OR, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("xor ebx, [esp+16] ");			// ecx:ebx = oldv ^ v
	asm("xor ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv^v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit atomic bitwise universal function, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_rlx64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_axo_ord64 ");
	}


/** 64 bit atomic bitwise universal function, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_acq64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_axo_ord64 ");
	}


/** 64 bit atomic bitwise universal function, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_rel64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_axo_ord64 ");
	}


/** 64 bit atomic bitwise universal function, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_ord64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("and ebx, [esp+16] ");			// ecx:ebx = oldv & u
	asm("and ecx, [esp+20] ");
	asm("xor ebx, [esp+24] ");			// ecx:ebx = (oldv & u) ^ v
	asm("xor ecx, [esp+28] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=(oldv&u)^v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
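
/*
Note (not part of the original source): the "universal" (oldv & u) ^ v form can express
the simpler bitwise operations by choosing u and v appropriately; values below are
illustrative only:

	__e32_atomic_axo_ord64(a, ~mask, 0);			// clear the bits in mask (AND with ~mask)
	__e32_atomic_axo_ord64(a, ~(TUint64)0, mask);	// toggle the bits in mask (XOR with mask)
	__e32_atomic_axo_ord64(a, ~mask, mask);			// set the bits in mask (equivalent to OR)
*/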


/** 64 bit threshold and add, unsigned, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_rlx64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_tau_ord64 ");
	}


/** 64 bit threshold and add, unsigned, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_acq64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_tau_ord64 ");
	}


/** 64 bit threshold and add, unsigned, release semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_rel64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_tau_ord64 ");
	}


/** 64 bit threshold and add, unsigned, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_ord64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, edx ");
	asm("cmp eax, [esp+16] ");			// eax - t.low, CF=borrow
	asm("sbb ebx, [esp+20] ");			// CF = borrow from (oldv - t)
	asm("jnc short 2f ");				// no borrow means oldv>=t so use u
	asm("mov ebx, [esp+32] ");			// ecx:ebx = v
	asm("mov ecx, [esp+36] ");
	asm("jmp short 3f ");
	asm("2: ");
	asm("mov ebx, [esp+24] ");			// ecx:ebx = u
	asm("mov ecx, [esp+28] ");
	asm("3: ");
	asm("add ebx, eax ");				// ecx:ebx = oldv + u or v
	asm("adc ecx, edx ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
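
/*
Reference sketch (not part of the original source): the unsigned threshold-and-add
above, as C. The CMP/SBB pair performs a 64 bit unsigned compare of oldv against t via
the carry flag; no borrow (JNC) means oldv >= t, so u is added, otherwise v is added.

	TUint64 Tau64Sketch(volatile TAny* a, TUint64 t, TUint64 u, TUint64 v)	// hypothetical helper, illustration only
		{
		TUint64 old = *(volatile TUint64*)a;		// initial guess only
		while (!__e32_atomic_cas_ord64(a, &old, old + (old >= t ? u : v)))
			{}
		return old;
		}
*/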


/** 64 bit threshold and add, signed, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_rlx64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("jmp ___e32_atomic_tas_ord64 ");
	}


/** 64 bit threshold and add, signed, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_acq64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("jmp ___e32_atomic_tas_ord64 ");
	}


/** 64 bit threshold and add, signed, release semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_rel64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("jmp ___e32_atomic_tas_ord64 ");
	}


/** 64 bit threshold and add, signed, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_ord64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, edx ");
	asm("cmp eax, [esp+16] ");			// eax - t.low, CF=borrow
	asm("sbb ebx, [esp+20] ");			// SF=sign, OF=overflow from (oldv - t)
	asm("jge short 2f ");				// SF==OF (GE condition) means oldv>=t so use u
	asm("mov ebx, [esp+32] ");			// ecx:ebx = v
	asm("mov ecx, [esp+36] ");
	asm("jmp short 3f ");
	asm("2: ");
	asm("mov ebx, [esp+24] ");			// ecx:ebx = u
	asm("mov ecx, [esp+28] ");
	asm("3: ");
	asm("add ebx, eax ");				// ecx:ebx = oldv + u or v
	asm("adc ecx, edx ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
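
/*
Note (not part of the original source): identical to the unsigned version above except
that the CMP/SBB result is judged with JGE (sign and overflow flags) instead of JNC,
i.e. the threshold test "oldv >= t" is a signed 64 bit compare. Roughly:

	TInt64 Tas64Sketch(volatile TAny* a, TInt64 t, TInt64 u, TInt64 v)	// hypothetical helper, illustration only
		{
		TInt64 old = *(volatile TInt64*)a;		// initial guess only
		while (!__e32_atomic_cas_ord64(a, (TUint64*)&old, (TUint64)(old + (old >= t ? u : v))))
			{}
		return old;
		}
*/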

} // extern "C"