os/kernelhwsrv/kernel/eka/common/win32/atomics.cpp
changeset 0 bde4ae8d615e
// Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32\common\win32\atomics.cpp
//
//

#include <e32atomics.h>
#include <cpudefs.h>

/*
Versions needed:
	WINS/WINSCW		Use X86 locked operations. Assume Pentium or above CPU (CMPXCHG8B available)
	X86				For Pentium and above use locked operations
					For 486 use locked operations for 8, 16, 32 bit. For 64 bit must disable interrupts.
					NOTE: 486 not supported at the moment
	ARMv4/ARMv5		Must disable interrupts.
	ARMv6			LDREX/STREX for 8, 16, 32 bit. For 64 bit must disable interrupts (LDREXD is only available from ARMv6K onwards).
	ARMv6K/ARMv7	LDREXB/LDREXH/LDREX/LDREXD

Both kernel side and user side versions are needed.
*/

#if	defined(__SMP__) || !defined(__EPOC32__)
#define	__BARRIERS_NEEDED__
#define	__LOCK__	lock
#else
#define	__LOCK__
#endif

extern "C" {

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint32
#define	__TIntX__		TInt32
#define	__fname__(x)	x##32
#define	__A_REG__		eax
#define	__C_REG__		ecx
#define	__D_REG__		edx
#include "atomic_skeleton.h"

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint16
#define	__TIntX__		TInt16
#define	__fname__(x)	x##16
#define	__A_REG__		ax
#define	__C_REG__		cx
#define	__D_REG__		dx
#include "atomic_skeleton.h"

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint8
#define	__TIntX__		TInt8
#define	__fname__(x)	x##8
#define	__A_REG__		al
#define	__C_REG__		cl
#define	__D_REG__		dl
#include "atomic_skeleton.h"

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__

/** Full memory barrier for explicit memory accesses

*/
EXPORT_C __NAKED__ void __e32_memory_barrier()
	{
#ifdef __BARRIERS_NEEDED__
	_asm lock add dword ptr [esp], 0	// a locked read-modify-write of the stack top serialises all prior loads and stores
#endif
	_asm ret
	}
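
/* A typical use is publishing data through a flag; the barrier keeps the
   payload write visible before the flag write. A sketch - the names here
   are illustrative, not part of this API:

	TUint32 g_payload;
	volatile TUint32 g_ready;

	void Publish(TUint32 aValue)
		{
		g_payload = aValue;			// write the data ...
		__e32_memory_barrier();		// ... then fence ...
		g_ready = 1;				// ... then signal the consumer
		}
*/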


/** Barrier guaranteeing completion as well as ordering

*/
EXPORT_C __NAKED__ void __e32_io_completion_barrier()
	{
	_asm push ebx		// CPUID clobbers EAX, EBX, ECX and EDX; only EBX is callee-saved under cdecl
	_asm cpuid			// CPUID is a fully serialising instruction
	_asm pop ebx
	_asm ret
	}


/** Find the most significant 1 in a 32 bit word

	@param	v	The word to be scanned
	@return		The bit number of the most significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ms1_32(TUint32 /*v*/)
	{
	_asm bsr eax, [esp+4]		// eax = bit number of MS1; ZF=1 if v == 0
	_asm jnz short done
	_asm mov eax, 0ffffffffh	// v == 0: return -1
done:
	_asm ret
	}
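
/* Since the bit number of the most significant 1 is floor(log2(v)) for
   v > 0, a power-of-two round-up can be written on top of this. A sketch,
   valid for 1 < v <= 0x80000000:

	static inline TUint32 RoundUpToPowerOf2(TUint32 v)
		{
		return 1u << (__e32_find_ms1_32(v-1) + 1);
		}
*/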


/** Find the least significant 1 in a 32 bit word

	@param	v	The word to be scanned
	@return		The bit number of the least significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ls1_32(TUint32 /*v*/)
	{
	_asm bsf eax, [esp+4]		// eax = bit number of LS1; ZF=1 if v == 0
	_asm jnz short done
	_asm mov eax, 0ffffffffh	// v == 0: return -1
done:
	_asm ret
	}


/** Count the number of 1's in a 32 bit word

	@param	v	The word to be scanned
	@return		The number of 1's
*/
EXPORT_C __NAKED__ TInt __e32_bit_count_32(TUint32 /*v*/)
	{
	_asm mov eax, [esp+4]
	_asm mov edx, eax
	_asm and eax, 0aaaaaaaah
	_asm and edx, 055555555h
	_asm shr eax, 1
	_asm add eax, edx			/* 16 groups of 2 bits, count=0,1,2 */
	_asm mov edx, eax
	_asm and eax, 0cccccccch
	_asm and edx, 033333333h
	_asm shr eax, 2
	_asm add eax, edx			/* 8 groups of 4 bits, count=0...4 */
	_asm mov edx, eax
	_asm shr eax, 4
	_asm add eax, edx
	_asm and eax, 00f0f0f0fh	/* 4 groups of 8 bits, count=0...8 */
	_asm add al, ah
	_asm mov dl, al
	_asm shr eax, 16
	_asm add al, ah
	_asm xor ah, ah
	_asm add al, dl				/* al = sum of the 4 byte counts */
	_asm ret
	}
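
/* The equivalent in portable C, a sketch of the same bit-slicing technique
   for reference only:

	static inline TInt BitCount32(TUint32 v)
		{
		v = (v & 0x55555555u) + ((v >> 1) & 0x55555555u);	// 16 x 2-bit counts
		v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);	// 8 x 4-bit counts
		v = (v + (v >> 4)) & 0x0f0f0f0fu;					// 4 x 8-bit counts
		return (TInt)((v * 0x01010101u) >> 24);				// sum the four bytes
		}
*/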


/** Find the most significant 1 in a 64 bit word

	@param	v	The word to be scanned
	@return		The bit number of the most significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ms1_64(TUint64 /*v*/)
	{
	_asm bsr eax, [esp+8]		// scan the high word first
	_asm jnz short mswnz
	_asm bsr eax, [esp+4]
	_asm jnz short lswnz
	_asm mov eax, 0ffffffffh	// v == 0: falling through the OR below is harmless since -1|32 == -1
mswnz:
	_asm or eax, 32				// MS1 was in the high word, so add 32 to the bit number
lswnz:
	_asm ret
	}


/** Find the least significant 1 in a 64 bit word

	@param	v	The word to be scanned
	@return		The bit number of the least significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ls1_64(TUint64 /*v*/)
	{
	_asm bsf eax, [esp+4]		// scan the low word first
	_asm jnz short lswnz
	_asm bsf eax, [esp+8]
	_asm jnz short mswnz
	_asm mov eax, 0ffffffffh	// v == 0: falling through the OR below is harmless since -1|32 == -1
mswnz:
	_asm or eax, 32				// LS1 was in the high word, so add 32 to the bit number
lswnz:
	_asm ret
	}


/** Count the number of 1's in a 64 bit word

	@param	v	The word to be scanned
	@return		The number of 1's
*/
EXPORT_C __NAKED__ TInt __e32_bit_count_64(TUint64 /*v*/)
	{
	_asm mov eax, [esp+4]
	_asm mov edx, [esp+8]

	_asm mov ecx, eax
	_asm and eax, 0aaaaaaaah
	_asm and ecx, 055555555h
	_asm shr eax, 1
	_asm add eax, ecx			/* 16 groups of 2 bits, count=0,1,2 */
	_asm mov ecx, eax
	_asm and eax, 0cccccccch
	_asm and ecx, 033333333h
	_asm shr eax, 2
	_asm add ecx, eax			/* 8 groups of 4 bits, count=0...4 */

	_asm mov eax, edx
	_asm and eax, 0aaaaaaaah
	_asm and edx, 055555555h
	_asm shr eax, 1
	_asm add eax, edx			/* 16 groups of 2 bits, count=0,1,2 */
	_asm mov edx, eax
	_asm and eax, 0cccccccch
	_asm and edx, 033333333h
	_asm shr eax, 2
	_asm add eax, edx			/* 8 groups of 4 bits, count=0...4 */

	_asm add eax, ecx			/* 8 groups of 4 bits, count=0...8 */
	_asm mov edx, eax
	_asm and eax, 0f0f0f0f0h
	_asm and edx, 00f0f0f0fh
	_asm shr eax, 4
	_asm add eax, edx			/* 4 groups of 8 bits, count=0...16 */
	_asm add al, ah
	_asm mov dl, al
	_asm shr eax, 16
	_asm add al, ah
	_asm xor ah, ah
	_asm add al, dl
	_asm ret
	}




/** Read a 64 bit word with acquire semantics

	@param	a	Address of word to be read - must be a multiple of 8
	@return		The value read
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_load_acq64(const volatile TAny* /*a*/)
	{
	_asm push ebx
	_asm push edi
	_asm mov edi, [esp+12]			// edi = a
	_asm mov eax, 0badbeefh			// any value will do for the comparand ...
	_asm mov edx, eax
	_asm mov ebx, eax
	_asm mov ecx, eax
	_asm __LOCK__ cmpxchg8b [edi]	// ... if *a matches it is rewritten unchanged, otherwise edx:eax = *a; either way edx:eax holds *a atomically
	_asm pop edi
	_asm pop ebx
	_asm ret
	}
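
/* In effect a 64 bit atomic load is a compare-exchange with an arbitrary
   expected value. A sketch of the idea, not the exported implementation:

	static inline TUint64 AtomicLoad64(const volatile TUint64* a)
		{
		TUint64 guess = 0xbadbeef;	// arbitrary
		// If *a == guess, guess is stored back unchanged; otherwise guess
		// is updated to the current value of *a. Both outcomes leave
		// guess == *a.
		__e32_atomic_cas_ord64((volatile TAny*)a, &guess, guess);
		return guess;
		}
*/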


/** Write a 64 bit word with release semantics

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The value written
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_store_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm push ebx
	_asm push edi
	_asm mov edi, [esp+12]				// edi = a
	_asm mov ebx, [esp+16]				// ecx:ebx = v
	_asm mov ecx, [esp+20]
	_asm mov eax, [edi]					// edx:eax = current value of *a
	_asm mov edx, [edi+4]
	_asm retry:
	_asm __LOCK__ cmpxchg8b [edi]		// if (*a==edx:eax) *a=v, ZF=1 else edx:eax=*a, ZF=0
	_asm jne short retry
	_asm mov eax, ebx					// return v
	_asm mov edx, ecx
	_asm pop edi
	_asm pop ebx
	_asm ret
	}
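
/* Every 64 bit read-modify-write operation below follows this same
   CMPXCHG8B retry pattern, which in C reads roughly as follows. A sketch;
   ModifyOf stands for whichever operation the function performs:

	TUint64 oldv = *a;
	for (;;)
		{
		TUint64 newv = ModifyOf(oldv);				// e.g. oldv+v, oldv&v, ...
		if (__e32_atomic_cas_ord64(a, &oldv, newv))
			break;									// *a was still oldv; updated
		// cas failed and refreshed oldv from *a; go round again
		}
	return oldv;
*/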


/** Write a 64 bit word with full barrier semantics

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The value written
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_store_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_store_rel64
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Relaxed ordering.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_swp_ord64
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Acquire semantics.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_swp_ord64
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Release semantics.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_swp_ord64
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Full barrier semantics.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm push ebx
	_asm push edi
	_asm mov edi, [esp+12]				// edi = a
	_asm mov ebx, [esp+16]				// ecx:ebx = v
	_asm mov ecx, [esp+20]
	_asm mov eax, [edi]					// edx:eax = original value of *a
	_asm mov edx, [edi+4]
	_asm retry:
	_asm __LOCK__ cmpxchg8b [edi]		// if (*a==edx:eax) *a=v, ZF=1 else edx:eax=*a, ZF=0
	_asm jne short retry
	_asm pop edi
	_asm pop ebx
	_asm ret
	}


/** 64 bit compare and swap, relaxed ordering.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_rlx64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_cas_ord64
	}


/** 64 bit compare and swap, acquire semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_acq64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_cas_ord64
	}


/** 64 bit compare and swap, release semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_rel64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_cas_ord64
	}


/** 64 bit compare and swap, full barrier semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_ord64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	_asm push ebx
	_asm push edi
	_asm push esi
	_asm mov edi, [esp+16]				// edi = a
	_asm mov esi, [esp+20]				// esi = q
	_asm mov ebx, [esp+24]				// ecx:ebx = v
	_asm mov ecx, [esp+28]
	_asm mov eax, [esi]					// edx:eax = *q
	_asm mov edx, [esi+4]
	_asm __LOCK__ cmpxchg8b [edi]		// if (*a==*q) *a=v, ZF=1 else edx:eax=*a, ZF=0
	_asm jne short cas_fail
	_asm mov eax, 1
	_asm pop esi
	_asm pop edi
	_asm pop ebx
	_asm ret
	_asm cas_fail:
	_asm mov [esi], eax					// *q = edx:eax
	_asm mov [esi+4], edx
	_asm xor eax, eax
	_asm pop esi
	_asm pop edi
	_asm pop ebx
	_asm ret
	}
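
/* Typical use of the compare-and-swap primitive: a lock-free 64 bit
   increment, equivalent in effect to __e32_atomic_add_ord64(a, 1).
   A sketch; Increment64 is an illustrative name:

	static inline TUint64 Increment64(volatile TAny* a)
		{
		TUint64 oldv = __e32_atomic_load_acq64(a);
		while (!__e32_atomic_cas_ord64(a, &oldv, oldv+1))
			{
			// cas failed and updated oldv with the current *a; retry
			}
		return oldv;
		}
*/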


/** 64 bit atomic add, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_add_ord64
	}


/** 64 bit atomic add, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_add_ord64
	}


/** 64 bit atomic add, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_add_ord64
	}


/** 64 bit atomic add, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm push ebx
	_asm push edi
	_asm mov edi, [esp+12]				// edi = a
	_asm mov eax, [edi]					// edx:eax = oldv
	_asm mov edx, [edi+4]
	_asm retry:
	_asm mov ebx, eax
	_asm mov ecx, edx
	_asm add ebx, [esp+16]				// ecx:ebx = oldv + v
	_asm adc ecx, [esp+20]
	_asm __LOCK__ cmpxchg8b [edi]		// if (*a==oldv) *a=oldv+v, ZF=1 else edx:eax=*a, ZF=0
	_asm jne short retry
	_asm pop edi
	_asm pop ebx
	_asm ret
	}
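
/* For example, a shared 64 bit statistics counter can be bumped from any
   thread without a lock. A sketch; kTicks is an illustrative name:

	static volatile TUint64 kTicks = 0;

	void RecordTick()
		{
		__e32_atomic_add_rlx64(&kTicks, 1);		// no ordering needed for a pure counter
		}
*/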


/** 64 bit atomic bitwise logical AND, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_and_ord64
	}


/** 64 bit atomic bitwise logical AND, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_and_ord64
	}


/** 64 bit atomic bitwise logical AND, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_and_ord64
	}


/** 64 bit atomic bitwise logical AND, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm push ebx
	_asm push edi
	_asm mov edi, [esp+12]				// edi = a
	_asm mov eax, [edi]					// edx:eax = oldv
	_asm mov edx, [edi+4]
	_asm retry:
	_asm mov ebx, eax
	_asm mov ecx, edx
	_asm and ebx, [esp+16]				// ecx:ebx = oldv & v
	_asm and ecx, [esp+20]
	_asm __LOCK__ cmpxchg8b [edi]		// if (*a==oldv) *a=oldv&v, ZF=1 else edx:eax=*a, ZF=0
	_asm jne short retry
	_asm pop edi
	_asm pop ebx
	_asm ret
	}


/** 64 bit atomic bitwise logical inclusive OR, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_ior_ord64
	}


/** 64 bit atomic bitwise logical inclusive OR, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_ior_ord64
	}


/** 64 bit atomic bitwise logical inclusive OR, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_ior_ord64
	}


/** 64 bit atomic bitwise logical inclusive OR, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm push ebx
	_asm push edi
	_asm mov edi, [esp+12]				// edi = a
	_asm mov eax, [edi]					// edx:eax = oldv
	_asm mov edx, [edi+4]
	_asm retry:
	_asm mov ebx, eax
	_asm mov ecx, edx
	_asm or ebx, [esp+16]				// ecx:ebx = oldv | v
	_asm or ecx, [esp+20]
	_asm __LOCK__ cmpxchg8b [edi]		// if (*a==oldv) *a=oldv|v, ZF=1 else edx:eax=*a, ZF=0
	_asm jne short retry
	_asm pop edi
	_asm pop ebx
	_asm ret
	}


/** 64 bit atomic bitwise logical exclusive OR, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_xor_ord64
	}


/** 64 bit atomic bitwise logical exclusive OR, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_xor_ord64
	}


/** 64 bit atomic bitwise logical exclusive OR, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_xor_ord64
	}


/** 64 bit atomic bitwise logical exclusive OR, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	_asm push ebx
	_asm push edi
	_asm mov edi, [esp+12]				// edi = a
	_asm mov eax, [edi]					// edx:eax = oldv
	_asm mov edx, [edi+4]
	_asm retry:
	_asm mov ebx, eax
	_asm mov ecx, edx
	_asm xor ebx, [esp+16]				// ecx:ebx = oldv ^ v
	_asm xor ecx, [esp+20]
	_asm __LOCK__ cmpxchg8b [edi]		// if (*a==oldv) *a=oldv^v, ZF=1 else edx:eax=*a, ZF=0
	_asm jne short retry
	_asm pop edi
	_asm pop ebx
	_asm ret
	}


/** 64 bit atomic bitwise universal function, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_rlx64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_axo_ord64
	}


/** 64 bit atomic bitwise universal function, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_acq64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_axo_ord64
	}


/** 64 bit atomic bitwise universal function, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_rel64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_axo_ord64
	}


/** 64 bit atomic bitwise universal function, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_ord64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	_asm push ebx
	_asm push edi
	_asm mov edi, [esp+12]				// edi = a
	_asm mov eax, [edi]					// edx:eax = oldv
	_asm mov edx, [edi+4]
	_asm retry:
	_asm mov ebx, eax
	_asm mov ecx, edx
	_asm and ebx, [esp+16]				// ecx:ebx = oldv & u
	_asm and ecx, [esp+20]
	_asm xor ebx, [esp+24]				// ecx:ebx = (oldv & u) ^ v
	_asm xor ecx, [esp+28]
	_asm __LOCK__ cmpxchg8b [edi]		// if (*a==oldv) *a=(oldv&u)^v, ZF=1 else edx:eax=*a, ZF=0
	_asm jne short retry
	_asm pop edi
	_asm pop ebx
	_asm ret
	}
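
/* The and/ior/xor operations above are all special cases of this
   and-then-xor form, since (oldv & u) ^ v can express each of them.
   A sketch of the identities, for a mask m:

	__e32_atomic_and_ord64(a, m);	// == __e32_atomic_axo_ord64(a, m, 0)
	__e32_atomic_xor_ord64(a, m);	// == __e32_atomic_axo_ord64(a, ~(TUint64)0, m)
	__e32_atomic_ior_ord64(a, m);	// == __e32_atomic_axo_ord64(a, ~m, m)
*/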


/** 64 bit threshold and add, unsigned, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_rlx64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_tau_ord64
	}


/** 64 bit threshold and add, unsigned, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_acq64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_tau_ord64
	}


/** 64 bit threshold and add, unsigned, release semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_rel64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	_asm jmp __e32_atomic_tau_ord64
	}


/** 64 bit threshold and add, unsigned, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_ord64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	_asm push ebx
	_asm push edi
	_asm mov edi, [esp+12]				// edi = a
	_asm mov eax, [edi]					// edx:eax = oldv
	_asm mov edx, [edi+4]
	_asm retry:
	_asm mov ebx, edx
	_asm cmp eax, [esp+16]				// eax - t.low, CF=borrow
	_asm sbb ebx, [esp+20]				// CF = borrow from (oldv - t)
	_asm jnc short use_u				// no borrow means oldv>=t so use u
	_asm mov ebx, [esp+32]				// ecx:ebx = v
	_asm mov ecx, [esp+36]
	_asm jmp short use_v				// ecx:ebx already holds v, so skip the u load
	_asm use_u:
	_asm mov ebx, [esp+24]				// ecx:ebx = u
	_asm mov ecx, [esp+28]
	_asm use_v:							// common path: ecx:ebx = u or v as selected above
	_asm add ebx, eax					// ecx:ebx = oldv + u or v
	_asm adc ecx, edx
	_asm __LOCK__ cmpxchg8b [edi]
	_asm jne short retry
	_asm pop edi
	_asm pop ebx
	_asm ret
	}
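
/* Example: a saturating decrement that never takes a counter below zero.
   A sketch, given a volatile TUint64 count - if the current value is at
   least 1 subtract 1, otherwise add 0:

	TUint64 oldCount = __e32_atomic_tau_ord64(&count, 1, (TUint64)-1, 0);
	// oldCount == 0 means the counter was already exhausted
*/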


/** 64 bit threshold and add, signed, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_rlx64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	_asm jmp __e32_atomic_tas_ord64
	}


/** 64 bit threshold and add, signed, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_acq64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	_asm jmp __e32_atomic_tas_ord64
	}


/** 64 bit threshold and add, signed, release semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_rel64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	_asm jmp __e32_atomic_tas_ord64
	}


/** 64 bit threshold and add, signed, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_ord64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	_asm push ebx
	_asm push edi
	_asm mov edi, [esp+12]				// edi = a
	_asm mov eax, [edi]					// edx:eax = oldv
	_asm mov edx, [edi+4]
	_asm retry:
	_asm mov ebx, edx
	_asm cmp eax, [esp+16]				// eax - t.low, CF=borrow
	_asm sbb ebx, [esp+20]				// SF=sign, OF=overflow from (oldv - t)
	_asm jge short use_u				// SF==OF (GE condition) means oldv>=t so use u
	_asm mov ebx, [esp+32]				// ecx:ebx = v
	_asm mov ecx, [esp+36]
	_asm jmp short use_v				// ecx:ebx already holds v, so skip the u load
	_asm use_u:
	_asm mov ebx, [esp+24]				// ecx:ebx = u
	_asm mov ecx, [esp+28]
	_asm use_v:							// common path: ecx:ebx = u or v as selected above
	_asm add ebx, eax					// ecx:ebx = oldv + u or v
	_asm adc ecx, edx
	_asm __LOCK__ cmpxchg8b [edi]
	_asm jne short retry
	_asm pop edi
	_asm pop ebx
	_asm ret
	}
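
/* The signed form allows thresholds below zero, e.g. clamping a signed
   balance at a floor of -100. A sketch, given a volatile TInt64 balance:

	TInt64 old = __e32_atomic_tas_ord64(&balance, -99, -1, 0);
	// subtracts 1 only while balance >= -99, so it never goes below -100
*/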

} // extern "C"