// Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32\common\x86\atomics.cia
//
//

#include <e32atomics.h>
#include <cpudefs.h>

/*
Versions needed:
	WINS/WINSCW		Use X86 locked operations. Assume Pentium or above CPU (CMPXCHG8B available)
	X86				For Pentium and above use locked operations
					For 486 use locked operations for 8, 16, 32 bit. For 64 bit must disable interrupts.
					NOTE: 486 not supported at the moment
	ARMv4/ARMv5		Must disable interrupts.
	ARMv6			LDREX/STREX for 8, 16, 32 bit. For 64 bit must disable interrupts (maybe).
	ARMv6K/ARMv7	LDREXB/LDREXH/LDREX/LDREXD

Need both kernel side and user side versions
*/

#if	defined(__SMP__) || !defined(__EPOC32__)
#define	__BARRIERS_NEEDED__
#define	__LOCK__	"lock "
#else
#define	__LOCK__
#endif


extern "C" {

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint32
#define	__TIntX__		TInt32
#define	__fname__(x)	x##32
#define	__redir__(x)	asm("jmp _"#x "32")
#define	__A_REG__		"eax"
#define	__C_REG__		"ecx"
#define	__D_REG__		"edx"
#include "atomic_skeleton.h"
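
/*
atomic_skeleton.h is included once per operand size; the macros above select
the types, register names and function suffix for each instantiation. As a
rough illustration (using a generic operation name purely for the sake of
example - the real names come from atomic_skeleton.h), with the 32 bit
definitions in effect:

	__fname__(__e32_atomic_swp_rlx)		expands to	__e32_atomic_swp_rlx32
	__redir__(__e32_atomic_swp_rlx)		expands to	asm("jmp ___e32_atomic_swp_rlx32")

so a relaxed/acquire/release variant can simply tail-jump to the full-barrier
implementation of the same width, as the 64 bit functions below do explicitly.
*/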

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint16
#define	__TIntX__		TInt16
#define	__fname__(x)	x##16
#define	__redir__(x)	asm("jmp _"#x "16")
#define	__A_REG__		"ax"
#define	__C_REG__		"cx"
#define	__D_REG__		"dx"
#include "atomic_skeleton.h"

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint8
#define	__TIntX__		TInt8
#define	__fname__(x)	x##8
#define	__redir__(x)	asm("jmp _"#x "8")
#define	__A_REG__		"al"
#define	__C_REG__		"cl"
#define	__D_REG__		"dl"
#include "atomic_skeleton.h"

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__

/** Full memory barrier for explicit memory accesses

*/
EXPORT_C __NAKED__ void __e32_memory_barrier()
	{
#ifdef __BARRIERS_NEEDED__
	asm("lock add dword ptr [esp], 0 ");
#endif
	asm("ret ");
	}
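
/*
A LOCK-prefixed read-modify-write is a full fence on x86: all earlier loads
and stores complete before all later ones, and unlike MFENCE it needs no SSE2
support. Adding zero to the top of the stack therefore serialises memory
accesses without changing any data. A minimal usage sketch (hypothetical
names; illustration only):
*/
#if 0	// illustration only - not built
static TUint32 TheData;				// hypothetical shared payload
static volatile TUint32 TheReady;	// hypothetical ready flag

void PublishExample(TUint32 aValue)
	{
	TheData = aValue;			// write the payload first
	__e32_memory_barrier();		// order the payload write before the flag write
	TheReady = 1;				// a reader pairs this with its own barrier/acquire load
	}
#endif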


/** Barrier guaranteeing completion as well as ordering

*/
EXPORT_C __NAKED__ void __e32_io_completion_barrier()
	{
	asm("push ebx ");		/* CPUID clobbers EBX, which must be preserved under the ABI */
	asm("cpuid ");			/* CPUID is a serializing instruction - nothing is reordered across it */
	asm("pop ebx ");
	asm("ret ");
	}


/** Find the most significant 1 in a 32 bit word

	@param	v	The word to be scanned
	@return		The bit number of the most significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ms1_32(TUint32 /*v*/)
	{
	asm("bsr eax, [esp+4] ");
	asm("jnz short 1f ");
	asm("mov eax, 0xffffffff ");
	asm("1: ");
	asm("ret ");
	}


/** Find the least significant 1 in a 32 bit word

	@param	v	The word to be scanned
	@return		The bit number of the least significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ls1_32(TUint32 /*v*/)
	{
	asm("bsf eax, [esp+4] ");
	asm("jnz short 1f ");
	asm("mov eax, 0xffffffff ");
	asm("1: ");
	asm("ret ");
	}


/** Count the number of 1's in a 32 bit word

	@param	v	The word to be scanned
	@return		The number of 1's
*/
EXPORT_C __NAKED__ TInt __e32_bit_count_32(TUint32 /*v*/)
	{
	asm("mov eax, [esp+4] ");
	asm("mov edx, eax ");
	asm("and eax, 0xaaaaaaaa ");
	asm("and edx, 0x55555555 ");	/* edx = even bits of arg */
	asm("shr eax, 1 ");				/* eax = odd bits of arg shifted into even bits */
	asm("add eax, edx ");			/* eax = 16 groups of 2 bit counts */
	asm("mov edx, eax ");
	asm("and eax, 0xcccccccc ");
	asm("and edx, 0x33333333 ");	/* even groups of 2 */
	asm("shr eax, 2 ");				/* odd groups of 2 shifted to even positions */
	asm("add eax, edx ");			/* 8 groups of 4 bit counts */
	asm("mov edx, eax ");
	asm("shr eax, 4 ");
	asm("add eax, edx ");			/* even nibbles = sum of 8 bits, odd nibbles garbage */
	asm("and eax, 0x0f0f0f0f ");	/* eliminate garbage nibbles */
	asm("add al, ah ");				/* AL = bit count of lower 16 bits */
	asm("mov dl, al ");
	asm("shr eax, 16 ");
	asm("add al, ah ");				/* AL = bit count of upper 16 bits */
	asm("xor ah, ah ");				/* top 24 bits of EAX now zero */
	asm("add al, dl ");				/* AL = bit count of entire 32 bits */
	asm("ret ");
	}
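
/*
The sequence above is a SWAR ("SIMD within a register") population count:
adjacent bits are summed into 2 bit fields, then 4 bit fields, then bytes,
and finally the bytes are folded together. The same reduction written in C
(illustration only, not built):
*/
#if 0
static TInt BitCount32Sketch(TUint32 v)
	{
	v = (v & 0x55555555u) + ((v >> 1) & 0x55555555u);	// 16 groups of 2 bit counts
	v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);	// 8 groups of 4 bit counts
	v = (v + (v >> 4)) & 0x0f0f0f0fu;					// 4 byte-wide counts
	v += v >> 16;										// fold upper half into lower
	v += v >> 8;										// fold remaining bytes
	return (TInt)(v & 0xff);
	}
#endif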


/** Find the most significant 1 in a 64 bit word

	@param	v	The word to be scanned
	@return		The bit number of the most significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ms1_64(TUint64 /*v*/)
	{
	asm("bsr eax, [esp+8] ");
	asm("jnz short 2f ");
	asm("bsr eax, [esp+4] ");
	asm("jnz short 1f ");
	asm("mov eax, 0xffffffff ");
	asm("2: ");
	asm("or eax, 32 ");
	asm("1: ");
	asm("ret ");
	}


/** Find the least significant 1 in a 64 bit word

	@param	v	The word to be scanned
	@return		The bit number of the least significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ls1_64(TUint64 /*v*/)
	{
	asm("bsf eax, [esp+4] ");
	asm("jnz short 1f ");
	asm("bsf eax, [esp+8] ");
	asm("jnz short 2f ");
	asm("mov eax, 0xffffffff ");
	asm("2: ");
	asm("or eax, 32 ");
	asm("1: ");
	asm("ret ");
	}


/** Count the number of 1's in a 64 bit word

	@param	v	The word to be scanned
	@return		The number of 1's
*/
EXPORT_C __NAKED__ TInt __e32_bit_count_64(TUint64 /*v*/)
	{
	asm("mov eax, [esp+4] ");
	asm("mov edx, [esp+8] ");

	asm("mov ecx, eax ");
	asm("and eax, 0xaaaaaaaa ");
	asm("and ecx, 0x55555555 ");
	asm("shr eax, 1 ");
	asm("add eax, ecx ");
	asm("mov ecx, eax ");
	asm("and eax, 0xcccccccc ");
	asm("and ecx, 0x33333333 ");
	asm("shr eax, 2 ");
	asm("add ecx, eax ");

	asm("mov eax, edx ");
	asm("and eax, 0xaaaaaaaa ");
	asm("and edx, 0x55555555 ");
	asm("shr eax, 1 ");
	asm("add eax, edx ");
	asm("mov edx, eax ");
	asm("and eax, 0xcccccccc ");
	asm("and edx, 0x33333333 ");
	asm("shr eax, 2 ");
	asm("add eax, edx ");

	asm("add eax, ecx ");
	asm("mov edx, eax ");
	asm("and eax, 0xf0f0f0f0 ");
	asm("and edx, 0x0f0f0f0f ");
	asm("shr eax, 4 ");
	asm("add eax, edx ");
	asm("add al, ah ");
	asm("mov dl, al ");
	asm("shr eax, 16 ");
	asm("add al, ah ");
	asm("xor ah, ah ");
	asm("add al, dl ");
	asm("ret ");
	}



/** Read a 64 bit word with acquire semantics

	@param	a	Address of word to be read - must be a multiple of 8
	@return		The value read
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_load_acq64(const volatile TAny* /*a*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");
	asm("mov eax, 0x0badbeef ");
	asm("mov edx, eax ");
	asm("mov ebx, eax ");
	asm("mov ecx, eax ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
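
/*
IA-32 has no plain 64 bit load, so two separate 32 bit MOVs could return a
torn value. LOCK CMPXCHG8B with an arbitrary comparand (0x0badbeef in every
register) reads the quadword atomically instead: if the guess happens to
match, the same value is written straight back, otherwise EDX:EAX is loaded
with the current contents - either way the caller gets an untorn snapshot and
the locked operation supplies the barrier. A minimal usage sketch
(hypothetical name; illustration only):
*/
#if 0
static volatile TUint64 TheTickCount64;		// hypothetical counter updated elsewhere

TUint64 ReadTicksExample()
	{
	// A plain 64 bit read could observe half of an in-progress update;
	// the atomic load cannot.
	return __e32_atomic_load_acq64(&TheTickCount64);
	}
#endif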


/** Write a 64 bit word with release semantics

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The value written
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_store_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");
	asm("mov ebx, [esp+16] ");
	asm("mov ecx, [esp+20] ");
	asm("mov eax, [edi] ");
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("jne short 1b ");
	asm("mov eax, ebx ");
	asm("mov edx, ecx ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** Write a 64 bit word with full barrier semantics

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The value written
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_store_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_store_rel64 ");
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Relaxed ordering.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_swp_ord64 ");
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Acquire semantics.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_swp_ord64 ");
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Release semantics.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_swp_ord64 ");
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Full barrier semantics.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");
	asm("mov ebx, [esp+16] ");
	asm("mov ecx, [esp+20] ");
	asm("mov eax, [edi] ");
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
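
/*
Exchange suits single-slot hand-off: a producer deposits the newest 64 bit
sample and a consumer claims it by swapping in a sentinel. A minimal sketch
(hypothetical names; illustration only):
*/
#if 0
static volatile TUint64 TheLatestSample;	// hypothetical shared slot
const TUint64 KNoSample = ~(TUint64)0;		// hypothetical "empty" marker

void ProduceExample(TUint64 aSample)
	{
	__e32_atomic_swp_rel64(&TheLatestSample, aSample);		// publish the newest value
	}

TUint64 ConsumeExample()
	{
	return __e32_atomic_swp_acq64(&TheLatestSample, KNoSample);	// take it, leave the slot empty
	}
#endif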


/** 64 bit compare and swap, relaxed ordering.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_rlx64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_cas_ord64 ");
	}


/** 64 bit compare and swap, acquire semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_acq64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_cas_ord64 ");
	}


/** 64 bit compare and swap, release semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_rel64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_cas_ord64 ");
	}


/** 64 bit compare and swap, full barrier semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_ord64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("push esi ");
	asm("mov edi, [esp+16] ");			// edi = a
	asm("mov esi, [esp+20] ");			// esi = q
	asm("mov ebx, [esp+24] ");			// ecx:ebx = v
	asm("mov ecx, [esp+28] ");
	asm("mov eax, [esi] ");				// edx:eax = *q
	asm("mov edx, [esi+4] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==*q) *a=v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 2f ");
	asm("mov eax, 1 ");
	asm("pop esi ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	asm("2: ");
	asm("mov [esi], eax ");				// *q = edx:eax
	asm("mov [esi+4], edx ");
	asm("xor eax, eax ");
	asm("pop esi ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
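
/*
On failure the CAS functions write the current contents of *a back into *q,
so a read-modify-write loop needs only one explicit load. A minimal sketch
that keeps a running maximum (hypothetical name; illustration only):
*/
#if 0
void StoreMaxExample(volatile TUint64* aMax, TUint64 aValue)
	{
	TUint64 oldv = __e32_atomic_load_acq64(aMax);
	while (aValue > oldv)
		{
		// If *aMax still equals oldv it becomes aValue and we are done;
		// otherwise oldv is refreshed with the current *aMax and we retry.
		if (__e32_atomic_cas_ord64(aMax, &oldv, aValue))
			break;
		}
	}
#endif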


/** 64 bit atomic add, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_add_ord64 ");
	}


/** 64 bit atomic add, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_add_ord64 ");
	}


/** 64 bit atomic add, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_add_ord64 ");
	}


/** 64 bit atomic add, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("add ebx, [esp+16] ");			// ecx:ebx = oldv + v
	asm("adc ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv+v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
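
/*
Because the pre-update value is returned, the add functions behave as
fetch-and-add and can hand out unique 64 bit sequence numbers directly.
A minimal sketch (hypothetical name; illustration only):
*/
#if 0
static volatile TUint64 TheNextId;		// hypothetical global sequence counter

TUint64 AllocateIdExample()
	{
	return __e32_atomic_add_ord64(&TheNextId, 1);	// the old value is this caller's id
	}
#endif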


/** 64 bit atomic bitwise logical AND, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_and_ord64 ");
	}


/** 64 bit atomic bitwise logical AND, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_and_ord64 ");
	}


/** 64 bit atomic bitwise logical AND, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_and_ord64 ");
	}


/** 64 bit atomic bitwise logical AND, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("and ebx, [esp+16] ");			// ecx:ebx = oldv & v
	asm("and ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv&v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit atomic bitwise logical inclusive OR, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_ior_ord64 ");
	}


/** 64 bit atomic bitwise logical inclusive OR, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_ior_ord64 ");
	}


/** 64 bit atomic bitwise logical inclusive OR, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_ior_ord64 ");
	}


/** 64 bit atomic bitwise logical inclusive OR, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("or ebx, [esp+16] ");			// ecx:ebx = oldv | v
	asm("or ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv|v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit atomic bitwise logical exclusive OR, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_xor_ord64 ");
	}


/** 64 bit atomic bitwise logical exclusive OR, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_xor_ord64 ");
	}


/** 64 bit atomic bitwise logical exclusive OR, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_xor_ord64 ");
	}


/** 64 bit atomic bitwise logical exclusive OR, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("xor ebx, [esp+16] ");			// ecx:ebx = oldv ^ v
	asm("xor ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv^v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit atomic bitwise universal function, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_rlx64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_axo_ord64 ");
	}


/** 64 bit atomic bitwise universal function, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_acq64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_axo_ord64 ");
	}


/** 64 bit atomic bitwise universal function, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_rel64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_axo_ord64 ");
	}


/** 64 bit atomic bitwise universal function, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_ord64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("and ebx, [esp+16] ");			// ecx:ebx = oldv & u
	asm("and ecx, [esp+20] ");
	asm("xor ebx, [esp+24] ");			// ecx:ebx = (oldv & u) ^ v
	asm("xor ecx, [esp+28] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=(oldv&u)^v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
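
/*
The AND-XOR form is universal for the bitwise operations above: for a mask m,

	and:	u = m,	v = 0		(oldv & m)  ^ 0 == oldv & m
	ior:	u = ~m,	v = m		(oldv & ~m) ^ m == oldv | m
	xor:	u = ~0,	v = m		(oldv & ~0) ^ m == oldv ^ m
	swp:	u = 0,	v = m		(oldv & 0)  ^ m == m

so any of them could be expressed as a call to the axo functions.
*/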


/** 64 bit threshold and add, unsigned, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_rlx64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_tau_ord64 ");
	}


/** 64 bit threshold and add, unsigned, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_acq64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_tau_ord64 ");
	}


/** 64 bit threshold and add, unsigned, release semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_rel64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_tau_ord64 ");
	}


/** 64 bit threshold and add, unsigned, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_ord64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, edx ");
	asm("cmp eax, [esp+16] ");			// eax - t.low, CF=borrow
	asm("sbb ebx, [esp+20] ");			// CF = borrow from (oldv - t)
	asm("jnc short 2f ");				// no borrow means oldv>=t so use u
	asm("mov ebx, [esp+32] ");			// ecx:ebx = v
	asm("mov ecx, [esp+36] ");
	asm("jmp short 3f ");
	asm("2: ");
	asm("mov ebx, [esp+24] ");			// ecx:ebx = u
	asm("mov ecx, [esp+28] ");
	asm("3: ");
	asm("add ebx, eax ");				// ecx:ebx = oldv + u or v
	asm("adc ecx, edx ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
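
/*
Threshold-and-add performs "conditionally step a counter" in one atomic
operation. For example, to take a unit from a resource count without ever
letting it go below zero (hypothetical name; illustration only):
*/
#if 0
TBool TakeOneIfAvailableExample(volatile TUint64* aCount)
	{
	// If the old value was >= 1 it is decremented (u = -1), otherwise it is
	// left unchanged (v = 0); the pre-update value is returned either way.
	TUint64 oldv = __e32_atomic_tau_ord64(aCount, 1, (TUint64)-1, 0);
	return oldv != 0;		// TRUE if a unit was actually consumed
	}
#endif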


/** 64 bit threshold and add, signed, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_rlx64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("jmp ___e32_atomic_tas_ord64 ");
	}


/** 64 bit threshold and add, signed, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_acq64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("jmp ___e32_atomic_tas_ord64 ");
	}


/** 64 bit threshold and add, signed, release semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_rel64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("jmp ___e32_atomic_tas_ord64 ");
	}


/** 64 bit threshold and add, signed, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_ord64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, edx ");
	asm("cmp eax, [esp+16] ");			// eax - t.low, CF=borrow
	asm("sbb ebx, [esp+20] ");			// SF=sign, OF=overflow from (oldv - t)
	asm("jge short 2f ");				// SF==OF (GE condition) means oldv>=t so use u
	asm("mov ebx, [esp+32] ");			// ecx:ebx = v
	asm("mov ecx, [esp+36] ");
	asm("jmp short 3f ");
	asm("2: ");
	asm("mov ebx, [esp+24] ");			// ecx:ebx = u
	asm("mov ecx, [esp+28] ");
	asm("3: ");
	asm("add ebx, eax ");				// ecx:ebx = oldv + u or v
	asm("adc ecx, edx ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}

} // extern "C"