// Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32\common\x86\atomics.cia
//
//

#include <e32atomics.h>
#include <cpudefs.h>

/*
Versions needed:
	WINS/WINSCW		Use X86 locked operations. Assume Pentium or above CPU (CMPXCHG8B available)
	X86				For Pentium and above use locked operations
					For 486 use locked operations for 8, 16, 32 bit. For 64 bit must disable interrupts.
					NOTE: 486 not supported at the moment
	ARMv4/ARMv5		Must disable interrupts.
	ARMv6			LDREX/STREX for 8, 16, 32 bit. For 64 bit must disable interrupts (maybe).
	ARMv6K/ARMv7	LDREXB/LDREXH/LDREX/LDREXD

Need both kernel side and user side versions
*/

#if	defined(__SMP__) || !defined(__EPOC32__)
#define	__BARRIERS_NEEDED__
#define	__LOCK__	"lock "
#else
#define	__LOCK__
#endif


extern "C" {

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint32
#define	__TIntX__		TInt32
#define	__fname__(x)	x##32
#define	__redir__(x)	asm("jmp _"#x "32")
#define	__A_REG__		"eax"
#define	__C_REG__		"ecx"
#define	__D_REG__		"edx"
#include "atomic_skeleton.h"

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint16
#define	__TIntX__		TInt16
#define	__fname__(x)	x##16
#define	__redir__(x)	asm("jmp _"#x "16")
#define	__A_REG__		"ax"
#define	__C_REG__		"cx"
#define	__D_REG__		"dx"
#include "atomic_skeleton.h"

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint8
#define	__TIntX__		TInt8
#define	__fname__(x)	x##8
#define	__redir__(x)	asm("jmp _"#x "8")
#define	__A_REG__		"al"
#define	__C_REG__		"cl"
#define	__D_REG__		"dl"
#include "atomic_skeleton.h"

#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
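
/*
Illustrative note (not part of the original source): atomic_skeleton.h is included
three times above, once per operand size, and appears to use the macros defined
immediately before each inclusion to stamp out the 8, 16 and 32 bit entry points;
the 64 bit operations are written out by hand below because they need CMPXCHG8B.
For example, with the 32 bit definitions in effect:

	__fname__(__e32_atomic_add_ord)		// expands to __e32_atomic_add_ord32
	__redir__(__e32_atomic_add_rlx)		// expands to asm("jmp ___e32_atomic_add_rlx32")
	__A_REG__							// expands to "eax", the accumulator of matching width
*/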

/** Full memory barrier for explicit memory accesses

*/
EXPORT_C __NAKED__ void __e32_memory_barrier()
	{
#ifdef __BARRIERS_NEEDED__
	asm("lock add dword ptr [esp], 0 ");
#endif
	asm("ret ");
	}
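
/*
Usage sketch (assumption, not part of the original source): a locked read-modify-write
of the stack top acts as a full fence on IA-32, so ordinary accesses can be ordered
around it, e.g. publishing a payload before a ready flag (payload, readyFlag and
ComputeValue() are hypothetical names):

	payload = ComputeValue();		// ordinary store
	__e32_memory_barrier();			// payload globally visible before the flag
	readyFlag = 1;					// ordinary store

On a non-SMP EPOC32 build __BARRIERS_NEEDED__ is not defined and the function
collapses to a bare RET.
*/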


/** Barrier guaranteeing completion as well as ordering

*/
EXPORT_C __NAKED__ void __e32_io_completion_barrier()
	{
	asm("push ebx ");
	asm("cpuid ");			// CPUID is a serializing instruction; it clobbers EBX (callee-saved), so preserve it
	asm("pop ebx ");
	asm("ret ");
	}


/** Find the most significant 1 in a 32 bit word

	@param	v	The word to be scanned
	@return		The bit number of the most significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ms1_32(TUint32 /*v*/)
	{
	asm("bsr eax, [esp+4] ");
	asm("jnz short 1f ");
	asm("mov eax, 0xffffffff ");
	asm("1: ");
	asm("ret ");
	}


/** Find the least significant 1 in a 32 bit word

	@param	v	The word to be scanned
	@return		The bit number of the least significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ls1_32(TUint32 /*v*/)
	{
	asm("bsf eax, [esp+4] ");
	asm("jnz short 1f ");
	asm("mov eax, 0xffffffff ");
	asm("1: ");
	asm("ret ");
	}


/** Count the number of 1's in a 32 bit word

	@param	v	The word to be scanned
	@return		The number of 1's
*/
EXPORT_C __NAKED__ TInt __e32_bit_count_32(TUint32 /*v*/)
	{
	asm("mov eax, [esp+4] ");
	asm("mov edx, eax ");
	asm("and eax, 0xaaaaaaaa ");
	asm("and edx, 0x55555555 ");	/* edx = even bits of arg */
	asm("shr eax, 1 ");				/* eax = odd bits of arg shifted into even bits */
	asm("add eax, edx ");			/* eax = 16 groups of 2 bit counts */
	asm("mov edx, eax ");
	asm("and eax, 0xcccccccc ");
	asm("and edx, 0x33333333 ");	/* even groups of 2 */
	asm("shr eax, 2 ");				/* odd groups of 2 shifted to even positions */
	asm("add eax, edx ");			/* 8 groups of 4 bit counts */
	asm("mov edx, eax ");
	asm("shr eax, 4 ");
	asm("add eax, edx ");			/* even nibbles = sum of 8 bits, odd nibbles garbage */
	asm("and eax, 0x0f0f0f0f ");	/* eliminate garbage nibbles */
	asm("add al, ah ");				/* AL = bit count of lower 16 bits */
	asm("mov dl, al ");
	asm("shr eax, 16 ");
	asm("add al, ah ");				/* AL = bit count of upper 16 bits */
	asm("xor ah, ah ");				/* top 24 bits of EAX now zero */
	asm("add al, dl ");				/* AL = bit count of entire 32 bits */
	asm("ret ");
	}
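
/*
Reference sketch (not part of the original source): the same parallel bit count the
assembler above performs, written as portable C for clarity. Each step halves the
number of partial sums and doubles their width, then the four byte counts are added.

	TInt BitCount32Reference(TUint32 v)		// hypothetical helper, illustration only
		{
		v = (v & 0x55555555u) + ((v >> 1) & 0x55555555u);	// 16 x 2-bit sums
		v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);	// 8 x 4-bit sums
		v = (v + (v >> 4)) & 0x0f0f0f0fu;					// 4 x 8-bit sums
		return (TInt)(((v >> 24) + (v >> 16) + (v >> 8) + v) & 0xffu);	// add the four bytes
		}
*/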


/** Find the most significant 1 in a 64 bit word

	@param	v	The word to be scanned
	@return		The bit number of the most significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ms1_64(TUint64 /*v*/)
	{
	asm("bsr eax, [esp+8] ");		// scan the high word first; ZF=1 if it is zero
	asm("jnz short 2f ");			// nonzero high word: result = bit number + 32
	asm("bsr eax, [esp+4] ");		// high word zero: scan the low word
	asm("jnz short 1f ");
	asm("mov eax, 0xffffffff ");	// v == 0: return -1 (the OR below leaves -1 unchanged)
	asm("2: ");
	asm("or eax, 32 ");
	asm("1: ");
	asm("ret ");
	}
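
/*
Reference sketch (not part of the original source): equivalent logic in C. BSR sets ZF
when its source is zero, so the code above only falls through to the low word when the
high word is zero, and the "OR EAX,32" at label 2 is harmless in the v==0 path because
0xffffffff | 32 is still 0xffffffff.

	TInt FindMs164Reference(TUint64 v)		// hypothetical helper, illustration only
		{
		TUint32 hi = (TUint32)(v >> 32);
		if (hi)
			return 32 + __e32_find_ms1_32(hi);
		return __e32_find_ms1_32((TUint32)v);	// also returns -1 when v == 0
		}
*/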


/** Find the least significant 1 in a 64 bit word

	@param	v	The word to be scanned
	@return		The bit number of the least significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ls1_64(TUint64 /*v*/)
	{
	asm("bsf eax, [esp+4] ");		// scan the low word first; ZF=1 if it is zero
	asm("jnz short 1f ");
	asm("bsf eax, [esp+8] ");		// low word zero: scan the high word
	asm("jnz short 2f ");			// nonzero high word: result = bit number + 32
	asm("mov eax, 0xffffffff ");	// v == 0: return -1 (the OR below leaves -1 unchanged)
	asm("2: ");
	asm("or eax, 32 ");
	asm("1: ");
	asm("ret ");
	}


/** Count the number of 1's in a 64 bit word

	@param	v	The word to be scanned
	@return		The number of 1's
*/
EXPORT_C __NAKED__ TInt __e32_bit_count_64(TUint64 /*v*/)
	{
	asm("mov eax, [esp+4] ");		// eax = low word, edx = high word
	asm("mov edx, [esp+8] ");

	asm("mov ecx, eax ");			// reduce the low word to 8 groups of 4 bit counts in ecx
	asm("and eax, 0xaaaaaaaa ");
	asm("and ecx, 0x55555555 ");
	asm("shr eax, 1 ");
	asm("add eax, ecx ");
	asm("mov ecx, eax ");
	asm("and eax, 0xcccccccc ");
	asm("and ecx, 0x33333333 ");
	asm("shr eax, 2 ");
	asm("add ecx, eax ");

	asm("mov eax, edx ");			// likewise reduce the high word to 8 groups of 4 bit counts in eax
	asm("and eax, 0xaaaaaaaa ");
	asm("and edx, 0x55555555 ");
	asm("shr eax, 1 ");
	asm("add eax, edx ");
	asm("mov edx, eax ");
	asm("and eax, 0xcccccccc ");
	asm("and edx, 0x33333333 ");
	asm("shr eax, 2 ");
	asm("add eax, edx ");

	asm("add eax, ecx ");			// combine both halves and finish as in the 32 bit version
	asm("mov edx, eax ");
	asm("and eax, 0xf0f0f0f0 ");
	asm("and edx, 0x0f0f0f0f ");
	asm("shr eax, 4 ");
	asm("add eax, edx ");
	asm("add al, ah ");
	asm("mov dl, al ");
	asm("shr eax, 16 ");
	asm("add al, ah ");
	asm("xor ah, ah ");
	asm("add al, dl ");
	asm("ret ");
	}




/** Read a 64 bit word with acquire semantics

	@param	a	Address of word to be read - must be a multiple of 8
	@return		The value read
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_load_acq64(const volatile TAny* /*a*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");
	asm("mov eax, 0x0badbeef ");		// arbitrary comparand: either it already equals *a, or CMPXCHG8B loads *a into edx:eax
	asm("mov edx, eax ");
	asm("mov ebx, eax ");
	asm("mov ecx, eax ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
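
/*
Note (not part of the original source): CMPXCHG8B loads the current 64 bit value into
EDX:EAX when the compare fails, and rewrites the same value when it happens to succeed,
so executing it with an arbitrary comparand is a way to get an atomic 64 bit read on
IA-32. Roughly, in terms of the functions defined in this file:

	TUint64 LoadAcq64Sketch(volatile TAny* a)	// hypothetical helper, illustration only
		{
		TUint64 guess = 0x0badbeef;
		// atomic: if (*a == guess) *a = guess; else guess = *a;
		__e32_atomic_cas_ord64(a, &guess, guess);
		return guess;							// guess now holds the value that was in *a
		}
*/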


/** Write a 64 bit word with release semantics

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The value written
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_store_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");
	asm("mov ebx, [esp+16] ");
	asm("mov ecx, [esp+20] ");
	asm("mov eax, [edi] ");
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// retry until the 64 bit store of ecx:ebx lands atomically
	asm("jne short 1b ");
	asm("mov eax, ebx ");
	asm("mov edx, ecx ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
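
/*
Note (not part of the original source): the store is implemented as a CMPXCHG8B retry
loop rather than a plain store so the full 64 bits are written in one atomic step.
In C terms:

	TUint64 StoreRel64Sketch(volatile TAny* a, TUint64 v)	// hypothetical helper, illustration only
		{
		TUint64 old = *(volatile TUint64*)a;		// initial guess only; need not be atomic
		while (!__e32_atomic_cas_rel64(a, &old, v))
			{}										// old has been refreshed with the current *a
		return v;
		}
*/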


/** Write a 64 bit word with full barrier semantics

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The value written
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_store_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_store_rel64 ");
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Relaxed ordering.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_swp_ord64 ");
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Acquire semantics.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_swp_ord64 ");
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Release semantics.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_swp_ord64 ");
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Full barrier semantics.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");
	asm("mov ebx, [esp+16] ");
	asm("mov ecx, [esp+20] ");
	asm("mov eax, [edi] ");
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit compare and swap, relaxed ordering.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_rlx64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_cas_ord64 ");
	}


/** 64 bit compare and swap, acquire semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_acq64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_cas_ord64 ");
	}


/** 64 bit compare and swap, release semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_rel64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_cas_ord64 ");
	}


/** 64 bit compare and swap, full barrier semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_ord64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("push esi ");
	asm("mov edi, [esp+16] ");			// edi = a
	asm("mov esi, [esp+20] ");			// esi = q
	asm("mov ebx, [esp+24] ");			// ecx:ebx = v
	asm("mov ecx, [esp+28] ");
	asm("mov eax, [esi] ");				// edx:eax = *q
	asm("mov edx, [esi+4] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==*q) *a=v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 2f ");
	asm("mov eax, 1 ");
	asm("pop esi ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	asm("2: ");
	asm("mov [esi], eax ");				// *q = edx:eax
	asm("mov [esi+4], edx ");
	asm("xor eax, eax ");
	asm("pop esi ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
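
/*
Note (not part of the original source): this is the primitive the other 64 bit
operations in this file are built on. On failure the caller's expected value is
overwritten with what was actually found, so a typical retry loop does not need to
re-read *a itself, e.g.:

	void AtomicIncrement64Sketch(volatile TAny* a)	// hypothetical helper, illustration only
		{
		TUint64 old = __e32_atomic_load_acq64(a);
		while (!__e32_atomic_cas_ord64(a, &old, old + 1))
			{}									// old now holds the latest value of *a; try again
		}
*/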


/** 64 bit atomic add, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_add_ord64 ");
	}


/** 64 bit atomic add, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_add_ord64 ");
	}


/** 64 bit atomic add, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_add_ord64 ");
	}


/** 64 bit atomic add, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("add ebx, [esp+16] ");			// ecx:ebx = oldv + v
	asm("adc ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv+v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
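
/*
Note (not part of the original source): add, and, ior, xor and axo below all follow
the same shape: read EDX:EAX once, compute the new value into ECX:EBX, then let
CMPXCHG8B publish it, retrying from the value it returns if another CPU got in first.
As C, using the CAS defined above:

	TUint64 FetchAdd64Sketch(volatile TAny* a, TUint64 v)	// hypothetical helper, illustration only
		{
		TUint64 old = *(volatile TUint64*)a;				// initial guess only
		while (!__e32_atomic_cas_ord64(a, &old, old + v))	// and/ior/xor replace "+" with &, |, ^
			{}
		return old;											// original value, as the real functions return
		}
*/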


/** 64 bit atomic bitwise logical AND, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_and_ord64 ");
	}


/** 64 bit atomic bitwise logical AND, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_and_ord64 ");
	}


/** 64 bit atomic bitwise logical AND, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_and_ord64 ");
	}


/** 64 bit atomic bitwise logical AND, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("and ebx, [esp+16] ");			// ecx:ebx = oldv & v
	asm("and ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv&v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit atomic bitwise logical inclusive OR, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_ior_ord64 ");
	}


/** 64 bit atomic bitwise logical inclusive OR, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_ior_ord64 ");
	}


/** 64 bit atomic bitwise logical inclusive OR, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_ior_ord64 ");
	}


/** 64 bit atomic bitwise logical inclusive OR, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("or ebx, [esp+16] ");			// ecx:ebx = oldv | v
	asm("or ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv|v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit atomic bitwise logical exclusive OR, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_xor_ord64 ");
	}


/** 64 bit atomic bitwise logical exclusive OR, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_xor_ord64 ");
	}


/** 64 bit atomic bitwise logical exclusive OR, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_xor_ord64 ");
	}


/** 64 bit atomic bitwise logical exclusive OR, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("xor ebx, [esp+16] ");			// ecx:ebx = oldv ^ v
	asm("xor ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv^v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit atomic bitwise universal function, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_rlx64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_axo_ord64 ");
	}


/** 64 bit atomic bitwise universal function, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_acq64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_axo_ord64 ");
	}


/** 64 bit atomic bitwise universal function, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_rel64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_axo_ord64 ");
	}


/** 64 bit atomic bitwise universal function, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_ord64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("and ebx, [esp+16] ");			// ecx:ebx = oldv & u
	asm("and ecx, [esp+20] ");
	asm("xor ebx, [esp+24] ");			// ecx:ebx = (oldv & u) ^ v
	asm("xor ecx, [esp+28] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=(oldv&u)^v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
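
/*
Note (not part of the original source): the "universal" (oldv & u) ^ v form can express
the simpler bitwise operations by choosing u and v appropriately; values below are
illustrative only:

	__e32_atomic_axo_ord64(a, ~mask, 0);			// clear the bits in mask (AND with ~mask)
	__e32_atomic_axo_ord64(a, ~(TUint64)0, mask);	// toggle the bits in mask (XOR with mask)
	__e32_atomic_axo_ord64(a, ~mask, mask);			// set the bits in mask (equivalent to OR)
*/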


/** 64 bit threshold and add, unsigned, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_rlx64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_tau_ord64 ");
	}


/** 64 bit threshold and add, unsigned, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_acq64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_tau_ord64 ");
	}


/** 64 bit threshold and add, unsigned, release semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_rel64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_tau_ord64 ");
	}


/** 64 bit threshold and add, unsigned, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_ord64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, edx ");
	asm("cmp eax, [esp+16] ");			// eax - t.low, CF=borrow
	asm("sbb ebx, [esp+20] ");			// CF = borrow from (oldv - t)
	asm("jnc short 2f ");				// no borrow means oldv>=t so use u
	asm("mov ebx, [esp+32] ");			// ecx:ebx = v
	asm("mov ecx, [esp+36] ");
	asm("jmp short 3f ");
	asm("2: ");
	asm("mov ebx, [esp+24] ");			// ecx:ebx = u
	asm("mov ecx, [esp+28] ");
	asm("3: ");
	asm("add ebx, eax ");				// ecx:ebx = oldv + u or v
	asm("adc ecx, edx ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
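
/*
Reference sketch (not part of the original source): the unsigned threshold-and-add
above, as C. The CMP/SBB pair performs a 64 bit unsigned compare of oldv against t via
the carry flag; no borrow (JNC) means oldv >= t, so u is added, otherwise v is added.

	TUint64 Tau64Sketch(volatile TAny* a, TUint64 t, TUint64 u, TUint64 v)	// hypothetical helper, illustration only
		{
		TUint64 old = *(volatile TUint64*)a;		// initial guess only
		while (!__e32_atomic_cas_ord64(a, &old, old + (old >= t ? u : v)))
			{}
		return old;
		}
*/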


/** 64 bit threshold and add, signed, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_rlx64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("jmp ___e32_atomic_tas_ord64 ");
	}


/** 64 bit threshold and add, signed, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_acq64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("jmp ___e32_atomic_tas_ord64 ");
	}


/** 64 bit threshold and add, signed, release semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_rel64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("jmp ___e32_atomic_tas_ord64 ");
	}


/** 64 bit threshold and add, signed, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_ord64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm("mov ebx, edx ");
	asm("cmp eax, [esp+16] ");			// eax - t.low, CF=borrow
	asm("sbb ebx, [esp+20] ");			// SF=sign, OF=overflow from (oldv - t)
	asm("jge short 2f ");				// SF==OF (GE condition) means oldv>=t so use u
	asm("mov ebx, [esp+32] ");			// ecx:ebx = v
	asm("mov ecx, [esp+36] ");
	asm("jmp short 3f ");
	asm("2: ");
	asm("mov ebx, [esp+24] ");			// ecx:ebx = u
	asm("mov ecx, [esp+28] ");
	asm("3: ");
	asm("add ebx, eax ");				// ecx:ebx = oldv + u or v
	asm("adc ecx, edx ");
	asm(__LOCK__ "cmpxchg8b [edi] ");
	asm("jne short 1b ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}
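
/*
Note (not part of the original source): identical to the unsigned version above except
that the CMP/SBB result is judged with JGE (sign and overflow flags) instead of JNC,
i.e. the threshold test "oldv >= t" is a signed 64 bit compare. Roughly:

	TInt64 Tas64Sketch(volatile TAny* a, TInt64 t, TInt64 u, TInt64 v)	// hypothetical helper, illustration only
		{
		TInt64 old = *(volatile TInt64*)a;		// initial guess only
		while (!__e32_atomic_cas_ord64(a, (TUint64*)&old, (TUint64)(old + (old >= t ? u : v))))
			{}
		return old;
		}
*/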

} // extern "C"