1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/kernelhwsrv/kernel/eka/common/win32/atomics.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,1042 @@
1.4 +// Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
1.5 +// All rights reserved.
1.6 +// This component and the accompanying materials are made available
1.7 +// under the terms of the License "Eclipse Public License v1.0"
1.8 +// which accompanies this distribution, and is available
1.9 +// at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.10 +//
1.11 +// Initial Contributors:
1.12 +// Nokia Corporation - initial contribution.
1.13 +//
1.14 +// Contributors:
1.15 +//
1.16 +// Description:
1.17 +// e32\common\win32\atomics.cpp
1.18 +//
1.19 +//
1.20 +
1.21 +#include <e32atomics.h>
1.22 +#include <cpudefs.h>
1.23 +
1.24 +/*
1.25 +Versions needed:
1.26 + WINS/WINSCW Use X86 locked operations. Assume Pentium or above CPU (CMPXCHG8B available)
1.27 + X86 For Pentium and above use locked operations
1.28 + For 486 use locked operations for 8, 16, 32 bit. For 64 bit must disable interrupts.
1.29 + NOTE: 486 not supported at the moment
1.30 + ARMv4/ARMv5 Must disable interrupts.
1.31 + ARMv6		LDREX/STREX for 8, 16, 32 bit. For 64 bit must disable interrupts (LDREXD/STREXD only arrive with ARMv6K).
1.32 + ARMv6K/ARMv7 LDREXB/LDREXH/LDREX/LDREXD
1.33 +
1.34 +Need both kernel side and user side versions
1.35 +*/
1.36 +
1.37 +#if defined(__SMP__) || !defined(__EPOC32__)
1.38 +#define __BARRIERS_NEEDED__
1.39 +#define __LOCK__ lock
1.40 +#else
1.41 +#define __LOCK__
1.42 +#endif
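+
+// Note (editor's addition): __LOCK__ expands to the x86 "lock" prefix whenever
+// barriers are needed (SMP kernels and all emulator builds), so for example
+// "__LOCK__ cmpxchg8b [edi]" assembles as "lock cmpxchg8b [edi]". On
+// uniprocessor EPOC32 builds the prefix is omitted; a single instruction is
+// still atomic with respect to interrupts on one CPU.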
1.43 +
1.44 +
1.45 +extern "C" {
1.46 +
1.47 +#undef __TUintX__
1.48 +#undef __TIntX__
1.49 +#undef __fname__
1.50 +#undef __A_REG__
1.51 +#undef __C_REG__
1.52 +#undef __D_REG__
1.53 +#define __TUintX__ TUint32
1.54 +#define __TIntX__ TInt32
1.55 +#define __fname__(x) x##32
1.56 +#define __A_REG__ eax
1.57 +#define __C_REG__ ecx
1.58 +#define __D_REG__ edx
1.59 +#include "atomic_skeleton.h"
1.60 +
1.61 +#undef __TUintX__
1.62 +#undef __TIntX__
1.63 +#undef __fname__
1.64 +#undef __A_REG__
1.65 +#undef __C_REG__
1.66 +#undef __D_REG__
1.67 +#define __TUintX__ TUint16
1.68 +#define __TIntX__ TInt16
1.69 +#define __fname__(x) x##16
1.70 +#define __A_REG__ ax
1.71 +#define __C_REG__ cx
1.72 +#define __D_REG__ dx
1.73 +#include "atomic_skeleton.h"
1.74 +
1.75 +#undef __TUintX__
1.76 +#undef __TIntX__
1.77 +#undef __fname__
1.78 +#undef __A_REG__
1.79 +#undef __C_REG__
1.80 +#undef __D_REG__
1.81 +#define __TUintX__ TUint8
1.82 +#define __TIntX__ TInt8
1.83 +#define __fname__(x) x##8
1.84 +#define __A_REG__ al
1.85 +#define __C_REG__ cl
1.86 +#define __D_REG__ dl
1.87 +#include "atomic_skeleton.h"
1.88 +
1.89 +#undef __TUintX__
1.90 +#undef __TIntX__
1.91 +#undef __fname__
1.92 +#undef __A_REG__
1.93 +#undef __C_REG__
1.94 +#undef __D_REG__
1.95 +
1.96 +/** Full memory barrier for explicit memory accesses
1.97 +
1.98 +*/
1.99 +EXPORT_C __NAKED__ void __e32_memory_barrier()
1.100 + {
1.101 +#ifdef __BARRIERS_NEEDED__
1.102 + _asm lock add dword ptr [esp], 0
1.103 +#endif
1.104 + _asm ret
1.105 + }
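+
+/* Example (editor's illustration, not part of the original interface):
+   pairing __e32_memory_barrier() with plain stores to publish data.
+   'PayloadData' and 'PayloadReady' are hypothetical shared variables.
+
+	TUint32 PayloadData;
+	volatile TUint32 PayloadReady;
+
+	void Publish(TUint32 aValue)
+		{
+		PayloadData = aValue;
+		__e32_memory_barrier();	// payload must be visible...
+		PayloadReady = 1;	// ...before the flag is raised
+		}
+*/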
1.106 +
1.107 +
1.108 +/** Barrier guaranteeing completion as well as ordering
1.109 +
1.110 +*/
1.111 +EXPORT_C __NAKED__ void __e32_io_completion_barrier()
1.112 + {
1.113 +	_asm push ebx			// cpuid overwrites ebx, which is callee-saved in __cdecl
1.114 +	_asm cpuid			// cpuid is a fully serializing instruction
1.115 +	_asm pop ebx
1.116 + _asm ret
1.117 + }
1.118 +
1.119 +
1.120 +/** Find the most significant 1 in a 32 bit word
1.121 +
1.122 + @param v The word to be scanned
1.123 + @return The bit number of the most significant 1 if v != 0
1.124 + -1 if v == 0
1.125 +*/
1.126 +EXPORT_C __NAKED__ TInt __e32_find_ms1_32(TUint32 /*v*/)
1.127 + {
1.128 + _asm bsr eax, [esp+4]
1.129 + _asm jnz short done
1.130 + _asm mov eax, 0ffffffffh
1.131 +done:
1.132 + _asm ret
1.133 + }
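+
+// Example (editor's note): for nonzero v this is floor(log2(v)), e.g.
+//	__e32_find_ms1_32(1)          == 0
+//	__e32_find_ms1_32(0x80000000) == 31
+//	__e32_find_ms1_32(0)          == -1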
1.134 +
1.135 +
1.136 +/** Find the least significant 1 in a 32 bit word
1.137 +
1.138 + @param v The word to be scanned
1.139 + @return The bit number of the least significant 1 if v != 0
1.140 + -1 if v == 0
1.141 +*/
1.142 +EXPORT_C __NAKED__ TInt __e32_find_ls1_32(TUint32 /*v*/)
1.143 + {
1.144 + _asm bsf eax, [esp+4]
1.145 + _asm jnz short done
1.146 + _asm mov eax, 0ffffffffh
1.147 +done:
1.148 + _asm ret
1.149 + }
1.150 +
1.151 +
1.152 +/** Count the number of 1's in a 32 bit word
1.153 +
1.154 + @param v The word to be scanned
1.155 + @return The number of 1's
1.156 +*/
1.157 +EXPORT_C __NAKED__ TInt __e32_bit_count_32(TUint32 /*v*/)
1.158 + {
1.159 + _asm mov eax, [esp+4]
1.160 + _asm mov edx, eax
1.161 + _asm and eax, 0aaaaaaaah
1.162 + _asm and edx, 055555555h
1.163 + _asm shr eax, 1
1.164 + _asm add eax, edx
1.165 + _asm mov edx, eax
1.166 + _asm and eax, 0cccccccch
1.167 + _asm and edx, 033333333h
1.168 + _asm shr eax, 2
1.169 + _asm add eax, edx
1.170 + _asm mov edx, eax
1.171 + _asm shr eax, 4
1.172 + _asm add eax, edx
1.173 + _asm and eax, 00f0f0f0fh
1.174 + _asm add al, ah
1.175 + _asm mov dl, al
1.176 + _asm shr eax, 16
1.177 + _asm add al, ah
1.178 + _asm xor ah, ah
1.179 + _asm add al, dl
1.180 + _asm ret
1.181 + }
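+
+/* Example (editor's sketch): the C equivalent of the SWAR reduction above,
+   summing ever-wider groups of bits in parallel.
+
+	TInt BitCount32(TUint32 v)
+		{
+		v = (v & 0x55555555u) + ((v >> 1) & 0x55555555u);	// 16 x 2-bit counts
+		v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);	// 8 x 4-bit counts
+		v = (v + (v >> 4)) & 0x0f0f0f0fu;			// 4 x 8-bit counts
+		v += v >> 8;						// fold bytes together
+		v += v >> 16;
+		return (TInt)(v & 0xff);
+		}
+*/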
1.182 +
1.183 +
1.184 +/** Find the most significant 1 in a 64 bit word
1.185 +
1.186 + @param v The word to be scanned
1.187 + @return The bit number of the most significant 1 if v != 0
1.188 + -1 if v == 0
1.189 +*/
1.190 +EXPORT_C __NAKED__ TInt __e32_find_ms1_64(TUint64 /*v*/)
1.191 + {
1.192 +	_asm bsr eax, [esp+8]		// scan the high word first
1.193 +	_asm jnz short mswnz
1.194 +	_asm bsr eax, [esp+4]		// high word zero - scan the low word
1.195 +	_asm jnz short lswnz
1.196 +	_asm mov eax, 0ffffffffh	// v == 0: result -1 (unchanged by the OR below)
1.197 +mswnz:
1.198 +	_asm or eax, 32			// bit was in the high word: add 32
1.199 +lswnz:
1.200 + _asm ret
1.201 + }
1.202 +
1.203 +
1.204 +/** Find the least significant 1 in a 64 bit word
1.205 +
1.206 + @param v The word to be scanned
1.207 + @return The bit number of the least significant 1 if v != 0
1.208 + -1 if v == 0
1.209 +*/
1.210 +EXPORT_C __NAKED__ TInt __e32_find_ls1_64(TUint64 /*v*/)
1.211 + {
1.212 +	_asm bsf eax, [esp+4]		// scan the low word first
1.213 +	_asm jnz short lswnz
1.214 +	_asm bsf eax, [esp+8]		// low word zero - scan the high word
1.215 +	_asm jnz short mswnz
1.216 +	_asm mov eax, 0ffffffffh	// v == 0: result -1 (unchanged by the OR below)
1.217 +mswnz:
1.218 +	_asm or eax, 32			// bit was in the high word: add 32
1.219 +lswnz:
1.220 + _asm ret
1.221 + }
1.222 +
1.223 +
1.224 +/** Count the number of 1's in a 64 bit word
1.225 +
1.226 + @param v The word to be scanned
1.227 + @return The number of 1's
1.228 +*/
1.229 +EXPORT_C __NAKED__ TInt __e32_bit_count_64(TUint64 /*v*/)
1.230 + {
1.231 + _asm mov eax, [esp+4]
1.232 + _asm mov edx, [esp+8]
1.233 +
1.234 + _asm mov ecx, eax
1.235 + _asm and eax, 0aaaaaaaah
1.236 + _asm and ecx, 055555555h
1.237 + _asm shr eax, 1
1.238 + _asm add eax, ecx /* 16 groups of 2 bits, count=0,1,2 */
1.239 + _asm mov ecx, eax
1.240 + _asm and eax, 0cccccccch
1.241 + _asm and ecx, 033333333h
1.242 + _asm shr eax, 2
1.243 + _asm add ecx, eax /* 8 groups of 4 bits, count=0...4 */
1.244 +
1.245 + _asm mov eax, edx
1.246 + _asm and eax, 0aaaaaaaah
1.247 + _asm and edx, 055555555h
1.248 + _asm shr eax, 1
1.249 + _asm add eax, edx /* 16 groups of 2 bits, count=0,1,2 */
1.250 + _asm mov edx, eax
1.251 + _asm and eax, 0cccccccch
1.252 + _asm and edx, 033333333h
1.253 + _asm shr eax, 2
1.254 + _asm add eax, edx /* 8 groups of 4 bits, count=0...4 */
1.255 +
1.256 + _asm add eax, ecx /* 8 groups of 4 bits, count=0...8 */
1.257 + _asm mov edx, eax
1.258 + _asm and eax, 0f0f0f0f0h
1.259 + _asm and edx, 00f0f0f0fh
1.260 + _asm shr eax, 4
1.261 + _asm add eax, edx /* 4 groups of 8 bits, count=0...16 */
1.262 + _asm add al, ah
1.263 + _asm mov dl, al
1.264 + _asm shr eax, 16
1.265 + _asm add al, ah
1.266 + _asm xor ah, ah
1.267 + _asm add al, dl
1.268 + _asm ret
1.269 + }
1.270 +
1.271 +
1.272 +
1.273 +
1.274 +/** Read a 64 bit word with acquire semantics
1.275 +
1.276 + @param a Address of word to be read - must be a multiple of 8
1.277 + @return The value read
1.278 +*/
1.279 +EXPORT_C __NAKED__ TUint64 __e32_atomic_load_acq64(const volatile TAny* /*a*/)
1.280 + {
1.281 + _asm push ebx
1.282 + _asm push edi
1.283 +	_asm mov edi, [esp+12]		// edi = a
1.284 +	_asm mov eax, 0badbeefh		// arbitrary comparand - only its equality matters
1.285 +	_asm mov edx, eax
1.286 +	_asm mov ebx, eax
1.287 +	_asm mov ecx, eax
1.288 +	_asm __LOCK__ cmpxchg8b [edi]	// on match writes the same value back; either way edx:eax = *a atomically
1.289 + _asm pop edi
1.290 + _asm pop ebx
1.291 + _asm ret
1.292 + }
1.293 +
1.294 +
1.295 +/** Write a 64 bit word with release semantics
1.296 +
1.297 + @param a Address of word to be written - must be a multiple of 8
1.298 + @param v The value to be written
1.299 + @return The value written
1.300 +*/
1.301 +EXPORT_C __NAKED__ TUint64 __e32_atomic_store_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
1.302 + {
1.303 + _asm push ebx
1.304 + _asm push edi
1.305 +	_asm mov edi, [esp+12]		// edi = a
1.306 +	_asm mov ebx, [esp+16]		// ecx:ebx = v
1.307 +	_asm mov ecx, [esp+20]
1.308 +	_asm mov eax, [edi]		// edx:eax = first guess at oldv (need not be atomic)
1.309 +	_asm mov edx, [edi+4]
1.310 +	_asm retry:
1.311 +	_asm __LOCK__ cmpxchg8b [edi]	// if *a==edx:eax, *a=v, ZF=1 else edx:eax=*a, ZF=0
1.312 +	_asm jne short retry
1.313 + _asm mov eax, ebx
1.314 + _asm mov edx, ecx
1.315 + _asm pop edi
1.316 + _asm pop ebx
1.317 + _asm ret
1.318 + }
1.319 +
1.320 +
1.321 +/** Write a 64 bit word with full barrier semantics
1.322 +
1.323 + @param a Address of word to be written - must be a multiple of 8
1.324 + @param v The value to be written
1.325 + @return The value written
1.326 +*/
1.327 +EXPORT_C __NAKED__ TUint64 __e32_atomic_store_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
1.328 + {
1.329 + _asm jmp __e32_atomic_store_rel64
1.330 + }
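+
+// Editor's note: on IA-32 every lock-prefixed read-modify-write already has
+// full barrier semantics, which is why the _rlx/_acq/_rel variants below can
+// simply jump to the corresponding _ord implementation.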
1.331 +
1.332 +
1.333 +/** Write a 64 bit word to memory and return the original value of the memory.
1.334 + Relaxed ordering.
1.335 +
1.336 + @param a Address of word to be written - must be a multiple of 8
1.337 + @param v The value to be written
1.338 + @return The original value of *a
1.339 +*/
1.340 +EXPORT_C __NAKED__ TUint64 __e32_atomic_swp_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
1.341 + {
1.342 + _asm jmp __e32_atomic_swp_ord64
1.343 + }
1.344 +
1.345 +
1.346 +/** Write a 64 bit word to memory and return the original value of the memory.
1.347 + Acquire semantics.
1.348 +
1.349 + @param a Address of word to be written - must be a multiple of 8
1.350 + @param v The value to be written
1.351 + @return The original value of *a
1.352 +*/
1.353 +EXPORT_C __NAKED__ TUint64 __e32_atomic_swp_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
1.354 + {
1.355 + _asm jmp __e32_atomic_swp_ord64
1.356 + }
1.357 +
1.358 +
1.359 +/** Write a 64 bit word to memory and return the original value of the memory.
1.360 + Release semantics.
1.361 +
1.362 + @param a Address of word to be written - must be a multiple of 8
1.363 + @param v The value to be written
1.364 + @return The original value of *a
1.365 +*/
1.366 +EXPORT_C __NAKED__ TUint64 __e32_atomic_swp_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
1.367 + {
1.368 + _asm jmp __e32_atomic_swp_ord64
1.369 + }
1.370 +
1.371 +
1.372 +/** Write a 64 bit word to memory and return the original value of the memory.
1.373 + Full barrier semantics.
1.374 +
1.375 + @param a Address of word to be written - must be a multiple of 8
1.376 + @param v The value to be written
1.377 + @return The original value of *a
1.378 +*/
1.379 +EXPORT_C __NAKED__ TUint64 __e32_atomic_swp_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
1.380 + {
1.381 + _asm push ebx
1.382 + _asm push edi
1.383 +	_asm mov edi, [esp+12]		// edi = a
1.384 +	_asm mov ebx, [esp+16]		// ecx:ebx = v
1.385 +	_asm mov ecx, [esp+20]
1.386 +	_asm mov eax, [edi]		// edx:eax = first guess at oldv (need not be atomic)
1.387 +	_asm mov edx, [edi+4]
1.388 +	_asm retry:
1.389 +	_asm __LOCK__ cmpxchg8b [edi]	// if *a==edx:eax, *a=v, ZF=1 else edx:eax=*a, ZF=0
1.390 +	_asm jne short retry
1.391 + _asm pop edi
1.392 + _asm pop ebx
1.393 + _asm ret
1.394 + }
1.395 +
1.396 +
1.397 +/** 64 bit compare and swap, relaxed ordering.
1.398 +
1.399 + Atomically performs the following operation:
1.400 + if (*a == *q) { *a = v; return TRUE; }
1.401 + else { *q = *a; return FALSE; }
1.402 +
1.403 + @param a Address of word to be written - must be a multiple of 8
1.404 + @param q Address of location containing expected value
1.405 + @param v The new value to be written if the old value is as expected
1.406 + @return TRUE if *a was updated, FALSE otherwise
1.407 +*/
1.408 +EXPORT_C __NAKED__ TBool __e32_atomic_cas_rlx64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
1.409 + {
1.410 + _asm jmp __e32_atomic_cas_ord64
1.411 + }
1.412 +
1.413 +
1.414 +/** 64 bit compare and swap, acquire semantics.
1.415 +
1.416 + Atomically performs the following operation:
1.417 + if (*a == *q) { *a = v; return TRUE; }
1.418 + else { *q = *a; return FALSE; }
1.419 +
1.420 + @param a Address of word to be written - must be a multiple of 8
1.421 + @param q Address of location containing expected value
1.422 + @param v The new value to be written if the old value is as expected
1.423 + @return TRUE if *a was updated, FALSE otherwise
1.424 +*/
1.425 +EXPORT_C __NAKED__ TBool __e32_atomic_cas_acq64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
1.426 + {
1.427 + _asm jmp __e32_atomic_cas_ord64
1.428 + }
1.429 +
1.430 +
1.431 +/** 64 bit compare and swap, release semantics.
1.432 +
1.433 + Atomically performs the following operation:
1.434 + if (*a == *q) { *a = v; return TRUE; }
1.435 + else { *q = *a; return FALSE; }
1.436 +
1.437 + @param a Address of word to be written - must be a multiple of 8
1.438 + @param q Address of location containing expected value
1.439 + @param v The new value to be written if the old value is as expected
1.440 + @return TRUE if *a was updated, FALSE otherwise
1.441 +*/
1.442 +EXPORT_C __NAKED__ TBool __e32_atomic_cas_rel64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
1.443 + {
1.444 + _asm jmp __e32_atomic_cas_ord64
1.445 + }
1.446 +
1.447 +
1.448 +/** 64 bit compare and swap, full barrier semantics.
1.449 +
1.450 + Atomically performs the following operation:
1.451 + if (*a == *q) { *a = v; return TRUE; }
1.452 + else { *q = *a; return FALSE; }
1.453 +
1.454 + @param a Address of word to be written - must be a multiple of 8
1.455 + @param q Address of location containing expected value
1.456 + @param v The new value to be written if the old value is as expected
1.457 + @return TRUE if *a was updated, FALSE otherwise
1.458 +*/
1.459 +EXPORT_C __NAKED__ TBool __e32_atomic_cas_ord64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
1.460 + {
1.461 + _asm push ebx
1.462 + _asm push edi
1.463 + _asm push esi
1.464 + _asm mov edi, [esp+16] // edi = a
1.465 + _asm mov esi, [esp+20] // esi = q
1.466 + _asm mov ebx, [esp+24] // ecx:ebx = v
1.467 + _asm mov ecx, [esp+28]
1.468 + _asm mov eax, [esi] // edx:eax = *q
1.469 + _asm mov edx, [esi+4]
1.470 + _asm __LOCK__ cmpxchg8b [edi] // if (*a==*q) *a=v, ZF=1 else edx:eax=*a, ZF=0
1.471 + _asm jne short cas_fail
1.472 + _asm mov eax, 1
1.473 + _asm pop esi
1.474 + _asm pop edi
1.475 + _asm pop ebx
1.476 + _asm ret
1.477 + _asm cas_fail:
1.478 + _asm mov [esi], eax // *q = edx:eax
1.479 + _asm mov [esi+4], edx
1.480 + _asm xor eax, eax
1.481 + _asm pop esi
1.482 + _asm pop edi
1.483 + _asm pop ebx
1.484 + _asm ret
1.485 + }
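+
+/* Example (editor's sketch): an arbitrary lock-free read-modify-write built on
+   the CAS above. On failure the CAS refreshes 'expected' with the current
+   value of *a, so no separate reload is needed. 'StoreMax' is hypothetical.
+
+	void StoreMax(volatile TUint64& a, TUint64 aCandidate)
+		{
+		TUint64 expected = __e32_atomic_load_acq64(&a);
+		while (aCandidate > expected &&
+				!__e32_atomic_cas_ord64(&a, &expected, aCandidate))
+			{
+			// failed CAS updated 'expected'; recheck and retry
+			}
+		}
+*/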
1.486 +
1.487 +
1.488 +/** 64 bit atomic add, relaxed ordering.
1.489 +
1.490 + Atomically performs the following operation:
1.491 + oldv = *a; *a = oldv + v; return oldv;
1.492 +
1.493 + @param a Address of word to be updated - must be a multiple of 8
1.494 + @param v The value to be added
1.495 + @return The original value of *a
1.496 +*/
1.497 +EXPORT_C __NAKED__ TUint64 __e32_atomic_add_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
1.498 + {
1.499 + _asm jmp __e32_atomic_add_ord64
1.500 + }
1.501 +
1.502 +
1.503 +/** 64 bit atomic add, acquire semantics.
1.504 +
1.505 + Atomically performs the following operation:
1.506 + oldv = *a; *a = oldv + v; return oldv;
1.507 +
1.508 + @param a Address of word to be updated - must be a multiple of 8
1.509 + @param v The value to be added
1.510 + @return The original value of *a
1.511 +*/
1.512 +EXPORT_C __NAKED__ TUint64 __e32_atomic_add_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
1.513 + {
1.514 + _asm jmp __e32_atomic_add_ord64
1.515 + }
1.516 +
1.517 +
1.518 +/** 64 bit atomic add, release semantics.
1.519 +
1.520 + Atomically performs the following operation:
1.521 + oldv = *a; *a = oldv + v; return oldv;
1.522 +
1.523 + @param a Address of word to be updated - must be a multiple of 8
1.524 + @param v The value to be added
1.525 + @return The original value of *a
1.526 +*/
1.527 +EXPORT_C __NAKED__ TUint64 __e32_atomic_add_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
1.528 + {
1.529 + _asm jmp __e32_atomic_add_ord64
1.530 + }
1.531 +
1.532 +
1.533 +/** 64 bit atomic add, full barrier semantics.
1.534 +
1.535 + Atomically performs the following operation:
1.536 + oldv = *a; *a = oldv + v; return oldv;
1.537 +
1.538 + @param a Address of word to be updated - must be a multiple of 8
1.539 + @param v The value to be added
1.540 + @return The original value of *a
1.541 +*/
1.542 +EXPORT_C __NAKED__ TUint64 __e32_atomic_add_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
1.543 + {
1.544 + _asm push ebx
1.545 + _asm push edi
1.546 + _asm mov edi, [esp+12] // edi = a
1.547 + _asm mov eax, [edi] // edx:eax = oldv
1.548 + _asm mov edx, [edi+4]
1.549 + _asm retry:
1.550 + _asm mov ebx, eax
1.551 + _asm mov ecx, edx
1.552 + _asm add ebx, [esp+16] // ecx:ebx = oldv + v
1.553 + _asm adc ecx, [esp+20]
1.554 + _asm __LOCK__ cmpxchg8b [edi] // if (*a==oldv) *a=oldv+v, ZF=1 else edx:eax=*a, ZF=0
1.555 + _asm jne short retry
1.556 + _asm pop edi
1.557 + _asm pop ebx
1.558 + _asm ret
1.559 + }
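+
+// Example (editor's note): a typical use is a shared event counter where only
+// atomicity matters, e.g. __e32_atomic_add_rlx64(&iEventCount, 1) with a
+// hypothetical 'iEventCount'; the return value is the count before the addition.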
1.560 +
1.561 +
1.562 +/** 64 bit atomic bitwise logical AND, relaxed ordering.
1.563 +
1.564 + Atomically performs the following operation:
1.565 + oldv = *a; *a = oldv & v; return oldv;
1.566 +
1.567 + @param a Address of word to be updated - must be a multiple of 8
1.568 + @param v The value to be ANDed with *a
1.569 + @return The original value of *a
1.570 +*/
1.571 +EXPORT_C __NAKED__ TUint64 __e32_atomic_and_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
1.572 + {
1.573 + _asm jmp __e32_atomic_and_ord64
1.574 + }
1.575 +
1.576 +
1.577 +/** 64 bit atomic bitwise logical AND, acquire semantics.
1.578 +
1.579 + Atomically performs the following operation:
1.580 + oldv = *a; *a = oldv & v; return oldv;
1.581 +
1.582 + @param a Address of word to be updated - must be a multiple of 8
1.583 + @param v The value to be ANDed with *a
1.584 + @return The original value of *a
1.585 +*/
1.586 +EXPORT_C __NAKED__ TUint64 __e32_atomic_and_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
1.587 + {
1.588 + _asm jmp __e32_atomic_and_ord64
1.589 + }
1.590 +
1.591 +
1.592 +/** 64 bit atomic bitwise logical AND, release semantics.
1.593 +
1.594 + Atomically performs the following operation:
1.595 + oldv = *a; *a = oldv & v; return oldv;
1.596 +
1.597 + @param a Address of word to be updated - must be a multiple of 8
1.598 + @param v The value to be ANDed with *a
1.599 + @return The original value of *a
1.600 +*/
1.601 +EXPORT_C __NAKED__ TUint64 __e32_atomic_and_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
1.602 + {
1.603 + _asm jmp __e32_atomic_and_ord64
1.604 + }
1.605 +
1.606 +
1.607 +/** 64 bit atomic bitwise logical AND, full barrier semantics.
1.608 +
1.609 + Atomically performs the following operation:
1.610 + oldv = *a; *a = oldv & v; return oldv;
1.611 +
1.612 + @param a Address of word to be updated - must be a multiple of 8
1.613 + @param v The value to be ANDed with *a
1.614 + @return The original value of *a
1.615 +*/
1.616 +EXPORT_C __NAKED__ TUint64 __e32_atomic_and_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
1.617 + {
1.618 + _asm push ebx
1.619 + _asm push edi
1.620 + _asm mov edi, [esp+12] // edi = a
1.621 + _asm mov eax, [edi] // edx:eax = oldv
1.622 + _asm mov edx, [edi+4]
1.623 + _asm retry:
1.624 + _asm mov ebx, eax
1.625 + _asm mov ecx, edx
1.626 + _asm and ebx, [esp+16] // ecx:ebx = oldv & v
1.627 + _asm and ecx, [esp+20]
1.628 + _asm __LOCK__ cmpxchg8b [edi] // if (*a==oldv) *a=oldv&v, ZF=1 else edx:eax=*a, ZF=0
1.629 + _asm jne short retry
1.630 + _asm pop edi
1.631 + _asm pop ebx
1.632 + _asm ret
1.633 + }
1.634 +
1.635 +
1.636 +/** 64 bit atomic bitwise logical inclusive OR, relaxed ordering.
1.637 +
1.638 + Atomically performs the following operation:
1.639 + oldv = *a; *a = oldv | v; return oldv;
1.640 +
1.641 + @param a Address of word to be updated - must be a multiple of 8
1.642 + @param v The value to be ORed with *a
1.643 + @return The original value of *a
1.644 +*/
1.645 +EXPORT_C __NAKED__ TUint64 __e32_atomic_ior_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
1.646 + {
1.647 + _asm jmp __e32_atomic_ior_ord64
1.648 + }
1.649 +
1.650 +
1.651 +/** 64 bit atomic bitwise logical inclusive OR, acquire semantics.
1.652 +
1.653 + Atomically performs the following operation:
1.654 + oldv = *a; *a = oldv | v; return oldv;
1.655 +
1.656 + @param a Address of word to be updated - must be a multiple of 8
1.657 + @param v The value to be ORed with *a
1.658 + @return The original value of *a
1.659 +*/
1.660 +EXPORT_C __NAKED__ TUint64 __e32_atomic_ior_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
1.661 + {
1.662 + _asm jmp __e32_atomic_ior_ord64
1.663 + }
1.664 +
1.665 +
1.666 +/** 64 bit atomic bitwise logical inclusive OR, release semantics.
1.667 +
1.668 + Atomically performs the following operation:
1.669 + oldv = *a; *a = oldv | v; return oldv;
1.670 +
1.671 + @param a Address of word to be updated - must be a multiple of 8
1.672 + @param v The value to be ORed with *a
1.673 + @return The original value of *a
1.674 +*/
1.675 +EXPORT_C __NAKED__ TUint64 __e32_atomic_ior_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
1.676 + {
1.677 + _asm jmp __e32_atomic_ior_ord64
1.678 + }
1.679 +
1.680 +
1.681 +/** 64 bit atomic bitwise logical inclusive OR, full barrier semantics.
1.682 +
1.683 + Atomically performs the following operation:
1.684 + oldv = *a; *a = oldv | v; return oldv;
1.685 +
1.686 + @param a Address of word to be updated - must be a multiple of 8
1.687 + @param v The value to be ORed with *a
1.688 + @return The original value of *a
1.689 +*/
1.690 +EXPORT_C __NAKED__ TUint64 __e32_atomic_ior_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
1.691 + {
1.692 + _asm push ebx
1.693 + _asm push edi
1.694 + _asm mov edi, [esp+12] // edi = a
1.695 + _asm mov eax, [edi] // edx:eax = oldv
1.696 + _asm mov edx, [edi+4]
1.697 + _asm retry:
1.698 + _asm mov ebx, eax
1.699 + _asm mov ecx, edx
1.700 + _asm or ebx, [esp+16] // ecx:ebx = oldv | v
1.701 + _asm or ecx, [esp+20]
1.702 + _asm __LOCK__ cmpxchg8b [edi] // if (*a==oldv) *a=oldv|v, ZF=1 else edx:eax=*a, ZF=0
1.703 + _asm jne short retry
1.704 + _asm pop edi
1.705 + _asm pop ebx
1.706 + _asm ret
1.707 + }
1.708 +
1.709 +
1.710 +/** 64 bit atomic bitwise logical exclusive OR, relaxed ordering.
1.711 +
1.712 + Atomically performs the following operation:
1.713 + oldv = *a; *a = oldv ^ v; return oldv;
1.714 +
1.715 + @param a Address of word to be updated - must be a multiple of 8
1.716 + @param v The value to be XORed with *a
1.717 + @return The original value of *a
1.718 +*/
1.719 +EXPORT_C __NAKED__ TUint64 __e32_atomic_xor_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
1.720 + {
1.721 + _asm jmp __e32_atomic_xor_ord64
1.722 + }
1.723 +
1.724 +
1.725 +/** 64 bit atomic bitwise logical exclusive OR, acquire semantics.
1.726 +
1.727 + Atomically performs the following operation:
1.728 + oldv = *a; *a = oldv ^ v; return oldv;
1.729 +
1.730 + @param a Address of word to be updated - must be a multiple of 8
1.731 + @param v The value to be XORed with *a
1.732 + @return The original value of *a
1.733 +*/
1.734 +EXPORT_C __NAKED__ TUint64 __e32_atomic_xor_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
1.735 + {
1.736 + _asm jmp __e32_atomic_xor_ord64
1.737 + }
1.738 +
1.739 +
1.740 +/** 64 bit atomic bitwise logical exclusive OR, release semantics.
1.741 +
1.742 + Atomically performs the following operation:
1.743 + oldv = *a; *a = oldv ^ v; return oldv;
1.744 +
1.745 + @param a Address of word to be updated - must be a multiple of 8
1.746 + @param v The value to be XORed with *a
1.747 + @return The original value of *a
1.748 +*/
1.749 +EXPORT_C __NAKED__ TUint64 __e32_atomic_xor_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
1.750 + {
1.751 + _asm jmp __e32_atomic_xor_ord64
1.752 + }
1.753 +
1.754 +
1.755 +/** 64 bit atomic bitwise logical exclusive OR, full barrier semantics.
1.756 +
1.757 + Atomically performs the following operation:
1.758 + oldv = *a; *a = oldv ^ v; return oldv;
1.759 +
1.760 + @param a Address of word to be updated - must be a multiple of 8
1.761 + @param v The value to be XORed with *a
1.762 + @return The original value of *a
1.763 +*/
1.764 +EXPORT_C __NAKED__ TUint64 __e32_atomic_xor_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
1.765 + {
1.766 + _asm push ebx
1.767 + _asm push edi
1.768 + _asm mov edi, [esp+12] // edi = a
1.769 + _asm mov eax, [edi] // edx:eax = oldv
1.770 + _asm mov edx, [edi+4]
1.771 + _asm retry:
1.772 + _asm mov ebx, eax
1.773 + _asm mov ecx, edx
1.774 + _asm xor ebx, [esp+16] // ecx:ebx = oldv ^ v
1.775 + _asm xor ecx, [esp+20]
1.776 + _asm __LOCK__ cmpxchg8b [edi] // if (*a==oldv) *a=oldv^v, ZF=1 else edx:eax=*a, ZF=0
1.777 + _asm jne short retry
1.778 + _asm pop edi
1.779 + _asm pop ebx
1.780 + _asm ret
1.781 + }
1.782 +
1.783 +
1.784 +/** 64 bit atomic bitwise universal function, relaxed ordering.
1.785 +
1.786 + Atomically performs the following operation:
1.787 + oldv = *a; *a = (oldv & u) ^ v; return oldv;
1.788 +
1.789 + @param a Address of word to be updated - must be a multiple of 8
1.790 + @param u The value to be ANDed with *a
1.791 + @param v The value to be XORed with (*a&u)
1.792 + @return The original value of *a
1.793 +*/
1.794 +EXPORT_C __NAKED__ TUint64 __e32_atomic_axo_rlx64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
1.795 + {
1.796 + _asm jmp __e32_atomic_axo_ord64
1.797 + }
1.798 +
1.799 +
1.800 +/** 64 bit atomic bitwise universal function, acquire semantics.
1.801 +
1.802 + Atomically performs the following operation:
1.803 + oldv = *a; *a = (oldv & u) ^ v; return oldv;
1.804 +
1.805 + @param a Address of word to be updated - must be a multiple of 8
1.806 + @param u The value to be ANDed with *a
1.807 + @param v The value to be XORed with (*a&u)
1.808 + @return The original value of *a
1.809 +*/
1.810 +EXPORT_C __NAKED__ TUint64 __e32_atomic_axo_acq64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
1.811 + {
1.812 + _asm jmp __e32_atomic_axo_ord64
1.813 + }
1.814 +
1.815 +
1.816 +/** 64 bit atomic bitwise universal function, release semantics.
1.817 +
1.818 + Atomically performs the following operation:
1.819 + oldv = *a; *a = (oldv & u) ^ v; return oldv;
1.820 +
1.821 + @param a Address of word to be updated - must be a multiple of 8
1.822 + @param u The value to be ANDed with *a
1.823 + @param v The value to be XORed with (*a&u)
1.824 + @return The original value of *a
1.825 +*/
1.826 +EXPORT_C __NAKED__ TUint64 __e32_atomic_axo_rel64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
1.827 + {
1.828 + _asm jmp __e32_atomic_axo_ord64
1.829 + }
1.830 +
1.831 +
1.832 +/** 64 bit atomic bitwise universal function, full barrier semantics.
1.833 +
1.834 + Atomically performs the following operation:
1.835 + oldv = *a; *a = (oldv & u) ^ v; return oldv;
1.836 +
1.837 + @param a Address of word to be updated - must be a multiple of 8
1.838 + @param u The value to be ANDed with *a
1.839 + @param v The value to be XORed with (*a&u)
1.840 + @return The original value of *a
1.841 +*/
1.842 +EXPORT_C __NAKED__ TUint64 __e32_atomic_axo_ord64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
1.843 + {
1.844 + _asm push ebx
1.845 + _asm push edi
1.846 + _asm mov edi, [esp+12] // edi = a
1.847 + _asm mov eax, [edi] // edx:eax = oldv
1.848 + _asm mov edx, [edi+4]
1.849 + _asm retry:
1.850 + _asm mov ebx, eax
1.851 + _asm mov ecx, edx
1.852 + _asm and ebx, [esp+16] // ecx:ebx = oldv & u
1.853 + _asm and ecx, [esp+20]
1.854 + _asm xor ebx, [esp+24] // ecx:ebx = (oldv & u) ^ v
1.855 + _asm xor ecx, [esp+28]
1.856 + _asm __LOCK__ cmpxchg8b [edi] // if (*a==oldv) *a=(oldv&u)^v, ZF=1 else edx:eax=*a, ZF=0
1.857 + _asm jne short retry
1.858 + _asm pop edi
1.859 + _asm pop ebx
1.860 + _asm ret
1.861 + }
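+
+/* Editor's note: *a = (oldv & u) ^ v subsumes the other bitwise operations.
+   With 'mask' as the (hypothetical) set of bits to modify and w a new value:
+	clear bits:	u = ~mask, v = 0		-> oldv & ~mask
+	set bits:	u = ~mask, v = mask		-> (oldv & ~mask) | mask
+	toggle bits:	u = ~0,    v = mask		-> oldv ^ mask
+	write field:	u = ~mask, v = (w & mask)	-> field replaced by w
+*/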
1.862 +
1.863 +
1.864 +/** 64 bit threshold and add, unsigned, relaxed ordering.
1.865 +
1.866 + Atomically performs the following operation:
1.867 + oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;
1.868 +
1.869 + @param a Address of data to be updated - must be naturally aligned
1.870 + @param t The threshold to compare *a to (unsigned compare)
1.871 + @param u The value to be added to *a if it is originally >= t
1.872 + @param	v	The value to be added to *a if it is originally < t
1.873 + @return The original value of *a
1.874 +*/
1.875 +EXPORT_C __NAKED__ TUint64 __e32_atomic_tau_rlx64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
1.876 + {
1.877 + _asm jmp __e32_atomic_tau_ord64
1.878 + }
1.879 +
1.880 +
1.881 +/** 64 bit threshold and add, unsigned, acquire semantics.
1.882 +
1.883 + Atomically performs the following operation:
1.884 + oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;
1.885 +
1.886 + @param a Address of data to be updated - must be naturally aligned
1.887 + @param t The threshold to compare *a to (unsigned compare)
1.888 + @param u The value to be added to *a if it is originally >= t
1.889 + @param	v	The value to be added to *a if it is originally < t
1.890 + @return The original value of *a
1.891 +*/
1.892 +EXPORT_C __NAKED__ TUint64 __e32_atomic_tau_acq64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
1.893 + {
1.894 + _asm jmp __e32_atomic_tau_ord64
1.895 + }
1.896 +
1.897 +
1.898 +/** 64 bit threshold and add, unsigned, release semantics.
1.899 +
1.900 + Atomically performs the following operation:
1.901 + oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;
1.902 +
1.903 + @param a Address of data to be updated - must be naturally aligned
1.904 + @param t The threshold to compare *a to (unsigned compare)
1.905 + @param u The value to be added to *a if it is originally >= t
1.906 + @param	v	The value to be added to *a if it is originally < t
1.907 + @return The original value of *a
1.908 +*/
1.909 +EXPORT_C __NAKED__ TUint64 __e32_atomic_tau_rel64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
1.910 + {
1.911 + _asm jmp __e32_atomic_tau_ord64
1.912 + }
1.913 +
1.914 +
1.915 +/** 64 bit threshold and add, unsigned, full barrier semantics.
1.916 +
1.917 + Atomically performs the following operation:
1.918 + oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;
1.919 +
1.920 + @param a Address of data to be updated - must be naturally aligned
1.921 + @param t The threshold to compare *a to (unsigned compare)
1.922 + @param u The value to be added to *a if it is originally >= t
1.923 + @param	v	The value to be added to *a if it is originally < t
1.924 + @return The original value of *a
1.925 +*/
1.926 +EXPORT_C __NAKED__ TUint64 __e32_atomic_tau_ord64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
1.927 + {
1.928 + _asm push ebx
1.929 + _asm push edi
1.930 + _asm mov edi, [esp+12] // edi = a
1.931 + _asm mov eax, [edi] // edx:eax = oldv
1.932 + _asm mov edx, [edi+4]
1.933 + _asm retry:
1.934 + _asm mov ebx, edx
1.935 + _asm cmp eax, [esp+16] // eax - t.low, CF=borrow
1.936 + _asm sbb ebx, [esp+20] // CF = borrow from (oldv - t)
1.937 + _asm jnc short use_u // no borrow means oldv>=t so use u
1.938 + _asm mov ebx, [esp+32] // ecx:ebx = v
1.939 + _asm mov ecx, [esp+36]
1.940 + _asm jmp short use_v
1.941 + _asm use_u:
1.942 + _asm mov ebx, [esp+24] // ecx:ebx = u
1.943 + _asm mov ecx, [esp+28]
1.944 + _asm use_v:
1.945 + _asm add ebx, eax // ecx:ebx = oldv + u or v
1.946 + _asm adc ecx, edx
1.947 + _asm __LOCK__ cmpxchg8b [edi]
1.948 + _asm jne short retry
1.949 + _asm pop edi
1.950 + _asm pop ebx
1.951 + _asm ret
1.952 + }
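+
+/* Example (editor's sketch): the threshold form gives conditional arithmetic
+   in a single atomic step, e.g. decrement a counter but never below zero
+   ('iCount' is hypothetical):
+
+	// decrements only if the previous value was >= 1; returns the old value
+	TUint64 prev = __e32_atomic_tau_ord64(&iCount, 1, (TUint64)-1, 0);
+*/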
1.953 +
1.954 +
1.955 +/** 64 bit threshold and add, signed, relaxed ordering.
1.956 +
1.957 + Atomically performs the following operation:
1.958 + oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;
1.959 +
1.960 + @param a Address of data to be updated - must be naturally aligned
1.961 + @param t The threshold to compare *a to (signed compare)
1.962 + @param u The value to be added to *a if it is originally >= t
1.963 + @param	v	The value to be added to *a if it is originally < t
1.964 + @return The original value of *a
1.965 +*/
1.966 +EXPORT_C __NAKED__ TInt64 __e32_atomic_tas_rlx64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
1.967 + {
1.968 + _asm jmp __e32_atomic_tas_ord64
1.969 + }
1.970 +
1.971 +
1.972 +/** 64 bit threshold and add, signed, acquire semantics.
1.973 +
1.974 + Atomically performs the following operation:
1.975 + oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;
1.976 +
1.977 + @param a Address of data to be updated - must be naturally aligned
1.978 + @param t The threshold to compare *a to (signed compare)
1.979 + @param u The value to be added to *a if it is originally >= t
1.980 + @param	v	The value to be added to *a if it is originally < t
1.981 + @return The original value of *a
1.982 +*/
1.983 +EXPORT_C __NAKED__ TInt64 __e32_atomic_tas_acq64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
1.984 + {
1.985 + _asm jmp __e32_atomic_tas_ord64
1.986 + }
1.987 +
1.988 +
1.989 +/** 64 bit threshold and add, signed, release semantics.
1.990 +
1.991 + Atomically performs the following operation:
1.992 + oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;
1.993 +
1.994 + @param a Address of data to be updated - must be naturally aligned
1.995 + @param t The threshold to compare *a to (signed compare)
1.996 + @param u The value to be added to *a if it is originally >= t
1.997 + @param	v	The value to be added to *a if it is originally < t
1.998 + @return The original value of *a
1.999 +*/
1.1000 +EXPORT_C __NAKED__ TInt64 __e32_atomic_tas_rel64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
1.1001 + {
1.1002 + _asm jmp __e32_atomic_tas_ord64
1.1003 + }
1.1004 +
1.1005 +
1.1006 +/** 64 bit threshold and add, signed, full barrier semantics.
1.1007 +
1.1008 + Atomically performs the following operation:
1.1009 + oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;
1.1010 +
1.1011 + @param a Address of data to be updated - must be naturally aligned
1.1012 + @param t The threshold to compare *a to (signed compare)
1.1013 + @param u The value to be added to *a if it is originally >= t
1.1014 + @param	v	The value to be added to *a if it is originally < t
1.1015 + @return The original value of *a
1.1016 +*/
1.1017 +EXPORT_C __NAKED__ TInt64 __e32_atomic_tas_ord64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
1.1018 + {
1.1019 + _asm push ebx
1.1020 + _asm push edi
1.1021 + _asm mov edi, [esp+12] // edi = a
1.1022 + _asm mov eax, [edi] // edx:eax = oldv
1.1023 + _asm mov edx, [edi+4]
1.1024 + _asm retry:
1.1025 + _asm mov ebx, edx
1.1026 + _asm cmp eax, [esp+16] // eax - t.low, CF=borrow
1.1027 + _asm sbb ebx, [esp+20] // SF=sign, OF=overflow from (oldv - t)
1.1028 + _asm jge short use_u // SF==OF (GE condition) means oldv>=t so use u
1.1029 + _asm mov ebx, [esp+32] // ecx:ebx = v
1.1030 + _asm mov ecx, [esp+36]
1.1031 + _asm jmp short use_v
1.1032 + _asm use_u:
1.1033 + _asm mov ebx, [esp+24] // ecx:ebx = u
1.1034 + _asm mov ecx, [esp+28]
1.1035 + _asm use_v:
1.1036 + _asm add ebx, eax // ecx:ebx = oldv + u or v
1.1037 + _asm adc ecx, edx
1.1038 + _asm __LOCK__ cmpxchg8b [edi]
1.1039 + _asm jne short retry
1.1040 + _asm pop edi
1.1041 + _asm pop ebx
1.1042 + _asm ret
1.1043 + }
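+
+/* Example (editor's sketch): the signed variant allows clamped updates, e.g.
+   raise a signed level towards zero but never past it ('iLevel' hypothetical):
+
+	// if *a >= 0 add nothing, otherwise add 1; returns the old value
+	TInt64 prev = __e32_atomic_tas_ord64(&iLevel, 0, 0, 1);
+*/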
1.1044 +
1.1045 +} // extern "C"