// Copyright (c) 1995-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32\common\arm\cmem.cia
//
//

#include "../common.h"
#include <e32cia.h>
#if defined(__REPLACE_GENERIC_UTILS)
#include "replacement_utils.h"
#endif

#if defined(__MEM_MACHINE_CODED__)

#ifndef USE_REPLACEMENT_MEMSET

#if defined(_DEBUG)

#ifdef __STANDALONE_NANOKERNEL__

#define ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc)	\
	asm("tst "#rt1", #3"); \
	asm("ldrne "#rt1", ["#rt1"]")

#else	// __STANDALONE_NANOKERNEL__
GLDEF_C void PanicEWordMoveLengthNotMultipleOf4();
GLDEF_C void PanicEWordMoveSourceNotAligned();
GLDEF_C void PanicEWordMoveTargetNotAligned();

#define ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc)	\
	asm("tst "#rt1", #3"); \
	asm("bne " panicfunc )

#endif	// __STANDALONE_NANOKERNEL__

#else	// _DEBUG

#define ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc)

#endif	// _DEBUG


// See header file e32cmn.h for the in-source documentation.
extern "C" EXPORT_C __NAKED__ TAny* memclr(TAny* /*aTrg*/, unsigned int /*aLength*/)
	{
	KMEMCLRHOOK
	asm("mov r2, #0 ");
	asm("b fill ");
	}
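
// memclr simply loads a zero fill value and branches into the shared 'fill'
// path inside memset below, so both entry points use the same fill engine.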

// See header file e32cmn.h for the in-source documentation.
extern "C" EXPORT_C __NAKED__ TAny* memset(TAny* /*aTrg*/, TInt /*aValue*/, unsigned int /*aLength*/)
    {
    KMEMSETHOOK
    asm("   mov		 r3, r2 ");				/* length into r3 */
    asm("   and      r2,r1,#255");			/* fill value into r2 */
	asm("	mov		 r1, r3 ");				/* length into r1 */

    asm("fill:");
    asm("   cmp      r1,#8");
	asm("   bls      small_fill");			// only taken ~20% of the time

    asm("   stmfd    sp!,{r0,r4-r9,lr}");
	asm("   movs     r3, r0, lsl #30 ");	// check if word aligned
	asm("   orr      r2,r2,r2,lsl #8");
    asm("   orr      r2,r2,r2,lsl #16");
	asm("   bne		 unaligned_fill ");

	// Align destination address to 32 byte boundary if possible

	asm("word_aligned_fill: ");
    asm("   mov      r4,r2");
    asm("   mov      r5,r2");
    asm("   mov      r6,r2");
	asm("   movs     r3, r0, lsl #27 ");
    asm("   beq      aligned_fill ");
    asm("   rsb      r3, r3, #0 ");				// calculate fill length necessary for alignment
	asm("   cmp      r1, r3, lsr #27 ");		// compare with remaining length
	asm("   blo		 smaller_fill ");			// skip alignment if remaining length is smaller
	asm("   msr      cpsr_f, r3 ");				// put length bits 4, 3, 2 into N, Z, C flags
    asm("   strcs    r2, [r0], #4 ");			// align to 8 byte boundary
    asm("   stmeqia  r0!, {r2, r4} ");			// align to 16 byte boundary
    asm("   stmmiia  r0!, {r2, r4-r6} ");		// align to 32 byte boundary
	asm("   sub      r1, r1, r3, lsr #27 ");	// adjust remaining length

    asm("aligned_fill:");
	asm("   cmp		 r1, #64 ");
	asm("   bhs		 big_fill ");

	// Fill 0-63 bytes

    asm("smaller_fill:");
    asm("   movs     r1, r1, lsl #26");
	asm("	beq		 mem_fill_end ");
    asm("   msr      cpsr_flg, r1 ");
    asm("   stmmiia  r0!,{r2,r4-r6}");	// Fill 32
    asm("   stmmiia  r0!,{r2,r4-r6}");
    asm("   stmeqia  r0!,{r2,r4-r6}");	// Fill 16
    asm("   stmcsia  r0!,{r2,r4}");		// Fill 8
    asm("   strvs    r2,[r0],#4");		// Fill 4
	asm("   movs	 r1, r1, lsl #4 ");
	asm("	bne		 smallest_fill ");
	asm("mem_fill_end: ");
	__POPRET("r0,r4-r9,");

	// Fill last 1-3 bytes

    asm("smallest_fill: ");
    asm("   msr      cpsr_flg,r1");
    asm("   strmih   r2,[r0],#2");  	// Fill 2
    asm("   streqb   r2,[r0],#1");  	// Fill 1
	__POPRET("r0,r4-r9,");

	// Fill loop for length >= 64

	asm("big_fill: ");
	asm("   mov      r3,r2");
    asm("   mov      r7,r2");
    asm("   mov      r8,r2");
    asm("   mov      r9,r2");
    asm("   movs     ip,r1,lsr #8");	// Number of 256 byte blocks to fill
	asm("   beq		 medium_fill ");
    asm("fill_256_bytes_loop:");
    asm("   stmia    r0!,{r2-r9}");		// Fill 256 bytes
    asm("   stmia    r0!,{r2-r9}");
    asm("   stmia    r0!,{r2-r9}");
    asm("   stmia    r0!,{r2-r9}");
    asm("   stmia    r0!,{r2-r9}");
    asm("   stmia    r0!,{r2-r9}");
    asm("   stmia    r0!,{r2-r9}");
    asm("   stmia    r0!,{r2-r9}");
    asm("   subs     ip,ip,#1");
    asm("   bne      fill_256_bytes_loop");
	asm("medium_fill: ");
    asm("   movs     ip,r1,lsl #24");
    asm("   msr      cpsr_flg,ip");
    asm("   stmmiia  r0!,{r2-r9}");		// Fill 128
    asm("   stmmiia  r0!,{r2-r9}");
    asm("   stmmiia  r0!,{r2-r9}");
    asm("   stmmiia  r0!,{r2-r9}");
    asm("   stmeqia  r0!,{r2-r9}");		// Fill 64
    asm("   stmeqia  r0!,{r2-r9}");
	asm("   and		 r1, r1, #63 ");
	asm("   b 		 smaller_fill");

	// Word-align destination address, length > 8

	asm("unaligned_fill: ");
    asm("   rsb      r3, r3, #0 ");				// calculate fill length necessary for alignment
    asm("   msr      cpsr_flg, r3");
	asm("   streqb   r2, [r0], #1 ");			// align to 2 byte boundary
    asm("   strmih   r2, [r0], #2 ");			// align to 4 byte boundary
	asm("   sub      r1, r1, r3, lsr #30 ");
	asm("	b		 word_aligned_fill ");

	// Fill for length <= 8

	asm("small_fill: ");
	asm("	mov		 r3, r0 ");				/* r3=dest */
	asm("   adr      ip, small_fill_end ");
	asm("   sub		 pc, ip, r1, lsl #2 ");
    asm("   strb     r2, [r3], #1");
    asm("   strb     r2, [r3], #1");
    asm("   strb     r2, [r3], #1");
    asm("   strb     r2, [r3], #1");
    asm("   strb     r2, [r3], #1");
    asm("   strb     r2, [r3], #1");
    asm("   strb     r2, [r3], #1");
    asm("   strb     r2, [r3], #1");
	asm("small_fill_end: ");
	__JUMP(,lr);

#ifdef __EABI__
	// The AEABI switched the order of arg2 and arg3 to save an instruction when
	// calling 'memset' from 'memclr'
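	// (Per the AEABI run-time ABI, __aeabi_memset takes (dest, size, value),
	// so the length is already in r1 and the value in r2 - mask the value and
	// enter 'fill' directly, skipping the register shuffle at the top of memset.)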
	asm(".global __aeabi_memset8 ");
	asm("__aeabi_memset8: 		 ");
	asm(".global __aeabi_memset4 ");
	asm("__aeabi_memset4: 		 ");
	asm(".global __aeabi_memset  ");
	asm("__aeabi_memset: 		 ");
    asm("   and      r2, r2, #255");
	asm("	b		 fill		 ");
#endif
    }

#endif  // USE_REPLACEMENT_MEMSET

#ifndef USE_REPLACEMENT_MEMCPY

// See header file e32cmn.h for the in-source documentation.

extern "C" EXPORT_C __NAKED__ TAny* wordmove(TAny* /*aTrg*/, const TAny* /*aSrc*/, unsigned int /*aLength*/)
//
// Assumes source, target and length are all word aligned
//
    {
	ARM_ASSERT_MULTIPLE_OF_FOUR(r0, CSM_Z30PanicEWordMoveTargetNotAlignedv);
	ARM_ASSERT_MULTIPLE_OF_FOUR(r1, CSM_Z30PanicEWordMoveSourceNotAlignedv);
	ARM_ASSERT_MULTIPLE_OF_FOUR(r2, CSM_Z34PanicEWordMoveLengthNotMultipleOf4v);

	// Mask length to a multiple of four bytes to avoid memory or register
	// corruption by the special cases below.
	asm("bic r2,r2,#3");

	// Length <= 24 in ~90% of cases, however we can only copy > 16 bytes in 4
	// instructions if the LDM instruction restores the Thumb state when loading
	// the PC.
#ifdef __CPU_ARM_LDR_PC_SETS_TBIT
	asm("cmp r2, #24 ");
#else
	asm("cmp r2, #16 ");
#endif
	PLD(1);
	asm("addls pc, pc, r2, lsl #2 ");		// take branch depending on size
	asm("b 9f ");							// too big

	// 0 words
	__JUMP(,lr);
	__JUMP(,lr);
	__JUMP(,lr);
	__JUMP(,lr);

	// 1 word
	asm("ldr ip, [r1] ");
	asm("str ip, [r0] ");
	__JUMP(,lr);
	__JUMP(,lr);

	// 2 words
	asm("ldmia r1, {r2,r3}");
	asm("stmia r0, {r2,r3}");
	__JUMP(,lr);
	__JUMP(,lr);

	// 3 words
	asm("ldmia r1, {r2,r3,ip}");
	asm("stmia r0, {r2,r3,ip}");
	__JUMP(,lr);
	__JUMP(,lr);

	// 4 words
	asm("ldmia r1, {r1,r2,r3,ip}");
	asm("stmia r0, {r1,r2,r3,ip}");
	__JUMP(,lr);
	__JUMP(,lr);

#ifdef __CPU_ARM_LDR_PC_SETS_TBIT
	// 5 words
	asm("stmfd sp!, {lr}");
	asm("ldmia r1, {r1,r2,r3,ip,lr}");
	asm("stmia r0, {r1,r2,r3,ip,lr}");
	asm("ldmfd sp!, {pc}");

	// 6 words
	asm("stmfd sp!, {r4,lr}");
	asm("ldmia r1, {r1,r2,r3,r4,ip,lr}");
	asm("stmia r0, {r1,r2,r3,r4,ip,lr}");
	asm("ldmfd sp!, {r4,pc}");
#endif

	asm("9: ");
    asm("subs r3, r0, r1 ");				// r3 = dest - source
	__JUMP(eq,lr);							// return if source = dest
    asm("stmfd sp!, {r0,r4-r11,lr} ");
	asm("cmphi r2, r3 ");					// if dest>source, compare length with dest-source
    asm("bls mem_move_fore ");				// if dest<source or length<=dest-source do forwards aligned copy
    asm("add r0, r0, r2 ");
    asm("add r1, r1, r2 ");
    asm("b mem_move_back ");				// backwards aligned copy
    }


// See header file e32cmn.h for the in-source documentation.
extern "C" EXPORT_C __NAKED__ TAny* memmove(TAny* /*aTrg*/, const TAny* /*aSrc*/, unsigned int /*aLength*/)
	{
	KMEMMOVEHOOK
	// fall through
	}


// See header file e32cmn.h for the in-source documentation.
extern "C" EXPORT_C __NAKED__ TAny* memcpy(TAny* /*aTrg*/, const TAny* /*aSrc*/, unsigned int /*aLength*/)
    {
    KMEMCPYHOOK
//
// Check for zero length or source and target being the same
//
    asm("	cmp		r2, #0 ");				// zero length?
    asm("	subnes	r3, r0, r1 ");			// if not, r3 = dest-source
	__JUMP(eq,lr);							// if zero length or dest=source, nothing to do
	asm("	cmphi	r2, r3 ");				// if dest>source compare length to dest-source
	asm("	movhi	r3, #0 ");				// if dest>source and length>dest-source need to go backwards - set r3=0
//
//	If <16 bytes, just do byte moves
//
    asm("	cmp		r2,	#15 ");
	asm("	bhi		main_copy ");

	asm("	ldrb	r12, [r0] ");			// read dest so it's in cache - avoid lots of single accesses to external memory
	asm("	sub		r12, r0, #1 ");
	asm("	ldrb	r12, [r12, r2] ");		// read dest+length-1
	asm("	cmp		r3, #0 ");
	asm("	beq		small_copy_back ");		// r3=0 means go backwards

	asm("small_copy_fwd: ");
	asm("	mov		r3, r0 ");
	asm("	adr		r12, small_copy_fwd_end ");
	asm("	sub		pc, r12, r2, lsl #3 ");

	asm("	ldrb	r12, [r1], #1 ");
	asm("	strb	r12, [r3], #1 ");
	asm("	ldrb	r12, [r1], #1 ");
	asm("	strb	r12, [r3], #1 ");
	asm("	ldrb	r12, [r1], #1 ");
	asm("	strb	r12, [r3], #1 ");
	asm("	ldrb	r12, [r1], #1 ");
	asm("	strb	r12, [r3], #1 ");
	asm("	ldrb	r12, [r1], #1 ");
	asm("	strb	r12, [r3], #1 ");
	asm("	ldrb	r12, [r1], #1 ");
	asm("	strb	r12, [r3], #1 ");
	asm("	ldrb	r12, [r1], #1 ");
	asm("	strb	r12, [r3], #1 ");
	asm("	ldrb	r12, [r1], #1 ");
	asm("	strb	r12, [r3], #1 ");
	asm("	ldrb	r12, [r1], #1 ");
	asm("	strb	r12, [r3], #1 ");
	asm("	ldrb	r12, [r1], #1 ");
	asm("	strb	r12, [r3], #1 ");
	asm("	ldrb	r12, [r1], #1 ");
	asm("	strb	r12, [r3], #1 ");
	asm("	ldrb	r12, [r1], #1 ");
	asm("	strb	r12, [r3], #1 ");
	asm("	ldrb	r12, [r1], #1 ");
	asm("	strb	r12, [r3], #1 ");
	asm("	ldrb	r12, [r1], #1 ");
	asm("	strb	r12, [r3], #1 ");
	asm("	ldrb	r12, [r1], #1 ");
	asm("	strb	r12, [r3], #1 ");
	asm("small_copy_fwd_end: ");
	__JUMP(,lr);

	asm("small_copy_back: ");
	asm("	add		r3, r0, r2 ");
	asm("	add		r1, r1, r2 ");
	asm("	adr		r12, small_copy_back_end ");
	asm("	sub		pc, r12, r2, lsl #3 ");

	asm("	ldrb	r12, [r1, #-1]! ");
	asm("	strb	r12, [r3, #-1]! ");
	asm("	ldrb	r12, [r1, #-1]! ");
	asm("	strb	r12, [r3, #-1]! ");
	asm("	ldrb	r12, [r1, #-1]! ");
	asm("	strb	r12, [r3, #-1]! ");
	asm("	ldrb	r12, [r1, #-1]! ");
	asm("	strb	r12, [r3, #-1]! ");
	asm("	ldrb	r12, [r1, #-1]! ");
	asm("	strb	r12, [r3, #-1]! ");
	asm("	ldrb	r12, [r1, #-1]! ");
	asm("	strb	r12, [r3, #-1]! ");
	asm("	ldrb	r12, [r1, #-1]! ");
	asm("	strb	r12, [r3, #-1]! ");
	asm("	ldrb	r12, [r1, #-1]! ");
	asm("	strb	r12, [r3, #-1]! ");
	asm("	ldrb	r12, [r1, #-1]! ");
	asm("	strb	r12, [r3, #-1]! ");
	asm("	ldrb	r12, [r1, #-1]! ");
	asm("	strb	r12, [r3, #-1]! ");
	asm("	ldrb	r12, [r1, #-1]! ");
	asm("	strb	r12, [r3, #-1]! ");
	asm("	ldrb	r12, [r1, #-1]! ");
	asm("	strb	r12, [r3, #-1]! ");
	asm("	ldrb	r12, [r1, #-1]! ");
	asm("	strb	r12, [r3, #-1]! ");
	asm("	ldrb	r12, [r1, #-1]! ");
	asm("	strb	r12, [r3, #-1]! ");
	asm("	ldrb	r12, [r1, #-1]! ");
	asm("	strb	r12, [r3, #-1]! ");
	asm("small_copy_back_end: ");
	__JUMP(,lr);


	asm("main_copy: ");
	PLD(1);											// preload first two cache lines
	PLD_ioff(1, 32);
	asm("	stmfd	sp!, {r0,r4-r11,lr} ");			// r0 == dest, r1 == src, r2 == len
	asm("	cmp		r3, #0 ");
	asm("	beq		copy_back ");					// we must go backwards
    asm("   movs	r3, r0, lsl #30 ");				// check destination word aligned
	asm("   bne		dest_unaligned_fore ");

//
// Normal copy forwards. r0 should point to end address on exit
// Destination now word-aligned; if source is also word-aligned, do aligned copy.
//	
	asm("dest_aligned_fore: ");
    asm("   ands	r12, r1, #3 ");		// r12=alignment of source
    asm("   bne		copy_fwd_nonaligned ");

//
// We are now word aligned, at least 13 bytes to do
//
    asm("mem_move_fore:");
//
// superalign
//
    asm("	movs	r4, r0, lsl #27 ");		 		 		// destination alignment into r4
	asm("	beq		f_al_already_aligned ");				// fast path
	asm("	rsb		r4, r4, #0 ");							// bytes required to align destination to 32
	asm("	cmp		r2, r4, lsr #27 ");						// check that many remaining
	asm("	blo		its_smaller_fore ");					// if too short, just stick with word alignment
	asm("	msr		cpsr_flg, r4 ");		 		 		// destination alignment into N, Z, C flags
															// do word moves to align destination
	asm("	ldrcs	lr, [r1], #4 ");						// C flag == 1 word (we are already word aligned)
	asm("	ldmeqia	r1!, {r3,r9} ");						// Z flag == 2 words
	asm("	ldmmiia	r1!, {r5-r8} ");						// N flag == 4 words, destination now 32 byte aligned
	asm("	sub		r2, r2, r4, lsr #27 ");		 			// adjust length
	asm("	strcs	lr, [r0], #4 ");						// destination now 8 byte aligned
	asm("	stmeqia	r0!, {r3,r9} ");						// destination now 16 byte aligned
	asm("	stmmiia	r0!, {r5-r8} ");						// destination now 32 byte aligned	

	asm("f_al_already_aligned: ");
	asm("	cmp		r2, #64 ");
	asm("	bhs		large_copy_fore ");
//
// Less than 64 bytes to go...
//	
    asm("its_smaller_fore:");
    asm("	movs	ip, r2, lsl #26 ");		// length bits 5, 4, 3, 2 into N, Z, C, V
	asm("	beq		mem_copy_end ");		// skip if remaining length zero
    asm("	msr		cpsr_flg, ip ");
    asm("	ldmmiia	r1!, {r3-r10} ");
    asm("	stmmiia	r0!, {r3-r10} ");		// copy 32	
    asm("	ldmeqia	r1!, {r3-r6} ");
    asm("	ldmcsia	r1!, {r7-r8} ");
    asm("	ldrvs	r9, [r1], #4 ");
    asm("	stmeqia	r0!, {r3-r6} ");		// copy 16
    asm("	stmcsia	r0!, {r7-r8} ");		// copy 8
    asm("	strvs	r9, [r0], #4 ");		// copy 4

    asm("	movs	ip, r2, lsl #30 ");	
	asm("	bne		smallest_copy_fore ");
	
	asm("mem_copy_end: ");
	__POPRET("r0,r4-r11,");

//
// Less than 4 bytes to go...
//
	asm("smallest_copy_fore: ");
    asm("	msr		cpsr_flg, ip ");
    asm("	ldrmih	r3, [r1], #2 ");
    asm("	ldreqb	r4, [r1], #1 ");
    asm("	strmih	r3, [r0], #2 ");		// copy 2
    asm("	streqb	r4, [r0], #1 ");		// copy 1
	__POPRET("r0,r4-r11,");

//
// Do byte moves if necessary to word-align destination
//
	asm("dest_unaligned_fore: ");
	asm("	rsb		r3, r3, #0 ");
	asm("	msr		cpsr_flg, r3 ");
	asm("	ldrmib	r4, [r1], #1 ");				// move bytes to align destination
	asm("	ldrmib	r5, [r1], #1 ");
	asm("	ldreqb	r6, [r1], #1 ");
	asm("	sub		r2, r2, r3, lsr #30 ");			// adjust length, at least 13 bytes remaining
	asm("	strmib	r4, [r0], #1 ");
	asm("	strmib	r5, [r0], #1 ");
	asm("	streqb	r6, [r0], #1 ");
	asm("   b		dest_aligned_fore ");

//
//	Large copy, length >= 64
//
	asm("large_copy_fore: ");
	asm("	movs	ip, r2, lsr #6 ");						// ip = number of 64 byte blocks to copy
	asm("1: ");
	PLD_ioff(1, 32);
	PLD_ioff(1, 64);
    asm("	ldmia	r1!, {r3-r10} ");		// Copy 64
    asm("	stmia	r0!, {r3-r10} "); 
    asm("	ldmia	r1!, {r3-r10} ");
    asm("	subs	ip, ip, #1 ");
    asm("	stmia	r0!, {r3-r10} "); 	
	asm("	bne		1b ");		
	asm("	and		r2, r2, #63 ");
	asm("	b		its_smaller_fore ");

//
// Forward unaligned copy
//	
	asm("copy_fwd_nonaligned:");
//
// superalign
//	
	asm("	bic		r1, r1, #3 ");					// align source
	asm("	ldr		r11, [r1], #4 ");				// get first word
	asm("	mov		r12, r12, lsl #3 ");			// r12 = 8*source alignment
	asm("	ands	r4, r0, #31 ");					// destination alignment into r4
	asm("	beq		medium_unal_copy ");			// skip if already aligned
	asm("	rsb		r4, r4, #32 ");					// r4 = bytes to align dest to 32
	asm("	cmp		r2, r4 ");						// check if length big enough to align to 32
	asm("	blo		copy_fwd_remainder ");			// skip if too small
	asm("	sub		r2, r2, r4 ");					// adjust length
	asm("	rsb		r3, r12, #32 ");				// r3 = 32 - 8*source alignment

	asm("1: ");
	asm("	mov		r5, r11, lsr r12 ");			// r5 = part of previous source word required to make destination word
	asm("	ldr		r11, [r1], #4 ");				// get next word
	asm("	subs	r4, r4, #4 ");					// 4 bytes less to do
	asm("	orr		r5, r5, r11, lsl r3 ");			// form next destination word
	asm("	str		r5, [r0], #4 ");				// and store it
	asm("	bne		1b ");							// loop until destination 32 byte aligned

	asm("medium_unal_copy: ");						// destination now aligned to 32 bytes
	asm("	movs	lr, r2, lsr #5 ");				// lr=number of 32-byte blocks
	asm("	beq		copy_fwd_remainder ");			// skip if length < 32

	asm("	cmp		r12, #16 ");
	asm("	beq		copy_fwd_nonaligned_2 ");		// branch if source = 2 mod 4
	asm("	bhi		copy_fwd_nonaligned_3 ");		// branch if source = 3 mod 4, else source = 1 mod 4

// source = 1 mod 4
	asm("copy_fwd_nonaligned_1: ");
	asm("	mov		r3, r11, lsr #8 ");
	asm("	ldmia	r1!, {r4-r11} ");
	PLD_ioff(1, 32);
	asm("	subs	lr, lr, #1 ");
	asm("	orr		r3, r3, r4, lsl #24 ");
	asm("	mov		r4, r4, lsr #8 ");
	asm("	orr		r4, r4, r5, lsl #24 ");
	asm("	mov		r5, r5, lsr #8 ");
	asm("	orr		r5, r5, r6, lsl #24 ");
	asm("	mov		r6, r6, lsr #8 ");
	asm("	orr		r6, r6, r7, lsl #24 ");
	asm("	mov		r7, r7, lsr #8 ");
	asm("	orr		r7, r7, r8, lsl #24 ");
	asm("	mov		r8, r8, lsr #8 ");
	asm("	orr		r8, r8, r9, lsl #24 ");
	asm("	mov		r9, r9, lsr #8 ");
	asm("	orr		r9, r9, r10, lsl #24 ");
	asm("	mov		r10, r10, lsr #8 ");
	asm("	orr		r10, r10, r11, lsl #24 ");
	asm("	stmia	r0!, {r3-r10} ");
	asm("	bne		copy_fwd_nonaligned_1 ");
	asm("	b		copy_fwd_remainder ");

// source = 2 mod 4
	asm("copy_fwd_nonaligned_2: ");
	asm("	mov		r3, r11, lsr #16 ");
	asm("	ldmia	r1!, {r4-r11} ");
	PLD_ioff(1, 32);
	asm("	subs	lr, lr, #1 ");
	asm("	orr		r3, r3, r4, lsl #16 ");
	asm("	mov		r4, r4, lsr #16 ");
	asm("	orr		r4, r4, r5, lsl #16 ");
	asm("	mov		r5, r5, lsr #16 ");
	asm("	orr		r5, r5, r6, lsl #16 ");
	asm("	mov		r6, r6, lsr #16 ");
	asm("	orr		r6, r6, r7, lsl #16 ");
	asm("	mov		r7, r7, lsr #16 ");
	asm("	orr		r7, r7, r8, lsl #16 ");
	asm("	mov		r8, r8, lsr #16 ");
	asm("	orr		r8, r8, r9, lsl #16 ");
	asm("	mov		r9, r9, lsr #16 ");
	asm("	orr		r9, r9, r10, lsl #16 ");
	asm("	mov		r10, r10, lsr #16 ");
	asm("	orr		r10, r10, r11, lsl #16 ");
	asm("	stmia	r0!, {r3-r10} ");
	asm("	bne		copy_fwd_nonaligned_2 ");
	asm("	b		copy_fwd_remainder ");

// source = 3 mod 4
	asm("copy_fwd_nonaligned_3: ");
	asm("	mov		r3, r11, lsr #24 ");
	asm("	ldmia	r1!, {r4-r11} ");
	PLD_ioff(1, 32);
	asm("	subs	lr, lr, #1 ");
	asm("	orr		r3, r3, r4, lsl #8 ");
	asm("	mov		r4, r4, lsr #24 ");
	asm("	orr		r4, r4, r5, lsl #8 ");
	asm("	mov		r5, r5, lsr #24 ");
	asm("	orr		r5, r5, r6, lsl #8 ");
	asm("	mov		r6, r6, lsr #24 ");
	asm("	orr		r6, r6, r7, lsl #8 ");
	asm("	mov		r7, r7, lsr #24 ");
	asm("	orr		r7, r7, r8, lsl #8 ");
	asm("	mov		r8, r8, lsr #24 ");
	asm("	orr		r8, r8, r9, lsl #8 ");
	asm("	mov		r9, r9, lsr #24 ");
	asm("	orr		r9, r9, r10, lsl #8 ");
	asm("	mov		r10, r10, lsr #24 ");
	asm("	orr		r10, r10, r11, lsl #8 ");
	asm("	stmia	r0!, {r3-r10} ");
	asm("	bne		copy_fwd_nonaligned_3 ");

// <32 bytes to go, source alignment could be 1, 2 or 3 mod 4
// r12 = 8 * (source mod 4)
	asm("copy_fwd_remainder: ");
	asm("	ands	r4, r2, #0x1c ");			// r4 = 4*number of words left
	asm("	beq		2f ");						// skip if none
	asm("	rsb		r3, r12, #32 ");			// r3 = 32 - 8*source alignment

	asm("1: ");
	asm("	mov		r5, r11, lsr r12 ");		// r5 = part of previous source word required to make destination word
	asm("	ldr		r11, [r1], #4 ");			// get next word
	asm("	subs	r4, r4, #4 ");				// 4 bytes less to do
	asm("	orr		r5, r5, r11, lsl r3 ");		// form next destination word
	asm("	str		r5, [r0], #4 ");			// and store it
	asm("	bne		1b ");						// loop until all whole words moved

	asm("2: ");
	asm("	sub		r1, r1, #4 ");
	asm("	add		r1, r1, r12, lsr #3 ");		// r1 = real unaligned source address
	asm("	tst		r2, #2 ");					// 2 bytes left?
	asm("	ldrneb	r5, [r1], #1 ");			// copy 2
	asm("	strneb	r5, [r0], #1 ");
	asm("	ldrneb	r5, [r1], #1 ");
	asm("	strneb	r5, [r0], #1 ");
	asm("	tst		r2, #1 ");					// 1 byte left?
	asm("	ldrneb	r5, [r1], #1 ");			// copy 1
	asm("	strneb	r5, [r0], #1 ");
	__POPRET("r0,r4-r11,");

//
// Source is before destination and they overlap, so need to copy backwards
//
    asm("copy_back:");
	asm("	add		r0, r0, r2 ");				// r0=last dest address+1
	asm("	add		r1, r1, r2 ");				// r1=last source address+1
	PLD_noff(1, 33);							// preload last two cache lines
	PLD_noff(1, 1);

    asm("	movs	r3, r0, lsl #30 ");			// check destination word aligned
	asm("	bne		dest_unaligned_back ");

	asm("dest_aligned_back: ");
	asm("	ands	r12, r1, #3 ");					// r12=alignment of source
    asm("	bne		copy_back_nonaligned ");

//
// Backwards copying, addresses both word aligned, at least 13 bytes to go
//
    asm("mem_move_back:");
//
// superalign
// 
	asm("	movs	r4, r0, lsl #27 ");					// bytes required to align destination to 32
	asm("	beq		bal_already_aligned ");				// skip if already aligned to 32
	asm("	cmp		r2, r4, lsr #27 ");					// check that many remaining
	asm("	blo		its_smaller_back ");				// if too short, just stick with word alignment
	asm("	msr		cpsr_flg, r4 ");		 		 	// destination alignment into N, Z, C flags
														// do word moves to align destination
	asm("	ldrcs	lr, [r1, #-4]! ");					// C flag == 1 word (we are already word aligned)
	asm("	ldmeqdb	r1!, {r3,r9} ");					// Z flag == 2 words
	asm("	ldmmidb	r1!, {r5-r8} ");
	asm("	sub		r2, r2, r4, lsr #27 ");		 		// adjust length
	asm("	strcs	lr, [r0, #-4]! ");					// destination now 8 byte aligned
	asm("	stmeqdb	r0!, {r3,r9} ");					// destination now 16 byte aligned
	asm("	stmmidb	r0!, {r5-r8} ");					// N flag == 4 words, destination now 32 byte aligned

	asm("bal_already_aligned: ");
	asm("	cmp		r2, #64 ");
	asm("	bhs		large_copy_back ");
//
// Less than 64 bytes to go
//
    asm("its_smaller_back: ");
    asm("	movs	ip, r2, lsl #26 ");		// length bits 5, 4, 3, 2 into N, Z, C, V
	asm("	beq		mem_copy_end2 ");		// skip if remaining length zero
    asm("	msr		cpsr_flg, ip ");
    asm("	ldmmidb	r1!, {r3-r10} ");
    asm("	stmmidb	r0!, {r3-r10} ");		// copy 32
    asm("	ldmeqdb	r1!, {r3-r6} ");
    asm("	ldmcsdb	r1!, {r7,r8} ");
    asm("   ldrvs	r9, [r1, #-4]! ");
    asm("	stmeqdb	r0!, {r3-r6} ");		// copy 16
    asm("	stmcsdb	r0!, {r7,r8} ");		// copy 8
    asm("   strvs	r9, [r0, #-4]! ");		// copy 4
	
    asm("	movs	ip, r2, lsl #30 ");
	asm("	bne		smallest_copy_back ");

	asm("mem_copy_end2: ");
	__POPRET("r0,r4-r11,");

//
// Less than 4 bytes to go...
//
	asm("smallest_copy_back: ");
    asm("	msr		cpsr_flg, ip ");
    asm("	ldrmih	r3, [r1, #-2]! ");
    asm("	ldreqb	r4, [r1, #-1]! ");
    asm("	strmih	r3, [r0, #-2]! ");		// copy 2
    asm("	streqb	r4, [r0, #-1]! ");		// copy 1
	__POPRET("r0,r4-r11,");

//
// Do byte moves if necessary to word-align destination
//
	asm("dest_unaligned_back: ");
	asm("	msr		cpsr_flg, r3 ");				// destination alignment in r3 into N,Z flags
	asm("	ldrmib	r4, [r1, #-1]! ");				// do byte moves to align destination
	asm("	ldrmib	r5, [r1, #-1]! ");
	asm("	ldreqb	r6, [r1, #-1]! ");
	asm("	sub		r2, r2, r3, lsr #30 ");			// adjust length, at least 13 bytes remaining
	asm("	strmib	r4, [r0, #-1]! ");
	asm("	strmib	r5, [r0, #-1]! ");
	asm("	streqb	r6, [r0, #-1]! ");
	asm("	b		dest_aligned_back ");

//
//	Large backwards copy, length >= 64
//	
	asm("large_copy_back: ");
    asm("	movs	ip, r2, lsr #6 ");
	asm("1: ");
	PLD_noff(1, 65);
	PLD_noff(1, 33);
    asm("	ldmdb	r1!, {r3-r10} ");		// Copy 64
    asm("	stmdb	r0!, {r3-r10} "); 
    asm("	ldmdb	r1!, {r3-r10} ");
    asm("	subs	ip, ip, #1 ");
    asm("	stmdb	r0!, {r3-r10} "); 
	asm("	bne		1b ");		
	asm("	and		r2, r2, #63 ");
	asm("	b		its_smaller_back ");

//
// Backwards unaligned copy
//	
	asm("copy_back_nonaligned: ");
//
// superalign
//
	asm("	bic		r1, r1, #3 ");					// align source
	asm("	ldr		r3, [r1] ");					// get first word
	asm("	mov		r12, r12, lsl #3 ");			// r12 = 8*source alignment
	asm("	ands	r4, r0, #31 ");					// r4 = bytes to align dest to 32
	asm("	beq		bunal_already_aligned ");		// skip if already aligned
	asm("	cmp		r2, r4 ");						// check if length big enough to align to 32
	asm("	blo		copy_back_remainder ");			// skip if too small
	asm("	sub		r2, r2, r4 ");					// adjust length
	asm("	rsb		r6, r12, #32 ");				// r6 = 32 - 8*source alignment

	asm("1: ");
	asm("	mov		r5, r3, lsl r6 ");				// r5 = part of previous source word required to make destination word
	asm("	ldr		r3, [r1, #-4]! ");				// get next word
	asm("	subs	r4, r4, #4 ");					// 4 bytes less to do
	asm("	orr		r5, r5, r3, lsr r12 ");			// form next destination word
	asm("	str		r5, [r0, #-4]! ");				// and store it
	asm("	bne		1b ");							// loop until destination 32 byte aligned

	asm("bunal_already_aligned: ");					// destination now aligned to 32 bytes
	asm("	movs	lr, r2, lsr #5 ");				// lr=number of 32-byte blocks
	asm("	beq		copy_back_remainder ");			// skip if length < 32

	asm("	cmp		r12, #16 ");
	asm("	beq		copy_back_nonaligned_2 ");		// branch if source = 2 mod 4
	asm("	bhi		copy_back_nonaligned_3 ");		// branch if source = 3 mod 4, else source = 1 mod 4

// source = 1 mod 4
	asm("copy_back_nonaligned_1: ");
	asm("	mov		r11, r3, lsl #24 ");
	asm("	ldmdb	r1!, {r3-r10} ");
	PLD_noff(1, 64);
	asm("	orr		r11, r11, r10, lsr #8 ");
	asm("	mov		r10, r10, lsl #24 ");
	asm("	orr		r10, r10, r9, lsr #8 ");
	asm("	mov		r9, r9, lsl #24 ");
	asm("	orr		r9, r9, r8, lsr #8 ");
	asm("	mov		r8, r8, lsl #24 ");
	asm("	orr		r8, r8, r7, lsr #8 ");
	asm("	mov		r7, r7, lsl #24 ");
	asm("	orr		r7, r7, r6, lsr #8 ");
	asm("	mov		r6, r6, lsl #24 ");
	asm("	orr		r6, r6, r5, lsr #8 ");
	asm("	mov		r5, r5, lsl #24 ");
	asm("	orr		r5, r5, r4, lsr #8 ");
	asm("	mov		r4, r4, lsl #24 ");
	asm("	orr		r4, r4, r3, lsr #8 ");
	asm("	stmdb	r0!, {r4-r11} ");
	asm("	subs	lr, lr, #1 ");
	asm("	bne		copy_back_nonaligned_1 ");
	asm("	b		copy_back_remainder ");

// source = 2 mod 4
	asm("copy_back_nonaligned_2: ");
	asm("	mov		r11, r3, lsl #16 ");
	asm("	ldmdb	r1!, {r3-r10} ");
	PLD_noff(1, 64);
	asm("	orr		r11, r11, r10, lsr #16 ");
	asm("	mov		r10, r10, lsl #16 ");
	asm("	orr		r10, r10, r9, lsr #16 ");
	asm("	mov		r9, r9, lsl #16 ");
	asm("	orr		r9, r9, r8, lsr #16 ");
	asm("	mov		r8, r8, lsl #16 ");
	asm("	orr		r8, r8, r7, lsr #16 ");
	asm("	mov		r7, r7, lsl #16 ");
	asm("	orr		r7, r7, r6, lsr #16 ");
	asm("	mov		r6, r6, lsl #16 ");
	asm("	orr		r6, r6, r5, lsr #16 ");
	asm("	mov		r5, r5, lsl #16 ");
	asm("	orr		r5, r5, r4, lsr #16 ");
	asm("	mov		r4, r4, lsl #16 ");
	asm("	orr		r4, r4, r3, lsr #16 ");
	asm("	stmdb	r0!, {r4-r11} ");
	asm("	subs	lr, lr, #1 ");
	asm("	bne		copy_back_nonaligned_2 ");
	asm("	b		copy_back_remainder ");

// source = 3 mod 4
	asm("copy_back_nonaligned_3: ");
	asm("	mov		r11, r3, lsl #8 ");
	asm("	ldmdb	r1!, {r3-r10} ");
	PLD_noff(1, 64);
	asm("	orr		r11, r11, r10, lsr #24 ");
	asm("	mov		r10, r10, lsl #8 ");
	asm("	orr		r10, r10, r9, lsr #24 ");
	asm("	mov		r9, r9, lsl #8 ");
	asm("	orr		r9, r9, r8, lsr #24 ");
	asm("	mov		r8, r8, lsl #8 ");
	asm("	orr		r8, r8, r7, lsr #24 ");
	asm("	mov		r7, r7, lsl #8 ");
	asm("	orr		r7, r7, r6, lsr #24 ");
	asm("	mov		r6, r6, lsl #8 ");
	asm("	orr		r6, r6, r5, lsr #24 ");
	asm("	mov		r5, r5, lsl #8 ");
	asm("	orr		r5, r5, r4, lsr #24 ");
	asm("	mov		r4, r4, lsl #8 ");
	asm("	orr		r4, r4, r3, lsr #24 ");
	asm("	stmdb	r0!, {r4-r11} ");
	asm("	subs	lr, lr, #1 ");
	asm("	bne		copy_back_nonaligned_3 ");

// <32 bytes to go, source alignment could be 1, 2 or 3 mod 4
// r12 = 8 * (source mod 4)
	asm("copy_back_remainder: ");
	asm("	ands	r4, r2, #0x1c ");			// r4 = 4*number of words left
	asm("	beq		2f ");						// skip if none
	asm("	rsb		r6, r12, #32 ");			// r6 = 32 - 8*source alignment

	asm("1: ");
	asm("	mov		r5, r3, lsl r6 ");			// r5 = part of previous source word required to make destination word
	asm("	ldr		r3, [r1, #-4]! ");			// get next word
	asm("	subs	r4, r4, #4 ");				// 4 bytes less to do
	asm("	orr		r5, r5, r3, lsr r12 ");		// form next destination word
	asm("	str		r5, [r0, #-4]! ");			// and store it
	asm("	bne		1b ");						// loop until all whole words moved

	asm("2: ");
	asm("	add		r1, r1, r12, lsr #3 ");		// r1 = real unaligned source address
	asm("	tst		r2, #2 ");					// 2 bytes left?
	asm("	ldrneb	r3, [r1, #-1]! ");			// copy 2
	asm("	strneb	r3, [r0, #-1]! ");
	asm("	ldrneb	r3, [r1, #-1]! ");
	asm("	strneb	r3, [r0, #-1]! ");
	asm("	tst		r2, #1 ");					// 1 byte left?
	asm("	ldrneb	r3, [r1, #-1]! ");			// copy 1
	asm("	strneb	r3, [r0, #-1]! ");
	__POPRET("r0,r4-r11,");
    }

#endif  // USE_REPLACEMENT_MEMCPY


#ifndef __KERNEL_MODE__
#ifdef __GCC32__
/**
Compares a block of data at one specified location with a block of data at
another specified location.

The comparison proceeds on a byte for byte basis; the result of the comparison
is based on the difference of the first bytes to disagree.

The data at the two locations are equal if they have the same length and content.
Where the lengths are different and the shorter section of data is the same
as the first part of the longer section of data, the shorter is considered
to be less than the longer.

@param aLeft   A pointer to the first (or left) block of 8 bit data
               to be compared.
@param aLeftL  The length of the first (or left) block of data to be compared,
               i.e. the number of bytes.
@param aRight  A pointer to the second (or right) block of 8 bit data to be
               compared.
@param aRightL The length of the second (or right) block of data to be compared,
               i.e. the number of bytes.

@return Positive, if the first (or left) block of data is greater than the
        second (or right) block of data.
        Negative, if the first (or left) block of data is less than the
        second (or right) block of data.
        Zero, if both the first (or left) and second (or right) blocks of data
        have the same length and the same content.
*/
EXPORT_C __NAKED__ TInt Mem::Compare(const TUint8* /*aLeft*/, TInt /*aLeftL*/, const TUint8* /*aRight*/, TInt /*aRightL*/)
	{
	// fall through
	}
#endif
#endif


// See header file e32cmn.h for the in-source documentation.
extern "C" EXPORT_C __NAKED__ TInt memcompare(const TUint8* /*aLeft*/, TInt /*aLeftL*/, const TUint8* /*aRight*/, TInt /*aRightL*/)
//
// Compares until the smaller of the two lengths is reached.
// If the lengths differ, returns leftlen-rightlen
// If a difference is encountered, returns left byte-right byte
//
    {
    asm("   stmfd    sp!,{r4,r5,r6,lr}");
    asm("   mov      r4,r0");
//
// Get the shorter of the two lengths, and check for zero length
//
    asm("   cmp      r1,r3");
    asm("   mov      r6,r1");
    asm("   movge    r6,r3");
    asm("   cmp      r6,#0");
    asm("   beq      compare_done");
    asm("   cmp      r6,#16");
//
// Check for aligned buffers for faster comparing if more than 16 bytes
//
    asm("   andge    r0,r4,#3");
    asm("   andge    r5,r2,#3");
    asm("   addlt    r0,r5,#1");
    asm("   cmp      r0,r5");
    asm("   beq      aligned_compare");

//
// Get aLeft+Min(aLeftL,aRightL)
//
    asm("   add      r6,r4,r6");

    asm("compare_loop:");
    asm("   ldrb     r0,[r4],#1");
    asm("   ldrb     r5,[r2],#1");
    asm("   subs     r0,r0,r5");
    asm("   bne      compare_exit");
    asm("   cmp      r4,r6");
    asm("   beq      compare_done");

    asm("   ldrb     r0,[r4],#1");
    asm("   ldrb     r5,[r2],#1");
    asm("   subs     r0,r0,r5");
    asm("   bne      compare_exit");
    asm("   cmp      r4,r6");
    asm("   beq      compare_done");

    asm("   ldrb     r0,[r4],#1");
    asm("   ldrb     r5,[r2],#1");
    asm("   subs     r0,r0,r5");
    asm("   bne      compare_exit");
    asm("   cmp      r4,r6");
    asm("   beq      compare_done");

    asm("   ldrb     r0,[r4],#1");
    asm("   ldrb     r5,[r2],#1");
    asm("   subs     r0,r0,r5");
    asm("   bne      compare_exit");
    asm("   cmp      r4,r6");
    asm("   bne      compare_loop");
//
// Return difference of lengths
//
    asm("compare_done:");
    asm("   sub      r0,r1,r3");

    asm("compare_exit:");
	__POPRET("r4-r6,");
//
// Compare byte at a time until word aligned...
//
    asm("aligned_compare:");
//
// Get number of bytes to compare before word alignment reached... and jump to the appropriate point
//
    asm("   mov      ip,r6");
    asm("   add      r6,r4,r6");
    asm("   subs     r0,r0,#1");
    asm("   movmi    r0,#3");
    asm("   rsb      r5,r0,#3");
    asm("   sub      ip,ip,r5");
    asm("   mov      ip,ip,lsr #2");
	asm("   add      pc,pc,r0,asl #4");
    asm("   b        compare_done"); // Never executed
//
// Jump here if alignment is 1. Do not use more than 4 instructions without altering above relative jump
//
    asm("   ldrb     r0,[r4],#1");
    asm("   ldrb     r5,[r2],#1");
    asm("   subs     r0,r0,r5");
    asm("   bne      compare_exit");
//
// Jump here if alignment is 2. Do not use more than 4 instructions without altering above relative jump
//
    asm("   ldrb     r0,[r4],#1");
    asm("   ldrb     r5,[r2],#1");
    asm("   subs     r0,r0,r5");
    asm("   bne      compare_exit");
//
// Jump here if alignment is 3. Do not use more than 4 instructions without altering above relative jump
//
    asm("   ldrb     r0,[r4],#1");
    asm("   ldrb     r5,[r2],#1");
    asm("   subs     r0,r0,r5");
    asm("   bne      compare_exit");
//
// Must now be word aligned
//
    asm("aligned_compare_loop:");
    asm("   ldr      r0,[r4],#4");
    asm("   ldr      r5,[r2],#4");
    asm("   eors     r0,r0,r5");
    asm("   bne      word_different");
    asm("   subs     ip,ip,#1");
    asm("   bne      aligned_compare_loop");
//
// Less than 4 bytes to go...
//
    asm("   cmp      r4,r6");
    asm("   bne      compare_loop");
    asm("   sub      r0,r1,r3");
	__POPRET("r4-r6,");
//
// A difference encountered while word comparing, find out which byte it was
//
    asm("word_different:");
    asm("   ldrb     r0,[r4,#-4]");
    asm("   ldrb     r5,[r2,#-4]");
    asm("   subs     r0,r0,r5");
    asm("   bne      compare_exit");
    asm("   ldrb     r0,[r4,#-3]");
    asm("   ldrb     r5,[r2,#-3]");
    asm("   subs     r0,r0,r5");
    asm("   bne      compare_exit");
    asm("   ldrb     r0,[r4,#-2]");
    asm("   ldrb     r5,[r2,#-2]");
    asm("   subs     r0,r0,r5");
    asm("   bne      compare_exit");
//
// This must be the different byte...
//
    asm("   ldrb     r0,[r4,#-1]");
    asm("   ldrb     r5,[r2,#-1]");
    asm("   sub      r0,r0,r5");
	__POPRET("r4-r6,");
    }
#endif