// Copyright (c) 1995-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
// Initial Contributors:
// Nokia Corporation - initial contribution.
// e32\common\arm\cmem.cia
#include "../common.h"
#if defined(__REPLACE_GENERIC_UTILS)
#include "replacement_utils.h"
#if defined(__MEM_MACHINE_CODED__)
#ifndef USE_REPLACEMENT_MEMSET
#ifdef __STANDALONE_NANOKERNEL__
#define ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc) \
asm("tst "#rt1", #3"); \
asm("ldrne "#rt1", ["#rt1"]")
#else // __STANDALONE_NANOKERNEL__
GLDEF_C void PanicEWordMoveLengthNotMultipleOf4();
GLDEF_C void PanicEWordMoveSourceNotAligned();
GLDEF_C void PanicEWordMoveTargetNotAligned();
#define ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc) \
asm("tst "#rt1", #3"); \
asm("bne " panicfunc )
#endif // __STANDALONE_NANOKERNEL__
#define ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc)
// See header file e32cmn.h for the in-source documentation.
extern "C" EXPORT_C __NAKED__ TAny* memclr(TAny* /*aTrg*/, unsigned int /*aLength*/)
// See header file e32cmn.h for the in-source documentation.
extern "C" EXPORT_C __NAKED__ TAny* memset(TAny* /*aTrg*/, TInt /*aValue*/, unsigned int /*aLength*/)
asm(" mov r3, r2 "); /* length into r3 */
asm(" and r2,r1,#255"); /* fill value into r2 */
asm(" mov r1, r3 "); /* length into r1 */
asm(" bls small_fill"); // only taken ~20% of the time
asm(" stmfd sp!,{r0,r4-r9,lr}");
asm(" movs r3, r0, lsl #30 "); // Check if word aligned
asm(" orr r2,r2,r2,lsl #8");
asm(" orr r2,r2,r2,lsl #16");
asm(" bne unaligned_fill ");
// Align destination address to 32 byte boundary if possible
asm("word_aligned_fill: ");
asm(" movs r3, r0, lsl #27 ");
asm(" beq aligned_fill ");
asm(" rsb r3, r3, #0 "); // calculate fill length necessary for alignment
asm(" cmp r1, r3, lsr #27 "); // compare with remaining length
asm(" blo smaller_fill "); // skip alignment if not enough bytes remain
asm(" msr cpsr_f, r3 "); // put length bits 4, 3, 2 into N, Z, C flags
asm(" strcs r2, [r0], #4 "); // align to 8 byte boundary
asm(" stmeqia r0!, {r2, r4} "); // align to 16 byte boundary
asm(" stmmiia r0!, {r2, r4-r6} "); // align to 32 byte boundary
asm(" sub r1, r1, r3, lsr #27 "); // adjust remaining length
asm(" bhs big_fill ");
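/* Illustrative C sketch (comment only, not part of the build) of the alignment
   trick above: the number of bytes needed to reach a 32 byte boundary is negated
   into r3, moved into the N, Z and C flags with MSR, and one conditional store
   per flag then pads the destination. Roughly, with p a TUint32* view of the
   destination and fill the byte value replicated into all four lanes by the two
   ORRs above:

       TUint pad = (0 - (TUint)trg) & 0x1F;   // bytes to the next 32 byte boundary
       if (pad <= len)
           {
           if (pad & 4)  { *p++ = fill; }                               // strcs, 4 bytes
           if (pad & 8)  { p[0] = fill; p[1] = fill; p += 2; }          // stmeqia, 8 bytes
           if (pad & 16) { p[0] = p[1] = p[2] = p[3] = fill; p += 4; }  // stmmiia, 16 bytes
           len -= pad;
           }
*/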
asm("smaller_fill:");
asm(" movs r1, r1, lsl #26");
asm(" beq mem_fill_end ");
asm(" msr cpsr_flg, r1 ");
asm(" stmmiia r0!,{r2,r4-r6}"); // Fill 32
asm(" stmmiia r0!,{r2,r4-r6}");
asm(" stmeqia r0!,{r2,r4-r6}"); // Fill 16
asm(" stmcsia r0!,{r2,r4}"); // Fill 8
asm(" strvs r2,[r0],#4"); // Fill 4
asm(" movs r1, r1, lsl #4 ");
asm(" bne smallest_fill ");
asm("mem_fill_end: ");
__POPRET("r0,r4-r9,");
// Fill last 1-3 bytes
asm("smallest_fill: ");
asm(" msr cpsr_flg,r1");
asm(" strmih r2,[r0],#2"); // Fill 2
asm(" streqb r2,[r0],#1"); // Fill 1
__POPRET("r0,r4-r9,");
// Fill loop for length >= 64
asm(" movs ip,r1,lsr #8"); // Number of 256 byte blocks to fill
asm(" beq medium_fill ");
asm("fill_256_bytes_loop:");
asm(" stmia r0!,{r2-r9}"); // Fill 256 bytes
asm(" stmia r0!,{r2-r9}");
asm(" stmia r0!,{r2-r9}");
asm(" stmia r0!,{r2-r9}");
asm(" stmia r0!,{r2-r9}");
asm(" stmia r0!,{r2-r9}");
asm(" stmia r0!,{r2-r9}");
asm(" stmia r0!,{r2-r9}");
asm(" subs ip,ip,#1");
asm(" bne fill_256_bytes_loop");
asm("medium_fill: ");
asm(" movs ip,r1,lsl #24");
asm(" msr cpsr_flg,ip");
asm(" stmmiia r0!,{r2-r9}"); // Fill 128
asm(" stmmiia r0!,{r2-r9}");
asm(" stmmiia r0!,{r2-r9}");
asm(" stmmiia r0!,{r2-r9}");
asm(" stmeqia r0!,{r2-r9}"); // Fill 64
asm(" stmeqia r0!,{r2-r9}");
asm(" and r1, r1, #63 ");
asm(" b smaller_fill");
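/* Fill structure for lengths >= 64 (illustrative comment only): whole 256 byte
   blocks are filled first, then length bits 7 and 6 select conditional 128 and
   64 byte stores, and anything below 64 bytes falls back to smaller_fill:

       while (len >= 256) { fill 256 bytes; len -= 256; }   // fill_256_bytes_loop
       if (len & 128) fill 128 bytes;                       // stmmiia block
       if (len & 64)  fill 64 bytes;                        // stmeqia block
       len &= 63;                                           // handled by smaller_fill
*/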
// Word-align destination address, length >= 8
asm("unaligned_fill: ");
asm(" rsb r3, r3, #0 "); // calculate fill length necessary for alignment
asm(" msr cpsr_flg, r3");
asm(" streqb r2, [r0], #1 "); // align to 2 byte boundary
asm(" strmih r2, [r0], #2 "); // align to 4 byte boundary
asm(" sub r1, r1, r3, lsr #30 ");
asm(" b word_aligned_fill ");
// Fill for length <= 8
asm(" mov r3, r0 "); /* r3=dest */
asm(" adr ip, small_fill_end ");
asm(" sub pc, ip, r1, lsl #2 ");
asm(" strb r2, [r3], #1");
asm(" strb r2, [r3], #1");
asm(" strb r2, [r3], #1");
asm(" strb r2, [r3], #1");
asm(" strb r2, [r3], #1");
asm(" strb r2, [r3], #1");
asm(" strb r2, [r3], #1");
asm(" strb r2, [r3], #1");
asm("small_fill_end: ");
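/* The computed branch above ("sub pc, ip, r1, lsl #2") jumps backwards into the
   ladder of strb instructions so that exactly aLength of them execute, much like
   a fall-through switch in C (illustrative comment only; p and c are made-up names):

       switch (len)           // len <= 8 on this path
           {
           case 8: *p++ = c;  // fall through
           case 7: *p++ = c;
           case 6: *p++ = c;
           case 5: *p++ = c;
           case 4: *p++ = c;
           case 3: *p++ = c;
           case 2: *p++ = c;
           case 1: *p++ = c;
           case 0: break;
           }
*/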
// The AEABI switched the order of arg2 and arg3 to save an instruction when
// calling 'memset' from 'memclr'.
asm(".global __aeabi_memset8 ");
asm("__aeabi_memset8: ");
asm(".global __aeabi_memset4 ");
asm("__aeabi_memset4: ");
asm(".global __aeabi_memset ");
asm("__aeabi_memset: ");
asm(" and r2, r2, #255");
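/* For reference, the ARM Run-time ABI declares these helpers roughly as follows
   (quoted from memory, so treat the exact prototypes as an assumption):

       void __aeabi_memset(void* dest, size_t n, int c);
       void __aeabi_memset4(void* dest, size_t n, int c);
       void __aeabi_memset8(void* dest, size_t n, int c);

   i.e. the length arrives in r1 and the fill value in r2, which already matches
   the internal convention set up by memset's r1/r2 swap above, so these entry
   points only need to mask the fill value before joining the common fill path. */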
#endif // USE_REPLACEMENT_MEMSET
#ifndef USE_REPLACEMENT_MEMCPY
// See header file e32cmn.h for the in-source documentation.
extern "C" EXPORT_C __NAKED__ TAny* wordmove(TAny* /*aTrg*/, const TAny* /*aSrc*/, unsigned int /*aLength*/)
// Assumes source, target and length are all word aligned
ARM_ASSERT_MULTIPLE_OF_FOUR(r0, CSM_Z30PanicEWordMoveTargetNotAlignedv);
ARM_ASSERT_MULTIPLE_OF_FOUR(r1, CSM_Z30PanicEWordMoveSourceNotAlignedv);
ARM_ASSERT_MULTIPLE_OF_FOUR(r2, CSM_Z34PanicEWordMoveLengthNotMultipleOf4v);
// Mask length to a multiple of four bytes to avoid memory or register
// corruption by the special cases below.
// Length <= 24 in ~90% of cases, however we can only copy > 16 bytes in 4
// instructions if the LDM instruction restores the Thumb state when loading the PC.
#ifdef __CPU_ARM_LDR_PC_SETS_TBIT
asm("addls pc, pc, r2, lsl #2 "); // take branch depending on size
asm("b 9f "); // too big
asm("ldr ip, [r1] ");
asm("str ip, [r0] ");
asm("ldmia r1, {r2,r3}");
asm("stmia r0, {r2,r3}");
asm("ldmia r1, {r2,r3,ip}");
asm("stmia r0, {r2,r3,ip}");
asm("ldmia r1, {r1,r2,r3,ip}");
asm("stmia r0, {r1,r2,r3,ip}");
#ifdef __CPU_ARM_LDR_PC_SETS_TBIT
asm("stmfd sp!, {lr}");
asm("ldmia r1, {r1,r2,r3,ip,lr}");
asm("stmia r0, {r1,r2,r3,ip,lr}");
asm("ldmfd sp!, {pc}");
asm("stmfd sp!, {r4,lr}");
asm("ldmia r1, {r1,r2,r3,r4,ip,lr}");
asm("stmia r0, {r1,r2,r3,r4,ip,lr}");
asm("ldmfd sp!, {r4,pc}");
asm("subs r3, r0, r1 "); // r3 = dest - source
__JUMP(eq,lr); // return if source = dest
asm("stmfd sp!, {r0,r4-r11,lr} ");
asm("cmphi r2, r3 "); // if dest>source, compare length with dest-source
asm("bls mem_move_fore "); // if dest<source or length<=dest-source do forwards aligned copy
asm("add r0, r0, r2 ");
asm("add r1, r1, r2 ");
asm("b mem_move_back "); // Backwards aligned copy
// See header file e32cmn.h for the in-source documentation.
extern "C" EXPORT_C __NAKED__ TAny* memmove(TAny* /*aTrg*/, const TAny* /*aSrc*/, unsigned int /*aLength*/)
// See header file e32cmn.h for the in-source documentation.
extern "C" EXPORT_C __NAKED__ TAny* memcpy(TAny* /*aTrg*/, const TAny* /*aSrc*/, unsigned int /*aLength*/)
// Check for zero length or source and target being the same
asm(" cmp r2, #0 "); // zero length?
asm(" subnes r3, r0, r1 "); // if not, r3 = dest-source
__JUMP(eq,lr); // if zero length or dest=source, nothing to do
asm(" cmphi r2, r3 "); // if dest>source compare length to dest-source
asm(" movhi r3, #0 "); // if dest>source and length>dest-source need to go backwards - set r3=0
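/* Equivalent overlap test in C (illustrative comment only): a backwards copy is
   needed only when the destination starts inside the source block:

       TBool backwards = aTrg > aSrc &&
                         (TUint)((TUint8*)aTrg - (TUint8*)aSrc) < aLength;

   The subs/cmphi pair above performs exactly this test: the subs supplies the
   dest > source condition, and the conditional compare then checks the length
   against the gap between the two blocks. */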
// If <16 bytes, just do byte moves
asm(" cmp r2, #15 ");
asm(" bhi main_copy ");
asm(" ldrb r12, [r0] "); // read dest so it's in cache - avoid lots of single accesses to external memory
asm(" sub r12, r0, #1 ");
asm(" ldrb r12, [r12, r2] "); // read dest+length-1
asm(" beq small_copy_back "); // r3=0 means go backwards
asm("small_copy_fwd: ");
asm(" adr r12, small_copy_fwd_end ");
asm(" sub pc, r12, r2, lsl #3 ");
asm(" ldrb r12, [r1], #1 ");
asm(" strb r12, [r3], #1 ");
asm(" ldrb r12, [r1], #1 ");
asm(" strb r12, [r3], #1 ");
asm(" ldrb r12, [r1], #1 ");
asm(" strb r12, [r3], #1 ");
asm(" ldrb r12, [r1], #1 ");
asm(" strb r12, [r3], #1 ");
asm(" ldrb r12, [r1], #1 ");
asm(" strb r12, [r3], #1 ");
asm(" ldrb r12, [r1], #1 ");
asm(" strb r12, [r3], #1 ");
asm(" ldrb r12, [r1], #1 ");
asm(" strb r12, [r3], #1 ");
asm(" ldrb r12, [r1], #1 ");
asm(" strb r12, [r3], #1 ");
asm(" ldrb r12, [r1], #1 ");
asm(" strb r12, [r3], #1 ");
asm(" ldrb r12, [r1], #1 ");
asm(" strb r12, [r3], #1 ");
asm(" ldrb r12, [r1], #1 ");
asm(" strb r12, [r3], #1 ");
asm(" ldrb r12, [r1], #1 ");
asm(" strb r12, [r3], #1 ");
asm(" ldrb r12, [r1], #1 ");
asm(" strb r12, [r3], #1 ");
asm(" ldrb r12, [r1], #1 ");
asm(" strb r12, [r3], #1 ");
asm(" ldrb r12, [r1], #1 ");
asm(" strb r12, [r3], #1 ");
asm("small_copy_fwd_end: ");
asm("small_copy_back: ");
asm(" add r3, r0, r2 ");
asm(" add r1, r1, r2 ");
asm(" adr r12, small_copy_back_end ");
asm(" sub pc, r12, r2, lsl #3 ");
asm(" ldrb r12, [r1, #-1]! ");
asm(" strb r12, [r3, #-1]! ");
asm(" ldrb r12, [r1, #-1]! ");
asm(" strb r12, [r3, #-1]! ");
asm(" ldrb r12, [r1, #-1]! ");
asm(" strb r12, [r3, #-1]! ");
asm(" ldrb r12, [r1, #-1]! ");
asm(" strb r12, [r3, #-1]! ");
asm(" ldrb r12, [r1, #-1]! ");
asm(" strb r12, [r3, #-1]! ");
asm(" ldrb r12, [r1, #-1]! ");
asm(" strb r12, [r3, #-1]! ");
asm(" ldrb r12, [r1, #-1]! ");
asm(" strb r12, [r3, #-1]! ");
asm(" ldrb r12, [r1, #-1]! ");
asm(" strb r12, [r3, #-1]! ");
asm(" ldrb r12, [r1, #-1]! ");
asm(" strb r12, [r3, #-1]! ");
asm(" ldrb r12, [r1, #-1]! ");
asm(" strb r12, [r3, #-1]! ");
asm(" ldrb r12, [r1, #-1]! ");
asm(" strb r12, [r3, #-1]! ");
asm(" ldrb r12, [r1, #-1]! ");
asm(" strb r12, [r3, #-1]! ");
asm(" ldrb r12, [r1, #-1]! ");
asm(" strb r12, [r3, #-1]! ");
asm(" ldrb r12, [r1, #-1]! ");
asm(" strb r12, [r3, #-1]! ");
asm(" ldrb r12, [r1, #-1]! ");
asm(" strb r12, [r3, #-1]! ");
asm("small_copy_back_end: ");
PLD(1); // preload first two cache lines
asm(" stmfd sp!, {r0,r4-r11,lr} "); // r0 == dest, r1 == src, r2 == len
asm(" beq copy_back "); // we must go backwards
asm(" movs r3, r0, lsl #30 "); // check destination word aligned
asm(" bne dest_unaligned_fore ");
// Normal copy forwards. r0 should point to end address on exit
// Destination now word-aligned; if source is also word-aligned, do aligned copy.
asm("dest_aligned_fore: ");
asm(" ands r12, r1, #3 "); // r12=alignment of source
asm(" bne copy_fwd_nonaligned ");
// We are now word aligned, at least 13 bytes to do
asm("mem_move_fore:");
asm(" movs r4, r0, lsl #27 "); // destination alignment into r4
asm(" beq f_al_already_aligned "); // fast path
asm(" rsb r4, r4, #0 "); // bytes required to align destination to 32
asm(" cmp r2, r4, lsr #27 "); // check that many remaining
asm(" blo its_smaller_fore "); // if too short, just stick with word alignment
asm(" msr cpsr_flg, r4 "); // destination alignment into N, Z, C flags
// do word moves to align destination
asm(" ldrcs lr, [r1], #4 "); // C flag == 1 word (we are already word aligned)
asm(" ldmeqia r1!, {r3,r9} "); // Z flag == 2 words
asm(" ldmmiia r1!, {r5-r8} "); // N flag == 4 words, destination now 32 byte aligned
asm(" sub r2, r2, r4, lsr #27 "); // adjust length
asm(" strcs lr, [r0], #4 "); // destination now 8 byte aligned
asm(" stmeqia r0!, {r3,r9} "); // destination now 16 byte aligned
asm(" stmmiia r0!, {r5-r8} "); // destination now 32 byte aligned
asm("f_al_already_aligned: ");
asm(" cmp r2, #64 ");
asm(" bhs large_copy_fore ");
// Less than 64 bytes to go...
asm("its_smaller_fore:");
asm(" movs ip, r2, lsl #26 "); // length bits 5, 4, 3, 2 into N, Z, C, V
asm(" beq mem_copy_end "); // skip if remaining length zero
asm(" msr cpsr_flg, ip ");
asm(" ldmmiia r1!, {r3-r10} ");
asm(" stmmiia r0!, {r3-r10} "); // copy 32
asm(" ldmeqia r1!, {r3-r6} ");
asm(" ldmcsia r1!, {r7-r8} ");
asm(" ldrvs r9, [r1], #4 ");
asm(" stmeqia r0!, {r3-r6} "); // copy 16
asm(" stmcsia r0!, {r7-r8} "); // copy 8
asm(" strvs r9, [r0], #4 "); // copy 4
asm(" movs ip, r2, lsl #30 ");
asm(" bne smallest_copy_fore ");
asm("mem_copy_end: ");
__POPRET("r0,r4-r11,");
// Less than 4 bytes to go...
asm("smallest_copy_fore: ");
asm(" msr cpsr_flg, ip ");
asm(" ldrmih r3, [r1], #2 ");
asm(" ldreqb r4, [r1], #1 ");
asm(" strmih r3, [r0], #2 "); // copy 2
asm(" streqb r4, [r0], #1 "); // copy 1
__POPRET("r0,r4-r11,");
// Do byte moves if necessary to word-align destination
asm("dest_unaligned_fore: ");
asm(" rsb r3, r3, #0 ");
asm(" msr cpsr_flg, r3 ");
asm(" ldrmib r4, [r1], #1 "); // move bytes to align destination
asm(" ldrmib r5, [r1], #1 ");
asm(" ldreqb r6, [r1], #1 ");
asm(" sub r2, r2, r3, lsr #30 "); // adjust length, at least 13 bytes remaining
asm(" strmib r4, [r0], #1 ");
asm(" strmib r5, [r0], #1 ");
asm(" streqb r6, [r0], #1 ");
asm(" b dest_aligned_fore ");
// Large copy, length >= 64
asm("large_copy_fore: ");
asm(" movs ip, r2, lsr #6 "); // ip = number of 64 byte blocks to copy
asm(" ldmia r1!, {r3-r10} "); // Copy 64
asm(" stmia r0!, {r3-r10} ");
asm(" ldmia r1!, {r3-r10} ");
asm(" subs ip, ip, #1 ");
asm(" stmia r0!, {r3-r10} ");
asm(" and r2, r2, #63 ");
asm(" b its_smaller_fore ");
// Forward unaligned copy
asm("copy_fwd_nonaligned:");
asm(" bic r1, r1, #3 "); // align source
asm(" ldr r11, [r1], #4 "); // get first word
asm(" mov r12, r12, lsl #3 "); // r12 = 8*source alignment
asm(" ands r4, r0, #31 "); // destination alignment into r4
asm(" beq medium_unal_copy "); // skip if already aligned
asm(" rsb r4, r4, #32 "); // r4 = bytes to align dest to 32
asm(" cmp r2, r4 "); // check if length big enough to align to 32
asm(" blo copy_fwd_remainder "); // skip if too small
asm(" sub r2, r2, r4 "); // adjust length
asm(" rsb r3, r12, #32 "); // r3 = 32 - 8*source alignment
asm(" mov r5, r11, lsr r12 "); // r5 = part of previous source word required to make destination word
asm(" ldr r11, [r1], #4 "); // get next word
asm(" subs r4, r4, #4 "); // 4 bytes less to do
asm(" orr r5, r5, r11, lsl r3 "); // form next destination word
asm(" str r5, [r0], #4 "); // and store it
asm(" bne 1b "); // loop until destination 32 byte aligned
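/* Illustrative C sketch (comment only) of the misaligned-source technique used
   throughout copy_fwd_nonaligned: the source pointer is rounded down to a word
   boundary and each destination word is assembled from the tail of the previous
   source word and the head of the next one. Assuming a little-endian build and
   shift = 8 * (source address & 3):

       TUint32 prev = *alignedSrc++;      // first word, already fetched above
       while (wordsToGo--)
           {
           TUint32 next = *alignedSrc++;
           *dst++ = (prev >> shift) | (next << (32 - shift));
           prev = next;
           }

   The three unrolled loops that follow are this loop specialised for shift
   values of 8, 16 and 24. */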
asm("medium_unal_copy: "); // destination now aligned to 32 bytes
asm(" movs lr, r2, lsr #5 "); // lr=number of 32-byte blocks
asm(" beq copy_fwd_remainder "); // skip if length < 32
asm(" cmp r12, #16 ");
asm(" beq copy_fwd_nonaligned_2 "); // branch if source = 2 mod 4
asm(" bhi copy_fwd_nonaligned_3 "); // branch if source = 3 mod 4, else source = 1 mod 4
asm("copy_fwd_nonaligned_1: ");
asm(" mov r3, r11, lsr #8 ");
asm(" ldmia r1!, {r4-r11} ");
asm(" subs lr, lr, #1 ");
asm(" orr r3, r3, r4, lsl #24 ");
asm(" mov r4, r4, lsr #8 ");
asm(" orr r4, r4, r5, lsl #24 ");
asm(" mov r5, r5, lsr #8 ");
asm(" orr r5, r5, r6, lsl #24 ");
asm(" mov r6, r6, lsr #8 ");
asm(" orr r6, r6, r7, lsl #24 ");
asm(" mov r7, r7, lsr #8 ");
asm(" orr r7, r7, r8, lsl #24 ");
asm(" mov r8, r8, lsr #8 ");
asm(" orr r8, r8, r9, lsl #24 ");
asm(" mov r9, r9, lsr #8 ");
asm(" orr r9, r9, r10, lsl #24 ");
asm(" mov r10, r10, lsr #8 ");
asm(" orr r10, r10, r11, lsl #24 ");
asm(" stmia r0!, {r3-r10} ");
asm(" bne copy_fwd_nonaligned_1 ");
asm(" b copy_fwd_remainder ");
asm("copy_fwd_nonaligned_2: ");
asm(" mov r3, r11, lsr #16 ");
asm(" ldmia r1!, {r4-r11} ");
asm(" subs lr, lr, #1 ");
asm(" orr r3, r3, r4, lsl #16 ");
asm(" mov r4, r4, lsr #16 ");
asm(" orr r4, r4, r5, lsl #16 ");
asm(" mov r5, r5, lsr #16 ");
asm(" orr r5, r5, r6, lsl #16 ");
asm(" mov r6, r6, lsr #16 ");
asm(" orr r6, r6, r7, lsl #16 ");
asm(" mov r7, r7, lsr #16 ");
asm(" orr r7, r7, r8, lsl #16 ");
asm(" mov r8, r8, lsr #16 ");
asm(" orr r8, r8, r9, lsl #16 ");
asm(" mov r9, r9, lsr #16 ");
asm(" orr r9, r9, r10, lsl #16 ");
asm(" mov r10, r10, lsr #16 ");
asm(" orr r10, r10, r11, lsl #16 ");
asm(" stmia r0!, {r3-r10} ");
asm(" bne copy_fwd_nonaligned_2 ");
asm(" b copy_fwd_remainder ");
asm("copy_fwd_nonaligned_3: ");
asm(" mov r3, r11, lsr #24 ");
asm(" ldmia r1!, {r4-r11} ");
asm(" subs lr, lr, #1 ");
asm(" orr r3, r3, r4, lsl #8 ");
asm(" mov r4, r4, lsr #24 ");
asm(" orr r4, r4, r5, lsl #8 ");
asm(" mov r5, r5, lsr #24 ");
asm(" orr r5, r5, r6, lsl #8 ");
asm(" mov r6, r6, lsr #24 ");
asm(" orr r6, r6, r7, lsl #8 ");
asm(" mov r7, r7, lsr #24 ");
asm(" orr r7, r7, r8, lsl #8 ");
asm(" mov r8, r8, lsr #24 ");
asm(" orr r8, r8, r9, lsl #8 ");
asm(" mov r9, r9, lsr #24 ");
asm(" orr r9, r9, r10, lsl #8 ");
asm(" mov r10, r10, lsr #24 ");
asm(" orr r10, r10, r11, lsl #8 ");
asm(" stmia r0!, {r3-r10} ");
asm(" bne copy_fwd_nonaligned_3 ");
// <32 bytes to go, source alignment could be 1, 2 or 3 mod 4
// r12 = 8 * (source mod 4)
asm("copy_fwd_remainder: ");
asm(" ands r4, r2, #0x1c "); // r4 = 4*number of words left
asm(" beq 2f "); // skip if none
asm(" rsb r3, r12, #32 "); // r3 = 32 - 8*source alignment
asm(" mov r5, r11, lsr r12 "); // r5 = part of previous source word required to make destination word
asm(" ldr r11, [r1], #4 "); // get next word
asm(" subs r4, r4, #4 "); // 4 bytes less to do
asm(" orr r5, r5, r11, lsl r3 "); // form next destination word
asm(" str r5, [r0], #4 "); // and store it
asm(" bne 1b "); // loop until all whole words are done
asm(" sub r1, r1, #4 ");
asm(" add r1, r1, r12, lsr #3 "); // r1 = real unaligned source address
asm(" tst r2, #2 "); // 2 bytes left?
asm(" ldrneb r5, [r1], #1 "); // copy 2
asm(" strneb r5, [r0], #1 ");
asm(" ldrneb r5, [r1], #1 ");
asm(" strneb r5, [r0], #1 ");
asm(" tst r2, #1 "); // 1 byte left?
asm(" ldrneb r5, [r1], #1 "); // copy 1
asm(" strneb r5, [r0], #1 ");
__POPRET("r0,r4-r11,");
// Source is before destination and they overlap, so need to copy backwards
asm(" add r0, r0, r2 "); // r0=last dest address+1
asm(" add r1, r1, r2 "); // r1=last source address+1
PLD_noff(1, 33); // preload last two cache lines
asm(" movs r3, r0, lsl #30 "); // check destination word aligned
asm(" bne dest_unaligned_back ");
asm("dest_aligned_back: ");
asm(" ands r12, r1, #3 "); // r12=alignment of source
asm(" bne copy_back_nonaligned ");
// Backwards copying, addresses both word aligned, at least 13 bytes to go
asm("mem_move_back:");
asm(" movs r4, r0, lsl #27 "); // bytes required to align destination to 32
asm(" beq bal_already_aligned "); // skip if already aligned to 32
asm(" cmp r2, r4, lsr #27 "); // check that many remaining
asm(" blo its_smaller_back "); // if too short, just stick with word alignment
asm(" msr cpsr_flg, r4 "); // destination alignment into N, Z, C flags
// do word moves to align destination
asm(" ldrcs lr, [r1, #-4]! "); // C flag == 1 word (we are already word aligned)
asm(" ldmeqdb r1!, {r3,r9} "); // Z flag == 2 words
asm(" ldmmidb r1!, {r5-r8} ");
asm(" sub r2, r2, r4, lsr #27 "); // adjust length
asm(" strcs lr, [r0, #-4]! "); // destination now 8 byte aligned
asm(" stmeqdb r0!, {r3,r9} "); // destination now 16 byte aligned
asm(" stmmidb r0!, {r5-r8} "); // N flag == 4 words, destination now 32 byte aligned
asm("bal_already_aligned: ");
asm(" cmp r2, #64 ");
asm(" bhs large_copy_back ");
// Less than 64 bytes to go
asm("its_smaller_back: ");
asm(" movs ip, r2, lsl #26 "); // length bits 5, 4, 3, 2 into N, Z, C, V
asm(" beq mem_copy_end2 "); // skip if remaining length zero
asm(" msr cpsr_flg, ip ");
asm(" ldmmidb r1!, {r3-r10} ");
asm(" stmmidb r0!, {r3-r10} "); // copy 32
asm(" ldmeqdb r1!, {r3-r6} ");
asm(" ldmcsdb r1!, {r7,r8} ");
asm(" ldrvs r9, [r1, #-4]! ");
asm(" stmeqdb r0!, {r3-r6} "); // copy 16
asm(" stmcsdb r0!, {r7,r8} "); // copy 8
asm(" strvs r9, [r0, #-4]! "); // copy 4
asm(" movs ip, r2, lsl #30 ");
asm(" bne smallest_copy_back ");
asm("mem_copy_end2: ");
__POPRET("r0,r4-r11,");
// Less than 4 bytes to go...
asm("smallest_copy_back: ");
asm(" msr cpsr_flg, ip ");
asm(" ldrmih r3, [r1, #-2]! ");
asm(" ldreqb r4, [r1, #-1]! ");
asm(" strmih r3, [r0, #-2]! "); // copy 2
asm(" streqb r4, [r0, #-1]! "); // copy 1
__POPRET("r0,r4-r11,");
// Do byte moves if necessary to word-align destination
asm("dest_unaligned_back: ");
asm(" msr cpsr_flg, r3 "); // destination alignment in r3 into N,Z flags
asm(" ldrmib r4, [r1, #-1]! "); // do byte moves to align destination
asm(" ldrmib r5, [r1, #-1]! ");
asm(" ldreqb r6, [r1, #-1]! ");
asm(" sub r2, r2, r3, lsr #30 "); // adjust length, at least 13 bytes remaining
asm(" strmib r4, [r0, #-1]! ");
asm(" strmib r5, [r0, #-1]! ");
asm(" streqb r6, [r0, #-1]! ");
asm(" b dest_aligned_back ");
// Large backwards copy, length >= 64
asm("large_copy_back: ");
asm(" movs ip, r2, lsr #6 ");
asm(" ldmdb r1!, {r3-r10} "); // Copy 64
asm(" stmdb r0!, {r3-r10} ");
asm(" ldmdb r1!, {r3-r10} ");
asm(" subs ip, ip, #1 ");
asm(" stmdb r0!, {r3-r10} ");
asm(" and r2, r2, #63 ");
asm(" b its_smaller_back ");
// Backwards unaligned copy
asm("copy_back_nonaligned: ");
asm(" bic r1, r1, #3 "); // align source
asm(" ldr r3, [r1] "); // get first word
asm(" mov r12, r12, lsl #3 "); // r12 = 8*source alignment
asm(" ands r4, r0, #31 "); // r4 = bytes to align dest to 32
asm(" beq bunal_already_aligned "); // skip if already aligned
asm(" cmp r2, r4 "); // check if length big enough to align to 32
asm(" blo copy_back_remainder "); // skip if too small
asm(" sub r2, r2, r4 "); // adjust length
asm(" rsb r6, r12, #32 "); // r6 = 32 - 8*source alignment
asm(" mov r5, r3, lsl r6 "); // r5 = part of previous source word required to make destination word
asm(" ldr r3, [r1, #-4]! "); // get next word
asm(" subs r4, r4, #4 "); // 4 bytes less to do
asm(" orr r5, r5, r3, lsr r12 "); // form next destination word
asm(" str r5, [r0, #-4]! "); // and store it
asm(" bne 1b "); // loop until destination 32 byte aligned
asm("bunal_already_aligned: "); // destination now aligned to 32 bytes
asm(" movs lr, r2, lsr #5 "); // lr=number of 32-byte blocks
asm(" beq copy_back_remainder "); // skip if length < 32
asm(" cmp r12, #16 ");
asm(" beq copy_back_nonaligned_2 "); // branch if source = 2 mod 4
asm(" bhi copy_back_nonaligned_3 "); // branch if source = 3 mod 4, else source = 1 mod 4
asm("copy_back_nonaligned_1: ");
asm(" mov r11, r3, lsl #24 ");
asm(" ldmdb r1!, {r3-r10} ");
asm(" orr r11, r11, r10, lsr #8 ");
asm(" mov r10, r10, lsl #24 ");
asm(" orr r10, r10, r9, lsr #8 ");
asm(" mov r9, r9, lsl #24 ");
asm(" orr r9, r9, r8, lsr #8 ");
asm(" mov r8, r8, lsl #24 ");
asm(" orr r8, r8, r7, lsr #8 ");
asm(" mov r7, r7, lsl #24 ");
asm(" orr r7, r7, r6, lsr #8 ");
asm(" mov r6, r6, lsl #24 ");
asm(" orr r6, r6, r5, lsr #8 ");
asm(" mov r5, r5, lsl #24 ");
asm(" orr r5, r5, r4, lsr #8 ");
asm(" mov r4, r4, lsl #24 ");
asm(" orr r4, r4, r3, lsr #8 ");
asm(" stmdb r0!, {r4-r11} ");
asm(" subs lr, lr, #1 ");
asm(" bne copy_back_nonaligned_1 ");
asm(" b copy_back_remainder ");
asm("copy_back_nonaligned_2: ");
asm(" mov r11, r3, lsl #16 ");
asm(" ldmdb r1!, {r3-r10} ");
asm(" orr r11, r11, r10, lsr #16 ");
asm(" mov r10, r10, lsl #16 ");
asm(" orr r10, r10, r9, lsr #16 ");
asm(" mov r9, r9, lsl #16 ");
asm(" orr r9, r9, r8, lsr #16 ");
asm(" mov r8, r8, lsl #16 ");
asm(" orr r8, r8, r7, lsr #16 ");
asm(" mov r7, r7, lsl #16 ");
asm(" orr r7, r7, r6, lsr #16 ");
asm(" mov r6, r6, lsl #16 ");
asm(" orr r6, r6, r5, lsr #16 ");
asm(" mov r5, r5, lsl #16 ");
asm(" orr r5, r5, r4, lsr #16 ");
asm(" mov r4, r4, lsl #16 ");
asm(" orr r4, r4, r3, lsr #16 ");
asm(" stmdb r0!, {r4-r11} ");
asm(" subs lr, lr, #1 ");
asm(" bne copy_back_nonaligned_2 ");
asm(" b copy_back_remainder ");
asm("copy_back_nonaligned_3: ");
asm(" mov r11, r3, lsl #8 ");
asm(" ldmdb r1!, {r3-r10} ");
asm(" orr r11, r11, r10, lsr #24 ");
asm(" mov r10, r10, lsl #8 ");
asm(" orr r10, r10, r9, lsr #24 ");
asm(" mov r9, r9, lsl #8 ");
asm(" orr r9, r9, r8, lsr #24 ");
asm(" mov r8, r8, lsl #8 ");
asm(" orr r8, r8, r7, lsr #24 ");
asm(" mov r7, r7, lsl #8 ");
asm(" orr r7, r7, r6, lsr #24 ");
asm(" mov r6, r6, lsl #8 ");
asm(" orr r6, r6, r5, lsr #24 ");
asm(" mov r5, r5, lsl #8 ");
asm(" orr r5, r5, r4, lsr #24 ");
asm(" mov r4, r4, lsl #8 ");
asm(" orr r4, r4, r3, lsr #24 ");
asm(" stmdb r0!, {r4-r11} ");
asm(" subs lr, lr, #1 ");
asm(" bne copy_back_nonaligned_3 ");
// <32 bytes to go, source alignment could be 1, 2 or 3 mod 4
// r12 = 8 * (source mod 4)
asm("copy_back_remainder: ");
asm(" ands r4, r2, #0x1c "); // r4 = 4*number of words left
asm(" beq 2f "); // skip if none
asm(" rsb r6, r12, #32 "); // r6 = 32 - 8*source alignment
asm(" mov r5, r3, lsl r6 "); // r5 = part of previous source word required to make destination word
asm(" ldr r3, [r1, #-4]! "); // get next word
asm(" subs r4, r4, #4 "); // 4 bytes less to do
asm(" orr r5, r5, r3, lsr r12 "); // form next destination word
asm(" str r5, [r0, #-4]! "); // and store it
asm(" bne 1b "); // loop until all whole words are done
asm(" add r1, r1, r12, lsr #3 "); // r1 = real unaligned source address
asm(" tst r2, #2 "); // 2 bytes left?
asm(" ldrneb r3, [r1, #-1]! "); // copy 2
asm(" strneb r3, [r0, #-1]! ");
asm(" ldrneb r3, [r1, #-1]! ");
asm(" strneb r3, [r0, #-1]! ");
asm(" tst r2, #1 "); // 1 byte left?
asm(" ldrneb r3, [r1, #-1]! "); // copy 1
asm(" strneb r3, [r0, #-1]! ");
__POPRET("r0,r4-r11,");
#endif // USE_REPLACEMENT_MEMCPY
#ifndef __KERNEL_MODE__
Compares a block of data at one specified location with a block of data at
another specified location.

The comparison proceeds on a byte-for-byte basis; the result is based on the
difference of the first pair of bytes to disagree.

The data at the two locations are equal if they have the same length and content.
Where the lengths are different and the shorter section of data is the same
as the first part of the longer section of data, the shorter is considered
to be less than the longer.

@param aLeft A pointer to the first (or left) block of 8-bit data to be compared.
@param aLeftL The length of the first (or left) block of data to be compared,
              i.e. the number of bytes.
@param aRight A pointer to the second (or right) block of 8-bit data to be compared.
@param aRightL The length of the second (or right) block of data to be compared,
               i.e. the number of bytes.

@return Positive, if the first (or left) block of data is greater than the
        second (or right) block of data.
        Negative, if the first (or left) block of data is less than the
        second (or right) block of data.
        Zero, if both the first (or left) and second (or right) blocks of data
        have the same length and the same content.
EXPORT_C __NAKED__ TInt Mem::Compare(const TUint8* /*aLeft*/, TInt /*aLeftL*/, const TUint8* /*aRight*/, TInt /*aRightL*/)
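/* Typical usage (illustrative only; the descriptor names below are made up):

       _LIT8(KLeft,  "abcde");
       _LIT8(KRight, "abcdf");
       TInt r = Mem::Compare(KLeft().Ptr(),  KLeft().Length(),
                             KRight().Ptr(), KRight().Length());
       // r is negative here: the first differing bytes are 'e' and 'f'

   A zero result means identical length and content; comparing "abc" against
   "abcd" would also give a negative result, because the shorter block matches
   the start of the longer one. */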
// See header file e32cmn.h for the in-source documentation.
extern "C" EXPORT_C __NAKED__ TInt memcompare(const TUint8* /*aLeft*/, TInt /*aLeftL*/, const TUint8* /*aRight*/, TInt /*aRightL*/)
// Compares until the smaller of the two lengths is reached.
// If the lengths differ, returns leftlen-rightlen
// If a difference is encountered, returns left byte-right byte
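/* Reference semantics in plain C++ (illustrative comment only, not the build code):

       TInt n = aLeftL < aRightL ? aLeftL : aRightL;
       for (TInt i = 0; i < n; ++i)
           if (aLeft[i] != aRight[i])
               return aLeft[i] - aRight[i];   // difference of first disagreeing bytes
       return aLeftL - aRightL;               // equal prefix: difference of lengths

   The assembler below implements this algorithm with a word-at-a-time fast path
   for aligned buffers longer than 16 bytes. */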
asm(" stmfd sp!,{r4,r5,r6,lr}");
// Get the shorter of the two lengths, and check for zero length
asm(" beq compare_done");
// Check for aligned buffers for faster comparing if more than 16 bytes
asm(" andge r0,r4,#3");
asm(" andge r5,r2,#3");
asm(" addlt r0,r5,#1");
asm(" beq aligned_compare");
// Get aLeft+Min(aLeftL,aRightL)
asm(" add r6,r4,r6");
asm("compare_loop:");
asm(" ldrb r0,[r4],#1");
asm(" ldrb r5,[r2],#1");
asm(" subs r0,r0,r5");
asm(" bne compare_exit ");
asm(" beq compare_done");
asm(" ldrb r0,[r4],#1");
asm(" ldrb r5,[r2],#1");
asm(" subs r0,r0,r5");
asm(" bne compare_exit ");
asm(" beq compare_done");
asm(" ldrb r0,[r4],#1");
asm(" ldrb r5,[r2],#1");
asm(" subs r0,r0,r5");
asm(" bne compare_exit ");
asm(" beq compare_done");
asm(" ldrb r0,[r4],#1");
asm(" ldrb r5,[r2],#1");
asm(" subs r0,r0,r5");
asm(" bne compare_exit ");
asm(" bne compare_loop");
// Return difference of lengths
asm("compare_done:");
asm(" sub r0,r1,r3");
asm("compare_exit:");
// Compare byte at a time until word aligned...
asm("aligned_compare:");
// Get the number of bytes to compare before word alignment is reached... and jump to the appropriate point
asm(" add r6,r4,r6");
asm(" subs r0,r0,#1");
asm(" movmi r0,#3");
asm(" rsb r5,r0,#3");
asm(" sub ip,ip,r5");
asm(" mov ip,ip,lsr #2");
asm(" add pc,pc,r0,asl #4");
asm(" b compare_done"); // Never executed
// Jump here if alignment is 1. Do not use more than 4 instructions without altering the relative jump above
asm(" ldrb r0,[r4],#1");
asm(" ldrb r5,[r2],#1");
asm(" subs r0,r0,r5");
asm(" bne compare_exit ");
// Jump here if alignment is 2. Do not use more than 4 instructions without altering the relative jump above
asm(" ldrb r0,[r4],#1");
asm(" ldrb r5,[r2],#1");
asm(" subs r0,r0,r5");
asm(" bne compare_exit ");
// Jump here if alignment is 3. Do not use more than 4 instructions without altering the relative jump above
asm(" ldrb r0,[r4],#1");
asm(" ldrb r5,[r2],#1");
asm(" subs r0,r0,r5");
asm(" bne compare_exit ");
// Must now be word aligned
asm("aligned_compare_loop:");
asm(" ldr r0,[r4],#4");
asm(" ldr r5,[r2],#4");
asm(" eors r0,r0,r5");
asm(" bne word_different");
asm(" subs ip,ip,#1");
asm(" bne aligned_compare_loop");
// Less than 4 bytes to go...
asm(" bne compare_loop");
asm(" sub r0,r1,r3");
// A difference was encountered while comparing words; find out which byte it was
asm("word_different:");
asm(" ldrb r0,[r4,#-4]");
asm(" ldrb r5,[r2,#-4]");
asm(" subs r0,r0,r5");
asm(" bne compare_exit ");
asm(" ldrb r0,[r4,#-3]");
asm(" ldrb r5,[r2,#-3]");
asm(" subs r0,r0,r5");
asm(" bne compare_exit ");
asm(" ldrb r0,[r4,#-2]");
asm(" ldrb r5,[r2,#-2]");
asm(" subs r0,r0,r5");
asm(" bne compare_exit ");
// This must be the differing byte...
asm(" ldrb r0,[r4,#-1]");
asm(" ldrb r5,[r2,#-1]");
asm(" sub r0,r0,r5");