// Copyright (c) 1995-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32\common\arm\cmem.cia
//
//

#include "../common.h"
#include
#if defined(__REPLACE_GENERIC_UTILS)
#include "replacement_utils.h"
#endif

#if defined(__MEM_MACHINE_CODED__)

#ifndef USE_REPLACEMENT_MEMSET

#if defined(_DEBUG)

#ifdef __STANDALONE_NANOKERNEL__

#define ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc) \
    asm("tst "#rt1", #3"); \
    asm("ldrne "#rt1", ["#rt1"]")

#else // __STANDALONE_NANOKERNEL__
GLDEF_C void PanicEWordMoveLengthNotMultipleOf4();
GLDEF_C void PanicEWordMoveSourceNotAligned();
GLDEF_C void PanicEWordMoveTargetNotAligned();

#define ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc) \
    asm("tst "#rt1", #3"); \
    asm("bne " panicfunc )

#endif // __STANDALONE_NANOKERNEL__

#else // _DEBUG

#define ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc)

#endif // _DEBUG


// See header file e32cmn.h for the in-source documentation.
extern "C" EXPORT_C __NAKED__ TAny* memclr(TAny* /*aTrg*/, unsigned int /*aLength*/)
    {
    KMEMCLRHOOK
    asm("mov r2, #0 ");
    asm("b fill ");
    }

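// The memclr/memset implementations share a single "fill" path: fills of
// eight bytes or less take a computed branch into a ladder of byte stores,
// while longer fills replicate the fill byte across a set of registers,
// align the destination to a 32-byte boundary and then store in
// multi-register STM bursts (256 bytes per loop iteration for large fills).
// The remaining tail is handled by loading bits of the length into the CPSR
// flags with MSR, so that conditional STM/STR instructions store exactly the
// right combination of 128/64/32/16/8/4/2/1 bytes. For example, a 13-byte
// tail (binary 1101) sets the C and V flags on the first pass (8 + 4 bytes)
// and the Z flag on the second pass (1 byte).
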
sl@0: extern "C" EXPORT_C __NAKED__ TAny* memset(TAny* /*aTrg*/, TInt /*aValue*/, unsigned int /*aLength*/) sl@0: { sl@0: KMEMSETHOOK sl@0: asm(" mov r3, r2 "); /* length into r3 */ sl@0: asm(" and r2,r1,#255"); /* fill value into r2 */ sl@0: asm(" mov r1, r3 "); /* length into r1 */ sl@0: sl@0: asm("fill:"); sl@0: asm(" cmp r1,#8"); sl@0: asm(" bls small_fill"); // only taken ~20% of the time sl@0: sl@0: asm(" stmfd sp!,{r0,r4-r9,lr}"); sl@0: asm(" movs r3, r0, lsl #30 "); // Check if word aligned sl@0: asm(" orr r2,r2,r2,lsl #8"); sl@0: asm(" orr r2,r2,r2,lsl #16"); sl@0: asm(" bne unaligned_fill "); sl@0: sl@0: // Align destination address to 32 byte boundary if possible sl@0: sl@0: asm("word_aligned_fill: "); sl@0: asm(" mov r4,r2"); sl@0: asm(" mov r5,r2"); sl@0: asm(" mov r6,r2"); sl@0: asm(" movs r3, r0, lsl #27 "); sl@0: asm(" beq aligned_fill "); sl@0: asm(" rsb r3, r3, #0 "); // calculate fill length necessary for aligment sl@0: asm(" cmp r1, r3, lsr #27 "); // compare with remaining length sl@0: asm(" blo smaller_fill "); // skip alignment if greater sl@0: asm(" msr cpsr_f, r3 "); // put length bits 4, 3, 2 into N, Z, C flags sl@0: asm(" strcs r2, [r0], #4 "); // align to 8 byte boundary sl@0: asm(" stmeqia r0!, {r2, r4} "); // align to 16 byte boundary sl@0: asm(" stmmiia r0!, {r2, r4-r6} "); // align to 32 byte boundary sl@0: asm(" sub r1, r1, r3, lsr #27 "); // adjust remaining length sl@0: sl@0: asm("aligned_fill:"); sl@0: asm(" cmp r1, #64 "); sl@0: asm(" bhs big_fill "); sl@0: sl@0: // Fill 0-63 bytes sl@0: sl@0: asm("smaller_fill:"); sl@0: asm(" movs r1, r1, lsl #26"); sl@0: asm(" beq mem_fill_end "); sl@0: asm(" msr cpsr_flg, r1 "); sl@0: asm(" stmmiia r0!,{r2,r4-r6}"); // Fill 32 sl@0: asm(" stmmiia r0!,{r2,r4-r6}"); sl@0: asm(" stmeqia r0!,{r2,r4-r6}"); // Fill 16 sl@0: asm(" stmcsia r0!,{r2,r4}"); // Fill 8 sl@0: asm(" strvs r2,[r0],#4"); // Fill 4 sl@0: asm(" movs r1, r1, lsl #4 "); sl@0: asm(" bne smallest_fill "); sl@0: asm("mem_fill_end: "); sl@0: __POPRET("r0,r4-r9,"); sl@0: sl@0: // Fill last 1-3 bytes sl@0: sl@0: asm("smallest_fill: "); sl@0: asm(" msr cpsr_flg,r1"); sl@0: asm(" strmih r2,[r0],#2"); // Fill 2 sl@0: asm(" streqb r2,[r0],#1"); // Fill 1 sl@0: __POPRET("r0,r4-r9,"); sl@0: sl@0: // Fill loop for length >= 64 sl@0: sl@0: asm("big_fill: "); sl@0: asm(" mov r3,r2"); sl@0: asm(" mov r7,r2"); sl@0: asm(" mov r8,r2"); sl@0: asm(" mov r9,r2"); sl@0: asm(" movs ip,r1,lsr #8"); // Number of 256 byte blocks to fill sl@0: asm(" beq medium_fill "); sl@0: asm("fill_256_bytes_loop:"); sl@0: asm(" stmia r0!,{r2-r9}"); // Fill 256 bytes sl@0: asm(" stmia r0!,{r2-r9}"); sl@0: asm(" stmia r0!,{r2-r9}"); sl@0: asm(" stmia r0!,{r2-r9}"); sl@0: asm(" stmia r0!,{r2-r9}"); sl@0: asm(" stmia r0!,{r2-r9}"); sl@0: asm(" stmia r0!,{r2-r9}"); sl@0: asm(" stmia r0!,{r2-r9}"); sl@0: asm(" subs ip,ip,#1"); sl@0: asm(" bne fill_256_bytes_loop"); sl@0: asm("medium_fill: "); sl@0: asm(" movs ip,r1,lsl #24"); sl@0: asm(" msr cpsr_flg,ip"); sl@0: asm(" stmmiia r0!,{r2-r9}"); // Fill 128 sl@0: asm(" stmmiia r0!,{r2-r9}"); sl@0: asm(" stmmiia r0!,{r2-r9}"); sl@0: asm(" stmmiia r0!,{r2-r9}"); sl@0: asm(" stmeqia r0!,{r2-r9}"); // Fill 64 sl@0: asm(" stmeqia r0!,{r2-r9}"); sl@0: asm(" and r1, r1, #63 "); sl@0: asm(" b smaller_fill"); sl@0: sl@0: // Word-align destination address, length >= 8 sl@0: sl@0: asm("unaligned_fill: "); sl@0: asm(" rsb r3, r3, #0 "); // calculate fill length necessary for aligment sl@0: asm(" msr cpsr_flg, r3"); sl@0: asm(" streqb r2, [r0], #1 "); // align to 2 
    asm(" strmih r2, [r0], #2 ");       // align to 4 byte boundary
    asm(" sub r1, r1, r3, lsr #30 ");
    asm(" b word_aligned_fill ");

    // Fill for length <= 8

    asm("small_fill: ");
    asm(" mov r3, r0 ");                /* r3=dest */
    asm(" adr ip, small_fill_end ");
    asm(" sub pc, ip, r1, lsl #2 ");
    asm(" strb r2, [r3], #1");
    asm(" strb r2, [r3], #1");
    asm(" strb r2, [r3], #1");
    asm(" strb r2, [r3], #1");
    asm(" strb r2, [r3], #1");
    asm(" strb r2, [r3], #1");
    asm(" strb r2, [r3], #1");
    asm(" strb r2, [r3], #1");
    asm("small_fill_end: ");
    __JUMP(,lr);

#ifdef __EABI__
    // The AEABI switched the order of arg2 and arg3 to save an instruction when
    // calling 'memset' from 'memclr'
    asm(".global __aeabi_memset8 ");
    asm("__aeabi_memset8: ");
    asm(".global __aeabi_memset4 ");
    asm("__aeabi_memset4: ");
    asm(".global __aeabi_memset ");
    asm("__aeabi_memset: ");
    asm(" and r2, r2, #255");
    asm(" b fill ");
#endif
    }

#endif // USE_REPLACEMENT_MEMSET

#ifndef USE_REPLACEMENT_MEMCPY

// See header file e32cmn.h for the in-source documentation.

extern "C" EXPORT_C __NAKED__ TAny* wordmove(TAny* /*aTrg*/, const TAny* /*aSrc*/, unsigned int /*aLength*/)
//
// Assumes all is aligned
//
    {
    ARM_ASSERT_MULTIPLE_OF_FOUR(r0, CSM_Z30PanicEWordMoveTargetNotAlignedv);
    ARM_ASSERT_MULTIPLE_OF_FOUR(r1, CSM_Z30PanicEWordMoveSourceNotAlignedv);
    ARM_ASSERT_MULTIPLE_OF_FOUR(r2, CSM_Z34PanicEWordMoveLengthNotMultipleOf4v);

    // Mask length to a multiple of four bytes to avoid memory, or register
    // corruption by the special cases below.
    asm("bic r2,r2,#3");

    // Length <= 24 in ~90% of cases, however can only copy > 16 bytes in 4
    // instructions if the LDM instruction restores Thumb state when loading the PC.
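    // Each size case in the branch table below occupies exactly four
    // instructions (16 bytes). Since r2 is a multiple of four,
    // "addls pc, pc, r2, lsl #2" adds 16 bytes per word of length to the PC
    // (which reads as the address of the first table entry) and so lands
    // directly on the case for that many words; anything above the threshold
    // falls through the "b 9f" to the general-purpose move.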
#ifdef __CPU_ARM_LDR_PC_SETS_TBIT
    asm("cmp r2, #24 ");
#else
    asm("cmp r2, #16 ");
#endif
    PLD(1);
    asm("addls pc, pc, r2, lsl #2 ");   // take branch depending on size
    asm("b 9f ");                       // too big

    // 0 words
    __JUMP(,lr);
    __JUMP(,lr);
    __JUMP(,lr);
    __JUMP(,lr);

    // 1 word
    asm("ldr ip, [r1] ");
    asm("str ip, [r0] ");
    __JUMP(,lr);
    __JUMP(,lr);

    // 2 words
    asm("ldmia r1, {r2,r3}");
    asm("stmia r0, {r2,r3}");
    __JUMP(,lr);
    __JUMP(,lr);

    // 3 words
    asm("ldmia r1, {r2,r3,ip}");
    asm("stmia r0, {r2,r3,ip}");
    __JUMP(,lr);
    __JUMP(,lr);

    // 4 words
    asm("ldmia r1, {r1,r2,r3,ip}");
    asm("stmia r0, {r1,r2,r3,ip}");
    __JUMP(,lr);
    __JUMP(,lr);

#ifdef __CPU_ARM_LDR_PC_SETS_TBIT
    // 5 words
    asm("stmfd sp!, {lr}");
    asm("ldmia r1, {r1,r2,r3,ip,lr}");
    asm("stmia r0, {r1,r2,r3,ip,lr}");
    asm("ldmfd sp!, {pc}");

    // 6 words
    asm("stmfd sp!, {r4,lr}");
    asm("ldmia r1, {r1,r2,r3,r4,ip,lr}");
    asm("stmia r0, {r1,r2,r3,r4,ip,lr}");
    asm("ldmfd sp!, {r4,pc}");
#endif

    asm("9: ");
    asm("subs r3, r0, r1 ");            // r3 = dest - source
    __JUMP(eq,lr);                      // return if source = dest
    asm("stmfd sp!, {r0,r4-r11,lr} ");
    asm("cmphi r2, r3 ");               // if dest>source, compare length with dest-source
    asm("bls mem_move_fore ");          // if dest<source or length<=dest-source, move forwards
    asm(" movhi r3, #0 ");              // if dest>source and length>dest-source need to go backwards - set r3=0
    //
    // If <16 bytes, just do byte moves
    //
    asm(" cmp r2, #15 ");
    asm(" bhi main_copy ");

    asm(" ldrb r12, [r0] ");            // read dest so it's in cache - avoid lots of single accesses to external memory
    asm(" sub r12, r0, #1 ");
    asm(" ldrb r12, [r12, r2] ");       // read dest+length-1
    asm(" cmp r3, #0 ");
    asm(" beq small_copy_back ");       // r3=0 means go backwards

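    // Both small-copy ladders below are entered through a computed branch:
    // each step is one ldrb plus one strb (8 bytes of code), so
    // "sub pc, r12, r2, lsl #3" drops into the ladder r2 steps before the
    // end label and exactly r2 bytes (at most 15 here) are copied.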
[r1, #-1]! "); sl@0: asm(" strb r12, [r3, #-1]! "); sl@0: asm(" ldrb r12, [r1, #-1]! "); sl@0: asm(" strb r12, [r3, #-1]! "); sl@0: asm(" ldrb r12, [r1, #-1]! "); sl@0: asm(" strb r12, [r3, #-1]! "); sl@0: asm(" ldrb r12, [r1, #-1]! "); sl@0: asm(" strb r12, [r3, #-1]! "); sl@0: asm(" ldrb r12, [r1, #-1]! "); sl@0: asm(" strb r12, [r3, #-1]! "); sl@0: asm(" ldrb r12, [r1, #-1]! "); sl@0: asm(" strb r12, [r3, #-1]! "); sl@0: asm(" ldrb r12, [r1, #-1]! "); sl@0: asm(" strb r12, [r3, #-1]! "); sl@0: asm(" ldrb r12, [r1, #-1]! "); sl@0: asm(" strb r12, [r3, #-1]! "); sl@0: asm(" ldrb r12, [r1, #-1]! "); sl@0: asm(" strb r12, [r3, #-1]! "); sl@0: asm(" ldrb r12, [r1, #-1]! "); sl@0: asm(" strb r12, [r3, #-1]! "); sl@0: asm(" ldrb r12, [r1, #-1]! "); sl@0: asm(" strb r12, [r3, #-1]! "); sl@0: asm(" ldrb r12, [r1, #-1]! "); sl@0: asm(" strb r12, [r3, #-1]! "); sl@0: asm(" ldrb r12, [r1, #-1]! "); sl@0: asm(" strb r12, [r3, #-1]! "); sl@0: asm(" ldrb r12, [r1, #-1]! "); sl@0: asm(" strb r12, [r3, #-1]! "); sl@0: asm(" ldrb r12, [r1, #-1]! "); sl@0: asm(" strb r12, [r3, #-1]! "); sl@0: asm("small_copy_back_end: "); sl@0: __JUMP(,lr); sl@0: sl@0: sl@0: asm("main_copy: "); sl@0: PLD(1); // preload first two cache lines sl@0: PLD_ioff(1, 32); sl@0: asm(" stmfd sp!, {r0,r4-r11,lr} "); // r0 == dest, r1 == src, r2 == len sl@0: asm(" cmp r3, #0 "); sl@0: asm(" beq copy_back "); // we must go backwards sl@0: asm(" movs r3, r0, lsl #30 "); // check destination word aligned sl@0: asm(" bne dest_unaligned_fore "); sl@0: sl@0: // sl@0: // Normal copy forwards. r0 should point to end address on exit sl@0: // Destination now word-aligned; if source is also word-aligned, do aligned copy. sl@0: // sl@0: asm("dest_aligned_fore: "); sl@0: asm(" ands r12, r1, #3 "); // r12=alignment of source sl@0: asm(" bne copy_fwd_nonaligned "); sl@0: sl@0: // sl@0: // We are now word aligned, at least 13 bytes to do sl@0: // sl@0: sl@0: asm("mem_move_fore:"); sl@0: // sl@0: // superalign sl@0: // sl@0: asm(" movs r4, r0, lsl #27 "); // destination alignment into r4 sl@0: asm(" beq f_al_already_aligned "); // fast path sl@0: asm(" rsb r4, r4, #0 "); // bytes required to align destination to 32 sl@0: asm(" cmp r2, r4, lsr #27 "); // check that many remaining sl@0: asm(" blo its_smaller_fore "); // if too short, just stick with word alignment sl@0: asm(" msr cpsr_flg, r4 "); // destination alignment into N, Z, C flags sl@0: // do word moves to align destination sl@0: asm(" ldrcs lr, [r1], #4 "); // C flag == 1 word (we are already word aligned) sl@0: asm(" ldmeqia r1!, {r3,r9} "); // Z flag == 2 words sl@0: asm(" ldmmiia r1!, {r5-r8} "); // N flag == 4 words, destination now 32 byte aligned sl@0: asm(" sub r2, r2, r4, lsr #27 "); // adjust length sl@0: asm(" strcs lr, [r0], #4 "); // destination now 8 byte aligned sl@0: asm(" stmeqia r0!, {r3,r9} "); // destination now 16 byte aligned sl@0: asm(" stmmiia r0!, {r5-r8} "); // destination now 32 byte aligned sl@0: sl@0: asm("f_al_already_aligned: "); sl@0: asm(" cmp r2, #64 "); sl@0: asm(" bhs large_copy_fore "); sl@0: // sl@0: // Less than 64 bytes to go... 
sl@0: // sl@0: asm("its_smaller_fore:"); sl@0: asm(" movs ip, r2, lsl #26 "); // length bits 5, 4, 3, 2 into N, Z, C, V sl@0: asm(" beq mem_copy_end "); // skip if remaining length zero sl@0: asm(" msr cpsr_flg, ip "); sl@0: asm(" ldmmiia r1!, {r3-r10} "); sl@0: asm(" stmmiia r0!, {r3-r10} "); // copy 32 sl@0: asm(" ldmeqia r1!, {r3-r6} "); sl@0: asm(" ldmcsia r1!, {r7-r8} "); sl@0: asm(" ldrvs r9, [r1], #4 "); sl@0: asm(" stmeqia r0!, {r3-r6} "); // copy 16 sl@0: asm(" stmcsia r0!, {r7-r8} "); // copy 8 sl@0: asm(" strvs r9, [r0], #4 "); // copy 4 sl@0: sl@0: asm(" movs ip, r2, lsl #30 "); sl@0: asm(" bne smallest_copy_fore "); sl@0: sl@0: asm("mem_copy_end: "); sl@0: __POPRET("r0,r4-r11,"); sl@0: sl@0: sl@0: // sl@0: // Less than 4 bytes to go... sl@0: // sl@0: sl@0: asm("smallest_copy_fore: "); sl@0: asm(" msr cpsr_flg, ip "); sl@0: asm(" ldrmih r3, [r1], #2 "); sl@0: asm(" ldreqb r4, [r1], #1 "); sl@0: asm(" strmih r3, [r0], #2 "); // copy 2 sl@0: asm(" streqb r4, [r0], #1 "); // copy 1 sl@0: __POPRET("r0,r4-r11,"); sl@0: sl@0: sl@0: // sl@0: // Do byte moves if necessary to word-align destination sl@0: // sl@0: asm("dest_unaligned_fore: "); sl@0: asm(" rsb r3, r3, #0 "); sl@0: asm(" msr cpsr_flg, r3 "); sl@0: asm(" ldrmib r4, [r1], #1 "); // move bytes to align destination sl@0: asm(" ldrmib r5, [r1], #1 "); sl@0: asm(" ldreqb r6, [r1], #1 "); sl@0: asm(" sub r2, r2, r3, lsr #30 "); // adjust length, at least 13 bytes remaining sl@0: asm(" strmib r4, [r0], #1 "); sl@0: asm(" strmib r5, [r0], #1 "); sl@0: asm(" streqb r6, [r0], #1 "); sl@0: asm(" b dest_aligned_fore "); sl@0: sl@0: sl@0: // sl@0: // Large copy, length >= 64 sl@0: // sl@0: sl@0: asm("large_copy_fore: "); sl@0: asm(" movs ip, r2, lsr #6 "); // ip = number of 64 blocks to copy sl@0: asm("1: "); sl@0: PLD_ioff(1, 32); sl@0: PLD_ioff(1, 64); sl@0: asm(" ldmia r1!, {r3-r10} "); // Copy 64 sl@0: asm(" stmia r0!, {r3-r10} "); sl@0: asm(" ldmia r1!, {r3-r10} "); sl@0: asm(" subs ip, ip, #1 "); sl@0: asm(" stmia r0!, {r3-r10} "); sl@0: asm(" bne 1b "); sl@0: asm(" and r2, r2, #63 "); sl@0: asm(" b its_smaller_fore "); sl@0: sl@0: sl@0: // sl@0: // Forward unlaigned copy sl@0: // sl@0: sl@0: asm("copy_fwd_nonaligned:"); sl@0: // sl@0: // superalign sl@0: // sl@0: asm(" bic r1, r1, #3 "); // align source sl@0: asm(" ldr r11, [r1], #4 "); // get first word sl@0: asm(" mov r12, r12, lsl #3 "); // r12 = 8*source alignment sl@0: asm(" ands r4, r0, #31 "); // destination alignment into r4 sl@0: asm(" beq medium_unal_copy "); // skip if already aligned sl@0: asm(" rsb r4, r4, #32 "); // r4 = bytes to align dest to 32 sl@0: asm(" cmp r2, r4 "); // check if length big enough to align to 32 sl@0: asm(" blo copy_fwd_remainder "); // skip if too small sl@0: asm(" sub r2, r2, r4 "); // adjust length sl@0: asm(" rsb r3, r12, #32 "); // r3 = 32 - 8*source alignment sl@0: sl@0: asm("1: "); sl@0: asm(" mov r5, r11, lsr r12 "); // r5 = part of previous source word required to make destination word sl@0: asm(" ldr r11, [r1], #4 "); // get next word sl@0: asm(" subs r4, r4, #4 "); // 4 bytes less to do sl@0: asm(" orr r5, r5, r11, lsl r3 "); // form next destination word sl@0: asm(" str r5, [r0], #4 "); // and store it sl@0: asm(" bne 1b "); // loop until destination 32 byte aligned sl@0: sl@0: asm("medium_unal_copy: "); // destination now aligned to 32 bytes sl@0: asm(" movs lr, r2, lsr #5 "); // lr=number of 32-byte blocks sl@0: asm(" beq copy_fwd_remainder "); // skip if length < 32 sl@0: sl@0: asm(" cmp r12, #16 "); sl@0: asm(" beq 
copy_fwd_nonaligned_2 "); // branch if source = 2 mod 4 sl@0: asm(" bhi copy_fwd_nonaligned_3 "); // branch if source = 3 mod 4, else source = 1 mod 4 sl@0: sl@0: // source = 1 mod 4 sl@0: asm("copy_fwd_nonaligned_1: "); sl@0: asm(" mov r3, r11, lsr #8 "); sl@0: asm(" ldmia r1!, {r4-r11} "); sl@0: PLD_ioff(1, 32); sl@0: asm(" subs lr, lr, #1 "); sl@0: asm(" orr r3, r3, r4, lsl #24 "); sl@0: asm(" mov r4, r4, lsr #8 "); sl@0: asm(" orr r4, r4, r5, lsl #24 "); sl@0: asm(" mov r5, r5, lsr #8 "); sl@0: asm(" orr r5, r5, r6, lsl #24 "); sl@0: asm(" mov r6, r6, lsr #8 "); sl@0: asm(" orr r6, r6, r7, lsl #24 "); sl@0: asm(" mov r7, r7, lsr #8 "); sl@0: asm(" orr r7, r7, r8, lsl #24 "); sl@0: asm(" mov r8, r8, lsr #8 "); sl@0: asm(" orr r8, r8, r9, lsl #24 "); sl@0: asm(" mov r9, r9, lsr #8 "); sl@0: asm(" orr r9, r9, r10, lsl #24 "); sl@0: asm(" mov r10, r10, lsr #8 "); sl@0: asm(" orr r10, r10, r11, lsl #24 "); sl@0: asm(" stmia r0!, {r3-r10} "); sl@0: asm(" bne copy_fwd_nonaligned_1 "); sl@0: asm(" b copy_fwd_remainder "); sl@0: sl@0: // source = 2 mod 4 sl@0: asm("copy_fwd_nonaligned_2: "); sl@0: asm(" mov r3, r11, lsr #16 "); sl@0: asm(" ldmia r1!, {r4-r11} "); sl@0: PLD_ioff(1, 32); sl@0: asm(" subs lr, lr, #1 "); sl@0: asm(" orr r3, r3, r4, lsl #16 "); sl@0: asm(" mov r4, r4, lsr #16 "); sl@0: asm(" orr r4, r4, r5, lsl #16 "); sl@0: asm(" mov r5, r5, lsr #16 "); sl@0: asm(" orr r5, r5, r6, lsl #16 "); sl@0: asm(" mov r6, r6, lsr #16 "); sl@0: asm(" orr r6, r6, r7, lsl #16 "); sl@0: asm(" mov r7, r7, lsr #16 "); sl@0: asm(" orr r7, r7, r8, lsl #16 "); sl@0: asm(" mov r8, r8, lsr #16 "); sl@0: asm(" orr r8, r8, r9, lsl #16 "); sl@0: asm(" mov r9, r9, lsr #16 "); sl@0: asm(" orr r9, r9, r10, lsl #16 "); sl@0: asm(" mov r10, r10, lsr #16 "); sl@0: asm(" orr r10, r10, r11, lsl #16 "); sl@0: asm(" stmia r0!, {r3-r10} "); sl@0: asm(" bne copy_fwd_nonaligned_2 "); sl@0: asm(" b copy_fwd_remainder "); sl@0: sl@0: // source = 3 mod 4 sl@0: asm("copy_fwd_nonaligned_3: "); sl@0: asm(" mov r3, r11, lsr #24 "); sl@0: asm(" ldmia r1!, {r4-r11} "); sl@0: PLD_ioff(1, 32); sl@0: asm(" subs lr, lr, #1 "); sl@0: asm(" orr r3, r3, r4, lsl #8 "); sl@0: asm(" mov r4, r4, lsr #24 "); sl@0: asm(" orr r4, r4, r5, lsl #8 "); sl@0: asm(" mov r5, r5, lsr #24 "); sl@0: asm(" orr r5, r5, r6, lsl #8 "); sl@0: asm(" mov r6, r6, lsr #24 "); sl@0: asm(" orr r6, r6, r7, lsl #8 "); sl@0: asm(" mov r7, r7, lsr #24 "); sl@0: asm(" orr r7, r7, r8, lsl #8 "); sl@0: asm(" mov r8, r8, lsr #24 "); sl@0: asm(" orr r8, r8, r9, lsl #8 "); sl@0: asm(" mov r9, r9, lsr #24 "); sl@0: asm(" orr r9, r9, r10, lsl #8 "); sl@0: asm(" mov r10, r10, lsr #24 "); sl@0: asm(" orr r10, r10, r11, lsl #8 "); sl@0: asm(" stmia r0!, {r3-r10} "); sl@0: asm(" bne copy_fwd_nonaligned_3 "); sl@0: sl@0: // <32 bytes to go, source alignment could be 1, 2 or 3 mod 4 sl@0: // r12 = 8 * (source mod 4) sl@0: asm("copy_fwd_remainder: "); sl@0: asm(" ands r4, r2, #0x1c "); // r4 = 4*number of words left sl@0: asm(" beq 2f "); // skip if none sl@0: asm(" rsb r3, r12, #32 "); // r3 = 32 - 8*source alignment sl@0: sl@0: asm("1: "); sl@0: asm(" mov r5, r11, lsr r12 "); // r5 = part of previous source word required to make destination word sl@0: asm(" ldr r11, [r1], #4 "); // get next word sl@0: asm(" subs r4, r4, #4 "); // 4 bytes less to do sl@0: asm(" orr r5, r5, r11, lsl r3 "); // form next destination word sl@0: asm(" str r5, [r0], #4 "); // and store it sl@0: asm(" bne 1b "); // loop until destination 32 byte aligned sl@0: sl@0: asm("2: "); sl@0: asm(" sub r1, r1, 
#4 "); sl@0: asm(" add r1, r1, r12, lsr #3 "); // r1 = real unaligned source address sl@0: asm(" tst r2, #2 "); // 2 bytes left? sl@0: asm(" ldrneb r5, [r1], #1 "); // copy 2 sl@0: asm(" strneb r5, [r0], #1 "); sl@0: asm(" ldrneb r5, [r1], #1 "); sl@0: asm(" strneb r5, [r0], #1 "); sl@0: asm(" tst r2, #1 "); // 1 byte left? sl@0: asm(" ldrneb r5, [r1], #1 "); // copy 1 sl@0: asm(" strneb r5, [r0], #1 "); sl@0: __POPRET("r0,r4-r11,"); sl@0: sl@0: sl@0: // sl@0: // Source is before destination and they overlap, so need to copy backwards sl@0: // sl@0: sl@0: asm("copy_back:"); sl@0: asm(" add r0, r0, r2 "); // r0=last dest address+1 sl@0: asm(" add r1, r1, r2 "); // r1=last source address+1 sl@0: PLD_noff(1, 33); // preload last two cache lines sl@0: PLD_noff(1, 1); sl@0: sl@0: asm(" movs r3, r0, lsl #30 "); // check destination word aligned sl@0: asm(" bne dest_unaligned_back "); sl@0: sl@0: asm("dest_aligned_back: "); sl@0: asm(" ands r12, r1, #3 "); // r12=alignment of source sl@0: asm(" bne copy_back_nonaligned "); sl@0: sl@0: // sl@0: // Backwards copying, addresses both word aligned, at least 13 bytes to go sl@0: // sl@0: sl@0: asm("mem_move_back:"); sl@0: // sl@0: // superalign sl@0: // sl@0: asm(" movs r4, r0, lsl #27 "); // bytes required to align destination to 32 sl@0: asm(" beq bal_already_aligned "); // skip if already aligned to 32 sl@0: asm(" cmp r2, r4, lsr #27 "); // check that many remaining sl@0: asm(" blo its_smaller_back "); // if too short, just stick with word alignment sl@0: asm(" msr cpsr_flg, r4 "); // destination alignment into N, Z, C flags sl@0: // do word moves to align destination sl@0: asm(" ldrcs lr, [r1, #-4]! "); // C flag == 1 word (we are already word aligned) sl@0: asm(" ldmeqdb r1!, {r3,r9} "); // Z flag == 2 words sl@0: asm(" ldmmidb r1!, {r5-r8} "); sl@0: asm(" sub r2, r2, r4, lsr #27 "); // adjust length sl@0: asm(" strcs lr, [r0, #-4]! "); // destination now 8 byte aligned sl@0: asm(" stmeqdb r0!, {r3,r9} "); // destination now 16 byte aligned sl@0: asm(" stmmidb r0!, {r5-r8} "); // N flag == 4 words, destination now 32 byte aligned sl@0: sl@0: asm("bal_already_aligned: "); sl@0: asm(" cmp r2, #64 "); sl@0: asm(" bhs large_copy_back "); sl@0: // sl@0: // Less than 64 bytes to go sl@0: // sl@0: asm("its_smaller_back: "); sl@0: asm(" movs ip, r2, lsl #26 "); // r2 = remaining length (<256) << 24 sl@0: asm(" beq mem_copy_end2 "); // skip if remaining length zero sl@0: asm(" msr cpsr_flg, ip "); sl@0: asm(" ldmmidb r1!, {r3-r10} "); sl@0: asm(" stmmidb r0!, {r3-r10} "); // copy 32 sl@0: asm(" ldmeqdb r1!, {r3-r6} "); sl@0: asm(" ldmcsdb r1!, {r7,r8} "); sl@0: asm(" ldrvs r9, [r1, #-4]! "); sl@0: asm(" stmeqdb r0!, {r3-r6} "); // copy 16 sl@0: asm(" stmcsdb r0!, {r7,r8} "); // copy 8 sl@0: asm(" strvs r9, [r0, #-4]! "); // copy 4 sl@0: sl@0: asm(" movs ip, r2, lsl #30 "); sl@0: asm(" bne smallest_copy_back "); sl@0: sl@0: asm("mem_copy_end2: "); sl@0: __POPRET("r0,r4-r11,"); sl@0: sl@0: sl@0: // sl@0: // Less than 4 bytes to go... sl@0: // sl@0: sl@0: asm("smallest_copy_back: "); sl@0: asm(" msr cpsr_flg, ip "); sl@0: asm(" ldrmih r3, [r1, #-2]! "); sl@0: asm(" ldreqb r4, [r1, #-1]! "); sl@0: asm(" strmih r3, [r0, #-2]! "); // copy 2 sl@0: asm(" streqb r4, [r0, #-1]! "); // copy 1 sl@0: __POPRET("r0,r4-r11,"); sl@0: sl@0: sl@0: // sl@0: // Do byte moves if necessary to word-align destination sl@0: // sl@0: asm("dest_unaligned_back: "); sl@0: asm(" msr cpsr_flg, r3 "); // destination alignment in r3 into N,Z flags sl@0: asm(" ldrmib r4, [r1, #-1]! 
"); // do byte moves to align destination sl@0: asm(" ldrmib r5, [r1, #-1]! "); sl@0: asm(" ldreqb r6, [r1, #-1]! "); sl@0: asm(" sub r2, r2, r3, lsr #30 "); // adjust length, at least 13 bytes remaining sl@0: asm(" strmib r4, [r0, #-1]! "); sl@0: asm(" strmib r5, [r0, #-1]! "); sl@0: asm(" streqb r6, [r0, #-1]! "); sl@0: asm(" b dest_aligned_back "); sl@0: sl@0: sl@0: // sl@0: // Large backwards copy, length >= 64 sl@0: // sl@0: sl@0: asm("large_copy_back: "); sl@0: asm(" movs ip, r2, lsr #6 "); sl@0: asm("1: "); sl@0: PLD_noff(1, 65); sl@0: PLD_noff(1, 33); sl@0: asm(" ldmdb r1!, {r3-r10} "); // Copy 64 sl@0: asm(" stmdb r0!, {r3-r10} "); sl@0: asm(" ldmdb r1!, {r3-r10} "); sl@0: asm(" subs ip, ip, #1 "); sl@0: asm(" stmdb r0!, {r3-r10} "); sl@0: asm(" bne 1b "); sl@0: asm(" and r2, r2, #63 "); sl@0: asm(" b its_smaller_back "); sl@0: sl@0: // sl@0: // Backwards unlaigned copy sl@0: // sl@0: sl@0: asm("copy_back_nonaligned: "); sl@0: // sl@0: // superalign sl@0: // sl@0: asm(" bic r1, r1, #3 "); // align source sl@0: asm(" ldr r3, [r1] "); // get first word sl@0: asm(" mov r12, r12, lsl #3 "); // r12 = 8*source alignment sl@0: asm(" ands r4, r0, #31 "); // r4 = bytes to align dest to 32 sl@0: asm(" beq bunal_already_aligned "); // skip if already aligned sl@0: asm(" cmp r2, r4 "); // check if length big enough to align to 32 sl@0: asm(" blo copy_back_remainder "); // skip if too small sl@0: asm(" sub r2, r2, r4 "); // adjust length sl@0: asm(" rsb r6, r12, #32 "); // r6 = 32 - 8*source alignment sl@0: sl@0: asm("1: "); sl@0: asm(" mov r5, r3, lsl r6 "); // r5 = part of previous source word required to make destination word sl@0: asm(" ldr r3, [r1, #-4]! "); // get next word sl@0: asm(" subs r4, r4, #4 "); // 4 bytes less to do sl@0: asm(" orr r5, r5, r3, lsr r12 "); // form next destination word sl@0: asm(" str r5, [r0, #-4]! 
"); // and store it sl@0: asm(" bne 1b "); // loop until destination 32 byte aligned sl@0: sl@0: asm("bunal_already_aligned: "); // destination now aligned to 32 bytes sl@0: asm(" movs lr, r2, lsr #5 "); // lr=number of 32-byte blocks sl@0: asm(" beq copy_back_remainder "); // skip if length < 32 sl@0: sl@0: asm(" cmp r12, #16 "); sl@0: asm(" beq copy_back_nonaligned_2 "); // branch if source = 2 mod 4 sl@0: asm(" bhi copy_back_nonaligned_3 "); // branch if source = 3 mod 4, else source = 1 mod 4 sl@0: sl@0: // source = 1 mod 4 sl@0: asm("copy_back_nonaligned_1: "); sl@0: asm(" mov r11, r3, lsl #24 "); sl@0: asm(" ldmdb r1!, {r3-r10} "); sl@0: PLD_noff(1, 64); sl@0: asm(" orr r11, r11, r10, lsr #8 "); sl@0: asm(" mov r10, r10, lsl #24 "); sl@0: asm(" orr r10, r10, r9, lsr #8 "); sl@0: asm(" mov r9, r9, lsl #24 "); sl@0: asm(" orr r9, r9, r8, lsr #8 "); sl@0: asm(" mov r8, r8, lsl #24 "); sl@0: asm(" orr r8, r8, r7, lsr #8 "); sl@0: asm(" mov r7, r7, lsl #24 "); sl@0: asm(" orr r7, r7, r6, lsr #8 "); sl@0: asm(" mov r6, r6, lsl #24 "); sl@0: asm(" orr r6, r6, r5, lsr #8 "); sl@0: asm(" mov r5, r5, lsl #24 "); sl@0: asm(" orr r5, r5, r4, lsr #8 "); sl@0: asm(" mov r4, r4, lsl #24 "); sl@0: asm(" orr r4, r4, r3, lsr #8 "); sl@0: asm(" stmdb r0!, {r4-r11} "); sl@0: asm(" subs lr, lr, #1 "); sl@0: asm(" bne copy_back_nonaligned_1 "); sl@0: asm(" b copy_back_remainder "); sl@0: sl@0: // source = 2 mod 4 sl@0: asm("copy_back_nonaligned_2: "); sl@0: asm(" mov r11, r3, lsl #16 "); sl@0: asm(" ldmdb r1!, {r3-r10} "); sl@0: PLD_noff(1, 64); sl@0: asm(" orr r11, r11, r10, lsr #16 "); sl@0: asm(" mov r10, r10, lsl #16 "); sl@0: asm(" orr r10, r10, r9, lsr #16 "); sl@0: asm(" mov r9, r9, lsl #16 "); sl@0: asm(" orr r9, r9, r8, lsr #16 "); sl@0: asm(" mov r8, r8, lsl #16 "); sl@0: asm(" orr r8, r8, r7, lsr #16 "); sl@0: asm(" mov r7, r7, lsl #16 "); sl@0: asm(" orr r7, r7, r6, lsr #16 "); sl@0: asm(" mov r6, r6, lsl #16 "); sl@0: asm(" orr r6, r6, r5, lsr #16 "); sl@0: asm(" mov r5, r5, lsl #16 "); sl@0: asm(" orr r5, r5, r4, lsr #16 "); sl@0: asm(" mov r4, r4, lsl #16 "); sl@0: asm(" orr r4, r4, r3, lsr #16 "); sl@0: asm(" stmdb r0!, {r4-r11} "); sl@0: asm(" subs lr, lr, #1 "); sl@0: asm(" bne copy_back_nonaligned_2 "); sl@0: asm(" b copy_back_remainder "); sl@0: sl@0: // source = 3 mod 4 sl@0: asm("copy_back_nonaligned_3: "); sl@0: asm(" mov r11, r3, lsl #8 "); sl@0: asm(" ldmdb r1!, {r3-r10} "); sl@0: PLD_noff(1, 64); sl@0: asm(" orr r11, r11, r10, lsr #24 "); sl@0: asm(" mov r10, r10, lsl #8 "); sl@0: asm(" orr r10, r10, r9, lsr #24 "); sl@0: asm(" mov r9, r9, lsl #8 "); sl@0: asm(" orr r9, r9, r8, lsr #24 "); sl@0: asm(" mov r8, r8, lsl #8 "); sl@0: asm(" orr r8, r8, r7, lsr #24 "); sl@0: asm(" mov r7, r7, lsl #8 "); sl@0: asm(" orr r7, r7, r6, lsr #24 "); sl@0: asm(" mov r6, r6, lsl #8 "); sl@0: asm(" orr r6, r6, r5, lsr #24 "); sl@0: asm(" mov r5, r5, lsl #8 "); sl@0: asm(" orr r5, r5, r4, lsr #24 "); sl@0: asm(" mov r4, r4, lsl #8 "); sl@0: asm(" orr r4, r4, r3, lsr #24 "); sl@0: asm(" stmdb r0!, {r4-r11} "); sl@0: asm(" subs lr, lr, #1 "); sl@0: asm(" bne copy_back_nonaligned_3 "); sl@0: sl@0: // <32 bytes to go, source alignment could be 1, 2 or 3 mod 4 sl@0: // r12 = 8 * (source mod 4) sl@0: asm("copy_back_remainder: "); sl@0: asm(" ands r4, r2, #0x1c "); // r4 = 4*number of words left sl@0: asm(" beq 2f "); // skip if none sl@0: asm(" rsb r6, r12, #32 "); // r6 = 32 - 8*source alignment sl@0: sl@0: asm("1: "); sl@0: asm(" mov r5, r3, lsl r6 "); // r5 = part of previous source word required to 
    asm(" ldr r3, [r1, #-4]! ");        // get next word
    asm(" subs r4, r4, #4 ");           // 4 bytes less to do
    asm(" orr r5, r5, r3, lsr r12 ");   // form next destination word
    asm(" str r5, [r0, #-4]! ");        // and store it
    asm(" bne 1b ");                    // loop until all whole words are done

    asm("2: ");
    asm(" add r1, r1, r12, lsr #3 ");   // r1 = real unaligned source address
    asm(" tst r2, #2 ");                // 2 bytes left?
    asm(" ldrneb r3, [r1, #-1]! ");     // copy 2
    asm(" strneb r3, [r0, #-1]! ");
    asm(" ldrneb r3, [r1, #-1]! ");
    asm(" strneb r3, [r0, #-1]! ");
    asm(" tst r2, #1 ");                // 1 byte left?
    asm(" ldrneb r3, [r1, #-1]! ");     // copy 1
    asm(" strneb r3, [r0, #-1]! ");
    __POPRET("r0,r4-r11,");
    }

#endif // USE_REPLACEMENT_MEMCPY


#ifndef __KERNEL_MODE__
#ifdef __GCC32__
/**
Compares a block of data at one specified location with a block of data at
another specified location.

The comparison proceeds on a byte-for-byte basis; the result of the comparison
is based on the difference of the first bytes to disagree.

The data at the two locations are equal if they have the same length and content.
Where the lengths are different and the shorter section of data is the same
as the first part of the longer section of data, the shorter is considered
to be less than the longer.

@param aLeft   A pointer to the first (or left) block of 8 bit data
               to be compared.
@param aLeftL  The length of the first (or left) block of data to be compared,
               i.e. the number of bytes.
@param aRight  A pointer to the second (or right) block of 8 bit data to be
               compared.
@param aRightL The length of the second (or right) block of data to be compared,
               i.e. the number of bytes.

@return Positive, if the first (or left) block of data is greater than the
        second (or right) block of data.
        Negative, if the first (or left) block of data is less than the
        second (or right) block of data.
        Zero, if both the first (or left) and second (or right) blocks of data
        have the same length and the same content.
*/
EXPORT_C __NAKED__ TInt Mem::Compare(const TUint8* /*aLeft*/, TInt /*aLeftL*/, const TUint8* /*aRight*/, TInt /*aRightL*/)
    {
    // fall through
    }
#endif
#endif


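// The memcompare implementation below (which Mem::Compare falls through to on
// GCC builds) is equivalent to roughly the following C sketch:
//
//     TInt n = Min(aLeftL, aRightL);
//     for (TInt i = 0; i < n; i++)
//         if (aLeft[i] != aRight[i])
//             return aLeft[i] - aRight[i];   // first differing byte
//     return aLeftL - aRightL;               // equal so far: compare lengths
//
// i.e. a byte-by-byte comparison up to the shorter length, falling back to
// the length difference when one block is a prefix of the other.
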
// See header file e32cmn.h for the in-source documentation.
extern "C" EXPORT_C __NAKED__ TInt memcompare(const TUint8* /*aLeft*/, TInt /*aLeftL*/, const TUint8* /*aRight*/, TInt /*aRightL*/)
//
// Compares until the smaller of the two lengths is reached.
// If the lengths differ, returns leftlen-rightlen
// If a difference is encountered, returns left byte-right byte
//
    {

    asm(" stmfd sp!,{r4,r5,r6,lr}");
    asm(" mov r4,r0");
    //
    // Get the shorter of the two lengths, and check for zero length
    //
    asm(" cmp r1,r3");
    asm(" mov r6,r1");
    asm(" movge r6,r3");
    asm(" cmp r6,#0");
    asm(" beq compare_done");
    asm(" cmp r6,#16");
    //
    // Check for aligned buffers for faster comparing if more than 16 bytes
    //
    asm(" andge r0,r4,#3");
    asm(" andge r5,r2,#3");
    asm(" addlt r0,r5,#1");
    asm(" cmp r0,r5");
    asm(" beq aligned_compare");
    //
    // Get aLeft+Min(aLeftL,aRightL)
    //
    asm(" add r6,r4,r6");

    asm("compare_loop:");
    asm(" ldrb r0,[r4],#1");
    asm(" ldrb r5,[r2],#1");
    asm(" subs r0,r0,r5");
    asm("bne compare_exit ");
    asm(" cmp r4,r6");
    asm(" beq compare_done");

    asm(" ldrb r0,[r4],#1");
    asm(" ldrb r5,[r2],#1");
    asm(" subs r0,r0,r5");
    asm("bne compare_exit ");
    asm(" cmp r4,r6");
    asm(" beq compare_done");

    asm(" ldrb r0,[r4],#1");
    asm(" ldrb r5,[r2],#1");
    asm(" subs r0,r0,r5");
    asm("bne compare_exit ");
    asm(" cmp r4,r6");
    asm(" beq compare_done");

    asm(" ldrb r0,[r4],#1");
    asm(" ldrb r5,[r2],#1");
    asm(" subs r0,r0,r5");
    asm("bne compare_exit ");
    asm(" cmp r4,r6");
    asm(" bne compare_loop");
    //
    // Return difference of lengths
    //
    asm("compare_done:");
    asm(" sub r0,r1,r3");

    asm("compare_exit:");
    __POPRET("r4-r6,");
    //
    // Compare byte at a time until word aligned...
    //
    asm("aligned_compare:");
    //
    // Get number of bytes to compare before word alignment reached...and jump to appropriate point
    //
    asm(" mov ip,r6");
    asm(" add r6,r4,r6");
    asm(" subs r0,r0,#1");
    asm(" movmi r0,#3");
    asm(" rsb r5,r0,#3");
    asm(" sub ip,ip,r5");
    asm(" mov ip,ip,lsr #2");
    asm(" add pc,pc,r0,asl #4");
    asm(" b compare_done");             // Never executed
    //
    // Jump here if alignment is 1. Do not use more than 4 instructions without altering above relative jump
    //
    asm(" ldrb r0,[r4],#1");
    asm(" ldrb r5,[r2],#1");
    asm(" subs r0,r0,r5");
    asm("bne compare_exit ");
    //
    // Jump here if alignment is 2. Do not use more than 4 instructions without altering above relative jump
    //
    asm(" ldrb r0,[r4],#1");
    asm(" ldrb r5,[r2],#1");
    asm(" subs r0,r0,r5");
    asm("bne compare_exit ");
    //
    // Jump here if alignment is 3. Do not use more than 4 instructions without altering above relative jump
    //
    asm(" ldrb r0,[r4],#1");
    asm(" ldrb r5,[r2],#1");
    asm(" subs r0,r0,r5");
    asm("bne compare_exit ");
    //
    // Must now be word aligned
    //
    asm("aligned_compare_loop:");
    asm(" ldr r0,[r4],#4");
    asm(" ldr r5,[r2],#4");
    asm(" eors r0,r0,r5");
    asm(" bne word_different");
    asm(" subs ip,ip,#1");
    asm(" bne aligned_compare_loop");
    //
    // Less than 4 bytes to go...
    //
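    // At most 3 bytes can remain here; they are handled by falling back into
    // the byte-at-a-time compare_loop above. When the word loop finds a
    // mismatch, word_different steps back four bytes and re-compares that
    // word byte by byte, so the return value reflects the first byte that
    // differs.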
    asm(" cmp r4,r6");
    asm(" bne compare_loop");
    asm(" sub r0,r1,r3");
    __POPRET("r4-r6,");
    //
    // A difference was encountered while word comparing - find out which byte it was
    //
    asm("word_different:");
    asm(" ldrb r0,[r4,#-4]");
    asm(" ldrb r5,[r2,#-4]");
    asm(" subs r0,r0,r5");
    asm("bne compare_exit ");
    asm(" ldrb r0,[r4,#-3]");
    asm(" ldrb r5,[r2,#-3]");
    asm(" subs r0,r0,r5");
    asm("bne compare_exit ");
    asm(" ldrb r0,[r4,#-2]");
    asm(" ldrb r5,[r2,#-2]");
    asm(" subs r0,r0,r5");
    asm("bne compare_exit ");
    //
    // This must be the different byte...
    //
    asm(" ldrb r0,[r4,#-1]");
    asm(" ldrb r5,[r2,#-1]");
    asm(" sub r0,r0,r5");
    __POPRET("r4-r6,");
    }
#endif