1,5 → 1,5 |
# |
# Copyright (c) 2006 Martin Decky |
# Copyright (c) 2007 Michal Kebrt |
# All rights reserved. |
# |
# Redistribution and use in source and binary forms, with or without |
29,147 → 29,58 |
|
.text

.align 2			@ ARM gas: power-of-two argument, i.e. 4-byte boundary

/*
 * different_aligns -- bytewise copy, used when a word-wise copy is not
 * possible or not worthwhile.
 *
 * Reached by a plain branch (not a call), so lr still holds the return
 * address of whoever called the enclosing routine.
 *
 * In:       r0 = dst, r1 = src, r2 = number of bytes
 * Out:      r0 = original dst
 * Clobbers: r1, r2, r3, flags
 * Stack:    8 bytes (r0, lr) while the loop runs
 */
different_aligns:
	cmp	r2, #0
	moveq	pc, lr			@ nothing to copy; the bottom-tested loop below
					@ would otherwise wrap and run 2^32 times
	stmfd	r13!, {r0, lr}		@ keep dst for the return value
byte_loop:
	ldrb	r3, [r1], #1		@ r3 = *src++
	strb	r3, [r0], #1		@ *dst++ = r3
	subs	r2, r2, #1		@ one byte fewer to go
	bne	byte_loop
	ldmfd	r13!, {r0, pc}		@ restore dst into r0 and return
.global memcpy
.type memcpy, %function

@ void *memcpy(void *dst, const void *src, size_t len)
@
@ Copies len bytes from src to dst and returns dst.
@ If both pointers are 4-byte aligned, whole words are copied first and the
@ remaining len % 4 bytes afterwards; otherwise everything is copied bytewise.
@
@ In:       r0 = dst, r1 = src, r2 = len
@ Out:      r0 = dst (never modified below)
@ Clobbers: r2, r3, ip, flags
@ Saved:    r4, lr (8 bytes of stack)
@
@ Register usage:
@   r3: alignment scratch, then the byte/word in flight
@   r4: number of whole words, later number of trailing bytes
@   ip: running byte offset, later src tail pointer
@   lr: words copied so far, later dst tail pointer (the return address is
@       on the stack and is popped straight into pc)
memcpy:
	add	r3, r1, #3
	bic	r3, r3, #3		@ r3 = src rounded up to a multiple of 4
	cmp	r1, r3			@ equal iff src is word aligned
	stmdb	sp!, {r4, lr}		@ does not touch the flags
	beq	case_4

case_1:					@ unaligned: plain byte copy
	cmp	r2, #0
	movne	ip, #0			@ ip = byte index
	beq	case_3			@ len == 0, nothing to do

case_2:
	ldrb	r3, [ip, r1]
	strb	r3, [ip, r0]
	add	ip, ip, #1
	cmp	ip, r2
	bne	case_2

case_3:					@ common exit, r0 still holds dst
	ldmia	sp!, {r4, pc}

case_4:					@ src is aligned, now check dst
	add	r3, r0, #3
	bic	r3, r3, #3
	cmp	r0, r3
	bne	case_1			@ dst unaligned: fall back to the byte copy

	movs	r4, r2, lsr #2		@ r4 = number of whole words
	moveq	lr, r4			@ no words: words-copied counter = 0
	beq	case_6
	mov	lr, #0			@ lr = words copied so far
	mov	ip, lr			@ ip = byte offset of the current word

case_5:
	ldr	r3, [ip, r1]
	add	lr, lr, #1
	cmp	lr, r4			@ flags consumed by the bne below;
	str	r3, [ip, r0]		@ str and add leave them alone
	add	ip, ip, #4
	bne	case_5

case_6:
	ands	r4, r2, #3		@ r4 = trailing bytes (len % 4)
	beq	case_3
	mov	r3, lr, lsl #2		@ r3 = bytes already copied as words
	add	lr, r3, r0		@ lr = dst tail; r0 is left intact for the return
	add	ip, r3, r1		@ ip = src tail
	mov	r2, #0			@ r2 = tail byte index

case_7:
	ldrb	r3, [r2, ip]
	strb	r3, [r2, lr]
	add	r2, r2, #1
	cmp	r2, r4
	bne	case_7
	b	case_3
|
/*
 * Alignment-aware copy: r0 = dst, r1 = src, r2 = len.
 *
 * Short copies (len <= 8) and copies whose src and dst differ in their low
 * two address bits are handed to different_aligns. Otherwise up to three
 * leading bytes are copied to reach a word boundary, then 64-byte loop
 * iterations, 16-byte chunks, single words and finally the last 0-3 bytes.
 * The original dst is returned in r0.
 *
 * NOTE(review): no label marks the top of this sequence, so it can only be
 * entered by falling through from the instruction before it -- confirm
 * that path is actually reachable.
 */
	cmp	r2, #0
	moveq	pc, lr			@ just return if caller wants to copy zero bytes

	cmp	r2, #8
	bls	different_aligns	@ 8 bytes or fewer: copy bytewise

	/* check whether src and dst share the same alignment */
	eor	r3, r0, r1		@ r3 = dst ^ src
	tst	r3, #3			@ low two bits equal <=> same alignment
	bne	different_aligns	@ jump if align( r1 ) != align( r0 )

	/* else, they have the same alignment */
	stmfd	r13!, {r0, r4-r8, lr}	@ save dst (return value), r4-r8 and return address
	ands	r3, r0, #3		@ find out what that alignment is
	beq	multi			@ already word aligned: skip the head bytes

	/*
	 * Otherwise move up to three bytes to get to a word boundary:
	 *   align = 1: copy 3 bytes
	 *   align = 2: copy 2 bytes
	 *   align = 3: copy 1 byte
	 * The single cmp below drives all the conditional loads/stores;
	 * ldrb/strb do not modify the flags.
	 */
	cmp	r3, #2
	ldrneb	r4, [r1], #1		@ align 1 or 3 (ne): one byte
	strneb	r4, [r0], #1
	@ Halfword alternative for the two byte copies below, left disabled:
	@ldrlsh	r4, [r1], #2		@ align 1 or 2 (ls): one halfword
	@strlsh	r4, [r0], #2

	ldrlsb	r4, [r1], #1		@ align 1 or 2 (ls): two more bytes
	strlsb	r4, [r0], #1
	ldrlsb	r4, [r1], #1
	strlsb	r4, [r0], #1

	sub	r3, r3, #4		@ r3 = -(bytes just copied)
	add	r2, r2, r3		@ length -= bytes copied

multi:
	/* once we get here, both pointers are word aligned */

	/*
	 * The remaining length is decomposed by bit position:
	 *
	 *   bits 6 and up : full loop iterations   (64 bytes each)
	 *   bits 5..4     : extra 16-byte chunks   (0-3)
	 *   bits 3..2     : extra words            (0-3)
	 *   bits 1..0     : extra bytes            (0-3)
	 */

	ands	r3, r2, #48		@ #32 | #16, r3 = extra 16-byte chunks << 4
	mov	r4, r2, LSR #6		@ r4 = loop count (mov leaves the flags alone)

	/* Now, like Duff's device, jump into the loop to perform the extra chunks */
	/* Replace later with direct adjustment of PC */
	beq	loop_test		@ no extra chunks
	cmp	r3, #32			@ r3 = 16 or 32 or 48, i.e. 1, 2 or 3 chunks
	bhi	loop3
	beq	loop2
	blo	loop1

loop:
	ldmia	r1!, {r5-r8}		@ load four registers
	stmia	r0!, {r5-r8}		@ store four registers
loop3:
	ldmia	r1!, {r5-r8}
	stmia	r0!, {r5-r8}
loop2:
	ldmia	r1!, {r5-r8}
	stmia	r0!, {r5-r8}
loop1:
	ldmia	r1!, {r5-r8}
	stmia	r0!, {r5-r8}

loop_test:
	cmp	r4, #0
	subne	r4, r4, #1
	bne	loop

	/* Now do the extra words */
	ands	r3, r2, #12		@ #8 | #4, r3 = extra words << 2
	beq	extra_bytes
	cmp	r3, #8
	ldrne	r5, [r1], #4		@ r3 != 8: it is 4 or 12, copy one word
	strne	r5, [r0], #4
	ldmhsia	r1!, {r5-r6}		@ r3 >= 8: it is 8 or 12, copy two words
	stmhsia	r0!, {r5-r6}

	/* Now do the extra bytes */
extra_bytes:
	ands	r3, r2, #3		@ r3 = trailing bytes (0-3); testing only bit 1
					@ would drop a 1-byte tail, and r3 must be
					@ reloaded because it still holds len & 12
	beq	clean_up
	cmp	r3, #2
	ldrneb	r5, [r1], #1		@ r3 == 1 or 3: one byte
	strneb	r5, [r0], #1
	ldrhsb	r5, [r1], #1		@ r3 == 2 or 3: two more bytes
	strhsb	r5, [r0], #1
	ldrhsb	r5, [r1], #1
	strhsb	r5, [r0], #1

clean_up:
	ldmfd	r13!, {r0, r4-r8, pc}	@ r0 is the retval, must equal original dst
|
end:
	@ Record the size of the %function symbol memcpy, measured from its
	@ entry label up to this end marker.
	.size	memcpy, end - memcpy
	.align	2
|
@ Local Variables: |
@ asm-comment-char: ?@ |
@ comment-start: "@ " |
@ block-comment-start: "/*" |
@ block-comment-end: "*/" |
@ indent-tabs-mode: t |
@ End: |
|