Subversion Repositories HelenOS

Compare Revisions

Ignore whitespace Rev 2152 → Rev 2153

/branches/arm/boot/arch/arm32/loader/asm.S
1,5 → 1,5
#
# Copyright (c) 2006 Martin Decky
# Copyright (c) 2007 Michal Kebrt
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
29,147 → 29,58
 
.text

.align 2
different_aligns:
	/*
	 * Bytewise copy.
	 *
	 * In:    r0 = dst, r1 = src, r2 = number of bytes
	 * Out:   r0 = dst as passed in (saved on entry, restored on return)
	 * Clobb: r1, r2, r3, flags
	 *
	 * The count is decremented before it is tested, so r2 must be
	 * non-zero on entry; a zero count would wrap around.
	 */
	stmdb	sp!, {r0, lr}		@ keep the original dst and return address
1:
	ldrb	r3, [r1], #1		@ r3 = *src++
	subs	r2, r2, #1		@ one byte fewer to go (sets Z on the last one)
	strb	r3, [r0], #1		@ *dst++ = r3 (flags are left untouched)
	bne	1b
	ldmia	sp!, {r0, pc}		@ restore dst into r0 and return
.global memcpy
.type memcpy, %function
memcpy:
	@ void *memcpy(void *dst, const void *src, size_t len)
	@
	@ In:    r0 = dst, r1 = src, r2 = len in bytes
	@ Out:   r0 = dst (r0 is never written, so it is returned as passed in)
	@ Clobb: r3, ip, flags; r4 and lr are saved on the stack and restored
	@
	@ Register usage:
	@   ip: byte offset of the next item to copy
	@   r3: alignment test scratch, then the data being moved
	@   r4: number of bytes covered by whole words (len & ~3)
	@
	@ Whole words are copied only when both dst and src are word aligned.
	@ Whatever remains (the last len & 3 bytes, or the entire buffer when
	@ either pointer is unaligned) is copied one byte at a time.
	stmdb	sp!, {r4, lr}
	mov	ip, #0			@ nothing copied yet
	orr	r3, r0, r1
	tst	r3, #3			@ low bits set in either pointer?
	bne	2f			@ yes: bytes only
	bics	r4, r2, #3		@ r4 = len rounded down to a word multiple
	beq	2f			@ fewer than four bytes: no word loop
1:
	ldr	r3, [r1, ip]
	str	r3, [r0, ip]
	add	ip, ip, #4
	cmp	ip, r4
	bne	1b
2:
	cmp	ip, r2			@ anything left? (also covers len == 0)
	beq	4f
3:
	ldrb	r3, [r1, ip]
	strb	r3, [r0, ip]
	add	ip, ip, #1
	cmp	ip, r2
	bne	3b
4:
	ldmia	sp!, {r4, pc}		@ r0 still holds dst
 
/*
 * Word-oriented copy: r0 = dst, r1 = src, r2 = len in bytes.
 * Returns the original dst in r0.
 *
 * NOTE(review): the instruction preceding this sequence never falls
 * through and the sequence has no entry label of its own, so as laid out
 * here it looks unreachable -- confirm before relying on it.
 */
	cmp	r2, #0
	moveq	pc, lr			@ just return if caller wants to copy zero bytes
	cmp	r2, #8
	bls	different_aligns	@ eight bytes or fewer: plain byte copy
	/* check whether src and dst share the same alignment */
	eor	r3, r0, r1		@ r3 = dst ^ src
	tst	r3, #3			@ do the low two bits differ?
	bne	different_aligns	@ jump if align( r1 ) != align( r0 )

	/* else, they have the same alignment */
	stmfd	r13!, {r0, r4-r8, lr }	@ save regs (r0 is popped as the return value)
	ands	r3, r0, #3		@ find out what that alignment is
	beq	multi			@ iff align( r1 ) == align( r0 ) == 0 skip to multi
	/* otherwise, move up to three bytes to get to a word alignment
	   if align = 1, we need to move forward 3 bytes to get to a word boundary
	   if align = 2, we need to move forward 2 bytes to get to a word boundary
	   if align = 3, we need to move forward 1 byte to get to a word boundary
	*/
	cmp	r3, #2			@ compare the alignment (1, 2 or 3) with 2
	ldrneb	r4, [r1], #1		@ align 1 or 3 ( != 2 ): load *src++
	strneb	r4, [r0], #1		@ align 1 or 3 ( != 2 ): store to *dst++
	@ldrlsh r4, [r1], #2 @ one or two ( 1 <= 2 || 2 <= 2 ) aligned, ((halfword*) src)++
	@strlsh r4, [r0], #2 @ one or two ( 1 <= 2 || 2 <= 2 ) aligned, store to *dst++
	ldrlsb	r4, [r1], #1		@ align 1 or 2 ( <= 2 ): load *src++
	strlsb	r4, [r0], #1		@ align 1 or 2 ( <= 2 ): store to *dst++
	ldrlsb	r4, [r1], #1		@ align 1 or 2 ( <= 2 ): load *src++
	strlsb	r4, [r0], #1		@ align 1 or 2 ( <= 2 ): store to *dst++
	sub	r3, r3, #4		@ r3 = -(bytes just written)
	add	r2, r2, r3		@ length -= bytes written

multi:
	/* once we get here, we're word aligned */
	/*
	   The remaining length in r2 is consumed by bit field:
	     bits 1:0   bytes    - single trailing bytes
	     bits 3:2   words    - single trailing words
	     bits 5:4   instr    - extra four-word ldm/stm pairs (partial loop)
	     bits 31:6  loop     - full iterations of four ldm/stm pairs (64 bytes)
	*/
	ands	r3, r2, #48		@ #32 | #16, r3 = partial loop count << 4
	mov	r4, r2, LSR #6		@ r4 = loop count (flags from ands are kept)
	/* Now, like Duff's device, jump into the loop to perform the extra instructions */
	/* Replace later with direct adjustment of PC */
	beq	loop_test
	cmp	r3, #32			@ r3 = 16 or 32 or 48, corresponding to 1 or 2 or 3
	bhi	loop3
	beq	loop2
	blo	loop1
loop:
	ldmia	r1!, { r5-r8 }		@ load four registers
	stmia	r0!, { r5-r8 }		@ store four registers
loop3:
	ldmia	r1!, { r5-r8 }		@ load four registers
	stmia	r0!, { r5-r8 }		@ store four registers
loop2:
	ldmia	r1!, { r5-r8 }		@ load four registers
	stmia	r0!, { r5-r8 }		@ store four registers
loop1:
	ldmia	r1!, { r5-r8 }		@ load four registers
	stmia	r0!, { r5-r8 }		@ store four registers
loop_test:
	cmp	r4, #0
	subne	r4, r4, #1
	bne	loop
	/* Now do the extra words */
	ands	r3, r2, #12		@ #8 | #4, r3 = extra words << 2
	beq	extra_bytes
	cmp	r3, #8
	ldrne	r5, [r1], #4		@ if r3 != 8 then it's 4 or 12, so load and store
	strne	r5, [r0], #4
	ldmhsia	r1!, {r5-r6}		@ if r3 >= 8, it's 8 or 12, so load and store 2
	stmhsia	r0!, {r5-r6}
	/* Now do the extra bytes */
extra_bytes:
	ands	r3, r2, #3		@ r3 = trailing byte count (0-3); any extra bytes?
	beq	clean_up
	cmp	r3, #2			@ compare the count (1, 2 or 3) with 2
	ldrneb	r5, [r1], #1		@ load and store one byte iff r3 != 2 (i.e, r3 == 1 || r3 == 3)
	strneb	r5, [r0], #1		@ 1 or 3
	ldrhsb	r5, [r1], #1		@ load and store a byte iff r3 >= 2
	strhsb	r5, [r0], #1		@ 2 or 3
	ldrhsb	r5, [r1], #1		@ load and store another byte iff r3 >= 2
	strhsb	r5, [r0], #1		@ 2 or 3
clean_up:
	ldmfd	r13!, {r0, r4-r8, pc }	@ r0 is the retval, must equal original dst
 
end:
	@ memcpy is the symbol declared with .type %function, so the size
	@ record belongs to it; its extent runs from the memcpy label up to
	@ the end label defined just above.
	.size memcpy, end - memcpy
.align 2
@ Local Variables:
@ asm-comment-char: ?@
@ comment-start: "@ "
@ block-comment-start: "/*"
@ block-comment-end: "*/"
@ indent-tabs-mode: t
@ End: