WebSVN – HelenOS – Diff – /branches/arm/boot/arch/arm32/loader/asm.S

+#
-# Copyright (c) 2006 Martin Decky
+# Copyright (c) 2007 Michal Kebrt
 # All rights reserved.
+#
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
+#
 .text
-    .align 2
+.global memcpy
-different_aligns:
-    /* We must do byte copies */
-    stmfd r13!, {r0, lr }
-byte_loop:
-    ldrb r3, [r1], #1
-    strb r3, [r0], #1
-    subs r2, r2, #1
-    bne byte_loop
-    ldmfd r13!, {r0, pc }
-        .global memcpy
-        .type memcpy, %function
 memcpy:
+/*
+ * void *memcpy(void *dst, const void *src, size_t len)
+ *
+ * In:     r0 = dst, r1 = src, r2 = len (bytes)
+ * Out:    r0 = dst (the value passed in)
+ * Saved:  r4, r5 (pushed and restored together with the return address)
+ * Clobb:  r2, r3, ip, flags
+ *
+ * When both src and dst are 4-byte aligned, len / 4 words are copied and
+ * the remaining len % 4 bytes are copied one at a time.  In every other
+ * case the whole buffer is copied byte by byte.
+ */
-@void* memcpy( void* dst, const void* src, size_t len )
-@@ Register usage:
+add     r3, r1, #3              /* r3 = src rounded up ... */
-@@ r0: dst
-@@ r1: src
-@@ r2: len
-@@
-@@ r3: various bitmasks, load and store for different_aligns loop
-@@ r4: counter for multi loop, not used for different_aligns
-@@ r5-r8: load and store, not used for different_aligns
-    cmp r2, #0
+bic     r3, r3, #3              /* ... to a multiple of 4 */
-    moveq pc, lr @ just return if caller wants to copy zero bytes
-    cmp r2, #8
+cmp     r1, r3                  /* equal only if src is word aligned */
-    bls different_aligns
+stmdb   sp!, {r4, r5, lr}       /* does not alter the flags set by cmp */
+mov     r5, r0                  /* keep dst: it is the return value */
-    /*check for src alignment*/
-    eor r3, r0, r1 @ r3 = dest | src
-    tst r3, #3 @ test for same alignment
-    bne different_aligns @ jump if align( r1 ) != align( ro )
-    /* else, they have the same same alignment */
-    stmfd r13!, {r0, r4-r8, lr } @ save regs
-    ands r3, r0, #3 @ find out what that alignment is
-    beq multi @ iff align( r1 ) == align( r0 ) == 0 skip to multi
+beq     case_4                  /* src aligned: go check dst as well */
-    /* otherwise, move up to three bytes to get to a word alignment
-       if align = 1, we need to move forward 3 bytes to get to a word boundry
-       if align = 2, we need to move forward 2 bytes to get to a word boundry
-       if align = 3, we need to move forward 1 byte to get to a word boundry
-    */
+case_1:                         /* unaligned: copy everything bytewise */
-    cmp r3, #2 @ "subtract" 2 from either 1, 2, or 3
-    ldrneb r4, [r1], #1 @ one aligned or three aligned ( 1 != 2 || 3 !- 2 ), ((byte*) src)++
-    strneb r4, [r0], #1 @ one aligned or three aligned ( 1 != 2 || 3 !- 2 ), store to *dst++
-    @ldrlsh r4, [r1], #2 @ one or two ( 1 <= 2 || 2 <= 2 ) aligned, ((halfword*) src)++
-    @strlsh r4, [r0], #2 @ one or two ( 1 <= 2 || 2 <= 2 ) aligned, store to *dst++
-    ldrlsb r4, [r1], #1 @ one aligned or two aligned, ((byte*) src)++
-    strlsb r4, [r0], #1 @ one aligned or two aligned, store to *dst++
-    ldrlsb r4, [r1], #1 @ one aligned or two aligned, ((byte*) src)++
-    strlsb r4, [r0], #1 @ one aligned or two aligned, store to *dst++
-    sub r3, r3, #4
+cmp     r2, #0                  /* nothing to copy when len is 0 */
-    add r2, r2, r3 @ length -= bytes written
-multi:
+movne   ip, #0                  /* ip = byte offset into both buffers */
-    /* once we get here, we're word aligned */
+beq     case_3
-    /*
+case_2:                         /* byte loop, runs len times */
-    bytes = length
+ldrb    r3, [ip, r1]
-    words = byte / 4, rem = byte moves
+strb    r3, [ip, r0]
-    instr = quadword = words / 4, rem = partial instructions
-    loop = instr / 4, rem = jump to instr
-    010101010
+add     ip, ip, #1
-    llliiwwbb
+cmp     ip, r2
+bne     case_2
+case_3:                         /* common exit for every path */
+mov     r0, r5                  /* return the original dst */
-    684268421
+ldmia   sp!, {r4, r5, pc}       /* restore r4, r5 and return */
-    */
+case_4:
-    ands r3, r2, #48 @ #32 | #16, r3 = partial loop count << 3
+add     r3, r0, #3              /* r3 = dst rounded up ... */
-    mov r4, r2, LSR #6 @ r4 = loop count
+bic     r3, r3, #3              /* ... to a multiple of 4 */
+cmp     r0, r3                  /* equal only if dst is word aligned */
-    /* Now, like Duff's device, jump into the loop to perform the extra instructions */
-    /* Replace later with direct adjustment of PC */
-    beq loop_test
+bne     case_1                  /* dst unaligned: fall back to bytes */
-    cmp r3, #32 @r3 = 16 or 32 or 48, corresponding to 1 or 2 or 3
-    bhi loop3
+movs    r4, r2, lsr #2          /* r4 = number of whole words */
-    beq loop2
+moveq   lr, r4                  /* no whole words: words copied = 0 */
-    blo loop1
+beq     case_6
+mov     lr, #0                  /* lr = words copied so far */
-loop:
-    ldmia r1!, { r5-r8 } @load four registers
-    stmia r0!, { r5-r8 } @store four registers
-loop3:
+mov     ip, lr                  /* ip = byte offset, starts at 0 */
-    ldmia r1!, { r5-r8 } @load four registers
-    stmia r0!, { r5-r8 } @store four registers
-loop2:
+case_5:                         /* word loop, runs r4 times */
-    ldmia r1!, { r5-r8 } @load four registers
+ldr     r3, [ip, r1]
-    stmia r0!, { r5-r8 } @store four registers
-loop1:
-    ldmia r1!, { r5-r8 } @load four registers
+add     lr, lr, #1
-    stmia r0!, { r5-r8 } @store four registers
+cmp     lr, r4                  /* flags survive the str/add below */
-loop_test:
-    cmp r4, #0
+str     r3, [ip, r0]
-    subne r4, r4, #1
+add     ip, ip, #4
-    bne loop
+bne     case_5
+case_6:
-    /* Now do the extra words */
+ands    r4, r2, #3              /* r4 = trailing bytes (len % 4) */
-    ands r3, r2, #12 @ #8 | #4, r3 = extra words << 2
-    beq extra_bytes
+beq     case_3                  /* no tail: done */
-    cmp r3, #8
+mov     r3, lr, lsl #2          /* r3 = bytes already copied as words */
-    ldrne r5, [r1], #4 @ if r5 !=8 then it's 4 or 12, so load and store
-    strne r5, [r0], #4
+add     r0, r3, r0              /* r0 = dst tail (dst itself is in r5) */
-    ldmhsia r1!, {r5-r6} @ if r5 >= 8, it's 8 or 12, so load and store 2
-    stmhsia r0!, {r5-r6}
+add     ip, r3, r1              /* ip = src tail */
+mov     r2, #0                  /* r2 = tail byte index */
-    /* Now do the extra bytes */
-extra_bytes:
+case_7:                         /* tail byte loop, runs r4 times */
-    tst r2, #2 @ any extra bytes?
-    beq clean_up
+ldrb    r3, [r2, ip]
-    cmp r3, #2 @ subtract 2 from either 1, 2, or 3
-    ldrneb r5, [r1], #1 @ load and store one byte iff r3 != 2 (i.e, r3 == 1 || r3 == 3)
-    strneb r5, [r0], #1 @ 1 or 3
+strb    r3, [r2, r0]
-    ldrhsb r5, [r1], #1 @ load and store a byte iff r3 >= 2
-    strhsb r5, [r0], #1 @ 2 or 3
+add     r2, r2, #1
-    ldrhsb r5, [r1], #1 @ load and store a byte iff r3 > 2
-    strhsb r5, [r0], #1 @ 2 or 3
+cmp     r2, r4
+bne     case_7
-clean_up:
+b       case_3
-    ldmfd r13!, {r0, r4-r8, pc } @r0 is the retval, must equal original dst
-end:
-    .size different_aligns, .end-memcpy
-        .align 2
-@ Local Variables:
-@ asm-comment-char: ?@
-@ comment-start: "@ "
-@ block-comment-start: "/*"
-@ block-comment-end: "*/"
-@ indent-tabs-mode: t
-@ End:

Subversion Repositories HelenOS

(root)/branches/arm/boot/arch/arm32/loader/asm.S – Rev 2145 → 2153