WebSVN – HelenOS – Path Comparison – / – /branches/arm/boot/arch/arm32/loader/asm.S Rev 2144 and /branches/arm/boot/arch/arm32/loader/asm.S Rev 2145

Ignore whitespace Rev 2144 → Rev 2145

 /branches/arm/boot/arch/arm32/loader/asm.S
 ,0 → 1,175
+#
+# Copyright (c) 2006 Martin Decky
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# - Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# - The name of the author may not be used to endorse or promote products
+#   derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+.text
+    .align 2
+different_aligns:
+    @ Byte-wise fallback copy, entered by a branch from memcpy.
+    @   r0 = dst cursor, r1 = src cursor, r2 = bytes left, r3 = scratch byte
+    @ The counter is decremented before it is tested, so r2 == 0 on entry
+    @ would wrap around; memcpy returns early for a zero length before it
+    @ can branch here.
+    stmdb   sp!, {r0, lr}           @ keep original dst (the return value) and return address
+byte_loop:
+    ldrb    r3, [r1], #1            @ r3 = *src++
+    strb    r3, [r0], #1            @ *dst++ = r3
+    subs    r2, r2, #1              @ one byte fewer to go; Z is set when none remain
+    bne     byte_loop
+    ldmia   sp!, {r0, pc}           @ r0 = original dst; loading pc returns to the caller
+        .global memcpy
+        .type memcpy, %function
+@ void *memcpy(void *dst, const void *src, size_t len)
+@
+@ Copy len bytes from src to dst and return the original dst in r0.
+@
+@ Strategy:
+@   - len == 0: return immediately.
+@   - len <= 8, or dst and src differ in their low two address bits:
+@     branch to different_aligns, which copies byte by byte and returns
+@     to the caller itself.
+@   - otherwise: copy 1-3 bytes to reach a word boundary, then 16-byte
+@     chunks (four per loop pass), then single words, then 1-3 tail bytes.
+@
+@ Register usage:
+@   r0: dst cursor (the original value is saved on the stack and restored)
+@   r1: src cursor
+@   r2: len; after the alignment prologue, the bytes left to copy
+@   r3: bit masks and small counts
+@   r4: byte moved by the alignment prologue, then 64-byte pass counter
+@   r5-r8: data being moved
+@ r4-r8 are saved and restored on the aligned path; the early exits are
+@ taken before any of them is written.
+memcpy:
+    cmp     r2, #0
+    moveq   pc, lr                  @ nothing to copy: return with r0 == dst
+    cmp     r2, #8
+    bls     different_aligns        @ 8 bytes or fewer: plain byte copy
+    /* Can dst and src reach a word boundary at the same time? */
+    eor     r3, r0, r1              @ r3 = dst ^ src
+    tst     r3, #3                  @ do the low two address bits differ?
+    bne     different_aligns        @ align(src) != align(dst): byte copy
+    /* Same alignment: word-sized transfers are possible */
+    stmfd   r13!, {r0, r4-r8, lr}   @ save return value, work registers, return address
+    ands    r3, r0, #3              @ r3 = dst & 3 (equal to src & 3 here)
+    beq     multi                   @ already word aligned
+    /* Copy 4 - r3 bytes to reach a word boundary:
+         r3 == 1 -> 3 bytes, r3 == 2 -> 2 bytes, r3 == 3 -> 1 byte.
+       None of the conditional transfers below sets the flags, so every
+       condition refers to this one cmp.
+       (Two byte copies are used for the "ls" case; a halfword variant,
+       ldrlsh/strlsh, was left disabled in the original code.) */
+    cmp     r3, #2
+    ldrneb  r4, [r1], #1            @ r3 is 1 or 3: one byte
+    strneb  r4, [r0], #1
+    ldrlsb  r4, [r1], #1            @ r3 is 1 or 2: two (more) bytes
+    strlsb  r4, [r0], #1
+    ldrlsb  r4, [r1], #1
+    strlsb  r4, [r0], #1
+    sub     r3, r3, #4              @ r3 = -(bytes just copied)
+    add     r2, r2, r3              @ len -= bytes just copied
+multi:
+    /* dst and src are word aligned from here on. The remaining length in
+       r2 is consumed field by field:
+         bits 6 and up : full loop passes of 64 bytes       (r4)
+         bits 5-4      : extra 16-byte chunks, 0-3          (r3 = count << 4)
+         bits 3-2      : extra words, 0-3
+         bits 1-0      : extra bytes, 0-3
+    */
+    ands    r3, r2, #48             @ #32 | #16, r3 = extra chunk count << 4
+    mov     r4, r2, LSR #6          @ r4 = loop pass count (mov leaves the flags alone)
+    /* Like Duff's device, enter the unrolled loop part-way through so the
+       extra chunks are copied first. */
+    /* TODO: replace later with direct adjustment of PC */
+    beq     loop_test               @ no extra chunks
+    cmp     r3, #32                 @ r3 = 16, 32 or 48, i.e. 1, 2 or 3 chunks
+    bhi     loop3                   @ 48: three chunks
+    beq     loop2                   @ 32: two chunks
+    blo     loop1                   @ 16: one chunk
+loop:
+    ldmia   r1!, {r5-r8}            @ load four registers (16 bytes)
+    stmia   r0!, {r5-r8}            @ store four registers
+loop3:
+    ldmia   r1!, {r5-r8}
+    stmia   r0!, {r5-r8}
+loop2:
+    ldmia   r1!, {r5-r8}
+    stmia   r0!, {r5-r8}
+loop1:
+    ldmia   r1!, {r5-r8}
+    stmia   r0!, {r5-r8}
+loop_test:
+    cmp     r4, #0
+    subne   r4, r4, #1              @ one full pass fewer to go
+    bne     loop
+    /* Now do the extra words */
+    ands    r3, r2, #12             @ #8 | #4, r3 = extra word count << 2
+    beq     extra_bytes
+    cmp     r3, #8
+    ldrne   r5, [r1], #4            @ r3 is 4 or 12: one word
+    strne   r5, [r0], #4
+    ldmhsia r1!, {r5-r6}            @ r3 is 8 or 12: two words
+    stmhsia r0!, {r5-r6}
+    /* Now do the extra bytes */
+extra_bytes:
+    /* r3 still holds the word field here, so the byte count has to be
+       extracted from r2 again before it is tested and compared. */
+    ands    r3, r2, #3              @ r3 = trailing byte count (0-3)
+    beq     clean_up
+    cmp     r3, #2
+    ldrneb  r5, [r1], #1            @ r3 is 1 or 3: one byte
+    strneb  r5, [r0], #1
+    ldrhsb  r5, [r1], #1            @ r3 is 2 or 3: two bytes
+    strhsb  r5, [r0], #1
+    ldrhsb  r5, [r1], #1
+    strhsb  r5, [r0], #1
+clean_up:
+    ldmfd   r13!, {r0, r4-r8, pc}   @ r0 is the retval, must equal original dst
+end:
+    @ Record the ELF symbol size of memcpy: the distance from its entry
+    @ label to the end label directly above.
+    .size memcpy, end - memcpy
+        .align 2
+@ Local Variables:
+@ asm-comment-char: ?@
+@ comment-start: "@ "
+@ block-comment-start: "/*"
+@ block-comment-end: "*/"
+@ indent-tabs-mode: t
+@ End:

Subversion Repositories HelenOS

Compare Revisions

Ignore whitespace Rev 2144 → Rev 2145