Subversion Repositories HelenOS

Rev

Rev 2145 | Rev 2165 | Go to most recent revision | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 2145 Rev 2153
Line 1... Line 1...
1
#
1
#
2
# Copyright (c) 2006 Martin Decky
2
# Copyright (c) 2007 Michal Kebrt
3
# All rights reserved.
3
# All rights reserved.
4
#
4
#
5
# Redistribution and use in source and binary forms, with or without
5
# Redistribution and use in source and binary forms, with or without
6
# modification, are permitted provided that the following conditions
6
# modification, are permitted provided that the following conditions
7
# are met:
7
# are met:
Line 27... Line 27...
27
#
27
#
28
 
28
 
29
 
29
 
30
.text
30
.text
31
 
31
 
32
    .align 2
-
 
33
    
-
 
34
    
-
 
35
different_aligns:
-
 
36
    /* We must do byte copies */
-
 
37
    stmfd r13!, {r0, lr }
-
 
38
byte_loop:
-
 
39
    ldrb r3, [r1], #1
-
 
40
    strb r3, [r0], #1
-
 
41
    subs r2, r2, #1
-
 
42
    bne byte_loop
-
 
43
    ldmfd r13!, {r0, pc }
-
 
44
 
-
 
45
        .global memcpy
32
.global memcpy
46
        .type memcpy, %function
-
 
47
memcpy:
-
 
48
@void* memcpy( void* dst, const void* src, size_t len )
-
 
49
 
33
 
50
@@ Register usage:
-
 
51
@@ r0: dst
-
 
52
@@ r1: src
-
 
53
@@ r2: len
34
memcpy:
54
@@
-
 
55
@@ r3: various bitmasks, load and store for different_aligns loop
-
 
56
@@ r4: counter for multi loop, not used for different_aligns
-
 
57
@@ r5-r8: load and store, not used for different_aligns
-
 
58
 
35
 
-
 
36
add     r3, r1, #3
-
 
37
bic     r3, r3, #3
-
 
38
cmp     r1, r3
-
 
39
stmdb   sp!, {r4, lr}
-
 
40
beq     case_4
-
 
41
case_1:
59
    cmp r2, #0
42
cmp     r2, #0
-
 
43
movne   ip, #0
-
 
44
beq     case_3
-
 
45
case_2:
-
 
46
ldrb    r3, [ip, r1]
-
 
47
strb    r3, [ip, r0]
-
 
48
add     ip, ip, #1
-
 
49
cmp     ip, r2
-
 
50
bne     case_2
-
 
51
case_3:
-
 
52
mov     r0, r1
-
 
53
ldmia   sp!, {r4, pc}
-
 
54
case_4:
-
 
55
add     r3, r0, #3
-
 
56
bic     r3, r3, #3
-
 
57
cmp     r0, r3
-
 
58
bne     case_1
60
    moveq pc, lr @ just return if caller wants to copy zero bytes
59
movs    r4, r2, lsr #2
-
 
60
moveq   lr, r4
-
 
61
beq     case_6
-
 
62
mov     lr, #0
-
 
63
mov     ip, lr
-
 
64
case_5:
-
 
65
ldr     r3, [ip, r1]
-
 
66
add     lr, lr, #1
-
 
67
cmp     lr, r4
-
 
68
str     r3, [ip, r0]
-
 
69
add     ip, ip, #4
61
    
70
bne     case_5
-
 
71
case_6:
62
    cmp r2, #8
72
ands    r4, r2, #3
-
 
73
beq     case_3
63
    bls different_aligns
74
mov     r3, lr, lsl #2
-
 
75
add     r0, r3, r0
-
 
76
add     ip, r3, r1
-
 
77
mov     r2, #0
-
 
78
case_7:
-
 
79
ldrb    r3, [r2, ip]
-
 
80
strb    r3, [r2, r0]
-
 
81
add     r2, r2, #1
-
 
82
cmp     r2, r4
-
 
83
bne     case_7
-
 
84
b       case_3
64
    
85
 
65
    /*check for src alignment*/
-
 
66
    eor r3, r0, r1 @ r3 = dest ^ src (low bits differ iff alignments differ)
-
 
67
    tst r3, #3 @ test for same alignment
-
 
68
    bne different_aligns @ jump if align( r1 ) != align( r0 )
-
 
69
 
-
 
70
    /* else, they have the same alignment */
-
 
71
    stmfd r13!, {r0, r4-r8, lr } @ save regs
-
 
72
    ands r3, r0, #3 @ find out what that alignment is
-
 
73
    beq multi @ iff align( r1 ) == align( r0 ) == 0 skip to multi
-
 
74
    
-
 
75
    /* otherwise, move up to three bytes to get to a word alignment
-
 
76
       if align = 1, we need to move forward 3 bytes to get to a word boundary
-
 
77
       if align = 2, we need to move forward 2 bytes to get to a word boundary
-
 
78
       if align = 3, we need to move forward 1 byte to get to a word boundary
-
 
79
    */
-
 
80
    cmp r3, #2 @ "subtract" 2 from either 1, 2, or 3
-
 
81
    ldrneb r4, [r1], #1 @ one aligned or three aligned ( 1 != 2 || 3 != 2 ), ((byte*) src)++
-
 
82
    strneb r4, [r0], #1 @ one aligned or three aligned ( 1 != 2 || 3 != 2 ), store to *dst++
-
 
83
    @ldrlsh r4, [r1], #2 @ one or two ( 1 <= 2 || 2 <= 2 ) aligned, ((halfword*) src)++
-
 
84
    @strlsh r4, [r0], #2 @ one or two ( 1 <= 2 || 2 <= 2 ) aligned, store to *dst++
-
 
85
    
-
 
86
    ldrlsb r4, [r1], #1 @ one aligned or two aligned, ((byte*) src)++
-
 
87
    strlsb r4, [r0], #1 @ one aligned or two aligned, store to *dst++
-
 
88
    ldrlsb r4, [r1], #1 @ one aligned or two aligned, ((byte*) src)++
-
 
89
    strlsb r4, [r0], #1 @ one aligned or two aligned, store to *dst++
-
 
90
    
-
 
91
    sub r3, r3, #4
-
 
92
    add r2, r2, r3 @ length -= bytes written
-
 
93
 
-
 
94
multi:
-
 
95
    /* once we get here, we're word aligned */
-
 
96
    
-
 
97
    /*
-
 
98
    bytes = length
-
 
99
    words = byte / 4, rem = byte moves
-
 
100
    instr = quadword = words / 4, rem = partial instructions
-
 
101
    loop = instr / 4, rem = jump to instr
-
 
102
   
-
 
103
    010101010
-
 
104
    llliiwwbb
-
 
105
    
-
 
106
    21
-
 
107
    52631
-
 
108
    684268421
-
 
109
    */
-
 
110
 
-
 
111
    ands r3, r2, #48 @ #32 | #16, r3 = partial loop count << 4
-
 
112
    mov r4, r2, LSR #6 @ r4 = loop count
-
 
113
    
-
 
114
    /* Now, like Duff's device, jump into the loop to perform the extra instructions */
-
 
115
    /* Replace later with direct adjustment of PC */
-
 
116
    beq loop_test
-
 
117
    cmp r3, #32 @r3 = 16 or 32 or 48, corresponding to 1 or 2 or 3
-
 
118
    bhi loop3
-
 
119
    beq loop2
-
 
120
    blo loop1
-
 
121
    
-
 
122
loop:
-
 
123
    ldmia r1!, { r5-r8 } @load four registers
-
 
124
    stmia r0!, { r5-r8 } @store four registers
-
 
125
loop3:
-
 
126
    ldmia r1!, { r5-r8 } @load four registers
-
 
127
    stmia r0!, { r5-r8 } @store four registers
-
 
128
loop2:
-
 
129
    ldmia r1!, { r5-r8 } @load four registers
-
 
130
    stmia r0!, { r5-r8 } @store four registers
-
 
131
loop1:
-
 
132
    ldmia r1!, { r5-r8 } @load four registers
-
 
133
    stmia r0!, { r5-r8 } @store four registers
-
 
134
    
-
 
135
loop_test:
-
 
136
    cmp r4, #0
-
 
137
    subne r4, r4, #1
-
 
138
    bne loop
-
 
139
    
-
 
140
    /* Now do the extra words */
-
 
141
    ands r3, r2, #12 @ #8 | #4, r3 = extra words << 2
-
 
142
    beq extra_bytes
-
 
143
    cmp r3, #8
-
 
144
    ldrne r5, [r1], #4 @ if r3 != 8 then it's 4 or 12, so load and store
-
 
145
    strne r5, [r0], #4
-
 
146
    ldmhsia r1!, {r5-r6} @ if r3 >= 8, it's 8 or 12, so load and store 2
-
 
147
    stmhsia r0!, {r5-r6}
-
 
148
    
-
 
149
    /* Now do the extra bytes */
-
 
150
extra_bytes:
-
 
151
    tst r2, #2 @ any extra bytes?
-
 
152
    beq clean_up
-
 
153
    cmp r3, #2 @ subtract 2 from either 1, 2, or 3
-
 
154
    ldrneb r5, [r1], #1 @ load and store one byte iff r3 != 2 (i.e, r3 == 1 || r3 == 3)
-
 
155
    strneb r5, [r0], #1 @ 1 or 3
-
 
156
    ldrhsb r5, [r1], #1 @ load and store a byte iff r3 >= 2
-
 
157
    strhsb r5, [r0], #1 @ 2 or 3
-
 
158
    ldrhsb r5, [r1], #1 @ load and store a byte iff r3 >= 2
-
 
159
    strhsb r5, [r0], #1 @ 2 or 3
-
 
160
    
-
 
161
clean_up:
-
 
162
    ldmfd r13!, {r0, r4-r8, pc } @r0 is the retval, must equal original dst
-
 
163
 
-
 
164
end:
-
 
165
    .size different_aligns, .end-memcpy
-
 
166
        .align 2
-
 
167
    
-
 
168
@ Local Variables:
-
 
169
@ asm-comment-char: ?@
-
 
170
@ comment-start: "@ "
-
 
171
@ block-comment-start: "/*"
-
 
172
@ block-comment-end: "*/"
-
 
173
@ indent-tabs-mode: t
-
 
174
@ End: 
-
 
175
 
86