Subversion Repositories HelenOS

Rev

Rev 2145 | Rev 2165 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 2145 Rev 2153
Line 1... Line 1...
1
#
1
#
2
# Copyright (c) 2006 Martin Decky
2
# Copyright (c) 2007 Michal Kebrt
3
# All rights reserved.
3
# All rights reserved.
4
#
4
#
5
# Redistribution and use in source and binary forms, with or without
5
# Redistribution and use in source and binary forms, with or without
6
# modification, are permitted provided that the following conditions
6
# modification, are permitted provided that the following conditions
7
# are met:
7
# are met:
Line 27... Line 27...
27
#
27
#
28
 
28
 
29
 
29
 
30
.text
30
.text
31
 
31
 
32
    .align 2
32
.global memcpy
33
    
-
 
34
    
-
 
35
different_aligns:
-
 
36
    /* We must do byte copies */
-
 
37
    stmfd r13!, {r0, lr }
-
 
38
byte_loop:
-
 
39
    ldrb r3, [r1], #1
-
 
40
    strb r3, [r0], #1
-
 
41
    subs r2, r2, #1
-
 
42
    bne byte_loop
-
 
43
    ldmfd r13!, {r0, pc }
-
 
44
 
33
 
45
        .global memcpy
-
 
46
        .type memcpy, %function
-
 
47
memcpy:
34
memcpy:
48
@void* memcpy( void* dst, const void* src, size_t len )
-
 
49
 
35
 
50
@@ Register usage:
36
add     r3, r1, #3
51
@@ r0: dst
-
 
52
@@ r1: src
-
 
53
@@ r2: len
-
 
54
@@
-
 
55
@@ r3: various bitmasks, load and store for different_aligns loop
-
 
56
@@ r4: counter for multi loop, not used for different_aligns
-
 
57
@@ r5-r8: load and store, not used for different_aligns
-
 
58
 
-
 
59
    cmp r2, #0
37
bic     r3, r3, #3
60
    moveq pc, lr @ just return if caller wants to copy zero bytes
-
 
61
    
-
 
62
    cmp r2, #8
38
cmp     r1, r3
63
    bls different_aligns
39
stmdb   sp!, {r4, lr}
64
    
-
 
65
    /*check for src alignment*/
-
 
66
    eor r3, r0, r1 @ r3 = dst ^ src (differing address bits)
-
 
67
    tst r3, #3 @ test for same alignment
-
 
68
    bne different_aligns @ jump if align( r1 ) != align( r0 )
-
 
69
 
-
 
70
    /* else, they have the same alignment */
-
 
71
    stmfd r13!, {r0, r4-r8, lr } @ save regs
-
 
72
    ands r3, r0, #3 @ find out what that alignment is
-
 
73
    beq multi @ iff align( r1 ) == align( r0 ) == 0 skip to multi
-
 
74
    
40
beq     case_4
75
    /* otherwise, move up to three bytes to get to a word alignment
-
 
76
       if align = 1, we need to move forward 3 bytes to get to a word boundary
-
 
77
       if align = 2, we need to move forward 2 bytes to get to a word boundary
-
 
78
       if align = 3, we need to move forward 1 byte to get to a word boundary
-
 
79
    */
41
case_1:
80
    cmp r3, #2 @ "subtract" 2 from either 1, 2, or 3
-
 
81
    ldrneb r4, [r1], #1 @ one aligned or three aligned ( 1 != 2 || 3 != 2 ), ((byte*) src)++
-
 
82
    strneb r4, [r0], #1 @ one aligned or three aligned ( 1 != 2 || 3 != 2 ), store to *dst++
-
 
83
    @ldrlsh r4, [r1], #2 @ one or two ( 1 <= 2 || 2 <= 2 ) aligned, ((halfword*) src)++
-
 
84
    @strlsh r4, [r0], #2 @ one or two ( 1 <= 2 || 2 <= 2 ) aligned, store to *dst++
-
 
85
    
-
 
86
    ldrlsb r4, [r1], #1 @ one aligned or two aligned, ((byte*) src)++
-
 
87
    strlsb r4, [r0], #1 @ one aligned or two aligned, store to *dst++
-
 
88
    ldrlsb r4, [r1], #1 @ one aligned or two aligned, ((byte*) src)++
-
 
89
    strlsb r4, [r0], #1 @ one aligned or two aligned, store to *dst++
-
 
90
    
-
 
91
    sub r3, r3, #4
42
cmp     r2, #0
92
    add r2, r2, r3 @ length -= bytes written
-
 
93
 
-
 
94
multi:
43
movne   ip, #0
95
    /* once we get here, we're word aligned */
-
 
96
    
44
beq     case_3
97
    /*
45
case_2:
98
    bytes = length
46
ldrb    r3, [ip, r1]
99
    words = byte / 4, rem = byte moves
47
strb    r3, [ip, r0]
100
    instr = quadword = words / 4, rem = partial instructions
-
 
101
    loop = instr / 4, rem = jump to instr
-
 
102
   
-
 
103
    010101010
48
add     ip, ip, #1
104
    llliiwwbb
49
cmp     ip, r2
105
    
50
bne     case_2
106
    21
51
case_3:
107
    52631
52
mov     r0, r1
108
    684268421
53
ldmia   sp!, {r4, pc}
109
    */
54
case_4:
110
 
-
 
111
    ands r3, r2, #48 @ #32 | #16, r3 = partial loop count << 4
55
add     r3, r0, #3
112
    mov r4, r2, LSR #6 @ r4 = loop count
56
bic     r3, r3, #3
113
    
57
cmp     r0, r3
114
    /* Now, like Duff's device, jump into the loop to perform the extra instructions */
-
 
115
    /* Replace later with direct adjustment of PC */
-
 
116
    beq loop_test
58
bne     case_1
117
    cmp r3, #32 @r3 = 16 or 32 or 48, corresponding to 1 or 2 or 3
-
 
118
    bhi loop3
59
movs    r4, r2, lsr #2
119
    beq loop2
60
moveq   lr, r4
120
    blo loop1
61
beq     case_6
121
    
62
mov     lr, #0
122
loop:
-
 
123
    ldmia r1!, { r5-r8 } @load four registers
-
 
124
    stmia r0!, { r5-r8 } @store four registers
-
 
125
loop3:
63
mov     ip, lr
126
    ldmia r1!, { r5-r8 } @load four registers
-
 
127
    stmia r0!, { r5-r8 } @store four registers
-
 
128
loop2:
64
case_5:
129
    ldmia r1!, { r5-r8 } @load four registers
65
ldr     r3, [ip, r1]
130
    stmia r0!, { r5-r8 } @store four registers
-
 
131
loop1:
-
 
132
    ldmia r1!, { r5-r8 } @load four registers
66
add     lr, lr, #1
133
    stmia r0!, { r5-r8 } @store four registers
-
 
134
    
67
cmp     lr, r4
135
loop_test:
-
 
136
    cmp r4, #0
68
str     r3, [ip, r0]
137
    subne r4, r4, #1
69
add     ip, ip, #4
138
    bne loop
70
bne     case_5
139
    
71
case_6:
140
    /* Now do the extra words */
72
ands    r4, r2, #3
141
    ands r3, r2, #12 @ #8 | #4, r3 = extra words << 2
-
 
142
    beq extra_bytes
73
beq     case_3
143
    cmp r3, #8
74
mov     r3, lr, lsl #2
144
    ldrne r5, [r1], #4 @ if r3 != 8 then it's 4 or 12, so load and store one word
-
 
145
    strne r5, [r0], #4
75
add     r0, r3, r0
146
    ldmhsia r1!, {r5-r6} @ if r3 >= 8, it's 8 or 12, so load and store 2
-
 
147
    stmhsia r0!, {r5-r6}
76
add     ip, r3, r1
148
    
77
mov     r2, #0
149
    /* Now do the extra bytes */
-
 
150
extra_bytes:
78
case_7:
151
    tst r2, #2 @ any extra bytes?
-
 
152
    beq clean_up
79
ldrb    r3, [r2, ip]
153
    cmp r3, #2 @ subtract 2 from either 1, 2, or 3
-
 
154
    ldrneb r5, [r1], #1 @ load and store one byte iff r3 != 2 (i.e, r3 == 1 || r3 == 3)
-
 
155
    strneb r5, [r0], #1 @ 1 or 3
80
strb    r3, [r2, r0]
156
    ldrhsb r5, [r1], #1 @ load and store a byte iff r3 >= 2
-
 
157
    strhsb r5, [r0], #1 @ 2 or 3
81
add     r2, r2, #1
158
    ldrhsb r5, [r1], #1 @ load and store a second byte iff r3 >= 2
-
 
159
    strhsb r5, [r0], #1 @ 2 or 3
82
cmp     r2, r4
160
    
83
bne     case_7
161
clean_up:
84
b       case_3
162
    ldmfd r13!, {r0, r4-r8, pc } @r0 is the retval, must equal original dst
-
 
163
 
85
 
164
end:
-
 
165
    .size different_aligns, .end-memcpy
-
 
166
        .align 2
-
 
167
    
-
 
168
@ Local Variables:
-
 
169
@ asm-comment-char: ?@
-
 
170
@ comment-start: "@ "
-
 
171
@ block-comment-start: "/*"
-
 
172
@ block-comment-end: "*/"
-
 
173
@ indent-tabs-mode: t
-
 
174
@ End: 
-
 
175
 
86