/*
 * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

    .text
    .align 2

    .globl memcpy
    .globl bcopy
    .globl memmove

bcopy: /* void bcopy(const void *src, void *dest, size_t len); */
    mov r3, r0
    mov r0, r1
    mov r1, r3
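    /* bcopy takes (src, dest) while memcpy takes (dest, src), so swap
       r0 and r1 through r3 and fall through into the shared body below */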

memcpy:  /* void *memcpy(void *dest, const void *src, size_t len); */
memmove: /* void *memmove(void *dest, const void *src, size_t len); */
    /* check for zero len or if the pointers are the same */
    cmp r2, #0
    cmpne r0, r1
    bxeq lr

    /* save r0 (return value), r4 (scratch), and r5 (scratch) */
    stmfd sp!, { r0, r4, r5, r7, lr }
    add r7, sp, #12

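    /* note: the hs/lo conditions below still use the flags set by the
       cmpne r0, r1 above; neither stmfd nor a flag-less add alters the
       condition flags */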
    /* check for overlap. r3 <- distance between src & dest */
    subhs r3, r0, r1
    sublo r3, r1, r0
    cmp r3, r2 /* if distance(src, dest) < len, we have overlap */
    blo Loverlap

Lnormalforwardcopy:
    /* are src and dest dissimilarly word aligned? */
    mov r12, r0, lsl #30
    cmp r12, r1, lsl #30
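    /* lsl #30 keeps only the low two address bits, so this compares each
       pointer's byte offset within a word */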
    bne Lnonwordaligned_forward

    /* if len < 64, do a quick forward copy */
    cmp r2, #64
    blt Lsmallforwardcopy

    /* check for 16 byte src/dest unalignment */
    tst r0, #0xf
    bne Lsimilarlyunaligned

    /* check for 32 byte dest unalignment */
    tst r0, #(1<<4)
    bne Lunaligned_32

Lmorethan64_aligned:
    /* save some more registers to use in the copy */
    stmfd sp!, { r6, r8, r10, r11 }

    /* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
    sub r2, r2, #64

L64loop:
    /* copy 64 bytes at a time */
    ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#ifdef _ARM_ARCH_6
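    /* pld is a cache preload hint (ARMv6 and later): start pulling the
       next source line into the cache ahead of the coming loads */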
    pld [r1, #32]
#endif
    stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
    ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
    subs r2, r2, #64
#ifdef _ARM_ARCH_6
    pld [r1, #32]
#endif
    stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
    bge L64loop

    /* restore the scratch registers we just saved */
    ldmfd sp!, { r6, r8, r10, r11 }

    /* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
    adds r2, r2, #64
    beq Lexit

Llessthan64_aligned:
    /* copy 16 bytes at a time until we have < 16 bytes */
    cmp r2, #16
    ldmgeia r1!, { r3, r4, r5, r12 }
    stmgeia r0!, { r3, r4, r5, r12 }
    subges r2, r2, #16
    bgt Llessthan64_aligned
    beq Lexit

Llessthan16_aligned:
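    /* fewer than 16 bytes remain.  lsl #28 moves bits 3..0 of the count
       into bits 31..28, and msr copies those into the N, Z, C and V flags:
       N = bit 3 (8 bytes), Z = bit 2 (4 bytes), C = bit 1 (2 bytes),
       V = bit 0 (1 byte).  The mi/eq/cs/vs transfers below then move
       exactly the remaining bytes with no further branches */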
    mov r2, r2, lsl #28
    msr cpsr_f, r2

    ldmmiia r1!, { r2, r3 }
    ldreq r4, [r1], #4
    ldrcsh r5, [r1], #2
    ldrvsb r12, [r1], #1

    stmmiia r0!, { r2, r3 }
    streq r4, [r0], #4
    strcsh r5, [r0], #2
    strvsb r12, [r0], #1
    b Lexit

Lsimilarlyunaligned:
    /* both src and dest are unaligned in similar ways, align to dest on 32 byte boundary */
    mov r12, r0, lsl #28
    rsb r12, r12, #0
    msr cpsr_f, r12
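    /* r12 = -(dest & 0xf) << 28, i.e. the number of bytes up to the next
       16 byte boundary, shifted into the flag bits.  The same V/C/Z/N
       mapping as in Llessthan16_aligned picks off 1, 2, 4 and 8 byte
       copies until dest is 16 byte aligned */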

    ldrvsb r3, [r1], #1
    ldrcsh r4, [r1], #2
    ldreq r5, [r1], #4

    strvsb r3, [r0], #1
    strcsh r4, [r0], #2
    streq r5, [r0], #4

    ldmmiia r1!, { r3, r4 }
    stmmiia r0!, { r3, r4 }

    subs r2, r2, r12, lsr #28
    beq Lexit

Lunaligned_32:
    /* bring up to dest 32 byte alignment */
    tst r0, #(1 << 4)
    ldmneia r1!, { r3, r4, r5, r12 }
    stmneia r0!, { r3, r4, r5, r12 }
    subne r2, r2, #16

    /* we should now be aligned, see what copy method we should use */
    cmp r2, #64
    bge Lmorethan64_aligned
    b Llessthan64_aligned

Lbytewise2:
    /* copy 2 bytes at a time */
    subs r2, r2, #2
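    /* pl skips the second load/store when only one byte remained (the
       subs went negative); hi exits the loop once the count reaches
       zero or borrows */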

    ldrb r3, [r1], #1
    ldrplb r4, [r1], #1

    strb r3, [r0], #1
    strplb r4, [r0], #1

    bhi Lbytewise2
    b Lexit

Lbytewise:
    /* simple bytewise forward copy */
    ldrb r3, [r1], #1
    subs r2, r2, #1
    strb r3, [r0], #1
    bne Lbytewise
    b Lexit

Lsmallforwardcopy:
    /* src and dest are word aligned similarly, less than 64 bytes to copy */
    cmp r2, #4
    blt Lbytewise2

    /* bytewise copy until word aligned */
    tst r1, #3
Lwordalignloop:
    ldrneb r3, [r1], #1
    strneb r3, [r0], #1
    subne r2, r2, #1
    tstne r1, #3
    bne Lwordalignloop

    cmp r2, #16
    bge Llessthan64_aligned
    blt Llessthan16_aligned

Loverlap:
    /* src and dest overlap in some way, len > 0 */
    cmp r0, r1 /* if dest > src */
    bhi Loverlap_srclower

Loverlap_destlower:
    /* dest < src, see if we can still do a fast forward copy or fall back to a slow forward copy */
    cmp r3, #64
    bge Lnormalforwardcopy /* overlap is greater than one stride of the copy, use normal copy */

    cmp r3, #2
    bge Lbytewise2
    b Lbytewise

/* the following routines deal with having to copy in the reverse direction */
Loverlap_srclower:
    /* src < dest, with overlap */

    /* dest += len; src += len; */
    add r0, r0, r2
    add r1, r1, r2

    /* we have to copy in reverse no matter what, test if we can use a large block reverse copy */
    cmp r2, #64   /* less than 64 bytes to copy? */
    cmpgt r3, #64 /* less than 64 bytes of nonoverlap? */
    blt Lbytewise_reverse
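    /* cmpgt chains the two compares: r3 is tested only when r2 > 64,
       otherwise the blt acts on the r2 compare alone, so the block copy
       is reached only when both length and nonoverlap distance allow it */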

    /* test if src and dest are dissimilarly word aligned */
    mov r3, r0, lsl #30
    cmp r3, r1, lsl #30
    bne Lbytewise_reverse

    /* test if dest is non 16 byte aligned (this also covers sub-word misalignment, since src and dest are similarly aligned) */
    tst r0, #0xf
    bne Lunaligned_reverse_similarly

    /* test for dest 32 byte alignment */
    tst r0, #(1<<4)
    bne Lunaligned_32_reverse_similarly

/* 64 byte reverse block copy, src and dest aligned */
Lmorethan64_aligned_reverse:
    /* save some more registers to use in the copy */
    stmfd sp!, { r6, r8, r10, r11 }

    /* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
    sub r2, r2, #64

L64loop_reverse:
    /* copy 64 bytes at a time */
    ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#ifdef _ARM_ARCH_6
    pld [r1, #-32]
#endif
    stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
    ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
    subs r2, r2, #64
#ifdef _ARM_ARCH_6
    pld [r1, #-32]
#endif
    stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
    bge L64loop_reverse

    /* restore the scratch registers we just saved */
    ldmfd sp!, { r6, r8, r10, r11 }

    /* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
    adds r2, r2, #64
    beq Lexit

Lbytewise_reverse:
    ldrb r3, [r1, #-1]!
    strb r3, [r0, #-1]!
    subs r2, r2, #1
    bne Lbytewise_reverse
    b Lexit

Lunaligned_reverse_similarly:
    /* both src and dest are unaligned in similar ways, align to dest on 32 byte boundary */
    mov r12, r0, lsl #28
    msr cpsr_f, r12
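    /* same flag trick as Lsimilarlyunaligned, but with no negation:
       copying downward, dest & 0xf is itself the number of bytes to peel
       off to reach the 16 byte boundary below */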

    ldrvsb r3, [r1, #-1]!
    ldrcsh r4, [r1, #-2]!
    ldreq r5, [r1, #-4]!

    strvsb r3, [r0, #-1]!
    strcsh r4, [r0, #-2]!
    streq r5, [r0, #-4]!

    ldmmidb r1!, { r3, r4 }
    stmmidb r0!, { r3, r4 }

    subs r2, r2, r12, lsr #28
    beq Lexit

Lunaligned_32_reverse_similarly:
    /* bring up to dest 32 byte alignment */
    tst r0, #(1 << 4)
    ldmnedb r1!, { r3, r4, r5, r12 }
    stmnedb r0!, { r3, r4, r5, r12 }
    subne r2, r2, #16

    /* we should now be aligned, see what copy method we should use */
    cmp r2, #64
    bge Lmorethan64_aligned_reverse
    b Lbytewise_reverse

/* the following routines deal with non word aligned copies */
Lnonwordaligned_forward:
    cmp r2, #8
    blt Lbytewise2 /* not worth the effort with fewer than 8 bytes */

    /* bytewise copy until src word aligned */
    tst r1, #3
Lwordalignloop2:
    ldrneb r3, [r1], #1
    strneb r3, [r0], #1
    subne r2, r2, #1
    tstne r1, #3
    bne Lwordalignloop2

    /* figure out how the src and dest are unaligned */
    and r3, r0, #3
    cmp r3, #2
    blt Lalign1_forward
    beq Lalign2_forward
    bgt Lalign3_forward
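    /* src is now word aligned and dest sits 1, 2 or 3 bytes past a word
       boundary.  Each Lalign* loop backs dest up to its word boundary,
       reloads the byte(s) already there, and merges each whole source
       word with the leftover bytes of the previous one using shifts, so
       every store is a full aligned word */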

Lalign1_forward:
    /* the dest pointer is 1 byte off from src */
    mov r12, r2, lsr #2 /* number of words we should copy */
    sub r0, r0, #1

    /* prime the copy */
    ldrb r4, [r0] /* load D[7:0] */

Lalign1_forward_loop:
    ldr r3, [r1], #4        /* load S */
    orr r4, r4, r3, lsl #8  /* D[31:8] = S[23:0] */
    str r4, [r0], #4        /* save D */
    mov r4, r3, lsr #24     /* D[7:0] = S[31:24] */
    subs r12, r12, #1
    bne Lalign1_forward_loop

    /* finish the copy off */
    strb r4, [r0], #1 /* save D[7:0] */

    ands r2, r2, #3
    beq Lexit
    b Lbytewise2

Lalign2_forward:
    /* the dest pointer is 2 bytes off from src */
    mov r12, r2, lsr #2 /* number of words we should copy */
    sub r0, r0, #2

    /* prime the copy */
    ldrh r4, [r0] /* load D[15:0] */

Lalign2_forward_loop:
    ldr r3, [r1], #4        /* load S */
    orr r4, r4, r3, lsl #16 /* D[31:16] = S[15:0] */
    str r4, [r0], #4        /* save D */
    mov r4, r3, lsr #16     /* D[15:0] = S[31:16] */
    subs r12, r12, #1
    bne Lalign2_forward_loop

    /* finish the copy off */
    strh r4, [r0], #2 /* save D[15:0] */

    ands r2, r2, #3
    beq Lexit
    b Lbytewise2

Lalign3_forward:
    /* the dest pointer is 3 bytes off from src */
    mov r12, r2, lsr #2 /* number of words we should copy */
    sub r0, r0, #3

    /* prime the copy */
    ldr r4, [r0]
    and r4, r4, #0x00ffffff /* load D[23:0] */

Lalign3_forward_loop:
    ldr r3, [r1], #4        /* load S */
    orr r4, r4, r3, lsl #24 /* D[31:24] = S[7:0] */
    str r4, [r0], #4        /* save D */
    mov r4, r3, lsr #8      /* D[23:0] = S[31:8] */
    subs r12, r12, #1
    bne Lalign3_forward_loop

    /* finish the copy off */
    strh r4, [r0], #2 /* save D[15:0] */
    mov r4, r4, lsr #16
    strb r4, [r0], #1 /* save D[23:16] */

    ands r2, r2, #3
    beq Lexit
    b Lbytewise2

Lexit:
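    /* restore the original dest pointer into r0 (the memcpy/memmove
       return value) and return by popping lr straight into pc */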
    ldmfd sp!, { r0, r4, r5, r7, pc }