arm_chainloader/lib/arm_bzero.s
/*
 * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/*
 * A reasonably well-optimized bzero/memset. Should work equally well on arm11 and arm9 based
 * cores.
 *
 * The algorithm is to align the destination pointer on a 32 byte boundary and then
 * blast data 64 bytes at a time, in two stores of 32 bytes per loop.
 */
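
/*
 * For reference, a rough C sketch of the same strategy (illustrative only, not
 * a drop-in replacement -- the assembly below also mops up the sub-word
 * 1/2/4/8 byte cases with conditional stores instead of plain byte loops):
 *
 *	void *memset(void *ptr, int c, size_t len)
 *	{
 *		unsigned char *p = ptr;
 *		unsigned int pat = (unsigned char)c * 0x01010101u;
 *
 *		if (len < 32) {				// small: store bytewise
 *			while (len--)
 *				*p++ = (unsigned char)c;
 *			return ptr;
 *		}
 *		while ((unsigned long)p & 0x1f) {	// align to a 32 byte boundary
 *			*p++ = (unsigned char)c;
 *			len--;
 *		}
 *		while (len >= 64) {			// main loop: 64 bytes per pass
 *			unsigned int *w = (unsigned int *)p;
 *			for (int i = 0; i < 16; i++)
 *				w[i] = pat;
 *			p += 64;
 *			len -= 64;
 *		}
 *		while (len--)				// 0-63 byte tail
 *			*p++ = (unsigned char)c;
 *		return ptr;
 *	}
 */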
	.text
	.align 2

	.globl memset
/* void *memset(void *ptr, int c, size_t len); */
memset:
	/* move len into r1, unpack c into r2 */
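	/* the and/orr sequence below replicates the low byte of c into all four
	   byte lanes of r2 (e.g. c = 0xab gives r2 = 0xabababab) */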
	mov r3, r2
	and r1, r1, #0xff
	orr r1, r1, r1, lsl #8
	orr r2, r1, r1, lsl #16
	mov r1, r3
	b Lbzeroengine

	.globl bzero
/* void bzero(void *ptr, size_t len); */
bzero:
	/* zero out r2 so we can be just like memset(0) */
	mov r2, #0

Lbzeroengine:
	/* move the base pointer into r12 and leave r0 alone so that we return the original pointer */
	mov r12, r0

	/* copy r2 into r3 for 64-bit stores */
	mov r3, r2

	/* check for zero len */
	cmp r1, #0
	bxeq lr

	/* fall back to a bytewise store for less than 32 bytes */
	cmp r1, #32
	blt L_bytewise

	/* check for 32 byte unaligned ptr */
	tst r12, #0x1f
	bne L_unaligned

	/* make sure we have more than 64 bytes to zero */
	cmp r1, #64
	blt L_lessthan64aligned

/* >= 64 bytes of len, 32 byte aligned */
L_64ormorealigned:

	/* we need some registers, avoid r7 (frame pointer) and r9 (thread register) */
	stmfd sp!, { r4-r6, r8, r10-r11 }
	mov r4, r2
	mov r5, r2
	mov r6, r2
	mov r8, r2
	mov r10, r2
	mov r11, r2

	/* pre-subtract 64 from the len to avoid an extra compare in the loop */
	sub r1, r1, #64

L_64loop:
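	/* each pass stores 64 bytes via two 8-register stmia (32 bytes each);
	   len was pre-decremented by 64, so bge loops while a full 64 bytes remain */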
	stmia r12!, { r2-r6, r8, r10-r11 }
	subs r1, r1, #64
	stmia r12!, { r2-r6, r8, r10-r11 }
	bge L_64loop

	/* restore the saved regs */
	ldmfd sp!, { r4-r6, r8, r10-r11 }

	/* check for completion (had previously subtracted an extra 64 from len) */
	adds r1, r1, #64
	bxeq lr

L_lessthan64aligned:
	/* do we have 16 or more bytes left */
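	/* the two conditional stmia below store 8 bytes each (16 total) whenever
	   at least 16 bytes remain, looping until fewer than 16 are left */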
	cmp r1, #16
	stmgeia r12!, { r2-r3 }
	stmgeia r12!, { r2-r3 }
	subges r1, r1, #16
	bgt L_lessthan64aligned
	bxeq lr

L_lessthan16aligned:
	/* store 0 to 15 bytes */
	mov r1, r1, lsl #28	/* move the remaining len bits [3:0] to the flags area of cpsr */
	msr cpsr_f, r1

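	/* after the msr: N = len bit 3 (8 bytes), Z = bit 2 (4 bytes),
	   C = bit 1 (2 bytes), V = bit 0 (1 byte) */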
	stmmiia r12!, { r2-r3 }	/* n is set, store 8 bytes */
	streq r2, [r12], #4	/* z is set, store 4 bytes */
	strcsh r2, [r12], #2	/* c is set, store 2 bytes */
	strvsb r2, [r12], #1	/* v is set, store 1 byte */
	bx lr

L_bytewise:
	/* bytewise copy, 2 bytes at a time, alignment not guaranteed */
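	/* the second strb is conditional on pl, so an odd remaining count stores
	   just one byte on the final pass */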
	subs r1, r1, #2
	strb r2, [r12], #1
	strplb r2, [r12], #1
	bhi L_bytewise
	bx lr

L_unaligned:
	/* unaligned on 32 byte boundary, store 1-15 bytes until we're 16 byte aligned */
	mov r3, r12, lsl #28
	rsb r3, r3, #0x00000000
	msr cpsr_f, r3

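	/* bits [31:28] of r3 now hold (16 - (ptr & 0xf)) & 0xf, the number of bytes
	   needed to reach 16 byte alignment; the flags select the stores below */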
	strvsb r2, [r12], #1	/* v is set, unaligned in the 1s column */
	strcsh r2, [r12], #2	/* c is set, unaligned in the 2s column */
	streq r2, [r12], #4	/* z is set, unaligned in the 4s column */
	strmi r2, [r12], #4	/* n is set, unaligned in the 8s column */
	strmi r2, [r12], #4

	subs r1, r1, r3, lsr #28
	bxeq lr

	/* we had previously trashed r3, restore it */
	mov r3, r2

	/* now make sure we're 32 byte aligned */
	tst r12, #(1 << 4)
	stmneia r12!, { r2-r3 }
	stmneia r12!, { r2-r3 }
	subnes r1, r1, #16

	/* we're now aligned, check for >= 64 bytes left */
	cmp r1, #64
	bge L_64ormorealigned
	b L_lessthan64aligned