blob: bf3d9aded6e405956ad46d58c063a843be6f124b [file] [log] [blame]
Shu Zhang5b5d6e72014-03-12 11:18:41 +08001/*
2 * Copyright (C) 2013 The Android Open Source Project
3 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
23 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <machine/cpu-features.h>
31#include <private/bionic_asm.h>
32#include <private/libc_events.h>
33
34 /*
35 * Optimized memset() for ARM.
36 *
37 * memset() returns its first argument.
38 */
39
40 .fpu neon
41 .syntax unified
42
/*
 * __memset_chk: checked front end for memset().
 *
 *   r0, r1, r2  handed on untouched to memset when the check passes
 *   r3          limit that r2 is compared against (presumably the size of
 *               the destination object -- confirm against the C prototype)
 *
 * If r2 <= r3 (unsigned) control joins memset through .L_done. Otherwise
 * __fortify_chk_fail is called with r0 = address of error_string and
 * r1 = BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW.
 */
ENTRY(__memset_chk)
        cmp         r2, r3
        bls         .L_done                 // r2 <= r3: continue into memset

        // Failure path. Push lr and describe the save to the unwinder so a
        // backtrace taken inside the failure handler can still find our caller.
        push        {lr}
        .cfi_def_cfa_offset 4
        .cfi_rel_offset lr, 0

        ldr         r1, error_code
        ldr         r0, error_message       // r0 = error_string - (anchor + 8)
.L_chk_pc_anchor:
        // The literal is biased by +8 because that is what pc reads as at
        // this instruction in ARM state; the sum is the absolute address of
        // error_string without needing an absolute relocation.
        add         r0, r0, pc
        bl          __fortify_chk_fail

        // The words below are data placed directly after the call, so the
        // callee is expected not to return here.
error_code:
        .word       BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
        .word       error_string-(.L_chk_pc_anchor+8)
END(__memset_chk)
63
/*
 * bzero: r0 = destination, r1 = byte count.
 *
 * Shuffles the arguments into the register layout memset uses (count in r2,
 * fill value in r1 = 0) and then runs off the end of this function straight
 * into memset, which therefore has to be the next code emitted.
 * .L_done is also the join point for __memset_chk when its check passes.
 */
ENTRY(bzero)
        mov         r2, r1                  // count -> memset's length register
        mov         r1, #0                  // fill value is zero
.L_done:
        // Deliberately no return: execution continues in memset below.
END(bzero)
70
/*
 * memset: store the low byte of r1 into r2 bytes starting at r0.
 *
 * r0 is never written, so the caller receives the destination pointer back
 * unchanged.
 *
 * Register roles:
 *   r0      original destination (also the base for the early prefetches)
 *   r1      fill byte replicated into all four bytes of the word
 *   r2      number of bytes still to be written
 *   r3      advancing store pointer
 *   ip      scratch
 *   q0-q3   fill pattern for the vector stores (d0-d7 are the same registers)
 *
 * Size dispatch is done by shifting the count left so that two of its bits
 * land in the C and N flags at once; the stores in between do not touch the
 * flags, so one shift drives up to three decisions.
 */
ENTRY(memset)
        pldw        [r0]
        mov         r3, r0

        // Spread the low byte over the word: ......vv -> vv000000 ->
        // vvvv0000 -> vvvvvvvv. Whatever was in the upper bytes is discarded.
        lsl         r1, r1, #24
        orr         r1, r1, r1, lsr #8
        orr         r1, r1, r1, lsr #16

        cmp         r2, #16
        blo         .L_set_small

        // ---- 16 bytes or more ------------------------------------------
        vdup.32     q0, r1

        ands        ip, r3, #0xF
        beq         .L_dst_aligned

        // Destination is not 16-byte aligned: write the 1..15 leading bytes
        // needed to reach the next boundary.
        pldw        [r0, #64]
        rsb         ip, ip, #16             // ip = distance to the boundary

        // Charge the head bytes to the count up front (post-alignment value).
        sub         r2, r2, ip

        // Emit the head in growing power-of-two pieces so each store is
        // naturally aligned for its own size.
        tst         ip, #1
        it          ne
        strbne      r1, [r3], #1            // 1 byte
        tst         ip, #2
        it          ne
        strhne      r1, [r3], #2            // 2 bytes
        lsls        ip, ip, #29             // N = bit 2 of ip, C = bit 3 of ip
        it          mi
        strmi       r1, [r3], #4            // 4 bytes
        itt         cs
        strcs       r1, [r3], #4            // 8 bytes, as two word stores
        strcs       r1, [r3], #4

.L_dst_aligned:
        // r3 is 16-byte aligned from here on.
        vmov        q1, q0
        cmp         r2, #128
        blo         .L_set_tail

        // Bulk path: 128 bytes per iteration using four Q registers. The
        // count is biased by -128 so the loop test is just the borrow flag.
        vmov        q2, q0
        vmov        q3, q0
        pldw        [r0, #128]
        sub         r2, r2, #128
        .p2align    4
.L_set_128_loop:
        pldw        [r3, #192]
        vstmia      r3!, {d0-d7}            // 64 bytes
        vstmia      r3!, {d0-d7}            // 64 bytes
        subs        r2, r2, #128
        bhs         .L_set_128_loop         // no borrow: another full block remains

        // Remove the bias; r2 is now the 0..127 leftover bytes.
        adds        r2, r2, #128
        beq         .L_set_tail_done

        .p2align    4
.L_set_tail:
        // Fewer than 128 bytes remain and r3 is 16-byte aligned.
        lsls        ip, r2, #26             // C = bit 6 (64), N = bit 5 (32), Z = bits 0..5 clear
        bcc         .L_tail_32
        vst1.8      {d0-d3}, [r3, :128]!    // 64 bytes as 2 x 32
        vst1.8      {d0-d3}, [r3, :128]!
        beq         .L_set_tail_done        // nothing below 64 left
.L_tail_32:
        bpl         .L_tail_16
        vst1.8      {d0-d3}, [r3, :128]!    // 32 bytes
.L_tail_16:
        lsls        ip, r2, #28             // C = bit 4 (16), N = bit 3 (8), Z = bits 0..3 clear
        bcc         .L_tail_8
        vst1.8      {d0, d1}, [r3, :128]!   // 16 bytes
        beq         .L_set_tail_done        // nothing below 16 left
.L_tail_8:
        bpl         .L_tail_4
        vst1.8      {d0}, [r3, :64]!        // 8 bytes
.L_tail_4:
        tst         r2, #4
        it          ne
        strne       r1, [r3], #4            // 4 bytes

        lsls        ip, r2, #31             // C = bit 1 (2), N = bit 0 (1)
        it          cs
        strhcs      r1, [r3], #2            // 2 bytes
        it          mi
        strbmi      r1, [r3]                // 1 byte
.L_set_tail_done:
        bx          lr

.L_set_small:
        // ---- 0..15 bytes -----------------------------------------------
        // Word stores are issued here whatever the alignment of r3 is.
        lsls        ip, r2, #29             // C = bit 3 (8), N = bit 2 (4), Z = bits 0..2 clear
        bcc         .L_small_4
        str         r1, [r3], #4            // 8 bytes as two words
        str         r1, [r3], #4
        beq         .L_small_done           // count was exactly 8
.L_small_4:
        it          mi
        strmi       r1, [r3], #4            // 4 bytes
        lsls        ip, r2, #31             // C = bit 1 (2), N = bit 0 (1)
        it          mi
        strbmi      r1, [r3], #1            // 1 byte
        itt         cs
        strbcs      r1, [r3], #1            // 2 bytes, one at a time
        strbcs      r1, [r3]
.L_small_done:
        bx          lr
END(memset)
204
205 .data
206error_string:
207 .string "memset: prevented write past end of buffer"