/* Copyright (c) 2012-2013, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/*
 * Copyright (c) 2015 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses
 *
 */

#include <private/bionic_asm.h>

/* By default we assume that the DC instruction can be used to zero
   data blocks more efficiently.  In some circumstances this might be
   unsafe, for example in an asymmetric multiprocessor environment with
   different DC clear lengths (neither the upper nor lower lengths are
   safe to use).

   If code may be run in a virtualized environment, then define
   MAYBE_VIRT.  This will cause the code to cache the system register
   values rather than re-reading them each call.  */

#define dstin    x0
#define val      x1
#define valw     w1
#define count    x2
#define dst      x3
#define dstend   x4
#define tmp1     x5
#define tmp1w    w5
#define tmp2     x6
#define tmp2w    w6
#define zva_len  x7
#define zva_lenw w7

#define L(l) .L ## l

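/* __memset_chk(dst, value, count, dst_len) is the _FORTIFY_SOURCE entry
   point.  Under AAPCS64 the fourth argument, the destination object's
   actual size, arrives in x3 (aliased to "dst" above), so the compare
   below reads "count vs. dst_len": when count fits we tail-call memset,
   otherwise we fall through and call __memset_chk_fail, which aborts.  */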
ENTRY(__memset_chk)
        cmp     count, dst
        bls     memset

        // Preserve for accurate backtrace.
        stp     x29, x30, [sp, -16]!
        .cfi_def_cfa_offset 16
        .cfi_rel_offset x29, 0
        .cfi_rel_offset x30, 8

        bl      __memset_chk_fail
END(__memset_chk)

ENTRY(memset)

        dup     v0.16B, valw
        add     dstend, dstin, count

        cmp     count, 96
        b.hi    L(set_long)
        cmp     count, 16
        b.hs    L(set_medium)
        mov     val, v0.D[0]

        /* Set 0..15 bytes.  */
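        /* Branch-free via bit tests on count, with paired stores from
           both ends that may overlap.  E.g. count = 13 (0b1101): bit 3
           is set, so the two 8-byte stores at [dstin] and [dstend - 8]
           cover all 13 bytes between them.  */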
        tbz     count, 3, 1f
        str     val, [dstin]
        str     val, [dstend, -8]
        ret
        nop
1:      tbz     count, 2, 2f
        str     valw, [dstin]
        str     valw, [dstend, -4]
        ret
2:      cbz     count, 3f
        strb    valw, [dstin]
        tbz     count, 1, 3f
        strh    valw, [dstend, -2]
3:      ret

        /* Set 16..96 bytes.  */
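        /* One 16-byte store from each end covers 16..32 bytes, a second
           pair at [dstin, 16]/[dstend, -32] extends that to 64, and
           counts with bit 6 set (64..96) take the L(set96) path.  */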
L(set_medium):
        str     q0, [dstin]
        tbnz    count, 6, L(set96)
        str     q0, [dstend, -16]
        tbz     count, 5, 1f
        str     q0, [dstin, 16]
        str     q0, [dstend, -32]
1:      ret

        .p2align 4
        /* Set 64..96 bytes.  Write 64 bytes from the start and
           32 bytes from the end.  */
L(set96):
        str     q0, [dstin, 16]
        stp     q0, q0, [dstin, 32]
        stp     q0, q0, [dstend, -32]
        ret

        .p2align 3
        nop
L(set_long):
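        /* DC ZVA only pays off for long, zero-valued fills: the cmp/ccmp
           pair below chains into a single flags check of
           (count >= 256) && (val == 0), so one b.eq reaches L(try_zva).  */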
        and     valw, valw, 255
        bic     dst, dstin, 15
        str     q0, [dstin]
        cmp     count, 256
        ccmp    valw, 0, 0, cs
        b.eq    L(try_zva)
L(no_zva):
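        /* Bulk path without DC ZVA: two stp Q-pairs store 64 bytes per
           iteration; count is biased so subs/b.hi is the whole loop
           test, and the final stores from dstend cover the tail.  */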
        sub     count, dstend, dst      /* Count is 16 too large.  */
        add     dst, dst, 16
        sub     count, count, 64 + 16   /* Adjust count and bias for loop.  */
1:      stp     q0, q0, [dst], 64
        stp     q0, q0, [dst, -32]
L(tail64):
        subs    count, count, 64
        b.hi    1b
2:      stp     q0, q0, [dstend, -64]
        stp     q0, q0, [dstend, -32]
        ret

        .p2align 3
L(try_zva):
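        /* DCZID_EL0: bit 4 (DZP) set means DC ZVA is prohibited; bits
           3:0 are log2 of the block size in 4-byte words, so 4 means
           64 bytes and 5 means 128 bytes.  */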
        mrs     tmp1, dczid_el0
        tbnz    tmp1w, 4, L(no_zva)
        and     tmp1w, tmp1w, 15
        cmp     tmp1w, 4        /* ZVA size is 64 bytes.  */
        b.ne    L(zva_128)

        /* Write the first and last 64 byte aligned block using stp rather
           than using DC ZVA.  This is faster on some cores.
         */
L(zva_64):
        str     q0, [dst, 16]
        stp     q0, q0, [dst, 32]
        bic     dst, dst, 63
        stp     q0, q0, [dst, 64]
        stp     q0, q0, [dst, 96]
        sub     count, dstend, dst      /* Count is now 128 too large.  */
        sub     count, count, 128+64+64 /* Adjust count and bias for loop.  */
        add     dst, dst, 128
        nop
1:      dc      zva, dst
        add     dst, dst, 64
        subs    count, count, 64
        b.hi    1b
        stp     q0, q0, [dst, 0]
        stp     q0, q0, [dst, 32]
        stp     q0, q0, [dstend, -64]
        stp     q0, q0, [dstend, -32]
        ret

        .p2align 3
L(zva_128):
        cmp     tmp1w, 5        /* ZVA size is 128 bytes.  */
        b.ne    L(zva_other)

        str     q0, [dst, 16]
        stp     q0, q0, [dst, 32]
        stp     q0, q0, [dst, 64]
        stp     q0, q0, [dst, 96]
        bic     dst, dst, 127
        sub     count, dstend, dst      /* Count is now 128 too large.  */
        sub     count, count, 128+128   /* Adjust count and bias for loop.  */
        add     dst, dst, 128
1:      dc      zva, dst
        add     dst, dst, 128
        subs    count, count, 128
        b.hi    1b
        stp     q0, q0, [dstend, -128]
        stp     q0, q0, [dstend, -96]
        stp     q0, q0, [dstend, -64]
        stp     q0, q0, [dstend, -32]
        ret

L(zva_other):
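        /* Any other ZVA size: zva_len = 4 << log2(words) gives the block
           size in bytes.  Skip DC ZVA unless count also covers the up to
           zva_len + 64 bytes of stp stores needed to reach alignment.  */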
        mov     tmp2w, 4
        lsl     zva_lenw, tmp2w, tmp1w
        add     tmp1, zva_len, 64       /* Max alignment bytes written.  */
        cmp     count, tmp1
        blo     L(no_zva)

        sub     tmp2, zva_len, 1
        add     tmp1, dst, zva_len
        add     dst, dst, 16
        subs    count, tmp1, dst        /* Actual alignment bytes to write.  */
        bic     tmp1, tmp1, tmp2        /* Aligned dc zva start address.  */
        beq     2f
1:      stp     q0, q0, [dst], 64
        stp     q0, q0, [dst, -32]
        subs    count, count, 64
        b.hi    1b
2:      mov     dst, tmp1
        sub     count, dstend, tmp1     /* Remaining bytes to write.  */
        subs    count, count, zva_len
        b.lo    4f
3:      dc      zva, dst
        add     dst, dst, zva_len
        subs    count, count, zva_len
        b.hs    3b
4:      add     count, count, zva_len
        b       L(tail64)

END(memset)