/*
 * Copyright (C) 2014 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
   Copyright (c) 2014, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

/*
 * This source builds exactly one routine, chosen by the macro that the
 * build defines: STPCPY selects stpcpy, STRCPY selects strcpy.
 */
#if !defined(STPCPY) && !defined(STRCPY)
#error "Either STPCPY or STRCPY must be defined."
#endif

#include <private/bionic_asm.h>

/*
 * Arguments and results.
 *
 * stpcpy: x0 is used directly as the moving destination pointer (dst).
 * strcpy: x0 is named dstin and is never written by the routine; a
 *         separate register (dst, below) is the moving pointer.
 */
#if defined(STPCPY)
#define dst         x0
#elif defined(STRCPY)
#define dstin       x0
#endif
#define src         x1

/* Locals and temporaries. */
#if defined(STRCPY)
/* Working copy of the destination pointer. */
#define dst         x2
#endif
/* data1/data2: the two 8-byte words fetched per loop iteration. */
#define data1       x3
#define data1_w     w3
#define data2       x4
#define data2_w     w4
/* has_nul1/has_nul2: NUL-detection masks computed from data1/data2. */
#define has_nul1    x5
#define has_nul1_w  w5
#define has_nul2    x6
#define tmp1        x7
#define tmp2        x8
#define tmp3        x9
#define tmp4        x10
/* zeroones: holds REP8_01 for the whole routine. */
#define zeroones    x11
#define zeroones_w  w11
/* pos: bit offset of the first NUL byte within a word. */
#define pos         x12

/* One byte value replicated into all eight bytes of a 64-bit word. */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/*
 * char *strcpy(char *dst, const char *src);    built when STRCPY is defined
 * char *stpcpy(char *dst, const char *src);    built when STPCPY is defined
 *
 * Copies the NUL-terminated string at src, terminator included, to dst.
 *
 * In:      x0 = destination, x1 = source
 * Out:     strcpy: x0 is left untouched, so the original destination
 *                  pointer is returned.
 *          stpcpy: x0 points at the NUL byte stored in the destination.
 * Scratch: x1, x3-x12 (and x2 for strcpy), condition flags.
 *          Leaf routine: makes no calls and does not touch the stack.
 *
 * Strategy: copy 1, 2, 4 and/or 8 bytes as needed until src is 16-byte
 * aligned, then copy 16 bytes per iteration.  Loads may fetch bytes that
 * lie after the terminator, but each load is naturally aligned to its own
 * size, so it stays inside the aligned 16-byte chunk that holds the NUL.
 * Stores write exactly the string bytes plus the terminator.
 *
 * Locating the NUL with rev + clz, and storing the low part of a word
 * first in the tail code, treats the lowest-addressed byte as the least
 * significant one, i.e. little-endian data.
 */
#if defined(STPCPY)
ENTRY(stpcpy)
#elif defined(STRCPY)
ENTRY(strcpy)
#endif
    mov     zeroones, #REP8_01
#if defined(STRCPY)
    mov     dst, dstin
#endif
    ands    tmp1, src, #15
    b.ne    .Lmisaligned
    // NUL detection works on the principle that (X - 1) & (~X) & 0x80
    // (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
    // can be done in parallel across the entire word.
    // The inner loop deals with two Dwords at a time.  This has a
    // slightly higher start-up cost, but we should win quite quickly,
    // especially on cores with a high number of issue slots per
    // cycle, as we get much better parallelism out of the operations.
.Lloop:
    ldp     data1, data2, [src], #16
    sub     tmp1, data1, zeroones
    orr     tmp2, data1, #REP8_7f
    bic     has_nul1, tmp1, tmp2
    cbnz    has_nul1, .Lnul_in_data1
    sub     tmp3, data2, zeroones
    orr     tmp4, data2, #REP8_7f
    bic     has_nul2, tmp3, tmp4
    cbnz    has_nul2, .Lnul_in_data2
    // No NUL in either register, copy it in a single instruction.
    stp     data1, data2, [dst], #16
    b       .Lloop

    // data1 holds the terminator.  Also entered from the 4-byte and
    // 8-byte alignment steps below; on the 4-byte path the upper halves
    // of data1 and has_nul1 are zero because they were written through
    // their 32-bit views.
.Lnul_in_data1:
    // Byte-reverse the mask so the first byte in memory becomes the most
    // significant one; clz then yields 8 * (index of the first NUL).
    rev     has_nul1, has_nul1
    clz     pos, has_nul1
    // tmp1 = number of bits to store, terminator included (8..64).
    add     tmp1, pos, #0x8

    // Bit 6 set means all eight bytes are needed (NUL is the last byte).
    tbz     tmp1, #6, 1f
#if defined(STPCPY)
    // Advance by 7 so that dst ends up on the NUL just stored.
    str     data1, [dst], #7
#elif defined(STRCPY)
    str     data1, [dst]
#endif
    ret
1:
    // Otherwise bits 5, 4 and 3 of tmp1 select a 4-, 2- and 1-byte
    // store; data1 is shifted down after each store so the next bytes
    // to write are always in its low part.
    tbz     tmp1, #5, 1f
    str     data1_w, [dst], #4
    lsr     data1, data1, #32
1:
    tbz     tmp1, #4, 1f
    strh    data1_w, [dst], #2
    lsr     data1, data1, #16
1:
    tbz     tmp1, #3, 1f
    // This byte is the NUL; dst is not advanced, so it already points
    // at the terminator.
    strb    data1_w, [dst]
#if defined(STPCPY)
    ret
#endif
1:
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret

    // data2 holds the terminator: data1 is stored whole, then the same
    // tail sequence as above is applied to data2.
.Lnul_in_data2:
    str     data1, [dst], #8
    rev     has_nul2, has_nul2
    clz     pos, has_nul2
    add     tmp1, pos, #0x8

    tbz     tmp1, #6, 1f
#if defined(STPCPY)
    str     data2, [dst], #7
#elif defined(STRCPY)
    str     data2, [dst]
#endif
    ret
1:
    tbz     tmp1, #5, 1f
    str     data2_w, [dst], #4
    lsr     data2, data2, #32
1:
    tbz     tmp1, #4, 1f
    strh    data2_w, [dst], #2
    lsr     data2, data2, #16
1:
    tbz     tmp1, #3, 1f
    strb    data2_w, [dst]
#if defined(STPCPY)
    ret
#endif
1:
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret

    // src is not 16-byte aligned.  Each step below tests one low bit of
    // the current src and, when it is set, copies that many bytes, so
    // src reaches 16-byte alignment before the main loop is entered.
.Lmisaligned:
    // Bit 0 set: copy a single byte.
    tbz     src, #0, 1f
    ldrb    data1_w, [src], #1
    strb    data1_w, [dst], #1
    cbnz    data1_w, 1f
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret
1:
    // Bit 1 set: copy two bytes, checking each one for the terminator.
    tbz     src, #1, 1f
    ldrb    data1_w, [src], #1
    strb    data1_w, [dst], #1
    cbz     data1_w, .Ldone
    ldrb    data2_w, [src], #1
    strb    data2_w, [dst], #1
    cbnz    data2_w, 1f
.Ldone:
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret
1:
    // Bit 2 set: copy four bytes.
    tbz     src, #2, 1f
    ldr     data1_w, [src], #4
    // Check for a zero.
    // 32-bit form of the NUL test: (X - 1) & ~X & 0x80 in every byte.
    sub     has_nul1_w, data1_w, zeroones_w
    bic     has_nul1_w, has_nul1_w, data1_w
    ands    has_nul1_w, has_nul1_w, #0x80808080
    b.ne    .Lnul_in_data1
    str     data1_w, [dst], #4
1:
    // Bit 3 set: copy eight bytes; otherwise src is already aligned.
    tbz     src, #3, .Lloop
    ldr     data1, [src], #8
    // Check for a zero.
    sub     tmp1, data1, zeroones
    orr     tmp2, data1, #REP8_7f
    bics    has_nul1, tmp1, tmp2
    b.ne    .Lnul_in_data1
    str     data1, [dst], #8
    b       .Lloop
#if defined(STPCPY)
END(stpcpy)
#elif defined(STRCPY)
END(strcpy)
#endif
245#endif