/*
 * Copyright (C) 2014 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/* Copyright (c) 2014, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include <private/bionic_asm.h>

/*
 * Register aliases and constants used by the routine below.
 * Only x0-x12 (and their w views) are named here.
 */

/* Arguments and results. */
#define dstin       x0      /* destination pointer as passed in */
#define src         x1      /* source pointer, advanced as bytes are read */

/* Locals and temporaries. */
#define dst         x2      /* working copy of dstin, advanced as bytes are written */
#define data1       x3      /* first loaded dword (or a narrower head load) */
#define data1_w     w3
#define data2       x4      /* second loaded dword */
#define data2_w     w4
#define has_nul1    x5      /* non-zero when data1 contains a zero byte */
#define has_nul1_w  w5
#define has_nul2    x6      /* non-zero when data2 contains a zero byte */
#define tmp1        x7
#define tmp2        x8
#define tmp3        x9
#define tmp4        x10
#define zeroones    x11     /* holds REP8_01 for the parallel NUL test */
#define zeroones_w  w11
#define pos         x12     /* bit offset of the first zero byte */

/* A byte value replicated across all eight bytes of a dword. */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/*
 * char* strcpy(char* dst, const char* src)
 *
 * In:    x0 (dstin) = destination, x1 (src) = NUL-terminated source.
 * Out:   x0 is never written, so it still holds the destination pointer
 *        on return.
 * Uses:  x1-x12 and the condition flags as scratch; no stack usage.
 *
 * Strategy: byte/word copies bring src up to 16-byte alignment, then the
 * main loop moves 16 bytes per iteration while testing both dwords for a
 * zero byte. Every load from src is aligned to its own size; dst alignment
 * is never checked, so stores to dst may be unaligned.
 *
 * The tail code writes the low-order bytes of a register to the lowest
 * addresses, i.e. it relies on little-endian data layout.
 */
ENTRY(strcpy)
    mov     zeroones, #REP8_01
    mov     dst, dstin
    // Only the flags are consumed: take the slow head path unless src is
    // already 16-byte aligned.
    ands    tmp1, src, #15
    b.ne    .Lmisaligned
    // NUL detection works on the principle that (X - 1) & (~X) & 0x80
    // (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
    // can be done in parallel across the entire word.
    // The inner loop deals with two Dwords at a time.  This has a
    // slightly higher start-up cost, but we should win quite quickly,
    // especially on cores with a high number of issue slots per
    // cycle, as we get much better parallelism out of the operations.
.Lloop:
    ldp     data1, data2, [src], #16
    sub     tmp1, data1, zeroones
    orr     tmp2, data1, #REP8_7f
    bic     has_nul1, tmp1, tmp2
    cbnz    has_nul1, .Lnul_in_data1
    sub     tmp3, data2, zeroones
    orr     tmp4, data2, #REP8_7f
    bic     has_nul2, tmp3, tmp4
    cbnz    has_nul2, .Lnul_in_data2
    // No NUL in either register, copy it in a single instruction.
    stp     data1, data2, [dst], #16
    b       .Lloop

    // data1 holds the terminator. After the byte reverse, the count of
    // leading zero bits is 8 * (index of the first zero byte), so
    // tmp1 = 8 * (number of bytes to store, terminator included).
.Lnul_in_data1:
    rev     has_nul1, has_nul1
    clz     pos, has_nul1
    add     tmp1, pos, #0x8

    // tmp1 == 64: the terminator is the last byte, store the whole dword.
    tbz     tmp1, #6, 1f
    str     data1, [dst]
    ret
1:
    // Otherwise emit 4, 2 and 1 byte pieces according to bits 5, 4 and 3
    // of tmp1, shifting the stored bytes out of data1 after each piece.
    tbz     tmp1, #5, 1f
    str     data1_w, [dst], #4
    lsr     data1, data1, #32
1:
    tbz     tmp1, #4, 1f
    strh    data1_w, [dst], #2
    lsr     data1, data1, #16
1:
    tbz     tmp1, #3, 1f
    strb    data1_w, [dst]
1:
    ret

    // data1 is NUL-free: store it whole, then handle data2 exactly as
    // .Lnul_in_data1 handles data1.
.Lnul_in_data2:
    str     data1, [dst], #8
    rev     has_nul2, has_nul2
    clz     pos, has_nul2
    add     tmp1, pos, #0x8

    tbz     tmp1, #6, 1f
    str     data2, [dst]
    ret
1:
    tbz     tmp1, #5, 1f
    str     data2_w, [dst], #4
    lsr     data2, data2, #32
1:
    tbz     tmp1, #4, 1f
    strh    data2_w, [dst], #2
    lsr     data2, data2, #16
1:
    tbz     tmp1, #3, 1f
    strb    data2_w, [dst]
1:
    ret

    // src is not 16-byte aligned. Copy 1, 2, 4 and 8 bytes as selected by
    // the low bits of src, returning as soon as a NUL has been copied.
.Lmisaligned:
    // Odd address: copy a single byte.
    tbz     src, #0, 1f
    ldrb    data1_w, [src], #1
    strb    data1_w, [dst], #1
    cbnz    data1_w, 1f
    ret
1:
    // Not 4-byte aligned: copy two bytes, one at a time.
    tbz     src, #1, 1f
    ldrb    data1_w, [src], #1
    strb    data1_w, [dst], #1
    cbz     data1_w, .Ldone
    ldrb    data2_w, [src], #1
    strb    data2_w, [dst], #1
    cbnz    data2_w, 1f
.Ldone:
    ret
1:
    // Not 8-byte aligned: copy one word.
    tbz     src, #2, 1f
    ldr     data1_w, [src], #4
    // Check for a zero.
    // The 32-bit writes leave the upper halves of data1 and has_nul1
    // zero, so the shared 64-bit tail code stores at most 4 bytes.
    sub     has_nul1_w, data1_w, zeroones_w
    bic     has_nul1_w, has_nul1_w, data1_w
    ands    has_nul1_w, has_nul1_w, #0x80808080
    b.ne    .Lnul_in_data1
    str     data1_w, [dst], #4
1:
    // Not 16-byte aligned: copy one dword, then join the main loop.
    tbz     src, #3, .Lloop
    ldr     data1, [src], #8
    // Check for a zero.
    sub     tmp1, data1, zeroones
    orr     tmp2, data1, #REP8_7f
    bics    has_nul1, tmp1, tmp2
    b.ne    .Lnul_in_data1
    str     data1, [dst], #8
    b       .Lloop
END(strcpy)