/*
   strchr - find a character in a string

   Copyright (c) 2014, ARM Limited
   All rights Reserved.
   Copyright (c) 2014, Linaro Ltd.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the company nor the names of its contributors
         may be used to endorse or promote products derived from this
         software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include <private/bionic_asm.h>

/* Arguments and results. */
#define srcin       x0
#define chrin       w1

#define result      x0

#define src         x2
#define tmp1        x3
#define wtmp2       w4
#define tmp3        x5

#define vrepchr     v0
#define vdata1      v1
#define vdata2      v2
#define vhas_nul1   v3
#define vhas_nul2   v4
#define vhas_chr1   v5
#define vhas_chr2   v6
#define vrepmask_0  v7
#define vrepmask_c  v16
#define vend1       v17
#define vend2       v18

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems). For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit 0 for the special case of looking for NUL). Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string, a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why. */

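/* For illustration only, a scalar C sketch of the same idea (not part of the
   build; scan_hunk is a made-up name, and the sketch assumes a GCC/Clang-style
   __builtin_ctzll — the real code below builds the syndrome with NEON compares
   and the repeated-mask constants rather than a byte loop):

       static const char *scan_hunk(const char *hunk, unsigned char c)
       {
           unsigned long long syndrome = 0;
           for (int i = 0; i < 32; i++) {
               unsigned char b = (unsigned char) hunk[i];
               if (b == c)
                   syndrome |= 1ULL << (2 * i);      // bit 0 of the pair: char match
               if (b == '\0')
                   syndrome |= 2ULL << (2 * i);      // bit 1 of the pair: NUL
           }
           if (syndrome == 0)
               return NULL;                          // nothing here, scan the next hunk
           int bit = __builtin_ctzll(syndrome);      // first interesting byte, and why
           return (bit & 1) ? NULL : hunk + bit / 2; // odd bit first => NUL before c
       }

   Returning NULL in the odd-bit case matches strchr() returning NULL when the
   string ends before the character is found. */
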
/* Locals and temporaries. */

ENTRY(strchr)
        /* Magic constant 0x40100401 to allow us to identify which lane
           matches the requested byte. Magic constant 0x80200802 used
           similarly for NUL termination. */
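        /* A worked reading of the constants (illustration, little-endian lane
           order): each 32-bit word of vrepmask_c holds the bytes
           { 0x01, 0x04, 0x10, 0x40 }, so byte lane n contributes bit 2n of its
           word, and the doubled copy in vrepmask_0 contributes bit 2n + 1.
           For example, if only byte lane 2 of a word matches the character:

               cmeq result:          00 00 ff 00
               AND 01 04 10 40:      00 00 10 00
               after the two addp folds, that word's syndrome byte is 0x10

           i.e. bit 4 is set, which is bit 0 of the two-bit field for byte 2. */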
        mov     wtmp2, #0x0401
        movk    wtmp2, #0x4010, lsl #16
        dup     vrepchr.16b, chrin
        bic     src, srcin, #31 /* Work with aligned 32-byte hunks. */
        dup     vrepmask_c.4s, wtmp2
        ands    tmp1, srcin, #31
        add     vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
        b.eq    .Lloop

        /* Input string is not 32-byte aligned. Rather than forcing
           the padding bytes to a safe value, we calculate the syndrome
           for all the bytes, but then mask off those bits of the
           syndrome that are related to the padding. */
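        /* A C sketch of that mask computation (illustration only; "offset" and
           "mask" are just names for srcin & 31 and the value built in tmp1,
           and offset is non-zero on this path):

               mask     = ~0ULL >> (64 - 2 * offset);  // low 2*offset bits set
               syndrome = syndrome & ~mask;            // drop the padding bytes

           so the two-bit fields belonging to the bytes before srcin can never
           produce a false match. */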
        ld1     {vdata1.16b, vdata2.16b}, [src], #32
        neg     tmp1, tmp1
        cmeq    vhas_nul1.16b, vdata1.16b, #0
        cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
        cmeq    vhas_nul2.16b, vdata2.16b, #0
        cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
        and     vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
        and     vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
        and     vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
        and     vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
        orr     vend1.16b, vhas_nul1.16b, vhas_chr1.16b
        orr     vend2.16b, vhas_nul2.16b, vhas_chr2.16b
        lsl     tmp1, tmp1, #1
        addp    vend1.16b, vend1.16b, vend2.16b // 256->128
        mov     tmp3, #~0
        addp    vend1.16b, vend1.16b, vend2.16b // 128->64
        lsr     tmp1, tmp3, tmp1

        mov     tmp3, vend1.2d[0]
        bic     tmp1, tmp3, tmp1 // Mask padding bits.
        cbnz    tmp1, .Ltail

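        /* Main loop. A rough scalar model of its fast termination check
           (illustration only; fold_to_64_bits stands for the addp that narrows
           the combined compare results from 128 bits to 64):

               any = (data1 == 0) | (data1 == c) | (data2 == 0) | (data2 == c);
               if (fold_to_64_bits(any) == 0)
                   continue;    // nothing interesting in this hunk

           Only when something was found does it pay to rebuild the precise
           two-bit-per-byte syndrome with the mask constants. */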
.Lloop:
        ld1     {vdata1.16b, vdata2.16b}, [src], #32
        cmeq    vhas_nul1.16b, vdata1.16b, #0
        cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
        cmeq    vhas_nul2.16b, vdata2.16b, #0
        cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
        /* Use a fast check for the termination condition. */
        orr     vend1.16b, vhas_nul1.16b, vhas_chr1.16b
        orr     vend2.16b, vhas_nul2.16b, vhas_chr2.16b
        orr     vend1.16b, vend1.16b, vend2.16b
        addp    vend1.2d, vend1.2d, vend1.2d
        mov     tmp1, vend1.2d[0]
        cbz     tmp1, .Lloop

        /* Termination condition found. Now need to establish exactly why
           we terminated. */
        and     vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
        and     vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
        and     vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
        and     vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
        orr     vend1.16b, vhas_nul1.16b, vhas_chr1.16b
        orr     vend2.16b, vhas_nul2.16b, vhas_chr2.16b
        addp    vend1.16b, vend1.16b, vend2.16b // 256->128
        addp    vend1.16b, vend1.16b, vend2.16b // 128->64

        mov     tmp1, vend1.2d[0]
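        /* The tail turns the syndrome into a result. AArch64 has no ctz
           instruction, so rbit + clz is used instead; in C terms (a sketch,
           illustration only, where ctz(x) == clz(bit_reverse(x))):

               bit = ctz(syndrome);
               if (bit & 1)
                   return NULL;                 // NUL was seen before the character
               return (src - 32) + bit / 2;     // each byte owns two syndrome bits

           src has already advanced past the hunk, hence the re-bias by 32. */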
.Ltail:
        /* Count the trailing zeros, by bit reversing... */
        rbit    tmp1, tmp1
        /* Re-bias source. */
        sub     src, src, #32
        clz     tmp1, tmp1 /* And counting the leading zeros. */
        /* tmp1 is even if the target character was found first. Otherwise
           we've found the end of string and we weren't looking for NUL. */
        tst     tmp1, #1
        add     result, src, tmp1, lsr #1
        csel    result, result, xzr, eq
        ret
END(strchr)