/*
   strchr - find a character in a string

   Copyright (c) 2014, ARM Limited
   All rights Reserved.
   Copyright (c) 2014, Linaro Ltd.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the company nor the names of its contributors
         may be used to endorse or promote products derived from this
         software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include <private/bionic_asm.h>

/* Arguments and results (AAPCS64: x0/w1 inbound, x0 outbound). */
#define srcin		x0	/* const char *s: string to search. */
#define chrin		w1	/* int c: byte to search for (low 8 bits used). */

#define result		x0	/* char * return value: match address or NULL. */

/* Locals and temporaries. */
#define src		x2	/* Running, 32-byte-aligned read pointer. */
#define tmp1		x3	/* Misalignment count, then syndrome scratch. */
#define wtmp2		w4	/* 32-bit scratch for building magic constants. */
#define tmp3		x5	/* 64-bit scratch (all-ones mask / syndrome copy). */

#define vrepchr		v0	/* Search byte replicated to all 16 lanes. */
#define vdata1		v1	/* First 16 bytes of the current hunk. */
#define vdata2		v2	/* Second 16 bytes of the current hunk. */
#define vhas_nul1	v3	/* Per-lane NUL-match flags for vdata1. */
#define vhas_nul2	v4	/* Per-lane NUL-match flags for vdata2. */
#define vhas_chr1	v5	/* Per-lane char-match flags for vdata1. */
#define vhas_chr2	v6	/* Per-lane char-match flags for vdata2. */
#define vrepmask_0	v7	/* 0x80200802 per word: bit-1 markers (NUL). */
#define vrepmask_c	v16	/* 0x40100401 per word: bit-0 markers (char). */
#define vend1		v17	/* Combined/reduced syndrome accumulator. */
#define vend2		v18	/* Combined syndrome for the second 16 bytes. */

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why. */

/*-----------------------------------------------------------------------
 * char *strchr(const char *s, int c)
 * ABI:   AAPCS64.
 * In:    x0 = s, w1 = c.
 * Out:   x0 = address of first occurrence of (char)c in s, or NULL if
 *        the string terminates first.  Searching for '\0' returns the
 *        address of the terminating NUL (char-match bit also fires on
 *        the NUL byte, and bit 0 wins the even/odd test below).
 * Clobb: x2-x5, v0-v7, v16-v18, flags (all volatile under AAPCS64).
 * Note:  reads may extend past the NUL but never cross a 32-byte
 *        (sub-page) boundary beyond it, so they cannot fault.
 *----------------------------------------------------------------------*/
ENTRY(strchr)
	/* Magic constant 0x40100401 to allow us to identify which lane
	   matches the requested byte.  Magic constant 0x80200802 used
	   similarly for NUL termination.  (The second is the first
	   doubled, built below with an add-to-self.) */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin		/* Splat search byte to 16 lanes. */
	bic	src, srcin, #31			/* Work with aligned 32-byte hunks. */
	dup	vrepmask_c.4s, wtmp2
	ands	tmp1, srcin, #31		/* tmp1 = misalignment in bytes. */
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	.Lloop

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding. */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b /* NUL hit -> tuple bit 1. */
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b /* char hit -> tuple bit 0. */
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
	lsl	tmp1, tmp1, #1			/* Two syndrome bits per padding byte. */
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	mov	tmp3, #~0
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
	lsr	tmp1, tmp3, tmp1		/* lsr by (64 - 2*pad) mod 64: tmp1 =
						   mask of the 2*pad low padding bits. */

	mov	tmp3, vend1.2d[0]
	bic	tmp1, tmp3, tmp1		/* Mask padding bits. */
	cbnz	tmp1, .Ltail

.Lloop:
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Use a fast check for the termination condition: any NUL or
	   char match at all.  Lane order doesn't matter yet, so a plain
	   OR-reduce is enough — the precise syndrome is only built after
	   we know a hit exists. */
	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
	orr	vend1.16b, vend1.16b, vend2.16b
	addp	vend1.2d, vend1.2d, vend1.2d
	mov	tmp1, vend1.2d[0]
	cbz	tmp1, .Lloop

	/* Termination condition found.  Now need to establish exactly why
	   we terminated, by building the ordered 2-bit-per-byte syndrome. */
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
	/* Only vend1's low 64 bits (formed from vend1's own pairs) are
	   read below; the vend2 source in the second addp merely fills
	   the unused upper half. */
	mov	tmp1, vend1.2d[0]
.Ltail:
	/* Count the trailing zeros, by bit reversing... */
	rbit	tmp1, tmp1
	/* Re-bias source. */
	sub	src, src, #32
	clz	tmp1, tmp1	/* And counting the leading zeros. */
	/* Tmp1 is even if the target character was found first.  Otherwise
	   we've found the end of string and we weren't looking for NUL. */
	tst	tmp1, #1
	add	result, src, tmp1, lsr #1	/* Syndrome index / 2 = byte offset. */
	csel	result, result, xzr, eq		/* Odd (NUL first) -> return NULL. */
	ret
END(strchr)