/*
 * memchr - find a character in a memory zone
 *
 * Copyright (c) 2014, ARM Limited
 * All rights Reserved.
 * Copyright (c) 2014, Linaro Ltd.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the company nor the names of its contributors
 *       may be used to endorse or promote products derived from this
 *       software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include <private/bionic_asm.h>

/* Arguments and results. */
#define srcin	x0
#define chrin	w1
#define cntin	x2

#define result	x0

#define src	x3
#define tmp	x4
#define wtmp2	w5
#define synd	x6
#define soff	x9
#define cntrem	x10
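/*
 * Locals. src is srcin rounded down to a 32-byte boundary, soff is
 * srcin's offset within that block (srcin & 31), cntrem is cntin & 31
 * (used to mask the tail), and synd holds the syndrome described below.
 */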

#define vrepchr		v0
#define vdata1		v1
#define vdata2		v2
#define vhas_chr1	v3
#define vhas_chr2	v4
#define vrepmask	v5
#define vend		v6

/*
 * Core algorithm:
 *
 * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
 * per byte. For each tuple, bit 0 is set if the relevant byte matched the
 * requested character and bit 1 is not used (this is faster than using a
 * 32-bit syndrome). Since the bits in the syndrome reflect exactly the order
 * in which things occur in the original string, counting trailing zeros
 * identifies exactly which byte has matched.
 */
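/*
 * For reference, a scalar C sketch of the same per-chunk computation
 * (illustrative only; the function name is made up here, and the NEON
 * code below produces this value without a byte loop):
 *
 *   #include <stdint.h>
 *
 *   uint64_t chunk_syndrome(const unsigned char *chunk, unsigned char c) {
 *       uint64_t synd = 0;
 *       for (int i = 0; i < 32; i++) {
 *           if (chunk[i] == c)
 *               synd |= 1ULL << (2 * i);  // bit 0 of pair i; bit 1 unused
 *       }
 *       return synd;  // first match at __builtin_ctzll(synd) / 2
 *   }
 */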

ENTRY(memchr)
	/*
	 * Magic constant 0x40100401 allows us to identify which lane matches
	 * the requested byte.
	 */
	cbz	cntin, .Lzero_length
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin
	/* Work with aligned 32-byte chunks */
	bic	src, srcin, #31
	dup	vrepmask.4s, wtmp2
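	/* vrepmask repeats the bytes {0x01, 0x04, 0x10, 0x40}: a distinct bit
	 * for each byte within a 4-byte group, so the addp folds below can
	 * merge the comparison bytes into a syndrome without collisions. */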
	ands	soff, srcin, #31
	and	cntrem, cntin, #31
	b.eq	.Lloop

	/*
	 * Input string is not 32-byte aligned. We calculate the syndrome
	 * value for the aligned 32-byte block containing the first bytes
	 * and mask the irrelevant part.
	 */

	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	sub	tmp, soff, #32
	adds	cntin, cntin, tmp
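	/* cntin += soff - 32: drop the 32 - soff buffer bytes covered by this
	 * first block from the count; LS afterwards means the first block is
	 * also the last one. */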
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b	/* 256->128 */
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	mov	synd, vend.2d[0]
	/* Clear the soff*2 lower bits: matches before srcin are irrelevant */
	lsl	tmp, soff, #1
	lsr	synd, synd, tmp
	lsl	synd, synd, tmp
	/* The first block can also be the last */
	b.ls	.Lmasklast
	/* Have we found something already? */
	cbnz	synd, .Ltail

.Lloop:
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	subs	cntin, cntin, #32
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* If we're out of data we finish regardless of the result */
	b.ls	.Lend
	/* Use a fast check for the termination condition */
	orr	vend.16b, vhas_chr1.16b, vhas_chr2.16b
	addp	vend.2d, vend.2d, vend.2d
	mov	synd, vend.2d[0]
	/* We're not out of data, loop if we haven't found the character */
	cbz	synd, .Lloop
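	/*
	 * The in-loop check above only needs to know whether any byte matched
	 * at all, so an OR of the comparison results folded to 64 bits is
	 * enough; the ordered two-bits-per-byte syndrome is rebuilt only once
	 * we leave the loop.
	 */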

.Lend:
	/* Termination condition found, let's calculate the syndrome value */
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b	/* 256->128 */
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	mov	synd, vend.2d[0]
	/* Only do the clear for the last possible block */
	b.hi	.Ltail

.Lmasklast:
	/* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
	add	tmp, cntrem, soff
	and	tmp, tmp, #31
	sub	tmp, tmp, #32
	neg	tmp, tmp, lsl #1
	lsl	synd, synd, tmp
	lsr	synd, synd, tmp
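	/* tmp = (32 - ((cntrem + soff) & 31)) * 2. If the buffer ends exactly
	 * on a block boundary this is 64, and since variable shifts use only
	 * the low 6 bits of the count, the masking degenerates to a no-op. */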

.Ltail:
	/* Count the trailing zeros using bit reversing */
	rbit	synd, synd
	/* Compensate for the last post-increment */
	sub	src, src, #32
	/* Check that we have found a character */
	cmp	synd, #0
	/* And count the leading zeros */
	clz	synd, synd
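	/* clz of the bit-reversed value is the trailing-zero count of the
	 * original syndrome: the index of the first set bit. Each byte owns
	 * two syndrome bits, so lsr #1 below converts it to a byte offset. */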
	/* Compute the potential result */
	add	result, src, synd, lsr #1
	/* Select result or NULL */
	csel	result, xzr, result, eq
	ret

.Lzero_length:
	mov	result, xzr
	ret
END(memchr)