blob: 7b7e69912427a0b2a5df0d53a9ecc2e9aa8cbd88 [file] [log] [blame]
Bernhard Rosenkränzer8c20c132014-07-11 00:17:07 +02001/*
Bernhard Rosenkränzer8c20c132014-07-11 00:17:07 +02002 *
Jake Weinstein28285f52017-05-04 12:08:39 -04003 Copyright (c) 2014, ARM Limited
4 All rights Reserved.
5 Copyright (c) 2014, Linaro Ltd.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 * Neither the name of the company nor the names of its contributors
15 may be used to endorse or promote products derived from this
16 software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
Bernhard Rosenkränzer8c20c132014-07-11 00:17:07 +020030
31/* Assumptions:
32 *
33 * ARMv8-a, AArch64
34 * Neon Available.
35 */
36
37#include <private/bionic_asm.h>
38
39/* Arguments and results. */
40#define srcin x0
41#define chrin w1
42#define cntin x2
43
44#define result x0
45
46#define src x3
47#define tmp x4
48#define wtmp2 w5
49#define synd x6
50#define soff x9
51#define cntrem x10
52
53#define vrepchr v0
54#define vdata1 v1
55#define vdata2 v2
56#define vhas_chr1 v3
57#define vhas_chr2 v4
58#define vrepmask v5
59#define vend v6
60
61/*
62 * Core algorithm:
63 *
64 * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
65 * per byte. For each tuple, bit 0 is set if the relevant byte matched the
66 * requested character and bit 1 is not used (faster than using a 32bit
67 * syndrome). Since the bits in the syndrome reflect exactly the order in which
68 * things occur in the original string, counting trailing zeros allows to
69 * identify exactly which byte has matched.
70 */
71
72ENTRY(memchr)
73 /*
74 * Magic constant 0x40100401 allows us to identify which lane matches
75 * the requested byte.
76 */
Christopher Ferrise03e1ea2014-07-30 16:06:56 -070077 cbz cntin, .Lzero_length
Bernhard Rosenkränzer8c20c132014-07-11 00:17:07 +020078 mov wtmp2, #0x0401
79 movk wtmp2, #0x4010, lsl #16
80 dup vrepchr.16b, chrin
81 /* Work with aligned 32-byte chunks */
82 bic src, srcin, #31
83 dup vrepmask.4s, wtmp2
84 ands soff, srcin, #31
85 and cntrem, cntin, #31
86 b.eq .Lloop
87
88 /*
89 * Input string is not 32-byte aligned. We calculate the syndrome
90 * value for the aligned 32 bytes block containing the first bytes
91 * and mask the irrelevant part.
92 */
93
94 ld1 {vdata1.16b, vdata2.16b}, [src], #32
95 sub tmp, soff, #32
96 adds cntin, cntin, tmp
97 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
98 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
99 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
100 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
101 addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
102 addp vend.16b, vend.16b, vend.16b /* 128->64 */
Chih-Hung Hsieh33f33512015-05-11 11:21:19 -0700103 mov synd, vend.d[0]
Bernhard Rosenkränzer8c20c132014-07-11 00:17:07 +0200104 /* Clear the soff*2 lower bits */
105 lsl tmp, soff, #1
106 lsr synd, synd, tmp
107 lsl synd, synd, tmp
108 /* The first block can also be the last */
109 b.ls .Lmasklast
110 /* Have we found something already? */
111 cbnz synd, .Ltail
112
113.Lloop:
114 ld1 {vdata1.16b, vdata2.16b}, [src], #32
115 subs cntin, cntin, #32
116 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
117 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
118 /* If we're out of data we finish regardless of the result */
119 b.ls .Lend
120 /* Use a fast check for the termination condition */
121 orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
122 addp vend.2d, vend.2d, vend.2d
Chih-Hung Hsieh33f33512015-05-11 11:21:19 -0700123 mov synd, vend.d[0]
Bernhard Rosenkränzer8c20c132014-07-11 00:17:07 +0200124 /* We're not out of data, loop if we haven't found the character */
125 cbz synd, .Lloop
126
127.Lend:
128 /* Termination condition found, let's calculate the syndrome value */
129 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
130 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
131 addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
132 addp vend.16b, vend.16b, vend.16b /* 128->64 */
Chih-Hung Hsieh33f33512015-05-11 11:21:19 -0700133 mov synd, vend.d[0]
Bernhard Rosenkränzer8c20c132014-07-11 00:17:07 +0200134 /* Only do the clear for the last possible block */
135 b.hi .Ltail
136
137.Lmasklast:
138 /* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
139 add tmp, cntrem, soff
140 and tmp, tmp, #31
141 sub tmp, tmp, #32
142 neg tmp, tmp, lsl #1
143 lsl synd, synd, tmp
144 lsr synd, synd, tmp
145
146.Ltail:
147 /* Count the trailing zeros using bit reversing */
148 rbit synd, synd
149 /* Compensate the last post-increment */
150 sub src, src, #32
151 /* Check that we have found a character */
152 cmp synd, #0
153 /* And count the leading zeros */
154 clz synd, synd
155 /* Compute the potential result */
156 add result, src, synd, lsr #1
157 /* Select result or NULL */
158 csel result, xzr, result, eq
159 ret
Christopher Ferrise03e1ea2014-07-30 16:06:56 -0700160
161.Lzero_length:
162 mov result, xzr
163 ret
Bernhard Rosenkränzer8c20c132014-07-11 00:17:07 +0200164END(memchr)