blob: 2b67f5a23756a36370d6851b5ba4e475496b0579 [file] [log] [blame]
Nikola Veljkovic38f2eaa2015-05-26 12:06:09 +02001/*
2 * Copyright (c) 2014
3 * Imagination Technologies Limited.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#ifdef __ANDROID__
31# include <private/bionic_asm.h>
32#elif _LIBC
33# include <sysdep.h>
34# include <regdef.h>
35# include <sys/asm.h>
36#elif _COMPILING_NEWLIB
37# include "machine/asm.h"
38# include "machine/regdef.h"
39#else
40# include <regdef.h>
41# include <sys/asm.h>
42#endif
43
/* Strictly speaking strcmp must not read beyond the end of the strings it
   compares.  This routine loads whole words, so bytes that follow the NUL
   terminator inside the last word may be read, but the word after the end
   of the string is never loaded unless ENABLE_READAHEAD is defined.
   Defining ENABLE_READAHEAD is faster but technically violates the
   definition of strcmp.  */
#ifndef ENABLE_READAHEAD
# define DELAY_READ nop
#else
# define DELAY_READ
#endif
55
/* CLZ stays off by default: measurements on a little endian machine showed
   it to be a performance loss.  It is used only when ENABLE_CLZ is defined
   and the ISA revision is greater than 1.  */
#ifdef ENABLE_CLZ
# if __mips_isa_rev >= 2
#  define USE_CLZ
# endif
#endif
61
/* Supply the L macro for asm.h files that lack it: local labels are
   spelled $Lname under the o32 ABI and .Lname otherwise.  */
#if !defined(L)
# if _MIPS_SIM != _ABIO32
#  define L(label) .L ## label
# else
#  define L(label) $L ## label
# endif
#endif
70
/* Supply the PTR_ADDIU macro for asm.h files that lack it: addiu under the
   o32 ABI, daddiu otherwise.  */
#if !defined(PTR_ADDIU)
# if _MIPS_SIM != _ABIO32
#  define PTR_ADDIU daddiu
# else
#  define PTR_ADDIU addiu
# endif
#endif
79
/* The routine is named strcmp unless the build supplies another name
   through STRCMP_NAME.  */
#if !defined(STRCMP_NAME)
# define STRCMP_NAME strcmp
#endif
84
#ifdef __ANDROID__
LEAF(STRCMP_NAME, 0)
#else
LEAF(STRCMP_NAME)
#endif
/* int STRCMP_NAME (const char *s1, const char *s2)

   In:       a0 = s1, a1 = s2 (NUL terminated strings).
   Out:      v0 = 0 when the strings are equal, otherwise the difference
             between the first pair of bytes that differ, each byte taken
             as an unsigned value.
   Clobbers: v1, a0, a1, t0, t1, t8, t9.

   Everything below is assembled under ".set noreorder": the instruction
   written after a branch or jump is its delay slot and is executed whether
   or not the branch is taken, so the instruction order is significant.  */
        .set    nomips16
        .set    noreorder

        /* Use the byte loop unless both pointers are 4 byte aligned.  The
           lui in the delay slot only starts building t8, which is harmless
           on the byte path.  */
        or      t0, a0, a1
        andi    t0,0x3
        bne     t0, zero, L(byteloop)

/* Both strings are 4 byte aligned at this point.  */

        /* t8 = 0x01010101 and t9 = 0x7f7f7f7f, the constants of the zero
           byte test performed by STRCMP32.  */
        lui     t8, 0x0101
        ori     t8, t8, 0x0101
        lui     t9, 0x7f7f
        ori     t9, 0x7f7f

/* Compare the words at OFFSET(a0) and OFFSET(a1).  Branch to L(worddiff)
   when they differ.  Otherwise branch to L(returnzero) when
   (v0 - t8) & ~(v0 | t9) is non-zero, i.e. when the (identical) word
   contains a NUL byte.  The macro ends on a branch, so whatever instruction
   follows its use is that branch's delay slot.  */
#define STRCMP32(OFFSET) \
        lw      v0, OFFSET(a0); \
        lw      v1, OFFSET(a1); \
        subu    t0, v0, t8; \
        bne     v0, v1, L(worddiff); \
        nor     t1, v0, t9; \
        and     t0, t0, t1; \
        bne     t0, zero, L(returnzero)

/* Eight words per iteration.  DELAY_READ fills the delay slot of the last
   branch of each STRCMP32: either with a nop, or with nothing, in which
   case the first load of the next STRCMP32 becomes the delay slot.  */
L(wordloop):
        STRCMP32(0)
        DELAY_READ
        STRCMP32(4)
        DELAY_READ
        STRCMP32(8)
        DELAY_READ
        STRCMP32(12)
        DELAY_READ
        STRCMP32(16)
        DELAY_READ
        STRCMP32(20)
        DELAY_READ
        STRCMP32(24)
        DELAY_READ
        STRCMP32(28)
        PTR_ADDIU a0, a0, 32
        b       L(wordloop)
        PTR_ADDIU a1, a1, 32

/* Equal words containing a NUL byte: the strings are equal.  */
L(returnzero):
        j       ra
        move    v0, zero

/* v0 and v1 hold the first pair of words that differ.  Find the first byte
   (in string order) that differs or is NUL in v0 and return the difference
   of the two bytes at that position.  */
L(worddiff):
#ifdef USE_CLZ
        /* t1 = 0x80 in exactly those bytes of v0 that are NUL.  The cheaper
           (v0 - t8) & ~(v0 | t9) test from STRCMP32 cannot be reused to
           locate the byte: its borrow can also flag a 0x01 byte in the
           next more significant position above a NUL byte, which on big
           endian is an earlier character and would be the one selected by
           the clz below.  (b & 0x7f) + 0x7f never carries out of a byte, so
           this form has no such false positives.  */
        and     t0, v0, t9
        addu    t0, t0, t9
        or      t0, t0, v0
        nor     t1, t0, t9
        /* t0 = bits that differ between the words, plus the NUL markers.  */
        xor     t0, v0, v1
        or      t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        /* Reverse the byte order so that the first string byte becomes the
           most significant one.  */
        wsbh    t0, t0
        rotr    t0, t0, 16
# endif
        /* t1 = 0, 8, 16 or 24: bit offset, counted from the top of t0, of
           the first flagged byte.  */
        clz     t1, t0
        and     t1, 0xf8
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        /* Turn it into the right rotation that brings that byte of the
           unswapped words down to bits 7..0: 24 - t1.  */
        neg     t1
        addu    t1, 24
# endif
        rotrv   v0, v0, t1
        rotrv   v1, v1, t1
        and     v0, v0, 0xff
        and     v1, v1, 0xff
        j       ra
        subu    v0, v0, v1
#else /* USE_CLZ */
        /* Examine the four byte positions in string order.  Consecutive
           positions alternate between the t0/t1 and t8/t9 register pairs
           because the first instruction of each group sits in the delay
           slot of the previous group's bne and must not disturb the pair
           that the taken branch is about to subtract.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        andi    t0, v0, 0xff
        beq     t0, zero, L(wexit01)
        andi    t1, v1, 0xff
        bne     t0, t1, L(wexit01)

        srl     t8, v0, 8
        srl     t9, v1, 8
        andi    t8, t8, 0xff
        beq     t8, zero, L(wexit89)
        andi    t9, t9, 0xff
        bne     t8, t9, L(wexit89)

        srl     t0, v0, 16
        srl     t1, v1, 16
        andi    t0, t0, 0xff
        beq     t0, zero, L(wexit01)
        andi    t1, t1, 0xff
        bne     t0, t1, L(wexit01)

        srl     t8, v0, 24
        srl     t9, v1, 24
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
        srl     t0, v0, 24
        beq     t0, zero, L(wexit01)
        srl     t1, v1, 24
        bne     t0, t1, L(wexit01)

        srl     t8, v0, 16
        srl     t9, v1, 16
        andi    t8, t8, 0xff
        beq     t8, zero, L(wexit89)
        andi    t9, t9, 0xff
        bne     t8, t9, L(wexit89)

        srl     t0, v0, 8
        srl     t1, v1, 8
        andi    t0, t0, 0xff
        beq     t0, zero, L(wexit01)
        andi    t1, t1, 0xff
        bne     t0, t1, L(wexit01)

        andi    t8, v0, 0xff
        andi    t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

        /* The last byte position falls through to here with its bytes in
           t8/t9.  */
L(wexit89):
        j       ra
        subu    v0, t8, t9
L(wexit01):
        j       ra
        subu    v0, t0, t1
#endif /* USE_CLZ */

/* It might seem better to do the 'beq' instruction between the two 'lbu'
   instructions so that the nop is not needed but testing showed that this
   code is actually faster (based on glibc strcmp test).  */
/* Compare the bytes at OFFSET(a0) and OFFSET(a1) using v0/v1: leave through
   L(bexit01) when the first byte is NUL or the bytes differ.  Ends on a
   branch; the next instruction emitted is its delay slot.  */
#define BYTECMP01(OFFSET) \
        lbu     v0, OFFSET(a0); \
        lbu     v1, OFFSET(a1); \
        beq     v0, zero, L(bexit01); \
        nop; \
        bne     v0, v1, L(bexit01)

/* Same as BYTECMP01 but using t8/t9 and L(bexit89).  The two macros are
   alternated so that the first load of one, executing in the delay slot of
   the other's final branch, does not overwrite the registers that the
   taken branch subtracts.  */
#define BYTECMP89(OFFSET) \
        lbu     t8, OFFSET(a0); \
        lbu     t9, OFFSET(a1); \
        beq     t8, zero, L(bexit89); \
        nop; \
        bne     t8, t9, L(bexit89)

/* Byte at a time comparison, eight bytes per iteration, used when either
   pointer is not 4 byte aligned.  */
L(byteloop):
        BYTECMP01(0)
        BYTECMP89(1)
        BYTECMP01(2)
        BYTECMP89(3)
        BYTECMP01(4)
        BYTECMP89(5)
        BYTECMP01(6)
        BYTECMP89(7)
        PTR_ADDIU a0, a0, 8
        b       L(byteloop)
        PTR_ADDIU a1, a1, 8

L(bexit01):
        j       ra
        subu    v0, v0, v1
L(bexit89):
        j       ra
        subu    v0, t8, t9

        .set    at
        .set    reorder

END(STRCMP_NAME)
#ifndef __ANDROID__
# ifdef _LIBC
libc_hidden_builtin_def (STRCMP_NAME)
# endif
#endif