blob: 848610ab72d16ea9463f73474cd6c4bbadfba1e4 [file] [log] [blame]
Jari Aalto7117c2d2002-07-17 14:10:11 +00001/* strmatch.c -- ksh-like extended pattern matching for the shell and filename
2 globbing. */
3
Chet Ramey495aee42011-11-22 19:11:26 -05004/* Copyright (C) 1991-2011 Free Software Foundation, Inc.
Jari Aalto7117c2d2002-07-17 14:10:11 +00005
6 This file is part of GNU Bash, the Bourne Again SHell.
7
Jari Aalto31859422009-01-12 13:36:28 +00008 Bash is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 Bash is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with Bash. If not, see <http://www.gnu.org/licenses/>.
20*/
Jari Aalto7117c2d2002-07-17 14:10:11 +000021
22#include <config.h>
23
24#include <stdio.h> /* for debugging */
25
26#include "strmatch.h"
27#include <chartypes.h>
28
29#include "bashansi.h"
30#include "shmbutil.h"
31#include "xmalloc.h"
32
/* Character-type parameters for the first pass over `sm_loop.c', which is
   compiled for single-byte characters. */
#define CHAR	unsigned char
#define U_CHAR	unsigned char
#define XCHAR	char
#define INT	int
#define L(CS)	CS		/* literals are used exactly as written */
#define INVALID	-1

/* Byte-string equality helpers.  Any earlier definitions are discarded; the
   first characters are compared inline before strcmp/strncmp is called. */
#undef STREQ
#undef STREQN
#define STREQ(a, b)	((a)[0] == (b)[0] && strcmp(a, b) == 0)
#define STREQN(a, b, n)	((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
45
/* Initial value for glob_asciirange; 0 unless GLOBASCII_DEFAULT has already
   been defined before this point. */
#if !defined (GLOBASCII_DEFAULT)
#  define GLOBASCII_DEFAULT 0
#endif

/* When non-zero, the range comparison helpers in this file may order
   characters by numeric value instead of by locale collation, provided the
   caller has not forced collation. */
int glob_asciirange = GLOBASCII_DEFAULT;
51
/* Range comparisons in bracket expressions go through strcoll(3), even
   though that can have unwanted side effects in locales other than POSIX
   or US.  For instance, in the de locale, [A-Z] matches all characters.
   If glob_asciirange is non-zero, and we're not forcing the use of strcoll
   (e.g., for explicit collating symbols), we use straight ordering as if
   in the C locale. */

#if defined (HAVE_STRCOLL)
/* Order the single-byte characters C1 and C2.  Returns 0 when they are the
   same after masking to eight bits, otherwise a negative or positive value
   giving their relative order.  A non-zero FORCECOLL always consults
   strcoll, regardless of glob_asciirange. */
static int
rangecmp (c1, c2, forcecoll)
     int c1, c2;
     int forcecoll;
{
  static char lhs[2] = { ' ', '\0' };
  static char rhs[2] = { ' ', '\0' };
  int diff, order;

  /* Only the low eight bits of each character take part. */
  c1 &= 0xFF;
  c2 &= 0xFF;
  diff = c1 - c2;

  if (diff == 0)
    return (0);

  /* Plain numeric ordering was requested and collation is not forced. */
  if (glob_asciirange && forcecoll == 0)
    return (diff);

  lhs[0] = c1;
  rhs[0] = c2;
  order = strcoll (lhs, rhs);

  /* Distinct characters that collate equal are ordered by value, so two
     different characters never compare as 0. */
  return (order != 0 ? order : diff);
}
#else /* !HAVE_STRCOLL */
# define rangecmp(c1, c2, f)	((int)(c1) - (int)(c2))
#endif /* !HAVE_STRCOLL */
90
/* Non-zero when C1 and C2 are equivalent for collation purposes.  Without
   strcoll this is plain equality; with it, rangecmp is asked with collation
   forced and a result of 0 means equivalent. */
#if !defined (HAVE_STRCOLL)
# define collequiv(c1, c2)	((c1) == (c2))
#else
static int
collequiv (c1, c2)
     int c1, c2;
{
  int order;

  order = rangecmp (c1, c2, 1);
  return (order == 0);
}
#endif
101
102#define _COLLSYM _collsym
103#define __COLLSYM __collsym
104#define POSIXCOLL posix_collsyms
105#include "collsyms.h"
106
107static int
108collsym (s, len)
Jari Aalto95732b42005-12-07 14:08:12 +0000109 CHAR *s;
Jari Aalto7117c2d2002-07-17 14:10:11 +0000110 int len;
111{
112 register struct _collsym *csp;
Jari Aalto95732b42005-12-07 14:08:12 +0000113 char *x;
Jari Aalto7117c2d2002-07-17 14:10:11 +0000114
Jari Aalto95732b42005-12-07 14:08:12 +0000115 x = (char *)s;
Jari Aalto7117c2d2002-07-17 14:10:11 +0000116 for (csp = posix_collsyms; csp->name; csp++)
117 {
Jari Aalto95732b42005-12-07 14:08:12 +0000118 if (STREQN(csp->name, x, len) && csp->name[len] == '\0')
Jari Aalto7117c2d2002-07-17 14:10:11 +0000119 return (csp->code);
120 }
121 if (len == 1)
122 return s[0];
123 return INVALID;
124}
125
126/* unibyte character classification */
/* Fallback for systems that provide isascii neither as a macro nor (per
   HAVE_ISASCII) as a function: true for values 0 through 0177. */
#if !(defined (isascii) || defined (HAVE_ISASCII))
#  define isascii(c)	((unsigned int)(c) <= 0177)
#endif
130
/* Character classes understood by is_cclass.  Each enumerator's value is the
   index of its name in cclass_name[], so the two lists must stay in the same
   order. */
enum char_class
  {
    CC_NO_CLASS = 0,	/* placeholder: name not recognized */
    CC_ASCII,
    CC_ALNUM,
    CC_ALPHA,
    CC_BLANK,
    CC_CNTRL,
    CC_DIGIT,
    CC_GRAPH,
    CC_LOWER,
    CC_PRINT,
    CC_PUNCT,
    CC_SPACE,
    CC_UPPER,
    CC_WORD,
    CC_XDIGIT
  };

/* Class names, indexed by enum char_class. */
static char const *const cclass_name[] =
  {
    "",			/* CC_NO_CLASS */
    "ascii",
    "alnum",
    "alpha",
    "blank",
    "cntrl",
    "digit",
    "graph",
    "lower",
    "print",
    "punct",
    "space",
    "upper",
    "word",
    "xdigit"
  };

/* Number of entries in cclass_name[], including the placeholder at index 0. */
#define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
146
147static int
148is_cclass (c, name)
149 int c;
150 const char *name;
151{
152 enum char_class char_class = CC_NO_CLASS;
153 int i, result;
154
155 for (i = 1; i < N_CHAR_CLASS; i++)
156 {
157 if (STREQ (name, cclass_name[i]))
158 {
159 char_class = (enum char_class)i;
160 break;
161 }
162 }
163
164 if (char_class == 0)
165 return -1;
166
167 switch (char_class)
168 {
169 case CC_ASCII:
170 result = isascii (c);
171 break;
172 case CC_ALNUM:
173 result = ISALNUM (c);
174 break;
175 case CC_ALPHA:
176 result = ISALPHA (c);
177 break;
178 case CC_BLANK:
179 result = ISBLANK (c);
180 break;
181 case CC_CNTRL:
182 result = ISCNTRL (c);
183 break;
184 case CC_DIGIT:
185 result = ISDIGIT (c);
186 break;
187 case CC_GRAPH:
188 result = ISGRAPH (c);
189 break;
190 case CC_LOWER:
191 result = ISLOWER (c);
192 break;
193 case CC_PRINT:
194 result = ISPRINT (c);
195 break;
196 case CC_PUNCT:
197 result = ISPUNCT (c);
198 break;
199 case CC_SPACE:
200 result = ISSPACE (c);
201 break;
202 case CC_UPPER:
203 result = ISUPPER (c);
204 break;
205 case CC_WORD:
206 result = (ISALNUM (c) || c == '_');
207 break;
208 case CC_XDIGIT:
209 result = ISXDIGIT (c);
210 break;
211 default:
212 result = -1;
213 break;
214 }
215
216 return result;
217}
218
219/* Now include `sm_loop.c' for single-byte characters. */
220/* The result of FOLD is an `unsigned char' */
221# define FOLD(c) ((flags & FNM_CASEFOLD) \
222 ? TOLOWER ((unsigned char)c) \
223 : ((unsigned char)c))
224
225#define FCT internal_strmatch
226#define GMATCH gmatch
227#define COLLSYM collsym
228#define PARSE_COLLSYM parse_collsym
229#define BRACKMATCH brackmatch
Chet Rameyac50fba2014-02-26 09:36:43 -0500230#define PATSCAN glob_patscan
Jari Aalto7117c2d2002-07-17 14:10:11 +0000231#define STRCOMPARE strcompare
232#define EXTMATCH extmatch
233#define STRCHR(S, C) strchr((S), (C))
234#define STRCOLL(S1, S2) strcoll((S1), (S2))
235#define STRLEN(S) strlen(S)
236#define STRCMP(S1, S2) strcmp((S1), (S2))
Chet Rameyac50fba2014-02-26 09:36:43 -0500237#define RANGECMP(C1, C2, F) rangecmp((C1), (C2), (F))
Jari Aalto7117c2d2002-07-17 14:10:11 +0000238#define COLLEQUIV(C1, C2) collequiv((C1), (C2))
239#define CTYPE_T enum char_class
240#define IS_CCLASS(C, S) is_cclass((C), (S))
241#include "sm_loop.c"
242
243#if HANDLE_MULTIBYTE
244
/* Character-type parameters for the wide-character pass over `sm_loop.c'. */
# define CHAR		wchar_t
# define U_CHAR		wint_t
# define XCHAR		wchar_t
# define INT		wint_t
# define L(CS)		L##CS		/* turn a literal into a wide literal */
# define INVALID	WEOF

/* Wide-string equality helpers, replacing the byte-string versions. */
# undef STREQ
# undef STREQN
# define STREQ(s1, s2)		((wcscmp (s1, s2) == 0))
# define STREQN(a, b, n)	((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0)
256
Chet Ramey495aee42011-11-22 19:11:26 -0500257extern char *mbsmbchar __P((const char *));
258
Jari Aalto7117c2d2002-07-17 14:10:11 +0000259static int
Chet Rameyac50fba2014-02-26 09:36:43 -0500260rangecmp_wc (c1, c2, forcecoll)
Jari Aalto7117c2d2002-07-17 14:10:11 +0000261 wint_t c1, c2;
Chet Rameyac50fba2014-02-26 09:36:43 -0500262 int forcecoll;
Jari Aalto7117c2d2002-07-17 14:10:11 +0000263{
264 static wchar_t s1[2] = { L' ', L'\0' };
265 static wchar_t s2[2] = { L' ', L'\0' };
Jari Aalto7117c2d2002-07-17 14:10:11 +0000266
267 if (c1 == c2)
268 return 0;
269
Chet Rameyac50fba2014-02-26 09:36:43 -0500270 if (forcecoll == 0 && glob_asciirange && c1 <= UCHAR_MAX && c2 <= UCHAR_MAX)
271 return ((int)(c1 - c2));
272
Jari Aalto7117c2d2002-07-17 14:10:11 +0000273 s1[0] = c1;
274 s2[0] = c2;
275
276 return (wcscoll (s1, s2));
277}
278
/* Equivalence test for wide characters: a character is treated as
   equivalent only to itself.  Returns 1 when C and EQUIV are the same
   character, 0 otherwise. */
static int
collequiv_wc (c, equiv)
     wint_t c, equiv;
{
  if (c != equiv)
    return 0;
  return 1;
}
285
286/* Helper function for collating symbol. */
287# define _COLLSYM _collwcsym
288# define __COLLSYM __collwcsym
289# define POSIXCOLL posix_collwcsyms
290# include "collsyms.h"
291
292static wint_t
293collwcsym (s, len)
294 wchar_t *s;
295 int len;
296{
297 register struct _collwcsym *csp;
298
299 for (csp = posix_collwcsyms; csp->name; csp++)
300 {
301 if (STREQN(csp->name, s, len) && csp->name[len] == L'\0')
302 return (csp->code);
303 }
304 if (len == 1)
305 return s[0];
306 return INVALID;
307}
308
/* Test whether the wide character WC belongs to the character class called
   NAME (a wide string such as L"alpha").

   Returns non-zero if WC is a member, 0 if it is not, and -1 if the class
   cannot be resolved: NAME is unknown to wctype, NAME cannot be converted
   to a multibyte string, or memory for the conversion is unavailable.

   Two names get special treatment:
     "ascii" -- if wctype does not know it, WC is a member when it maps to
		a single byte no larger than 0x7F;
     "word"  -- the "alnum" class plus the underscore. */
static int
is_wcclass (wc, name)
     wint_t wc;
     wchar_t *name;
{
  const wchar_t *src;
  char *mbs;
  mbstate_t state;
  size_t mbssize, mbslength;
  wctype_t desc;
  int want_word;

  if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
    {
      int c;

      /* EOF from wctob means WC has no single-byte form, so it cannot be
	 ASCII.  The explicit test matters because EOF is negative and would
	 otherwise pass the <= 0x7F comparison. */
      c = wctob (wc);
      return (c != EOF && c <= 0x7F);
    }

  want_word = (wcscmp (name, L"word") == 0);
  src = want_word ? L"alnum" : name;

  /* wctype wants a multibyte name; size the buffer for the worst case of
     MB_CUR_MAX bytes per wide character plus the terminator. */
  mbssize = wcslen (src) * MB_CUR_MAX + 1;
  mbs = (char *) malloc (mbssize);
  if (mbs == 0)
    return -1;		/* out of memory: report the class as unresolvable */

  /* wcsrtombs updates the pointer it is handed, so it gets the local SRC
     rather than the NAME parameter. */
  memset (&state, '\0', sizeof (mbstate_t));
  mbslength = wcsrtombs (mbs, &src, mbssize, &state);

  if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
    {
      free (mbs);
      return -1;
    }
  desc = wctype (mbs);
  free (mbs);

  if (desc == (wctype_t)0)
    return -1;

  if (want_word)
    return (iswctype (wc, desc) || wc == L'_');
  else
    return (iswctype (wc, desc));
}
354
355/* Now include `sm_loop.c' for multibyte characters. */
356#define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))
357#define FCT internal_wstrmatch
358#define GMATCH gmatch_wc
359#define COLLSYM collwcsym
360#define PARSE_COLLSYM parse_collwcsym
361#define BRACKMATCH brackmatch_wc
Chet Rameyac50fba2014-02-26 09:36:43 -0500362#define PATSCAN glob_patscan_wc
Jari Aalto7117c2d2002-07-17 14:10:11 +0000363#define STRCOMPARE wscompare
364#define EXTMATCH extmatch_wc
365#define STRCHR(S, C) wcschr((S), (C))
366#define STRCOLL(S1, S2) wcscoll((S1), (S2))
367#define STRLEN(S) wcslen(S)
368#define STRCMP(S1, S2) wcscmp((S1), (S2))
Chet Rameyac50fba2014-02-26 09:36:43 -0500369#define RANGECMP(C1, C2, F) rangecmp_wc((C1), (C2), (F))
Jari Aalto7117c2d2002-07-17 14:10:11 +0000370#define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2))
371#define CTYPE_T enum char_class
372#define IS_CCLASS(C, S) is_wcclass((C), (S))
373#include "sm_loop.c"
374
#endif /* HANDLE_MULTIBYTE */
376
/* Match STRING against PATTERN under FLAGS and return the matcher's result.

   Without HANDLE_MULTIBYTE this is a direct call to the single-byte matcher,
   internal_strmatch.  With it, both arguments are converted to wide strings
   and handed to internal_wstrmatch, except that the single-byte matcher is
   still used when:
     - the locale's MB_CUR_MAX is 1;
     - mbsmbchar returns 0 for both STRING and PATTERN (presumably meaning
       neither contains a multibyte character); or
     - either conversion reports (size_t)-1 or (size_t)-2.  */
int
xstrmatch (pattern, string, flags)
     char *pattern;
     char *string;
     int flags;
{
#if HANDLE_MULTIBYTE
  int ret;
  size_t n;
  wchar_t *wpattern, *wstring;

  /* Constant-time locale check first, so single-byte locales never pay for
     scanning the two strings. */
  if (MB_CUR_MAX == 1)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  if (mbsmbchar (string) == 0 && mbsmbchar (pattern) == 0)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  /* NOTE(review): nothing is freed on this failure path, which assumes
     xdupmbstowcs leaves no allocation behind when it fails -- confirm. */
  n = xdupmbstowcs (&wpattern, NULL, pattern);
  if (n == (size_t)-1 || n == (size_t)-2)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  n = xdupmbstowcs (&wstring, NULL, string);
  if (n == (size_t)-1 || n == (size_t)-2)
    {
      free (wpattern);
      return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
    }

  ret = internal_wstrmatch (wpattern, wstring, flags);

  free (wpattern);
  free (wstring);

  return ret;
#else
  return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
#endif /* !HANDLE_MULTIBYTE */
}