/* strmatch.c -- ksh-like extended pattern matching for the shell and filename
   globbing. */
| 3 | |
/* Copyright (C) 1991-2011 Free Software Foundation, Inc.

   This file is part of GNU Bash, the Bourne Again SHell.

   Bash is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   Bash is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bash.  If not, see <http://www.gnu.org/licenses/>.
*/
Jari Aalto | 7117c2d | 2002-07-17 14:10:11 +0000 | [diff] [blame] | 21 | |
| 22 | #include <config.h> |
| 23 | |
| 24 | #include <stdio.h> /* for debugging */ |
| 25 | |
| 26 | #include "strmatch.h" |
| 27 | #include <chartypes.h> |
| 28 | |
| 29 | #include "bashansi.h" |
| 30 | #include "shmbutil.h" |
| 31 | #include "xmalloc.h" |
| 32 | |
/* First pass: set up the names that `sm_loop.c' is compiled against when
   matching single-byte characters. */
#define CHAR    unsigned char
#define U_CHAR  unsigned char
#define XCHAR   char
#define INT     int
#define L(CS)   CS
#define INVALID -1

/* String equality tests.  The first characters are compared before the
   library routine is called at all. */
#undef STREQ
#undef STREQN
#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)

/* Builds that do not choose a default get 0. */
#ifndef GLOBASCII_DEFAULT
#  define GLOBASCII_DEFAULT 0
#endif

/* When non-zero, the range comparison helpers in this file use plain
   character-code ordering unless the caller forces collation. */
int glob_asciirange = GLOBASCII_DEFAULT;
| 51 | |
/* Range comparisons inside bracket expressions go through strcoll(3), even
   though that can have unwanted side effects in locales other than POSIX
   or US.  For instance, in the de locale, [A-Z] matches all characters.
   When glob_asciirange is non-zero and the caller is not forcing the use
   of strcoll (e.g., for explicit collating symbols), characters are
   ordered by their codes instead, as if in the C locale. */

#if defined (HAVE_STRCOLL)
/* Compare the single-byte characters C1 and C2 for range matching.
   Both are masked to eight bits first.  Returns 0 when they are then
   equal, otherwise a negative or positive value giving their order.
   FORCECOLL non-zero means strcoll is used even if glob_asciirange is
   set.  When strcoll itself reports a tie between two different
   characters, the difference of their codes breaks it, so the result is
   zero only for identical characters. */
static int
rangecmp (c1, c2, forcecoll)
     int c1, c2;
     int forcecoll;
{
  static char lhs[2] = { ' ', '\0' };
  static char rhs[2] = { ' ', '\0' };
  int order;

  /* Eight bits only.  Period. */
  c1 &= 0xFF;
  c2 &= 0xFF;

  if (c1 == c2)
    return 0;

  if (glob_asciirange && forcecoll == 0)
    return c1 - c2;

  /* strcoll wants strings, so wrap each character in a one-character
     string. */
  lhs[0] = c1;
  rhs[0] = c2;

  order = strcoll (lhs, rhs);
  return (order != 0) ? order : (c1 - c2);
}
#else /* !HAVE_STRCOLL */
/* No strcoll available: order characters by code.  F is ignored. */
#  define rangecmp(c1, c2, f) ((int)(c1) - (int)(c2))
#endif /* !HAVE_STRCOLL */
| 90 | |
#if defined (HAVE_STRCOLL)
/* Return 1 if C1 and C2 compare equal under rangecmp with collation
   forced, and 0 otherwise. */
static int
collequiv (c1, c2)
     int c1, c2;
{
  int order;

  order = rangecmp (c1, c2, 1);
  return (order == 0) ? 1 : 0;
}
#else
/* Without strcoll, only identical characters are equivalent. */
#  define collequiv(c1, c2) ((c1) == (c2))
#endif
| 101 | |
| 102 | #define _COLLSYM _collsym |
| 103 | #define __COLLSYM __collsym |
| 104 | #define POSIXCOLL posix_collsyms |
| 105 | #include "collsyms.h" |
| 106 | |
| 107 | static int |
| 108 | collsym (s, len) |
Jari Aalto | 95732b4 | 2005-12-07 14:08:12 +0000 | [diff] [blame] | 109 | CHAR *s; |
Jari Aalto | 7117c2d | 2002-07-17 14:10:11 +0000 | [diff] [blame] | 110 | int len; |
| 111 | { |
| 112 | register struct _collsym *csp; |
Jari Aalto | 95732b4 | 2005-12-07 14:08:12 +0000 | [diff] [blame] | 113 | char *x; |
Jari Aalto | 7117c2d | 2002-07-17 14:10:11 +0000 | [diff] [blame] | 114 | |
Jari Aalto | 95732b4 | 2005-12-07 14:08:12 +0000 | [diff] [blame] | 115 | x = (char *)s; |
Jari Aalto | 7117c2d | 2002-07-17 14:10:11 +0000 | [diff] [blame] | 116 | for (csp = posix_collsyms; csp->name; csp++) |
| 117 | { |
Jari Aalto | 95732b4 | 2005-12-07 14:08:12 +0000 | [diff] [blame] | 118 | if (STREQN(csp->name, x, len) && csp->name[len] == '\0') |
Jari Aalto | 7117c2d | 2002-07-17 14:10:11 +0000 | [diff] [blame] | 119 | return (csp->code); |
| 120 | } |
| 121 | if (len == 1) |
| 122 | return s[0]; |
| 123 | return INVALID; |
| 124 | } |
| 125 | |
/* Single-byte character classification. */
#if !defined (isascii) && !defined (HAVE_ISASCII)
#  define isascii(c) ((unsigned int)(c) <= 0177)
#endif

/* Character classes recognized by is_cclass.  Each enumerator's value is
   also the index of its name in cclass_name, so the two lists must be
   kept in the same order. */
enum char_class
  {
    CC_NO_CLASS = 0,
    CC_ASCII,
    CC_ALNUM,
    CC_ALPHA,
    CC_BLANK,
    CC_CNTRL,
    CC_DIGIT,
    CC_GRAPH,
    CC_LOWER,
    CC_PRINT,
    CC_PUNCT,
    CC_SPACE,
    CC_UPPER,
    CC_WORD,
    CC_XDIGIT
  };

/* Class names, indexed by enum char_class.  Slot 0 (CC_NO_CLASS) is an
   empty placeholder. */
static char const *const cclass_name[] =
  {
    "",
    "ascii",
    "alnum",
    "alpha",
    "blank",
    "cntrl",
    "digit",
    "graph",
    "lower",
    "print",
    "punct",
    "space",
    "upper",
    "word",
    "xdigit"
  };

/* Number of slots in cclass_name, placeholder included. */
#define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
| 146 | |
| 147 | static int |
| 148 | is_cclass (c, name) |
| 149 | int c; |
| 150 | const char *name; |
| 151 | { |
| 152 | enum char_class char_class = CC_NO_CLASS; |
| 153 | int i, result; |
| 154 | |
| 155 | for (i = 1; i < N_CHAR_CLASS; i++) |
| 156 | { |
| 157 | if (STREQ (name, cclass_name[i])) |
| 158 | { |
| 159 | char_class = (enum char_class)i; |
| 160 | break; |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | if (char_class == 0) |
| 165 | return -1; |
| 166 | |
| 167 | switch (char_class) |
| 168 | { |
| 169 | case CC_ASCII: |
| 170 | result = isascii (c); |
| 171 | break; |
| 172 | case CC_ALNUM: |
| 173 | result = ISALNUM (c); |
| 174 | break; |
| 175 | case CC_ALPHA: |
| 176 | result = ISALPHA (c); |
| 177 | break; |
| 178 | case CC_BLANK: |
| 179 | result = ISBLANK (c); |
| 180 | break; |
| 181 | case CC_CNTRL: |
| 182 | result = ISCNTRL (c); |
| 183 | break; |
| 184 | case CC_DIGIT: |
| 185 | result = ISDIGIT (c); |
| 186 | break; |
| 187 | case CC_GRAPH: |
| 188 | result = ISGRAPH (c); |
| 189 | break; |
| 190 | case CC_LOWER: |
| 191 | result = ISLOWER (c); |
| 192 | break; |
| 193 | case CC_PRINT: |
| 194 | result = ISPRINT (c); |
| 195 | break; |
| 196 | case CC_PUNCT: |
| 197 | result = ISPUNCT (c); |
| 198 | break; |
| 199 | case CC_SPACE: |
| 200 | result = ISSPACE (c); |
| 201 | break; |
| 202 | case CC_UPPER: |
| 203 | result = ISUPPER (c); |
| 204 | break; |
| 205 | case CC_WORD: |
| 206 | result = (ISALNUM (c) || c == '_'); |
| 207 | break; |
| 208 | case CC_XDIGIT: |
| 209 | result = ISXDIGIT (c); |
| 210 | break; |
| 211 | default: |
| 212 | result = -1; |
| 213 | break; |
| 214 | } |
| 215 | |
| 216 | return result; |
| 217 | } |
| 218 | |
| 219 | /* Now include `sm_loop.c' for single-byte characters. */ |
| 220 | /* The result of FOLD is an `unsigned char' */ |
| 221 | # define FOLD(c) ((flags & FNM_CASEFOLD) \ |
| 222 | ? TOLOWER ((unsigned char)c) \ |
| 223 | : ((unsigned char)c)) |
| 224 | |
| 225 | #define FCT internal_strmatch |
| 226 | #define GMATCH gmatch |
| 227 | #define COLLSYM collsym |
| 228 | #define PARSE_COLLSYM parse_collsym |
| 229 | #define BRACKMATCH brackmatch |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 230 | #define PATSCAN glob_patscan |
Jari Aalto | 7117c2d | 2002-07-17 14:10:11 +0000 | [diff] [blame] | 231 | #define STRCOMPARE strcompare |
| 232 | #define EXTMATCH extmatch |
| 233 | #define STRCHR(S, C) strchr((S), (C)) |
| 234 | #define STRCOLL(S1, S2) strcoll((S1), (S2)) |
| 235 | #define STRLEN(S) strlen(S) |
| 236 | #define STRCMP(S1, S2) strcmp((S1), (S2)) |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 237 | #define RANGECMP(C1, C2, F) rangecmp((C1), (C2), (F)) |
Jari Aalto | 7117c2d | 2002-07-17 14:10:11 +0000 | [diff] [blame] | 238 | #define COLLEQUIV(C1, C2) collequiv((C1), (C2)) |
| 239 | #define CTYPE_T enum char_class |
| 240 | #define IS_CCLASS(C, S) is_cclass((C), (S)) |
| 241 | #include "sm_loop.c" |
| 242 | |
| 243 | #if HANDLE_MULTIBYTE |
| 244 | |
/* Second pass: the same names, this time set up for wide characters. */
#  define CHAR    wchar_t
#  define U_CHAR  wint_t
#  define XCHAR   wchar_t
#  define INT     wint_t
#  define L(CS)   L##CS
#  define INVALID WEOF

/* Wide-string versions of the equality tests. */
#  undef STREQ
#  undef STREQN
#  define STREQ(s1, s2) ((wcscmp (s1, s2) == 0))
#  define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0)
| 256 | |
Chet Ramey | 495aee4 | 2011-11-22 19:11:26 -0500 | [diff] [blame] | 257 | extern char *mbsmbchar __P((const char *)); |
| 258 | |
Jari Aalto | 7117c2d | 2002-07-17 14:10:11 +0000 | [diff] [blame] | 259 | static int |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 260 | rangecmp_wc (c1, c2, forcecoll) |
Jari Aalto | 7117c2d | 2002-07-17 14:10:11 +0000 | [diff] [blame] | 261 | wint_t c1, c2; |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 262 | int forcecoll; |
Jari Aalto | 7117c2d | 2002-07-17 14:10:11 +0000 | [diff] [blame] | 263 | { |
| 264 | static wchar_t s1[2] = { L' ', L'\0' }; |
| 265 | static wchar_t s2[2] = { L' ', L'\0' }; |
Jari Aalto | 7117c2d | 2002-07-17 14:10:11 +0000 | [diff] [blame] | 266 | |
| 267 | if (c1 == c2) |
| 268 | return 0; |
| 269 | |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 270 | if (forcecoll == 0 && glob_asciirange && c1 <= UCHAR_MAX && c2 <= UCHAR_MAX) |
| 271 | return ((int)(c1 - c2)); |
| 272 | |
Jari Aalto | 7117c2d | 2002-07-17 14:10:11 +0000 | [diff] [blame] | 273 | s1[0] = c1; |
| 274 | s2[0] = c2; |
| 275 | |
| 276 | return (wcscoll (s1, s2)); |
| 277 | } |
| 278 | |
/* Return 1 if the wide character C is the same character as EQUIV, and
   0 otherwise. */
static int
collequiv_wc (c, equiv)
     wint_t c, equiv;
{
  if (c != equiv)
    return 0;
  return 1;
}
| 285 | |
| 286 | /* Helper function for collating symbol. */ |
| 287 | # define _COLLSYM _collwcsym |
| 288 | # define __COLLSYM __collwcsym |
| 289 | # define POSIXCOLL posix_collwcsyms |
| 290 | # include "collsyms.h" |
| 291 | |
| 292 | static wint_t |
| 293 | collwcsym (s, len) |
| 294 | wchar_t *s; |
| 295 | int len; |
| 296 | { |
| 297 | register struct _collwcsym *csp; |
| 298 | |
| 299 | for (csp = posix_collwcsyms; csp->name; csp++) |
| 300 | { |
| 301 | if (STREQN(csp->name, s, len) && csp->name[len] == L'\0') |
| 302 | return (csp->code); |
| 303 | } |
| 304 | if (len == 1) |
| 305 | return s[0]; |
| 306 | return INVALID; |
| 307 | } |
| 308 | |
/* Test whether the wide character WC belongs to the character class
   called NAME.  Returns -1 if the class cannot be resolved: NAME is not
   known to wctype, it cannot be converted to a multibyte string, or the
   conversion buffer cannot be allocated.  Otherwise returns non-zero
   when WC is in the class and 0 when it is not.

   Two names are handled here rather than by the C library: "ascii",
   when wctype does not know it, and "word", which is "alnum" plus the
   underscore. */
static int
is_wcclass (wc, name)
     wint_t wc;
     wchar_t *name;
{
  char *mbs;
  mbstate_t state;
  size_t mbsize, mbslength;
  wctype_t desc;
  int want_word;

  if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
    {
      int c;

      /* A character with no single-byte form cannot be ASCII. */
      if ((c = wctob (wc)) == EOF)
        return 0;
      else
        return (c <= 0x7F);
    }

  want_word = (wcscmp (name, L"word") == 0);
  if (want_word)
    name = L"alnum";

  /* wctype takes a multibyte name, so NAME has to be converted.  Allow
     MB_CUR_MAX bytes per wide character plus the terminating null. */
  mbsize = wcslen (name) * MB_CUR_MAX + 1;
  mbs = (char *) malloc (mbsize);
  if (mbs == 0)
    return -1;          /* without a buffer the class cannot be looked up */

  memset (&state, '\0', sizeof (mbstate_t));
  /* wcsrtombs updates NAME through the pointer it is given, so NAME must
     not be used after this call. */
  mbslength = wcsrtombs (mbs, (const wchar_t **)&name, mbsize, &state);

  if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
    {
      free (mbs);
      return -1;
    }
  desc = wctype (mbs);
  free (mbs);

  if (desc == (wctype_t)0)
    return -1;

  if (want_word)
    return (iswctype (wc, desc) || wc == L'_');
  else
    return (iswctype (wc, desc));
}
| 354 | |
| 355 | /* Now include `sm_loop.c' for multibyte characters. */ |
| 356 | #define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c)) |
| 357 | #define FCT internal_wstrmatch |
| 358 | #define GMATCH gmatch_wc |
| 359 | #define COLLSYM collwcsym |
| 360 | #define PARSE_COLLSYM parse_collwcsym |
| 361 | #define BRACKMATCH brackmatch_wc |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 362 | #define PATSCAN glob_patscan_wc |
Jari Aalto | 7117c2d | 2002-07-17 14:10:11 +0000 | [diff] [blame] | 363 | #define STRCOMPARE wscompare |
| 364 | #define EXTMATCH extmatch_wc |
| 365 | #define STRCHR(S, C) wcschr((S), (C)) |
| 366 | #define STRCOLL(S1, S2) wcscoll((S1), (S2)) |
| 367 | #define STRLEN(S) wcslen(S) |
| 368 | #define STRCMP(S1, S2) wcscmp((S1), (S2)) |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 369 | #define RANGECMP(C1, C2, F) rangecmp_wc((C1), (C2), (F)) |
Jari Aalto | 7117c2d | 2002-07-17 14:10:11 +0000 | [diff] [blame] | 370 | #define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2)) |
| 371 | #define CTYPE_T enum char_class |
| 372 | #define IS_CCLASS(C, S) is_wcclass((C), (S)) |
| 373 | #include "sm_loop.c" |
| 374 | |
#endif /* HANDLE_MULTIBYTE */
| 376 | |
/* Match STRING against PATTERN under FLAGS and return the matcher's
   result.

   With HANDLE_MULTIBYTE, the single-byte matcher is used when neither
   argument contains a multibyte character (as reported by mbsmbchar),
   when the locale's MB_CUR_MAX is 1, or when either argument cannot be
   converted to a wide string.  Otherwise both arguments are converted
   and handed to the wide-character matcher.  Without HANDLE_MULTIBYTE
   the single-byte matcher is always used. */
int
xstrmatch (pattern, string, flags)
     char *pattern;
     char *string;
     int flags;
{
#if HANDLE_MULTIBYTE
  int ret;
  size_t n;
  wchar_t *wpattern, *wstring;

  if (mbsmbchar (string) == 0 && mbsmbchar (pattern) == 0)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  if (MB_CUR_MAX == 1)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  /* xdupmbstowcs presumably stores a newly allocated wide copy through
     its first argument; (size_t)-1 and (size_t)-2 are treated as
     conversion failures -- confirm against its definition. */
  n = xdupmbstowcs (&wpattern, NULL, pattern);
  if (n == (size_t)-1 || n == (size_t)-2)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  n = xdupmbstowcs (&wstring, NULL, string);
  if (n == (size_t)-1 || n == (size_t)-2)
    {
      /* The pattern was already converted; release it before falling
         back to the single-byte matcher. */
      free (wpattern);
      return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
    }

  ret = internal_wstrmatch (wpattern, wstring, flags);

  free (wpattern);
  free (wstring);

  return ret;
#else
  return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
#endif /* !HANDLE_MULTIBYTE */
}