blob: 96b1bc0852ab3f325a38c6c64a430df6899c6f4e [file] [log] [blame]
Chet Ramey495aee42011-11-22 19:11:26 -05001/* gmisc.c -- miscellaneous pattern matching utility functions for Bash.
2
3 Copyright (C) 2010 Free Software Foundation, Inc.
4
5 This file is part of GNU Bash, the Bourne-Again SHell.
6
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19*/
20
21#include <config.h>
22
23#include "bashtypes.h"
24
25#if defined (HAVE_UNISTD_H)
26# include <unistd.h>
27#endif
28
29#include "bashansi.h"
30#include "shmbutil.h"
31
32#include "stdc.h"
33
34#ifndef LPAREN
35# define LPAREN '('
36#endif
37#ifndef RPAREN
38# define RPAREN ')'
39#endif
40
41#if defined (HANDLE_MULTIBYTE)
42#define WLPAREN L'('
43#define WRPAREN L')'
44
Chet Ramey34ce4022014-05-16 14:18:39 -040045extern char *glob_patscan __P((char *, char *, int));
46
/* Wide-character quick check: return non-zero if the first character of
   WSTRING could be matched by the first element of the pattern WPAT, and
   zero otherwise.  Only the leading pattern element is examined; an empty
   WSTRING never matches. */
int
match_pattern_wchar (wpat, wstring)
     wchar_t *wpat, *wstring;
{
  wchar_t lead;

  /* Nothing in the string for the pattern to start matching against. */
  if (wstring[0] == L'\0')
    return (0);

  lead = wpat[0];

  /* `*' and a bracket expression are accepted for any non-empty string.
     `?' is too, whether or not it introduces the extended pattern `?('. */
  if (lead == L'*' || lead == L'[' || lead == L'?')
    return (1);

  /* A backslash quotes the next pattern character, which is compared
     literally (a trailing backslash compares against the NUL and fails). */
  if (lead == L'\\')
    return (wstring[0] == wpat[1]);

  /* `+(', `!(' and `@(' start extended patterns, which are always
     accepted; without the paren the character is an ordinary literal. */
  if ((lead == L'+' || lead == L'!' || lead == L'@') && wpat[1] == L'(')
    return (1);

  /* Ordinary character (including the NUL of an empty pattern). */
  return (wstring[0] == lead);
}
76
/* Wide-character version.  Count the characters a string must have in order
   to match the pattern WPAT, when every element of WPAT matches exactly one
   character.  Returns -1 as soon as a `*' or an extended pattern
   (`?(', `+(', `!(', `@(') is seen, since the length is then not fixed.
   A bracket expression counts as one character; if it is never closed, the
   characters scanned while looking for the `]' are added to the count
   instead and the scan stops.  WMAX is accepted but not consulted. */
int
wmatchlen (wpat, wmax)
     wchar_t *wpat;
     size_t wmax;
{
  wchar_t *s;
  wchar_t ch;
  int total, span;
  int cclass_open, collsym_open, equiv_open;

  s = wpat;
  total = 0;
  /* These are set up once per call, not once per bracket expression. */
  cclass_open = collsym_open = equiv_open = 0;

  while ((ch = *s++) != L'\0')
    {
      if (ch == L'*')
        return (-1);

      if (ch == L'?' || ch == L'+' || ch == L'!' || ch == L'@')
        {
          if (*s == L'(')
            return (-1);                /* XXX for now */
          total++;
          continue;
        }

      if (ch == L'\\')
        {
          /* The backslash and the character it quotes count once; a
             trailing backslash counts by itself. */
          total++;
          if (*s == L'\0')
            return (total);
          s++;
          continue;
        }

      if (ch != L'[')
        {
          total++;
          continue;
        }

      /* Bracket expression: look for the closing `]', stepping over
         embedded [:...:], [....] and [=...=].  SPAN tracks what to add to
         the count if the bracket turns out to be unterminated.  The first
         character after the `[' is never treated as the terminator. */
      span = 1;
      ch = *s++;
      do
        {
          if (ch == L'\0')
            return (total + span);      /* ran off the end of the pattern */

          span++;
          switch (ch)
            {
            case L'\\':
              /* Bail if the backslash, or the character it quotes, is the
                 last thing in the pattern; otherwise skip the quoted
                 character. */
              if (*s == L'\0' || *++s == L'\0')
                return (total + span);
              break;

            case L'[':
              if (*s == L':')           /* character class */
                {
                  s++;
                  cclass_open = 1;
                }
              else if (*s == L'.' || *s == L'=')
                {
                  /* collating symbol or equivalence class */
                  if (*s == L'.')
                    collsym_open = 1;
                  else
                    equiv_open = 1;
                  s++;
                  /* a right bracket can itself be the symbol/class member */
                  if (*s == L']')
                    {
                      s++;
                      span++;
                    }
                }
              break;

            case L':':
              if (cclass_open && *s == L']')
                {
                  s++;
                  cclass_open = 0;
                }
              break;

            case L'.':
              if (collsym_open && *s == L']')
                {
                  s++;
                  collsym_open = 0;
                }
              break;

            case L'=':
              if (equiv_open && *s == L']')
                {
                  s++;
                  equiv_open = 0;
                }
              break;

            default:
              break;
            }
        }
      while ((ch = *s++) != L']');

      total++;          /* bracket expression can only match one char */
    }

  return (total);
}
203#endif
204
/* Return 1 if PAT begins with an extended-glob introducer, i.e. one of
   `*', `+', `!', `@' or `?' immediately followed by `(', and 0 otherwise.
   A null or empty PAT is not an extended pattern. */
int
extglob_pattern_p (pat)
     char *pat;
{
  /* Tolerate a null pattern, as glob_dirscan does for its argument. */
  if (pat == 0)
    return (0);

  switch (pat[0])
    {
    case '*':
    case '+':
    case '!':
    case '@':
    case '?':
      /* pat[0] is not NUL here, so pat[1] is within the string. */
      return (pat[1] == '(');
    default:
      return (0);
    }
}
223
/* Quick check: return non-zero if the first character of STRING could be
   matched by the first element of the pattern PAT, and zero otherwise.
   Only the leading pattern element is examined; an empty STRING never
   matches.  Used to avoid n2 calls to strmatch(). */
int
match_pattern_char (pat, string)
     char *pat, *string;
{
  char lead;

  /* Nothing in the string for the pattern to start matching against. */
  if (string[0] == '\0')
    return (0);

  lead = pat[0];

  /* `*' and a bracket expression are accepted for any non-empty string.
     `?' is too, whether or not it introduces the extended pattern `?('. */
  if (lead == '*' || lead == '[' || lead == '?')
    return (1);

  /* A backslash quotes the next pattern character, which is compared
     literally (a trailing backslash compares against the NUL and fails). */
  if (lead == '\\')
    return (string[0] == pat[1]);

  /* `+(', `!(' and `@(' start extended patterns, which are always
     accepted; without the paren the character is an ordinary literal. */
  if ((lead == '+' || lead == '!' || lead == '@') && pat[1] == '(')
    return (1);

  /* Ordinary character (including the NUL of an empty pattern). */
  return (string[0] == lead);
}
253
/* Count the characters a string must have in order to match the pattern
   PAT, when every element of PAT matches exactly one character.  Returns -1
   as soon as a `*' or an extended pattern (`?(', `+(', `!(', `@(') is seen,
   since the length is then not fixed.  A bracket expression counts as one
   character; if it is never closed, the characters scanned while looking
   for the `]' are added to the count instead and the scan stops.  MAX is
   accepted but not consulted. */
int
umatchlen (pat, max)
     char *pat;
     size_t max;
{
  char *s;
  char ch;
  int total, span;
  int cclass_open, collsym_open, equiv_open;

  s = pat;
  total = 0;
  /* These are set up once per call, not once per bracket expression. */
  cclass_open = collsym_open = equiv_open = 0;

  while ((ch = *s++) != '\0')
    {
      if (ch == '*')
        return (-1);

      if (ch == '?' || ch == '+' || ch == '!' || ch == '@')
        {
          if (*s == '(')
            return (-1);                /* XXX for now */
          total++;
          continue;
        }

      if (ch == '\\')
        {
          /* The backslash and the character it quotes count once; a
             trailing backslash counts by itself. */
          total++;
          if (*s == '\0')
            return (total);
          s++;
          continue;
        }

      if (ch != '[')
        {
          total++;
          continue;
        }

      /* Bracket expression: look for the closing `]', stepping over
         embedded [:...:], [....] and [=...=].  SPAN tracks what to add to
         the count if the bracket turns out to be unterminated.  The first
         character after the `[' is never treated as the terminator. */
      span = 1;
      ch = *s++;
      do
        {
          if (ch == '\0')
            return (total + span);      /* ran off the end of the pattern */

          span++;
          switch (ch)
            {
            case '\\':
              /* Bail if the backslash, or the character it quotes, is the
                 last thing in the pattern; otherwise skip the quoted
                 character. */
              if (*s == '\0' || *++s == '\0')
                return (total + span);
              break;

            case '[':
              if (*s == ':')            /* character class */
                {
                  s++;
                  cclass_open = 1;
                }
              else if (*s == '.' || *s == '=')
                {
                  /* collating symbol or equivalence class */
                  if (*s == '.')
                    collsym_open = 1;
                  else
                    equiv_open = 1;
                  s++;
                  /* a right bracket can itself be the symbol/class member */
                  if (*s == ']')
                    {
                      s++;
                      span++;
                    }
                }
              break;

            case ':':
              if (cclass_open && *s == ']')
                {
                  s++;
                  cclass_open = 0;
                }
              break;

            case '.':
              if (collsym_open && *s == ']')
                {
                  s++;
                  collsym_open = 0;
                }
              break;

            case '=':
              if (equiv_open && *s == ']')
                {
                  s++;
                  equiv_open = 0;
                }
              break;

            default:
              break;
            }
        }
      while ((ch = *s++) != ']');

      total++;          /* bracket expression can only match one char */
    }

  return (total);
}
Chet Ramey34ce4022014-05-16 14:18:39 -0400380
/* Walk PAT and return a pointer to the final occurrence of DIRSEP that is
   seen while skipping over extended-glob sub-patterns x(...), or 0 if no
   DIRSEP is seen (or PAT is null).  This is only called when extended_glob
   is set, so we have to skip over extglob patterns. */
char *
glob_dirscan (pat, dirsep)
     char *pat;
     int dirsep;
{
  /* Declare the scanner locally.  The file-level declaration lives inside
     the HANDLE_MULTIBYTE conditional, so without this a single-byte build
     would call it with an implicit `int' return type and could truncate
     the returned pointer. */
  extern char *glob_patscan ();
  char *p, *d, *pe, *se;

  d = pe = se = 0;
  for (p = pat; p && *p; p++)
    {
      if (extglob_pattern_p (p))
        {
          /* SE is the last character of the pattern; it is the same for
             every sub-pattern, so compute it only once. */
          if (se == 0)
            se = p + strlen (p) - 1;

          /* Scan from just past the `x(' introducer.  NOTE(review):
             glob_patscan is defined elsewhere; presumably it returns the
             position following the sub-pattern, or 0 when it cannot find
             the end -- confirm against its definition. */
          pe = glob_patscan (p + 2, se, 0);
          if (pe == 0)
            continue;           /* nothing skipped; resume at the next char */
          else if (*pe == 0)
            break;              /* sub-pattern runs to the end of PAT */
          p = pe - 1;           /* will do increment above */
          continue;
        }
      if (*p == dirsep)
        d = p;
    }
  return d;
}
410}