blob: 17e4265ff27ad9a8a32ddb12dc122837b8c9a66e [file] [log] [blame]
/* gmisc.c -- miscellaneous pattern matching utility functions for Bash.
Copyright (C) 2010 Free Software Foundation, Inc.
This file is part of GNU Bash, the Bourne-Again SHell.
Bash is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Bash is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Bash. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include "bashtypes.h"
#if defined (HAVE_UNISTD_H)
# include <unistd.h>
#endif
#include "bashansi.h"
#include "shmbutil.h"
#include "stdc.h"
#ifndef LPAREN
# define LPAREN '('
#endif
#ifndef RPAREN
# define RPAREN ')'
#endif
#if defined (HANDLE_MULTIBYTE)
#define WLPAREN L'('
#define WRPAREN L')'
/* Return 1 of the first character of WSTRING could match the first
character of pattern WPAT. Wide character version. */
int
match_pattern_wchar (wpat, wstring)
wchar_t *wpat, *wstring;
{
wchar_t wc;
if (*wstring == 0)
return (0);
switch (wc = *wpat++)
{
default:
return (*wstring == wc);
case L'\\':
return (*wstring == *wpat);
case L'?':
return (*wpat == WLPAREN ? 1 : (*wstring != L'\0'));
case L'*':
return (1);
case L'+':
case L'!':
case L'@':
return (*wpat == WLPAREN ? 1 : (*wstring == wc));
case L'[':
return (*wstring != L'\0');
}
}
int
wmatchlen (wpat, wmax)
wchar_t *wpat;
size_t wmax;
{
wchar_t wc;
int matlen, bracklen, t, in_cclass, in_collsym, in_equiv;
if (*wpat == 0)
return (0);
matlen = in_cclass = in_collsym = in_equiv = 0;
while (wc = *wpat++)
{
switch (wc)
{
default:
matlen++;
break;
case L'\\':
if (*wpat == 0)
return ++matlen;
else
{
matlen++;
wpat++;
}
break;
case L'?':
if (*wpat == WLPAREN)
return (matlen = -1); /* XXX for now */
else
matlen++;
break;
case L'*':
return (matlen = -1);
case L'+':
case L'!':
case L'@':
if (*wpat == WLPAREN)
return (matlen = -1); /* XXX for now */
else
matlen++;
break;
case L'[':
/* scan for ending `]', skipping over embedded [:...:] */
bracklen = 1;
wc = *wpat++;
do
{
if (wc == 0)
{
wpat--; /* back up to NUL */
matlen += bracklen;
goto bad_bracket;
}
else if (wc == L'\\')
{
/* *wpat == backslash-escaped character */
bracklen++;
/* If the backslash or backslash-escape ends the string,
bail. The ++wpat skips over the backslash escape */
if (*wpat == 0 || *++wpat == 0)
{
matlen += bracklen;
goto bad_bracket;
}
}
else if (wc == L'[' && *wpat == L':') /* character class */
{
wpat++;
bracklen++;
in_cclass = 1;
}
else if (in_cclass && wc == L':' && *wpat == L']')
{
wpat++;
bracklen++;
in_cclass = 0;
}
else if (wc == L'[' && *wpat == L'.') /* collating symbol */
{
wpat++;
bracklen++;
if (*wpat == L']') /* right bracket can appear as collating symbol */
{
wpat++;
bracklen++;
}
in_collsym = 1;
}
else if (in_collsym && wc == L'.' && *wpat == L']')
{
wpat++;
bracklen++;
in_collsym = 0;
}
else if (wc == L'[' && *wpat == L'=') /* equivalence class */
{
wpat++;
bracklen++;
if (*wpat == L']') /* right bracket can appear as equivalence class */
{
wpat++;
bracklen++;
}
in_equiv = 1;
}
else if (in_equiv && wc == L'=' && *wpat == L']')
{
wpat++;
bracklen++;
in_equiv = 0;
}
else
bracklen++;
}
while ((wc = *wpat++) != L']');
matlen++; /* bracket expression can only match one char */
bad_bracket:
break;
}
}
return matlen;
}
#endif
int
extglob_pattern_p (pat)
char *pat;
{
switch (pat[0])
{
case '*':
case '+':
case '!':
case '@':
return (pat[1] == LPAREN);
default:
return 0;
}
return 0;
}
/* Return 1 of the first character of STRING could match the first
character of pattern PAT. Used to avoid n2 calls to strmatch(). */
int
match_pattern_char (pat, string)
char *pat, *string;
{
char c;
if (*string == 0)
return (0);
switch (c = *pat++)
{
default:
return (*string == c);
case '\\':
return (*string == *pat);
case '?':
return (*pat == LPAREN ? 1 : (*string != '\0'));
case '*':
return (1);
case '+':
case '!':
case '@':
return (*pat == LPAREN ? 1 : (*string == c));
case '[':
return (*string != '\0');
}
}
int
umatchlen (pat, max)
char *pat;
size_t max;
{
char c;
int matlen, bracklen, t, in_cclass, in_collsym, in_equiv;
if (*pat == 0)
return (0);
matlen = in_cclass = in_collsym = in_equiv = 0;
while (c = *pat++)
{
switch (c)
{
default:
matlen++;
break;
case '\\':
if (*pat == 0)
return ++matlen;
else
{
matlen++;
pat++;
}
break;
case '?':
if (*pat == LPAREN)
return (matlen = -1); /* XXX for now */
else
matlen++;
break;
case '*':
return (matlen = -1);
case '+':
case '!':
case '@':
if (*pat == LPAREN)
return (matlen = -1); /* XXX for now */
else
matlen++;
break;
case '[':
/* scan for ending `]', skipping over embedded [:...:] */
bracklen = 1;
c = *pat++;
do
{
if (c == 0)
{
pat--; /* back up to NUL */
matlen += bracklen;
goto bad_bracket;
}
else if (c == '\\')
{
/* *pat == backslash-escaped character */
bracklen++;
/* If the backslash or backslash-escape ends the string,
bail. The ++pat skips over the backslash escape */
if (*pat == 0 || *++pat == 0)
{
matlen += bracklen;
goto bad_bracket;
}
}
else if (c == '[' && *pat == ':') /* character class */
{
pat++;
bracklen++;
in_cclass = 1;
}
else if (in_cclass && c == ':' && *pat == ']')
{
pat++;
bracklen++;
in_cclass = 0;
}
else if (c == '[' && *pat == '.') /* collating symbol */
{
pat++;
bracklen++;
if (*pat == ']') /* right bracket can appear as collating symbol */
{
pat++;
bracklen++;
}
in_collsym = 1;
}
else if (in_collsym && c == '.' && *pat == ']')
{
pat++;
bracklen++;
in_collsym = 0;
}
else if (c == '[' && *pat == '=') /* equivalence class */
{
pat++;
bracklen++;
if (*pat == ']') /* right bracket can appear as equivalence class */
{
pat++;
bracklen++;
}
in_equiv = 1;
}
else if (in_equiv && c == '=' && *pat == ']')
{
pat++;
bracklen++;
in_equiv = 0;
}
else
bracklen++;
}
while ((c = *pat++) != ']');
matlen++; /* bracket expression can only match one char */
bad_bracket:
break;
}
}
return matlen;
}