lib/sh/casemod.c - platform_external_bash - Gitiles

 /* casemod.c -- functions to change case of strings */

 /* Copyright (C) 2008,2009 Free Software Foundation, Inc.

    This file is part of GNU Bash, the Bourne Again SHell.

    Bash is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Bash is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Bash.  If not, see <http://www.gnu.org/licenses/>.
 */

 #if defined (HAVE_CONFIG_H)
 #  include <config.h>
 #endif

 #if defined (HAVE_UNISTD_H)
 #  include <unistd.h>
 #endif /* HAVE_UNISTD_H */

 #include <stdc.h>

 #include <bashansi.h>
 #include <bashintl.h>
 #include <bashtypes.h>

 #include <stdio.h>
 #include <ctype.h>
 #include <xmalloc.h>

 #include <shmbutil.h>
 #include <chartypes.h>

 #include <glob/strmatch.h>

 #define _to_wupper(wc)	(iswlower (wc) ? towupper (wc) : (wc))
 #define _to_wlower(wc)	(iswupper (wc) ? towlower (wc) : (wc))

 #if !defined (HANDLE_MULTIBYTE)
 #  define cval(s, i)	((s)[(i)])
 #  define iswalnum(c)	(isalnum(c))
 #  define TOGGLE(x)	(ISUPPER (x) ? tolower (x) : (TOUPPER (x)))
 #else
 #  define TOGGLE(x)	(iswupper (x) ? towlower (x) : (_to_wupper(x)))
 #endif

 /* These must agree with the defines in externs.h */
 #define CASE_NOOP	0x0000
 #define CASE_LOWER	0x0001
 #define CASE_UPPER	0x0002
 #define CASE_CAPITALIZE	0x0004
 #define CASE_UNCAP	0x0008
 #define CASE_TOGGLE	0x0010
 #define CASE_TOGGLEALL	0x0020
 #define CASE_UPFIRST	0x0040
 #define CASE_LOWFIRST	0x0080

 #define CASE_USEWORDS	0x1000		/* modify behavior to act on words in passed string */

 extern char *substring __P((char *, int, int));

 #if defined (HANDLE_MULTIBYTE)
 static wchar_t
 cval (s, i)
      char *s;
      int i;
 {
   size_t tmp;
   wchar_t wc;
   int l;
   mbstate_t mps;

   if (MB_CUR_MAX == 1)
     return ((wchar_t)s[i]);
   l = strlen (s);
   if (i >= (l - 1))
     return ((wchar_t)s[i]);
   memset (&mps, 0, sizeof (mbstate_t));
   tmp = mbrtowc (&wc, s + i, l - i, &mps);
   if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp))
     return ((wchar_t)s[i]);
   return wc;
 }
 #endif

 /* Modify the case of characters in STRING matching PAT based on the value of
    FLAGS.  If PAT is null, modify the case of each character */
 char *
 sh_modcase (string, pat, flags)
      const char *string;
      char *pat;
      int flags;
 {
   int start, next, end;
   int inword, c, nc, nop, match, usewords;
   char *ret, *s;
   wchar_t wc;
 #if defined (HANDLE_MULTIBYTE)
   wchar_t nwc;
   char mb[MB_LEN_MAX+1];
   int mlen;
   size_t m;
   mbstate_t state;
 #endif

 #if defined (HANDLE_MULTIBYTE)
   memset (&state, 0, sizeof (mbstate_t));
 #endif

   start = 0;
   end = strlen (string);

   ret = (char *)xmalloc (end + 1);
   strcpy (ret, string);

   /* See if we are supposed to split on alphanumerics and operate on each word */
   usewords = (flags & CASE_USEWORDS);
   flags &= ~CASE_USEWORDS;

   inword = 0;
   while (start < end)
     {
       wc = cval (ret, start);

       if (iswalnum (wc) == 0)
 	{
 	  inword = 0;
 	  ADVANCE_CHAR (ret, end, start);
 	  continue;
 	}

       if (pat)
 	{
 	  next = start;
 	  ADVANCE_CHAR (ret, end, next);
 	  s = substring (ret, start, next);
 	  match = strmatch (pat, s, FNM_EXTMATCH) != FNM_NOMATCH;
 	  free (s);
 	  if (match == 0)
             {
               start = next;
               inword = 1;
               continue;
             }
 	}

       /* XXX - for now, the toggling operators work on the individual
 	 words in the string, breaking on alphanumerics.  Should I
 	 leave the capitalization operators to do that also? */
       if (flags == CASE_CAPITALIZE)
 	{
 	  if (usewords)
 	    nop = inword ? CASE_LOWER : CASE_UPPER;
 	  else
 	    nop = (start > 0) ? CASE_LOWER : CASE_UPPER;
 	  inword = 1;
 	}
       else if (flags == CASE_UNCAP)
 	{
 	  if (usewords)
 	    nop = inword ? CASE_UPPER : CASE_LOWER;
 	  else
 	    nop = (start > 0) ? CASE_UPPER : CASE_LOWER;
 	  inword = 1;
 	}
       else if (flags == CASE_UPFIRST)
  	{
  	  if (usewords)
 	    nop = inword ? CASE_NOOP : CASE_UPPER;
 	  else
 	    nop = (start > 0) ? CASE_NOOP : CASE_UPPER;
  	  inword = 1;
  	}
       else if (flags == CASE_LOWFIRST)
  	{
  	  if (usewords)
 	    nop = inword ? CASE_NOOP : CASE_LOWER;
 	  else
 	    nop = (start > 0) ? CASE_NOOP : CASE_LOWER;
  	  inword = 1;
  	}
       else if (flags == CASE_TOGGLE)
 	{
 	  nop = inword ? CASE_NOOP : CASE_TOGGLE;
 	  inword = 1;
 	}
       else
 	nop = flags;

       if (MB_CUR_MAX == 1 || isascii (wc))
 	{
 	  switch (nop)
 	  {
 	  default:
 	  case CASE_NOOP:  nc = wc; break;
 	  case CASE_UPPER:  nc = TOUPPER (wc); break;
 	  case CASE_LOWER:  nc = TOLOWER (wc); break;
 	  case CASE_TOGGLEALL:
 	  case CASE_TOGGLE: nc = TOGGLE (wc); break;
 	  }
 	  ret[start] = nc;
 	}
 #if defined (HANDLE_MULTIBYTE)
       else
 	{
 	  m = mbrtowc (&wc, string + start, end - start, &state);
 	  if (MB_INVALIDCH (m))
 	    wc = (wchar_t)string[start];
 	  else if (MB_NULLWCH (m))
 	    wc = L'\0';
 	  switch (nop)
 	  {
 	  default:
 	  case CASE_NOOP:  nwc = wc; break;
 	  case CASE_UPPER:  nwc = TOUPPER (wc); break;
 	  case CASE_LOWER:  nwc = TOLOWER (wc); break;
 	  case CASE_TOGGLEALL:
 	  case CASE_TOGGLE: nwc = TOGGLE (wc); break;
 	  }
 	  if  (nwc != wc)	/*  just skip unchanged characters */
 	    {
 	      mlen = wcrtomb (mb, nwc, &state);
 	      if (mlen > 0)
 		mb[mlen] = '\0';
 	      /* Assume the same width */
 	      strncpy (ret + start, mb, mlen);
 	    }
 	}
 #endif

       /*  This assumes that the upper and lower case versions are the same width. */
       ADVANCE_CHAR (ret, end, start);
     }

   return ret;
 }
	/* casemod.c -- functions to change case of strings */

	/* Copyright (C) 2008,2009 Free Software Foundation, Inc.

	This file is part of GNU Bash, the Bourne Again SHell.

	Bash is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	Bash is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with Bash. If not, see <http://www.gnu.org/licenses/>.
	*/

	#if defined (HAVE_CONFIG_H)
	# include <config.h>
	#endif

	#if defined (HAVE_UNISTD_H)
	# include <unistd.h>
	#endif /* HAVE_UNISTD_H */

	#include <stdc.h>

	#include <bashansi.h>
	#include <bashintl.h>
	#include <bashtypes.h>

	#include <stdio.h>
	#include <ctype.h>
	#include <xmalloc.h>

	#include <shmbutil.h>
	#include <chartypes.h>

	#include <glob/strmatch.h>

	#define _to_wupper(wc) (iswlower (wc) ? towupper (wc) : (wc))
	#define _to_wlower(wc) (iswupper (wc) ? towlower (wc) : (wc))

	#if !defined (HANDLE_MULTIBYTE)
	# define cval(s, i) ((s)[(i)])
	# define iswalnum(c) (isalnum(c))
	# define TOGGLE(x) (ISUPPER (x) ? tolower (x) : (TOUPPER (x)))
	#else
	# define TOGGLE(x) (iswupper (x) ? towlower (x) : (_to_wupper(x)))
	#endif

	/* These must agree with the defines in externs.h */
	#define CASE_NOOP 0x0000
	#define CASE_LOWER 0x0001
	#define CASE_UPPER 0x0002
	#define CASE_CAPITALIZE 0x0004
	#define CASE_UNCAP 0x0008
	#define CASE_TOGGLE 0x0010
	#define CASE_TOGGLEALL 0x0020
	#define CASE_UPFIRST 0x0040
	#define CASE_LOWFIRST 0x0080

	#define CASE_USEWORDS 0x1000 /* modify behavior to act on words in passed string */

	extern char substring __P((char , int, int));

	#if defined (HANDLE_MULTIBYTE)
	static wchar_t
	cval (s, i)
	char *s;
	int i;
	{
	size_t tmp;
	wchar_t wc;
	int l;
	mbstate_t mps;

	if (MB_CUR_MAX == 1)
	return ((wchar_t)s[i]);
	l = strlen (s);
	if (i >= (l - 1))
	return ((wchar_t)s[i]);
	memset (&mps, 0, sizeof (mbstate_t));
	tmp = mbrtowc (&wc, s + i, l - i, &mps);
	if (MB_INVALIDCH (tmp) \|\| MB_NULLWCH (tmp))
	return ((wchar_t)s[i]);
	return wc;
	}
	#endif

	/* Modify the case of characters in STRING matching PAT based on the value of
	FLAGS. If PAT is null, modify the case of each character */
	char *
	sh_modcase (string, pat, flags)
	const char *string;
	char *pat;
	int flags;
	{
	int start, next, end;
	int inword, c, nc, nop, match, usewords;
	char ret, s;
	wchar_t wc;
	#if defined (HANDLE_MULTIBYTE)
	wchar_t nwc;
	char mb[MB_LEN_MAX+1];
	int mlen;
	size_t m;
	mbstate_t state;
	#endif

	#if defined (HANDLE_MULTIBYTE)
	memset (&state, 0, sizeof (mbstate_t));
	#endif

	start = 0;
	end = strlen (string);

	ret = (char *)xmalloc (end + 1);
	strcpy (ret, string);

	/* See if we are supposed to split on alphanumerics and operate on each word */
	usewords = (flags & CASE_USEWORDS);
	flags &= ~CASE_USEWORDS;

	inword = 0;
	while (start < end)
	{
	wc = cval (ret, start);

	if (iswalnum (wc) == 0)
	{
	inword = 0;
	ADVANCE_CHAR (ret, end, start);
	continue;
	}

	if (pat)
	{
	next = start;
	ADVANCE_CHAR (ret, end, next);
	s = substring (ret, start, next);
	match = strmatch (pat, s, FNM_EXTMATCH) != FNM_NOMATCH;
	free (s);
	if (match == 0)
	{
	start = next;
	inword = 1;
	continue;
	}
	}

	/* XXX - for now, the toggling operators work on the individual
	words in the string, breaking on alphanumerics. Should I
	leave the capitalization operators to do that also? */
	if (flags == CASE_CAPITALIZE)
	{
	if (usewords)
	nop = inword ? CASE_LOWER : CASE_UPPER;
	else
	nop = (start > 0) ? CASE_LOWER : CASE_UPPER;
	inword = 1;
	}
	else if (flags == CASE_UNCAP)
	{
	if (usewords)
	nop = inword ? CASE_UPPER : CASE_LOWER;
	else
	nop = (start > 0) ? CASE_UPPER : CASE_LOWER;
	inword = 1;
	}
	else if (flags == CASE_UPFIRST)
	{
	if (usewords)
	nop = inword ? CASE_NOOP : CASE_UPPER;
	else
	nop = (start > 0) ? CASE_NOOP : CASE_UPPER;
	inword = 1;
	}
	else if (flags == CASE_LOWFIRST)
	{
	if (usewords)
	nop = inword ? CASE_NOOP : CASE_LOWER;
	else
	nop = (start > 0) ? CASE_NOOP : CASE_LOWER;
	inword = 1;
	}
	else if (flags == CASE_TOGGLE)
	{
	nop = inword ? CASE_NOOP : CASE_TOGGLE;
	inword = 1;
	}
	else
	nop = flags;

	if (MB_CUR_MAX == 1 \|\| isascii (wc))
	{
	switch (nop)
	{
	default:
	case CASE_NOOP: nc = wc; break;
	case CASE_UPPER: nc = TOUPPER (wc); break;
	case CASE_LOWER: nc = TOLOWER (wc); break;
	case CASE_TOGGLEALL:
	case CASE_TOGGLE: nc = TOGGLE (wc); break;
	}
	ret[start] = nc;
	}
	#if defined (HANDLE_MULTIBYTE)
	else
	{
	m = mbrtowc (&wc, string + start, end - start, &state);
	if (MB_INVALIDCH (m))
	wc = (wchar_t)string[start];
	else if (MB_NULLWCH (m))
	wc = L'\0';
	switch (nop)
	{
	default:
	case CASE_NOOP: nwc = wc; break;
	case CASE_UPPER: nwc = TOUPPER (wc); break;
	case CASE_LOWER: nwc = TOLOWER (wc); break;
	case CASE_TOGGLEALL:
	case CASE_TOGGLE: nwc = TOGGLE (wc); break;
	}
	if (nwc != wc) /* just skip unchanged characters */
	{
	mlen = wcrtomb (mb, nwc, &state);
	if (mlen > 0)
	mb[mlen] = '\0';
	/* Assume the same width */
	strncpy (ret + start, mb, mlen);
	}
	}
	#endif

	/* This assumes that the upper and lower case versions are the same width. */
	ADVANCE_CHAR (ret, end, start);
	}

	return ret;
	}