lib/sh/strtrans.c - platform_external_bash - Gitiles

 /* strtrans.c - Translate and untranslate strings with ANSI-C escape sequences. */

 /* Copyright (C) 2000-2011 Free Software Foundation, Inc.

    This file is part of GNU Bash, the Bourne Again SHell.

    Bash is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Bash is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Bash.  If not, see <http://www.gnu.org/licenses/>.
 */

 #include <config.h>

 #if defined (HAVE_UNISTD_H)
 #  include <unistd.h>
 #endif

 #include <bashansi.h>
 #include <stdio.h>
 #include <chartypes.h>

 #include "shell.h"

 #include "shmbchar.h"
 #include "shmbutil.h"

 #ifdef ESC
 #undef ESC
 #endif
 #define ESC '\033'	/* ASCII */

 /* Convert STRING by expanding the escape sequences specified by the
    ANSI C standard, plus the non-ANSI extensions \e, \E, \x, \cX and (when
    HANDLE_MULTIBYTE is defined) \u and \U.  LEN is the length of STRING;
    it is used only to size the result buffer.  Returns a string allocated
    with xmalloc, or NULL if STRING is null or empty.  If RLEN is non-null,
    *RLEN is set to the length of the result; it is not touched when NULL
    is returned.

    If SAWC is non-null, recognize `\c' and use that as a string
    terminator.  If we see \c, set *SAWC to 1 before returning.

    If (FLAGS&1) is non-zero, we're translating a string for `echo -e', and
    therefore should not treat a single quote as a character that may be
    escaped with a backslash.  If (FLAGS&2) is non-zero, we're expanding for
    the parser and want to quote CTLESC and CTLNUL with CTLESC.  If (FLAGS&4)
    is non-zero, we want to remove the backslash before any unrecognized
    escape sequence.  Bit 16 of FLAGS is used internally while a `\x{...}'
    sequence is being parsed. */
 char *
 ansicstr (string, len, flags, sawc, rlen)
      char *string;
      int len, flags, *sawc, *rlen;
 {
   int c, temp;
   char *ret, *r, *s;
   unsigned long v;

   if (string == 0 || *string == '\0')
     return ((char *)NULL);

   /* Size the result from LEN up front; the buffer is never grown. */
 #if defined (HANDLE_MULTIBYTE)
   ret = (char *)xmalloc (4*len + 1);
 #else
   ret = (char *)xmalloc (2*len + 1);	/* 2*len for possible CTLESC */
 #endif
   /* R is the write cursor into RET, S the read cursor into STRING. */
   for (r = ret, s = string; s && *s; )
     {
       c = *s++;
       /* Ordinary characters, and a backslash that is the last character
	  of the string, are copied through unchanged. */
       if (c != '\\' || *s == '\0')
 	*r++ = c;
       else
 	{
 	  /* C becomes the character following the backslash.  Each case
 	     either replaces C with the translated byte and breaks out to
 	     the common store below, or emits a literal backslash first so
 	     that the sequence is passed through. */
 	  switch (c = *s++)
 	    {
 #if defined (__STDC__)
 	    case 'a': c = '\a'; break;
 	    case 'v': c = '\v'; break;
 #else
 	    case 'a': c = (int) 0x07; break;
 	    case 'v': c = (int) 0x0B; break;
 #endif
 	    case 'b': c = '\b'; break;
 	    case 'e': case 'E':		/* ESC -- non-ANSI */
 	      c = ESC; break;
 	    case 'f': c = '\f'; break;
 	    case 'n': c = '\n'; break;
 	    case 'r': c = '\r'; break;
 	    case 't': c = '\t'; break;
 	    case '1': case '2': case '3':
 	    case '4': case '5': case '6':
 	    case '7':
 #if 1
 	      /* For echo -e, an octal escape must start with `0'; `\1'
 		 through `\7' are passed through with their backslash. */
 	      if (flags & 1)
 		{
 		  *r++ = '\\';
 		  break;
 		}
 	    /*FALLTHROUGH*/
 #endif
 	    case '0':
 	      /* If (FLAGS & 1), we're translating a string for echo -e (or
 		 the equivalent xpg_echo option), so we obey the SUSv3/
 		 POSIX-2001 requirement and accept 0-3 octal digits after
 		 a leading `0'. */
 	      /* TEMP is the number of additional octal digits to accept
 		 after the one already held in C. */
 	      temp = 2 + ((flags & 1) && (c == '0'));
 	      for (c -= '0'; ISOCTAL (*s) && temp--; s++)
 		c = (c * 8) + OCTVALUE (*s);
 	      c &= 0xFF;
 	      break;
 	    case 'x':			/* Hex digit -- non-ANSI */
 	      if ((flags & 2) && *s == '{')
 		{
 		  flags |= 16;		/* internal flag value */
 		  s++;
 		}
 	      /* Consume up to two hex characters.  TEMP is still 2
 		 afterwards only if no hex digit was seen. */
 	      for (temp = 2, c = 0; ISXDIGIT ((unsigned char)*s) && temp--; s++)
 		c = (c * 16) + HEXVALUE (*s);
 	      /* DGK says that after a `\x{' ksh93 consumes ISXDIGIT chars
 		 until a non-xdigit or `}', so potentially more than two
 		 chars are consumed. */
 	      if (flags & 16)
 		{
 		  for ( ; ISXDIGIT ((unsigned char)*s); s++)
 		    c = (c * 16) + HEXVALUE (*s);
 		  flags &= ~16;
 		  if (*s == '}')
 		    s++;
 	        }
 	      /* \x followed by non-hex digits is passed through unchanged */
 	      else if (temp == 2)
 		{
 		  *r++ = '\\';
 		  c = 'x';
 		}
 	      c &= 0xFF;
 	      break;
 #if defined (HANDLE_MULTIBYTE)
 	    case 'u':
 	    case 'U':
 	      temp = (c == 'u') ? 4 : 8;	/* \uNNNN \UNNNNNNNN */
 	      for (v = 0; ISXDIGIT ((unsigned char)*s) && temp--; s++)
 		v = (v * 16) + HEXVALUE (*s);
 	      /* TEMP unchanged means no hex digit followed: pass the
 		 sequence through with its backslash. */
 	      if (temp == ((c == 'u') ? 4 : 8))
 		{
 		  *r++ = '\\';	/* c remains unchanged */
 		  break;
 		}
 	      else if (v <= 0x7f)	/* <= 0x7f translates directly */
 		{
 		  c = v;
 		  break;
 		}
 	      else
 		{
 		  /* u32cconv writes the converted character at R and its
 		     return value is used as the number of bytes written;
 		     `continue' skips the common single-byte store. */
 		  temp = u32cconv (v, r);
 		  r += temp;
 		  continue;
 		}
 #endif
 	    case '\\':
 	      break;
 	    case '\'': case '"': case '?':
 	      /* For echo -e these are not escapes, so keep the backslash. */
 	      if (flags & 1)
 		*r++ = '\\';
 	      break;
 	    case 'c':
 	      /* With SAWC supplied, \c ends the string here. */
 	      if (sawc)
 		{
 		  *sawc = 1;
 		  *r = '\0';
 		  if (rlen)
 		    *rlen = r - ret;
 		  return ret;
 		}
 	      else if ((flags & 1) == 0 && *s == 0)
 		;		/* pass \c through */
 	      else if ((flags & 1) == 0 && (c = *s))
 		{
 		  /* \cX becomes control-X via TOCTRL. */
 		  s++;
 		  if ((flags & 2) && c == '\\' && c == *s)
 		    s++;	/* Posix requires $'\c\\' do backslash escaping */
 		  c = TOCTRL(c);
 		  break;
 		}
 		/*FALLTHROUGH*/
 	    default:
 		/* Unrecognized escape: keep the backslash unless the
 		   caller asked for it to be removed with (FLAGS&4). */
 		if ((flags & 4) == 0)
 		  *r++ = '\\';
 		break;
 	    }
 	  /* Common store for the translated character; when expanding for
 	     the parser, CTLESC and CTLNUL are protected with CTLESC. */
 	  if ((flags & 2) && (c == CTLESC || c == CTLNUL))
 	    *r++ = CTLESC;
 	  *r++ = c;
 	}
     }
   *r = '\0';
   if (rlen)
     *rlen = r - ret;
   return ret;
 }

 /* Take a string STR, possibly containing non-printing characters, and turn
    it into a $'...' ANSI-C style quoted string.  Returns a string allocated
    with xmalloc, or NULL if STR is null or empty.  FLAGS is accepted for
    interface compatibility but is not used.  If RLEN is non-null, *RLEN is
    set to the length of the result, not counting the terminating NUL.

    Characters with a mnemonic escape (\E, \a, \v, \b, \f, \n, \r, \t) are
    written that way, backslash and single quote are backslash-escaped, and
    any other byte that is not printable (or that starts an invalid
    multibyte sequence) is written as a three-digit octal escape. */
 char *
 ansic_quote (str, flags, rlen)
      char *str;
      int flags, *rlen;
 {
   char *r, *ret, *s;
   int add_backslash, printable, i;
   unsigned char c;
   size_t clen;
 #if defined (HANDLE_MULTIBYTE)
   wchar_t wc;
   mbstate_t state;
 #endif

   if (str == 0 || *str == 0)
     return ((char *)0);

   /* Worst case is every input byte becoming a four-character \ooo escape,
      plus the leading $', the closing ', and the terminating NUL. */
   r = ret = (char *)xmalloc (4 * strlen (str) + 4);

 #if defined (HANDLE_MULTIBYTE)
   /* Use a private conversion state rather than mbrtowc's hidden static
      one, so that it can be reset after a bad sequence. */
   memset (&state, '\0', sizeof (mbstate_t));
 #endif

   *r++ = '$';
   *r++ = '\'';

   for (s = str; (c = *s) != 0; s++)
     {
       add_backslash = 1;	/* 1 == add backslash; 0 == no backslash */
       clen = 1;			/* bytes of STR making up this character */

       switch (c)
         {
         case ESC: c = 'E'; break;
 #ifdef __STDC__
         case '\a': c = 'a'; break;
         case '\v': c = 'v'; break;
 #else
         case 0x07: c = 'a'; break;
         case 0x0b: c = 'v'; break;
 #endif

         case '\b': c = 'b'; break;
         case '\f': c = 'f'; break;
         case '\n': c = 'n'; break;
         case '\r': c = 'r'; break;
         case '\t': c = 't'; break;
         case '\\':
         case '\'':
           break;
         default:
 #if defined (HANDLE_MULTIBYTE)
           if (is_basic (c))
             printable = ISPRINT (c) != 0;
           else
             {
               clen = mbrtowc (&wc, s, MB_CUR_MAX, &state);
               if (MB_INVALIDCH (clen))
                 {
                   /* The state is unspecified after an invalid or
                      incomplete sequence; start afresh for the next byte. */
                   memset (&state, '\0', sizeof (mbstate_t));
                   printable = 0;
                 }
               else
                 printable = iswprint (wc) != 0;
             }
 #else
           printable = ISPRINT (c) != 0;
 #endif
           if (printable == 0)
             {
               /* Emit just this byte as \ooo; any following bytes are
                  examined on their own in later iterations. */
               *r++ = '\\';
               *r++ = TOCHAR ((c >> 6) & 07);
               *r++ = TOCHAR ((c >> 3) & 07);
               *r++ = TOCHAR (c & 07);
               continue;
             }
           add_backslash = 0;
           break;
         }

       /* mbrtowc reported a null wide character: stop quoting here. */
       if (clen == 0)
         break;

       if (add_backslash)
         *r++ = '\\';

       if (clen == 1)
         *r++ = c;
       else
         {
           /* Copy a printable multibyte character through verbatim. */
           for (i = 0; i < (int)clen; i++)
             *r++ = (unsigned char)s[i];
           s += clen - 1;	/* -1 because of the increment above */
         }
     }

   *r++ = '\'';
   *r = '\0';
   if (rlen)
     *rlen = r - ret;
   return ret;
 }

 #if defined (HANDLE_MULTIBYTE)
 /* Return 1 if STRING, converted to wide characters with mbstowcs, contains
    a character for which iswprint is false, and 0 otherwise.  A string that
    cannot be converted (mbstowcs returns (size_t)-1) is reported as needing
    quoting, since it holds bytes that are not valid characters. */
 int
 ansic_wshouldquote (string)
      const char *string;
 {
   const wchar_t *wcs;
   wchar_t *wcstr;
   size_t slen;
   int result;

   /* First pass only counts the wide characters needed. */
   slen = mbstowcs ((wchar_t *)NULL, string, 0);

   /* Do not fall through with a failed conversion: the second mbstowcs
      call would fail as well and leave the buffer unterminated. */
   if (slen == (size_t)-1)
     return 1;

   wcstr = (wchar_t *)xmalloc (sizeof (wchar_t) * (slen + 1));
   mbstowcs (wcstr, string, slen + 1);

   result = 0;
   for (wcs = wcstr; *wcs; wcs++)
     if (iswprint (*wcs) == 0)
       {
         result = 1;
         break;
       }

   free (wcstr);
   return result;
 }
 #endif

 /* Return 1 if STRING has to be quoted with $'...' because it contains a
    non-printing character, 0 if it does not or if STRING is null. */
 int
 ansic_shouldquote (string)
      const char *string;
 {
   const char *p;
   unsigned char ch;

   if (string == 0)
     return 0;

   p = string;
   while ((ch = *p) != 0)
     {
 #if defined (HANDLE_MULTIBYTE)
       /* At the first non-basic byte, let the wide-character scan decide
          for the remainder of the string. */
       if (is_basic (ch) == 0)
         return (ansic_wshouldquote (p));
 #endif
       if (ISPRINT (ch) == 0)
         return 1;
       p++;
     }

   return 0;
 }

 /* $'...' ANSI-C expand the characters of STRING from index START up to,
    but not including, END, and return the result in allocated memory.
    If LENP is non-null, the length of the result is stored in *LENP.  An
    empty range yields an empty string and a length of 0. */
 char *
 ansiexpand (string, start, end, lenp)
      char *string;
      int start, end, *lenp;
 {
   char *piece, *expanded;
   int src, n;

   /* Make a NUL-terminated private copy of the requested slice. */
   piece = (char *)xmalloc (end - start + 1);
   n = 0;
   src = start;
   while (src < end)
     piece[n++] = string[src++];
   piece[n] = '\0';

   /* Nothing to translate: hand back the empty copy itself. */
   if (*piece == '\0')
     {
       if (lenp)
         *lenp = 0;
       return (piece);
     }

   /* Flag value 2 asks ansicstr to expand as for the parser. */
   expanded = ansicstr (piece, n, 2, (int *)NULL, lenp);
   free (piece);
   return (expanded);
 }
	/* strtrans.c - Translate and untranslate strings with ANSI-C escape sequences. */

	/* Copyright (C) 2000-2011 Free Software Foundation, Inc.

	This file is part of GNU Bash, the Bourne Again SHell.

	Bash is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	Bash is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with Bash. If not, see <http://www.gnu.org/licenses/>.
	*/

	#include <config.h>

	#if defined (HAVE_UNISTD_H)
	# include <unistd.h>
	#endif

	#include <bashansi.h>
	#include <stdio.h>
	#include <chartypes.h>

	#include "shell.h"

	#include "shmbchar.h"
	#include "shmbutil.h"

	#ifdef ESC
	#undef ESC
	#endif
	#define ESC '\033' /* ASCII */

	/* Convert STRING by expanding the escape sequences specified by the
	ANSI C standard. If SAWC is non-null, recognize `\c' and use that
	as a string terminator. If we see \c, set *SAWC to 1 before
	returning. LEN is the length of STRING. If (FLAGS&1) is non-zero,
	that we're translating a string for `echo -e', and therefore should not
	treat a single quote as a character that may be escaped with a backslash.
	If (FLAGS&2) is non-zero, we're expanding for the parser and want to
	quote CTLESC and CTLNUL with CTLESC. If (flags&4) is non-zero, we want
	to remove the backslash before any unrecognized escape sequence. */
	char *
	ansicstr (string, len, flags, sawc, rlen)
	char *string;
	int len, flags, sawc, rlen;
	{
	int c, temp;
	char ret, r, *s;
	unsigned long v;

	if (string == 0 \|\| *string == '\0')
	return ((char *)NULL);

	#if defined (HANDLE_MULTIBYTE)
	ret = (char )xmalloc (4len + 1);
	#else
	ret = (char )xmalloc (2len + 1); /* 2len for possible CTLESC /
	#endif
	for (r = ret, s = string; s && *s; )
	{
	c = *s++;
	if (c != '\\' \|\| *s == '\0')
	*r++ = c;
	else
	{
	switch (c = *s++)
	{
	#if defined (__STDC__)
	case 'a': c = '\a'; break;
	case 'v': c = '\v'; break;
	#else
	case 'a': c = (int) 0x07; break;
	case 'v': c = (int) 0x0B; break;
	#endif
	case 'b': c = '\b'; break;
	case 'e': case 'E': /* ESC -- non-ANSI */
	c = ESC; break;
	case 'f': c = '\f'; break;
	case 'n': c = '\n'; break;
	case 'r': c = '\r'; break;
	case 't': c = '\t'; break;
	case '1': case '2': case '3':
	case '4': case '5': case '6':
	case '7':
	#if 1
	if (flags & 1)
	{
	*r++ = '\\';
	break;
	}
	/FALLTHROUGH/
	#endif
	case '0':
	/* If (FLAGS & 1), we're translating a string for echo -e (or
	the equivalent xpg_echo option), so we obey the SUSv3/
	POSIX-2001 requirement and accept 0-3 octal digits after
	a leading `0'. */
	temp = 2 + ((flags & 1) && (c == '0'));
	for (c -= '0'; ISOCTAL (*s) && temp--; s++)
	c = (c * 8) + OCTVALUE (*s);
	c &= 0xFF;
	break;
	case 'x': /* Hex digit -- non-ANSI */
	if ((flags & 2) && *s == '{')
	{
	flags \|= 16; /* internal flag value */
	s++;
	}
	/* Consume at least two hex characters */
	for (temp = 2, c = 0; ISXDIGIT ((unsigned char)*s) && temp--; s++)
	c = (c * 16) + HEXVALUE (*s);
	/* DGK says that after a `\x{' ksh93 consumes ISXDIGIT chars
	until a non-xdigit or `}', so potentially more than two
	chars are consumed. */
	if (flags & 16)
	{
	for ( ; ISXDIGIT ((unsigned char)*s); s++)
	c = (c * 16) + HEXVALUE (*s);
	flags &= ~16;
	if (*s == '}')
	s++;
	}
	/* \x followed by non-hex digits is passed through unchanged */
	else if (temp == 2)
	{
	*r++ = '\\';
	c = 'x';
	}
	c &= 0xFF;
	break;
	#if defined (HANDLE_MULTIBYTE)
	case 'u':
	case 'U':
	temp = (c == 'u') ? 4 : 8; /* \uNNNN \UNNNNNNNN */
	for (v = 0; ISXDIGIT ((unsigned char)*s) && temp--; s++)
	v = (v * 16) + HEXVALUE (*s);
	if (temp == ((c == 'u') ? 4 : 8))
	{
	r++ = '\\'; / c remains unchanged */
	break;
	}
	else if (v <= 0x7f) /* <= 0x7f translates directly */
	{
	c = v;
	break;
	}
	else
	{
	temp = u32cconv (v, r);
	r += temp;
	continue;
	}
	#endif
	case '\\':
	break;
	case '\'': case '"': case '?':
	if (flags & 1)
	*r++ = '\\';
	break;
	case 'c':
	if (sawc)
	{
	*sawc = 1;
	*r = '\0';
	if (rlen)
	*rlen = r - ret;
	return ret;
	}
	else if ((flags & 1) == 0 && *s == 0)
	; /* pass \c through */
	else if ((flags & 1) == 0 && (c = *s))
	{
	s++;
	if ((flags & 2) && c == '\\' && c == *s)
	s++; /* Posix requires $'\c\\' do backslash escaping */
	c = TOCTRL(c);
	break;
	}
	/FALLTHROUGH/
	default:
	if ((flags & 4) == 0)
	*r++ = '\\';
	break;
	}
	if ((flags & 2) && (c == CTLESC \|\| c == CTLNUL))
	*r++ = CTLESC;
	*r++ = c;
	}
	}
	*r = '\0';
	if (rlen)
	*rlen = r - ret;
	return ret;
	}

	/* Take a string STR, possibly containing non-printing characters, and
	   turn it into a $'...' ANSI-C style quoted string.  Returns a string
	   allocated with xmalloc, or NULL if STR is null or empty.  FLAGS is
	   accepted for interface compatibility but is not used.  If RLEN is
	   non-null, *RLEN is set to the length of the result, not counting the
	   terminating NUL. */
	char *
	ansic_quote (str, flags, rlen)
	     char *str;
	     int flags, *rlen;
	{
	  char *r, *ret, *s;
	  int add_backslash, printable, i;
	  unsigned char c;
	  size_t clen;
	#if defined (HANDLE_MULTIBYTE)
	  wchar_t wc;
	  mbstate_t state;
	#endif

	  if (str == 0 || *str == 0)
	    return ((char *)0);

	  /* Worst case is every input byte becoming a four-character \ooo
	     escape, plus the leading $', the closing ', and the NUL. */
	  r = ret = (char *)xmalloc (4 * strlen (str) + 4);

	#if defined (HANDLE_MULTIBYTE)
	  /* Use a private conversion state rather than mbrtowc's hidden static
	     one, so that it can be reset after a bad sequence. */
	  memset (&state, '\0', sizeof (mbstate_t));
	#endif

	  *r++ = '$';
	  *r++ = '\'';

	  for (s = str; (c = *s) != 0; s++)
	    {
	      add_backslash = 1;	/* 1 == add backslash; 0 == no backslash */
	      clen = 1;

	      switch (c)
	        {
	        case ESC: c = 'E'; break;
	#ifdef __STDC__
	        case '\a': c = 'a'; break;
	        case '\v': c = 'v'; break;
	#else
	        case 0x07: c = 'a'; break;
	        case 0x0b: c = 'v'; break;
	#endif

	        case '\b': c = 'b'; break;
	        case '\f': c = 'f'; break;
	        case '\n': c = 'n'; break;
	        case '\r': c = 'r'; break;
	        case '\t': c = 't'; break;
	        case '\\':
	        case '\'':
	          break;
	        default:
	#if defined (HANDLE_MULTIBYTE)
	          if (is_basic (c))
	            printable = ISPRINT (c) != 0;
	          else
	            {
	              clen = mbrtowc (&wc, s, MB_CUR_MAX, &state);
	              if (MB_INVALIDCH (clen))
	                {
	                  /* State is unspecified after a bad sequence. */
	                  memset (&state, '\0', sizeof (mbstate_t));
	                  printable = 0;
	                }
	              else
	                printable = iswprint (wc) != 0;
	            }
	#else
	          printable = ISPRINT (c) != 0;
	#endif
	          if (printable == 0)
	            {
	              /* Emit just this byte as a three-digit octal escape. */
	              *r++ = '\\';
	              *r++ = TOCHAR ((c >> 6) & 07);
	              *r++ = TOCHAR ((c >> 3) & 07);
	              *r++ = TOCHAR (c & 07);
	              continue;
	            }
	          add_backslash = 0;
	          break;
	        }

	      /* mbrtowc reported a null wide character: stop quoting here. */
	      if (clen == 0)
	        break;

	      if (add_backslash)
	        *r++ = '\\';

	      if (clen == 1)
	        *r++ = c;
	      else
	        {
	          for (i = 0; i < (int)clen; i++)
	            *r++ = (unsigned char)s[i];
	          s += clen - 1;	/* -1 because of the increment above */
	        }
	    }

	  *r++ = '\'';
	  *r = '\0';
	  if (rlen)
	    *rlen = r - ret;
	  return ret;
	}

	#if defined (HANDLE_MULTIBYTE)
	/* Return 1 if STRING, converted to wide characters with mbstowcs,
	   contains a character for which iswprint is false, and 0 otherwise.
	   A string that cannot be converted (mbstowcs returns (size_t)-1) is
	   reported as needing quoting. */
	int
	ansic_wshouldquote (string)
	     const char *string;
	{
	  const wchar_t *wcs;
	  wchar_t *wcstr;
	  size_t slen;
	  int result;

	  /* First pass only counts the wide characters needed. */
	  slen = mbstowcs ((wchar_t *)NULL, string, 0);

	  /* Do not continue with a failed conversion: the second mbstowcs
	     call would fail as well and leave the buffer unterminated. */
	  if (slen == (size_t)-1)
	    return 1;

	  wcstr = (wchar_t *)xmalloc (sizeof (wchar_t) * (slen + 1));
	  mbstowcs (wcstr, string, slen + 1);

	  result = 0;
	  for (wcs = wcstr; *wcs; wcs++)
	    if (iswprint (*wcs) == 0)
	      {
	        result = 1;
	        break;
	      }

	  free (wcstr);
	  return result;
	}
	#endif

	/* Return 1 if STRING has to be quoted with $'...' because it contains a
	   non-printing character, 0 if it does not or if STRING is null. */
	int
	ansic_shouldquote (string)
	     const char *string;
	{
	  const char *p;
	  unsigned char ch;

	  if (string == 0)
	    return 0;

	  p = string;
	  while ((ch = *p) != 0)
	    {
	#if defined (HANDLE_MULTIBYTE)
	      /* At the first non-basic byte, let the wide-character scan
	         decide for the remainder of the string. */
	      if (is_basic (ch) == 0)
	        return (ansic_wshouldquote (p));
	#endif
	      if (ISPRINT (ch) == 0)
	        return 1;
	      p++;
	    }

	  return 0;
	}

	/* $'...' ANSI-C expand the characters of STRING from index START up to,
	   but not including, END, and return the result in allocated memory.
	   If LENP is non-null, the length of the result is stored in *LENP.  An
	   empty range yields an empty string and a length of 0. */
	char *
	ansiexpand (string, start, end, lenp)
	     char *string;
	     int start, end, *lenp;
	{
	  char *temp, *t;
	  int len, tlen;

	  /* Make a NUL-terminated private copy of the requested slice. */
	  temp = (char *)xmalloc (end - start + 1);
	  for (tlen = 0, len = start; len < end; )
	    temp[tlen++] = string[len++];
	  temp[tlen] = '\0';

	  if (*temp)
	    {
	      /* Flag value 2 asks ansicstr to expand as for the parser. */
	      t = ansicstr (temp, tlen, 2, (int *)NULL, lenp);
	      free (temp);
	      return (t);
	    }
	  else
	    {
	      /* Nothing to translate: hand back the empty copy itself. */
	      if (lenp)
	        *lenp = 0;
	      return (temp);
	    }
	}