Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 1 | /* casemod.c -- functions to change case of strings */ |
| 2 | |
| 3 | /* Copyright (C) 2008,2009 Free Software Foundation, Inc. |
| 4 | |
| 5 | This file is part of GNU Bash, the Bourne Again SHell. |
| 6 | |
| 7 | Bash is free software: you can redistribute it and/or modify |
| 8 | it under the terms of the GNU General Public License as published by |
| 9 | the Free Software Foundation, either version 3 of the License, or |
| 10 | (at your option) any later version. |
| 11 | |
| 12 | Bash is distributed in the hope that it will be useful, |
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | GNU General Public License for more details. |
| 16 | |
| 17 | You should have received a copy of the GNU General Public License |
| 18 | along with Bash. If not, see <http://www.gnu.org/licenses/>. |
| 19 | */ |
| 20 | |
| 21 | #if defined (HAVE_CONFIG_H) |
| 22 | # include <config.h> |
| 23 | #endif |
| 24 | |
| 25 | #if defined (HAVE_UNISTD_H) |
| 26 | # include <unistd.h> |
| 27 | #endif /* HAVE_UNISTD_H */ |
| 28 | |
| 29 | #include <stdc.h> |
| 30 | |
| 31 | #include <bashansi.h> |
| 32 | #include <bashintl.h> |
| 33 | #include <bashtypes.h> |
| 34 | |
| 35 | #include <stdio.h> |
| 36 | #include <ctype.h> |
| 37 | #include <xmalloc.h> |
| 38 | |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 39 | #include <shmbchar.h> |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 40 | #include <shmbutil.h> |
| 41 | #include <chartypes.h> |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 42 | #include <typemax.h> |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 43 | |
| 44 | #include <glob/strmatch.h> |
| 45 | |
/* Wide-character case conversions.  A character that does not have the
   requested opposite case is returned unchanged. */
#define _to_wupper(wc)	(iswlower (wc) ? towupper (wc) : (wc))
#define _to_wlower(wc)	(iswupper (wc) ? towlower (wc) : (wc))

#if !defined (HANDLE_MULTIBYTE)
/* Without multibyte support every character is exactly one byte.  Fetch it
   as an unsigned char: plain `char' may be signed, and handing a negative
   value (any byte >= 0x80) to the <ctype.h> functions is undefined behavior
   and makes byte 0xFF indistinguishable from EOF. */
#  define cval(s, i)	((unsigned char)(s)[(i)])
#  define iswalnum(c)	(isalnum ((unsigned char)(c)))
/* Flip the case of X: uppercase becomes lowercase, anything else is run
   through TOUPPER (which leaves caseless characters alone). */
#  define TOGGLE(x)	(ISUPPER (x) ? tolower (x) : (TOUPPER (x)))
#else
/* Wide-character version of the case flip described above. */
#  define TOGGLE(x)	(iswupper (x) ? towlower (x) : (_to_wupper(x)))
#endif
| 56 | |
| 57 | /* Operation codes accepted in the FLAGS argument of sh_modcase(). These must agree with the defines in externs.h */ |
Jari Aalto | 17345e5 | 2009-02-19 22:21:29 +0000 | [diff] [blame] | 58 | #define CASE_NOOP 0x0000 /* leave the character unchanged */ |
| 59 | #define CASE_LOWER 0x0001 /* convert the character to lowercase */ |
| 60 | #define CASE_UPPER 0x0002 /* convert the character to uppercase */ |
| 61 | #define CASE_CAPITALIZE 0x0004 /* uppercase the first character, lowercase the rest */ |
| 62 | #define CASE_UNCAP 0x0008 /* lowercase the first character, uppercase the rest */ |
| 63 | #define CASE_TOGGLE 0x0010 /* toggle the case of the first character of each word only */ |
| 64 | #define CASE_TOGGLEALL 0x0020 /* toggle the case of every character */ |
| 65 | #define CASE_UPFIRST 0x0040 /* uppercase the first character, leave the rest alone */ |
| 66 | #define CASE_LOWFIRST 0x0080 /* lowercase the first character, leave the rest alone */ |
| 67 | |
| 68 | #define CASE_USEWORDS 0x1000 /* modifier OR'ed into one of the codes above: "first character" then means the first character of each alphanumeric word in the passed string rather than of the whole string */ |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 69 | |
| 70 | /* Defined elsewhere; sh_modcase() passes it a string plus start and end byte offsets and frees the result, so it presumably returns a newly-allocated copy of that range -- confirm against its definition. */ extern char *substring __P((char *, int, int)); |
| 71 | |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 72 | /* Fallback in case no included header has already defined UCHAR_MAX. */ #ifndef UCHAR_MAX |
| 73 | # define UCHAR_MAX TYPE_MAXIMUM(unsigned char) |
| 74 | #endif |
| 75 | |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 76 | #if defined (HANDLE_MULTIBYTE) |
/* Return, as a wide character, the character of S that begins at byte
   offset I.  In a single-byte locale, or when the byte at I is a basic
   character, the byte itself is returned.  The byte is also returned as-is
   when it is the last one in S or when it does not start a valid, non-null
   multibyte sequence. */
static wchar_t
cval (s, i)
     char *s;
     int i;
{
  mbstate_t ps;
  wchar_t wch;
  size_t nbytes;
  int slen;

  /* Fast path: nothing to decode. */
  if (MB_CUR_MAX == 1 || is_basic (s[i]))
    return ((wchar_t)s[i]);

  /* A multibyte sequence needs at least one byte after the one at I. */
  slen = strlen (s);
  if (i >= (slen - 1))
    return ((wchar_t)s[i]);

  /* Decode starting from the initial shift state. */
  memset (&ps, 0, sizeof (mbstate_t));
  nbytes = mbrtowc (&wch, s + i, slen - i, &ps);

  return (MB_INVALIDCH (nbytes) || MB_NULLWCH (nbytes)) ? (wchar_t)s[i] : wch;
}
| 98 | #endif |
| 99 | |
| 100 | /* Modify the case of characters in STRING matching PAT based on the value of |
| 101 | FLAGS. If PAT is null, modify the case of each character */ |
| 102 | char * |
| 103 | sh_modcase (string, pat, flags) |
| 104 | const char *string; |
| 105 | char *pat; |
| 106 | int flags; |
| 107 | { |
| 108 | int start, next, end; |
Jari Aalto | 17345e5 | 2009-02-19 22:21:29 +0000 | [diff] [blame] | 109 | int inword, c, nc, nop, match, usewords; |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 110 | char *ret, *s; |
| 111 | wchar_t wc; |
| 112 | #if defined (HANDLE_MULTIBYTE) |
| 113 | wchar_t nwc; |
| 114 | char mb[MB_LEN_MAX+1]; |
| 115 | int mlen; |
Chet Ramey | 0001803 | 2011-11-21 20:51:19 -0500 | [diff] [blame] | 116 | size_t m; |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 117 | mbstate_t state; |
| 118 | #endif |
| 119 | |
Chet Ramey | 495aee4 | 2011-11-22 19:11:26 -0500 | [diff] [blame] | 120 | if (string == 0 || *string == 0) |
| 121 | { |
| 122 | ret = (char *)xmalloc (1); |
| 123 | ret[0] = '\0'; |
| 124 | return ret; |
| 125 | } |
| 126 | |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 127 | #if defined (HANDLE_MULTIBYTE) |
| 128 | memset (&state, 0, sizeof (mbstate_t)); |
| 129 | #endif |
| 130 | |
| 131 | start = 0; |
| 132 | end = strlen (string); |
| 133 | |
| 134 | ret = (char *)xmalloc (end + 1); |
| 135 | strcpy (ret, string); |
| 136 | |
Jari Aalto | 17345e5 | 2009-02-19 22:21:29 +0000 | [diff] [blame] | 137 | /* See if we are supposed to split on alphanumerics and operate on each word */ |
| 138 | usewords = (flags & CASE_USEWORDS); |
| 139 | flags &= ~CASE_USEWORDS; |
| 140 | |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 141 | inword = 0; |
| 142 | while (start < end) |
| 143 | { |
| 144 | wc = cval (ret, start); |
| 145 | |
| 146 | if (iswalnum (wc) == 0) |
| 147 | { |
| 148 | inword = 0; |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 149 | #if 0 |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 150 | ADVANCE_CHAR (ret, end, start); |
| 151 | continue; |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 152 | #endif |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 153 | } |
| 154 | |
| 155 | if (pat) |
| 156 | { |
| 157 | next = start; |
| 158 | ADVANCE_CHAR (ret, end, next); |
| 159 | s = substring (ret, start, next); |
| 160 | match = strmatch (pat, s, FNM_EXTMATCH) != FNM_NOMATCH; |
| 161 | free (s); |
| 162 | if (match == 0) |
| 163 | { |
| 164 | start = next; |
| 165 | inword = 1; |
| 166 | continue; |
| 167 | } |
| 168 | } |
| 169 | |
Jari Aalto | 17345e5 | 2009-02-19 22:21:29 +0000 | [diff] [blame] | 170 | /* XXX - for now, the toggling operators work on the individual |
| 171 | words in the string, breaking on alphanumerics. Should I |
| 172 | leave the capitalization operators to do that also? */ |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 173 | if (flags == CASE_CAPITALIZE) |
| 174 | { |
Jari Aalto | 17345e5 | 2009-02-19 22:21:29 +0000 | [diff] [blame] | 175 | if (usewords) |
| 176 | nop = inword ? CASE_LOWER : CASE_UPPER; |
| 177 | else |
| 178 | nop = (start > 0) ? CASE_LOWER : CASE_UPPER; |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 179 | inword = 1; |
| 180 | } |
| 181 | else if (flags == CASE_UNCAP) |
| 182 | { |
Jari Aalto | 17345e5 | 2009-02-19 22:21:29 +0000 | [diff] [blame] | 183 | if (usewords) |
| 184 | nop = inword ? CASE_UPPER : CASE_LOWER; |
| 185 | else |
| 186 | nop = (start > 0) ? CASE_UPPER : CASE_LOWER; |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 187 | inword = 1; |
| 188 | } |
Jari Aalto | 17345e5 | 2009-02-19 22:21:29 +0000 | [diff] [blame] | 189 | else if (flags == CASE_UPFIRST) |
| 190 | { |
| 191 | if (usewords) |
| 192 | nop = inword ? CASE_NOOP : CASE_UPPER; |
| 193 | else |
| 194 | nop = (start > 0) ? CASE_NOOP : CASE_UPPER; |
| 195 | inword = 1; |
| 196 | } |
| 197 | else if (flags == CASE_LOWFIRST) |
| 198 | { |
| 199 | if (usewords) |
| 200 | nop = inword ? CASE_NOOP : CASE_LOWER; |
| 201 | else |
| 202 | nop = (start > 0) ? CASE_NOOP : CASE_LOWER; |
| 203 | inword = 1; |
| 204 | } |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 205 | else if (flags == CASE_TOGGLE) |
| 206 | { |
| 207 | nop = inword ? CASE_NOOP : CASE_TOGGLE; |
| 208 | inword = 1; |
| 209 | } |
| 210 | else |
| 211 | nop = flags; |
| 212 | |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 213 | /* Need to check UCHAR_MAX since wc may have already been converted to a |
| 214 | wide character by cval() */ |
| 215 | if (MB_CUR_MAX == 1 || (wc <= UCHAR_MAX && is_basic ((int)wc))) |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 216 | { |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 217 | singlebyte: |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 218 | switch (nop) |
| 219 | { |
| 220 | default: |
| 221 | case CASE_NOOP: nc = wc; break; |
| 222 | case CASE_UPPER: nc = TOUPPER (wc); break; |
| 223 | case CASE_LOWER: nc = TOLOWER (wc); break; |
| 224 | case CASE_TOGGLEALL: |
| 225 | case CASE_TOGGLE: nc = TOGGLE (wc); break; |
| 226 | } |
| 227 | ret[start] = nc; |
| 228 | } |
| 229 | #if defined (HANDLE_MULTIBYTE) |
| 230 | else |
| 231 | { |
Chet Ramey | 0001803 | 2011-11-21 20:51:19 -0500 | [diff] [blame] | 232 | m = mbrtowc (&wc, string + start, end - start, &state); |
| 233 | if (MB_INVALIDCH (m)) |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 234 | { |
| 235 | wc = (unsigned char)string[start]; |
| 236 | goto singlebyte; |
| 237 | } |
Chet Ramey | 0001803 | 2011-11-21 20:51:19 -0500 | [diff] [blame] | 238 | else if (MB_NULLWCH (m)) |
| 239 | wc = L'\0'; |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 240 | switch (nop) |
| 241 | { |
| 242 | default: |
| 243 | case CASE_NOOP: nwc = wc; break; |
Chet Ramey | ac50fba | 2014-02-26 09:36:43 -0500 | [diff] [blame] | 244 | case CASE_UPPER: nwc = _to_wupper (wc); break; |
| 245 | case CASE_LOWER: nwc = _to_wlower (wc); break; |
Jari Aalto | 3185942 | 2009-01-12 13:36:28 +0000 | [diff] [blame] | 246 | case CASE_TOGGLEALL: |
| 247 | case CASE_TOGGLE: nwc = TOGGLE (wc); break; |
| 248 | } |
| 249 | if (nwc != wc) /* just skip unchanged characters */ |
| 250 | { |
| 251 | mlen = wcrtomb (mb, nwc, &state); |
| 252 | if (mlen > 0) |
| 253 | mb[mlen] = '\0'; |
| 254 | /* Assume the same width */ |
| 255 | strncpy (ret + start, mb, mlen); |
| 256 | } |
| 257 | } |
| 258 | #endif |
| 259 | |
| 260 | /* This assumes that the upper and lower case versions are the same width. */ |
| 261 | ADVANCE_CHAR (ret, end, start); |
| 262 | } |
| 263 | |
| 264 | return ret; |
| 265 | } |