/* xmbsrtowcs.c -- replacement function for mbsrtowcs */

/* Copyright (C) 2002-2013 Free Software Foundation, Inc.

   This file is part of GNU Bash, the Bourne Again SHell.

   Bash is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   Bash is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bash.  If not, see <http://www.gnu.org/licenses/>.
*/

Chet Ramey495aee42011-11-22 19:11:26 -050021/* Ask for GNU extensions to get extern declaration for mbsnrtowcs if
22 available via glibc. */
23#ifndef _GNU_SOURCE
24# define _GNU_SOURCE 1
25#endif
26
Jari Aalto7117c2d2002-07-17 14:10:11 +000027#include <config.h>
28
29#include <bashansi.h>
30
31/* <wchar.h>, <wctype.h> and <stdlib.h> are included in "shmbutil.h".
32 If <wchar.h>, <wctype.h>, mbsrtowcs(), exist, HANDLE_MULTIBYTE
33 is defined as 1. */
34#include <shmbutil.h>
35
36#if HANDLE_MULTIBYTE
Jari Aalto31859422009-01-12 13:36:28 +000037
Chet Rameyd79bb402012-07-10 09:43:39 -040038#define WSBUF_INC 32
39
Jari Aalto31859422009-01-12 13:36:28 +000040#ifndef FREE
41# define FREE(x) do { if (x) free (x); } while (0)
42#endif
Chet Ramey495aee42011-11-22 19:11:26 -050043
44#if ! HAVE_STRCHRNUL
45extern char *strchrnul __P((const char *, int));
46#endif
47
/* On some locales (ex. ja_JP.sjis), mbsrtowcs doesn't convert 0x5c to U<0x5c>.
   So, this function is made for converting 0x5c to U<0x5c>. */

/* Conversion state used when the caller passes a NULL PSTATE.  It is
   zeroed the first time it is needed (tracked by LOCAL_STATE_USE) and then
   persists across calls. */
static mbstate_t local_state;
static int local_state_use = 0;

/* Replacement for mbsrtowcs that always maps a backslash byte to L'\\'
   when the conversion state is initial.

   DEST    where to store the wide characters, or NULL to only count.
   SRC     address of the pointer to the multibyte string to convert.
   LEN     maximum number of wide characters to store in DEST.
   PSTATE  conversion state; if NULL, the file-static LOCAL_STATE is used.

   If DEST is NULL, nothing visible to the caller is modified: the work is
   done on copies of *SRC and of the state, and the value returned by
   mbsrtowcs for the whole string is returned.

   Otherwise the return value is the number of wide characters stored, not
   counting a terminating L'\0'.  *SRC is set to NULL if the terminating
   NUL was converted; if LEN characters were stored first, *SRC is left
   pointing at the first unconverted byte.  If mbrtowc reports an invalid
   or incomplete sequence, (size_t)-1 is returned. */
size_t
xmbsrtowcs (dest, src, len, pstate)
     wchar_t *dest;
     const char **src;
     size_t len;
     mbstate_t *pstate;
{
  mbstate_t *ps;
  size_t mblength, wclength, n;

  ps = pstate;
  if (pstate == NULL)
    {
      if (!local_state_use)
        {
          memset (&local_state, '\0', sizeof(mbstate_t));
          local_state_use = 1;
        }
      ps = &local_state;
    }

  /* N tracks the number of bytes of *SRC not yet consumed (excluding the
     terminating NUL). */
  n = strlen (*src);

  if (dest == NULL)
    {
      wchar_t *wsbuf;
      const char *mbs;
      mbstate_t psbuf;

      /* It doesn't matter if malloc fails here, since mbsrtowcs should do
         the right thing with a NULL first argument. */
      wsbuf = (wchar_t *) malloc ((n + 1) * sizeof(wchar_t));
      mbs = *src;
      psbuf = *ps;

      wclength = mbsrtowcs (wsbuf, &mbs, n, &psbuf);

      if (wsbuf)
        free (wsbuf);
      return wclength;
    }

  for (wclength = 0; wclength < len; wclength++, dest++)
    {
      if (mbsinit(ps))
        {
          if (**src == '\0')
            {
              *dest = L'\0';
              *src = NULL;
              return (wclength);
            }
          else if (**src == '\\')
            {
              /* Bypass the locale's conversion for a backslash. */
              *dest = L'\\';
              mblength = 1;
            }
          else
            mblength = mbrtowc(dest, *src, n, ps);
        }
      else
        mblength = mbrtowc(dest, *src, n, ps);

      /* Cannot convert multibyte character to wide character. */
      if (mblength == (size_t)-1 || mblength == (size_t)-2)
        return (size_t)-1;

      *src += mblength;
      n -= mblength;

      /* The multibyte string has been completely converted,
         including the terminating '\0'. */
      if (*dest == L'\0')
        {
          *src = NULL;
          break;
        }
    }

  return (wclength);
}

#if HAVE_MBSNRTOWCS
/* Convert a multibyte string to a wide character string. Memory for the
   new wide character string is obtained with malloc.

   Fast multiple-character version of xdupmbstowcs used when the indices are
   not required and mbsnrtowcs is available.

   The string is converted in runs delimited by backslashes; each backslash
   met while the conversion state is initial is stored directly as L'\\'
   instead of being passed through the locale's conversion.

   On success *DESTP receives the malloc'd buffer and the return value is
   the number of wide characters in it, not counting the terminating L'\0'.
   If a conversion or an allocation fails, the buffer is freed, *DESTP is
   set to NULL and (size_t)-1 is returned. */

static size_t
xdupmbstowcs2 (destp, src)
     wchar_t **destp;	/* Store the pointer to the wide character string */
     const char *src;	/* Multibyte character string */
{
  const char *p;	/* Conversion start position of src */
  wchar_t *wsbuf;	/* Buffer for wide characters. */
  size_t wsbuf_size;	/* Size of WSBUF */
  size_t wcnum;		/* Number of wide characters in WSBUF */
  mbstate_t state;	/* Conversion State */
  size_t n, wcslength;	/* Number of wide characters produced by the conversion. */
  const char *end_or_backslash;
  size_t nms;		/* Number of multibyte characters to convert at one time. */
  mbstate_t tmp_state;
  const char *tmp_p;

  memset (&state, '\0', sizeof(mbstate_t));

  wsbuf_size = 0;
  wsbuf = NULL;

  p = src;
  wcnum = 0;
  do
    {
      /* The current run extends up to the next backslash, or through the
         terminating NUL if there is no further backslash. */
      end_or_backslash = strchrnul(p, '\\');
      nms = end_or_backslash - p;
      if (*end_or_backslash == '\0')
        nms++;

      /* Compute the number of produced wide-characters. */
      tmp_p = p;
      tmp_state = state;

      if (nms == 0 && *p == '\\')	/* special initial case */
        nms = wcslength = 1;
      else
        wcslength = mbsnrtowcs (NULL, &tmp_p, nms, 0, &tmp_state);

      if (wcslength == 0)
        {
          tmp_p = p;		/* will need below */
          tmp_state = state;
          wcslength = 1;	/* take a single byte */
        }

      /* Conversion failed. */
      if (wcslength == (size_t)-1)
        {
          free (wsbuf);
          *destp = NULL;
          return (size_t)-1;
        }

      /* Resize the buffer if it is not large enough. */
      if (wsbuf_size < wcnum+wcslength+1)	/* 1 for the L'\0' or the potential L'\\' */
        {
          wchar_t *wstmp;

          while (wsbuf_size < wcnum+wcslength+1) /* 1 for the L'\0' or the potential L'\\' */
            wsbuf_size += WSBUF_INC;

          wstmp = (wchar_t *) realloc (wsbuf, wsbuf_size * sizeof (wchar_t));
          if (wstmp == NULL)
            {
              free (wsbuf);
              *destp = NULL;
              return (size_t)-1;
            }
          wsbuf = wstmp;
        }

      /* Perform the conversion. This is assumed to return 'wcslength'.
         It may set 'p' to NULL. */
      n = mbsnrtowcs(wsbuf+wcnum, &p, nms, wsbuf_size-wcnum, &state);

      /* Nothing was produced and the source is exhausted: terminate the
         result and stop. */
      if (n == 0 && p == 0)
        {
          wsbuf[wcnum] = L'\0';
          break;
        }

      /* Compensate for taking single byte on wcs conversion failure above. */
      if (wcslength == 1 && (n == 0 || n == (size_t)-1))
        {
          /* Roll back to the position and state saved before the real
             conversion and copy one byte through unconverted. */
          state = tmp_state;
          p = tmp_p;
          /* NOTE(review): *p is a plain char, so where char is signed a
             byte >= 0x80 is sign-extended by this assignment -- confirm
             that is intended. */
          wsbuf[wcnum] = *p;
          if (*p == 0)
            break;
          else
            {
              wcnum++; p++;
            }
        }
      else
        wcnum += wcslength;

      /* Store the backslash that ended this run directly. */
      if (mbsinit (&state) && (p != NULL) && (*p == '\\'))
        {
          wsbuf[wcnum++] = L'\\';
          p++;
        }
    }
  while (p != NULL);

  *destp = wsbuf;

  /* Return the length of the wide character string, not including `\0'. */
  return wcnum;
}
#endif /* HAVE_MBSNRTOWCS */

Jari Aaltob80f6442004-07-27 13:29:18 +0000256/* Convert a multibyte string to a wide character string. Memory for the
257 new wide character string is obtained with malloc.
258
259 The return value is the length of the wide character string. Returns a
260 pointer to the wide character string in DESTP. If INDICESP is not NULL,
261 INDICESP stores the pointer to the pointer array. Each pointer is to
262 the first byte of each multibyte character. Memory for the pointer array
263 is obtained with malloc, too.
264 If conversion is failed, the return value is (size_t)-1 and the values
265 of DESTP and INDICESP are NULL. */
266
Jari Aaltob80f6442004-07-27 13:29:18 +0000267size_t
268xdupmbstowcs (destp, indicesp, src)
269 wchar_t **destp; /* Store the pointer to the wide character string */
270 char ***indicesp; /* Store the pointer to the pointer array. */
271 const char *src; /* Multibyte character string */
272{
273 const char *p; /* Conversion start position of src */
274 wchar_t wc; /* Created wide character by conversion */
275 wchar_t *wsbuf; /* Buffer for wide characters. */
276 char **indices; /* Buffer for indices. */
277 size_t wsbuf_size; /* Size of WSBUF */
278 size_t wcnum; /* Number of wide characters in WSBUF */
279 mbstate_t state; /* Conversion State */
280
281 /* In case SRC or DESP is NULL, conversion doesn't take place. */
282 if (src == NULL || destp == NULL)
283 {
Jari Aalto06285672006-10-10 14:15:34 +0000284 if (destp)
285 *destp = NULL;
Chet Rameyac50fba2014-02-26 09:36:43 -0500286 if (indicesp)
287 *indicesp = NULL;
Jari Aaltob80f6442004-07-27 13:29:18 +0000288 return (size_t)-1;
289 }
290
Chet Ramey495aee42011-11-22 19:11:26 -0500291#if HAVE_MBSNRTOWCS
292 if (indicesp == NULL)
293 return (xdupmbstowcs2 (destp, src));
294#endif
295
Jari Aaltob80f6442004-07-27 13:29:18 +0000296 memset (&state, '\0', sizeof(mbstate_t));
297 wsbuf_size = WSBUF_INC;
298
299 wsbuf = (wchar_t *) malloc (wsbuf_size * sizeof(wchar_t));
300 if (wsbuf == NULL)
301 {
302 *destp = NULL;
Chet Rameyac50fba2014-02-26 09:36:43 -0500303 if (indicesp)
304 *indicesp = NULL;
Jari Aaltob80f6442004-07-27 13:29:18 +0000305 return (size_t)-1;
306 }
307
Jari Aalto31859422009-01-12 13:36:28 +0000308 indices = NULL;
309 if (indicesp)
Jari Aaltob80f6442004-07-27 13:29:18 +0000310 {
Jari Aalto31859422009-01-12 13:36:28 +0000311 indices = (char **) malloc (wsbuf_size * sizeof(char *));
312 if (indices == NULL)
313 {
314 free (wsbuf);
315 *destp = NULL;
Chet Rameyac50fba2014-02-26 09:36:43 -0500316 *indicesp = NULL;
Jari Aalto31859422009-01-12 13:36:28 +0000317 return (size_t)-1;
318 }
Jari Aaltob80f6442004-07-27 13:29:18 +0000319 }
320
321 p = src;
322 wcnum = 0;
Jari Aalto95732b42005-12-07 14:08:12 +0000323 do
324 {
Jari Aaltob80f6442004-07-27 13:29:18 +0000325 size_t mblength; /* Byte length of one multibyte character. */
326
Jari Aalto95732b42005-12-07 14:08:12 +0000327 if (mbsinit (&state))
Jari Aaltob80f6442004-07-27 13:29:18 +0000328 {
329 if (*p == '\0')
330 {
331 wc = L'\0';
332 mblength = 1;
333 }
334 else if (*p == '\\')
335 {
336 wc = L'\\';
337 mblength = 1;
338 }
339 else
340 mblength = mbrtowc(&wc, p, MB_LEN_MAX, &state);
341 }
342 else
343 mblength = mbrtowc(&wc, p, MB_LEN_MAX, &state);
344
345 /* Conversion failed. */
346 if (MB_INVALIDCH (mblength))
347 {
348 free (wsbuf);
Jari Aalto31859422009-01-12 13:36:28 +0000349 FREE (indices);
Jari Aaltob80f6442004-07-27 13:29:18 +0000350 *destp = NULL;
Chet Rameyac50fba2014-02-26 09:36:43 -0500351 if (indicesp)
352 *indicesp = NULL;
Jari Aaltob80f6442004-07-27 13:29:18 +0000353 return (size_t)-1;
354 }
355
356 ++wcnum;
357
358 /* Resize buffers when they are not large enough. */
359 if (wsbuf_size < wcnum)
360 {
361 wchar_t *wstmp;
362 char **idxtmp;
363
364 wsbuf_size += WSBUF_INC;
365
366 wstmp = (wchar_t *) realloc (wsbuf, wsbuf_size * sizeof (wchar_t));
367 if (wstmp == NULL)
368 {
369 free (wsbuf);
Jari Aalto31859422009-01-12 13:36:28 +0000370 FREE (indices);
Jari Aaltob80f6442004-07-27 13:29:18 +0000371 *destp = NULL;
Chet Rameyac50fba2014-02-26 09:36:43 -0500372 if (indicesp)
373 *indicesp = NULL;
Jari Aaltob80f6442004-07-27 13:29:18 +0000374 return (size_t)-1;
375 }
376 wsbuf = wstmp;
377
Jari Aalto31859422009-01-12 13:36:28 +0000378 if (indicesp)
Jari Aaltob80f6442004-07-27 13:29:18 +0000379 {
Chet Rameyac50fba2014-02-26 09:36:43 -0500380 idxtmp = (char **) realloc (indices, wsbuf_size * sizeof (char *));
Jari Aalto31859422009-01-12 13:36:28 +0000381 if (idxtmp == NULL)
382 {
383 free (wsbuf);
384 free (indices);
385 *destp = NULL;
Chet Rameyac50fba2014-02-26 09:36:43 -0500386 if (indicesp)
387 *indicesp = NULL;
Jari Aalto31859422009-01-12 13:36:28 +0000388 return (size_t)-1;
389 }
390 indices = idxtmp;
Jari Aaltob80f6442004-07-27 13:29:18 +0000391 }
Jari Aaltob80f6442004-07-27 13:29:18 +0000392 }
393
394 wsbuf[wcnum - 1] = wc;
Jari Aalto31859422009-01-12 13:36:28 +0000395 if (indices)
396 indices[wcnum - 1] = (char *)p;
Jari Aaltob80f6442004-07-27 13:29:18 +0000397 p += mblength;
Jari Aalto95732b42005-12-07 14:08:12 +0000398 }
399 while (MB_NULLWCH (wc) == 0);
Jari Aaltob80f6442004-07-27 13:29:18 +0000400
401 /* Return the length of the wide character string, not including `\0'. */
402 *destp = wsbuf;
403 if (indicesp != NULL)
404 *indicesp = indices;
Jari Aaltob80f6442004-07-27 13:29:18 +0000405
406 return (wcnum - 1);
407}
408
#endif /* HANDLE_MULTIBYTE */