blob: a683177b0e5e602ab593686800f74f716483cf5c [file] [log] [blame]
/**************************************************************************
 *   chars.c  --  This file is part of GNU nano.                          *
 *                                                                        *
 *   Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,  *
 *   2010, 2011, 2013, 2014 Free Software Foundation, Inc.                *
 *   Copyright (C) 2016 Benno Schulenberg                                 *
 *                                                                        *
 *   GNU nano is free software: you can redistribute it and/or modify     *
 *   it under the terms of the GNU General Public License as published    *
 *   by the Free Software Foundation, either version 3 of the License,    *
 *   or (at your option) any later version.                               *
 *                                                                        *
 *   GNU nano is distributed in the hope that it will be useful,          *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty          *
 *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.              *
 *   See the GNU General Public License for more details.                 *
 *                                                                        *
 *   You should have received a copy of the GNU General Public License    *
 *   along with this program.  If not, see http://www.gnu.org/licenses/.  *
 *                                                                        *
 **************************************************************************/
22
David Lawrence Ramsey034b9942005-12-08 02:47:10 +000023#include "proto.h"
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +000024
David Lawrence Ramsey5508cc52005-01-14 04:22:14 +000025#include <string.h>
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +000026#include <ctype.h>
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +000027
David Lawrence Ramsey7eb30a82005-07-17 02:40:07 +000028#ifdef ENABLE_UTF8
David Lawrence Ramsey42abfe02005-01-22 18:24:16 +000029#ifdef HAVE_WCHAR_H
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +000030#include <wchar.h>
31#endif
David Lawrence Ramsey42abfe02005-01-22 18:24:16 +000032#ifdef HAVE_WCTYPE_H
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +000033#include <wctype.h>
34#endif
David Lawrence Ramsey61f56732005-07-21 22:12:03 +000035
David Lawrence Ramsey4d72de72006-04-12 15:27:40 +000036static bool use_utf8 = FALSE;
37 /* Whether we've enabled UTF-8 support. */
David Lawrence Ramsey4d72de72006-04-12 15:27:40 +000038
39/* Enable UTF-8 support. */
40void utf8_init(void)
41{
42 use_utf8 = TRUE;
43}
44
45/* Is UTF-8 support enabled? */
46bool using_utf8(void)
47{
48 return use_utf8;
49}
Benno Schulenberg70b2d082014-04-05 20:28:29 +000050#endif /* ENABLE_UTF8 */
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +000051
/* Append at most len2 bytes of str2 to the first len1 bytes of str1.
 * The buffer of str1 is reallocated to hold len1 + len2 + 1 bytes, so
 * the caller must continue with the returned pointer instead of str1.
 * The second string is freed. */
char *addstrings(char *str1, size_t len1, char *str2, size_t len2)
{
    char *joined = charealloc(str1, len1 + len2 + 1);
    char *tail = joined + len1;

    /* Terminate the first part, so that strncat() appends exactly at
     * offset len1, whatever was stored there before. */
    *tail = '\0';
    strncat(tail, str2, len2);

    free(str2);

    return joined;
}
63
#ifndef HAVE_ISBLANK
/* Replacement for isblank(), compiled only when HAVE_ISBLANK is not
 * defined.  A character is blank when it is whitespace and is either
 * a tab or not a control character. */
bool nisblank(int c)
{
    return (isspace(c) && !(c != '\t' && is_cntrl_char(c)));
}
#endif
71
#if !defined(HAVE_ISWBLANK) && defined(ENABLE_UTF8)
/* Replacement for iswblank(), compiled only for UTF-8 builds in which
 * HAVE_ISWBLANK is not defined.  A wide character is blank when it is
 * whitespace and is either a tab or not a control character. */
bool niswblank(wchar_t wc)
{
    return (iswspace(wc) && !(wc != '\t' && is_cntrl_wchar(wc)));
}
#endif
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +000079
/* Return TRUE when the value of c fits in an unsigned char (that is:
 * when it lies in the range of a single byte), and FALSE otherwise.
 * Negative values never fit. */
bool is_byte(int c)
{
    const unsigned int value = (unsigned int)c;

    /* (unsigned char)-1 is the largest value a byte can hold. */
    return (value <= (unsigned char)-1);
}
85
/* Call mbtowc() with a null string, which puts its internal conversion
 * state back to the initial state.  The functions in this file call
 * this after mbtowc() has reported an invalid sequence. */
void mbtowc_reset(void)
{
    IGNORE_CALL_RESULT(mbtowc(NULL, NULL, 0));
}
90
/* The multibyte counterpart of isalpha(): in UTF-8 mode, decode the
 * character that starts at c and classify it with iswalpha(); an
 * undecodable sequence is never alphabetic.  Otherwise, classify the
 * single byte at c with isalpha(). */
bool is_alpha_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wide;
        int result = mbtowc(&wide, c, MB_CUR_MAX);

        if (result < 0) {
            mbtowc_reset();
            return 0;
        }

        return iswalpha(wide);
    }
#endif

    return isalpha((unsigned char)*c);
}
110
/* The multibyte counterpart of isalnum(): in UTF-8 mode, decode the
 * character that starts at c and classify it with iswalnum(); an
 * undecodable sequence is never alphanumeric.  Otherwise, classify the
 * single byte at c with isalnum(). */
bool is_alnum_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wide;
        int result = mbtowc(&wide, c, MB_CUR_MAX);

        if (result < 0) {
            mbtowc_reset();
            return 0;
        }

        return iswalnum(wide);
    }
#endif

    return isalnum((unsigned char)*c);
}
130
/* The multibyte counterpart of isblank(): in UTF-8 mode, decode the
 * character that starts at c and classify it with iswblank(); an
 * undecodable sequence is never blank.  Otherwise, classify the single
 * byte at c with isblank(). */
bool is_blank_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wide;
        int result = mbtowc(&wide, c, MB_CUR_MAX);

        if (result < 0) {
            mbtowc_reset();
            return 0;
        }

        return iswblank(wide);
    }
#endif

    return isblank((unsigned char)*c);
}
150
/* Return TRUE only for the low control codes: the values 0 up to and
 * including 31.  Unlike iscntrl(), this rejects 127 (DEL), and it never
 * accepts a value with the high bit set. */
bool is_ascii_cntrl_char(int c)
{
    return (c >= 0 && c <= 31);
}
157
/* A variant of iscntrl() that also recognizes control codes with the
 * high bit set.  For a byte value, the result is TRUE for 0..31, for
 * 127 (DEL), and for 128..159: apart from DEL, these are exactly the
 * values in which bits 5 and 6 (mask 0x60) are both clear. */
bool is_cntrl_char(int c)
{
    return (c == 127 || !(c & 0x60));
}
164
/* The multibyte counterpart of is_cntrl_char().  In UTF-8 mode, the
 * character at c is a control character when its first byte is below
 * 0x20 or equal to 0x7F, or when it is the two-byte sequence 0xC2
 * followed by a byte in the range 0x80..0x9F.  Otherwise, the single
 * byte at c is classified with is_cntrl_char(). */
bool is_cntrl_mbchar(const char *c)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
        const unsigned char *byte = (const unsigned char *)c;

        /* The second byte is looked at only when the first is 0xC2. */
        return (byte[0] < 0x20 || byte[0] == 0x7F ||
                (byte[0] == 0xC2 && (byte[1] & 0xE0) == 0x80));
    }
#endif

    return is_cntrl_char((unsigned char)*c);
}
178
/* The multibyte counterpart of ispunct(): in UTF-8 mode, decode the
 * character that starts at c and classify it with iswpunct(); an
 * undecodable sequence is never punctuation.  Otherwise, classify the
 * single byte at c with ispunct(). */
bool is_punct_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wide;
        int result = mbtowc(&wide, c, MB_CUR_MAX);

        if (result < 0) {
            mbtowc_reset();
            return 0;
        }

        return iswpunct(wide);
    }
#endif

    return ispunct((unsigned char)*c);
}
198
Benno Schulenberg6f129922016-06-30 18:02:45 +0200199/* Return TRUE when the given multibyte character c is a word-forming
200 * character (that is: alphanumeric, or specified in wordchars, or
201 * punctuation when allow_punct is TRUE), and FALSE otherwise. */
David Lawrence Ramsey2515ccc2005-06-15 06:04:08 +0000202bool is_word_mbchar(const char *c, bool allow_punct)
203{
Benno Schulenbergbf091be2016-07-21 09:46:47 +0200204 if (*c == '\0')
205 return FALSE;
206
Benno Schulenberg6f129922016-06-30 18:02:45 +0200207 if (is_alnum_mbchar(c))
208 return TRUE;
209
210 if (word_chars != NULL && *word_chars != '\0') {
Benno Schulenberg116d9e62016-12-15 21:30:30 +0100211 char symbol[mb_cur_max() + 1];
Benno Schulenberg6f129922016-06-30 18:02:45 +0200212 int symlen = parse_mbchar(c, symbol, NULL);
213
214 symbol[symlen] = '\0';
Benno Schulenberg116d9e62016-12-15 21:30:30 +0100215 return (strstr(word_chars, symbol) != NULL);
Benno Schulenberg6f129922016-06-30 18:02:45 +0200216 }
217
218 return (allow_punct && is_punct_mbchar(c));
David Lawrence Ramsey67287082005-06-13 02:40:04 +0000219}
220
Benno Schulenberg019d7b32016-06-29 20:40:22 +0200221/* Return the visible representation of control character c. */
Benno Schulenberg03586c62016-05-30 11:28:16 +0200222char control_rep(const signed char c)
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000223{
Benno Schulenbergeafae5d2016-12-18 09:40:09 +0100224 if (c == DEL_CODE)
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000225 return '?';
Benno Schulenberg03586c62016-05-30 11:28:16 +0200226 else if (c == -97)
227 return '=';
228 else if (c < 0)
229 return c + 224;
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000230 else
231 return c + 64;
232}
233
Benno Schulenberg622995f2016-06-29 20:37:28 +0200234/* Return the visible representation of multibyte control character c. */
Benno Schulenbergeafae5d2016-12-18 09:40:09 +0100235char control_mbrep(const char *c, bool isdata)
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000236{
Benno Schulenbergeef7d102016-12-20 19:27:41 +0100237 /* An embedded newline is an encoded NUL if it is data. */
238 if (*c == '\n' && (isdata || as_an_at))
Benno Schulenbergeafae5d2016-12-18 09:40:09 +0100239 return '@';
240
David Lawrence Ramsey7eb30a82005-07-17 02:40:07 +0000241#ifdef ENABLE_UTF8
David Lawrence Ramsey4d72de72006-04-12 15:27:40 +0000242 if (use_utf8) {
Benno Schulenberg86a64b12016-07-01 11:47:15 +0200243 if ((unsigned char)c[0] < 128)
Benno Schulenberg622995f2016-06-29 20:37:28 +0200244 return control_rep(c[0]);
Benno Schulenberg03586c62016-05-30 11:28:16 +0200245 else
Benno Schulenberg622995f2016-06-29 20:37:28 +0200246 return control_rep(c[1]);
Benno Schulenberg3b216592016-05-24 10:34:40 +0200247 } else
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000248#endif
Benno Schulenberg622995f2016-06-29 20:37:28 +0200249 return control_rep(*c);
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000250}
251
/* Determine how many bytes the (multibyte) character at c occupies.
 * In UTF-8 mode: return -1 when the byte sequence is invalid; return
 * the number of bytes minus 8 when the sequence decodes to an invalid
 * codepoint; otherwise return the number of bytes and store in *width
 * the number of columns that the character occupies (1 when wcwidth()
 * reports a negative width).  Outside UTF-8 mode: return 1.
 * Note that *width is written only in the valid-codepoint case; on
 * every other path it is left untouched. */
int length_of_char(const char *c, int *width)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wc;
        int charlen = mbtowc(&wc, c, MB_CUR_MAX);
        int columns;

        /* An invalid sequence... */
        if (charlen < 0) {
            mbtowc_reset();
            return -1;
        }

        /* A valid sequence, but an invalid codepoint... */
        if (!is_valid_unicode(wc))
            return charlen - 8;

        /* An unassigned codepoint is assumed to be one column wide. */
        columns = wcwidth(wc);
        *width = (columns < 0) ? 1 : columns;

        return charlen;
    }
#endif

    return 1;
}
285
/* The multibyte counterpart of wcwidth(): return the number of columns
 * that the character at c occupies.  The result is 1 outside UTF-8
 * mode, 1 for an undecodable sequence, and 1 when wcwidth() returns -1
 * for the decoded character. */
int mbwidth(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wc;
        int columns;

        if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
            mbtowc_reset();
            return 1;
        }

        columns = wcwidth(wc);

        return (columns == -1) ? 1 : columns;
    }
#endif

    return 1;
}
311
/* Return the maximum number of bytes that a single character can take:
 * MB_CUR_MAX in UTF-8 mode, and 1 otherwise. */
int mb_cur_max(void)
{
#ifdef ENABLE_UTF8
    return use_utf8 ? MB_CUR_MAX : 1;
#else
    return 1;
#endif
}
322
/* Convert the value in chr to a multibyte character, if possible.
 * The returned buffer is dynamically allocated in every case, so the
 * caller is responsible for freeing it.
 *
 * In UTF-8 mode, chr is taken as a Unicode value: when the conversion
 * succeeds, the buffer holds the multibyte character and *chr_mb_len
 * its length in bytes; when wctomb() fails or the value is not valid
 * Unicode, the buffer contents are undefined and *chr_mb_len is zero.
 * The buffer is MB_CUR_MAX bytes and is not NUL-terminated here.
 *
 * Outside UTF-8 mode, the buffer holds the low byte of chr and
 * *chr_mb_len is one. */
char *make_mbchar(long chr, int *chr_mb_len)
{
    char *chr_mb;

#ifdef ENABLE_UTF8
    if (use_utf8) {
        chr_mb = charalloc(MB_CUR_MAX);
        *chr_mb_len = wctomb(chr_mb, (wchar_t)chr);

        /* Reject invalid Unicode characters, and put wctomb() back
         * into its initial state. */
        if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) {
            IGNORE_CALL_RESULT(wctomb(NULL, 0));
            *chr_mb_len = 0;
        }
    } else
#endif
    {
        /* Narrow the value first, instead of reading the first byte of
         * the long in memory: on a big-endian machine that first byte
         * is the most significant one, not the character itself. */
        char byte = (char)(unsigned char)chr;

        *chr_mb_len = 1;
        chr_mb = mallocstrncpy(NULL, &byte, 1);
    }

    return chr_mb;
}
350
351/* Parse a multibyte character from buf. Return the number of bytes
352 * used. If chr isn't NULL, store the multibyte character in it. If
Benno Schulenbergc5f49162016-12-15 19:46:16 +0100353 * col isn't NULL, add the character's width (in columns) to it. */
David Lawrence Ramsey96452cb2005-07-26 06:13:45 +0000354int parse_mbchar(const char *buf, char *chr, size_t *col)
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000355{
Benno Schulenbergfc101a62016-12-15 15:50:07 +0100356 int length;
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000357
358 assert(buf != NULL);
359
David Lawrence Ramsey7eb30a82005-07-17 02:40:07 +0000360#ifdef ENABLE_UTF8
David Lawrence Ramsey4d72de72006-04-12 15:27:40 +0000361 if (use_utf8) {
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000362 /* Get the number of bytes in the multibyte character. */
Benno Schulenbergfc101a62016-12-15 15:50:07 +0100363 length = mblen(buf, MB_CUR_MAX);
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000364
Benno Schulenbergb42887f2016-05-29 21:45:52 +0200365 /* When the multibyte sequence is invalid, only take the first byte. */
Benno Schulenberg85ebe972016-12-15 16:45:26 +0100366 if (length <= 0) {
Chris Allegrettaa97cb812009-12-02 03:24:18 +0000367 IGNORE_CALL_RESULT(mblen(NULL, 0));
Benno Schulenbergfc101a62016-12-15 15:50:07 +0100368 length = 1;
Benno Schulenberg85ebe972016-12-15 16:45:26 +0100369 }
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000370
Benno Schulenbergb42887f2016-05-29 21:45:52 +0200371 /* When requested, store the multibyte character in chr. */
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000372 if (chr != NULL) {
373 int i;
David Lawrence Ramseye0fb4d52005-03-11 04:03:32 +0000374
Benno Schulenbergfc101a62016-12-15 15:50:07 +0100375 for (i = 0; i < length; i++)
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000376 chr[i] = buf[i];
377 }
378
Benno Schulenbergc5f49162016-12-15 19:46:16 +0100379 /* When requested, add the width of the character to col. */
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000380 if (col != NULL) {
Benno Schulenbergc5f49162016-12-15 19:46:16 +0100381 /* If we have a tab, compute its width in columns based on the
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000382 * current value of col. */
383 if (*buf == '\t')
384 *col += tabsize - *col % tabsize;
Benno Schulenberg41722682016-05-28 15:56:16 +0200385 /* If we have a control character, it's two columns wide: one
386 * column for the "^", and one for the visible character. */
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000387 else if (is_cntrl_mbchar(buf)) {
Benno Schulenberg41722682016-05-28 15:56:16 +0200388 *col += 2;
Benno Schulenbergc5f49162016-12-15 19:46:16 +0100389 /* If we have a normal character, get its width normally. */
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000390 } else
391 *col += mbwidth(buf);
392 }
Benno Schulenberg3b216592016-05-24 10:34:40 +0200393 } else
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000394#endif
Benno Schulenberg3b216592016-05-24 10:34:40 +0200395 {
Benno Schulenbergb42887f2016-05-29 21:45:52 +0200396 /* A byte character is one byte long. */
Benno Schulenbergfc101a62016-12-15 15:50:07 +0100397 length = 1;
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000398
Benno Schulenbergb42887f2016-05-29 21:45:52 +0200399 /* When requested, store the byte character in chr. */
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000400 if (chr != NULL)
401 *chr = *buf;
402
Benno Schulenbergc5f49162016-12-15 19:46:16 +0100403 /* When requested, add the width of the character to col. */
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000404 if (col != NULL) {
Benno Schulenbergc5f49162016-12-15 19:46:16 +0100405 /* If we have a tab, compute its width in columns using the
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000406 * current value of col. */
407 if (*buf == '\t')
408 *col += tabsize - *col % tabsize;
Benno Schulenberga9f79a62016-05-28 15:40:39 +0200409 /* If we have a control character, it's two columns wide: one
410 * column for the "^", and one for the visible character. */
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000411 else if (is_cntrl_char((unsigned char)*buf))
412 *col += 2;
413 /* If we have a normal character, it's one column wide. */
414 else
415 (*col)++;
416 }
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000417 }
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000418
Benno Schulenbergfc101a62016-12-15 15:50:07 +0100419 return length;
David Lawrence Ramseyb54155c2005-01-12 03:25:57 +0000420}
David Lawrence Ramseyd24fbb72005-01-14 21:50:32 +0000421
/* Return the index in buf of the start of the multibyte character that
 * precedes the one at pos.  When pos is zero, the result is zero. */
size_t move_mbleft(const char *buf, size_t pos)
{
    const size_t reach = mb_cur_max();
    size_t index, step = 0;

    assert(buf != NULL && pos <= strlen(buf));

    /* There is no library function for stepping back one multibyte
     * character.  So begin as far back as a character can possibly
     * start (but not before the start of buf)... */
    index = (reach > pos) ? 0 : pos - reach;

    /* ...and step forward one character at a time until pos has been
     * reached.  The last step taken is the character before pos. */
    while (index < pos) {
        step = parse_mbchar(buf + index, NULL, NULL);
        index += step;
    }

    return index - step;
}
445
/* Return the index in buf of the start of the multibyte character that
 * follows the one at pos: pos plus the byte length of that character
 * as reported by parse_mbchar(). */
size_t move_mbright(const char *buf, size_t pos)
{
    const int char_len = parse_mbchar(buf + pos, NULL, NULL);

    return pos + char_len;
}
David Lawrence Ramsey3a1fc8f2005-01-16 18:49:19 +0000452
453#ifndef HAVE_STRCASECMP
454/* This function is equivalent to strcasecmp(). */
455int nstrcasecmp(const char *s1, const char *s2)
456{
Benno Schulenberg56f067a2016-06-01 21:56:38 +0200457 return strncasecmp(s1, s2, HIGHEST_POSITIVE);
David Lawrence Ramsey3a1fc8f2005-01-16 18:49:19 +0000458}
459#endif
460
461/* This function is equivalent to strcasecmp() for multibyte strings. */
462int mbstrcasecmp(const char *s1, const char *s2)
463{
Benno Schulenberg56f067a2016-06-01 21:56:38 +0200464 return mbstrncasecmp(s1, s2, HIGHEST_POSITIVE);
David Lawrence Ramsey3a1fc8f2005-01-16 18:49:19 +0000465}
466
#ifndef HAVE_STRNCASECMP
/* Replacement for strncasecmp(), compiled only when HAVE_STRNCASECMP
 * is not defined.  Compare at most n bytes of s1 and s2 while ignoring
 * case, and return a value less than, equal to, or greater than zero
 * when s1 is found to be less than, equal to, or greater than s2. */
int nstrncasecmp(const char *s1, const char *s2, size_t n)
{
    if (s1 == s2)
        return 0;

    for (; n > 0; s1++, s2++, n--) {
        /* The <ctype.h> functions require a value in the range of
         * unsigned char; passing a negative plain char is undefined,
         * and would give high-bit bytes the wrong sign besides. */
        const int c1 = tolower((unsigned char)*s1);
        const int c2 = tolower((unsigned char)*s2);

        /* Stop at the first difference, or at the end of both strings
         * (when the end of just one is reached, the bytes differ). */
        if (c1 != c2 || c1 == '\0')
            return c1 - c2;
    }

    return 0;
}
#endif
482
/* The multibyte counterpart of strncasecmp(): compare at most n
 * characters (not bytes) of s1 and s2 while ignoring case.  Outside
 * UTF-8 mode this is simply strncasecmp(). */
int mbstrncasecmp(const char *s1, const char *s2, size_t n)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wc1, wc2;

        for (; *s1 != '\0' && *s2 != '\0' && n > 0; n--) {
            bool bad1, bad2;

            /* Decode one character from each string, remembering
             * which of the two (if any) is an invalid sequence. */
            bad1 = (mbtowc(&wc1, s1, MB_CUR_MAX) < 0);
            if (bad1)
                mbtowc_reset();

            bad2 = (mbtowc(&wc2, s2, MB_CUR_MAX) < 0);
            if (bad2)
                mbtowc_reset();

            if (!bad1 && !bad2) {
                int difference = towlower(wc1) - towlower(wc2);

                if (difference != 0)
                    return difference;
            } else if (*s1 != *s2)
                /* With an invalid sequence involved, fall back to
                 * comparing the raw first bytes. */
                return (unsigned char)*s1 - (unsigned char)*s2;
            else if (bad1 != bad2)
                /* Same first byte, but only one sequence is invalid:
                 * the invalid one sorts after the valid one. */
                return (bad1 ? 1 : -1);

            s1 += move_mbright(s1, 0);
            s2 += move_mbright(s2, 0);
        }

        /* When the limit was reached, everything compared equal;
         * otherwise at least one string ended, so compare the bytes
         * where the loop stopped. */
        if (n == 0)
            return 0;

        return (unsigned char)*s1 - (unsigned char)*s2;
    }
#endif

    return strncasecmp(s1, s2, n);
}
526
#ifndef HAVE_STRCASESTR
/* Replacement for strcasestr(), compiled only when HAVE_STRCASESTR is
 * not defined.  Return a pointer to the first case-insensitive
 * occurrence of needle in haystack, or NULL when there is none.  An
 * empty needle matches at the very start of haystack. */
char *nstrcasestr(const char *haystack, const char *needle)
{
    const char *spot;
    size_t needle_len;

    if (*needle == '\0')
        return (char *)haystack;

    needle_len = strlen(needle);

    /* Try each position in turn, from left to right. */
    for (spot = haystack; *spot != '\0'; spot++) {
        if (strncasecmp(spot, needle, needle_len) == 0)
            return (char *)spot;
    }

    return NULL;
}
#endif
548
/* The multibyte counterpart of strcasestr(): return a pointer to the
 * first case-insensitive occurrence of needle in haystack, or NULL
 * when there is none.  In UTF-8 mode the search advances one multibyte
 * character at a time; otherwise it is left to strcasestr(). */
char *mbstrcasestr(const char *haystack, const char *needle)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
        size_t needle_len;

        if (*needle == '\0')
            return (char *)haystack;

        /* The length of the needle in characters, not in bytes. */
        needle_len = mbstrlen(needle);

        for (; *haystack != '\0'; haystack += move_mbright(haystack, 0)) {
            if (mbstrncasecmp(haystack, needle, needle_len) == 0)
                return (char *)haystack;
        }

        return NULL;
    }
#endif

    return (char *) strcasestr(haystack, needle);
}
573
/* A backward variant of strstr(): search for needle in haystack, going
 * from pointer toward the start of haystack, and return a pointer to
 * the first match found that way, or NULL when there is none.  The
 * given pointer must point into haystack (or at its terminating NUL).
 * An empty needle matches right at pointer. */
char *revstrstr(const char *haystack, const char *needle,
        const char *pointer)
{
    size_t needle_len = strlen(needle);
    size_t tail_len = strlen(pointer);

    if (needle_len == 0)
        return (char *)pointer;

    if (strlen(haystack) < needle_len)
        return NULL;

    /* When fewer bytes than the needle's length remain after pointer,
     * no match can start there: step back to the last position where
     * the needle still fits.  (Subtracting the positive difference
     * avoids adding a wrapped-around unsigned value to the pointer.) */
    if (tail_len < needle_len)
        pointer -= needle_len - tail_len;

    while (pointer >= haystack) {
        if (strncmp(pointer, needle, needle_len) == 0)
            return (char *)pointer;

        /* Stop here, rather than stepping to before the string. */
        if (pointer == haystack)
            break;

        pointer--;
    }

    return NULL;
}
599
/* This function is equivalent to strcasestr(), except in that it scans
 * the string in reverse, starting at rev_start (which is expected to
 * point into haystack).  Return the start of the nearest case-insensitive
 * occurrence of needle that begins at or before rev_start, or NULL when
 * there is none.  An empty needle matches at rev_start itself. */
char *revstrcasestr(const char *haystack, const char *needle, const char
	*rev_start)
{
    size_t rev_start_len, needle_len;

    if (*needle == '\0')
	return (char *)rev_start;

    needle_len = strlen(needle);

    if (strlen(haystack) < needle_len)
	return NULL;

    /* The number of bytes from rev_start to the end of the string; it
     * grows by one with every step to the left. */
    rev_start_len = strlen(rev_start);

    while (rev_start >= haystack) {
	/* Only compare when the needle fits in what remains. */
	if (rev_start_len >= needle_len &&
		strncasecmp(rev_start, needle, needle_len) == 0)
	    return (char *)rev_start;

	/* Stop at the head of the haystack instead of decrementing past
	 * it, which would form an out-of-bounds pointer. */
	if (rev_start == haystack)
	    break;

	rev_start--;
	rev_start_len++;
    }

    return NULL;
}
David Lawrence Ramsey345260c2005-01-24 01:14:17 +0000625
/* The multibyte counterpart of revstrcasestr(): look backward through
 * haystack, beginning at rev_start, for a case-insensitive occurrence
 * of needle.  Return where it begins, or NULL when it is not found.
 * When UTF-8 is not in use, the work is handed to revstrcasestr(). */
char *mbrevstrcasestr(const char *haystack, const char *needle, const
	char *rev_start)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	size_t needle_chars, tail_chars;

	/* An empty needle matches right where the search begins. */
	if (needle[0] == '\0')
	    return (char *)rev_start;

	needle_chars = mbstrlen(needle);

	/* A needle longer than the whole haystack can never match. */
	if (needle_chars > mbstrlen(haystack))
	    return NULL;

	/* Lengths here are as reported by mbstrlen(), not byte counts. */
	tail_chars = mbstrlen(rev_start);

	for (;;) {
	    if (tail_chars >= needle_chars &&
			mbstrncasecmp(rev_start, needle, needle_chars) == 0)
		return (char *)rev_start;

	    /* At the head of the haystack there is nowhere left to look. */
	    if (rev_start == haystack)
		return NULL;

	    /* Step one multibyte character to the left. */
	    rev_start = haystack + move_mbleft(haystack, rev_start - haystack);
	    tail_chars++;
	}
    }
#endif
    return revstrcasestr(haystack, needle, rev_start);
}
David Lawrence Ramsey3a1fc8f2005-01-16 18:49:19 +0000661
/* Return the length of the multibyte string s, as counted by
 * mbstrnlen() without any effective limit. */
size_t mbstrlen(const char *s)
{
    /* The largest value a size_t can hold serves as "no limit". */
    const size_t no_limit = (size_t)-1;

    return mbstrnlen(s, no_limit);
}
667
David Lawrence Ramsey3a1fc8f2005-01-16 18:49:19 +0000668#ifndef HAVE_STRNLEN
/* A replacement for strnlen(): return the number of bytes in s before
 * the terminating null byte, but never more than maxlen. */
size_t nstrnlen(const char *s, size_t maxlen)
{
    size_t count = 0;

    while (s[count] != '\0' && count < maxlen)
	count++;

    return count;
}
679#endif
680
/* The multibyte counterpart of strnlen(): count the characters in s,
 * stopping at the terminating null byte or after maxlen characters,
 * whichever comes first.  When UTF-8 is not in use, this is simply
 * strnlen(). */
size_t mbstrnlen(const char *s, size_t maxlen)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	size_t count = 0;

	/* Each step skips one whole multibyte character. */
	while (*s != '\0' && count < maxlen) {
	    s += move_mbright(s, 0);
	    count++;
	}

	return count;
    }
#endif
    return strnlen(s, maxlen);
}
David Lawrence Ramsey38156d42005-03-15 05:44:03 +0000697
David Lawrence Ramseya2488632006-01-06 07:10:30 +0000698#if !defined(NANO_TINY) || !defined(DISABLE_JUSTIFY)
/* This function is equivalent to strchr() for multibyte strings.
 * Return a pointer to the first occurrence in s of the (possibly
 * multibyte) character that c points to, or NULL when it does not
 * occur.  An invalid sequence is compared by its first byte, and only
 * ever matches another invalid sequence. */
char *mbstrchr(const char *s, const char *c)
{
    assert(s != NULL && c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	bool bad_c_mb = FALSE;
	char symbol[MB_CUR_MAX];
	wchar_t ws, wc;

	/* Decode the wanted character once; when it is invalid, fall
	 * back to its first byte and remember that it was invalid. */
	if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
	    mbtowc_reset();
	    wc = (unsigned char)*c;
	    bad_c_mb = TRUE;
	}

	while (*s != '\0') {
	    int sym_len = parse_mbchar(s, symbol, NULL);
	    /* The validity flag describes only the current character,
	     * so it starts afresh for each one.  (Keeping it set after
	     * the first invalid byte would prevent all later valid
	     * characters from ever matching a valid c.) */
	    bool bad_s_mb = FALSE;

	    if (mbtowc(&ws, symbol, sym_len) < 0) {
		mbtowc_reset();
		ws = (unsigned char)*s;
		bad_s_mb = TRUE;
	    }

	    if (bad_s_mb == bad_c_mb && ws == wc)
		return (char *)s;

	    s += sym_len;
	}

	return NULL;
    } else
#endif
	return (char *) strchr(s, *c);
}
David Lawrence Ramseya2488632006-01-06 07:10:30 +0000741#endif /* !NANO_TINY || !DISABLE_JUSTIFY */
David Lawrence Ramsey66444c32005-07-21 18:05:27 +0000742
David Lawrence Ramseya2488632006-01-06 07:10:30 +0000743#ifndef NANO_TINY
/* The multibyte counterpart of strpbrk(): return a pointer to the first
 * character in s that also occurs in accept, or NULL when there is no
 * such character.  When UTF-8 is not in use, this is simply strpbrk(). */
char *mbstrpbrk(const char *s, const char *accept)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	while (*s != '\0') {
	    /* Is the character at s one of the accepted ones? */
	    if (mbstrchr(accept, s) != NULL)
		return (char *)s;

	    s += move_mbright(s, 0);
	}

	return NULL;
    }
#endif
    return (char *) strpbrk(s, accept);
}
759
/* This function is equivalent to strpbrk(), except in that it scans the
 * string in reverse, starting at rev_start (which is expected to point
 * into s).  Return a pointer to the nearest byte at or before rev_start
 * that occurs in accept, or NULL when there is none.  The terminating
 * null byte is never reported as a match. */
char *revstrpbrk(const char *s, const char *accept, const char
	*rev_start)
{
    assert(s != NULL && accept != NULL && rev_start != NULL);

    /* When starting on the terminator, begin one byte earlier, as
     * strchr() would otherwise "find" the null byte in accept. */
    if (*rev_start == '\0') {
	if (rev_start == s)
	    return NULL;
	rev_start--;
    }

    while (rev_start >= s) {
	if (strchr(accept, (unsigned char)*rev_start) != NULL)
	    return (char *)rev_start;

	/* Stop at the head of the string instead of decrementing past
	 * it, which would form an out-of-bounds pointer. */
	if (rev_start == s)
	    break;

	rev_start--;
    }

    return NULL;
}
780
/* The multibyte counterpart of revstrpbrk(): look backward through s,
 * beginning at rev_start, for a character that occurs in accept.
 * Return a pointer to it, or NULL when there is none.  When UTF-8 is
 * not in use, the work is handed to revstrpbrk(). */
char *mbrevstrpbrk(const char *s, const char *accept, const char
	*rev_start)
{
    assert(s != NULL && accept != NULL && rev_start != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	/* When starting on the terminator, begin one character earlier,
	 * unless there is nothing before it. */
	if (*rev_start == '\0') {
	    if (rev_start == s)
		return NULL;
	    rev_start = s + move_mbleft(s, rev_start - s);
	}

	/* Keep stepping left for as long as the current character is
	 * not among the accepted ones. */
	while (mbstrchr(accept, rev_start) == NULL) {
	    /* At the head of the string there is nowhere left to look. */
	    if (rev_start == s)
		return NULL;

	    rev_start = s + move_mbleft(s, rev_start - s);
	}

	return (char *)rev_start;
    }
#endif
    return revstrpbrk(s, accept, rev_start);
}
810#endif /* !NANO_TINY */
811
Benno Schulenbergeea09082014-04-13 20:50:20 +0000812#if !defined(DISABLE_NANORC) && (!defined(NANO_TINY) || !defined(DISABLE_JUSTIFY))
David Lawrence Ramseyd5d4dde2005-06-14 01:55:56 +0000813/* Return TRUE if the string s contains one or more blank characters,
814 * and FALSE otherwise. */
815bool has_blank_chars(const char *s)
816{
David Lawrence Ramseyd5d4dde2005-06-14 01:55:56 +0000817 for (; *s != '\0'; s++) {
818 if (isblank(*s))
819 return TRUE;
820 }
821
822 return FALSE;
823}
824
825/* Return TRUE if the multibyte string s contains one or more blank
826 * multibyte characters, and FALSE otherwise. */
827bool has_blank_mbchars(const char *s)
828{
David Lawrence Ramsey7eb30a82005-07-17 02:40:07 +0000829#ifdef ENABLE_UTF8
David Lawrence Ramsey4d72de72006-04-12 15:27:40 +0000830 if (use_utf8) {
Benno Schulenberg116d9e62016-12-15 21:30:30 +0100831 char symbol[MB_CUR_MAX];
David Lawrence Ramseyd5d4dde2005-06-14 01:55:56 +0000832
David Lawrence Ramseybebfd9f2007-07-09 22:36:32 +0000833 for (; *s != '\0'; s += move_mbright(s, 0)) {
Benno Schulenberg116d9e62016-12-15 21:30:30 +0100834 parse_mbchar(s, symbol, NULL);
David Lawrence Ramseyd5d4dde2005-06-14 01:55:56 +0000835
Benno Schulenberg116d9e62016-12-15 21:30:30 +0100836 if (is_blank_mbchar(symbol))
837 return TRUE;
David Lawrence Ramseyd5d4dde2005-06-14 01:55:56 +0000838 }
839
Benno Schulenberg116d9e62016-12-15 21:30:30 +0100840 return FALSE;
David Lawrence Ramseyd5d4dde2005-06-14 01:55:56 +0000841 } else
842#endif
843 return has_blank_chars(s);
844}
Benno Schulenbergeea09082014-04-13 20:50:20 +0000845#endif /* !DISABLE_NANORC && (!NANO_TINY || !DISABLE_JUSTIFY) */
David Lawrence Ramseybdfa9272005-06-14 23:36:13 +0000846
David Lawrence Ramsey6ff695c2005-08-05 03:14:29 +0000847#ifdef ENABLE_UTF8
/* Return TRUE if wc is valid Unicode, and FALSE otherwise.  Valid means:
 * within 0..0x10FFFF, not in 0xD800..0xDFFF, not in 0xFDD0..0xFDEF, and
 * not ending in 0xFFFE or 0xFFFF. */
bool is_valid_unicode(wchar_t wc)
{
    bool in_range = (0 <= wc && wc <= 0x10FFFF);
    bool in_d800_block = (0xD800 <= wc && wc <= 0xDFFF);
    bool in_fdd0_block = (0xFDD0 <= wc && wc <= 0xFDEF);
    bool ends_in_fffe_or_ffff = ((wc & 0xFFFF) >= 0xFFFE);

    return (in_range && !in_d800_block && !in_fdd0_block &&
		!ends_in_fffe_or_ffff);
}
856#endif
857
Benno Schulenbergeea09082014-04-13 20:50:20 +0000858#ifndef DISABLE_NANORC
David Lawrence Ramseybdfa9272005-06-14 23:36:13 +0000859/* Check if the string s is a valid multibyte string. Return TRUE if it
860 * is, and FALSE otherwise. */
861bool is_valid_mbstring(const char *s)
862{
David Lawrence Ramsey7eb30a82005-07-17 02:40:07 +0000863#ifdef ENABLE_UTF8
Benno Schulenbergc5f49162016-12-15 19:46:16 +0100864 if (use_utf8)
865 return (mbstowcs(NULL, s, 0) != (size_t)-1);
866 else
David Lawrence Ramseybdfa9272005-06-14 23:36:13 +0000867#endif
Benno Schulenbergc5f49162016-12-15 19:46:16 +0100868 return TRUE;
David Lawrence Ramseybdfa9272005-06-14 23:36:13 +0000869}
Benno Schulenbergeea09082014-04-13 20:50:20 +0000870#endif /* !DISABLE_NANORC */