src/chars.c - platform_external_nano - Gitiles

 /* $Id$ */
 /**************************************************************************
  *   chars.c                                                              *
  *                                                                        *
  *   Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,  *
  *   2010, 2011, 2013, 2014 Free Software Foundation, Inc.                *
  *   This program is free software; you can redistribute it and/or modify *
  *   it under the terms of the GNU General Public License as published by *
  *   the Free Software Foundation; either version 3, or (at your option)  *
  *   any later version.                                                   *
  *                                                                        *
  *   This program is distributed in the hope that it will be useful, but  *
  *   WITHOUT ANY WARRANTY; without even the implied warranty of           *
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    *
  *   General Public License for more details.                             *
  *                                                                        *
  *   You should have received a copy of the GNU General Public License    *
  *   along with this program; if not, write to the Free Software          *
  *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA            *
  *   02110-1301, USA.                                                     *
  *                                                                        *
  **************************************************************************/

 #include "proto.h"

 #include <string.h>
 #include <ctype.h>

 #ifdef ENABLE_UTF8
 #ifdef HAVE_WCHAR_H
 #include <wchar.h>
 #endif
 #ifdef HAVE_WCTYPE_H
 #include <wctype.h>
 #endif

 static bool use_utf8 = FALSE;
 	/* Whether we've enabled UTF-8 support.  Starts out FALSE and is
 	 * switched on by utf8_init(). */
 static const wchar_t bad_wchar = 0xFFFD;
 	/* If we get an invalid multibyte sequence, we treat it as
 	 * Unicode FFFD (Replacement Character), unless we're searching
 	 * for a match to it. */
 static const char *const bad_mbchar = "\xEF\xBF\xBD";
 	/* The UTF-8 byte sequence for U+FFFD, copied into the output
 	 * buffer when a multibyte sequence cannot be decoded. */
 static const int bad_mbchar_len = 3;
 	/* The number of bytes in bad_mbchar, not counting the
 	 * terminating null. */

 /* Switch the multibyte routines in this file over to their UTF-8
  * code paths. */
 void utf8_init(void)
 {
     use_utf8 = TRUE;
 }

 /* Report whether utf8_init() has switched UTF-8 support on. */
 bool using_utf8(void)
 {
     return use_utf8;
 }
 #endif /* ENABLE_UTF8 */

 /* Append the first len2 bytes of str2 to the first len1 bytes of str1.
  * Both strings must be dynamically allocated: str1 is reallocated to
  * hold the result and str2 is freed.  Return the combined string. */
 char *addstrings(char *str1, size_t len1, char *str2, size_t len2)
 {
     char *joined = charealloc(str1, len1 + len2 + 1);

     /* Cut the first part off at len1, so that strncat() appends
      * exactly there. */
     joined[len1] = '\0';
     strncat(joined + len1, str2, len2);

     free(str2);

     return joined;
 }


 #ifndef HAVE_ISBLANK
 /* A stand-in for isblank(): c is blank when it is whitespace and is
  * either a tab or not a control character. */
 bool nisblank(int c)
 {
     return (isspace(c) != 0) && (c == '\t' || !is_cntrl_char(c));
 }
 #endif

 #if !defined(HAVE_ISWBLANK) && defined(ENABLE_UTF8)
 /* A stand-in for iswblank(): wc is blank when it is wide whitespace
  * and is either a tab or not a wide control character. */
 bool niswblank(wchar_t wc)
 {
     return (iswspace(wc) != 0) && (wc == '\t' || !is_cntrl_wchar(wc));
 }
 #endif

 /* Return TRUE when c survives a round trip through unsigned char,
  * that is, when its value lies in byte range, and FALSE otherwise. */
 bool is_byte(int c)
 {
     const unsigned int value = (unsigned int)c;
     const unsigned char low_byte = (unsigned char)c;

     return (value == low_byte);
 }

 /* Put mbtowc() back into its initial conversion state by calling it
  * with a null string; the call's result is deliberately discarded. */
 void mbtowc_reset(void)
 {
     IGNORE_CALL_RESULT(mbtowc(NULL, NULL, 0));
 }

 /* Put wctomb() back into its initial conversion state by calling it
  * with a null buffer; the call's result is deliberately discarded. */
 void wctomb_reset(void)
 {
     IGNORE_CALL_RESULT(wctomb(NULL, 0));
 }

 /* The multibyte counterpart of isalnum(): return TRUE when the
  * character starting at c is alphanumeric.  In UTF-8 mode a sequence
  * that cannot be decoded is classified as U+FFFD; otherwise only the
  * first byte of c is examined. */
 bool is_alnum_mbchar(const char *c)
 {
     assert(c != NULL);

 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	wchar_t wide;
 	int converted = mbtowc(&wide, c, MB_CUR_MAX);

 	if (converted < 0) {
 	    mbtowc_reset();
 	    wide = bad_wchar;
 	}

 	return iswalnum(wide);
     }
 #endif

     return isalnum((unsigned char)*c);
 }

 /* The multibyte counterpart of isblank(): return TRUE when the
  * character starting at c is blank.  In UTF-8 mode a sequence that
  * cannot be decoded is classified as U+FFFD; otherwise only the first
  * byte of c is examined. */
 bool is_blank_mbchar(const char *c)
 {
     assert(c != NULL);

 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	wchar_t wide;
 	int converted = mbtowc(&wide, c, MB_CUR_MAX);

 	if (converted < 0) {
 	    mbtowc_reset();
 	    wide = bad_wchar;
 	}

 	return iswblank(wide);
     }
 #endif

     return isblank((unsigned char)*c);
 }

 /* Like iscntrl(), but restricted to the control characters without
  * the high bit set: return TRUE for values 0 through 31 only. */
 bool is_ascii_cntrl_char(int c)
 {
     return (c >= 0 && c <= 31);
 }

 /* Like iscntrl(), but also accepting the high-bit control characters.
  * Return TRUE for 0..31 and 127..159, and also for -128..-97, which are
  * the values 128..159 take when held in a signed 8-bit char. */
 bool is_cntrl_char(int c)
 {
     const bool signed_high = (c >= -128 && c <= -97);
     const bool ascii_low = (c >= 0 && c <= 31);
     const bool del_or_high = (c >= 127 && c <= 159);

     return signed_high || ascii_low || del_or_high;
 }

 #ifdef ENABLE_UTF8
 /* The wide-character counterpart of is_cntrl_char(): return TRUE for
  * the values 0..31 and 127..159, and FALSE for everything else. */
 bool is_cntrl_wchar(wchar_t wc)
 {
     const bool ascii_low = (wc >= 0 && wc <= 31);
     const bool del_or_high = (wc >= 127 && wc <= 159);

     return ascii_low || del_or_high;
 }
 #endif

 /* The multibyte counterpart of is_cntrl_char(): return TRUE when the
  * character starting at c is a control character, including the ones
  * with the high bit set.  In UTF-8 mode a sequence that cannot be
  * decoded is classified as U+FFFD; otherwise only the first byte of c
  * is examined. */
 bool is_cntrl_mbchar(const char *c)
 {
     assert(c != NULL);

 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	wchar_t wide;
 	int converted = mbtowc(&wide, c, MB_CUR_MAX);

 	if (converted < 0) {
 	    mbtowc_reset();
 	    wide = bad_wchar;
 	}

 	return is_cntrl_wchar(wide);
     }
 #endif

     return is_cntrl_char((unsigned char)*c);
 }

 /* The multibyte counterpart of ispunct(): return TRUE when the
  * character starting at c is punctuation.  In UTF-8 mode a sequence
  * that cannot be decoded is classified as U+FFFD; otherwise only the
  * first byte of c is examined. */
 bool is_punct_mbchar(const char *c)
 {
     assert(c != NULL);

 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	wchar_t wide;
 	int converted = mbtowc(&wide, c, MB_CUR_MAX);

 	if (converted < 0) {
 	    mbtowc_reset();
 	    wide = bad_wchar;
 	}

 	return iswpunct(wide);
     }
 #endif

     return ispunct((unsigned char)*c);
 }

 /* Decide whether the multibyte character at c can be part of a word.
  * Alphanumeric characters always can; punctuation characters can only
  * when allow_punct is TRUE.  Everything else gives FALSE. */
 bool is_word_mbchar(const char *c, bool allow_punct)
 {
     assert(c != NULL);

     return is_alnum_mbchar(c) || (allow_punct && is_punct_mbchar(c));
 }

 /* Return the character that is shown after the caret when the control
  * character c is displayed: '@' for a newline (which, embedded in a
  * line, stands for an encoded null), '?' for NANO_CONTROL_8, and
  * (c + 64) for every other control character. */
 char control_rep(char c)
 {
     assert(is_cntrl_char(c));

     if (c == '\n')
 	return '@';

     if (c == NANO_CONTROL_8)
 	return '?';

     return c + 64;
 }

 #ifdef ENABLE_UTF8
 /* The wide-character counterpart of control_rep(): return the wide
  * character shown after the caret for the wide control character wc.
  * That is '@' for a newline (an encoded null), '?' for NANO_CONTROL_8,
  * and (wc + 64) for every other control character. */
 wchar_t control_wrep(wchar_t wc)
 {
     assert(is_cntrl_wchar(wc));

     if (wc == '\n')
 	return '@';

     if (wc == NANO_CONTROL_8)
 	return '?';

     return wc + 64;
 }
 #endif

 /* Store in crep the visible stand-in (the part shown after the caret)
  * for the multibyte control character at c, store its length in bytes
  * in *crep_len, and return crep.  The result is not null-terminated.
  * In UTF-8 mode, if c cannot be decoded, crep receives the encoding of
  * U+FFFD (Replacement Character); if the stand-in cannot be encoded,
  * *crep_len is set to zero. */
 char *control_mbrep(const char *c, char *crep, int *crep_len)
 {
     assert(c != NULL && crep != NULL && crep_len != NULL);

 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	wchar_t wide;

 	if (mbtowc(&wide, c, MB_CUR_MAX) < 0) {
 	    mbtowc_reset();
 	    *crep_len = bad_mbchar_len;
 	    strncpy(crep, bad_mbchar, *crep_len);
 	    return crep;
 	}

 	*crep_len = wctomb(crep, control_wrep(wide));

 	if (*crep_len < 0) {
 	    wctomb_reset();
 	    *crep_len = 0;
 	}

 	return crep;
     }
 #endif

     *crep_len = 1;
     *crep = control_rep(*c);

     return crep;
 }

 /* Copy the multibyte non-control character at c into crep, store its
  * length in bytes in *crep_len, and return crep.  The result is not
  * null-terminated.  In UTF-8 mode, if c cannot be decoded or is not
  * valid Unicode, crep receives the encoding of U+FFFD (Replacement
  * Character); if the character cannot be re-encoded, *crep_len is set
  * to zero. */
 char *mbrep(const char *c, char *crep, int *crep_len)
 {
     assert(c != NULL && crep != NULL && crep_len != NULL);

 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	wchar_t wide;

 	/* Reject invalid Unicode characters. */
 	if (mbtowc(&wide, c, MB_CUR_MAX) < 0 || !is_valid_unicode(wide)) {
 	    mbtowc_reset();
 	    *crep_len = bad_mbchar_len;
 	    strncpy(crep, bad_mbchar, *crep_len);
 	    return crep;
 	}

 	*crep_len = wctomb(crep, wide);

 	if (*crep_len < 0) {
 	    wctomb_reset();
 	    *crep_len = 0;
 	}

 	return crep;
     }
 #endif

     *crep_len = 1;
     *crep = *c;

     return crep;
 }

 /* The multibyte counterpart of wcwidth(): return the number of
  * columns the character starting at c occupies.  In UTF-8 mode, a
  * sequence that cannot be decoded, or a character for which wcwidth()
  * reports -1, gets the width of U+FFFD.  Outside UTF-8 mode the width
  * is always 1. */
 int mbwidth(const char *c)
 {
     assert(c != NULL);

 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	wchar_t wide;
 	int columns;

 	if (mbtowc(&wide, c, MB_CUR_MAX) < 0) {
 	    mbtowc_reset();
 	    wide = bad_wchar;
 	}

 	columns = wcwidth(wide);

 	/* No usable width: measure the replacement character
 	 * instead. */
 	if (columns == -1)
 	    columns = wcwidth(bad_wchar);

 	return columns;
     }
 #endif

     return 1;
 }

 /* Return the largest number of bytes a single multibyte character can
  * take: MB_CUR_MAX when UTF-8 support is on, and 1 otherwise. */
 int mb_cur_max(void)
 {
 #ifdef ENABLE_UTF8
     if (use_utf8)
 	return MB_CUR_MAX;
 #endif

     return 1;
 }

 /* Convert the Unicode value in chr to a multibyte character with the
  * same wide character value as chr, if possible.  If the conversion
  * succeeds, return the (dynamically allocated) multibyte character and
  * store its length in *chr_mb_len.  Otherwise, return an undefined
  * (dynamically allocated) multibyte character and a length of zero.
  * Outside UTF-8 mode the result is always the single byte holding the
  * low-order eight bits of chr.  The caller must free the result. */
 char *make_mbchar(long chr, int *chr_mb_len)
 {
     char *chr_mb;
     char byte;

     assert(chr_mb_len != NULL);

 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	chr_mb = charalloc(MB_CUR_MAX);
 	*chr_mb_len = wctomb(chr_mb, (wchar_t)chr);

 	/* Reject invalid Unicode characters. */
 	if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) {
 	    wctomb_reset();
 	    *chr_mb_len = 0;
 	}
     } else {
 #endif
 	/* Narrow the value explicitly.  Reading the first byte of
 	 * the long's storage gives the low-order byte only on
 	 * little-endian machines; on big-endian ones it would be
 	 * the high-order byte. */
 	byte = (char)chr;

 	*chr_mb_len = 1;
 	chr_mb = mallocstrncpy(NULL, &byte, 1);
 #ifdef ENABLE_UTF8
     }
 #endif

     return chr_mb;
 }

 /* Parse a multibyte character from buf.  Return the number of bytes
  * used, which is always at least one: an invalid sequence and a null
  * byte both count as a single byte.  If chr isn't NULL, store the
  * multibyte character in it (without a terminating null, so chr must
  * have room for the bytes of one character).  If col isn't NULL, add
  * the display width of the character to it: a tab advances to the
  * next multiple of tabsize, a control character takes one column for
  * the caret plus the width of its visible equivalent, and anything
  * else takes its normal width.  If *buf is '\t', we expect col to
  * have the current display width. */
 int parse_mbchar(const char *buf, char *chr, size_t *col)
 {
     int buf_mb_len;

     assert(buf != NULL);

 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	/* Get the number of bytes in the multibyte character. */
 	buf_mb_len = mblen(buf, MB_CUR_MAX);

 	/* If buf contains an invalid multibyte character, only
 	 * interpret buf's first byte.  The mblen(NULL, 0) call puts
 	 * mblen() back into its initial state after the failure. */
 	if (buf_mb_len < 0) {
 	    IGNORE_CALL_RESULT(mblen(NULL, 0));
 	    buf_mb_len = 1;
 	/* mblen() gives zero for a null byte; still count it as one
 	 * byte. */
 	} else if (buf_mb_len == 0)
 	    buf_mb_len++;

 	/* Save the multibyte character in chr. */
 	if (chr != NULL) {
 	    int i;

 	    for (i = 0; i < buf_mb_len; i++)
 		chr[i] = buf[i];
 	}

 	/* Save the column width of the wide character in col. */
 	if (col != NULL) {
 	    /* If we have a tab, get its width in columns using the
 	     * current value of col. */
 	    if (*buf == '\t')
 		*col += tabsize - *col % tabsize;
 	    /* If we have a control character, get its width using one
 	     * column for the "^" that will be displayed in front of it,
 	     * and the width in columns of its visible equivalent as
 	     * returned by control_mbrep(). */
 	    else if (is_cntrl_mbchar(buf)) {
 		char *ctrl_buf_mb = charalloc(MB_CUR_MAX);
 		int ctrl_buf_mb_len;

 		(*col)++;

 		ctrl_buf_mb = control_mbrep(buf, ctrl_buf_mb,
 			&ctrl_buf_mb_len);

 		*col += mbwidth(ctrl_buf_mb);

 		free(ctrl_buf_mb);
 	    /* If we have a normal character, get its width in columns
 	     * normally. */
 	    } else
 		*col += mbwidth(buf);
 	}
     } else {
 #endif
 	/* Get the number of bytes in the byte character. */
 	buf_mb_len = 1;

 	/* Save the byte character in chr. */
 	if (chr != NULL)
 	    *chr = *buf;

 	if (col != NULL) {
 	    /* If we have a tab, get its width in columns using the
 	     * current value of col. */
 	    if (*buf == '\t')
 		*col += tabsize - *col % tabsize;
 	    /* If we have a control character, it's two columns wide:
 	     * one column for the "^" that will be displayed in front of
 	     * it, and one column for its visible equivalent as returned
 	     * by control_mbrep(). */
 	    else if (is_cntrl_char((unsigned char)*buf))
 		*col += 2;
 	    /* If we have a normal character, it's one column wide. */
 	    else
 		(*col)++;
 	}
 #ifdef ENABLE_UTF8
     }
 #endif

     return buf_mb_len;
 }

 /* Return the index in buf of the start of the multibyte character
  * that precedes position pos.  When pos is zero, zero is returned. */
 size_t move_mbleft(const char *buf, size_t pos)
 {
     size_t prev_start = 0, scan = 0;

     assert(buf != NULL && pos <= strlen(buf));

     /* Multibyte strings cannot be walked backward, so step forward
      * from the beginning, remembering where the most recently parsed
      * character started.  This takes O(pos) time. */
     while (scan < pos) {
 	prev_start = scan;
 	scan += parse_mbchar(buf + scan, NULL, NULL);
     }

     return prev_start;
 }

 /* Return the index in buf of the start of the multibyte character
  * that follows the one at position pos. */
 size_t move_mbright(const char *buf, size_t pos)
 {
     const char *here = buf + pos;
     int char_len = parse_mbchar(here, NULL, NULL);

     return pos + char_len;
 }

 #ifndef HAVE_STRCASECMP
 /* A stand-in for strcasecmp(): compare s1 and s2 without regard to
  * case, by running the length-limited comparison with the largest
  * possible limit. */
 int nstrcasecmp(const char *s1, const char *s2)
 {
     const size_t no_limit = (size_t)-1;

     return strncasecmp(s1, s2, no_limit);
 }
 #endif

 /* The multibyte counterpart of strcasecmp(): compare s1 and s2
  * without regard to case, by running the length-limited multibyte
  * comparison with the largest possible limit. */
 int mbstrcasecmp(const char *s1, const char *s2)
 {
     const size_t no_limit = (size_t)-1;

     return mbstrncasecmp(s1, s2, no_limit);
 }

 #ifndef HAVE_STRNCASECMP
 /* This function is equivalent to strncasecmp(): compare at most the
  * first n bytes of s1 and s2 without regard to case.  Return zero when
  * they match, and otherwise the difference between the lowercased
  * values of the first pair of bytes that differ.  Bytes are compared
  * as unsigned char, so a high-bit byte sorts after every ASCII byte. */
 int nstrncasecmp(const char *s1, const char *s2, size_t n)
 {
     if (s1 == s2)
 	return 0;

     assert(s1 != NULL && s2 != NULL);

     /* tolower() is only defined for values representable as unsigned
      * char (and EOF), so never hand it a plain, possibly negative,
      * char. */
     for (; *s1 != '\0' && *s2 != '\0' && n > 0; s1++, s2++, n--) {
 	if (tolower((unsigned char)*s1) != tolower((unsigned char)*s2))
 	    break;
     }

     return (n > 0) ? tolower((unsigned char)*s1) -
 	tolower((unsigned char)*s2) : 0;
 }
 #endif

 /* This function is equivalent to strncasecmp() for multibyte
  * strings: compare at most the first n characters of s1 and s2
  * without regard to case.  Return zero when they match and a nonzero
  * value when they don't.  In UTF-8 mode, a byte that is not part of a
  * valid sequence never matches a validly encoded character, and a
  * string that is a proper prefix of the other sorts before it. */
 int mbstrncasecmp(const char *s1, const char *s2, size_t n)
 {
 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	char *s1_mb, *s2_mb;
 	int difference = 0;

 	if (s1 == s2)
 	    return 0;

 	assert(s1 != NULL && s2 != NULL);

 	s1_mb = charalloc(MB_CUR_MAX);
 	s2_mb = charalloc(MB_CUR_MAX);

 	while (*s1 != '\0' && *s2 != '\0' && n > 0) {
 	    bool bad_s1_mb = FALSE, bad_s2_mb = FALSE;
 	    wchar_t ws1, ws2;
 	    int s1_mb_len, s2_mb_len;

 	    s1_mb_len = parse_mbchar(s1, s1_mb, NULL);

 	    if (mbtowc(&ws1, s1_mb, s1_mb_len) < 0) {
 		mbtowc_reset();
 		ws1 = (unsigned char)*s1_mb;
 		bad_s1_mb = TRUE;
 	    }

 	    s2_mb_len = parse_mbchar(s2, s2_mb, NULL);

 	    if (mbtowc(&ws2, s2_mb, s2_mb_len) < 0) {
 		mbtowc_reset();
 		ws2 = (unsigned char)*s2_mb;
 		bad_s2_mb = TRUE;
 	    }

 	    if (bad_s1_mb != bad_s2_mb || towlower(ws1) !=
 		towlower(ws2)) {
 		difference = towlower(ws1) - towlower(ws2);

 		/* An invalid byte can have the same value as a
 		 * valid character; they still must not compare
 		 * equal, so sort the invalid one last. */
 		if (difference == 0)
 		    difference = bad_s1_mb ? 1 : -1;

 		break;
 	    }

 	    s1 += s1_mb_len;
 	    s2 += s2_mb_len;
 	    n--;
 	}

 	free(s1_mb);
 	free(s2_mb);

 	if (difference != 0)
 	    return difference;

 	/* No mismatch was found.  Unless the limit was reached, at
 	 * least one string has ended, so compare the bytes we stopped
 	 * at: this is nonzero exactly when one string is longer. */
 	return (n > 0) ? (unsigned char)*s1 - (unsigned char)*s2 : 0;
     } else
 #endif
 	return strncasecmp(s1, s2, n);
 }

 #ifndef HAVE_STRCASESTR
 /* A stand-in for strcasestr(): return a pointer to the first place in
  * haystack where needle occurs, ignoring case, or NULL when there is
  * no such place.  An empty needle matches at the start of haystack. */
 char *nstrcasestr(const char *haystack, const char *needle)
 {
     size_t remaining, wanted;
     const char *spot;

     assert(haystack != NULL && needle != NULL);

     if (*needle == '\0')
 	return (char *)haystack;

     remaining = strlen(haystack);
     wanted = strlen(needle);

     /* Try every start position that still leaves room for the whole
      * needle. */
     spot = haystack;
     while (*spot != '\0' && remaining >= wanted) {
 	if (strncasecmp(spot, needle, wanted) == 0)
 	    return (char *)spot;

 	spot++;
 	remaining--;
     }

     return NULL;
 }
 #endif

 /* The multibyte counterpart of strcasestr(): return a pointer to the
  * first place in haystack where needle occurs, ignoring case, or NULL
  * when there is no such place.  In UTF-8 mode the search moves one
  * whole multibyte character at a time, and an empty needle matches at
  * the start of haystack. */
 char *mbstrcasestr(const char *haystack, const char *needle)
 {
 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	size_t chars_left, needle_chars;

 	assert(haystack != NULL && needle != NULL);

 	if (*needle == '\0')
 	    return (char *)haystack;

 	chars_left = mbstrlen(haystack);
 	needle_chars = mbstrlen(needle);

 	/* Stop once fewer characters remain than needle has. */
 	while (*haystack != '\0' && chars_left >= needle_chars) {
 	    if (mbstrncasecmp(haystack, needle, needle_chars) == 0)
 		return (char *)haystack;

 	    haystack += move_mbright(haystack, 0);
 	    chars_left--;
 	}

 	return NULL;
     }
 #endif

     return (char *) strcasestr(haystack, needle);
 }

 #if !defined(NANO_TINY) || !defined(DISABLE_TABCOMP)
 /* This function is equivalent to strstr(), except in that it scans the
  * string in reverse, starting at rev_start.  rev_start must point
  * into haystack (its terminating null included).  Return a pointer to
  * the last occurrence of needle that begins at or before rev_start,
  * or NULL when there is none.  An empty needle matches at rev_start
  * itself. */
 char *revstrstr(const char *haystack, const char *needle, const char
 	*rev_start)
 {
     size_t rev_start_len, needle_len;

     assert(haystack != NULL && needle != NULL && rev_start != NULL);

     if (*needle == '\0')
 	return (char *)rev_start;

     needle_len = strlen(needle);

     if (strlen(haystack) < needle_len)
 	return NULL;

     if (rev_start < haystack)
 	return NULL;

     rev_start_len = strlen(rev_start);

     /* Stop on reaching haystack itself instead of stepping past it:
      * forming a pointer to before the start of the string is
      * undefined behavior. */
     for (;;) {
 	if (rev_start_len >= needle_len && strncmp(rev_start, needle,
 		needle_len) == 0)
 	    return (char *)rev_start;

 	if (rev_start == haystack)
 	    break;

 	rev_start--;
 	rev_start_len++;
     }

     return NULL;
 }
 #endif /* !NANO_TINY || !DISABLE_TABCOMP */

 #ifndef NANO_TINY
 /* This function is equivalent to strcasestr(), except in that it scans
  * the string in reverse, starting at rev_start.  rev_start must point
  * into haystack (its terminating null included).  Return a pointer to
  * the last case-insensitive occurrence of needle that begins at or
  * before rev_start, or NULL when there is none.  An empty needle
  * matches at rev_start itself. */
 char *revstrcasestr(const char *haystack, const char *needle, const char
 	*rev_start)
 {
     size_t rev_start_len, needle_len;

     assert(haystack != NULL && needle != NULL && rev_start != NULL);

     if (*needle == '\0')
 	return (char *)rev_start;

     needle_len = strlen(needle);

     if (strlen(haystack) < needle_len)
 	return NULL;

     if (rev_start < haystack)
 	return NULL;

     rev_start_len = strlen(rev_start);

     /* Stop on reaching haystack itself instead of stepping past it:
      * forming a pointer to before the start of the string is
      * undefined behavior. */
     for (;;) {
 	if (rev_start_len >= needle_len && strncasecmp(rev_start,
 		needle, needle_len) == 0)
 	    return (char *)rev_start;

 	if (rev_start == haystack)
 	    break;

 	rev_start--;
 	rev_start_len++;
     }

     return NULL;
 }

 /* The multibyte counterpart of revstrcasestr(): search haystack
  * backward from rev_start for a case-insensitive occurrence of needle,
  * returning a pointer to it, or NULL when there is none.  In UTF-8
  * mode the search steps back one whole multibyte character at a time,
  * and an empty needle matches at rev_start itself. */
 char *mbrevstrcasestr(const char *haystack, const char *needle, const
 	char *rev_start)
 {
 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	size_t chars_from_here, needle_chars;

 	assert(haystack != NULL && needle != NULL && rev_start != NULL);

 	if (*needle == '\0')
 	    return (char *)rev_start;

 	needle_chars = mbstrlen(needle);

 	if (mbstrlen(haystack) < needle_chars)
 	    return NULL;

 	chars_from_here = mbstrlen(rev_start);

 	for (;;) {
 	    if (chars_from_here >= needle_chars &&
 		mbstrncasecmp(rev_start, needle, needle_chars) == 0)
 		return (char *)rev_start;

 	    /* The start of the line has been tried: give up. */
 	    if (rev_start == haystack)
 		return NULL;

 	    rev_start = haystack + move_mbleft(haystack, rev_start -
 		haystack);
 	    chars_from_here++;
 	}
     }
 #endif

     return revstrcasestr(haystack, needle, rev_start);
 }
 #endif /* !NANO_TINY */

 /* The multibyte counterpart of strlen(): return the length of s,
  * obtained by running the length-limited count with the largest
  * possible limit. */
 size_t mbstrlen(const char *s)
 {
     const size_t no_limit = (size_t)-1;

     return mbstrnlen(s, no_limit);
 }

 #ifndef HAVE_STRNLEN
 /* This function is equivalent to strnlen(): return the number of bytes
  * in s before its terminating null, but never more than maxlen.  No
  * more than the first maxlen bytes of s are examined, so s need not be
  * null-terminated when it holds at least maxlen bytes. */
 size_t nstrnlen(const char *s, size_t maxlen)
 {
     size_t n = 0;

     assert(s != NULL);

     /* Check the limit first, so that s[maxlen] is never read. */
     for (; maxlen > 0 && *s != '\0'; s++, maxlen--, n++)
 	;

     return n;
 }
 #endif

 /* The multibyte counterpart of strnlen(): return the length of s, but
  * never more than maxlen.  In UTF-8 mode the length is counted in
  * multibyte characters; otherwise it is counted in bytes. */
 size_t mbstrnlen(const char *s, size_t maxlen)
 {
     assert(s != NULL);

 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	size_t count = 0;

 	while (*s != '\0' && maxlen > 0) {
 	    s += move_mbright(s, 0);
 	    maxlen--;
 	    count++;
 	}

 	return count;
     }
 #endif

     return strnlen(s, maxlen);
 }

 #if !defined(NANO_TINY) || !defined(DISABLE_JUSTIFY)
 /* This function is equivalent to strchr() for multibyte strings:
  * return a pointer to the first occurrence in s of the multibyte
  * character at c, or NULL when there is none.  In UTF-8 mode, a byte
  * that is not part of a valid sequence only matches another such byte
  * with the same value, never a validly encoded character. */
 char *mbstrchr(const char *s, const char *c)
 {
     assert(s != NULL && c != NULL);

 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	bool bad_c_mb = FALSE;
 	char *s_mb = charalloc(MB_CUR_MAX);
 	wchar_t ws, wc;

 	if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
 	    mbtowc_reset();
 	    wc = (unsigned char)*c;
 	    bad_c_mb = TRUE;
 	}

 	while (*s != '\0') {
 	    /* Judge validity afresh for every character, so that one
 	     * invalid byte does not stop all the valid characters
 	     * after it from matching. */
 	    bool bad_s_mb = FALSE;
 	    int s_mb_len = parse_mbchar(s, s_mb, NULL);

 	    if (mbtowc(&ws, s_mb, s_mb_len) < 0) {
 		mbtowc_reset();
 		ws = (unsigned char)*s;
 		bad_s_mb = TRUE;
 	    }

 	    if (bad_s_mb == bad_c_mb && ws == wc)
 		break;

 	    s += s_mb_len;
 	}

 	free(s_mb);

 	/* Reaching the end of s means nothing matched. */
 	return (*s == '\0') ? NULL : (char *)s;
     } else
 #endif
 	return (char *) strchr(s, *c);
 }
 #endif /* !NANO_TINY || !DISABLE_JUSTIFY */

 #ifndef NANO_TINY
 /* The multibyte counterpart of strpbrk(): return a pointer to the
  * first character in s that also occurs in accept, or NULL when s
  * contains none of them.  In UTF-8 mode, s is walked one whole
  * multibyte character at a time. */
 char *mbstrpbrk(const char *s, const char *accept)
 {
     assert(s != NULL && accept != NULL);

 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	const char *spot = s;

 	while (*spot != '\0') {
 	    if (mbstrchr(accept, spot) != NULL)
 		return (char *)spot;

 	    spot += move_mbright(spot, 0);
 	}

 	return NULL;
     }
 #endif

     return (char *) strpbrk(s, accept);
 }

 /* This function is equivalent to strpbrk(), except in that it scans the
  * string in reverse, starting at rev_start.  rev_start must point into
  * s (its terminating null included).  Return a pointer to the last
  * byte at or before rev_start that occurs in accept, or NULL when
  * there is none.  A null byte in s never counts as a match. */
 char *revstrpbrk(const char *s, const char *accept, const char
 	*rev_start)
 {
     assert(s != NULL && accept != NULL && rev_start != NULL);

     if (rev_start < s)
 	return NULL;

     /* Stop on reaching s itself instead of stepping past it: forming
      * a pointer to before the start of the string is undefined
      * behavior. */
     for (;;) {
 	if (*rev_start != '\0' && strchr(accept, *rev_start) != NULL)
 	    return (char *)rev_start;

 	if (rev_start == s)
 	    break;

 	rev_start--;
     }

     return NULL;
 }

 /* The multibyte counterpart of revstrpbrk(): search s backward from
  * rev_start for a character that occurs in accept, returning a pointer
  * to it, or NULL when there is none.  In UTF-8 mode the search steps
  * back one whole multibyte character at a time, and a null byte never
  * counts as a match. */
 char *mbrevstrpbrk(const char *s, const char *accept, const char
 	*rev_start)
 {
     assert(s != NULL && accept != NULL && rev_start != NULL);

 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	for (;;) {
 	    if (*rev_start != '\0' && mbstrchr(accept, rev_start) !=
 		NULL)
 		return (char *)rev_start;

 	    /* The start of the line has been tried: give up. */
 	    if (rev_start == s)
 		return NULL;

 	    rev_start = s + move_mbleft(s, rev_start - s);
 	}
     }
 #endif

     return revstrpbrk(s, accept, rev_start);
 }
 #endif /* !NANO_TINY */

 #if !defined(DISABLE_NANORC) && (!defined(NANO_TINY) || !defined(DISABLE_JUSTIFY))
 /* Return TRUE if the string s contains one or more blank characters,
  * and FALSE otherwise.  An empty string contains none. */
 bool has_blank_chars(const char *s)
 {
     assert(s != NULL);

     /* isblank() is only defined for values representable as unsigned
      * char (and EOF), so never hand it a plain, possibly negative,
      * char. */
     while (*s != '\0' && !isblank((unsigned char)*s))
 	s++;

     /* We stopped early exactly when a blank was found. */
     return (*s != '\0');
 }

 /* The multibyte counterpart of has_blank_chars(): return TRUE when s
  * contains at least one blank multibyte character, and FALSE
  * otherwise.  In UTF-8 mode, s is walked one whole multibyte
  * character at a time. */
 bool has_blank_mbchars(const char *s)
 {
     assert(s != NULL);

 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	char *one_char = charalloc(MB_CUR_MAX);
 	bool found = FALSE;

 	while (*s != '\0') {
 	    int one_char_len = parse_mbchar(s, one_char, NULL);

 	    if (is_blank_mbchar(one_char)) {
 		found = TRUE;
 		break;
 	    }

 	    s += one_char_len;
 	}

 	free(one_char);

 	return found;
     }
 #endif

     return has_blank_chars(s);
 }
 #endif /* !DISABLE_NANORC && (!NANO_TINY || !DISABLE_JUSTIFY) */

 #ifdef ENABLE_UTF8
 /* Return TRUE if wc is valid Unicode, and FALSE otherwise.  A value
  * is rejected when it lies outside 0..0x10FFFF, in 0xD800..0xDFFF, in
  * 0xFDD0..0xFDEF, or when its low sixteen bits are 0xFFFE or 0xFFFF. */
 bool is_valid_unicode(wchar_t wc)
 {
     const bool in_range = (wc >= 0 && wc <= 0x10FFFF);
     const bool in_d800_block = (wc >= 0xD800 && wc <= 0xDFFF);
     const bool in_fdd0_block = (wc >= 0xFDD0 && wc <= 0xFDEF);
     const bool ends_in_fffe_ffff = ((wc & 0xFFFF) >= 0xFFFE);

     return in_range && !in_d800_block && !in_fdd0_block &&
 	!ends_in_fffe_ffff;
 }
 #endif

 #ifndef DISABLE_NANORC
 /* Return TRUE if s is a valid multibyte string, and FALSE otherwise.
  * In UTF-8 mode this asks mbstowcs() whether all of s can be
  * converted; outside UTF-8 mode every string is considered valid. */
 bool is_valid_mbstring(const char *s)
 {
     assert(s != NULL);

 #ifdef ENABLE_UTF8
     if (use_utf8)
 	return (mbstowcs(NULL, s, 0) != (size_t)-1);
 #endif

     return TRUE;
 }
 #endif /* !DISABLE_NANORC */