| /* $Id$ */ |
| /************************************************************************** |
| * chars.c * |
| * * |
| * Copyright (C) 2005 Chris Allegretta * |
| * This program is free software; you can redistribute it and/or modify * |
| * it under the terms of the GNU General Public License as published by * |
| * the Free Software Foundation; either version 2, or (at your option) * |
| * any later version. * |
| * * |
| * This program is distributed in the hope that it will be useful, * |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
| * GNU General Public License for more details. * |
| * * |
| * You should have received a copy of the GNU General Public License * |
| * along with this program; if not, write to the Free Software * |
| * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * |
| * * |
| **************************************************************************/ |
| |
| #ifdef HAVE_CONFIG_H |
| #include <config.h> |
| #endif |
| |
| #include <stdlib.h> |
| #include <string.h> |
| #include <ctype.h> |
| #include <assert.h> |
| #include "proto.h" |
| #include "nano.h" |
| |
| #ifdef NANO_WIDE |
| #ifdef HAVE_WCHAR_H |
| #include <wchar.h> |
| #endif |
| #ifdef HAVE_WCTYPE_H |
| #include <wctype.h> |
| #endif |
| #endif |
| |
| /* Return TRUE if the value of c is in byte range, and FALSE |
| * otherwise. */ |
| bool is_byte(int c) |
| { |
| return ((unsigned int)c == (unsigned char)c); |
| } |
| |
| /* This function is equivalent to isalnum(). */ |
| bool is_alnum_char(int c) |
| { |
| return isalnum(c); |
| } |
| |
| /* This function is equivalent to isalnum() for multibyte characters. */ |
| bool is_alnum_mbchar(const char *c) |
| { |
| assert(c != NULL); |
| |
| #ifdef NANO_WIDE |
| if (!ISSET(NO_UTF8)) { |
| wchar_t wc; |
| int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX); |
| |
| if (c_mb_len <= 0) { |
| mbtowc(NULL, NULL, 0); |
| wc = (unsigned char)*c; |
| } |
| |
| return is_alnum_wchar(wc); |
| } else |
| #endif |
| return is_alnum_char((unsigned char)*c); |
| } |
| |
| #ifdef NANO_WIDE |
| /* This function is equivalent to isalnum() for wide characters. */ |
| bool is_alnum_wchar(wchar_t wc) |
| { |
| return iswalnum(wc); |
| } |
| #endif |
| |
| /* This function is equivalent to isascii(). */ |
| bool is_ascii_char(int c) |
| { |
| return |
| #ifdef HAVE_ISASCII |
| isascii(c) |
| #else |
| ((unsigned int)c == (signed char)c) |
| #endif |
| ; |
| } |
| |
| /* This function is equivalent to isblank(). */ |
| bool is_blank_char(int c) |
| { |
| return |
| #ifdef HAVE_ISBLANK |
| isblank(c) |
| #else |
| isspace(c) && (c == '\t' || !is_cntrl_char(c)) |
| #endif |
| ; |
| } |
| |
| /* This function is equivalent to isblank() for multibyte characters. */ |
| bool is_blank_mbchar(const char *c) |
| { |
| assert(c != NULL); |
| |
| #ifdef NANO_WIDE |
| if (!ISSET(NO_UTF8)) { |
| wchar_t wc; |
| int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX); |
| |
| if (c_mb_len <= 0) { |
| mbtowc(NULL, NULL, 0); |
| wc = (unsigned char)*c; |
| } |
| |
| return is_blank_wchar(wc); |
| } else |
| #endif |
| return is_blank_char((unsigned char)*c); |
| } |
| |
| #ifdef NANO_WIDE |
| /* This function is equivalent to isblank() for wide characters. */ |
| bool is_blank_wchar(wchar_t wc) |
| { |
| return |
| #ifdef HAVE_ISWBLANK |
| iswblank(wc) |
| #else |
| iswspace(wc) && (wc == '\t' || !is_cntrl_wchar(wc)) |
| #endif |
| ; |
| } |
| #endif |
| |
| /* This function is equivalent to iscntrl(), except in that it also |
| * handles control characters with their high bits set. */ |
| bool is_cntrl_char(int c) |
| { |
| return (-128 <= c && c < -96) || (0 <= c && c < 32) || |
| (127 <= c && c < 160); |
| } |
| |
| /* This function is equivalent to iscntrl() for multibyte characters, |
| * except in that it also handles multibyte control characters with |
| * their high bits set. */ |
| bool is_cntrl_mbchar(const char *c) |
| { |
| assert(c != NULL); |
| |
| #ifdef NANO_WIDE |
| if (!ISSET(NO_UTF8)) { |
| wchar_t wc; |
| int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX); |
| |
| if (c_mb_len <= 0) { |
| mbtowc(NULL, NULL, 0); |
| wc = (unsigned char)*c; |
| } |
| |
| return is_cntrl_wchar(wc); |
| } else |
| #endif |
| return is_cntrl_char((unsigned char)*c); |
| } |
| |
| #ifdef NANO_WIDE |
| /* This function is equivalent to iscntrl() for wide characters, except |
| * in that it also handles wide control characters with their high bits |
| * set. */ |
| bool is_cntrl_wchar(wchar_t wc) |
| { |
| return (0 <= wc && wc < 32) || (127 <= wc && wc < 160); |
| } |
| #endif |
| |
| /* c is a control character. It displays as ^@, ^?, or ^[ch] where ch |
| * is c + 64. We return that character. */ |
| unsigned char control_rep(unsigned char c) |
| { |
| /* Treat newlines embedded in a line as encoded nulls. */ |
| if (c == '\n') |
| return '@'; |
| else if (c == NANO_CONTROL_8) |
| return '?'; |
| else |
| return c + 64; |
| } |
| |
| /* c is a multibyte control character. It displays as ^@, ^?, or ^[ch] |
| * where ch is c + 64. We return that multibyte character. */ |
| char *control_mbrep(const char *c, char *crep, int *crep_len) |
| { |
| assert(c != NULL); |
| |
| #ifdef NANO_WIDE |
| if (!ISSET(NO_UTF8)) { |
| wchar_t wc, wcrep; |
| int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX), crep_mb_len; |
| |
| if (c_mb_len <= 0) { |
| mbtowc(NULL, NULL, 0); |
| wc = (unsigned char)*c; |
| } |
| |
| wcrep = control_wrep(wc); |
| |
| crep_mb_len = wctomb(crep, wcrep); |
| |
| if (crep_mb_len <= 0) { |
| wctomb(NULL, 0); |
| crep_mb_len = 0; |
| } |
| |
| *crep_len = crep_mb_len; |
| |
| return crep; |
| } else { |
| #endif |
| *crep_len = 1; |
| *crep = control_rep((unsigned char)*c); |
| |
| return crep; |
| #ifdef NANO_WIDE |
| } |
| #endif |
| } |
| |
| #ifdef NANO_WIDE |
| /* c is a wide control character. It displays as ^@, ^?, or ^[ch] where |
| * ch is c + 64. We return that wide character. */ |
| wchar_t control_wrep(wchar_t wc) |
| { |
| /* Treat newlines embedded in a line as encoded nulls. */ |
| if (wc == '\n') |
| return '@'; |
| else if (wc == NANO_CONTROL_8) |
| return '?'; |
| else |
| return wc + 64; |
| } |
| #endif |
| |
| /* This function is equivalent to wcwidth() for multibyte characters. */ |
| int mbwidth(const char *c) |
| { |
| assert(c != NULL); |
| |
| #ifdef NANO_WIDE |
| if (!ISSET(NO_UTF8)) { |
| wchar_t wc; |
| int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX), width; |
| |
| if (c_mb_len <= 0) { |
| mbtowc(NULL, NULL, 0); |
| wc = (unsigned char)*c; |
| } |
| |
| width = wcwidth(wc); |
| if (width == -1) |
| width++; |
| |
| return width; |
| } else |
| #endif |
| return 1; |
| } |
| |
| /* Return the maximum width in bytes of a multibyte character. */ |
| int mb_cur_max(void) |
| { |
| return |
| #ifdef NANO_WIDE |
| !ISSET(NO_UTF8) ? MB_CUR_MAX : |
| #endif |
| 1; |
| } |
| |
| /* Convert the value in chr to a multibyte character with the same |
| * wide character value as chr. Return the (dynamically allocated) |
| * multibyte character and its length. */ |
| char *make_mbchar(int chr, int *chr_mb_len) |
| { |
| char *chr_mb; |
| |
| assert(chr_mb != NULL && chr_mb_len != NULL); |
| |
| #ifdef NANO_WIDE |
| if (!ISSET(NO_UTF8)) { |
| chr_mb = charalloc(MB_CUR_MAX); |
| *chr_mb_len = wctomb(chr_mb, chr); |
| |
| if (*chr_mb_len <= 0) { |
| wctomb(NULL, 0); |
| *chr_mb_len = 0; |
| } |
| } else { |
| #endif |
| *chr_mb_len = 1; |
| chr_mb = charalloc(1); |
| *chr_mb = (char)chr; |
| #ifdef NANO_WIDE |
| } |
| #endif |
| |
| return chr_mb; |
| } |
| |
| #if defined(ENABLE_NANORC) || defined(ENABLE_EXTRA) |
| /* Convert the string str to a valid multibyte string with the same wide |
| * character values as str. Return the (dynamically allocated) |
| * multibyte string. */ |
| char *make_mbstring(const char *str) |
| { |
| assert(str != NULL); |
| |
| #ifdef NANO_WIDE |
| if (!ISSET(NO_UTF8)) { |
| char *chr_mb = charalloc(MB_CUR_MAX); |
| int chr_mb_len; |
| char *str_mb = charalloc((MB_CUR_MAX * strlen(str)) + 1); |
| size_t str_mb_len = 0; |
| |
| while (*str != '\0') { |
| bool bad_char; |
| int i; |
| |
| chr_mb_len = parse_mbchar(str, chr_mb, &bad_char, NULL); |
| |
| if (bad_char) { |
| char *bad_chr_mb; |
| int bad_chr_mb_len; |
| |
| bad_chr_mb = make_mbchar((unsigned char)*chr_mb, |
| &bad_chr_mb_len); |
| |
| for (i = 0; i < bad_chr_mb_len; i++) |
| str_mb[str_mb_len + i] = bad_chr_mb[i]; |
| str_mb_len += bad_chr_mb_len; |
| |
| free(bad_chr_mb); |
| } else { |
| for (i = 0; i < chr_mb_len; i++) |
| str_mb[str_mb_len + i] = chr_mb[i]; |
| str_mb_len += chr_mb_len; |
| } |
| |
| str += chr_mb_len; |
| } |
| |
| free(chr_mb); |
| null_at(&str_mb, str_mb_len); |
| |
| return str_mb; |
| } else |
| #endif |
| return mallocstrcpy(NULL, str); |
| } |
| #endif |
| |
| /* Parse a multibyte character from buf. Return the number of bytes |
| * used. If chr isn't NULL, store the multibyte character in it. If |
| * bad_chr isn't NULL, set it to TRUE if we have a bad multibyte |
| * character. If col isn't NULL, store the new display width in it. If |
| * *str is '\t', we expect col to have the current display width. */ |
| int parse_mbchar(const char *buf, char *chr, bool *bad_chr, size_t |
| *col) |
| { |
| int buf_mb_len; |
| |
| assert(buf != NULL); |
| |
| if (bad_chr != NULL) |
| *bad_chr = FALSE; |
| |
| #ifdef NANO_WIDE |
| if (!ISSET(NO_UTF8)) { |
| /* Get the number of bytes in the multibyte character. */ |
| buf_mb_len = mblen(buf, MB_CUR_MAX); |
| |
| /* If buf contains a null byte or an invalid multibyte |
| * character, set bad_chr to TRUE (if it contains the latter) |
| * and interpret buf's first byte. */ |
| if (buf_mb_len <= 0) { |
| mblen(NULL, 0); |
| if (buf_mb_len < 0 && bad_chr != NULL) |
| *bad_chr = TRUE; |
| buf_mb_len = 1; |
| } |
| |
| /* Save the multibyte character in chr. */ |
| if (chr != NULL) { |
| int i; |
| |
| for (i = 0; i < buf_mb_len; i++) |
| chr[i] = buf[i]; |
| } |
| |
| /* Save the column width of the wide character in col. */ |
| if (col != NULL) { |
| /* If we have a tab, get its width in columns using the |
| * current value of col. */ |
| if (*buf == '\t') |
| *col += tabsize - *col % tabsize; |
| /* If we have a control character, get its width using one |
| * column for the "^" that will be displayed in front of it, |
| * and the width in columns of its visible equivalent as |
| * returned by control_mbrep(). */ |
| else if (is_cntrl_mbchar(buf)) { |
| char *ctrl_buf_mb = charalloc(MB_CUR_MAX); |
| int ctrl_buf_mb_len; |
| |
| (*col)++; |
| |
| ctrl_buf_mb = control_mbrep(buf, ctrl_buf_mb, |
| &ctrl_buf_mb_len); |
| |
| *col += mbwidth(ctrl_buf_mb); |
| |
| free(ctrl_buf_mb); |
| /* If we have a normal character, get its width in columns |
| * normally. */ |
| } else |
| *col += mbwidth(buf); |
| } |
| } else { |
| #endif |
| /* Get the number of bytes in the byte character. */ |
| buf_mb_len = 1; |
| |
| /* Save the byte character in chr. */ |
| if (chr != NULL) |
| *chr = *buf; |
| |
| if (col != NULL) { |
| /* If we have a tab, get its width in columns using the |
| * current value of col. */ |
| if (*buf == '\t') |
| *col += tabsize - *col % tabsize; |
| /* If we have a control character, it's two columns wide: |
| * one column for the "^" that will be displayed in front of |
| * it, and one column for its visible equivalent as returned |
| * by control_mbrep(). */ |
| else if (is_cntrl_char((unsigned char)*buf)) |
| *col += 2; |
| /* If we have a normal character, it's one column wide. */ |
| else |
| (*col)++; |
| } |
| #ifdef NANO_WIDE |
| } |
| #endif |
| |
| return buf_mb_len; |
| } |
| |
| /* Return the index in buf of the beginning of the multibyte character |
| * before the one at pos. */ |
| size_t move_mbleft(const char *buf, size_t pos) |
| { |
| size_t pos_prev = pos; |
| |
| assert(str != NULL && pos <= strlen(buf)); |
| |
| /* There is no library function to move backward one multibyte |
| * character. Here is the naive, O(pos) way to do it. */ |
| while (TRUE) { |
| int buf_mb_len = parse_mbchar(buf + pos - pos_prev, NULL, NULL, |
| NULL); |
| |
| if (pos_prev <= (size_t)buf_mb_len) |
| break; |
| |
| pos_prev -= buf_mb_len; |
| } |
| |
| return pos - pos_prev; |
| } |
| |
| /* Return the index in buf of the beginning of the multibyte character |
| * after the one at pos. */ |
| size_t move_mbright(const char *buf, size_t pos) |
| { |
| return pos + parse_mbchar(buf + pos, NULL, NULL, NULL); |
| } |
| |
| #ifndef HAVE_STRCASECMP |
| /* This function is equivalent to strcasecmp(). */ |
| int nstrcasecmp(const char *s1, const char *s2) |
| { |
| return |
| #ifdef HAVE_STRNCASECMP |
| strncasecmp(s1, s2, (size_t)-1); |
| #else |
| nstrncasecmp(s1, s2, (size_t)-1); |
| #endif |
| } |
| #endif |
| |
| /* This function is equivalent to strcasecmp() for multibyte strings. */ |
| int mbstrcasecmp(const char *s1, const char *s2) |
| { |
| return mbstrncasecmp(s1, s2, (size_t)-1); |
| } |
| |
| #ifndef HAVE_STRNCASECMP |
| /* This function is equivalent to strncasecmp(). */ |
| int nstrncasecmp(const char *s1, const char *s2, size_t n) |
| { |
| assert(s1 != NULL && s2 != NULL); |
| |
| for (; n > 0 && *s1 != '\0' && *s2 != '\0'; n--, s1++, s2++) { |
| if (tolower(*s1) != tolower(*s2)) |
| break; |
| } |
| |
| if (n > 0) |
| return (tolower(*s1) - tolower(*s2)); |
| else |
| return 0; |
| } |
| #endif |
| |
| /* This function is equivalent to strncasecmp() for multibyte |
| * strings. */ |
| int mbstrncasecmp(const char *s1, const char *s2, size_t n) |
| { |
| #ifdef NANO_WIDE |
| if (!ISSET(NO_UTF8)) { |
| char *s1_mb = charalloc(MB_CUR_MAX); |
| char *s2_mb = charalloc(MB_CUR_MAX); |
| wchar_t ws1, ws2; |
| |
| assert(s1 != NULL && s2 != NULL); |
| |
| while (n > 0 && *s1 != '\0' && *s2 != '\0') { |
| int s1_mb_len, s2_mb_len; |
| |
| s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL); |
| |
| if (mbtowc(&ws1, s1_mb, s1_mb_len) <= 0) { |
| mbtowc(NULL, NULL, 0); |
| ws1 = (unsigned char)*s1_mb; |
| } |
| |
| s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL); |
| |
| if (mbtowc(&ws2, s2_mb, s2_mb_len) <= 0) { |
| mbtowc(NULL, NULL, 0); |
| ws2 = (unsigned char)*s2_mb; |
| } |
| |
| if (n == 0 || towlower(ws1) != towlower(ws2)) |
| break; |
| |
| s1 += s1_mb_len; |
| s2 += s2_mb_len; |
| n--; |
| } |
| |
| free(s1_mb); |
| free(s2_mb); |
| |
| return (towlower(ws1) - towlower(ws2)); |
| } else |
| #endif |
| return |
| #ifdef HAVE_STRNCASECMP |
| strncasecmp(s1, s2, n); |
| #else |
| nstrncasecmp(s1, s2, n); |
| #endif |
| } |
| |
| #ifndef HAVE_STRCASESTR |
| /* This function is equivalent to strcasestr(). It was adapted from |
| * mutt's mutt_stristr() function. */ |
| const char *nstrcasestr(const char *haystack, const char *needle) |
| { |
| assert(haystack != NULL && needle != NULL); |
| |
| for (; *haystack != '\0'; haystack++) { |
| const char *r = haystack, *q = needle; |
| |
| for (; tolower(*r) == tolower(*q) && *q != '\0'; r++, q++) |
| ; |
| |
| if (*q == '\0') |
| return haystack; |
| } |
| |
| return NULL; |
| } |
| #endif |
| |
| /* This function is equivalent to strcasestr() for multibyte strings. */ |
| const char *mbstrcasestr(const char *haystack, const char *needle) |
| { |
| #ifdef NANO_WIDE |
| if (!ISSET(NO_UTF8)) { |
| char *r_mb = charalloc(MB_CUR_MAX); |
| char *q_mb = charalloc(MB_CUR_MAX); |
| wchar_t wr, wq; |
| bool found_needle = FALSE; |
| |
| assert(haystack != NULL && needle != NULL); |
| |
| while (*haystack != '\0') { |
| const char *r = haystack, *q = needle; |
| int r_mb_len, q_mb_len; |
| |
| while (*q != '\0') { |
| r_mb_len = parse_mbchar(r, r_mb, NULL, NULL); |
| |
| if (mbtowc(&wr, r_mb, r_mb_len) <= 0) { |
| mbtowc(NULL, NULL, 0); |
| wr = (unsigned char)*r; |
| } |
| |
| q_mb_len = parse_mbchar(q, q_mb, NULL, NULL); |
| |
| if (mbtowc(&wq, q_mb, q_mb_len) <= 0) { |
| mbtowc(NULL, NULL, 0); |
| wq = (unsigned char)*q; |
| } |
| |
| if (towlower(wr) != towlower(wq)) |
| break; |
| |
| r += r_mb_len; |
| q += q_mb_len; |
| } |
| |
| if (*q == '\0') { |
| found_needle = TRUE; |
| break; |
| } |
| |
| haystack += move_mbright(haystack, 0); |
| } |
| |
| free(r_mb); |
| free(q_mb); |
| |
| return found_needle ? haystack : NULL; |
| } else |
| #endif |
| return |
| #ifdef HAVE_STRCASESTR |
| strcasestr(haystack, needle); |
| #else |
| nstrcasestr(haystack, needle); |
| #endif |
| } |
| |
| #if !defined(NANO_SMALL) || !defined(DISABLE_TABCOMP) |
| /* This function is equivalent to strstr(), except in that it scans the |
| * string in reverse, starting at rev_start. */ |
| const char *revstrstr(const char *haystack, const char *needle, const |
| char *rev_start) |
| { |
| assert(haystack != NULL && needle != NULL && rev_start != NULL); |
| |
| for (; rev_start >= haystack; rev_start--) { |
| const char *r, *q; |
| |
| for (r = rev_start, q = needle; *r == *q && *q != '\0'; r++, q++) |
| ; |
| |
| if (*q == '\0') |
| return rev_start; |
| } |
| |
| return NULL; |
| } |
| #endif |
| |
| #ifndef NANO_SMALL |
| /* This function is equivalent to strcasestr(), except in that it scans |
| * the string in reverse, starting at rev_start. */ |
| const char *revstrcasestr(const char *haystack, const char *needle, |
| const char *rev_start) |
| { |
| assert(haystack != NULL && needle != NULL && rev_start != NULL); |
| |
| for (; rev_start >= haystack; rev_start--) { |
| const char *r = rev_start, *q = needle; |
| |
| for (; tolower(*r) == tolower(*q) && *q != '\0'; r++, q++) |
| ; |
| |
| if (*q == '\0') |
| return rev_start; |
| } |
| |
| return NULL; |
| } |
| |
| /* This function is equivalent to strcasestr() for multibyte strings, |
| * except in that it scans the string in reverse, starting at |
| * rev_start. */ |
| const char *mbrevstrcasestr(const char *haystack, const char *needle, |
| const char *rev_start) |
| { |
| #ifdef NANO_WIDE |
| if (!ISSET(NO_UTF8)) { |
| char *r_mb = charalloc(MB_CUR_MAX); |
| char *q_mb = charalloc(MB_CUR_MAX); |
| wchar_t wr, wq; |
| bool begin_line = FALSE, found_needle = FALSE; |
| |
| assert(haystack != NULL && needle != NULL && rev_start != NULL); |
| |
| while (!begin_line) { |
| const char *r = rev_start, *q = needle; |
| int r_mb_len, q_mb_len; |
| |
| while (*q != '\0') { |
| r_mb_len = parse_mbchar(r, r_mb, NULL, NULL); |
| |
| if (mbtowc(&wr, r_mb, r_mb_len) <= 0) { |
| mbtowc(NULL, NULL, 0); |
| wr = (unsigned char)*r; |
| } |
| |
| q_mb_len = parse_mbchar(q, q_mb, NULL, NULL); |
| |
| if (mbtowc(&wq, q_mb, q_mb_len) <= 0) { |
| mbtowc(NULL, NULL, 0); |
| wq = (unsigned char)*q; |
| } |
| |
| if (towlower(wr) != towlower(wq)) |
| break; |
| |
| r += r_mb_len; |
| q += q_mb_len; |
| } |
| |
| if (*q == '\0') { |
| found_needle = TRUE; |
| break; |
| } |
| |
| if (rev_start == haystack) |
| begin_line = TRUE; |
| else |
| rev_start = haystack + move_mbleft(haystack, rev_start - |
| haystack); |
| } |
| |
| free(r_mb); |
| free(q_mb); |
| |
| return found_needle ? rev_start : NULL; |
| } else |
| #endif |
| return revstrcasestr(haystack, needle, rev_start); |
| } |
| #endif |
| |
| /* This function is equivalent to strlen() for multibyte strings. */ |
| size_t mbstrlen(const char *s) |
| { |
| return mbstrnlen(s, (size_t)-1); |
| } |
| |
| #ifndef HAVE_STRNLEN |
| /* This function is equivalent to strnlen(). */ |
| size_t nstrnlen(const char *s, size_t maxlen) |
| { |
| size_t n = 0; |
| |
| assert(s != NULL); |
| |
| for (; maxlen > 0 && *s != '\0'; maxlen--, n++, s++) |
| ; |
| |
| return n; |
| } |
| #endif |
| |
| /* This function is equivalent to strnlen() for multibyte strings. */ |
| size_t mbstrnlen(const char *s, size_t maxlen) |
| { |
| assert(s != NULL); |
| |
| #ifdef NANO_WIDE |
| if (!ISSET(NO_UTF8)) { |
| size_t n = 0; |
| char *s_mb = charalloc(MB_CUR_MAX); |
| int s_mb_len; |
| |
| while (*s != '\0') { |
| s_mb_len = parse_mbchar(s, s_mb, NULL, NULL); |
| |
| if (maxlen == 0) |
| break; |
| |
| maxlen--; |
| s += s_mb_len; |
| n++; |
| } |
| |
| free(s_mb); |
| |
| return n; |
| } else |
| #endif |
| return |
| #ifdef HAVE_STRNLEN |
| strnlen(s, maxlen); |
| #else |
| nstrnlen(s, maxlen); |
| #endif |
| } |
| |
| #ifndef DISABLE_JUSTIFY |
| /* This function is equivalent to strchr() for multibyte strings. */ |
| char *mbstrchr(const char *s, char *c) |
| { |
| assert(s != NULL && c != NULL); |
| |
| #ifdef NANO_WIDE |
| if (!ISSET(NO_UTF8)) { |
| char *s_mb = charalloc(MB_CUR_MAX); |
| const char *q = s; |
| wchar_t ws, wc; |
| int s_mb_len, c_mb_len = mbtowc(&wc, c, MB_CUR_MAX); |
| |
| if (c_mb_len <= 0) { |
| mbtowc(NULL, NULL, 0); |
| wc = (unsigned char)*c; |
| } |
| |
| while (*s != '\0') { |
| s_mb_len = parse_mbchar(s, s_mb, NULL, NULL); |
| |
| if (mbtowc(&ws, s_mb, s_mb_len) <= 0) { |
| mbtowc(NULL, NULL, 0); |
| ws = (unsigned char)*s; |
| } |
| |
| if (ws == wc) |
| break; |
| |
| s += s_mb_len; |
| q += s_mb_len; |
| } |
| |
| free(s_mb); |
| |
| if (ws != wc) |
| q = NULL; |
| |
| return (char *)q; |
| } else |
| #endif |
| return strchr(s, *c); |
| } |
| #endif |