David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 1 | /************************************************************************** |
Benno Schulenberg | 514cd9a | 2016-08-29 17:10:49 +0200 | [diff] [blame] | 2 | * chars.c -- This file is part of GNU nano. * |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 3 | * * |
Benno Schulenberg | 7a9f4a4 | 2014-04-30 20:18:26 +0000 | [diff] [blame] | 4 | * Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, * |
| 5 | * 2010, 2011, 2013, 2014 Free Software Foundation, Inc. * |
Benno Schulenberg | 406e524 | 2016-08-29 15:14:18 +0200 | [diff] [blame] | 6 | * Copyright (C) 2016 Benno Schulenberg * |
| 7 | * * |
Benno Schulenberg | 514cd9a | 2016-08-29 17:10:49 +0200 | [diff] [blame] | 8 | * GNU nano is free software: you can redistribute it and/or modify * |
| 9 | * it under the terms of the GNU General Public License as published * |
| 10 | * by the Free Software Foundation, either version 3 of the License, * |
| 11 | * or (at your option) any later version. * |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 12 | * * |
Benno Schulenberg | 514cd9a | 2016-08-29 17:10:49 +0200 | [diff] [blame] | 13 | * GNU nano is distributed in the hope that it will be useful, * |
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty * |
| 15 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * |
| 16 | * See the GNU General Public License for more details. * |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 17 | * * |
| 18 | * You should have received a copy of the GNU General Public License * |
Benno Schulenberg | 514cd9a | 2016-08-29 17:10:49 +0200 | [diff] [blame] | 19 | * along with this program. If not, see http://www.gnu.org/licenses/. * |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 20 | * * |
| 21 | **************************************************************************/ |
| 22 | |
David Lawrence Ramsey | 034b994 | 2005-12-08 02:47:10 +0000 | [diff] [blame] | 23 | #include "proto.h" |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 24 | |
David Lawrence Ramsey | 5508cc5 | 2005-01-14 04:22:14 +0000 | [diff] [blame] | 25 | #include <string.h> |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 26 | #include <ctype.h> |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 27 | |
David Lawrence Ramsey | 7eb30a8 | 2005-07-17 02:40:07 +0000 | [diff] [blame] | 28 | #ifdef ENABLE_UTF8 |
David Lawrence Ramsey | 42abfe0 | 2005-01-22 18:24:16 +0000 | [diff] [blame] | 29 | #ifdef HAVE_WCHAR_H |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 30 | #include <wchar.h> |
| 31 | #endif |
David Lawrence Ramsey | 42abfe0 | 2005-01-22 18:24:16 +0000 | [diff] [blame] | 32 | #ifdef HAVE_WCTYPE_H |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 33 | #include <wctype.h> |
| 34 | #endif |
David Lawrence Ramsey | 61f5673 | 2005-07-21 22:12:03 +0000 | [diff] [blame] | 35 | |
David Lawrence Ramsey | 4d72de7 | 2006-04-12 15:27:40 +0000 | [diff] [blame] | 36 | static bool use_utf8 = FALSE; |
| 37 | /* Whether we've enabled UTF-8 support. */ |
David Lawrence Ramsey | 4d72de7 | 2006-04-12 15:27:40 +0000 | [diff] [blame] | 38 | |
| 39 | /* Enable UTF-8 support. */ |
| 40 | void utf8_init(void) |
| 41 | { |
| 42 | use_utf8 = TRUE; |
| 43 | } |
| 44 | |
| 45 | /* Is UTF-8 support enabled? */ |
| 46 | bool using_utf8(void) |
| 47 | { |
| 48 | return use_utf8; |
| 49 | } |
Benno Schulenberg | 70b2d08 | 2014-04-05 20:28:29 +0000 | [diff] [blame] | 50 | #endif /* ENABLE_UTF8 */ |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 51 | |
/* Join two allocated strings: resize str1 (via charealloc()) so that it
 * can hold len1 + len2 bytes plus a terminator, cut it off at len1, and
 * append at most len2 bytes of str2 to it.  The second string is freed.
 * Return the resized first string, which replaces the str1 pointer that
 * was passed in. */
char *addstrings(char *str1, size_t len1, char *str2, size_t len2)
{
    char *joined = charealloc(str1, len1 + len2 + 1);
    char *tail = joined + len1;

    /* Terminate the first part, so that strncat() appends exactly there. */
    *tail = '\0';
    strncat(tail, str2, len2);

    free(str2);

    return joined;
}
| 63 | |
David Lawrence Ramsey | d864048 | 2005-06-12 17:48:46 +0000 | [diff] [blame] | 64 | #ifndef HAVE_ISBLANK |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 65 | /* This function is equivalent to isblank(). */ |
David Lawrence Ramsey | 1aee5cc | 2005-06-29 18:17:54 +0000 | [diff] [blame] | 66 | bool nisblank(int c) |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 67 | { |
David Lawrence Ramsey | d864048 | 2005-06-12 17:48:46 +0000 | [diff] [blame] | 68 | return isspace(c) && (c == '\t' || !is_cntrl_char(c)); |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 69 | } |
David Lawrence Ramsey | d864048 | 2005-06-12 17:48:46 +0000 | [diff] [blame] | 70 | #endif |
| 71 | |
David Lawrence Ramsey | 7eb30a8 | 2005-07-17 02:40:07 +0000 | [diff] [blame] | 72 | #if !defined(HAVE_ISWBLANK) && defined(ENABLE_UTF8) |
David Lawrence Ramsey | d864048 | 2005-06-12 17:48:46 +0000 | [diff] [blame] | 73 | /* This function is equivalent to iswblank(). */ |
David Lawrence Ramsey | 1aee5cc | 2005-06-29 18:17:54 +0000 | [diff] [blame] | 74 | bool niswblank(wchar_t wc) |
David Lawrence Ramsey | d864048 | 2005-06-12 17:48:46 +0000 | [diff] [blame] | 75 | { |
| 76 | return iswspace(wc) && (wc == '\t' || !is_cntrl_wchar(wc)); |
| 77 | } |
| 78 | #endif |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 79 | |
/* Tell whether the value of c fits in a single byte: the result is true
 * when c is equal to its own truncation to unsigned char, and false for
 * anything else (including every negative value). */
bool is_byte(int c)
{
    const unsigned char low_byte = (unsigned char)c;

    return ((unsigned int)c == low_byte);
}
| 85 | |
/* Call mbtowc() with a null string, which puts its conversion state back
 * to the initial one.  The functions in this file do this after mbtowc()
 * has reported an invalid sequence.  The call's return value is not used. */
void mbtowc_reset(void)
{
    IGNORE_CALL_RESULT(mbtowc(NULL, NULL, 0));
}
| 90 | |
Benno Schulenberg | 20058a1 | 2016-08-02 22:09:22 +0200 | [diff] [blame] | 91 | /* This function is equivalent to isalpha() for multibyte characters. */ |
| 92 | bool is_alpha_mbchar(const char *c) |
| 93 | { |
| 94 | assert(c != NULL); |
| 95 | |
| 96 | #ifdef ENABLE_UTF8 |
| 97 | if (use_utf8) { |
| 98 | wchar_t wc; |
| 99 | |
| 100 | if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { |
| 101 | mbtowc_reset(); |
| 102 | return 0; |
| 103 | } |
| 104 | |
| 105 | return iswalpha(wc); |
| 106 | } else |
| 107 | #endif |
| 108 | return isalpha((unsigned char)*c); |
| 109 | } |
| 110 | |
David Lawrence Ramsey | 2515ccc | 2005-06-15 06:04:08 +0000 | [diff] [blame] | 111 | /* This function is equivalent to isalnum() for multibyte characters. */ |
| 112 | bool is_alnum_mbchar(const char *c) |
| 113 | { |
| 114 | assert(c != NULL); |
| 115 | |
David Lawrence Ramsey | 7eb30a8 | 2005-07-17 02:40:07 +0000 | [diff] [blame] | 116 | #ifdef ENABLE_UTF8 |
David Lawrence Ramsey | 4d72de7 | 2006-04-12 15:27:40 +0000 | [diff] [blame] | 117 | if (use_utf8) { |
David Lawrence Ramsey | 2515ccc | 2005-06-15 06:04:08 +0000 | [diff] [blame] | 118 | wchar_t wc; |
David Lawrence Ramsey | 2515ccc | 2005-06-15 06:04:08 +0000 | [diff] [blame] | 119 | |
Benno Schulenberg | 9205c28 | 2015-09-04 19:34:55 +0000 | [diff] [blame] | 120 | if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { |
| 121 | mbtowc_reset(); |
Benno Schulenberg | b6efea2 | 2016-06-05 21:49:29 +0200 | [diff] [blame] | 122 | return 0; |
Benno Schulenberg | 9205c28 | 2015-09-04 19:34:55 +0000 | [diff] [blame] | 123 | } |
David Lawrence Ramsey | 2515ccc | 2005-06-15 06:04:08 +0000 | [diff] [blame] | 124 | |
| 125 | return iswalnum(wc); |
| 126 | } else |
| 127 | #endif |
| 128 | return isalnum((unsigned char)*c); |
| 129 | } |
| 130 | |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 131 | /* This function is equivalent to isblank() for multibyte characters. */ |
| 132 | bool is_blank_mbchar(const char *c) |
| 133 | { |
| 134 | assert(c != NULL); |
| 135 | |
David Lawrence Ramsey | 7eb30a8 | 2005-07-17 02:40:07 +0000 | [diff] [blame] | 136 | #ifdef ENABLE_UTF8 |
David Lawrence Ramsey | 4d72de7 | 2006-04-12 15:27:40 +0000 | [diff] [blame] | 137 | if (use_utf8) { |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 138 | wchar_t wc; |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 139 | |
Benno Schulenberg | 9205c28 | 2015-09-04 19:34:55 +0000 | [diff] [blame] | 140 | if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { |
| 141 | mbtowc_reset(); |
Benno Schulenberg | b6efea2 | 2016-06-05 21:49:29 +0200 | [diff] [blame] | 142 | return 0; |
Benno Schulenberg | 9205c28 | 2015-09-04 19:34:55 +0000 | [diff] [blame] | 143 | } |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 144 | |
David Lawrence Ramsey | d864048 | 2005-06-12 17:48:46 +0000 | [diff] [blame] | 145 | return iswblank(wc); |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 146 | } else |
| 147 | #endif |
David Lawrence Ramsey | d864048 | 2005-06-12 17:48:46 +0000 | [diff] [blame] | 148 | return isblank((unsigned char)*c); |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 149 | } |
| 150 | |
/* A restricted variant of iscntrl(): accept only the control codes that
 * have no high bit set, that is: the values 0 up to and including 31. */
bool is_ascii_cntrl_char(int c)
{
    return (c >= 0 && c <= 31);
}
| 157 | |
/* An extended variant of iscntrl() that also recognizes high-bit control
 * codes: c is a control character when it is 127, or when bits 5 and 6
 * (mask 0x60) are both clear.  For byte values that means 0 to 31 and
 * 128 to 159. */
bool is_cntrl_char(int c)
{
    return (c == 127 || !(c & 0x60));
}
| 164 | |
/* The multibyte counterpart of iscntrl(), which also recognizes control
 * characters that have their high bit set.  In UTF-8 mode these are: a
 * first byte below 32, a first byte of 127, or the byte 0xC2 followed by
 * a byte below 0xA0.  Otherwise the first byte is handed to
 * is_cntrl_char(). */
bool is_cntrl_mbchar(const char *c)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
        /* Viewed as signed chars, 0xC2 is -62 and 0xA0 is -96. */
        const signed char lead = (signed char)c[0];

        /* The second byte is looked at only when the first is 0xC2. */
        return ((c[0] & 0xE0) == 0 || c[0] == 127 ||
                (lead == -62 && (signed char)c[1] < -96));
    }
#endif

    return is_cntrl_char((unsigned char)*c);
}
| 178 | |
David Lawrence Ramsey | 2515ccc | 2005-06-15 06:04:08 +0000 | [diff] [blame] | 179 | /* This function is equivalent to ispunct() for multibyte characters. */ |
| 180 | bool is_punct_mbchar(const char *c) |
David Lawrence Ramsey | 6728708 | 2005-06-13 02:40:04 +0000 | [diff] [blame] | 181 | { |
| 182 | assert(c != NULL); |
| 183 | |
David Lawrence Ramsey | 7eb30a8 | 2005-07-17 02:40:07 +0000 | [diff] [blame] | 184 | #ifdef ENABLE_UTF8 |
David Lawrence Ramsey | 4d72de7 | 2006-04-12 15:27:40 +0000 | [diff] [blame] | 185 | if (use_utf8) { |
David Lawrence Ramsey | 6728708 | 2005-06-13 02:40:04 +0000 | [diff] [blame] | 186 | wchar_t wc; |
David Lawrence Ramsey | 6728708 | 2005-06-13 02:40:04 +0000 | [diff] [blame] | 187 | |
Benno Schulenberg | 9205c28 | 2015-09-04 19:34:55 +0000 | [diff] [blame] | 188 | if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { |
| 189 | mbtowc_reset(); |
Benno Schulenberg | b6efea2 | 2016-06-05 21:49:29 +0200 | [diff] [blame] | 190 | return 0; |
Benno Schulenberg | 9205c28 | 2015-09-04 19:34:55 +0000 | [diff] [blame] | 191 | } |
David Lawrence Ramsey | 6728708 | 2005-06-13 02:40:04 +0000 | [diff] [blame] | 192 | |
David Lawrence Ramsey | 2515ccc | 2005-06-15 06:04:08 +0000 | [diff] [blame] | 193 | return iswpunct(wc); |
David Lawrence Ramsey | 6728708 | 2005-06-13 02:40:04 +0000 | [diff] [blame] | 194 | } else |
| 195 | #endif |
David Lawrence Ramsey | 2515ccc | 2005-06-15 06:04:08 +0000 | [diff] [blame] | 196 | return ispunct((unsigned char)*c); |
| 197 | } |
| 198 | |
Benno Schulenberg | 6f12992 | 2016-06-30 18:02:45 +0200 | [diff] [blame] | 199 | /* Return TRUE when the given multibyte character c is a word-forming |
| 200 | * character (that is: alphanumeric, or specified in wordchars, or |
| 201 | * punctuation when allow_punct is TRUE), and FALSE otherwise. */ |
David Lawrence Ramsey | 2515ccc | 2005-06-15 06:04:08 +0000 | [diff] [blame] | 202 | bool is_word_mbchar(const char *c, bool allow_punct) |
| 203 | { |
Benno Schulenberg | bf091be | 2016-07-21 09:46:47 +0200 | [diff] [blame] | 204 | if (*c == '\0') |
| 205 | return FALSE; |
| 206 | |
Benno Schulenberg | 6f12992 | 2016-06-30 18:02:45 +0200 | [diff] [blame] | 207 | if (is_alnum_mbchar(c)) |
| 208 | return TRUE; |
| 209 | |
| 210 | if (word_chars != NULL && *word_chars != '\0') { |
Benno Schulenberg | 116d9e6 | 2016-12-15 21:30:30 +0100 | [diff] [blame] | 211 | char symbol[mb_cur_max() + 1]; |
Benno Schulenberg | 6f12992 | 2016-06-30 18:02:45 +0200 | [diff] [blame] | 212 | int symlen = parse_mbchar(c, symbol, NULL); |
| 213 | |
| 214 | symbol[symlen] = '\0'; |
Benno Schulenberg | 116d9e6 | 2016-12-15 21:30:30 +0100 | [diff] [blame] | 215 | return (strstr(word_chars, symbol) != NULL); |
Benno Schulenberg | 6f12992 | 2016-06-30 18:02:45 +0200 | [diff] [blame] | 216 | } |
| 217 | |
| 218 | return (allow_punct && is_punct_mbchar(c)); |
David Lawrence Ramsey | 6728708 | 2005-06-13 02:40:04 +0000 | [diff] [blame] | 219 | } |
| 220 | |
Benno Schulenberg | 019d7b3 | 2016-06-29 20:40:22 +0200 | [diff] [blame] | 221 | /* Return the visible representation of control character c. */ |
Benno Schulenberg | 03586c6 | 2016-05-30 11:28:16 +0200 | [diff] [blame] | 222 | char control_rep(const signed char c) |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 223 | { |
Benno Schulenberg | eafae5d | 2016-12-18 09:40:09 +0100 | [diff] [blame] | 224 | if (c == DEL_CODE) |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 225 | return '?'; |
Benno Schulenberg | 03586c6 | 2016-05-30 11:28:16 +0200 | [diff] [blame] | 226 | else if (c == -97) |
| 227 | return '='; |
| 228 | else if (c < 0) |
| 229 | return c + 224; |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 230 | else |
| 231 | return c + 64; |
| 232 | } |
| 233 | |
Benno Schulenberg | 622995f | 2016-06-29 20:37:28 +0200 | [diff] [blame] | 234 | /* Return the visible representation of multibyte control character c. */ |
Benno Schulenberg | eafae5d | 2016-12-18 09:40:09 +0100 | [diff] [blame] | 235 | char control_mbrep(const char *c, bool isdata) |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 236 | { |
Benno Schulenberg | eef7d10 | 2016-12-20 19:27:41 +0100 | [diff] [blame] | 237 | /* An embedded newline is an encoded NUL if it is data. */ |
| 238 | if (*c == '\n' && (isdata || as_an_at)) |
Benno Schulenberg | eafae5d | 2016-12-18 09:40:09 +0100 | [diff] [blame] | 239 | return '@'; |
| 240 | |
David Lawrence Ramsey | 7eb30a8 | 2005-07-17 02:40:07 +0000 | [diff] [blame] | 241 | #ifdef ENABLE_UTF8 |
David Lawrence Ramsey | 4d72de7 | 2006-04-12 15:27:40 +0000 | [diff] [blame] | 242 | if (use_utf8) { |
Benno Schulenberg | 86a64b1 | 2016-07-01 11:47:15 +0200 | [diff] [blame] | 243 | if ((unsigned char)c[0] < 128) |
Benno Schulenberg | 622995f | 2016-06-29 20:37:28 +0200 | [diff] [blame] | 244 | return control_rep(c[0]); |
Benno Schulenberg | 03586c6 | 2016-05-30 11:28:16 +0200 | [diff] [blame] | 245 | else |
Benno Schulenberg | 622995f | 2016-06-29 20:37:28 +0200 | [diff] [blame] | 246 | return control_rep(c[1]); |
Benno Schulenberg | 3b21659 | 2016-05-24 10:34:40 +0200 | [diff] [blame] | 247 | } else |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 248 | #endif |
Benno Schulenberg | 622995f | 2016-06-29 20:37:28 +0200 | [diff] [blame] | 249 | return control_rep(*c); |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 250 | } |
| 251 | |
Benno Schulenberg | 0894587 | 2016-06-06 12:48:26 +0200 | [diff] [blame] | 252 | /* Assess how many bytes the given (multibyte) character occupies. Return -1 |
| 253 | * if the byte sequence is invalid, and return the number of bytes minus 8 |
Benno Schulenberg | e33a0b6 | 2016-06-06 13:20:04 +0200 | [diff] [blame] | 254 | * when it encodes an invalid codepoint. Also, in the second parameter, |
| 255 | * return the number of columns that the character occupies. */ |
| 256 | int length_of_char(const char *c, int *width) |
David Lawrence Ramsey | 96452cb | 2005-07-26 06:13:45 +0000 | [diff] [blame] | 257 | { |
Benno Schulenberg | 0894587 | 2016-06-06 12:48:26 +0200 | [diff] [blame] | 258 | assert(c != NULL); |
David Lawrence Ramsey | 96452cb | 2005-07-26 06:13:45 +0000 | [diff] [blame] | 259 | |
| 260 | #ifdef ENABLE_UTF8 |
David Lawrence Ramsey | 4d72de7 | 2006-04-12 15:27:40 +0000 | [diff] [blame] | 261 | if (use_utf8) { |
David Lawrence Ramsey | 96452cb | 2005-07-26 06:13:45 +0000 | [diff] [blame] | 262 | wchar_t wc; |
Benno Schulenberg | 0894587 | 2016-06-06 12:48:26 +0200 | [diff] [blame] | 263 | int charlen = mbtowc(&wc, c, MB_CUR_MAX); |
David Lawrence Ramsey | 96452cb | 2005-07-26 06:13:45 +0000 | [diff] [blame] | 264 | |
Benno Schulenberg | 0894587 | 2016-06-06 12:48:26 +0200 | [diff] [blame] | 265 | /* If the sequence is invalid... */ |
| 266 | if (charlen < 0) { |
Benno Schulenberg | 9205c28 | 2015-09-04 19:34:55 +0000 | [diff] [blame] | 267 | mbtowc_reset(); |
Benno Schulenberg | 0894587 | 2016-06-06 12:48:26 +0200 | [diff] [blame] | 268 | return -1; |
David Lawrence Ramsey | 96452cb | 2005-07-26 06:13:45 +0000 | [diff] [blame] | 269 | } |
Benno Schulenberg | 0894587 | 2016-06-06 12:48:26 +0200 | [diff] [blame] | 270 | |
| 271 | /* If the codepoint is invalid... */ |
| 272 | if (!is_valid_unicode(wc)) |
| 273 | return charlen - 8; |
Benno Schulenberg | e33a0b6 | 2016-06-06 13:20:04 +0200 | [diff] [blame] | 274 | else { |
| 275 | *width = wcwidth(wc); |
| 276 | /* If the codepoint is unassigned, assume a width of one. */ |
| 277 | if (*width < 0) |
| 278 | *width = 1; |
Benno Schulenberg | 0894587 | 2016-06-06 12:48:26 +0200 | [diff] [blame] | 279 | return charlen; |
Benno Schulenberg | e33a0b6 | 2016-06-06 13:20:04 +0200 | [diff] [blame] | 280 | } |
Benno Schulenberg | 3b21659 | 2016-05-24 10:34:40 +0200 | [diff] [blame] | 281 | } else |
David Lawrence Ramsey | 96452cb | 2005-07-26 06:13:45 +0000 | [diff] [blame] | 282 | #endif |
Benno Schulenberg | 0894587 | 2016-06-06 12:48:26 +0200 | [diff] [blame] | 283 | return 1; |
David Lawrence Ramsey | 96452cb | 2005-07-26 06:13:45 +0000 | [diff] [blame] | 284 | } |
| 285 | |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 286 | /* This function is equivalent to wcwidth() for multibyte characters. */ |
| 287 | int mbwidth(const char *c) |
| 288 | { |
| 289 | assert(c != NULL); |
| 290 | |
David Lawrence Ramsey | 7eb30a8 | 2005-07-17 02:40:07 +0000 | [diff] [blame] | 291 | #ifdef ENABLE_UTF8 |
David Lawrence Ramsey | 4d72de7 | 2006-04-12 15:27:40 +0000 | [diff] [blame] | 292 | if (use_utf8) { |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 293 | wchar_t wc; |
David Lawrence Ramsey | 61f5673 | 2005-07-21 22:12:03 +0000 | [diff] [blame] | 294 | int width; |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 295 | |
Benno Schulenberg | 9205c28 | 2015-09-04 19:34:55 +0000 | [diff] [blame] | 296 | if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { |
| 297 | mbtowc_reset(); |
Benno Schulenberg | 8686cb3 | 2016-06-05 21:42:27 +0200 | [diff] [blame] | 298 | return 1; |
Benno Schulenberg | 9205c28 | 2015-09-04 19:34:55 +0000 | [diff] [blame] | 299 | } |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 300 | |
| 301 | width = wcwidth(wc); |
David Lawrence Ramsey | 6209e0e | 2005-06-14 02:08:25 +0000 | [diff] [blame] | 302 | |
Benno Schulenberg | 8686cb3 | 2016-06-05 21:42:27 +0200 | [diff] [blame] | 303 | if (width == -1) |
| 304 | return 1; |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 305 | |
| 306 | return width; |
| 307 | } else |
| 308 | #endif |
| 309 | return 1; |
| 310 | } |
| 311 | |
/* Return the maximum number of bytes that a single character can take:
 * MB_CUR_MAX when UTF-8 mode is on, and 1 otherwise. */
int mb_cur_max(void)
{
#ifdef ENABLE_UTF8
    if (use_utf8)
        return MB_CUR_MAX;
#endif

    return 1;
}
| 322 | |
/* Convert the value in chr to a multibyte character, if possible.
 * In UTF-8 mode, chr is treated as a Unicode codepoint: when the
 * conversion succeeds, return the (dynamically allocated) multibyte
 * character and store its length in *chr_mb_len; otherwise return an
 * undefined (dynamically allocated) multibyte character and store a
 * length of zero.  When not in UTF-8 mode, the result is always a single
 * byte holding the value of chr converted to char, with a length of one.
 * In every case the returned buffer was dynamically allocated. */
char *make_mbchar(long chr, int *chr_mb_len)
{
    char *chr_mb;

#ifdef ENABLE_UTF8
    if (use_utf8) {
        chr_mb = charalloc(MB_CUR_MAX);
        *chr_mb_len = wctomb(chr_mb, (wchar_t)chr);

        /* Reject invalid Unicode characters. */
        if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) {
            /* Put wctomb() back into its initial conversion state. */
            IGNORE_CALL_RESULT(wctomb(NULL, 0));
            *chr_mb_len = 0;
        }
    } else
#endif
    {
        /* Convert by value instead of reading the first byte of chr's
         * in-memory representation: that first byte is the low-order
         * one only on little-endian machines. */
        const char byte = (char)chr;

        *chr_mb_len = 1;
        chr_mb = mallocstrncpy(NULL, &byte, 1);
    }

    return chr_mb;
}
| 350 | |
| 351 | /* Parse a multibyte character from buf. Return the number of bytes |
| 352 | * used. If chr isn't NULL, store the multibyte character in it. If |
Benno Schulenberg | c5f4916 | 2016-12-15 19:46:16 +0100 | [diff] [blame] | 353 | * col isn't NULL, add the character's width (in columns) to it. */ |
David Lawrence Ramsey | 96452cb | 2005-07-26 06:13:45 +0000 | [diff] [blame] | 354 | int parse_mbchar(const char *buf, char *chr, size_t *col) |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 355 | { |
Benno Schulenberg | fc101a6 | 2016-12-15 15:50:07 +0100 | [diff] [blame] | 356 | int length; |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 357 | |
| 358 | assert(buf != NULL); |
| 359 | |
David Lawrence Ramsey | 7eb30a8 | 2005-07-17 02:40:07 +0000 | [diff] [blame] | 360 | #ifdef ENABLE_UTF8 |
David Lawrence Ramsey | 4d72de7 | 2006-04-12 15:27:40 +0000 | [diff] [blame] | 361 | if (use_utf8) { |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 362 | /* Get the number of bytes in the multibyte character. */ |
Benno Schulenberg | fc101a6 | 2016-12-15 15:50:07 +0100 | [diff] [blame] | 363 | length = mblen(buf, MB_CUR_MAX); |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 364 | |
Benno Schulenberg | b42887f | 2016-05-29 21:45:52 +0200 | [diff] [blame] | 365 | /* When the multibyte sequence is invalid, only take the first byte. */ |
Benno Schulenberg | 85ebe97 | 2016-12-15 16:45:26 +0100 | [diff] [blame] | 366 | if (length <= 0) { |
Chris Allegretta | a97cb81 | 2009-12-02 03:24:18 +0000 | [diff] [blame] | 367 | IGNORE_CALL_RESULT(mblen(NULL, 0)); |
Benno Schulenberg | fc101a6 | 2016-12-15 15:50:07 +0100 | [diff] [blame] | 368 | length = 1; |
Benno Schulenberg | 85ebe97 | 2016-12-15 16:45:26 +0100 | [diff] [blame] | 369 | } |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 370 | |
Benno Schulenberg | b42887f | 2016-05-29 21:45:52 +0200 | [diff] [blame] | 371 | /* When requested, store the multibyte character in chr. */ |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 372 | if (chr != NULL) { |
| 373 | int i; |
David Lawrence Ramsey | e0fb4d5 | 2005-03-11 04:03:32 +0000 | [diff] [blame] | 374 | |
Benno Schulenberg | fc101a6 | 2016-12-15 15:50:07 +0100 | [diff] [blame] | 375 | for (i = 0; i < length; i++) |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 376 | chr[i] = buf[i]; |
| 377 | } |
| 378 | |
Benno Schulenberg | c5f4916 | 2016-12-15 19:46:16 +0100 | [diff] [blame] | 379 | /* When requested, add the width of the character to col. */ |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 380 | if (col != NULL) { |
Benno Schulenberg | c5f4916 | 2016-12-15 19:46:16 +0100 | [diff] [blame] | 381 | /* If we have a tab, compute its width in columns based on the |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 382 | * current value of col. */ |
| 383 | if (*buf == '\t') |
| 384 | *col += tabsize - *col % tabsize; |
Benno Schulenberg | 4172268 | 2016-05-28 15:56:16 +0200 | [diff] [blame] | 385 | /* If we have a control character, it's two columns wide: one |
| 386 | * column for the "^", and one for the visible character. */ |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 387 | else if (is_cntrl_mbchar(buf)) { |
Benno Schulenberg | 4172268 | 2016-05-28 15:56:16 +0200 | [diff] [blame] | 388 | *col += 2; |
Benno Schulenberg | c5f4916 | 2016-12-15 19:46:16 +0100 | [diff] [blame] | 389 | /* If we have a normal character, get its width normally. */ |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 390 | } else |
| 391 | *col += mbwidth(buf); |
| 392 | } |
Benno Schulenberg | 3b21659 | 2016-05-24 10:34:40 +0200 | [diff] [blame] | 393 | } else |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 394 | #endif |
Benno Schulenberg | 3b21659 | 2016-05-24 10:34:40 +0200 | [diff] [blame] | 395 | { |
Benno Schulenberg | b42887f | 2016-05-29 21:45:52 +0200 | [diff] [blame] | 396 | /* A byte character is one byte long. */ |
Benno Schulenberg | fc101a6 | 2016-12-15 15:50:07 +0100 | [diff] [blame] | 397 | length = 1; |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 398 | |
Benno Schulenberg | b42887f | 2016-05-29 21:45:52 +0200 | [diff] [blame] | 399 | /* When requested, store the byte character in chr. */ |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 400 | if (chr != NULL) |
| 401 | *chr = *buf; |
| 402 | |
Benno Schulenberg | c5f4916 | 2016-12-15 19:46:16 +0100 | [diff] [blame] | 403 | /* When requested, add the width of the character to col. */ |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 404 | if (col != NULL) { |
Benno Schulenberg | c5f4916 | 2016-12-15 19:46:16 +0100 | [diff] [blame] | 405 | /* If we have a tab, compute its width in columns using the |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 406 | * current value of col. */ |
| 407 | if (*buf == '\t') |
| 408 | *col += tabsize - *col % tabsize; |
Benno Schulenberg | a9f79a6 | 2016-05-28 15:40:39 +0200 | [diff] [blame] | 409 | /* If we have a control character, it's two columns wide: one |
| 410 | * column for the "^", and one for the visible character. */ |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 411 | else if (is_cntrl_char((unsigned char)*buf)) |
| 412 | *col += 2; |
| 413 | /* If we have a normal character, it's one column wide. */ |
| 414 | else |
| 415 | (*col)++; |
| 416 | } |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 417 | } |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 418 | |
Benno Schulenberg | fc101a6 | 2016-12-15 15:50:07 +0100 | [diff] [blame] | 419 | return length; |
David Lawrence Ramsey | b54155c | 2005-01-12 03:25:57 +0000 | [diff] [blame] | 420 | } |
David Lawrence Ramsey | d24fbb7 | 2005-01-14 21:50:32 +0000 | [diff] [blame] | 421 | |
/* Return the index in buf of the start of the multibyte character that
 * precedes the one at pos.  When pos is zero, zero is returned. */
size_t move_mbleft(const char *buf, size_t pos)
{
    size_t reach, scan, step = 0;

    assert(buf != NULL && pos <= strlen(buf));

    /* There is no library function for stepping back one multibyte
     * character.  So begin at the farthest point where the preceding
     * character could start (but not before the start of buf)... */
    reach = mb_cur_max();
    scan = (reach > pos) ? 0 : pos - reach;

    /* ...and parse forward from there until pos is reached or passed,
     * remembering the length of the last character seen. */
    while (scan < pos) {
        step = parse_mbchar(buf + scan, NULL, NULL);
        scan += step;
    }

    /* Step back over the character that was parsed last. */
    return scan - step;
}
| 445 | |
/* Return the index in buf of the start of the multibyte character that
 * follows the one at pos. */
size_t move_mbright(const char *buf, size_t pos)
{
	/* The byte length of the character that begins at pos. */
	size_t char_len = parse_mbchar(buf + pos, NULL, NULL);

	return pos + char_len;
}
David Lawrence Ramsey | 3a1fc8f | 2005-01-16 18:49:19 +0000 | [diff] [blame] | 452 | |
| 453 | #ifndef HAVE_STRCASECMP |
| 454 | /* This function is equivalent to strcasecmp(). */ |
| 455 | int nstrcasecmp(const char *s1, const char *s2) |
| 456 | { |
Benno Schulenberg | 56f067a | 2016-06-01 21:56:38 +0200 | [diff] [blame] | 457 | return strncasecmp(s1, s2, HIGHEST_POSITIVE); |
David Lawrence Ramsey | 3a1fc8f | 2005-01-16 18:49:19 +0000 | [diff] [blame] | 458 | } |
| 459 | #endif |
| 460 | |
| 461 | /* This function is equivalent to strcasecmp() for multibyte strings. */ |
| 462 | int mbstrcasecmp(const char *s1, const char *s2) |
| 463 | { |
Benno Schulenberg | 56f067a | 2016-06-01 21:56:38 +0200 | [diff] [blame] | 464 | return mbstrncasecmp(s1, s2, HIGHEST_POSITIVE); |
David Lawrence Ramsey | 3a1fc8f | 2005-01-16 18:49:19 +0000 | [diff] [blame] | 465 | } |
| 466 | |
| 467 | #ifndef HAVE_STRNCASECMP |
/* This function is equivalent to strncasecmp(): compare at most n bytes
 * of s1 and s2 while ignoring case.  Return zero when they are equal,
 * and otherwise the difference between the lowercased values of the
 * first pair of bytes that differ. */
int nstrncasecmp(const char *s1, const char *s2, size_t n)
{
	/* A string is always equal to itself. */
	if (s1 == s2)
		return 0;

	/* The bytes are converted to unsigned char before being handed to
	 * tolower(): passing a negative char value is undefined, and the
	 * conversion also makes bytes with the high bit set sort after
	 * ASCII, as they do with strncasecmp(). */
	for (; *s1 != '\0' && *s2 != '\0' && n > 0; s1++, s2++, n--) {
		if (tolower((unsigned char)*s1) != tolower((unsigned char)*s2))
			break;
	}

	/* When all n bytes were compared without a mismatch, the strings
	 * count as equal, whatever follows. */
	if (n == 0)
		return 0;

	return tolower((unsigned char)*s1) - tolower((unsigned char)*s2);
}
| 481 | #endif |
| 482 | |
/* The multibyte counterpart of strncasecmp(): compare at most n
 * characters of s1 and s2 while ignoring case.  In UTF-8 mode the
 * strings are compared character by character; otherwise the work is
 * left to strncasecmp(). */
int mbstrncasecmp(const char *s1, const char *s2, size_t n)
{
#ifdef ENABLE_UTF8
	if (use_utf8) {
		wchar_t wide1, wide2;

		for (; *s1 != '\0' && *s2 != '\0' && n > 0; n--) {
			bool invalid1, invalid2;

			/* Convert the current character of each string, noting
			 * when the bytes do not form a valid sequence. */
			invalid1 = (mbtowc(&wide1, s1, MB_CUR_MAX) < 0);
			if (invalid1)
				mbtowc_reset();

			invalid2 = (mbtowc(&wide2, s2, MB_CUR_MAX) < 0);
			if (invalid2)
				mbtowc_reset();

			if (!invalid1 && !invalid2) {
				/* Two valid characters: compare them lowercased. */
				int delta = towlower(wide1) - towlower(wide2);

				if (delta != 0)
					return delta;
			} else if (*s1 != *s2)
				/* At least one invalid sequence: compare raw bytes. */
				return (unsigned char)*s1 - (unsigned char)*s2;
			else if (invalid1 != invalid2)
				/* Same leading byte, but only one side is invalid:
				 * the invalid side compares as the greater one. */
				return (invalid1 ? 1 : -1);

			s1 += move_mbright(s1, 0);
			s2 += move_mbright(s2, 0);
		}

		/* When the character budget is used up, the strings are equal. */
		if (n == 0)
			return 0;

		return (unsigned char)*s1 - (unsigned char)*s2;
	}
#endif
	return strncasecmp(s1, s2, n);
}
| 526 | |
| 527 | #ifndef HAVE_STRCASESTR |
/* A stand-in for strcasestr(): return the first place in haystack where
 * needle occurs when case is ignored, or NULL when it does not occur.
 * An empty needle matches at the very start of haystack. */
char *nstrcasestr(const char *haystack, const char *needle)
{
	const size_t span = strlen(needle);
	const char *spot;

	if (span == 0)
		return (char *)haystack;

	/* Try every starting position in turn. */
	for (spot = haystack; *spot != '\0'; spot++) {
		if (strncasecmp(spot, needle, span) == 0)
			return (char *)spot;
	}

	return NULL;
}
| 547 | #endif |
| 548 | |
/* The multibyte counterpart of strcasestr(): return the first place in
 * haystack where needle occurs when case is ignored, or NULL when it
 * does not occur.  Outside UTF-8 mode, strcasestr() does the work. */
char *mbstrcasestr(const char *haystack, const char *needle)
{
#ifdef ENABLE_UTF8
	if (use_utf8) {
		const char *spot;
		size_t needle_len;

		/* An empty needle matches at the very start. */
		if (*needle == '\0')
			return (char *)haystack;

		/* The needle's length is counted in characters, not bytes. */
		needle_len = mbstrlen(needle);

		/* Try every character boundary in turn. */
		for (spot = haystack; *spot != '\0'; spot += move_mbright(spot, 0)) {
			if (mbstrncasecmp(spot, needle, needle_len) == 0)
				return (char *)spot;
		}

		return NULL;
	}
#endif
	return (char *) strcasestr(haystack, needle);
}
| 573 | |
/* This function is equivalent to strstr(), except in that it scans the
 * string in reverse, starting at pointer (which must point into
 * haystack).  Return the start of the nearest occurrence of needle that
 * begins at or before pointer, or NULL when there is none.  An empty
 * needle matches at pointer itself. */
char *revstrstr(const char *haystack, const char *needle,
		const char *pointer)
{
	size_t needle_len = strlen(needle);
	size_t tail_len = strlen(pointer);
	size_t offset;

	if (needle_len == 0)
		return (char *)pointer;

	if (pointer < haystack || strlen(haystack) < needle_len)
		return NULL;

	offset = (size_t)(pointer - haystack);

	/* When fewer than needle_len bytes remain from the starting point
	 * onward, no match can begin there, so step back to the last spot
	 * where the needle still fits.  The subtraction is done in the
	 * order that cannot wrap around: the haystack is known to be at
	 * least needle_len bytes long, so offset is large enough. */
	if (tail_len < needle_len)
		offset -= needle_len - tail_len;

	/* Walk backward by index instead of by pointer, so that no pointer
	 * to before the start of haystack is ever formed. */
	for (;;) {
		if (strncmp(haystack + offset, needle, needle_len) == 0)
			return (char *)(haystack + offset);

		if (offset == 0)
			return NULL;

		offset--;
	}
}
| 599 | |
/* This function is equivalent to strcasestr(), except in that it scans
 * the string in reverse, starting at rev_start (which must point into
 * haystack).  Return the start of the nearest case-insensitive
 * occurrence of needle that begins at or before rev_start, or NULL when
 * there is none.  An empty needle matches at rev_start itself. */
char *revstrcasestr(const char *haystack, const char *needle, const char
		*rev_start)
{
	size_t rev_start_len, needle_len, offset;

	if (*needle == '\0')
		return (char *)rev_start;

	needle_len = strlen(needle);

	if (rev_start < haystack || strlen(haystack) < needle_len)
		return NULL;

	rev_start_len = strlen(rev_start);
	offset = (size_t)(rev_start - haystack);

	/* When fewer than needle_len bytes remain from the starting point
	 * onward, no match can begin there, so jump straight back to the
	 * last spot where the needle still fits.  The haystack is known to
	 * be at least needle_len bytes long, so this cannot underflow. */
	if (rev_start_len < needle_len)
		offset -= needle_len - rev_start_len;

	/* Walk backward by index instead of by pointer, so that no pointer
	 * to before the start of haystack is ever formed. */
	for (;;) {
		if (strncasecmp(haystack + offset, needle, needle_len) == 0)
			return (char *)(haystack + offset);

		if (offset == 0)
			return NULL;

		offset--;
	}
}
David Lawrence Ramsey | 345260c | 2005-01-24 01:14:17 +0000 | [diff] [blame] | 625 | |
/* The multibyte counterpart of strcasestr() that scans the string in
 * reverse, starting at rev_start.  Return the start of the nearest
 * case-insensitive occurrence of needle at or before rev_start, or NULL
 * when there is none.  Outside UTF-8 mode, revstrcasestr() does the
 * work. */
char *mbrevstrcasestr(const char *haystack, const char *needle, const
		char *rev_start)
{
#ifdef ENABLE_UTF8
	if (use_utf8) {
		size_t needle_len, tail_len;

		/* An empty needle matches right where the search starts. */
		if (*needle == '\0')
			return (char *)rev_start;

		/* All lengths here are counted in characters, not bytes. */
		needle_len = mbstrlen(needle);

		if (mbstrlen(haystack) < needle_len)
			return NULL;

		/* The number of characters from the current spot to the end;
		 * it grows by one for every step taken backward. */
		for (tail_len = mbstrlen(rev_start); ; tail_len++) {
			/* Only compare when the needle can fit at this spot. */
			if (tail_len >= needle_len &&
						mbstrncasecmp(rev_start, needle, needle_len) == 0)
				return (char *)rev_start;

			/* Having arrived at the head of the haystack without a
			 * match means there is nothing to be found. */
			if (rev_start == haystack)
				return NULL;

			rev_start = haystack + move_mbleft(haystack, rev_start - haystack);
		}
	}
#endif
	return revstrcasestr(haystack, needle, rev_start);
}
David Lawrence Ramsey | 3a1fc8f | 2005-01-16 18:49:19 +0000 | [diff] [blame] | 661 | |
/* The multibyte counterpart of strlen(): count the characters in s, by
 * running mbstrnlen() with the largest possible limit. */
size_t mbstrlen(const char *s)
{
	const size_t no_limit = (size_t)-1;

	return mbstrnlen(s, no_limit);
}
| 667 | |
David Lawrence Ramsey | 3a1fc8f | 2005-01-16 18:49:19 +0000 | [diff] [blame] | 668 | #ifndef HAVE_STRNLEN |
/* This function is equivalent to strnlen(): return the number of bytes
 * in s before the terminating null byte, but never more than maxlen.
 * No more than the first maxlen bytes of s are examined, so s does not
 * need to be null-terminated when it is at least maxlen bytes long. */
size_t nstrnlen(const char *s, size_t maxlen)
{
	size_t n = 0;

	/* The limit is checked before the byte is read, so that the byte
	 * just beyond the limit is never touched. */
	while (n < maxlen && s[n] != '\0')
		n++;

	return n;
}
| 679 | #endif |
| 680 | |
/* The multibyte counterpart of strnlen(): count the characters in s,
 * stopping at maxlen.  Outside UTF-8 mode, strnlen() does the work. */
size_t mbstrnlen(const char *s, size_t maxlen)
{
#ifdef ENABLE_UTF8
	if (use_utf8) {
		size_t count = 0;

		/* Step over one whole character per iteration. */
		while (*s != '\0' && maxlen > 0) {
			s += move_mbright(s, 0);
			maxlen--;
			count++;
		}

		return count;
	}
#endif
	return strnlen(s, maxlen);
}
David Lawrence Ramsey | 38156d4 | 2005-03-15 05:44:03 +0000 | [diff] [blame] | 697 | |
David Lawrence Ramsey | a248863 | 2006-01-06 07:10:30 +0000 | [diff] [blame] | 698 | #if !defined(NANO_TINY) || !defined(DISABLE_JUSTIFY) |
/* This function is equivalent to strchr() for multibyte strings: return
 * the first place in s where the multibyte character that c points at
 * occurs, or NULL when it does not occur.  An invalid byte sequence is
 * compared by the value of its first byte, and matches only another
 * invalid sequence.  Outside UTF-8 mode, strchr() does the work, using
 * the first byte of c. */
char *mbstrchr(const char *s, const char *c)
{
	assert(s != NULL && c != NULL);

#ifdef ENABLE_UTF8
	if (use_utf8) {
		bool bad_c_mb = FALSE;
		char symbol[MB_CUR_MAX];
		wchar_t ws, wc;

		/* Convert the sought character once, up front. */
		if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
			mbtowc_reset();
			wc = (unsigned char)*c;
			bad_c_mb = TRUE;
		}

		while (*s != '\0') {
			/* The validity flag is started afresh for every character,
			 * so that one invalid sequence in s does not cause all the
			 * valid characters after it to be treated as invalid. */
			bool bad_s_mb = FALSE;
			/* NOTE(review): parse_mbchar() is presumed to store the
			 * character at s into symbol and return its length in
			 * bytes -- its definition is not visible here. */
			int sym_len = parse_mbchar(s, symbol, NULL);

			if (mbtowc(&ws, symbol, sym_len) < 0) {
				mbtowc_reset();
				ws = (unsigned char)*s;
				bad_s_mb = TRUE;
			}

			/* A valid character never matches an invalid sequence. */
			if (bad_s_mb == bad_c_mb && ws == wc)
				return (char *)s;

			s += sym_len;
		}

		return NULL;
	} else
#endif
		return (char *) strchr(s, *c);
}
David Lawrence Ramsey | a248863 | 2006-01-06 07:10:30 +0000 | [diff] [blame] | 741 | #endif /* !NANO_TINY || !DISABLE_JUSTIFY */ |
David Lawrence Ramsey | 66444c3 | 2005-07-21 18:05:27 +0000 | [diff] [blame] | 742 | |
David Lawrence Ramsey | a248863 | 2006-01-06 07:10:30 +0000 | [diff] [blame] | 743 | #ifndef NANO_TINY |
/* The multibyte counterpart of strpbrk(): return the first place in s
 * where any of the characters in accept occurs, or NULL when none of
 * them occurs.  Outside UTF-8 mode, strpbrk() does the work. */
char *mbstrpbrk(const char *s, const char *accept)
{
#ifdef ENABLE_UTF8
	if (use_utf8) {
		const char *spot = s;

		/* Look up each character of s in the set of accepted ones. */
		while (*spot != '\0') {
			if (mbstrchr(accept, spot) != NULL)
				return (char *)spot;

			spot += move_mbright(spot, 0);
		}

		return NULL;
	}
#endif
	return (char *) strpbrk(s, accept);
}
| 759 | |
/* This function is equivalent to strpbrk(), except in that it scans the
 * string in reverse, starting at rev_start (which must point into s).
 * Return the nearest place at or before rev_start where any of the
 * bytes in accept occurs, or NULL when none of them occurs. */
char *revstrpbrk(const char *s, const char *accept, const char
		*rev_start)
{
	assert(s != NULL && accept != NULL && rev_start != NULL);

	if (rev_start < s)
		return NULL;

	/* When starting at the terminating null byte, step back onto the
	 * last real byte first -- strchr() would otherwise "find" the null
	 * byte at the end of accept. */
	if (*rev_start == '\0') {
		if (rev_start == s)
			return NULL;
		rev_start--;
	}

	/* The head of the string is tested for before stepping back, so
	 * that no pointer to before the start of s is ever formed. */
	for (;;) {
		if (strchr(accept, *rev_start) != NULL)
			return (char *)rev_start;

		if (rev_start == s)
			return NULL;

		rev_start--;
	}
}
| 780 | |
/* The multibyte counterpart of strpbrk() that scans the string in
 * reverse, starting at rev_start.  Return the nearest place at or
 * before rev_start where any of the characters in accept occurs, or
 * NULL when none of them occurs.  Outside UTF-8 mode, revstrpbrk() does
 * the work. */
char *mbrevstrpbrk(const char *s, const char *accept, const char
		*rev_start)
{
	assert(s != NULL && accept != NULL && rev_start != NULL);

#ifdef ENABLE_UTF8
	if (use_utf8) {
		/* When starting at the terminating null byte, first step back
		 * onto the last character -- unless the string is empty. */
		if (*rev_start == '\0') {
			if (rev_start == s)
				return NULL;

			rev_start = s + move_mbleft(s, rev_start - s);
		}

		/* Keep stepping backward for as long as the current character
		 * is not among the accepted ones. */
		while (mbstrchr(accept, rev_start) == NULL) {
			/* Having arrived at the head of the string without a match
			 * means there is nothing to be found. */
			if (rev_start == s)
				return NULL;

			rev_start = s + move_mbleft(s, rev_start - s);
		}

		return (char *)rev_start;
	}
#endif
	return revstrpbrk(s, accept, rev_start);
}
| 810 | #endif /* !NANO_TINY */ |
| 811 | |
Benno Schulenberg | eea0908 | 2014-04-13 20:50:20 +0000 | [diff] [blame] | 812 | #if !defined(DISABLE_NANORC) && (!defined(NANO_TINY) || !defined(DISABLE_JUSTIFY)) |
David Lawrence Ramsey | d5d4dde | 2005-06-14 01:55:56 +0000 | [diff] [blame] | 813 | /* Return TRUE if the string s contains one or more blank characters, |
| 814 | * and FALSE otherwise. */ |
| 815 | bool has_blank_chars(const char *s) |
| 816 | { |
David Lawrence Ramsey | d5d4dde | 2005-06-14 01:55:56 +0000 | [diff] [blame] | 817 | for (; *s != '\0'; s++) { |
| 818 | if (isblank(*s)) |
| 819 | return TRUE; |
| 820 | } |
| 821 | |
| 822 | return FALSE; |
| 823 | } |
| 824 | |
/* Return TRUE when the multibyte string s contains at least one blank
 * multibyte character, and FALSE when it contains none.  Outside UTF-8
 * mode, has_blank_chars() does the work. */
bool has_blank_mbchars(const char *s)
{
#ifdef ENABLE_UTF8
	if (use_utf8) {
		char symbol[MB_CUR_MAX];

		while (*s != '\0') {
			/* Fetch the current character into symbol, so that it can
			 * be examined on its own. */
			parse_mbchar(s, symbol, NULL);

			if (is_blank_mbchar(symbol))
				return TRUE;

			s += move_mbright(s, 0);
		}

		return FALSE;
	}
#endif
	return has_blank_chars(s);
}
Benno Schulenberg | eea0908 | 2014-04-13 20:50:20 +0000 | [diff] [blame] | 845 | #endif /* !DISABLE_NANORC && (!NANO_TINY || !DISABLE_JUSTIFY) */ |
David Lawrence Ramsey | bdfa927 | 2005-06-14 23:36:13 +0000 | [diff] [blame] | 846 | |
David Lawrence Ramsey | 6ff695c | 2005-08-05 03:14:29 +0000 | [diff] [blame] | 847 | #ifdef ENABLE_UTF8 |
/* Return TRUE when wc is acceptable as a Unicode code point, and FALSE
 * otherwise.  Rejected are: values outside the range 0..0x10FFFF, the
 * range 0xD800..0xDFFF, the range 0xFDD0..0xFDEF, and every value whose
 * lowest sixteen bits are 0xFFFE or 0xFFFF. */
bool is_valid_unicode(wchar_t wc)
{
	const bool in_range = (0 <= wc && wc <= 0x10FFFF);
	const bool in_d800_block = (0xD800 <= wc && wc <= 0xDFFF);
	const bool in_fdd0_block = (0xFDD0 <= wc && wc <= 0xFDEF);
	const bool ends_in_fffe_or_ffff = ((wc & 0xFFFF) > 0xFFFD);

	return (in_range && !in_d800_block && !in_fdd0_block &&
				!ends_in_fffe_or_ffff);
}
| 856 | #endif |
| 857 | |
Benno Schulenberg | eea0908 | 2014-04-13 20:50:20 +0000 | [diff] [blame] | 858 | #ifndef DISABLE_NANORC |
David Lawrence Ramsey | bdfa927 | 2005-06-14 23:36:13 +0000 | [diff] [blame] | 859 | /* Check if the string s is a valid multibyte string. Return TRUE if it |
| 860 | * is, and FALSE otherwise. */ |
| 861 | bool is_valid_mbstring(const char *s) |
| 862 | { |
David Lawrence Ramsey | 7eb30a8 | 2005-07-17 02:40:07 +0000 | [diff] [blame] | 863 | #ifdef ENABLE_UTF8 |
Benno Schulenberg | c5f4916 | 2016-12-15 19:46:16 +0100 | [diff] [blame] | 864 | if (use_utf8) |
| 865 | return (mbstowcs(NULL, s, 0) != (size_t)-1); |
| 866 | else |
David Lawrence Ramsey | bdfa927 | 2005-06-14 23:36:13 +0000 | [diff] [blame] | 867 | #endif |
Benno Schulenberg | c5f4916 | 2016-12-15 19:46:16 +0100 | [diff] [blame] | 868 | return TRUE; |
David Lawrence Ramsey | bdfa927 | 2005-06-14 23:36:13 +0000 | [diff] [blame] | 869 | } |
Benno Schulenberg | eea0908 | 2014-04-13 20:50:20 +0000 | [diff] [blame] | 870 | #endif /* !DISABLE_NANORC */ |