blob: 8c75a98f081c4f62327c9f82a2d657543a89345e [file] [log] [blame]
Elliott Hughesfc0307d2016-02-02 15:26:47 -08001/* $OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $ */
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07002
3/*-
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +00004 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
Elliott Hughesdd4abe02018-02-05 15:55:19 -08005 * 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018
Elliott Hughesfc0307d2016-02-02 15:26:47 -08006 * mirabilos <m@mirbsd.org>
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07007 *
8 * Provided that these terms and disclaimer and all copyright notices
9 * are retained or reproduced in an accompanying document, permission
10 * is granted to deal in this work without restriction, including un-
11 * limited rights to use, publicly perform, distribute, sell, modify,
12 * merge, give away, or sublicence.
13 *
14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15 * the utmost extent permitted by applicable law, neither express nor
16 * implied; without malicious intent or gross negligence. In no event
17 * may a licensor, author or contributor be held liable for indirect,
18 * direct, other damage, loss, or other issues arising in any way out
19 * of dealing in the work, even if advised of the possibility of such
20 * damage or existence of a defect, except proven that it results out
21 * of said person's immediate fault when using the work as intended.
22 */
23
24#include "sh.h"
25
Elliott Hughes47086262019-03-26 12:34:31 -070026__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.250 2018/10/20 18:34:14 tg Exp $");
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -070027
/*
 * states while lexing word
 *
 * These values are stored in Lex_state.type (a uint8_t) and in the
 * "state" local of yylex(). SINVALID tags the sentinel entry that
 * yylex() places at the bottom of its state stack.
 */
#define SBASE		0	/* outside any lexical constructs */
#define SWORD		1	/* implicit quoting for substitute() */
#define SLETPAREN	2	/* inside (( )), implicit quoting */
#define SSQUOTE		3	/* inside '' */
#define SDQUOTE		4	/* inside "" */
#define SEQUOTE		5	/* inside $'' */
#define SBRACE		6	/* inside ${} */
#define SQBRACE		7	/* inside "${}" */
#define SBQUOTE		8	/* inside `` */
#define SASPAREN	9	/* inside $(( )) */
#define SHEREDELIM	10	/* parsing << or <<- delimiter */
#define SHEREDQUOTE	11	/* parsing " in << or <<- delimiter */
#define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
#define SADELIM		13	/* like SBASE, looking for delimiter */
#define STBRACEKORN	14	/* parsing ${...[#%]...} !FSH */
#define STBRACEBOURNE	15	/* parsing ${...[#%]...} FSH */
#define SINVALID	255	/* invalid state */
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -070048
Geremy Condra03ebf062011-10-12 18:17:24 -070049struct sretrace_info {
50 struct sretrace_info *next;
51 XString xs;
52 char *xp;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -070053};
54
/*
 * Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * The members of the union share storage, so only the member that
 * matches the current use of an entry is meaningful.
 */
typedef struct lex_state {
	union {
		/* point to the next state block */
		struct lex_state *base;
		/* marks start of state output in output string */
		size_t start;
		/* SBQUOTE: true if in double quotes: "`...`" */
		/* SEQUOTE: got NUL, ignore rest of string */
		bool abool;
		/* SADELIM information */
		struct {
			/* character to search for */
			unsigned char delimiter;
			/* max. number of delimiters */
			unsigned char num;
		} adelim;
	} u;
	/* count open parentheses */
	short nparen;
	/* type of this state */
	uint8_t type;
	/* extra flags */
	uint8_t ls_flags;
} Lex_state;

/* shorthands for the union members */
#define ls_base		u.base
#define ls_start	u.start
#define ls_bool		u.abool
#define ls_adelim	u.adelim
87
/*
 * ls_flags
 *
 * LS_HEREDOC is set on the initial state when yylex() is called with
 * HEREDOC in its flags; PUSH_STATE copies the flags of the current
 * state into each newly pushed one.
 */
#define LS_HEREDOC	BIT(0)
90
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -070091typedef struct {
92 Lex_state *base;
93 Lex_state *end;
94} State_info;
95
96static void readhere(struct ioword *);
Geremy Condra03ebf062011-10-12 18:17:24 -070097static void ungetsc(int);
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +000098static void ungetsc_i(int);
Geremy Condra03ebf062011-10-12 18:17:24 -070099static int getsc_uu(void);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700100static void getsc_line(Source *);
101static int getsc_bn(void);
Elliott Hughesfc0307d2016-02-02 15:26:47 -0800102static int getsc_i(void);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700103static char *get_brace_var(XString *, char *);
Geremy Condra03ebf062011-10-12 18:17:24 -0700104static bool arraysub(char **);
Elliott Hughesfc0307d2016-02-02 15:26:47 -0800105static void gethere(void);
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000106static Lex_state *push_state_i(State_info *, Lex_state *);
107static Lex_state *pop_state_i(State_info *, Lex_state *);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700108
/*
 * Both counters are reset to 0 at the start of each yylex() run.
 */
/* while non-zero, o_getsc() always takes the getsc_bn() slow path */
static int backslash_skip;
/* raised by yylex() while skipping a comment or inside single quotes */
static int ignore_backslash_newline;
111
/*
 * optimised getsc_bn(): take the next byte straight from source->str
 * unless it is NUL or a backslash, or backslash_skip is set; in those
 * cases defer to getsc_bn()
 */
#define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
			    !backslash_skip ? *source->str++ : getsc_bn())
/*
 * optimised getsc_uu(): take the next byte straight from source->str
 * unless it is NUL, in which case defer to getsc_uu()
 */
#define o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())
117
/*
 * retrace helper
 *
 * Expands to a complete function body (it declares locals and ends
 * in a return statement), so it must be the only content of an
 * int-returning function. It evaluates carg once, appends the value
 * to the buffer of every node on the retrace_info list, and returns it.
 */
#define o_getsc_r(carg)					\
	int cev = (carg);				\
	struct sretrace_info *rp = retrace_info;	\
							\
	while (rp) {					\
		Xcheck(rp->xs, rp->xp);			\
		*rp->xp++ = cev;			\
		rp = rp->next;				\
	}						\
							\
	return (cev);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700130
/*
 * callback (handed to unbksl() by yylex()): fetch the next input
 * character via o_getsc(), record it on the retrace list, and return
 * it as the non-negative value of an unsigned char
 */
static int
getsc_i(void)
{
	o_getsc_r((unsigned int)(unsigned char)o_getsc());
}
Elliott Hughesfc0307d2016-02-02 15:26:47 -0800137
/*
 * getsc(): read the next input character and record it on the
 * retrace list. With MKSH_SMALL (and without MKSH_SMALL_BUT_FAST)
 * this is simply getsc_i(); otherwise o_getsc() is expanded at the
 * call site and only the recording is done in getsc_r().
 */
#if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
#define getsc()		getsc_i()
#else
static int getsc_r(int);

/* record the already fetched character c on the retrace list; return c */
static int
getsc_r(int c)
{
	o_getsc_r(c);
}

#define getsc()		getsc_r((unsigned int)(unsigned char)o_getsc())
#endif
151
/* number of Lex_state entries in one block (size of yylex()'s local array) */
#define STATE_BSIZE	8
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700153
/*
 * Enter lexer state s. Relies on the locals statep, state_info and
 * state of the calling function. The flags of the current entry are
 * read before statep advances and copied into the new entry; when
 * statep reaches state_info.end, push_state_i() supplies the entry
 * to use instead.
 */
#define PUSH_STATE(s)	do {					\
	uint8_t state_flags = statep->ls_flags;			\
	if (++statep == state_info.end)				\
		statep = push_state_i(&state_info, statep);	\
	state = statep->type = (s);				\
	statep->ls_flags = state_flags;				\
} while (/* CONSTCOND */ 0)
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700161
/*
 * Leave the current lexer state. Relies on the locals statep,
 * state_info and state of the calling function. When statep steps
 * back onto state_info.base, pop_state_i() supplies the entry to
 * continue with; state is then reloaded from the entry's type.
 */
#define POP_STATE()	do {					\
	if (--statep == state_info.base)			\
		statep = pop_state_i(&state_info, statep);	\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)
167
/*
 * Enter lexer state s and start recording the raw input: remember
 * the current output position (ws/wp of the caller) in ls_start,
 * then allocate a fresh sretrace_info with a 64-byte string from
 * ATEMP and link it at the head of the retrace_info list.
 */
#define PUSH_SRETRACE(s) do {					\
	struct sretrace_info *ri;				\
								\
	PUSH_STATE(s);						\
	statep->ls_start = Xsavepos(ws, wp);			\
	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
	ri->next = retrace_info;				\
	retrace_info = ri;					\
} while (/* CONSTCOND */ 0)
178
/*
 * Stop recording and leave the current lexer state: rewind the
 * output pointer wp to the position saved in ls_start, NUL-terminate
 * the recorded text and leave its start in the caller's sp, then
 * unlink and free the head node of retrace_info (the caller's dp is
 * used as scratch). The string in sp is not freed here; yylex()
 * either frees it or hands it to a new Source afterwards.
 *
 * NOTE(review): the terminator is written without a preceding
 * Xcheck(); presumably XString keeps spare room past its nominal
 * end - confirm against the XString definition.
 */
#define POP_SRETRACE()	do {					\
	wp = Xrestpos(ws, wp, statep->ls_start);		\
	*retrace_info->xp = '\0';				\
	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
	dp = (void *)retrace_info;				\
	retrace_info = retrace_info->next;			\
	afree(dp, ATEMP);					\
	POP_STATE();						\
} while (/* CONSTCOND */ 0)
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700188
189/**
190 * Lexical analyser
191 *
192 * tokens are not regular expressions, they are LL(1).
193 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
Geremy Condra03ebf062011-10-12 18:17:24 -0700194 * hence the state stack. Note "$(...)" are now parsed recursively.
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700195 */
196
197int
198yylex(int cf)
199{
200 Lex_state states[STATE_BSIZE], *statep, *s2, *base;
201 State_info state_info;
202 int c, c2, state;
Geremy Condra03ebf062011-10-12 18:17:24 -0700203 size_t cz;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700204 XString ws; /* expandable output word */
205 char *wp; /* output word pointer */
206 char *sp, *dp;
207
208 Again:
Geremy Condra03ebf062011-10-12 18:17:24 -0700209 states[0].type = SINVALID;
210 states[0].ls_base = NULL;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700211 statep = &states[1];
212 state_info.base = states;
213 state_info.end = &state_info.base[STATE_BSIZE];
214
215 Xinit(ws, wp, 64, ATEMP);
216
217 backslash_skip = 0;
218 ignore_backslash_newline = 0;
219
Geremy Condra03ebf062011-10-12 18:17:24 -0700220 if (cf & ONEWORD)
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700221 state = SWORD;
Geremy Condra03ebf062011-10-12 18:17:24 -0700222 else if (cf & LETEXPR) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700223 /* enclose arguments in (double) quotes */
224 *wp++ = OQUOTE;
225 state = SLETPAREN;
Geremy Condra03ebf062011-10-12 18:17:24 -0700226 statep->nparen = 0;
227 } else {
228 /* normal lexing */
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700229 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800230 do {
231 c = getsc();
232 } while (ctype(c, C_BLANK));
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700233 if (c == '#') {
234 ignore_backslash_newline++;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800235 do {
236 c = getsc();
237 } while (!ctype(c, C_NUL | C_LF));
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700238 ignore_backslash_newline--;
239 }
240 ungetsc(c);
241 }
Geremy Condra03ebf062011-10-12 18:17:24 -0700242 if (source->flags & SF_ALIAS) {
243 /* trailing ' ' in alias definition */
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700244 source->flags &= ~SF_ALIAS;
Elliott Hughesfc0307d2016-02-02 15:26:47 -0800245 /* POSIX: trailing space only counts if parsing simple cmd */
246 if (!Flag(FPOSIX) || (cf & CMDWORD))
247 cf |= ALIAS;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700248 }
249
Geremy Condra03ebf062011-10-12 18:17:24 -0700250 /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
251 statep->type = state;
Elliott Hughes47086262019-03-26 12:34:31 -0700252 statep->ls_flags = (cf & HEREDOC) ? LS_HEREDOC : 0;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700253
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700254 /* collect non-special or quoted characters to form word */
255 while (!((c = getsc()) == 0 ||
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000256 ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
257 if (state == SBASE &&
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800258 subshell_nesting_type == ORD(/*{*/ '}') &&
259 (unsigned int)c == ORD(/*{*/ '}'))
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000260 /* possibly end ${ :;} */
261 break;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700262 Xcheck(ws, wp);
263 switch (state) {
264 case SADELIM:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800265 if ((unsigned int)c == ORD('('))
Geremy Condra03ebf062011-10-12 18:17:24 -0700266 statep->nparen++;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800267 else if ((unsigned int)c == ORD(')'))
Geremy Condra03ebf062011-10-12 18:17:24 -0700268 statep->nparen--;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800269 else if (statep->nparen == 0 &&
270 ((unsigned int)c == ORD(/*{*/ '}') ||
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000271 c == (int)statep->ls_adelim.delimiter)) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700272 *wp++ = ADELIM;
273 *wp++ = c;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800274 if ((unsigned int)c == ORD(/*{*/ '}') ||
275 --statep->ls_adelim.num == 0)
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700276 POP_STATE();
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800277 if ((unsigned int)c == ORD(/*{*/ '}'))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700278 POP_STATE();
279 break;
280 }
281 /* FALLTHROUGH */
282 case SBASE:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800283 if ((unsigned int)c == ORD('[') && (cf & CMDASN)) {
Geremy Condra03ebf062011-10-12 18:17:24 -0700284 /* temporary */
285 *wp = EOS;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700286 if (is_wdvarname(Xstring(ws, wp), false)) {
287 char *p, *tmp;
288
289 if (arraysub(&tmp)) {
290 *wp++ = CHAR;
291 *wp++ = c;
292 for (p = tmp; *p; ) {
293 Xcheck(ws, wp);
294 *wp++ = CHAR;
295 *wp++ = *p++;
296 }
297 afree(tmp, ATEMP);
298 break;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700299 }
300 }
301 *wp++ = CHAR;
302 *wp++ = c;
303 break;
304 }
305 /* FALLTHROUGH */
306 Sbase1: /* includes *(...|...) pattern (*+?@!) */
Elliott Hughes23925bb2017-09-22 16:04:20 -0700307 if (ctype(c, C_PATMO)) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700308 c2 = getsc();
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800309 if ((unsigned int)c2 == ORD('(' /*)*/)) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700310 *wp++ = OPAT;
311 *wp++ = c;
312 PUSH_STATE(SPATTERN);
313 break;
314 }
315 ungetsc(c2);
316 }
317 /* FALLTHROUGH */
318 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
319 switch (c) {
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800320 case ORD('\\'):
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700321 getsc_qchar:
322 if ((c = getsc())) {
323 /* trailing \ is lost */
324 *wp++ = QCHAR;
325 *wp++ = c;
326 }
327 break;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800328 case ORD('\''):
Thorsten Glaser811a5752013-07-25 14:24:45 +0000329 open_ssquote_unless_heredoc:
Elliott Hughes47086262019-03-26 12:34:31 -0700330 if ((statep->ls_flags & LS_HEREDOC))
Thorsten Glaser811a5752013-07-25 14:24:45 +0000331 goto store_char;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700332 *wp++ = OQUOTE;
333 ignore_backslash_newline++;
334 PUSH_STATE(SSQUOTE);
335 break;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800336 case ORD('"'):
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700337 open_sdquote:
338 *wp++ = OQUOTE;
339 PUSH_STATE(SDQUOTE);
340 break;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800341 case ORD('$'):
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000342 /*
343 * processing of dollar sign belongs into
344 * Subst, except for those which can open
345 * a string: $'…' and $"…"
346 */
347 subst_dollar_ex:
348 c = getsc();
349 switch (c) {
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800350 case ORD('"'):
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000351 goto open_sdquote;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800352 case ORD('\''):
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000353 goto open_sequote;
354 default:
355 goto SubstS;
356 }
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700357 default:
358 goto Subst;
359 }
360 break;
361
362 Subst:
363 switch (c) {
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800364 case ORD('\\'):
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700365 c = getsc();
366 switch (c) {
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800367 case ORD('"'):
Elliott Hughes47086262019-03-26 12:34:31 -0700368 if ((statep->ls_flags & LS_HEREDOC))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700369 goto heredocquote;
370 /* FALLTHROUGH */
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800371 case ORD('\\'):
372 case ORD('$'):
373 case ORD('`'):
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700374 store_qchar:
375 *wp++ = QCHAR;
376 *wp++ = c;
377 break;
378 default:
379 heredocquote:
380 Xcheck(ws, wp);
381 if (c) {
382 /* trailing \ is lost */
383 *wp++ = CHAR;
384 *wp++ = '\\';
385 *wp++ = CHAR;
386 *wp++ = c;
387 }
388 break;
389 }
390 break;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800391 case ORD('$'):
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700392 c = getsc();
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000393 SubstS:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800394 if ((unsigned int)c == ORD('(' /*)*/)) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700395 c = getsc();
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800396 if ((unsigned int)c == ORD('(' /*)*/)) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700397 *wp++ = EXPRSUB;
Elliott Hughes737fdce2014-08-07 12:59:26 -0700398 PUSH_SRETRACE(SASPAREN);
Elliott Hughes47086262019-03-26 12:34:31 -0700399 /* unneeded? */
400 /*statep->ls_flags &= ~LS_HEREDOC;*/
Geremy Condra03ebf062011-10-12 18:17:24 -0700401 statep->nparen = 2;
Geremy Condra03ebf062011-10-12 18:17:24 -0700402 *retrace_info->xp++ = '(';
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700403 } else {
404 ungetsc(c);
Geremy Condra03ebf062011-10-12 18:17:24 -0700405 subst_command:
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000406 c = COMSUB;
407 subst_command2:
408 sp = yyrecursive(c);
Geremy Condra03ebf062011-10-12 18:17:24 -0700409 cz = strlen(sp) + 1;
410 XcheckN(ws, wp, cz);
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000411 *wp++ = c;
Geremy Condra03ebf062011-10-12 18:17:24 -0700412 memcpy(wp, sp, cz);
413 wp += cz;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700414 }
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800415 } else if ((unsigned int)c == ORD('{' /*}*/)) {
416 if ((unsigned int)(c = getsc()) == ORD('|')) {
Thorsten Glaser811a5752013-07-25 14:24:45 +0000417 /*
418 * non-subenvironment
419 * value substitution
420 */
421 c = VALSUB;
422 goto subst_command2;
423 } else if (ctype(c, C_IFSWS)) {
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000424 /*
425 * non-subenvironment
426 * "command" substitution
427 */
428 c = FUNSUB;
429 goto subst_command2;
430 }
431 ungetsc(c);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700432 *wp++ = OSUBST;
Elliott Hughes23925bb2017-09-22 16:04:20 -0700433 *wp++ = '{' /*}*/;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700434 wp = get_brace_var(&ws, wp);
435 c = getsc();
436 /* allow :# and :% (ksh88 compat) */
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800437 if ((unsigned int)c == ORD(':')) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700438 *wp++ = CHAR;
439 *wp++ = c;
440 c = getsc();
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800441 if ((unsigned int)c == ORD(':')) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700442 *wp++ = CHAR;
443 *wp++ = '0';
444 *wp++ = ADELIM;
445 *wp++ = ':';
446 PUSH_STATE(SBRACE);
Elliott Hughes47086262019-03-26 12:34:31 -0700447 /* perhaps unneeded? */
448 statep->ls_flags &= ~LS_HEREDOC;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700449 PUSH_STATE(SADELIM);
Geremy Condra03ebf062011-10-12 18:17:24 -0700450 statep->ls_adelim.delimiter = ':';
451 statep->ls_adelim.num = 1;
452 statep->nparen = 0;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700453 break;
Elliott Hughes23925bb2017-09-22 16:04:20 -0700454 } else if (ctype(c, C_DIGIT | C_DOLAR | C_SPC) ||
Geremy Condra03ebf062011-10-12 18:17:24 -0700455 /*XXX what else? */
Elliott Hughes23925bb2017-09-22 16:04:20 -0700456 c == '(' /*)*/) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700457 /* substring subst. */
458 if (c != ' ') {
459 *wp++ = CHAR;
460 *wp++ = ' ';
461 }
462 ungetsc(c);
463 PUSH_STATE(SBRACE);
Elliott Hughes47086262019-03-26 12:34:31 -0700464 /* perhaps unneeded? */
465 statep->ls_flags &= ~LS_HEREDOC;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700466 PUSH_STATE(SADELIM);
Geremy Condra03ebf062011-10-12 18:17:24 -0700467 statep->ls_adelim.delimiter = ':';
468 statep->ls_adelim.num = 2;
469 statep->nparen = 0;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700470 break;
471 }
472 } else if (c == '/') {
Elliott Hughes77740fc2016-08-12 15:06:53 -0700473 c2 = ADELIM;
474 parse_adelim_slash:
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700475 *wp++ = CHAR;
476 *wp++ = c;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800477 if ((unsigned int)(c = getsc()) == ORD('/')) {
Elliott Hughes77740fc2016-08-12 15:06:53 -0700478 *wp++ = c2;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700479 *wp++ = c;
480 } else
481 ungetsc(c);
482 PUSH_STATE(SBRACE);
Elliott Hughes47086262019-03-26 12:34:31 -0700483 /* perhaps unneeded? */
484 statep->ls_flags &= ~LS_HEREDOC;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700485 PUSH_STATE(SADELIM);
Geremy Condra03ebf062011-10-12 18:17:24 -0700486 statep->ls_adelim.delimiter = '/';
487 statep->ls_adelim.num = 1;
488 statep->nparen = 0;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700489 break;
Elliott Hughes77740fc2016-08-12 15:06:53 -0700490 } else if (c == '@') {
491 c2 = getsc();
492 ungetsc(c2);
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800493 if ((unsigned int)c2 == ORD('/')) {
Elliott Hughes77740fc2016-08-12 15:06:53 -0700494 c2 = CHAR;
495 goto parse_adelim_slash;
496 }
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700497 }
Geremy Condra03ebf062011-10-12 18:17:24 -0700498 /*
499 * If this is a trim operation,
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700500 * treat (,|,) specially in STBRACE.
501 */
Elliott Hughes23925bb2017-09-22 16:04:20 -0700502 if (ctype(c, C_SUB2)) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700503 ungetsc(c);
Geremy Condra03ebf062011-10-12 18:17:24 -0700504 if (Flag(FSH))
505 PUSH_STATE(STBRACEBOURNE);
506 else
507 PUSH_STATE(STBRACEKORN);
Elliott Hughes47086262019-03-26 12:34:31 -0700508 /* single-quotes-in-heredoc-trim */
509 statep->ls_flags &= ~LS_HEREDOC;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700510 } else {
511 ungetsc(c);
Thorsten Glaser811a5752013-07-25 14:24:45 +0000512 if (state == SDQUOTE ||
513 state == SQBRACE)
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700514 PUSH_STATE(SQBRACE);
515 else
516 PUSH_STATE(SBRACE);
Elliott Hughes47086262019-03-26 12:34:31 -0700517 /* here no LS_HEREDOC removal */
518 /* single-quotes-in-heredoc-braces */
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700519 }
Elliott Hughes23925bb2017-09-22 16:04:20 -0700520 } else if (ctype(c, C_ALPHX)) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700521 *wp++ = OSUBST;
522 *wp++ = 'X';
523 do {
524 Xcheck(ws, wp);
525 *wp++ = c;
526 c = getsc();
Elliott Hughes23925bb2017-09-22 16:04:20 -0700527 } while (ctype(c, C_ALNUX));
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700528 *wp++ = '\0';
529 *wp++ = CSUBST;
530 *wp++ = 'X';
531 ungetsc(c);
532 } else if (ctype(c, C_VAR1 | C_DIGIT)) {
533 Xcheck(ws, wp);
534 *wp++ = OSUBST;
535 *wp++ = 'X';
536 *wp++ = c;
537 *wp++ = '\0';
538 *wp++ = CSUBST;
539 *wp++ = 'X';
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700540 } else {
541 *wp++ = CHAR;
542 *wp++ = '$';
543 ungetsc(c);
544 }
545 break;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800546 case ORD('`'):
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700547 subst_gravis:
548 PUSH_STATE(SBQUOTE);
Elliott Hughesa3c3f962017-04-12 16:52:30 -0700549 *wp++ = COMASUB;
Geremy Condra03ebf062011-10-12 18:17:24 -0700550 /*
Elliott Hughesfc0307d2016-02-02 15:26:47 -0800551 * We need to know whether we are within double
Elliott Hughes77740fc2016-08-12 15:06:53 -0700552 * quotes in order to translate \" to " within
553 * "…`…\"…`…" because, unlike for COMSUBs, the
554 * outer double quoteing changes the backslash
555 * meaning for the inside. For more details:
556 * http://austingroupbugs.net/view.php?id=1015
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700557 */
Geremy Condra03ebf062011-10-12 18:17:24 -0700558 statep->ls_bool = false;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700559 s2 = statep;
560 base = state_info.base;
Geremy Condra03ebf062011-10-12 18:17:24 -0700561 while (/* CONSTCOND */ 1) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700562 for (; s2 != base; s2--) {
Geremy Condra03ebf062011-10-12 18:17:24 -0700563 if (s2->type == SDQUOTE) {
564 statep->ls_bool = true;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700565 break;
566 }
567 }
568 if (s2 != base)
569 break;
Geremy Condra03ebf062011-10-12 18:17:24 -0700570 if (!(s2 = s2->ls_base))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700571 break;
572 base = s2-- - STATE_BSIZE;
573 }
574 break;
575 case QCHAR:
576 if (cf & LQCHAR) {
577 *wp++ = QCHAR;
578 *wp++ = getsc();
579 break;
580 }
581 /* FALLTHROUGH */
582 default:
583 store_char:
584 *wp++ = CHAR;
585 *wp++ = c;
586 }
587 break;
588
589 case SEQUOTE:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800590 if ((unsigned int)c == ORD('\'')) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700591 POP_STATE();
592 *wp++ = CQUOTE;
593 ignore_backslash_newline--;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800594 } else if ((unsigned int)c == ORD('\\')) {
Elliott Hughesfc0307d2016-02-02 15:26:47 -0800595 if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
596 c2 = getsc();
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700597 if (c2 == 0)
Geremy Condra03ebf062011-10-12 18:17:24 -0700598 statep->ls_bool = true;
599 if (!statep->ls_bool) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700600 char ts[4];
601
602 if ((unsigned int)c2 < 0x100) {
603 *wp++ = QCHAR;
604 *wp++ = c2;
605 } else {
Geremy Condra03ebf062011-10-12 18:17:24 -0700606 cz = utf_wctomb(ts, c2 - 0x100);
607 ts[cz] = 0;
Elliott Hughesfc0307d2016-02-02 15:26:47 -0800608 cz = 0;
609 do {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700610 *wp++ = QCHAR;
Geremy Condra03ebf062011-10-12 18:17:24 -0700611 *wp++ = ts[cz];
Elliott Hughesfc0307d2016-02-02 15:26:47 -0800612 } while (ts[++cz]);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700613 }
614 }
Geremy Condra03ebf062011-10-12 18:17:24 -0700615 } else if (!statep->ls_bool) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700616 *wp++ = QCHAR;
617 *wp++ = c;
618 }
619 break;
620
621 case SSQUOTE:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800622 if ((unsigned int)c == ORD('\'')) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700623 POP_STATE();
Elliott Hughes47086262019-03-26 12:34:31 -0700624 if ((statep->ls_flags & LS_HEREDOC) ||
625 state == SQBRACE)
Thorsten Glaser811a5752013-07-25 14:24:45 +0000626 goto store_char;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700627 *wp++ = CQUOTE;
628 ignore_backslash_newline--;
629 } else {
630 *wp++ = QCHAR;
631 *wp++ = c;
632 }
633 break;
634
635 case SDQUOTE:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800636 if ((unsigned int)c == ORD('"')) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700637 POP_STATE();
638 *wp++ = CQUOTE;
639 } else
640 goto Subst;
641 break;
642
Geremy Condra03ebf062011-10-12 18:17:24 -0700643 /* $(( ... )) */
644 case SASPAREN:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800645 if ((unsigned int)c == ORD('('))
Geremy Condra03ebf062011-10-12 18:17:24 -0700646 statep->nparen++;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800647 else if ((unsigned int)c == ORD(')')) {
Geremy Condra03ebf062011-10-12 18:17:24 -0700648 statep->nparen--;
649 if (statep->nparen == 1) {
650 /* end of EXPRSUB */
651 POP_SRETRACE();
Geremy Condra03ebf062011-10-12 18:17:24 -0700652
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800653 if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
Geremy Condra03ebf062011-10-12 18:17:24 -0700654 cz = strlen(sp) - 2;
655 XcheckN(ws, wp, cz);
656 memcpy(wp, sp + 1, cz);
657 wp += cz;
658 afree(sp, ATEMP);
659 *wp++ = '\0';
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700660 break;
661 } else {
Geremy Condra03ebf062011-10-12 18:17:24 -0700662 Source *s;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700663
664 ungetsc(c2);
Geremy Condra03ebf062011-10-12 18:17:24 -0700665 /*
666 * mismatched parenthesis -
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700667 * assume we were really
668 * parsing a $(...) expression
669 */
Geremy Condra03ebf062011-10-12 18:17:24 -0700670 --wp;
671 s = pushs(SREREAD,
672 source->areap);
673 s->start = s->str =
674 s->u.freeme = sp;
675 s->next = source;
676 source = s;
677 goto subst_command;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700678 }
679 }
680 }
Geremy Condra03ebf062011-10-12 18:17:24 -0700681 /* reuse existing state machine */
682 goto Sbase2;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700683
684 case SQBRACE:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800685 if ((unsigned int)c == ORD('\\')) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700686 /*
687 * perform POSIX "quote removal" if the back-
688 * slash is "special", i.e. same cases as the
689 * {case '\\':} in Subst: plus closing brace;
690 * in mksh code "quote removal" on '\c' means
691 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
692 * emitted (in heredocquote:)
693 */
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800694 if ((unsigned int)(c = getsc()) == ORD('"') ||
695 (unsigned int)c == ORD('\\') ||
696 ctype(c, C_DOLAR | C_GRAVE) ||
697 (unsigned int)c == ORD(/*{*/ '}'))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700698 goto store_qchar;
699 goto heredocquote;
700 }
701 goto common_SQBRACE;
702
703 case SBRACE:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800704 if ((unsigned int)c == ORD('\''))
Thorsten Glaser811a5752013-07-25 14:24:45 +0000705 goto open_ssquote_unless_heredoc;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800706 else if ((unsigned int)c == ORD('\\'))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700707 goto getsc_qchar;
708 common_SQBRACE:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800709 if ((unsigned int)c == ORD('"'))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700710 goto open_sdquote;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800711 else if ((unsigned int)c == ORD('$'))
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000712 goto subst_dollar_ex;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800713 else if ((unsigned int)c == ORD('`'))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700714 goto subst_gravis;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800715 else if ((unsigned int)c != ORD(/*{*/ '}'))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700716 goto store_char;
717 POP_STATE();
718 *wp++ = CSUBST;
719 *wp++ = /*{*/ '}';
720 break;
721
Geremy Condra03ebf062011-10-12 18:17:24 -0700722 /* Same as SBASE, except (,|,) treated specially */
723 case STBRACEKORN:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800724 if ((unsigned int)c == ORD('|'))
Geremy Condra03ebf062011-10-12 18:17:24 -0700725 *wp++ = SPAT;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800726 else if ((unsigned int)c == ORD('(')) {
Geremy Condra03ebf062011-10-12 18:17:24 -0700727 *wp++ = OPAT;
728 /* simile for @ */
729 *wp++ = ' ';
730 PUSH_STATE(SPATTERN);
731 } else /* FALLTHROUGH */
732 case STBRACEBOURNE:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800733 if ((unsigned int)c == ORD(/*{*/ '}')) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700734 POP_STATE();
735 *wp++ = CSUBST;
736 *wp++ = /*{*/ '}';
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700737 } else
738 goto Sbase1;
739 break;
740
741 case SBQUOTE:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800742 if ((unsigned int)c == ORD('`')) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700743 *wp++ = 0;
744 POP_STATE();
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800745 } else if ((unsigned int)c == ORD('\\')) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700746 switch (c = getsc()) {
Geremy Condra03ebf062011-10-12 18:17:24 -0700747 case 0:
748 /* trailing \ is lost */
749 break;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800750 case ORD('$'):
751 case ORD('`'):
752 case ORD('\\'):
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700753 *wp++ = c;
754 break;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800755 case ORD('"'):
Geremy Condra03ebf062011-10-12 18:17:24 -0700756 if (statep->ls_bool) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700757 *wp++ = c;
758 break;
759 }
760 /* FALLTHROUGH */
761 default:
Geremy Condra03ebf062011-10-12 18:17:24 -0700762 *wp++ = '\\';
763 *wp++ = c;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700764 break;
765 }
766 } else
767 *wp++ = c;
768 break;
769
Geremy Condra03ebf062011-10-12 18:17:24 -0700770 /* ONEWORD */
771 case SWORD:
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700772 goto Subst;
773
Geremy Condra03ebf062011-10-12 18:17:24 -0700774 /* LETEXPR: (( ... )) */
775 case SLETPAREN:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800776 if ((unsigned int)c == ORD(/*(*/ ')')) {
Geremy Condra03ebf062011-10-12 18:17:24 -0700777 if (statep->nparen > 0)
778 --statep->nparen;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800779 else if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700780 c = 0;
781 *wp++ = CQUOTE;
782 goto Done;
783 } else {
784 Source *s;
785
786 ungetsc(c2);
Elliott Hughesfc0307d2016-02-02 15:26:47 -0800787 ungetsc(c);
Geremy Condra03ebf062011-10-12 18:17:24 -0700788 /*
789 * mismatched parenthesis -
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700790 * assume we were really
Geremy Condra03ebf062011-10-12 18:17:24 -0700791 * parsing a (...) expression
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700792 */
793 *wp = EOS;
794 sp = Xstring(ws, wp);
Elliott Hughesfc0307d2016-02-02 15:26:47 -0800795 dp = wdstrip(sp + 1, WDS_TPUTS);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700796 s = pushs(SREREAD, source->areap);
797 s->start = s->str = s->u.freeme = dp;
798 s->next = source;
799 source = s;
Elliott Hughes23925bb2017-09-22 16:04:20 -0700800 ungetsc('(' /*)*/);
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800801 return (ORD('(' /*)*/));
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700802 }
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800803 } else if ((unsigned int)c == ORD('('))
Geremy Condra03ebf062011-10-12 18:17:24 -0700804 /*
805 * parentheses inside quotes and
806 * backslashes are lost, but AT&T ksh
807 * doesn't count them either
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700808 */
Geremy Condra03ebf062011-10-12 18:17:24 -0700809 ++statep->nparen;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700810 goto Sbase2;
811
Elliott Hughesfc0307d2016-02-02 15:26:47 -0800812 /* << or <<- delimiter */
Geremy Condra03ebf062011-10-12 18:17:24 -0700813 case SHEREDELIM:
814 /*
Geremy Condra03ebf062011-10-12 18:17:24 -0700815 * here delimiters need a special case since
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700816 * $ and `...` are not to be treated specially
817 */
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000818 switch (c) {
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800819 case ORD('\\'):
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000820 if ((c = getsc())) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700821 /* trailing \ is lost */
822 *wp++ = QCHAR;
823 *wp++ = c;
824 }
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000825 break;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800826 case ORD('\''):
Thorsten Glaser811a5752013-07-25 14:24:45 +0000827 goto open_ssquote_unless_heredoc;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800828 case ORD('$'):
829 if ((unsigned int)(c2 = getsc()) == ORD('\'')) {
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000830 open_sequote:
831 *wp++ = OQUOTE;
832 ignore_backslash_newline++;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700833 PUSH_STATE(SEQUOTE);
Geremy Condra03ebf062011-10-12 18:17:24 -0700834 statep->ls_bool = false;
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000835 break;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800836 } else if ((unsigned int)c2 == ORD('"')) {
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000837 /* FALLTHROUGH */
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800838 case ORD('"'):
Elliott Hughes737fdce2014-08-07 12:59:26 -0700839 PUSH_SRETRACE(SHEREDQUOTE);
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000840 break;
841 }
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700842 ungetsc(c2);
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000843 /* FALLTHROUGH */
844 default:
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700845 *wp++ = CHAR;
846 *wp++ = c;
847 }
848 break;
849
Elliott Hughesfc0307d2016-02-02 15:26:47 -0800850 /* " in << or <<- delimiter */
Geremy Condra03ebf062011-10-12 18:17:24 -0700851 case SHEREDQUOTE:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800852 if ((unsigned int)c != ORD('"'))
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000853 goto Subst;
854 POP_SRETRACE();
855 dp = strnul(sp) - 1;
856 /* remove the trailing double quote */
857 *dp = '\0';
858 /* store the quoted string */
859 *wp++ = OQUOTE;
Elliott Hughes50012062015-03-10 22:22:24 -0700860 XcheckN(ws, wp, (dp - sp) * 2);
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000861 dp = sp;
862 while ((c = *dp++)) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700863 if (c == '\\') {
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000864 switch ((c = *dp++)) {
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800865 case ORD('\\'):
866 case ORD('"'):
867 case ORD('$'):
868 case ORD('`'):
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700869 break;
870 default:
Geremy Condra03ebf062011-10-12 18:17:24 -0700871 *wp++ = CHAR;
872 *wp++ = '\\';
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700873 break;
874 }
875 }
876 *wp++ = CHAR;
877 *wp++ = c;
878 }
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000879 afree(sp, ATEMP);
880 *wp++ = CQUOTE;
881 state = statep->type = SHEREDELIM;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700882 break;
883
Geremy Condra03ebf062011-10-12 18:17:24 -0700884 /* in *(...|...) pattern (*+?@!) */
885 case SPATTERN:
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800886 if ((unsigned int)c == ORD(/*(*/ ')')) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700887 *wp++ = CPAT;
888 POP_STATE();
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800889 } else if ((unsigned int)c == ORD('|')) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700890 *wp++ = SPAT;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800891 } else if ((unsigned int)c == ORD('(')) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700892 *wp++ = OPAT;
Geremy Condra03ebf062011-10-12 18:17:24 -0700893 /* simile for @ */
894 *wp++ = ' ';
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700895 PUSH_STATE(SPATTERN);
896 } else
897 goto Sbase1;
898 break;
899 }
900 }
901 Done:
902 Xcheck(ws, wp);
903 if (statep != &states[1])
904 /* XXX figure out what is missing */
Elliott Hughesa3c3f962017-04-12 16:52:30 -0700905 yyerror("no closing quote");
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700906
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700907 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +0000908 if (state == SHEREDELIM)
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700909 state = SBASE;
910
911 dp = Xstring(ws, wp);
Thorsten Glaser811a5752013-07-25 14:24:45 +0000912 if (state == SBASE && (
Thorsten Glaser811a5752013-07-25 14:24:45 +0000913 (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
Elliott Hughes23925bb2017-09-22 16:04:20 -0700914 ctype(c, C_ANGLE)) && ((c2 = Xlength(ws, wp)) == 0 ||
915 (c2 == 2 && dp[0] == CHAR && ctype(dp[1], C_DIGIT)))) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700916 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
917
Elliott Hughes77740fc2016-08-12 15:06:53 -0700918 iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700919
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700920 if (c == '&') {
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800921 if ((unsigned int)(c2 = getsc()) != ORD('>')) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700922 ungetsc(c2);
923 goto no_iop;
924 }
925 c = c2;
Elliott Hughesb27ce952015-04-21 13:39:18 -0700926 iop->ioflag = IOBASH;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700927 } else
Elliott Hughesb27ce952015-04-21 13:39:18 -0700928 iop->ioflag = 0;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700929
930 c2 = getsc();
931 /* <<, >>, <> are ok, >< is not */
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800932 if (c == c2 || ((unsigned int)c == ORD('<') &&
933 (unsigned int)c2 == ORD('>'))) {
Elliott Hughesb27ce952015-04-21 13:39:18 -0700934 iop->ioflag |= c == c2 ?
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800935 ((unsigned int)c == ORD('>') ? IOCAT : IOHERE) : IORDWR;
Elliott Hughesb27ce952015-04-21 13:39:18 -0700936 if (iop->ioflag == IOHERE) {
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800937 if ((unsigned int)(c2 = getsc()) == ORD('-'))
Elliott Hughesb27ce952015-04-21 13:39:18 -0700938 iop->ioflag |= IOSKIP;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800939 else if ((unsigned int)c2 == ORD('<'))
Elliott Hughesb27ce952015-04-21 13:39:18 -0700940 iop->ioflag |= IOHERESTR;
Elliott Hughesfc0307d2016-02-02 15:26:47 -0800941 else
942 ungetsc(c2);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700943 }
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800944 } else if ((unsigned int)c2 == ORD('&'))
945 iop->ioflag |= IODUP | ((unsigned int)c == ORD('<') ? IORDUP : 0);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700946 else {
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800947 iop->ioflag |= (unsigned int)c == ORD('>') ? IOWRITE : IOREAD;
948 if ((unsigned int)c == ORD('>') && (unsigned int)c2 == ORD('|'))
Elliott Hughesb27ce952015-04-21 13:39:18 -0700949 iop->ioflag |= IOCLOB;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700950 else
951 ungetsc(c2);
952 }
953
Elliott Hughesfc0307d2016-02-02 15:26:47 -0800954 iop->ioname = NULL;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700955 iop->delim = NULL;
956 iop->heredoc = NULL;
Geremy Condra03ebf062011-10-12 18:17:24 -0700957 /* free word */
958 Xfree(ws, wp);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700959 yylval.iop = iop;
960 return (REDIR);
961 no_iop:
Geremy Condra03ebf062011-10-12 18:17:24 -0700962 afree(iop, ATEMP);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700963 }
964
965 if (wp == dp && state == SBASE) {
Geremy Condra03ebf062011-10-12 18:17:24 -0700966 /* free word */
967 Xfree(ws, wp);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700968 /* no word, process LEX1 character */
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800969 if (((unsigned int)c == ORD('|')) ||
970 ((unsigned int)c == ORD('&')) ||
971 ((unsigned int)c == ORD(';')) ||
972 ((unsigned int)c == ORD('(' /*)*/))) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700973 if ((c2 = getsc()) == c)
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800974 c = ((unsigned int)c == ORD(';')) ? BREAK :
975 ((unsigned int)c == ORD('|')) ? LOGOR :
976 ((unsigned int)c == ORD('&')) ? LOGAND :
977 /* (unsigned int)c == ORD('(' )) */ MDPAREN;
978 else if ((unsigned int)c == ORD('|') && (unsigned int)c2 == ORD('&'))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700979 c = COPROC;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800980 else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('|'))
Geremy Condra03ebf062011-10-12 18:17:24 -0700981 c = BRKEV;
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800982 else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('&'))
Geremy Condra03ebf062011-10-12 18:17:24 -0700983 c = BRKFT;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -0700984 else
985 ungetsc(c2);
Geremy Condra03ebf062011-10-12 18:17:24 -0700986#ifndef MKSH_SMALL
987 if (c == BREAK) {
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800988 if ((unsigned int)(c2 = getsc()) == ORD('&'))
Geremy Condra03ebf062011-10-12 18:17:24 -0700989 c = BRKEV;
990 else
991 ungetsc(c2);
992 }
993#endif
Elliott Hughesdd4abe02018-02-05 15:55:19 -0800994 } else if ((unsigned int)c == ORD('\n')) {
Elliott Hughesfc0307d2016-02-02 15:26:47 -0800995 if (cf & HEREDELIM)
996 ungetsc(c);
997 else {
998 gethere();
999 if (cf & CONTIN)
1000 goto Again;
1001 }
Elliott Hughes77740fc2016-08-12 15:06:53 -07001002 } else if (c == '\0' && !(cf & HEREDELIM)) {
1003 struct ioword **p = heres;
1004
1005 while (p < herep)
1006 if ((*p)->ioflag & IOHERESTR)
1007 ++p;
1008 else
1009 /* ksh -c 'cat <<EOF' can cause this */
1010 yyerror(Tf_heredoc,
1011 evalstr((*p)->delim, 0));
Elliott Hughesfc0307d2016-02-02 15:26:47 -08001012 }
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001013 return (c);
1014 }
1015
Geremy Condra03ebf062011-10-12 18:17:24 -07001016 /* terminate word */
1017 *wp++ = EOS;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001018 yylval.cp = Xclose(ws, wp);
1019 if (state == SWORD || state == SLETPAREN
Geremy Condra03ebf062011-10-12 18:17:24 -07001020 /* XXX ONEWORD? */)
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001021 return (LWORD);
1022
1023 /* unget terminator */
1024 ungetsc(c);
1025
1026 /*
1027 * note: the alias-vs-function code below depends on several
1028 * interna: starting from here, source->str is not modified;
1029 * the way getsc() and ungetsc() operate; etc.
1030 */
1031
1032 /* copy word to unprefixed string ident */
1033 sp = yylval.cp;
1034 dp = ident;
Elliott Hughesfc0307d2016-02-02 15:26:47 -08001035 while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1036 *dp++ = *sp++;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001037 if (c != EOS)
Elliott Hughesa3c3f962017-04-12 16:52:30 -07001038 /* word is not unquoted, or space ran out */
Elliott Hughesb27ce952015-04-21 13:39:18 -07001039 dp = ident;
1040 /* make sure the ident array stays NUL padded */
1041 memset(dp, 0, (ident + IDENT) - dp + 1);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001042
Elliott Hughesa3c3f962017-04-12 16:52:30 -07001043 if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001044 struct tbl *p;
1045 uint32_t h = hash(ident);
1046
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001047 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
Geremy Condra03ebf062011-10-12 18:17:24 -07001048 (!(cf & ESACONLY) || p->val.i == ESAC ||
Elliott Hughesdd4abe02018-02-05 15:55:19 -08001049 (unsigned int)p->val.i == ORD(/*{*/ '}'))) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001050 afree(yylval.cp, ATEMP);
1051 return (p->val.i);
1052 }
1053 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1054 (p->flag & ISSET)) {
1055 /*
1056 * this still points to the same character as the
1057 * ungetsc'd terminator from above
1058 */
1059 const char *cp = source->str;
1060
1061 /* prefer POSIX but not Korn functions over aliases */
Elliott Hughes23925bb2017-09-22 16:04:20 -07001062 while (ctype(*cp, C_BLANK))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001063 /*
1064 * this is like getsc() without skipping
1065 * over Source boundaries (including not
1066 * parsing ungetsc'd characters that got
1067 * pushed into an SREREAD) which is what
1068 * we want here anyway: find out whether
1069 * the alias name is followed by a POSIX
Elliott Hughesb27ce952015-04-21 13:39:18 -07001070 * function definition
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001071 */
1072 ++cp;
1073 /* prefer functions over aliases */
Geremy Condra03ebf062011-10-12 18:17:24 -07001074 if (cp[0] != '(' || cp[1] != ')') {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001075 Source *s = source;
1076
1077 while (s && (s->flags & SF_HASALIAS))
1078 if (s->u.tblp == p)
1079 return (LWORD);
1080 else
1081 s = s->next;
1082 /* push alias expansion */
1083 s = pushs(SALIAS, source->areap);
1084 s->start = s->str = p->val.s;
1085 s->u.tblp = p;
1086 s->flags |= SF_HASALIAS;
Elliott Hughesa3c3f962017-04-12 16:52:30 -07001087 s->line = source->line;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001088 s->next = source;
1089 if (source->type == SEOF) {
1090 /* prevent infinite recursion at EOS */
1091 source->u.tblp = p;
1092 source->flags |= SF_HASALIAS;
1093 }
1094 source = s;
1095 afree(yylval.cp, ATEMP);
1096 goto Again;
1097 }
1098 }
Elliott Hughesa3c3f962017-04-12 16:52:30 -07001099 } else if (*ident == '\0') {
Elliott Hughesb27ce952015-04-21 13:39:18 -07001100 /* retain typeset et al. even when quoted */
Elliott Hughesa3c3f962017-04-12 16:52:30 -07001101 struct tbl *tt = get_builtin((dp = wdstrip(yylval.cp, 0)));
1102 uint32_t flag = tt ? tt->flag : 0;
1103
1104 if (flag & (DECL_UTIL | DECL_FWDR))
Elliott Hughesb27ce952015-04-21 13:39:18 -07001105 strlcpy(ident, dp, sizeof(ident));
1106 afree(dp, ATEMP);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001107 }
1108
1109 return (LWORD);
1110}
1111
1112static void
Elliott Hughesfc0307d2016-02-02 15:26:47 -08001113gethere(void)
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001114{
1115 struct ioword **p;
1116
1117 for (p = heres; p < herep; p++)
Elliott Hughesfc0307d2016-02-02 15:26:47 -08001118 if (!((*p)->ioflag & IOHERESTR))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001119 readhere(*p);
1120 herep = heres;
1121}
1122
1123/*
1124 * read "<<word" text into temp file
1125 */
1126
1127static void
1128readhere(struct ioword *iop)
1129{
1130 int c;
Geremy Condra03ebf062011-10-12 18:17:24 -07001131 const char *eof, *eofp;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001132 XString xs;
1133 char *xp;
Elliott Hughesfc0307d2016-02-02 15:26:47 -08001134 size_t xpos;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001135
Elliott Hughesfc0307d2016-02-02 15:26:47 -08001136 eof = evalstr(iop->delim, 0);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001137
Elliott Hughesb27ce952015-04-21 13:39:18 -07001138 if (!(iop->ioflag & IOEVAL))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001139 ignore_backslash_newline++;
1140
1141 Xinit(xs, xp, 256, ATEMP);
1142
Geremy Condra03ebf062011-10-12 18:17:24 -07001143 heredoc_read_line:
1144 /* beginning of line */
1145 eofp = eof;
1146 xpos = Xsavepos(xs, xp);
Elliott Hughesb27ce952015-04-21 13:39:18 -07001147 if (iop->ioflag & IOSKIP) {
Geremy Condra03ebf062011-10-12 18:17:24 -07001148 /* skip over leading tabs */
1149 while ((c = getsc()) == '\t')
Elliott Hughesb27ce952015-04-21 13:39:18 -07001150 ; /* nothing */
Geremy Condra03ebf062011-10-12 18:17:24 -07001151 goto heredoc_parse_char;
1152 }
1153 heredoc_read_char:
1154 c = getsc();
1155 heredoc_parse_char:
1156 /* compare with here document marker */
1157 if (!*eofp) {
1158 /* end of here document marker, what to do? */
1159 switch (c) {
Elliott Hughesdd4abe02018-02-05 15:55:19 -08001160 case ORD(/*(*/ ')'):
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +00001161 if (!subshell_nesting_type)
Geremy Condra03ebf062011-10-12 18:17:24 -07001162 /*-
1163 * not allowed outside $(...) or (...)
1164 * => mismatch
1165 */
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001166 break;
Geremy Condra03ebf062011-10-12 18:17:24 -07001167 /* allow $(...) or (...) to close here */
1168 ungetsc(/*(*/ ')');
1169 /* FALLTHROUGH */
1170 case 0:
1171 /*
1172 * Allow EOF here to commands without trailing
1173 * newlines (mksh -c '...') will work as well.
1174 */
Elliott Hughesdd4abe02018-02-05 15:55:19 -08001175 case ORD('\n'):
Geremy Condra03ebf062011-10-12 18:17:24 -07001176 /* Newline terminates here document marker */
1177 goto heredoc_found_terminator;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001178 }
Elliott Hughes47086262019-03-26 12:34:31 -07001179 } else if ((unsigned int)c == ord(*eofp++))
Geremy Condra03ebf062011-10-12 18:17:24 -07001180 /* store; then read and compare next character */
1181 goto heredoc_store_and_loop;
1182 /* nope, mismatch; read until end of line */
1183 while (c != '\n') {
1184 if (!c)
1185 /* oops, reached EOF */
Elliott Hughes77740fc2016-08-12 15:06:53 -07001186 yyerror(Tf_heredoc, eof);
Geremy Condra03ebf062011-10-12 18:17:24 -07001187 /* store character */
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001188 Xcheck(xs, xp);
1189 Xput(xs, xp, c);
Geremy Condra03ebf062011-10-12 18:17:24 -07001190 /* read next character */
1191 c = getsc();
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001192 }
Geremy Condra03ebf062011-10-12 18:17:24 -07001193 /* we read a newline as last character */
1194 heredoc_store_and_loop:
1195 /* store character */
1196 Xcheck(xs, xp);
1197 Xput(xs, xp, c);
1198 if (c == '\n')
1199 goto heredoc_read_line;
1200 goto heredoc_read_char;
1201
1202 heredoc_found_terminator:
1203 /* jump back to saved beginning of line */
1204 xp = Xrestpos(xs, xp, xpos);
1205 /* terminate, close and store */
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001206 Xput(xs, xp, '\0');
1207 iop->heredoc = Xclose(xs, xp);
1208
Elliott Hughesb27ce952015-04-21 13:39:18 -07001209 if (!(iop->ioflag & IOEVAL))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001210 ignore_backslash_newline--;
1211}
1212
1213void
1214yyerror(const char *fmt, ...)
1215{
1216 va_list va;
1217
1218 /* pop aliases and re-reads */
1219 while (source->type == SALIAS || source->type == SREREAD)
1220 source = source->next;
Geremy Condra03ebf062011-10-12 18:17:24 -07001221 /* zap pending input */
1222 source->str = null;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001223
1224 error_prefix(true);
1225 va_start(va, fmt);
1226 shf_vfprintf(shl_out, fmt, va);
Elliott Hughesa3c3f962017-04-12 16:52:30 -07001227 shf_putc('\n', shl_out);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001228 va_end(va);
1229 errorfz();
1230}
1231
1232/*
1233 * input for yylex with alias expansion
1234 */
1235
1236Source *
1237pushs(int type, Area *areap)
1238{
1239 Source *s;
1240
1241 s = alloc(sizeof(Source), areap);
1242 memset(s, 0, sizeof(Source));
1243 s->type = type;
1244 s->str = null;
1245 s->areap = areap;
1246 if (type == SFILE || type == SSTDIN)
1247 XinitN(s->xs, 256, s->areap);
1248 return (s);
1249}
1250
1251static int
Geremy Condra03ebf062011-10-12 18:17:24 -07001252getsc_uu(void)
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001253{
1254 Source *s = source;
1255 int c;
1256
Elliott Hughes23925bb2017-09-22 16:04:20 -07001257 while ((c = ord(*s->str++)) == 0) {
Geremy Condra03ebf062011-10-12 18:17:24 -07001258 /* return 0 for EOF by default */
1259 s->str = NULL;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001260 switch (s->type) {
1261 case SEOF:
1262 s->str = null;
1263 return (0);
1264
1265 case SSTDIN:
1266 case SFILE:
1267 getsc_line(s);
1268 break;
1269
1270 case SWSTR:
1271 break;
1272
1273 case SSTRING:
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +00001274 case SSTRINGCMDLINE:
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001275 break;
1276
1277 case SWORDS:
1278 s->start = s->str = *s->u.strv++;
1279 s->type = SWORDSEP;
1280 break;
1281
1282 case SWORDSEP:
1283 if (*s->u.strv == NULL) {
1284 s->start = s->str = "\n";
1285 s->type = SEOF;
1286 } else {
Elliott Hughes77740fc2016-08-12 15:06:53 -07001287 s->start = s->str = T1space;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001288 s->type = SWORDS;
1289 }
1290 break;
1291
1292 case SALIAS:
1293 if (s->flags & SF_ALIASEND) {
1294 /* pass on an unused SF_ALIAS flag */
1295 source = s->next;
1296 source->flags |= s->flags & SF_ALIAS;
1297 s = source;
1298 } else if (*s->u.tblp->val.s &&
Elliott Hughes23925bb2017-09-22 16:04:20 -07001299 ctype((c = strnul(s->u.tblp->val.s)[-1]), C_SPACE)) {
Geremy Condra03ebf062011-10-12 18:17:24 -07001300 /* pop source stack */
1301 source = s = s->next;
1302 /*
1303 * Note that this alias ended with a
1304 * space, enabling alias expansion on
1305 * the following word.
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001306 */
1307 s->flags |= SF_ALIAS;
1308 } else {
Geremy Condra03ebf062011-10-12 18:17:24 -07001309 /*
1310 * At this point, we need to keep the current
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001311 * alias in the source list so recursive
Geremy Condra03ebf062011-10-12 18:17:24 -07001312 * aliases can be detected and we also need to
1313 * return the next character. Do this by
1314 * temporarily popping the alias to get the
1315 * next character and then put it back in the
1316 * source list with the SF_ALIASEND flag set.
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001317 */
Geremy Condra03ebf062011-10-12 18:17:24 -07001318 /* pop source stack */
1319 source = s->next;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001320 source->flags |= s->flags & SF_ALIAS;
Geremy Condra03ebf062011-10-12 18:17:24 -07001321 c = getsc_uu();
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001322 if (c) {
1323 s->flags |= SF_ALIASEND;
1324 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1325 s->start = s->str = s->ugbuf;
1326 s->next = source;
1327 source = s;
1328 } else {
1329 s = source;
Geremy Condra03ebf062011-10-12 18:17:24 -07001330 /* avoid reading EOF twice */
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001331 s->str = NULL;
1332 break;
1333 }
1334 }
1335 continue;
1336
1337 case SREREAD:
Geremy Condra03ebf062011-10-12 18:17:24 -07001338 if (s->start != s->ugbuf)
1339 /* yuck */
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001340 afree(s->u.freeme, ATEMP);
1341 source = s = s->next;
1342 continue;
1343 }
1344 if (s->str == NULL) {
1345 s->type = SEOF;
1346 s->start = s->str = null;
1347 return ('\0');
1348 }
1349 if (s->flags & SF_ECHO) {
1350 shf_puts(s->str, shl_out);
1351 shf_flush(shl_out);
1352 }
1353 }
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001354 return (c);
1355}
1356
1357static void
1358getsc_line(Source *s)
1359{
1360 char *xp = Xstring(s->xs, xp), *cp;
1361 bool interactive = Flag(FTALKING) && s->type == SSTDIN;
Elliott Hughes47086262019-03-26 12:34:31 -07001362 bool have_tty = interactive && (s->flags & SF_TTY) && tty_hasstate;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001363
1364 /* Done here to ensure nothing odd happens when a timeout occurs */
1365 XcheckN(s->xs, xp, LINE);
1366 *xp = '\0';
1367 s->start = s->str = xp;
1368
1369 if (have_tty && ksh_tmout) {
1370 ksh_tmout_state = TMOUT_READING;
1371 alarm(ksh_tmout);
1372 }
Elliott Hughes77740fc2016-08-12 15:06:53 -07001373 if (interactive) {
1374 if (cur_prompt == PS1)
1375 histsave(&s->line, NULL, HIST_FLUSH, true);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001376 change_winsz();
Elliott Hughes77740fc2016-08-12 15:06:53 -07001377 }
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +00001378#ifndef MKSH_NO_CMDLINE_EDITING
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001379 if (have_tty && (
1380#if !MKSH_S_NOVI
1381 Flag(FVI) ||
1382#endif
1383 Flag(FEMACS) || Flag(FGMACS))) {
1384 int nread;
1385
Thorsten Glaser811a5752013-07-25 14:24:45 +00001386 nread = x_read(xp);
Geremy Condra03ebf062011-10-12 18:17:24 -07001387 if (nread < 0)
1388 /* read error */
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001389 nread = 0;
1390 xp[nread] = '\0';
1391 xp += nread;
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +00001392 } else
1393#endif
1394 {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001395 if (interactive)
1396 pprompt(prompt, 0);
1397 else
1398 s->line++;
1399
Geremy Condra03ebf062011-10-12 18:17:24 -07001400 while (/* CONSTCOND */ 1) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001401 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1402
1403 if (!p && shf_error(s->u.shf) &&
1404 shf_errno(s->u.shf) == EINTR) {
1405 shf_clearerr(s->u.shf);
1406 if (trap)
1407 runtraps(0);
1408 continue;
1409 }
1410 if (!p || (xp = p, xp[-1] == '\n'))
1411 break;
1412 /* double buffer size */
Geremy Condra03ebf062011-10-12 18:17:24 -07001413 /* move past NUL so doubling works... */
1414 xp++;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001415 XcheckN(s->xs, xp, Xlength(s->xs, xp));
Geremy Condra03ebf062011-10-12 18:17:24 -07001416 /* ...and move back again */
1417 xp--;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001418 }
Geremy Condra03ebf062011-10-12 18:17:24 -07001419 /*
1420 * flush any unwanted input so other programs/builtins
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001421 * can read it. Not very optimal, but less error prone
1422 * than flushing else where, dealing with redirections,
1423 * etc.
Geremy Condra03ebf062011-10-12 18:17:24 -07001424 * TODO: reduce size of shf buffer (~128?) if SSTDIN
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001425 */
1426 if (s->type == SSTDIN)
1427 shf_flush(s->u.shf);
1428 }
Geremy Condra03ebf062011-10-12 18:17:24 -07001429 /*
1430 * XXX: temporary kludge to restore source after a
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001431 * trap may have been executed.
1432 */
1433 source = s;
1434 if (have_tty && ksh_tmout) {
1435 ksh_tmout_state = TMOUT_EXECUTING;
1436 alarm(0);
1437 }
1438 cp = Xstring(s->xs, xp);
Elliott Hughes737fdce2014-08-07 12:59:26 -07001439 rndpush(cp);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001440 s->start = s->str = cp;
1441 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1442 /* Note: if input is all nulls, this is not eof */
1443 if (Xlength(s->xs, xp) == 0) {
1444 /* EOF */
1445 if (s->type == SFILE)
1446 shf_fdclose(s->u.shf);
1447 s->str = NULL;
Elliott Hughes96b43632015-07-17 11:39:41 -07001448 } else if (interactive && *s->str) {
1449 if (cur_prompt != PS1)
1450 histsave(&s->line, s->str, HIST_APPEND, true);
1451 else if (!ctype(*s->str, C_IFS | C_IFSWS))
1452 histsave(&s->line, s->str, HIST_QUEUE, true);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001453#if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
Elliott Hughes96b43632015-07-17 11:39:41 -07001454 else
1455 goto check_for_sole_return;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001456 } else if (interactive && cur_prompt == PS1) {
Elliott Hughes96b43632015-07-17 11:39:41 -07001457 check_for_sole_return:
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001458 cp = Xstring(s->xs, xp);
Elliott Hughes23925bb2017-09-22 16:04:20 -07001459 while (ctype(*cp, C_IFSWS))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001460 ++cp;
Elliott Hughes96b43632015-07-17 11:39:41 -07001461 if (!*cp) {
1462 histsave(&s->line, NULL, HIST_FLUSH, true);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001463 histsync();
Elliott Hughes96b43632015-07-17 11:39:41 -07001464 }
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001465#endif
1466 }
1467 if (interactive)
1468 set_prompt(PS2, NULL);
1469}
1470
1471void
1472set_prompt(int to, Source *s)
1473{
Elliott Hughesb27ce952015-04-21 13:39:18 -07001474 cur_prompt = (uint8_t)to;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001475
1476 switch (to) {
Geremy Condra03ebf062011-10-12 18:17:24 -07001477 /* command */
1478 case PS1:
1479 /*
1480 * Substitute ! and !! here, before substitutions are done
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001481 * so ! in expanded variables are not expanded.
1482 * NOTE: this is not what AT&T ksh does (it does it after
1483 * substitutions, POSIX doesn't say which is to be done.
1484 */
1485 {
1486 struct shf *shf;
1487 char * volatile ps1;
1488 Area *saved_atemp;
Elliott Hughes96b43632015-07-17 11:39:41 -07001489 int saved_lineno;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001490
1491 ps1 = str_val(global("PS1"));
1492 shf = shf_sopen(NULL, strlen(ps1) * 2,
1493 SHF_WR | SHF_DYNAMIC, NULL);
1494 while (*ps1)
1495 if (*ps1 != '!' || *++ps1 == '!')
1496 shf_putchar(*ps1++, shf);
1497 else
Elliott Hughes77740fc2016-08-12 15:06:53 -07001498 shf_fprintf(shf, Tf_lu, s ?
Elliott Hughesb27ce952015-04-21 13:39:18 -07001499 (unsigned long)s->line + 1 : 0UL);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001500 ps1 = shf_sclose(shf);
Elliott Hughes96b43632015-07-17 11:39:41 -07001501 saved_lineno = current_lineno;
1502 if (s)
1503 current_lineno = s->line + 1;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001504 saved_atemp = ATEMP;
1505 newenv(E_ERRH);
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +00001506 if (kshsetjmp(e->jbuf)) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001507 prompt = safe_prompt;
Geremy Condra03ebf062011-10-12 18:17:24 -07001508 /*
1509 * Don't print an error - assume it has already
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001510 * been printed. Reason is we may have forked
1511 * to run a command and the child may be
1512 * unwinding its stack through this code as it
1513 * exits.
1514 */
1515 } else {
1516 char *cp = substitute(ps1, 0);
1517 strdupx(prompt, cp, saved_atemp);
1518 }
Elliott Hughes96b43632015-07-17 11:39:41 -07001519 current_lineno = saved_lineno;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001520 quitenv(NULL);
1521 }
1522 break;
Geremy Condra03ebf062011-10-12 18:17:24 -07001523 /* command continuation */
1524 case PS2:
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001525 prompt = str_val(global("PS2"));
1526 break;
1527 }
1528}
1529
Thorsten Glaser811a5752013-07-25 14:24:45 +00001530int
1531pprompt(const char *cp, int ntruncate)
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001532{
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001533 char delimiter = 0;
Elliott Hughes737fdce2014-08-07 12:59:26 -07001534 bool doprint = (ntruncate != -1);
1535 bool indelimit = false;
1536 int columns = 0, lines = 0;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001537
Geremy Condra03ebf062011-10-12 18:17:24 -07001538 /*
1539 * Undocumented AT&T ksh feature:
1540 * If the second char in the prompt string is \r then the first
1541 * char is taken to be a non-printing delimiter and any chars
1542 * between two instances of the delimiter are not considered to
1543 * be part of the prompt length
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001544 */
1545 if (*cp && cp[1] == '\r') {
1546 delimiter = *cp;
1547 cp += 2;
1548 }
1549 for (; *cp; cp++) {
1550 if (indelimit && *cp != delimiter)
1551 ;
Elliott Hughes23925bb2017-09-22 16:04:20 -07001552 else if (ctype(*cp, C_CR | C_LF)) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001553 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1554 columns = 0;
1555 } else if (*cp == '\t') {
1556 columns = (columns | 7) + 1;
1557 } else if (*cp == '\b') {
1558 if (columns > 0)
1559 columns--;
1560 } else if (*cp == delimiter)
1561 indelimit = !indelimit;
Elliott Hughes23925bb2017-09-22 16:04:20 -07001562 else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001563 const char *cp2;
1564 columns += utf_widthadj(cp, &cp2);
Elliott Hughes737fdce2014-08-07 12:59:26 -07001565 if (doprint && (indelimit ||
1566 (ntruncate < (x_cols * lines + columns))))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001567 shf_write(cp, cp2 - cp, shl_out);
1568 cp = cp2 - /* loop increment */ 1;
1569 continue;
1570 } else
1571 columns++;
Elliott Hughes737fdce2014-08-07 12:59:26 -07001572 if (doprint && (*cp != delimiter) &&
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001573 (indelimit || (ntruncate < (x_cols * lines + columns))))
1574 shf_putc(*cp, shl_out);
1575 }
Elliott Hughes737fdce2014-08-07 12:59:26 -07001576 if (doprint)
1577 shf_flush(shl_out);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001578 return (x_cols * lines + columns);
1579}
1580
Geremy Condra03ebf062011-10-12 18:17:24 -07001581/*
1582 * Read the variable part of a ${...} expression (i.e. up to but not
1583 * including the :[-+?=#%] or close-brace).
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001584 */
1585static char *
1586get_brace_var(XString *wsp, char *wp)
1587{
Geremy Condra03ebf062011-10-12 18:17:24 -07001588 char c;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001589 enum parse_state {
Elliott Hughes77740fc2016-08-12 15:06:53 -07001590 PS_INITIAL, PS_SAW_PERCENT, PS_SAW_HASH, PS_SAW_BANG,
1591 PS_IDENT, PS_NUMBER, PS_VAR1
Geremy Condra03ebf062011-10-12 18:17:24 -07001592 } state = PS_INITIAL;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001593
Geremy Condra03ebf062011-10-12 18:17:24 -07001594 while (/* CONSTCOND */ 1) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001595 c = getsc();
1596 /* State machine to figure out where the variable part ends. */
1597 switch (state) {
Elliott Hughes77740fc2016-08-12 15:06:53 -07001598 case PS_SAW_HASH:
1599 if (ctype(c, C_VAR1)) {
1600 char c2;
1601
1602 c2 = getsc();
1603 ungetsc(c2);
Elliott Hughesdd4abe02018-02-05 15:55:19 -08001604 if (ord(c2) != ORD(/*{*/ '}')) {
Elliott Hughes77740fc2016-08-12 15:06:53 -07001605 ungetsc(c);
1606 goto out;
1607 }
1608 }
1609 goto ps_common;
1610 case PS_SAW_BANG:
Elliott Hughes23925bb2017-09-22 16:04:20 -07001611 switch (ord(c)) {
Elliott Hughesdd4abe02018-02-05 15:55:19 -08001612 case ORD('@'):
1613 case ORD('#'):
1614 case ORD('-'):
1615 case ORD('?'):
Elliott Hughes77740fc2016-08-12 15:06:53 -07001616 goto out;
1617 }
1618 goto ps_common;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001619 case PS_INITIAL:
Elliott Hughes23925bb2017-09-22 16:04:20 -07001620 switch (ord(c)) {
Elliott Hughesdd4abe02018-02-05 15:55:19 -08001621 case ORD('%'):
Elliott Hughes77740fc2016-08-12 15:06:53 -07001622 state = PS_SAW_PERCENT;
1623 goto next;
Elliott Hughesdd4abe02018-02-05 15:55:19 -08001624 case ORD('#'):
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001625 state = PS_SAW_HASH;
Elliott Hughes77740fc2016-08-12 15:06:53 -07001626 goto next;
Elliott Hughesdd4abe02018-02-05 15:55:19 -08001627 case ORD('!'):
Elliott Hughes77740fc2016-08-12 15:06:53 -07001628 state = PS_SAW_BANG;
1629 goto next;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001630 }
1631 /* FALLTHROUGH */
Elliott Hughes77740fc2016-08-12 15:06:53 -07001632 case PS_SAW_PERCENT:
1633 ps_common:
Elliott Hughes23925bb2017-09-22 16:04:20 -07001634 if (ctype(c, C_ALPHX))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001635 state = PS_IDENT;
Elliott Hughes23925bb2017-09-22 16:04:20 -07001636 else if (ctype(c, C_DIGIT))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001637 state = PS_NUMBER;
Elliott Hughes77740fc2016-08-12 15:06:53 -07001638 else if (ctype(c, C_VAR1))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001639 state = PS_VAR1;
1640 else
1641 goto out;
1642 break;
1643 case PS_IDENT:
Elliott Hughes23925bb2017-09-22 16:04:20 -07001644 if (!ctype(c, C_ALNUX)) {
Elliott Hughesdd4abe02018-02-05 15:55:19 -08001645 if (ord(c) == ORD('[')) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001646 char *tmp, *p;
1647
1648 if (!arraysub(&tmp))
Elliott Hughesa3c3f962017-04-12 16:52:30 -07001649 yyerror("missing ]");
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001650 *wp++ = c;
Elliott Hughes23925bb2017-09-22 16:04:20 -07001651 p = tmp;
1652 while (*p) {
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001653 Xcheck(*wsp, wp);
1654 *wp++ = *p++;
1655 }
1656 afree(tmp, ATEMP);
Geremy Condra03ebf062011-10-12 18:17:24 -07001657 /* the ] */
1658 c = getsc();
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001659 }
1660 goto out;
1661 }
Elliott Hughes77740fc2016-08-12 15:06:53 -07001662 next:
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001663 break;
1664 case PS_NUMBER:
Elliott Hughes23925bb2017-09-22 16:04:20 -07001665 if (!ctype(c, C_DIGIT))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001666 goto out;
1667 break;
1668 case PS_VAR1:
1669 goto out;
1670 }
1671 Xcheck(*wsp, wp);
1672 *wp++ = c;
1673 }
1674 out:
Geremy Condra03ebf062011-10-12 18:17:24 -07001675 /* end of variable part */
1676 *wp++ = '\0';
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001677 ungetsc(c);
1678 return (wp);
1679}
1680
1681/*
1682 * Save an array subscript - returns true if matching bracket found, false
1683 * if eof or newline was found.
1684 * (Returned string double null terminated)
1685 */
Geremy Condra03ebf062011-10-12 18:17:24 -07001686static bool
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001687arraysub(char **strp)
1688{
1689 XString ws;
Geremy Condra03ebf062011-10-12 18:17:24 -07001690 char *wp, c;
1691 /* we are just past the initial [ */
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +00001692 unsigned int depth = 1;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001693
1694 Xinit(ws, wp, 32, ATEMP);
1695
1696 do {
1697 c = getsc();
1698 Xcheck(ws, wp);
1699 *wp++ = c;
Elliott Hughesdd4abe02018-02-05 15:55:19 -08001700 if (ord(c) == ORD('['))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001701 depth++;
Elliott Hughesdd4abe02018-02-05 15:55:19 -08001702 else if (ord(c) == ORD(']'))
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001703 depth--;
1704 } while (depth > 0 && c && c != '\n');
1705
1706 *wp++ = '\0';
1707 *strp = Xclose(ws, wp);
1708
Geremy Condra03ebf062011-10-12 18:17:24 -07001709 return (tobool(depth == 0));
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001710}
1711
1712/* Unget a char: handles case when we are already at the start of the buffer */
Geremy Condra03ebf062011-10-12 18:17:24 -07001713static void
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001714ungetsc(int c)
1715{
Geremy Condra03ebf062011-10-12 18:17:24 -07001716 struct sretrace_info *rp = retrace_info;
1717
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001718 if (backslash_skip)
1719 backslash_skip--;
Geremy Condra03ebf062011-10-12 18:17:24 -07001720 /* Don't unget EOF... */
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001721 if (source->str == null && c == '\0')
Geremy Condra03ebf062011-10-12 18:17:24 -07001722 return;
1723 while (rp) {
1724 if (Xlength(rp->xs, rp->xp))
1725 rp->xp--;
1726 rp = rp->next;
1727 }
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +00001728 ungetsc_i(c);
Geremy Condra03ebf062011-10-12 18:17:24 -07001729}
1730static void
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +00001731ungetsc_i(int c)
Geremy Condra03ebf062011-10-12 18:17:24 -07001732{
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001733 if (source->str > source->start)
1734 source->str--;
1735 else {
1736 Source *s;
1737
1738 s = pushs(SREREAD, source->areap);
1739 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1740 s->start = s->str = s->ugbuf;
1741 s->next = source;
1742 source = s;
1743 }
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001744}
1745
1746
1747/* Called to get a char that isn't a \newline sequence. */
1748static int
1749getsc_bn(void)
1750{
1751 int c, c2;
1752
1753 if (ignore_backslash_newline)
Geremy Condra03ebf062011-10-12 18:17:24 -07001754 return (o_getsc_u());
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001755
1756 if (backslash_skip == 1) {
1757 backslash_skip = 2;
Geremy Condra03ebf062011-10-12 18:17:24 -07001758 return (o_getsc_u());
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001759 }
1760
1761 backslash_skip = 0;
1762
Geremy Condra03ebf062011-10-12 18:17:24 -07001763 while (/* CONSTCOND */ 1) {
1764 c = o_getsc_u();
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001765 if (c == '\\') {
Geremy Condra03ebf062011-10-12 18:17:24 -07001766 if ((c2 = o_getsc_u()) == '\n')
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001767 /* ignore the \newline; get the next char... */
1768 continue;
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +00001769 ungetsc_i(c2);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001770 backslash_skip = 1;
1771 }
1772 return (c);
1773 }
1774}
1775
Geremy Condra03ebf062011-10-12 18:17:24 -07001776void
1777yyskiputf8bom(void)
1778{
1779 int c;
1780
Elliott Hughes23925bb2017-09-22 16:04:20 -07001781 if (rtt2asc((c = o_getsc_u())) != 0xEF) {
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +00001782 ungetsc_i(c);
Geremy Condra03ebf062011-10-12 18:17:24 -07001783 return;
1784 }
Elliott Hughes23925bb2017-09-22 16:04:20 -07001785 if (rtt2asc((c = o_getsc_u())) != 0xBB) {
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +00001786 ungetsc_i(c);
Elliott Hughes23925bb2017-09-22 16:04:20 -07001787 ungetsc_i(asc2rtt(0xEF));
Geremy Condra03ebf062011-10-12 18:17:24 -07001788 return;
1789 }
Elliott Hughes23925bb2017-09-22 16:04:20 -07001790 if (rtt2asc((c = o_getsc_u())) != 0xBF) {
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +00001791 ungetsc_i(c);
Elliott Hughes23925bb2017-09-22 16:04:20 -07001792 ungetsc_i(asc2rtt(0xBB));
1793 ungetsc_i(asc2rtt(0xEF));
Geremy Condra03ebf062011-10-12 18:17:24 -07001794 return;
1795 }
1796 UTFMODE |= 8;
1797}
1798
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001799static Lex_state *
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +00001800push_state_i(State_info *si, Lex_state *old_end)
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001801{
Geremy Condra03ebf062011-10-12 18:17:24 -07001802 Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001803
Geremy Condra03ebf062011-10-12 18:17:24 -07001804 news[0].ls_base = old_end;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001805 si->base = &news[0];
1806 si->end = &news[STATE_BSIZE];
1807 return (&news[1]);
1808}
1809
1810static Lex_state *
Thorsten Glaserc2dc5de2013-02-18 23:02:51 +00001811pop_state_i(State_info *si, Lex_state *old_end)
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001812{
1813 Lex_state *old_base = si->base;
1814
Geremy Condra03ebf062011-10-12 18:17:24 -07001815 si->base = old_end->ls_base - STATE_BSIZE;
1816 si->end = old_end->ls_base;
Jean-Baptiste Queru5155f1c2011-06-16 10:05:28 -07001817
1818 afree(old_base, ATEMP);
1819
1820 return (si->base + STATE_BSIZE - 1);
1821}