aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHerbert Xu <herbert@gondor.apana.org.au>2018-03-09 00:14:02 +0800
committerBen Hutchings <ben@decadent.org.uk>2020-03-28 21:42:54 +0000
commitef6ed3c138aaf680ff3e4e0d7fbad6d2c61fb7d0 (patch)
tree67d860ad39c65b8258692d00f7930f804af8cd59
parenta4659bfa776f24f790c3ec071c5c9ef9459cdb70 (diff)
downloadklibc-ef6ed3c138aaf680ff3e4e0d7fbad6d2c61fb7d0.tar.gz
[klibc] dash: parser: Add syntax stack for recursive parsing
[ dash commit ab1cecb4047864afb247a6ed691e7f59ce716f2c ]

Without a stack of syntaxes we cannot correctly parse these two cases together:

    "${a#'$$'}"
    "${a#"${b-'$$'}"}"

A recursive parser also helps in some other corner cases such as nested arithmetic expansion with parentheses.

This patch adds a syntax stack allocated from the stack using alloca. As a side-effect this allows us to remove the naked backslashes for patterns within double-quotes, which means that EXP_QPAT also has to go.

This patch also removes any backslashes that precede right braces when they are present within a parameter expansion context, and backslashes that precede double quotes within inner double quotes inside a parameter expansion in a here-document context.

The idea of a recursive parser is based on a patch by Harald van Dijk.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
-rw-r--r--usr/dash/expand.c27
-rw-r--r--usr/dash/expand.h1
-rw-r--r--usr/dash/parser.c177
3 files changed, 120 insertions, 85 deletions
diff --git a/usr/dash/expand.c b/usr/dash/expand.c
index e86bd29da60e3..2c29ea445d3e2 100644
--- a/usr/dash/expand.c
+++ b/usr/dash/expand.c
@@ -85,7 +85,7 @@
#define RMESCAPE_HEAP 0x10 /* Malloc strings instead of stalloc */
/* Add CTLESC when necessary. */
-#define QUOTES_ESC (EXP_FULL | EXP_CASE | EXP_QPAT)
+#define QUOTES_ESC (EXP_FULL | EXP_CASE)
/* Do not skip NUL characters. */
#define QUOTES_KEEPNUL EXP_TILDE
@@ -335,16 +335,6 @@ addquote:
case CTLESC:
startloc++;
length++;
-
- /*
- * Quoted parameter expansion pattern: remove quote
- * unless inside inner quotes or we have a literal
- * backslash.
- */
- if (((flag | inquotes) & (EXP_QPAT | EXP_QUOTED)) ==
- EXP_QPAT && *p != '\\')
- break;
-
goto addquote;
case CTLVAR:
p = evalvar(p, flag | inquotes);
@@ -653,8 +643,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc, int varfla
char *(*scan)(char *, char *, char *, char *, int , int);
argstr(p, EXP_TILDE | (subtype != VSASSIGN && subtype != VSQUESTION ?
- (flag & (EXP_QUOTED | EXP_QPAT) ?
- EXP_QPAT : EXP_CASE) : 0));
+ EXP_CASE : 0));
STPUTC('\0', expdest);
argbackq = saveargbackq;
startp = stackblock() + startloc;
@@ -1646,7 +1635,6 @@ char *
_rmescapes(char *str, int flag)
{
char *p, *q, *r;
- unsigned inquotes;
int notescaped;
int globbing;
@@ -1676,24 +1664,23 @@ _rmescapes(char *str, int flag)
q = mempcpy(q, str, len);
}
}
- inquotes = 0;
globbing = flag & RMESCAPE_GLOB;
notescaped = globbing;
while (*p) {
if (*p == (char)CTLQUOTEMARK) {
- inquotes = ~inquotes;
p++;
notescaped = globbing;
continue;
}
+ if (*p == '\\') {
+ /* naked back slash */
+ notescaped = 0;
+ goto copy;
+ }
if (*p == (char)CTLESC) {
p++;
if (notescaped)
*q++ = '\\';
- } else if (*p == '\\' && !inquotes) {
- /* naked back slash */
- notescaped = 0;
- goto copy;
}
notescaped = globbing;
copy:
diff --git a/usr/dash/expand.h b/usr/dash/expand.h
index e2be8aa4f394f..5c767e66f6741 100644
--- a/usr/dash/expand.h
+++ b/usr/dash/expand.h
@@ -58,7 +58,6 @@ struct arglist {
#define EXP_VARTILDE 0x4 /* expand tildes in an assignment */
#define EXP_REDIR 0x8 /* file glob for a redirection (1 match only) */
#define EXP_CASE 0x10 /* keeps quotes around for CASE pattern */
-#define EXP_QPAT 0x20 /* pattern in quoted parameter expansion */
#define EXP_VARTILDE2 0x40 /* expand tildes after colons only */
#define EXP_WORD 0x80 /* expand word in parameter expansion */
#define EXP_QUOTED 0x100 /* expand word in double quotes */
diff --git a/usr/dash/parser.c b/usr/dash/parser.c
index 8b945e36f32b3..c28363caede75 100644
--- a/usr/dash/parser.c
+++ b/usr/dash/parser.c
@@ -80,6 +80,18 @@ struct heredoc {
int striptabs; /* if set, strip leading tabs */
};
+struct synstack {
+ const char *syntax;
+ struct synstack *prev;
+ struct synstack *next;
+ int innerdq;
+ int varpushed;
+ int dblquote;
+ int varnest; /* levels of variables expansion */
+ int parenlevel; /* levels of parens in arithmetic */
+ int dqvarnest; /* levels of variables expansion within double quotes */
+};
+
struct heredoc *heredoclist; /* list of here documents to read */
@@ -841,6 +853,21 @@ static int pgetc_eatbnl(void)
return c;
}
+static void synstack_push(struct synstack **stack, struct synstack *next,
+ const char *syntax)
+{
+ memset(next, 0, sizeof(*next));
+ next->syntax = syntax;
+ next->next = *stack;
+ (*stack)->prev = next;
+ *stack = next;
+}
+
+static void synstack_pop(struct synstack **stack)
+{
+ *stack = (*stack)->next;
+}
+
/*
@@ -870,24 +897,15 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
size_t len;
struct nodelist *bqlist;
int quotef;
- int dblquote;
- int varnest; /* levels of variables expansion */
- int arinest; /* levels of arithmetic expansion */
- int parenlevel; /* levels of parens in arithmetic */
- int dqvarnest; /* levels of variables expansion within double quotes */
int oldstyle;
- /* syntax before arithmetic */
- char const *uninitialized_var(prevsyntax);
+ /* syntax stack */
+ struct synstack synbase = { .syntax = syntax };
+ struct synstack *synstack = &synbase;
- dblquote = 0;
if (syntax == DQSYNTAX)
- dblquote = 1;
+ synstack->dblquote = 1;
quotef = 0;
bqlist = NULL;
- varnest = 0;
- arinest = 0;
- parenlevel = 0;
- dqvarnest = 0;
STARTSTACKSTR(out);
loop: { /* for each line, until end of word */
@@ -895,7 +913,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
if (c == '\034' && doprompt
&& attyset() && ! equal(termval(), "emacs")) {
attyline();
- if (syntax == BASESYNTAX)
+ if (synstack->syntax == BASESYNTAX)
return readtoken();
c = syntax == SQSYNTAX ? pgetc() : pgetc_eatbnl();
goto loop;
@@ -904,9 +922,9 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
CHECKEND(); /* set c to PEOF if at end of here document */
for (;;) { /* until end of line or end of word */
CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */
- switch(syntax[c]) {
+ switch(synstack->syntax[c]) {
case CNL: /* '\n' */
- if (syntax == BASESYNTAX)
+ if (synstack->syntax == BASESYNTAX)
goto endword; /* exit outer loop */
USTPUTC(c, out);
nlprompt();
@@ -916,7 +934,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
USTPUTC(c, out);
break;
case CCTL:
- if (eofmark == NULL || dblquote)
+ if (eofmark == NULL || synstack->dblquote)
USTPUTC(CTLESC, out);
USTPUTC(c, out);
break;
@@ -929,13 +947,18 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
pungetc();
} else {
if (
- dblquote &&
+ synstack->dblquote &&
c != '\\' && c != '`' &&
c != '$' && (
c != '"' ||
- eofmark != NULL
+ (eofmark != NULL &&
+ !synstack->varnest)
+ ) && (
+ c != '}' ||
+ !synstack->varnest
)
) {
+ USTPUTC(CTLESC, out);
USTPUTC('\\', out);
}
USTPUTC(CTLESC, out);
@@ -944,55 +967,64 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
}
break;
case CSQUOTE:
- syntax = SQSYNTAX;
+ synstack->syntax = SQSYNTAX;
quotemark:
if (eofmark == NULL) {
USTPUTC(CTLQUOTEMARK, out);
}
break;
case CDQUOTE:
- syntax = DQSYNTAX;
- dblquote = 1;
+ synstack->syntax = DQSYNTAX;
+ synstack->dblquote = 1;
+toggledq:
+ if (synstack->varnest)
+ synstack->innerdq ^= 1;
goto quotemark;
case CENDQUOTE:
- if (eofmark && !varnest)
+ if (eofmark && !synstack->varnest) {
USTPUTC(c, out);
- else {
- if (dqvarnest == 0) {
- syntax = BASESYNTAX;
- dblquote = 0;
- }
- quotef++;
- goto quotemark;
+ break;
}
- break;
+
+ if (synstack->dqvarnest == 0) {
+ synstack->syntax = BASESYNTAX;
+ synstack->dblquote = 0;
+ }
+
+ quotef++;
+
+ if (c == '"')
+ goto toggledq;
+
+ goto quotemark;
case CVAR: /* '$' */
PARSESUB(); /* parse substitution */
break;
case CENDVAR: /* '}' */
- if (varnest > 0) {
- varnest--;
- if (dqvarnest > 0) {
- dqvarnest--;
- }
+ if (!synstack->innerdq &&
+ synstack->varnest > 0) {
+ if (!--synstack->varnest &&
+ synstack->varpushed)
+ synstack_pop(&synstack);
+ else if (synstack->dqvarnest > 0)
+ synstack->dqvarnest--;
USTPUTC(CTLENDVAR, out);
} else {
USTPUTC(c, out);
}
break;
case CLP: /* '(' in arithmetic */
- parenlevel++;
+ synstack->parenlevel++;
USTPUTC(c, out);
break;
case CRP: /* ')' in arithmetic */
- if (parenlevel > 0) {
+ if (synstack->parenlevel > 0) {
USTPUTC(c, out);
- --parenlevel;
+ --synstack->parenlevel;
} else {
if (pgetc_eatbnl() == ')') {
USTPUTC(CTLENDARI, out);
- if (!--arinest)
- syntax = prevsyntax;
+ synstack_pop(&synstack);
} else {
/*
* unbalanced parens
@@ -1011,7 +1043,7 @@ quotemark:
case CIGN:
break;
default:
- if (varnest == 0)
+ if (synstack->varnest == 0)
goto endword; /* exit outer loop */
if (c != PEOA) {
USTPUTC(c, out);
@@ -1021,11 +1053,11 @@ quotemark:
}
}
endword:
- if (syntax == ARISYNTAX)
+ if (synstack->syntax == ARISYNTAX)
synerror("Missing '))'");
- if (syntax != BASESYNTAX && eofmark == NULL)
+ if (synstack->syntax != BASESYNTAX && eofmark == NULL)
synerror("Unterminated quoted string");
- if (varnest != 0) {
+ if (synstack->varnest != 0) {
/* { */
synerror("Missing '}'");
}
@@ -1202,6 +1234,8 @@ parsesub: {
PARSEBACKQNEW();
}
} else {
+ const char *newsyn = synstack->syntax;
+
USTPUTC(CTLVAR, out);
typeloc = out - (char *)stackblock();
STADJUST(1, out);
@@ -1252,6 +1286,8 @@ varname:
}
if (subtype == 0) {
+ int cc = c;
+
switch (c) {
case ':':
subtype = VSNUL;
@@ -1265,27 +1301,40 @@ varname:
break;
case '%':
case '#':
- {
- int cc = c;
- subtype = c == '#' ? VSTRIMLEFT :
- VSTRIMRIGHT;
- c = pgetc_eatbnl();
- if (c == cc)
- subtype++;
- else
- pungetc();
- break;
- }
+ subtype = c == '#' ? VSTRIMLEFT :
+ VSTRIMRIGHT;
+ c = pgetc_eatbnl();
+ if (c == cc)
+ subtype++;
+ else
+ pungetc();
+
+ newsyn = BASESYNTAX;
+ break;
}
} else {
badsub:
pungetc();
}
+
+ if (newsyn == ARISYNTAX && subtype > VSNORMAL)
+ newsyn = DQSYNTAX;
+
+ if (newsyn != synstack->syntax) {
+ synstack_push(&synstack,
+ synstack->prev ?:
+ alloca(sizeof(*synstack)),
+ newsyn);
+
+ synstack->varpushed++;
+ synstack->dblquote = newsyn != BASESYNTAX;
+ }
+
*((char *)stackblock() + typeloc) = subtype;
if (subtype != VSNORMAL) {
- varnest++;
- if (dblquote)
- dqvarnest++;
+ synstack->varnest++;
+ if (synstack->dblquote)
+ synstack->dqvarnest++;
}
STPUTC('=', out);
}
@@ -1335,7 +1384,7 @@ parsebackq: {
case '\\':
pc = pgetc_eatbnl();
if (pc != '\\' && pc != '`' && pc != '$'
- && (!dblquote || pc != '"'))
+ && (!synstack->dblquote || pc != '"'))
STPUTC('\\', pout);
if (pc > PEOA) {
break;
@@ -1411,10 +1460,10 @@ done:
*/
parsearith: {
- if (++arinest == 1) {
- prevsyntax = syntax;
- syntax = ARISYNTAX;
- }
+ synstack_push(&synstack,
+ synstack->prev ?: alloca(sizeof(*synstack)),
+ ARISYNTAX);
+ synstack->dblquote = 1;
USTPUTC(CTLARI, out);
goto parsearith_return;
}