summaryrefslogtreecommitdiff
path: root/Src/subst.c
diff options
context:
space:
mode:
Diffstat (limited to 'Src/subst.c')
-rw-r--r--Src/subst.c358
1 files changed, 316 insertions, 42 deletions
diff --git a/Src/subst.c b/Src/subst.c
index 0f98e6ea3..a079672df 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -556,7 +556,7 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep,
for ( ; *x; x += l) {
char c = (l = *x == Meta) ? x[1] ^ 32 : *x;
l++;
- if (!iwsep(STOUC(c)))
+ if (!iwsep((unsigned char) c))
break;
*ms_flags |= MULTSUB_WS_AT_START;
}
@@ -573,7 +573,7 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep,
convchar_t c;
if (*x == Dash)
*x = '-';
- if (itok(STOUC(*x))) {
+ if (itok((unsigned char) *x)) {
/* token, can't be separator, must be single byte */
rawc = *x;
l = 1;
@@ -582,7 +582,7 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep,
if (!inq && !inp && WC_ZISTYPE(c, ISEP)) {
*x = '\0';
for (x += l; *x; x += l) {
- if (itok(STOUC(*x))) {
+ if (itok((unsigned char) *x)) {
/* as above */
rawc = *x;
l = 1;
@@ -1489,21 +1489,27 @@ subst_parse_str(char **sp, int single, int err)
static char *
substevalchar(char *ptr)
{
- zlong ires = mathevali(ptr);
+ zlong ires;
int len = 0;
+ int saved_errflag = errflag;
- if (errflag)
- return NULL;
-#ifdef MULTIBYTE_SUPPORT
- if (isset(MULTIBYTE) && ires > 127) {
- /* '\\' + 'U' + 8 bytes of character + '\0' */
- char buf[11];
+ errflag = 0;
+ ires = mathevali(ptr);
- /* inefficient: should separate out \U handling from getkeystring */
- sprintf(buf, "\\U%.8x", (unsigned int)ires & 0xFFFFFFFFu);
- ptr = getkeystring(buf, &len, GETKEYS_BINDKEY, NULL);
+ if (errflag) { /* not a valid numerical expression */
+ errflag |= saved_errflag;
+ return noerrs ? dupstring(""): NULL;
+ }
+ errflag |= saved_errflag;
+ if (ires < 0) {
+ zerr("character not in range");
+ }
+#ifdef MULTIBYTE_SUPPORT
+ else if (isset(MULTIBYTE) && ires > 127) {
+ ptr = zhalloc(MB_CUR_MAX+1);
+ len = ucs4tomb((unsigned int)ires & 0xffffffff, ptr);
}
- if (len == 0)
+ if (len <= 0)
#endif
{
ptr = zhalloc(2);
@@ -1818,14 +1824,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* Use for the (k) flag. Goes down into the parameter code,
* sometimes.
*/
- char hkeys = 0;
+ int hkeys = 0;
/*
* Used for the (v) flag, ditto. Not quite sure why they're
* separate, but the tradition seems to be that things only
* get combined when that makes the result more obscure rather
* than less.
*/
- char hvals = 0;
+ int hvals = 0;
/*
* Whether we had to evaluate a subexpression, i.e. an
* internal ${...} or $(...) or plain $pm. We almost don't
@@ -1860,6 +1866,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* joining the array into a string (for compatibility with ksh/bash).
*/
int quoted_array_with_offset = 0;
+ /*
+ * Nofork substitution controls
+ */
+ char *rplyvar = NULL; /* Indicates ${|...;} or ${{var} ...;} */
+ char *rplytmp = NULL; /* Indicates ${ ... ;} */
*s++ = '\0';
/*
@@ -1870,8 +1881,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* these later on, too.
*/
c = *s;
- if (itype_end(s, IIDENT, 1) == s && *s != '#' && c != Pound &&
- !IS_DASH(c) &&
+ if (itype_end(s, (c == Inbrace ? INAMESPC : IIDENT), 1) == s &&
+ *s != '#' && c != Pound && !IS_DASH(c) &&
c != '!' && c != '$' && c != String && c != Qstring &&
c != '?' && c != Quest &&
c != '*' && c != Star && c != '@' && c != '{' &&
@@ -1887,19 +1898,235 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* flags in parentheses, but also one ksh hack.
*/
if (c == Inbrace) {
- inbrace = 1;
- s++;
+ /* For processing nofork command substitution string */
+ char *cmdarg = NULL, *endvar = NULL, inchar = *++s;
+ char *outbracep = s, sav = *s;
+ Param rplypm = NULL;
+ size_t slen = 0;
+ int trim = (!EMULATION(EMULATE_ZSH)) ? 2 : !qt;
+
+ inbrace = 1; /* Outer scope boolean, see above */
+
+ /* Handling for nofork command substitution e.g. ${|cmd;}
+ * See other comments about kludges for why this is here.
+ *
+ * The command string is extracted and executed, and the
+ * substitution assigned. There's no (...)-flags processing,
+ * i.e. no ${|(U)cmd;}, because it looks quite awful and
+ * should not be part of command substitution in any case.
+ * Use ${(U)${|cmd;}} as you would for ${(U)$(cmd;)}.
+ */
+ if (inchar == '|' || inchar == Bar || inblank(inchar)) {
+ *s = Inbrace;
+ if (skipparens(Inbrace, Outbrace, &outbracep) == 0)
+ slen = outbracep - s - 1;
+ *s = sav;
+ if (inchar == '|')
+ inchar = Bar; /* Simplify later compares */
+ } else if (inchar == '{' || inchar == Inbrace) {
+ *s = Inbrace;
+ if ((outbracep = itype_end(s+1, INAMESPC, 0))) {
+ if (*outbracep == Inbrack &&
+ (outbracep = parse_subscript(++outbracep, 1, ']')))
+ ++outbracep;
+ }
+
+ /* If we reached the first close brace, find the last */
+ if (outbracep && *outbracep == Outbrace) {
+ char outchar = inchar == Inbrace ? Outbrace : '}';
+ endvar = outbracep++;
+
+ /* Require space to avoid ${{var}} typo for ${${var}} */
+ if (!inblank(*outbracep)) {
+ zerr("bad substitution");
+ return NULL;
+ }
+
+ *endvar = '|'; /* Almost anything but braces/brackets */
+ outbracep = s;
+ if (skipparens(Inbrace, outchar, &outbracep) == 0)
+ *endvar = Outbrace;
+ else { /* Never happens? */
+ *endvar = outchar;
+ outbracep = endvar + 1;
+ }
+ slen = outbracep - s - 1;
+ if (inchar != Inbrace)
+ outbracep[-1] = Outbrace;
+ *s = sav;
+ inchar = Inbrace; /* Simplify later compares */
+ } else {
+ zerr("bad substitution");
+ return NULL;
+ }
+ }
+ if (slen > 1) {
+ char *outbracep = s + slen;
+ if (!itok(*s) || inblank(inchar)) {
+ /* This tokenize() is important */
+ char sav = *outbracep;
+ *outbracep = '\0';
+ tokenize(s);
+ *outbracep = sav;
+ }
+ if (*outbracep == Outbrace) {
+ if (endvar == s+1) {
+ /* For consistency with ${} we allow ${{}...} */
+ rplyvar = NULL;
+ }
+ if (endvar && *endvar == Outbrace) {
+ cmdarg = dupstrpfx(endvar+1, outbracep-endvar-1);
+ rplyvar = dupstrpfx(s+1,endvar-s-1);
+ } else {
+ cmdarg = dupstrpfx(s+1, outbracep-s-1);
+ rplyvar = "REPLY";
+ }
+ if (inblank(inchar)) {
+ /*
+ * Admittedly a hack. Take advantage of the added
+ * parameter scope and the semantics of $(<file) to
+ * construct a command to write/read a temporary file.
+ * Then fall through to the regular parameter handling
+ * to manage word splitting, expansion flags, etc.
+ */
+ char *outfmt = ">| %s {\n%s\n;}"; /* 13 */
+ if ((rplytmp = gettempname(NULL, 1))) {
+ /* Prevent shenanigans with $TMPPREFIX */
+ char *tmpfile = quotestring(rplytmp, QT_BACKSLASH);
+ char *dummy = zhalloc(strlen(cmdarg) +
+ strlen(tmpfile) +
+ 13);
+ sprintf(dummy, outfmt, tmpfile, cmdarg);
+ cmdarg = dummy;
+ } else {
+ /* TMPPREFIX not writable? */
+ cmdoutval = lastval;
+ cmdarg = NULL;
+ }
+ }
+ s = outbracep;
+ }
+ }
+
+ if (rplyvar) {
+ /* char *rplyval = getsparam("REPLY"); cf. Future? below */
+ startparamscope(); /* "local" behaves as if in a function */
+ if (inchar == Bar) {
+ /* rplyvar should be REPLY at this point, but create
+ * hardwired name anyway to expose any bugs elsewhere
+ */
+ rplypm = createparam("REPLY", PM_LOCAL|PM_UNSET|PM_HIDE);
+ if (rplypm) /* Shouldn't createparam() do this? */
+ rplypm->level = locallevel;
+ /* Future? Expose global value of $REPLY if any? */
+ /* if (rplyval) setsparam("REPLY", ztrdup(rplyval)); */
+ } else if (inblank(inchar)) {
+ rplypm = createparam(".zsh.cmdsubst",
+ PM_LOCAL|PM_UNSET|PM_HIDE|
+ PM_READONLY_SPECIAL);
+ if (rplypm)
+ rplypm->level = locallevel;
+ }
+ if (inchar != Inbrace && !rplypm) {
+ zerr("failed to create scope for command substitution");
+ return NULL;
+ }
+ }
+
+ if (rplyvar && cmdarg && *cmdarg) {
+ int obreaks = breaks;
+ Eprog cmdprog;
+ /* Execute the shell command */
+ queue_signals();
+ untokenize(cmdarg);
+ cmdprog = parse_string(cmdarg, 0);
+ if (cmdprog) {
+ /* exec.c handles dont_queue_signals() */
+ execode(cmdprog, 1, 0, "cmdsubst");
+ cmdoutval = lastval;
+ /* "return" behaves as if in a function */
+ if (retflag) {
+ retflag = 0;
+ breaks = obreaks; /* Is this ever not zero? */
+ }
+ } else /* parse error */
+ errflag |= ERRFLAG_ERROR;
+ if (rplypm)
+ rplypm->node.flags &= ~PM_READONLY_SPECIAL;
+ if (rplytmp && !errflag) {
+ int onoerrs = noerrs, rplylen;
+ noerrs = 2;
+ rplylen = zstuff(&cmdarg, rplytmp);
+ if (trim) {
+ /* bash and ksh strip trailing newlines here */
+ while (rplylen > 0 && cmdarg[rplylen-1] == '\n') {
+ rplylen--;
+ if (trim == 1)
+ break;
+ }
+ cmdarg[rplylen] = 0;
+ }
+ noerrs = onoerrs;
+ if (rplylen >= 0)
+ setsparam(rplyvar, metafy(cmdarg, rplylen, META_REALLOC));
+ }
+ unqueue_signals();
+ }
+
+ if (rplytmp)
+ unlink(rplytmp);
+ if (rplyvar) {
+ if (inchar != Inbrace) {
+ if ((val = dupstring(getsparam(rplyvar))))
+ vunset = 0;
+ else {
+ vunset = 1;
+ val = dupstring("");
+ }
+ } else {
+ s = dyncat(rplyvar, s);
+ rplyvar = NULL;
+ }
+ endparamscope();
+ if (exit_pending) {
+ if (mypid == getpid()) {
+ /*
+ * paranoia: don't check for jobs, but there
+ * shouldn't be any if not interactive.
+ */
+ stopmsg = 1;
+ zexit(exit_val, ZEXIT_NORMAL);
+ } else
+ _exit(exit_val);
+ }
+ }
+
/*
* In ksh emulation a leading `!' is a special flag working
- * sort of like our (k).
+ * sort of like our (k). This is true only for arrays or
+ * associative arrays and only with subscripts [*] or [@],
+ * so zsh's implementation is approximate. For namerefs
+ * in ksh, ${!ref} substitutes the parameter name at the
+ * end of any chain of references, rather than the value.
+ *
* TODO: this is one of very few cases tied directly to
* the emulation mode rather than an option. Since ksh
* doesn't have parameter flags it might be neater to
* handle this with the ^, =, ~ stuff, below.
*/
if ((c = *s) == '!' && s[1] != Outbrace && EMULATION(EMULATE_KSH)) {
- hkeys = SCANPM_WANTKEYS;
+ hkeys = SCANPM_WANTKEYS|SCANPM_NONAMEREF;
s++;
+ /* There's a slew of other special bash meanings of parameter
+ * references that start with "!":
+ * ${!name} == ${(P)name} (when name is not a nameref)
+ * ${!name*} == ${(k)parameters[(I)name*]}
+ * ${!name@} == ${(@k)parameters[(I)name*]}
+ * ${!name[*]} == ${(k)name} (but indexes of ordinary arrays, too)
+ * ${!name[@]} == ${(@k)name} (ditto, as noted above for ksh)
+ *
+ * See also workers/34390, workers/34397, workers/34408.
+ */
} else if (c == '(' || c == Inpar) {
char *t, sav;
int tt = 0;
@@ -2154,10 +2381,19 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
escapes = 1;
break;
+ case '!':
+ if ((hkeys|hvals) & ~SCANPM_NONAMEREF)
+ goto flagerr;
+ hkeys = SCANPM_NONAMEREF;
+ break;
case 'k':
+ if (hkeys & ~SCANPM_WANTKEYS)
+ goto flagerr;
hkeys = SCANPM_WANTKEYS;
break;
case 'v':
+ if (hvals & ~SCANPM_WANTVALS)
+ goto flagerr;
hvals = SCANPM_WANTVALS;
break;
@@ -2308,7 +2544,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
/*
* Look for special unparenthesised flags.
* TODO: could make these able to appear inside parentheses, too,
- * i.e. ${(^)...} etc.
+ * i.e. ${(^)...} etc., but ${(~)...} already has another meaning.
*/
for (;;) {
if ((c = *s) == '^' || c == Hat) {
@@ -2332,7 +2568,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
}
} else if ((c == '#' || c == Pound) &&
(inbrace || !isset(POSIXIDENTIFIERS)) &&
- (itype_end(s+1, IIDENT, 0) != s + 1
+ (itype_end(s+1, INAMESPC, 0) != s + 1
|| (cc = s[1]) == '*' || cc == Star || cc == '@'
|| cc == '?' || cc == Quest
|| cc == '$' || cc == String || cc == Qstring
@@ -2369,8 +2605,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* Try to handle this when parameter is named
* by (P) (second part of test).
*/
- if (itype_end(s+1, IIDENT, 0) != s+1 || (aspar && isstring(s[1]) &&
- (s[2] == Inbrace || s[2] == Inpar)))
+ if (itype_end(s+1, INAMESPC, 0) != s+1 ||
+ (aspar && isstring(s[1]) &&
+ (s[2] == Inbrace || s[2] == Inpar)))
chkset = 1, s++;
else if (!inbrace) {
/* Special case for `$+' on its own --- leave unmodified */
@@ -2531,6 +2768,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
scanflags |= SCANPM_DQUOTED;
if (chkset)
scanflags |= SCANPM_CHECKING;
+ if (!inbrace)
+ scanflags |= SCANPM_NONAMESPC;
/*
* Second argument: decide whether to use the subexpression or
* the string next on the line as the parameter name.
@@ -2556,14 +2795,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* we let fetchvalue set the main string pointer s to
* the end of the bit it's fetched.
*/
- if (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s),
- (wantt ? -1 :
- ((unset(KSHARRAYS) || inbrace) ? 1 : -1)),
- scanflags)) ||
- (v->pm && (v->pm->node.flags & PM_UNSET)) ||
- (v->flags & VALFLAG_EMPTY))
+ if (!rplyvar &&
+ (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s),
+ (wantt ? -1 :
+ ((unset(KSHARRAYS) || inbrace) ? 1 : -1)),
+ scanflags)) ||
+ (v->pm && (v->pm->node.flags & PM_UNSET)) ||
+ (v->flags & VALFLAG_EMPTY)))
vunset = 1;
-
if (wantt) {
/*
* Handle the (t) flag: value now becomes the type
@@ -2573,13 +2812,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
!(v->pm->node.flags & PM_UNSET))) {
int f = v->pm->node.flags;
- switch (PM_TYPE(f)) {
+ switch (PM_TYPE(f)|(f & PM_NAMEREF)) {
case PM_SCALAR: val = "scalar"; break;
case PM_ARRAY: val = "array"; break;
case PM_INTEGER: val = "integer"; break;
case PM_EFLOAT:
case PM_FFLOAT: val = "float"; break;
case PM_HASHED: val = "association"; break;
+ case PM_NAMEREF: val = "nameref"; break;
}
val = dupstring(val);
if (v->pm->level)
@@ -2910,6 +3150,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
chuck(ptr);
else
ptr++;
+ } else if (c == Dnull) {
+ chuck(ptr);
+ while (*ptr && *ptr != c)
+ ptr++;
+ if (*ptr == Dnull)
+ chuck(ptr);
+ ptr--; /* Outer loop is about to increment */
}
}
replstr = (*ptr && ptr[1]) ? ptr+1 : "";
@@ -2926,6 +3173,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
*/
if (!(flags & (SUB_MATCH|SUB_REST|SUB_BIND|SUB_EIND|SUB_LEN)))
flags |= SUB_REST;
+ /* If matching at start and end, don't stop early */
+ if ((flags & (SUB_START|SUB_END)) == (SUB_START|SUB_END))
+ flags |= SUB_LONG;
/*
* With ":" treat a value as unset if the variable is set but
@@ -3076,7 +3326,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
if (vunset) {
if (isset(EXECOPT)) {
*idend = '\0';
- zerr("%s: %s", idbeg, *s ? s : "parameter not set");
+ if (*s){
+ int l;
+ singsub(&s);
+ s = unmetafy(s, &l);
+ zerr("%s: %l", idbeg, s, l);
+ } else
+ zerr("%s: %s", idbeg, "parameter not set");
/*
* In interactive shell we need to return to
* top-level prompt --- don't clear this error
@@ -3203,7 +3459,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
shortest = 0;
++s;
}
- if (*itype_end(s, IIDENT, 0)) {
+ if (*itype_end(s, INAMESPC, 0)) {
untokenize(s);
zerr("not an identifier: %s", s);
return NULL;
@@ -3219,6 +3475,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
char *sval;
zip = getaparam(s);
if (!zip) {
+ zip = gethparam(s);
+ }
+ if (!zip) {
sval = getsparam(s);
if (sval)
zip = hmkarray(sval);
@@ -3263,7 +3522,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
int intersect = (*s == '*' || *s == Star);
char **compare, **ap, **apsrc;
++s;
- if (*itype_end(s, IIDENT, 0)) {
+ if (*itype_end(s, INAMESPC, 0)) {
untokenize(s);
zerr("not an identifier: %s", s);
return NULL;
@@ -3716,6 +3975,8 @@ colonsubscript:
if (presc) {
int ops = opts[PROMPTSUBST], opb = opts[PROMPTBANG];
int opp = opts[PROMPTPERCENT];
+ zattr savecurrent = txtcurrentattrs;
+ zattr saveunknown = txtunknownattrs;
if (presc < 2) {
opts[PROMPTPERCENT] = 1;
@@ -3738,7 +3999,8 @@ colonsubscript:
for (; *ap; ap++) {
char *tmps;
untokenize(*ap);
- tmps = promptexpand(*ap, 0, NULL, NULL, NULL);
+ txtunknownattrs = TXT_ATTR_ALL;
+ tmps = promptexpand(*ap, 0, NULL, NULL);
*ap = dupstring(tmps);
free(tmps);
}
@@ -3747,10 +4009,14 @@ colonsubscript:
if (!copied)
val = dupstring(val), copied = 1;
untokenize(val);
- tmps = promptexpand(val, 0, NULL, NULL, NULL);
+ txtunknownattrs = TXT_ATTR_ALL;
+ tmps = promptexpand(val, 0, NULL, NULL);
val = dupstring(tmps);
free(tmps);
}
+
+ txtpendingattrs = txtcurrentattrs = savecurrent;
+ txtunknownattrs = saveunknown;
opts[PROMPTSUBST] = ops;
opts[PROMPTBANG] = opb;
opts[PROMPTPERCENT] = opp;
@@ -4309,6 +4575,8 @@ modify(char **str, char **ptr, int inbrace)
break;
case 's':
+ case 'S':
+ hsubpatopt = (**ptr == 'S');
c = **ptr;
(*ptr)++;
ptr1 = *ptr;
@@ -4403,7 +4671,7 @@ modify(char **str, char **ptr, int inbrace)
break;
case '&':
- c = 's';
+ c = hsubpatopt ? 'S' : 's';
break;
case 'g':
@@ -4492,8 +4760,11 @@ modify(char **str, char **ptr, int inbrace)
copy = casemodify(tt, CASMOD_UPPER);
break;
case 's':
+ case 'S':
+ hsubpatopt = (c == 'S');
if (hsubl && hsubr)
- subst(&copy, hsubl, hsubr, gbal);
+ subst(&copy, dupstring(hsubl), dupstring(hsubr),
+ gbal, hsubpatopt);
break;
case 'q':
copy = quotestring(copy, QT_BACKSLASH_SHOWNULL);
@@ -4578,8 +4849,11 @@ modify(char **str, char **ptr, int inbrace)
*str = casemodify(*str, CASMOD_UPPER);
break;
case 's':
+ case 'S':
+ hsubpatopt = (c == 'S');
if (hsubl && hsubr)
- subst(str, hsubl, hsubr, gbal);
+ subst(str, dupstring(hsubl), dupstring(hsubr),
+ gbal, hsubpatopt);
break;
case 'q':
*str = quotestring(*str, QT_BACKSLASH);