summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Stephenson <pws@users.sourceforge.net> 2010-11-18 10:07:55 +0000
committer Peter Stephenson <pws@users.sourceforge.net> 2010-11-18 10:07:55 +0000
commit 23bdfc7fd2a012d5205ed22d18eb39e41c8fbc95 (patch)
tree 80d7f0e6c868a1283f3275cf88e08b8c86d509c1
parent 851b8e151004ec729dad9c8c7867bbf67649df8e (diff)
download zsh-23bdfc7fd2a012d5205ed22d18eb39e41c8fbc95.tar.gz
zsh-23bdfc7fd2a012d5205ed22d18eb39e41c8fbc95.zip
28418: add ${NAME:OFFSET:LENGTH} substitution
-rw-r--r-- ChangeLog 8
-rw-r--r-- Doc/Zsh/expn.yo 39
-rw-r--r-- Src/lex.c 26
-rw-r--r-- Src/params.c 4
-rw-r--r-- Src/subst.c 128
-rw-r--r-- Test/D04parameter.ztst 46
6 files changed, 241 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index dd3873860..91b8d964d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2010-11-18 Peter Stephenson <pws@csr.com>
+
+ * 28418: Doc/Zsh/expn.yo, Src/lex.c, Src/params.c, Src/subst.c,
+ Test/D04parameter.ztst: add ${NAME:OFFSET} and
+ ${NAME:OFFSET:LENGTH} substitution syntax.
+
2010-11-17 Peter Stephenson <pws@csr.com>
* 28377: Doc/Zsh/grammar.yo: document more alias problems.
@@ -13819,5 +13825,5 @@
*****************************************************
* This is used by the shell to define $ZSH_PATCHLEVEL
-* $Revision: 1.5121 $
+* $Revision: 1.5122 $
*****************************************************
diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index 64fcd74e3..6f29a8778 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -585,6 +585,45 @@ If var(name) is an array
the matching array elements are removed (use the `tt((M))' flag to
remove the non-matched elements).
)
+xitem(tt(${)var(name)tt(:)var(offset)tt(}))
+item(tt(${)var(name)tt(:)var(offset)tt(:)var(length)tt(}))(
+This syntax gives effects similar to parameter subscripting
+in the form tt($)var(name)tt([)var(offset)tt(,)var(end)tt(]) but in
+a form compatible with other shells.
+
+If the variable var(name) is a scalar, substitute the contents
+starting from offset var(offset); if var(name) is an array,
+substitute elements from element var(offset). If var(length) is
+given, substitute that many characters or elements, otherwise the
+entire rest of the scalar or array.
+
+var(offset) is treated similarly to a parameter subscript:
+the offset of the first character or element in var(name)
+is 0 if the option tt(KSH_ARRAYS) is set, else 1; a negative
+subscript counts backwards so that -1 corresponds to the last
+character or element.
+
+var(length) is always treated directly as a length and hence may not be
+negative.
+
+var(offset) and var(length) undergo the same set of shell substitutions
+as for scalar assignment; in addition, they are then subject to arithmetic
+evaluation. Hence, for example
+
+example(print ${foo:3}
+print ${foo: 1 + 2}
+print ${foo:$(( 1 + 2))}
+print ${foo:$(echo 1 + 2)})
+
+all have the same effect.
+
+Note that if var(offset) is negative, the tt(-) may not appear immediately
+after the tt(:) as this indicates the
+tt(${)var(name)tt(:-)var(word)tt(}) form of substitution; a space
+may be inserted before the tt(-). Furthermore, neither var(offset) nor
+var(length) may begin with an alphabetic character or tt(&) as these are
+used to indicate history-style modifiers.
+)
xitem(tt(${)var(name)tt(/)var(pattern)tt(/)var(repl)tt(}))
item(tt(${)var(name)tt(//)var(pattern)tt(/)var(repl)tt(}))(
Replace the longest possible match of var(pattern) in the expansion of
diff --git a/Src/lex.c b/Src/lex.c
index 28899fef0..fdb4b98ac 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -1398,7 +1398,12 @@ gettokstr(int c, int sub)
}
-/* Return non-zero for error (character to unget), else zero */
+/*
+ * Parse input as if in double quotes.
+ * endchar is the end character to expect.
+ * sub has got something to do with whether we are doing quoted substitution.
+ * Return non-zero for error (character to unget), else zero
+ */
/**/
static int
@@ -1591,14 +1596,20 @@ parsestrnoerr(char *s)
return err;
}
+/*
+ * Parse a subscript in string s.
+ * sub is passed down to dquote_parse().
+ * endchar is the final character.
+ * Return the next character, or NULL.
+ */
/**/
mod_export char *
-parse_subscript(char *s, int sub)
+parse_subscript(char *s, int sub, int endchar)
{
int l = strlen(s), err;
char *t;
- if (!*s || *s == ']')
+ if (!*s || *s == endchar)
return 0;
lexsave();
untokenize(t = dupstring(s));
@@ -1607,15 +1618,16 @@ parse_subscript(char *s, int sub)
len = 0;
bptr = tokstr = s;
bsiz = l + 1;
- err = dquote_parse(']', sub);
+ err = dquote_parse(endchar, sub);
if (err) {
err = *bptr;
- *bptr = 0;
+ *bptr = '\0';
untokenize(s);
*bptr = err;
- s = 0;
- } else
+ s = NULL;
+ } else {
s = bptr;
+ }
strinend();
inpop();
DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty.");
diff --git a/Src/params.c b/Src/params.c
index 7ac33b912..92e0e5368 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -1013,7 +1013,7 @@ isident(char *s)
return 0;
/* Require balanced [ ] pairs with something between */
- if (!(ss = parse_subscript(++ss, 1)))
+ if (!(ss = parse_subscript(++ss, 1, ']')))
return 0;
untokenize(s);
return !ss[1];
@@ -1628,7 +1628,7 @@ getindex(char **pptr, Value v, int flags)
*s++ = '[';
/* Error handled after untokenizing */
- s = parse_subscript(s, flags & SCANPM_DQUOTED);
+ s = parse_subscript(s, flags & SCANPM_DQUOTED, ']');
/* Now we untokenize everything except inull() markers so we can check *
* for the '*' and '@' special subscripts. The inull()s are removed *
* in getarg() after we know whether we're doing reverse indexing. */
diff --git a/Src/subst.c b/Src/subst.c
index 5f65945a5..c0fb38a48 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -1371,6 +1371,43 @@ untok_and_escape(char *s, int escapes, int tok_arg)
return dst;
}
+/*
+ * See if an argument str looks like a subscript or length following
+ * a colon and parse it.  It must be followed by a ':' or nothing.
+ * If this succeeds, expand and return the evaluated expression if
+ * found, else return NULL.
+ *
+ * We assume this is what is meant if the first character is not
+ * an alphabetic character or '&', which signify modifiers.
+ *
+ * On success *endp points to the character following the expression
+ * (the ':' or the terminating NUL).
+ *
+ * When NULL is returned *endp is not reliable: it is left untouched
+ * if str is empty or looks like a modifier, set to NULL if no
+ * subscript could be parsed, and points into str if the expression
+ * was found but parsestr() rejected it.  Callers must therefore test
+ * the return value before dereferencing *endp.
+ *
+ * str is only modified temporarily; it is intact again on every
+ * return path.
+ */
+static char *
+check_colon_subscript(char *str, char **endp)
+{
+    int sav;
+
+    /* Could this be a modifier (or empty)? */
+    if (!*str || ialpha(*str) || *str == '&')
+        return NULL;
+
+    *endp = parse_subscript(str, 0, ':');
+    if (!*endp) {
+        /* No trailing colon? */
+        *endp = parse_subscript(str, 0, '\0');
+        if (!*endp)
+            return NULL;
+    }
+
+    /*
+     * Terminate the expression just long enough to take a copy, then
+     * put the caller's string back straight away.  Everything below
+     * works on the copy, so the original is restored even when
+     * parsestr() fails and we return early.
+     */
+    sav = **endp;
+    **endp = '\0';
+    str = dupstring(str);
+    **endp = sav;
+
+    if (parsestr(str))
+        return NULL;
+    singsub(&str);
+
+    return str;
+}
+
/* parameter substitution */
#define isstring(c) ((c) == '$' || (char)(c) == String || (char)(c) == Qstring)
@@ -2683,6 +2720,97 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
}
val = dupstring("");
}
+ if (colf && inbrace) {
+ /*
+ * Look for ${PARAM:OFFSET} or ${PARAM:OFFSET:LENGTH}.
+ * This must appear before modifiers. For compatibility
+ * with bash we perform both standard string substitutions
+ * and math eval.
+ */
+ char *check_offset2;
+ char *check_offset = check_colon_subscript(s, &check_offset2);
+ if (check_offset) {
+ zlong offset = mathevali(check_offset);
+ zlong length = (zlong)-1;
+ if (errflag)
+ return NULL;
+ if ((*check_offset2 && *check_offset2 != ':')) {
+ zerr("invalid subscript: %s", check_offset);
+ return NULL;
+ }
+ if (*check_offset2) {
+ check_offset = check_colon_subscript(check_offset2 + 1,
+ &check_offset2);
+ if (*check_offset2 && *check_offset2 != ':') {
+ zerr("invalid length: %s", check_offset);
+ return NULL;
+ }
+ length = mathevali(check_offset);
+ if (errflag)
+ return NULL;
+ if (length < (zlong)0) {
+ zerr("invalid length: %s", check_offset);
+ return NULL;
+ }
+ }
+ if (!isset(KSHARRAYS) && offset > 0)
+ offset--;
+ if (isarr) {
+ int alen = arrlen(aval), count;
+ char **srcptr, **dstptr, **newarr;
+
+ if (offset < 0) {
+ offset += alen;
+ if (offset < 0)
+ offset = 0;
+ }
+ if (length < 0)
+ length = alen;
+ if (offset > alen)
+ offset = alen;
+ if (offset + length > alen)
+ length = alen - offset;
+ count = length;
+ srcptr = aval + offset;
+ newarr = dstptr = (char **)
+ zhalloc((length+1)*sizeof(char *));
+ while (count--)
+ *dstptr++ = dupstring(*srcptr++);
+ *dstptr = (char *)NULL;
+ aval = newarr;
+ } else {
+ char *sptr, *eptr;
+ if (offset < 0) {
+ MB_METACHARINIT();
+ for (sptr = val; *sptr; ) {
+ sptr += MB_METACHARLEN(sptr);
+ offset++;
+ }
+ if (offset < 0)
+ offset = 0;
+ }
+ MB_METACHARINIT();
+ for (sptr = val; *sptr && offset; ) {
+ sptr += MB_METACHARLEN(sptr);
+ offset--;
+ }
+ if (length >= 0) {
+ for (eptr = sptr; *eptr && length; ) {
+ eptr += MB_METACHARLEN(eptr);
+ length--;
+ }
+ val = dupstrpfx(sptr, eptr - sptr);
+ } else {
+ val = dupstring(sptr);
+ }
+ }
+ if (!*check_offset2) {
+ colf = 0;
+ } else {
+ s = check_offset2 + 1;
+ }
+ }
+ }
if (colf) {
/*
* History style colon modifiers. May need to apply
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index fe978263f..7c6a465af 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -1256,3 +1256,49 @@
0:$ZSH_EVAL_CONTEXT and $zsh_eval_context
>toplevel
>shfunc cmdsubst
+
+ foo="123456789"
+ print ${foo:3}
+ print ${foo: 1 + 3}
+ print ${foo:$(( 2 + 3))}
+ print ${foo:$(echo 3 + 3)}
+ print ${foo:3:1}
+ print ${foo: 1 + 3:(4-2)/2}
+ print ${foo:$(( 2 + 3)):$(( 7 - 6 ))}
+ print ${foo:$(echo 3 + 3):`echo 4 - 3`}
+ print ${foo: -1}
+ print ${foo: -10}
+0:Bash-style subscripts, scalar
+>3456789
+>456789
+>56789
+>6789
+>3
+>4
+>5
+>6
+>9
+>123456789
+
+ foo=(1 2 3 4 5 6 7 8 9)
+ print ${foo:3}
+ print ${foo: 1 + 3}
+ print ${foo:$(( 2 + 3))}
+ print ${foo:$(echo 3 + 3)}
+ print ${foo:3:1}
+ print ${foo: 1 + 3:(4-2)/2}
+ print ${foo:$(( 2 + 3)):$(( 7 - 6 ))}
+ print ${foo:$(echo 3 + 3):`echo 4 - 3`}
+ print ${foo: -1}
+ print ${foo: -10}
+0:Bash-style subscripts, array
+>3 4 5 6 7 8 9
+>4 5 6 7 8 9
+>5 6 7 8 9
+>6 7 8 9
+>3
+>4
+>5
+>6
+>9
+>1 2 3 4 5 6 7 8 9