summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 6
-rw-r--r-- Src/glob.c 17
-rw-r--r-- Src/params.c 23
-rw-r--r-- Src/subst.c 197
-rw-r--r-- Test/D04parameter.ztst 14
-rw-r--r-- Test/D07multibyte.ztst 14
6 files changed, 196 insertions, 75 deletions
diff --git a/ChangeLog b/ChangeLog
index f703cf0fd..c6ebc0d02 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
2006-11-02 Peter Stephenson <pws@csr.com>
+ * 22952: Src/glob.c, Src/params.c, Src/subst.c,
+ Test/D04parameter.ztst: fix multibyte delimiters for
+ arguments to parameter flags and substitution modifiers
+ in parameters and glob qualifiers (but not yet substitution
+ modifiers in history).
+
* 22950: Src/Zle/zle_tricky.c: starting menu completion
with reverse-menu-complete used the first match instead
of the last.
diff --git a/Src/glob.c b/Src/glob.c
index 201427bdb..394e91d01 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -1243,9 +1243,10 @@ zglob(LinkList list, LinkNode np, int nountok)
else {
/* ... or a user name */
char sav, *tt;
+ int arglen;
/* Find matching delimiters */
- tt = get_strarg(s);
+ tt = get_strarg(s, &arglen);
if (!*tt) {
zerr("missing end of name");
data = 0;
@@ -1255,7 +1256,7 @@ zglob(LinkList list, LinkNode np, int nountok)
sav = *tt;
*tt = '\0';
- if ((pw = getpwnam(s + 1)))
+ if ((pw = getpwnam(s + arglen)))
data = pw->pw_uid;
else {
zerr("unknown user");
@@ -1268,7 +1269,7 @@ zglob(LinkList list, LinkNode np, int nountok)
data = 0;
#endif /* !USE_GETPWNAM */
if (sav)
- s = tt + 1;
+ s = tt + arglen;
else
s = tt;
}
@@ -1283,8 +1284,9 @@ zglob(LinkList list, LinkNode np, int nountok)
else {
/* ...or a delimited group name. */
char sav, *tt;
+ int arglen;
- tt = get_strarg(s);
+ tt = get_strarg(s, &arglen);
if (!*tt) {
zerr("missing end of name");
data = 0;
@@ -1294,7 +1296,7 @@ zglob(LinkList list, LinkNode np, int nountok)
sav = *tt;
*tt = '\0';
- if ((gr = getgrnam(s + 1)))
+ if ((gr = getgrnam(s + arglen)))
data = gr->gr_gid;
else {
zerr("unknown group");
@@ -1307,7 +1309,7 @@ zglob(LinkList list, LinkNode np, int nountok)
data = 0;
#endif /* !USE_GETGRNAM */
if (sav)
- s = tt + 1;
+ s = tt + arglen;
else
s = tt;
}
@@ -1438,8 +1440,7 @@ zglob(LinkList list, LinkNode np, int nountok)
tt = NULL;
}
} else {
- plus = 1;
- tt = get_strarg(s);
+ tt = get_strarg(s, &plus);
if (!*tt)
{
zerr("missing end of string");
diff --git a/Src/params.c b/Src/params.c
index e60c8c740..7d7f0e8e7 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -947,7 +947,7 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w,
int *prevcharlen, int *nextcharlen)
{
int hasbeg = 0, word = 0, rev = 0, ind = 0, down = 0, l, i, ishash;
- int keymatch = 0, needtok = 0;
+ int keymatch = 0, needtok = 0, arglen;
char *s = *str, *sep = NULL, *t, sav, *d, **ta, **p, *tt, c;
zlong num = 1, beg = 0, r = 0;
Patprog pprog = NULL;
@@ -1004,28 +1004,28 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w,
* special interpretation by getindex() of `*' or `@'. */
break;
case 'n':
- t = get_strarg(++s);
+ t = get_strarg(++s, &arglen);
if (!*t)
goto flagerr;
sav = *t;
*t = '\0';
- num = mathevalarg(s + 1, &d);
+ num = mathevalarg(s + arglen, &d);
if (!num)
num = 1;
*t = sav;
- s = t;
+ s = t + arglen - 1;
break;
case 'b':
hasbeg = 1;
- t = get_strarg(++s);
+ t = get_strarg(++s, &arglen);
if (!*t)
goto flagerr;
sav = *t;
*t = '\0';
- if ((beg = mathevalarg(s + 1, &d)) > 0)
+ if ((beg = mathevalarg(s + arglen, &d)) > 0)
beg--;
*t = sav;
- s = t;
+ s = t + arglen - 1;
break;
case 'p':
escapes = 1;
@@ -1033,15 +1033,16 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w,
case 's':
/* This gives the string that separates words *
* (for use with the `w' flag). */
- t = get_strarg(++s);
+ t = get_strarg(++s, &arglen);
if (!*t)
goto flagerr;
sav = *t;
*t = '\0';
- sep = escapes ? getkeystring(s + 1, &waste, GETKEYS_SEP, NULL)
- : dupstring(s + 1);
+ s += arglen;
+ sep = escapes ? getkeystring(s, &waste, GETKEYS_SEP, NULL)
+ : dupstring(s);
*t = sav;
- s = t;
+ s = t + arglen - 1;
break;
default:
flagerr:
diff --git a/Src/subst.c b/Src/subst.c
index abc3c82af..3a5b9b353 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -1137,62 +1137,113 @@ dopadding(char *str, int prenum, int postnum, char *preone, char *postone,
return ret;
}
+
+/*
+ * Look for a delimited portion of a string. The first (possibly
+ * multibyte) character at s is the delimiter. Various forms
+ * of brackets are treated separately, as documented.
+ *
+ * Returns a pointer to the final delimiter. Sets *lenp to the
+ * length of the final delimiter; a NUL causes *lenp to be set
+ * to zero since we shouldn't advance past it. (The string is
+ * tokenized, so a NUL is a real end of string.)
+ */
+
/**/
char *
-get_strarg(char *s)
+get_strarg(char *s, int *lenp)
{
- char t = *s++;
+ convchar_t del;
+ int len;
+ char tok = 0;
- if (!t)
- return s - 1;
+ MB_METACHARINIT();
+ len = MB_METACHARLENCONV(s, &del);
+ if (!len) {
+ *lenp = 0;
+ return s;
+ }
- switch (t) {
- case '(':
- t = ')';
+#ifdef MULTIBYTE_SUPPORT
+ if (del == WEOF)
+ del = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s);
+#endif
+ s += len;
+ switch (del) {
+ case ZWC('('):
+ del = ZWC(')');
break;
case '[':
- t = ']';
+ del = ZWC(']');
break;
case '{':
- t = '}';
+ del = ZWC('}');
break;
case '<':
- t = '>';
+ del = ZWC('>');
break;
case Inpar:
- t = Outpar;
+ tok = Outpar;
break;
case Inang:
- t = Outang;
+ tok = Outang;
break;
case Inbrace:
- t = Outbrace;
+ tok = Outbrace;
break;
case Inbrack:
- t = Outbrack;
+ tok = Outbrack;
break;
}
- while (*s && *s != t)
- s++;
+ if (tok) {
+ /*
+ * Looking for a matching token; we want the literal byte,
+ * not a decoded multibyte character, so search specially.
+ */
+ while (*s && *s != tok)
+ s++;
+ } else {
+ convchar_t del2;
+ len = 0;
+ while (*s) {
+ len = MB_METACHARLENCONV(s, &del2);
+#ifdef MULTIBYTE_SUPPORT
+ if (del2 == WEOF)
+ del2 = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s);
+#endif
+ if (del == del2)
+ break;
+ s += len;
+ }
+ }
+ *lenp = len;
return s;
}
+/*
+ * Get an integer argument; update *s to the end of the
+ * final delimiter. *delmatchp is set to 1 if we have matching
+ * delimiters and there was no error in the evaluation, else 0.
+ */
+
/**/
static int
-get_intarg(char **s)
+get_intarg(char **s, int *delmatchp)
{
- char *t = get_strarg(*s + 1);
+ int arglen;
+ char *t = get_strarg(*s, &arglen);
char *p, sav;
zlong ret;
+ *delmatchp = 0;
if (!*t)
return -1;
sav = *t;
*t = '\0';
- p = dupstring(*s + 2);
- *s = t;
+ p = dupstring(*s + arglen);
+ *s = t + arglen;
*t = sav;
if (parsestr(p))
return -1;
@@ -1204,6 +1255,7 @@ get_intarg(char **s)
return -1;
if (ret < 0)
ret = -ret;
+ *delmatchp = 1;
return ret < 0 ? -ret : ret;
}
@@ -1540,8 +1592,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
int escapes = 0;
int klen;
#define UNTOK(C) (itok(C) ? ztokens[(C) - Pound] : (C))
-#define UNTOK_AND_ESCAPE(X) {\
- untokenize(X = dupstring(s + 1));\
+#define UNTOK_AND_ESCAPE(X, S) {\
+ untokenize(X = dupstring(S));\
if (escapes) {\
X = getkeystring(X, &klen, GETKEYS_SEP, NULL);\
X = metafy(X, klen, META_HREALLOC);\
@@ -1549,6 +1601,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
}
for (s++; (c = *s) != ')' && c != Outpar; s++, tt = 0) {
+ int arglen; /* length of modifier argument */
+ int delmatch; /* integer delimiters matched OK */
+
switch (c) {
case ')':
case Outpar:
@@ -1578,9 +1633,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
flags |= SUB_SUBSTR;
break;
case 'I':
- flnum = get_intarg(&s);
+ s++;
+ flnum = get_intarg(&s, &delmatch);
if (flnum < 0)
goto flagerr;
+ s--;
break;
case 'L':
@@ -1658,16 +1715,16 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
tt = 1;
/* fall through */
case 'j':
- t = get_strarg(++s);
+ t = get_strarg(++s, &arglen);
if (*t) {
sav = *t;
*t = '\0';
if (tt)
- UNTOK_AND_ESCAPE(spsep)
+ UNTOK_AND_ESCAPE(spsep, s + arglen)
else
- UNTOK_AND_ESCAPE(sep)
+ UNTOK_AND_ESCAPE(sep, s + arglen)
*t = sav;
- s = t;
+ s = t + arglen - 1;
} else
goto flagerr;
break;
@@ -1676,43 +1733,43 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
tt = 1;
/* fall through */
case 'r':
- sav = s[1];
- num = get_intarg(&s);
+ s++;
+ num = get_intarg(&s, &delmatch);
if (num < 0)
goto flagerr;
if (tt)
prenum = num;
else
postnum = num;
- if (UNTOK(s[1]) != UNTOK(sav))
+ if (!delmatch)
break;
- t = get_strarg(++s);
+ t = get_strarg(s, &arglen);
if (!*t)
goto flagerr;
sav = *t;
*t = '\0';
if (tt)
- UNTOK_AND_ESCAPE(premul)
+ UNTOK_AND_ESCAPE(premul, s + arglen)
else
- UNTOK_AND_ESCAPE(postmul)
+ UNTOK_AND_ESCAPE(postmul, s + arglen)
*t = sav;
sav = *s;
- s = t + 1;
+ s = t + arglen;
if (UNTOK(*s) != UNTOK(sav)) {
s--;
break;
}
- t = get_strarg(s);
+ t = get_strarg(s, &arglen);
if (!*t)
goto flagerr;
sav = *t;
*t = '\0';
if (tt)
- UNTOK_AND_ESCAPE(preone)
+ UNTOK_AND_ESCAPE(preone, s + arglen)
else
- UNTOK_AND_ESCAPE(postone)
+ UNTOK_AND_ESCAPE(postone, s + arglen)
*t = sav;
- s = t;
+ s = t + arglen - 1;
break;
case 'm':
@@ -3251,9 +3308,10 @@ arithsubst(char *a, char **bptr, char *rest)
void
modify(char **str, char **ptr)
{
- char *ptr1, *ptr2, *ptr3, del, *lptr, c, *test, *sep, *t, *tt, tc, *e;
- char *copy, *all, *tmp, sav;
- int gbal, wall, rec, al, nl;
+ char *ptr1, *ptr2, *ptr3, *lptr, c, *test, *sep, *t, *tt, tc, *e;
+ char *copy, *all, *tmp, sav, sav1, *ptr1end;
+ int gbal, wall, rec, al, nl, charlen, delmatch;
+ convchar_t del;
test = NULL;
@@ -3282,20 +3340,48 @@ modify(char **str, char **ptr)
break;
case 's':
- /* TODO: multibyte delimiter */
c = **ptr;
(*ptr)++;
ptr1 = *ptr;
- del = *ptr1++;
- for (ptr2 = ptr1; *ptr2 != del && *ptr2; ptr2++);
+ MB_METACHARINIT();
+ charlen = MB_METACHARLENCONV(ptr1, &del);
+#ifdef MULTIBYTE_SUPPORT
+ if (del == WEOF)
+ del = (wint_t)((*ptr1 == Meta) ? ptr1[1] ^ 32 : *ptr1);
+#endif
+ ptr1 += charlen;
+ for (ptr2 = ptr1, charlen = 0; *ptr2; ptr2 += charlen) {
+ convchar_t del2;
+ charlen = MB_METACHARLENCONV(ptr2, &del2);
+#ifdef MULTIBYTE_SUPPORT
+ if (del2 == WEOF)
+ del2 = (wint_t)((*ptr2 == Meta) ?
+ ptr2[1] ^ 32 : *ptr2);
+#endif
+ if (del2 == del)
+ break;
+ }
if (!*ptr2) {
zerr("bad substitution");
return;
}
- *ptr2++ = '\0';
- for (ptr3 = ptr2; *ptr3 != del && *ptr3; ptr3++);
- if ((sav = *ptr3))
- *ptr3++ = '\0';
+ ptr1end = ptr2;
+ ptr2 += charlen;
+ sav1 = *ptr1end;
+ *ptr1end = '\0';
+ for (ptr3 = ptr2, charlen = 0; *ptr3; ptr3 += charlen) {
+ convchar_t del3;
+ charlen = MB_METACHARLENCONV(ptr3, &del3);
+#ifdef MULTIBYTE_SUPPORT
+ if (del3 == WEOF)
+ del3 = (wint_t)((*ptr3 == Meta) ?
+ ptr3[1] ^ 32 : *ptr3);
+#endif
+ if (del3 == del)
+ break;
+ }
+ sav = *ptr3;
+ *ptr3 = '\0';
if (*ptr1) {
zsfree(hsubl);
hsubl = ztrdup(ptr1);
@@ -3313,10 +3399,9 @@ modify(char **str, char **ptr)
for (tt = hsubr = ztrdup(ptr2); *tt; tt++)
if (inull(*tt) && *tt != Bnullkeep)
chuck(tt--);
- ptr2[-1] = del;
- if (sav)
- ptr3[-1] = sav;
- *ptr = ptr3 - 1;
+ *ptr1end = sav1;
+ *ptr3 = sav;
+ *ptr = ptr3 + charlen - 1;
break;
case '&':
@@ -3335,13 +3420,13 @@ modify(char **str, char **ptr)
case 'W':
wall = 1;
(*ptr)++;
- ptr1 = get_strarg(ptr2 = *ptr);
+ ptr1 = get_strarg(ptr2 = *ptr, &charlen);
if ((sav = *ptr1))
*ptr1 = '\0';
- sep = dupstring(ptr2 + 1);
+ sep = dupstring(ptr2 + charlen);
if (sav)
*ptr1 = sav;
- *ptr = ptr1 + 1;
+ *ptr = ptr1 + charlen;
c = '\0';
break;
@@ -3350,8 +3435,8 @@ modify(char **str, char **ptr)
(*ptr)++;
break;
case 'F':
- rec = get_intarg(ptr);
(*ptr)++;
+ rec = get_intarg(ptr, &delmatch);
break;
default:
*ptr = lptr;
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index 57147d53e..ce5898f88 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -867,3 +867,17 @@
>andsomekept
>andsomekept
+ file=/one/two/three/four
+ print ${file:fh}
+ print ${file:F.1.h}
+ print ${file:F+2+h}
+ print ${file:F(3)h}
+ print ${file:F<4>h}
+ print ${file:F{5}h}
+0:Modifiers with repetition
+>/
+>/one/two/three
+>/one/two
+>/one
+>/
+>/
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index 8b17a7294..752013eec 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -297,3 +297,17 @@
>«κατέβην ¥«χθὲς»£ ¥¥«εἰς»£ «Πειραιᾶ
>ςκατέβην ηςχθὲςΓλ τηςεἰςΓλ ςΠειραιᾶ
# er... yeah, that looks right...
+
+ foo=picobarn
+ print ${foo:s£bar£rod£:s¥rod¥stick¥}
+0:Delimiters in modifiers
+>picostickn
+
+# TODO: if we get paired multibyte bracket delimiters to work
+# (as Emacs does, the smug so-and-so), the following should change.
+ foo=bar
+ print ${(r£5£¥X¥)foo}
+ print ${(l«10«»Y»£HI£)foo}
+0:Delimiters in parameter flags
+>barXX
+>YYYYYHIbar