summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog8
-rw-r--r--Doc/Zsh/builtins.yo4
-rw-r--r--Functions/Zle/insert-composed-char5
-rw-r--r--Src/Zle/zle.h4
-rw-r--r--Src/Zle/zle_main.c32
-rw-r--r--Src/builtin.c264
-rw-r--r--Src/pattern.c99
-rw-r--r--Src/subst.c60
-rw-r--r--Src/utils.c402
-rw-r--r--Src/zsh.h4
-rw-r--r--Src/ztype.h6
-rw-r--r--Test/D04parameter.ztst23
-rw-r--r--Test/D07multibyte.ztst54
13 files changed, 737 insertions, 228 deletions
diff --git a/ChangeLog b/ChangeLog
index 130dfc92c..4d5c8e7c7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2006-07-24 Peter Stephenson <p.w.stephenson@ntlworld.com>
+
+ * 22556: Doc/Zsh/builtins.yo, Functions/Zle/insert-composed-char,
+ Src/builtin.c, Src/pattern.c, Src/subst.c, Src/utils.c, Src/zsh.h,
+ Src/ztype.h, Src/Zle/zle.h, Src/Zle/zle_main.c,
+ Test/D04parameter.ztst, Test/D07multibyte.ztst: Multibyte
+ separators and delimiters.
+
2006-07-18 Clint Adams <clint@zsh.org>
* 22554: Jesse Weinstein: Completion/Unix/Command/_vorbiscomment:
diff --git a/Doc/Zsh/builtins.yo b/Doc/Zsh/builtins.yo
index bd81a7746..d8892cd5c 100644
--- a/Doc/Zsh/builtins.yo
+++ b/Doc/Zsh/builtins.yo
@@ -1003,6 +1003,10 @@ Read only one (or var(num)) characters. All are assigned to the first
var(name), without word splitting. This flag is ignored when tt(-q) is
present. Input is read from the terminal unless one of tt(-u) or tt(-p)
is present. This option may also be used within zle widgets.
+
+Note that despite the mnemonic `key' this option does read full
+characters, which may consist of multiple bytes if the option
+tt(MULTIBYTE) is set.
)
item(tt(-z))(
Read one entry from the editor buffer stack and assign it to the first
diff --git a/Functions/Zle/insert-composed-char b/Functions/Zle/insert-composed-char
index 2ed008990..7978a7589 100644
--- a/Functions/Zle/insert-composed-char
+++ b/Functions/Zle/insert-composed-char
@@ -128,7 +128,7 @@
# 'm Macron
# '' Acute
-emulate -LR zsh
+emulate -L zsh
setopt cbases extendedglob printeightbit
local accent basechar ochar error
@@ -165,7 +165,8 @@ else
fi
local -A charmap
-charmap=(${=zsh_accented_chars[$accent]})
+# just in case someone is monkeying with IFS...
+charmap=(${(s. .)zsh_accented_chars[$accent]})
if [[ ${#charmap} -eq 0 || -z $charmap[$basechar] ]]; then
$error "Combination ${basechar}${accent} is not available."
diff --git a/Src/Zle/zle.h b/Src/Zle/zle.h
index 3671f90f3..69c73f4cf 100644
--- a/Src/Zle/zle.h
+++ b/Src/Zle/zle.h
@@ -62,11 +62,11 @@ typedef wint_t ZLE_INT_T;
#define ZC_iblank wcsiblank
#define ZC_icntrl iswcntrl
#define ZC_idigit iswdigit
-#define ZC_iident wcsiident
+#define ZC_iident(x) wcsitype((x), IIDENT)
#define ZC_ilower iswlower
#define ZC_inblank iswspace
#define ZC_iupper iswupper
-#define ZC_iword wcsiword
+#define ZC_iword(x) wcsitype((x), IWORD)
#define ZC_tolower towlower
#define ZC_toupper towupper
diff --git a/Src/Zle/zle_main.c b/Src/Zle/zle_main.c
index 1c82611c2..1d4636937 100644
--- a/Src/Zle/zle_main.c
+++ b/Src/Zle/zle_main.c
@@ -1290,32 +1290,40 @@ bin_vared(char *name, char **args, Options ops, UNUSED(int func))
char **arr = getarrvalue(v), **aptr, **tmparr, **tptr;
tptr = tmparr = (char **)zhalloc(sizeof(char *)*(arrlen(arr)+1));
for (aptr = arr; *aptr; aptr++) {
- int sepcount = 0;
+ int sepcount = 0, clen;
+ convchar_t c;
/*
* See if this word contains a separator character
* or backslash
*/
- for (t = *aptr; *t; t++) {
- if (*t == Meta) {
- if (isep(t[1] ^ 32))
- sepcount++;
+ MB_METACHARINIT();
+ for (t = *aptr; *t; ) {
+ if (*t == '\\') {
t++;
- } else if (isep(*t) || *t == '\\')
sepcount++;
+ } else {
+ t += MB_METACHARLENCONV(t, &c);
+ if (MB_ZISTYPE(c, ISEP))
+ sepcount++;
+ }
}
if (sepcount) {
/* Yes, so allocate enough space to quote it. */
char *newstr, *nptr;
newstr = zhalloc(strlen(*aptr)+sepcount+1);
/* Go through string quoting separators */
+ MB_METACHARINIT();
for (t = *aptr, nptr = newstr; *t; ) {
- if (*t == Meta) {
- if (isep(t[1] ^ 32))
- *nptr++ = '\\';
- *nptr++ = *t++;
- } else if (isep(*t) || *t == '\\')
+ if (*t == '\\') {
*nptr++ = '\\';
- *nptr++ = *t++;
+ *nptr++ = *t++;
+ } else {
+ clen = MB_METACHARLENCONV(t, &c);
+ if (MB_ZISTYPE(c, ISEP))
+ *nptr++ = '\\';
+ while (clen--)
+ *nptr++ = *t++;
+ }
}
*nptr = '\0';
/* Stick this into the array of words to join up */
diff --git a/Src/builtin.c b/Src/builtin.c
index 71dcbffc3..3bd3b63cb 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -4266,7 +4266,7 @@ bin_break(char *name, char **argv, UNUSED(Options ops), int func)
zerrnam(name, "not in while, until, select, or repeat loop");
return 1;
}
- contflag = 1; /* ARE WE SUPPOSED TO FALL THROUGH HERE? */
+ contflag = 1; /* FALLTHROUGH */
case BIN_BREAK:
if (!loops) { /* break is only permitted in loops */
zerrnam(name, "not in while, until, select, or repeat loop");
@@ -4560,7 +4560,14 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
int readchar = -1, val, resettty = 0;
struct ttyinfo saveti;
char d;
+#ifdef MULTIBYTE_SUPPORT
+ wchar_t delim = L'\n', wc;
+ mbstate_t mbs;
+ char *laststart;
+ size_t ret;
+#else
char delim = '\n';
+#endif
if (OPT_HASARG(ops,c='k')) {
char *eptr, *optarg = OPT_ARG(ops,c);
@@ -4666,7 +4673,23 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
}
if (OPT_ISSET(ops,'d')) {
char *delimstr = OPT_ARG(ops,'d');
+#ifdef MULTIBYTE_SUPPORT
+ wint_t wc;
+
+ if (isset(MULTIBYTE)) {
+ mb_metacharinit();
+ (void)mb_metacharlenconv(delimstr, &wc);
+ }
+ else
+ wc = WEOF;
+ if (wc != WEOF)
+ delim = (wchar_t)wc;
+ else
+ delim = (wchar_t)((delimstr[0] == Meta) ?
+ delimstr[1] ^ 32 : delimstr[0]);
+#else
delim = (delimstr[0] == Meta) ? delimstr[1] ^ 32 : delimstr[0];
+#endif
if (SHTTY != -1) {
struct ttyinfo ti;
gettyinfo(&ti);
@@ -4710,26 +4733,74 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
}
}
+#ifdef MULTIBYTE_SUPPORT
+ memset(&mbs, 0, sizeof(mbs));
+#endif
+
/* option -k means read only a given number of characters (default 1) */
if (OPT_ISSET(ops,'k')) {
+ int eof = 0;
/* allocate buffer space for result */
bptr = buf = (char *)zalloc(nchars+1);
do {
if (izle) {
- if ((val = getkeyptr(0, NULL)) < 0)
+ if ((val = getkeyptr(0, NULL)) < 0) {
+ eof = 1;
break;
- *bptr++ = (char) val;
+ }
+ *bptr = (char) val;
+#ifdef MULTIBYTE_SUPPORT
+ if (isset(MULTIBYTE)) {
+ ret = mbrlen(bptr++, 1, &mbs);
+ if (ret == MB_INVALID)
+ memset(&mbs, 0, sizeof(mbs));
+ /* treat invalid as single character */
+ if (ret != MB_INCOMPLETE)
+ nchars--;
+ continue;
+ } else {
+ bptr++;
+ nchars--;
+ }
+#else
+ bptr++;
nchars--;
+#endif
} else {
/* If read returns 0, is end of file */
if (readchar >= 0) {
*bptr = readchar;
val = 1;
readchar = -1;
- } else if ((val = read(readfd, bptr, nchars)) <= 0)
+ } else if ((val = read(readfd, bptr, nchars)) <= 0) {
+ eof = 1;
break;
+ }
+#ifdef MULTIBYTE_SUPPORT
+ if (isset(MULTIBYTE)) {
+ while (val > 0) {
+ ret = mbrlen(bptr, val, &mbs);
+ if (ret == MB_INCOMPLETE) {
+ bptr += val;
+ break;
+ } else {
+ if (ret == MB_INVALID) {
+ memset(&mbs, 0, sizeof(mbs));
+ /* treat as single byte */
+ ret = 1;
+ }
+ else if (ret == 0) /* handle null as normal char */
+ ret = 1;
+ nchars--;
+ val -= ret;
+ bptr += ret;
+ }
+ }
+ continue;
+ }
+#endif
/* decrement number of characters read from number required */
nchars -= val;
@@ -4761,7 +4832,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
zfree(buf, bptr - buf + 1);
if (resettty && SHTTY != -1)
settyinfo(&saveti);
- return val <= 0;
+ return eof;
}
/* option -q means get one character, and interpret it as a Y or N */
@@ -4770,10 +4841,25 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
/* set up the buffer */
readbuf[1] = '\0';
-
+
/* get, and store, reply */
if (izle) {
+#ifdef MULTIBYTE_SUPPORT
+ int key;
+
+ while ((key = getkeyptr(0, NULL)) >= 0) {
+ char c = (char)key;
+ /*
+ * If multibyte, it can't be y, so we don't care
+ * what key gets set to; just read to end of character.
+ */
+ if (!isset(MULTIBYTE) ||
+ mbrlen(&c, 1, &mbs) != MB_INCOMPLETE)
+ break;
+ }
+#else
int key = getkeyptr(0, NULL);
+#endif
readbuf[0] = (key == 'y' ? 'y' : 'n');
} else {
@@ -4786,6 +4872,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
SHTTY = -1;
}
}
+
if (OPT_ISSET(ops,'e') || OPT_ISSET(ops,'E'))
printf("%s\n", readbuf);
if (!OPT_ISSET(ops,'e'))
@@ -4808,16 +4895,79 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
while (*args || (OPT_ISSET(ops,'A') && !gotnl)) {
sigset_t s = child_unblock();
buf = bptr = (char *)zalloc(bsiz = 64);
+#ifdef MULTIBYTE_SUPPORT
+ laststart = buf;
+ ret = MB_INCOMPLETE;
+#endif
/* get input, a character at a time */
while (!gotnl) {
c = zread(izle, &readchar);
/* \ at the end of a line indicates a continuation *
* line, except in raw mode (-r option) */
+#ifdef MULTIBYTE_SUPPORT
+ if (c == EOF) {
+ /* not waiting to be completed any more */
+ ret = 0;
+ break;
+ }
+ *bptr = (char)c;
+ if (isset(MULTIBYTE)) {
+ ret = mbrtowc(&wc, bptr, 1, &mbs);
+ if (!ret) /* NULL */
+ ret = 1;
+ } else {
+ ret = 1;
+ wc = (wchar_t)c;
+ }
+ if (ret != MB_INCOMPLETE) {
+ if (ret == MB_INVALID)
+ memset(&mbs, 0, sizeof(mbs));
+ if (bslash && wc == delim) {
+ bslash = 0;
+ continue;
+ }
+ if (wc == delim)
+ break;
+ /*
+ * `first' is non-zero if any separator we encounter is a
+ * non-whitespace separator, which means that anything
+ * (even an empty string) between, before or after separators
+ * is significant. If it is zero, we have a whitespace
+ * separator, which shouldn't cause extra empty strings to
+ * be emitted. Hence the test for (*buf || first) when
+ * we assign the result of reading a word.
+ */
+ if (!bslash && wcsitype(wc, ISEP)) {
+ if (bptr != buf ||
+ (!(c < 128 && iwsep(c)) && first)) {
+ first |= !(c < 128 && iwsep(c));
+ break;
+ }
+ first |= !(c < 128 && iwsep(c));
+ continue;
+ }
+ bslash = (wc == L'\\' && !bslash && !OPT_ISSET(ops,'r'));
+ if (bslash)
+ continue;
+ first = 0;
+ }
+ if (imeta(STOUC(*bptr))) {
+ bptr[1] = bptr[0] ^ 32;
+ bptr[0] = Meta;
+ bptr += 2;
+ }
+ else
+ bptr++;
+ if (ret != MB_INCOMPLETE)
+ laststart = bptr;
+#else
+ if (c == EOF)
+ break;
if (bslash && c == delim) {
bslash = 0;
continue;
}
- if (c == EOF || c == delim)
+ if (c == delim)
break;
/*
* `first' is non-zero if any separator we encounter is a
@@ -4845,18 +4995,42 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
*bptr++ = c ^ 32;
} else
*bptr++ = c;
+#endif
/* increase the buffer size, if necessary */
if (bptr >= buf + bsiz - 1) {
int blen = bptr - buf;
+#ifdef MULTIBYTE_SUPPORT
+ int llen = laststart - buf;
+#endif
buf = realloc(buf, bsiz *= 2);
bptr = buf + blen;
+#ifdef MULTIBYTE_SUPPORT
+ laststart = buf + llen;
+#endif
}
}
signal_setmask(s);
+#ifdef MULTIBYTE_SUPPORT
+ if (c == EOF)
+ gotnl = 1;
+ if (ret == MB_INCOMPLETE) {
+ /*
+ * We can only get here if there is an EOF in the
+ * middle of a character... safest to keep the debris,
+ * I suppose.
+ */
+ *bptr = '\0';
+ } else {
+ if (wc == delim)
+ gotnl = 1;
+ *laststart = '\0';
+ }
+#else
if (c == delim || c == EOF)
gotnl = 1;
*bptr = '\0';
+#endif
/* dispose of word appropriately */
if (OPT_ISSET(ops,'e') || OPT_ISSET(ops,'E')) {
zputs(buf, stdout);
@@ -4908,12 +5082,66 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
return c == EOF;
}
buf = bptr = (char *)zalloc(bsiz = 64);
+#ifdef MULTIBYTE_SUPPORT
+ laststart = buf;
+ ret = MB_INCOMPLETE;
+#endif
/* any remaining part of the line goes into one parameter */
bslash = 0;
if (!gotnl) {
sigset_t s = child_unblock();
for (;;) {
c = zread(izle, &readchar);
+#ifdef MULTIBYTE_SUPPORT
+ if (c == EOF) {
+ /* not waiting to be completed any more */
+ ret = 0;
+ break;
+ }
+ *bptr = (char)c;
+ if (isset(MULTIBYTE)) {
+ ret = mbrtowc(&wc, bptr, 1, &mbs);
+ if (!ret) /* NULL */
+ ret = 1;
+ } else {
+ ret = 1;
+ wc = (wchar_t)c;
+ }
+ if (ret != MB_INCOMPLETE) {
+ if (ret == MB_INVALID)
+ memset(&mbs, 0, sizeof(mbs));
+ /*
+ * \ at the end of a line introduces a continuation line,
+ * except in raw mode (-r option)
+ */
+ if (bslash && wc == delim) {
+ bslash = 0;
+ continue;
+ }
+ if (wc == delim && !zbuf)
+ break;
+ if (!bslash && bptr == buf && wcsitype(wc, ISEP)) {
+ if (c < 128 && iwsep(c))
+ continue;
+ else if (!first) {
+ first = 1;
+ continue;
+ }
+ }
+ bslash = (wc == L'\\' && !bslash && !OPT_ISSET(ops,'r'));
+ if (bslash)
+ continue;
+ }
+ if (imeta(STOUC(*bptr))) {
+ bptr[1] = bptr[0] ^ 32;
+ bptr[0] = Meta;
+ bptr += 2;
+ }
+ else
+ bptr++;
+ if (ret != MB_INCOMPLETE)
+ laststart = bptr;
+#else
/* \ at the end of a line introduces a continuation line, except in
raw mode (-r option) */
if (bslash && c == delim) {
@@ -4938,22 +5166,36 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
*bptr++ = c ^ 32;
} else
*bptr++ = c;
+#endif
/* increase the buffer size, if necessary */
if (bptr >= buf + bsiz - 1) {
int blen = bptr - buf;
+#ifdef MULTIBYTE_SUPPORT
+ int llen = laststart - buf;
+#endif
buf = realloc(buf, bsiz *= 2);
bptr = buf + blen;
+#ifdef MULTIBYTE_SUPPORT
+ laststart = buf + llen;
+#endif
}
}
signal_setmask(s);
}
+#ifdef MULTIBYTE_SUPPORT
+ if (ret != MB_INCOMPLETE)
+ bptr = laststart;
+#endif
+ /*
+ * Strip trailing IFS whitespace.
+ * iwsep can only be certain single-byte ASCII bytes, but we
+ * must check the byte isn't metafied.
+ */
while (bptr > buf) {
if (bptr > buf + 1 && bptr[-2] == Meta) {
- if (iwsep(bptr[-1] ^ 32))
- bptr -= 2;
- else
- break;
+ /* non-ASCII, can't be IWSEP */
+ break;
} else if (iwsep(bptr[-1]))
bptr--;
else
diff --git a/Src/pattern.c b/Src/pattern.c
index bc9afbae3..39c146b86 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -318,7 +318,7 @@ metacharinc(char **x)
inchar = *inptr++;
}
*x = inptr;
- return (wchar_t)inchar;
+ return (wchar_t)STOUC(inchar);
}
while (*inptr) {
@@ -352,12 +352,14 @@ typedef int patint_t;
#define PEOF EOF
#define METACHARINC(x) ((void)((x) += (*(x) == Meta) ? 2 : 1))
+#endif
+
/*
- * Return unmetafied char from string (x is any char *)
+ * Return unmetafied char from string (x is any char *).
+ * Used with MULTIBYTE_SUPPORT if the GF_MULTIBYTE is not
+ * in effect.
*/
#define UNMETA(x) (*(x) == Meta ? (x)[1] ^ 32 : *(x))
-#endif
-
/* Add n more characters, ensuring there is enough space. */
@@ -1575,7 +1577,7 @@ charref(char *x, char *y)
size_t ret;
if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80))
- return (wchar_t) *x;
+ return (wchar_t) STOUC(*x);
ret = mbrtowc(&wc, x, y-x, &shiftstate);
@@ -1583,7 +1585,7 @@ charref(char *x, char *y)
/* Error. Treat as single byte. */
/* Reset the shift state for next time. */
memset(&shiftstate, 0, sizeof(shiftstate));
- return (wchar_t) *x;
+ return (wchar_t) STOUC(*x);
}
return wc;
@@ -1626,7 +1628,7 @@ charrefinc(char **x, char *y)
size_t ret;
if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(**x) & 0x80))
- return (wchar_t) *(*x)++;
+ return (wchar_t) STOUC(*(*x)++);
ret = mbrtowc(&wc, *x, y-*x, &shiftstate);
@@ -1634,7 +1636,7 @@ charrefinc(char **x, char *y)
/* Error. Treat as single byte. */
/* Reset the shift state for next time. */
memset(&shiftstate, 0, sizeof(shiftstate));
- return (wchar_t) *(*x)++;
+ return (wchar_t) STOUC(*(*x)++);
}
/* Nulls here are normal characters */
@@ -2222,20 +2224,33 @@ patmatch(Upat prog)
}
break;
case P_ANYOF:
- if (patinput == patinend ||
- !patmatchrange((char *)P_OPERAND(scan),
- CHARREF(patinput, patinend)))
- fail = 1;
- else
- CHARINC(patinput, patinend);
- break;
case P_ANYBUT:
- if (patinput == patinend ||
- patmatchrange((char *)P_OPERAND(scan),
- CHARREF(patinput, patinend)))
+ if (patinput == patinend)
fail = 1;
- else
- CHARINC(patinput, patinend);
+ else {
+#ifdef MULTIBYTE_SUPPORT
+ wchar_t cr = CHARREF(patinput, patinend);
+ char *scanop = (char *)P_OPERAND(scan);
+ if (patglobflags & GF_MULTIBYTE) {
+ if (mb_patmatchrange(scanop, cr) ^
+ (P_OP(scan) == P_ANYOF))
+ fail = 1;
+ else
+ CHARINC(patinput, patinend);
+ } else if (patmatchrange(scanop, (int)cr) ^
+ (P_OP(scan) == P_ANYOF))
+ fail = 1;
+ else
+ CHARINC(patinput, patinend);
+#else
+ if (patmatchrange((char *)P_OPERAND(scan),
+ CHARREF(patinput, patinend)) ^
+ (P_OP(scan) == P_ANYOF))
+ fail = 1;
+ else
+ CHARINC(patinput, patinend);
+#endif
+ }
break;
case P_NUMRNG:
case P_NUMFROM:
@@ -2923,7 +2938,7 @@ patmatch(Upat prog)
/**/
static int
-patmatchrange(char *range, wchar_t ch)
+mb_patmatchrange(char *range, wchar_t ch)
{
wchar_t r1, r2;
@@ -2994,21 +3009,20 @@ patmatchrange(char *range, wchar_t ch)
return 1;
break;
case PP_IDENT:
- if (wcsiident(ch))
+ if (wcsitype(ch, IIDENT))
return 1;
break;
case PP_IFS:
- /* TODO */
- if (isep(ch))
+ if (wcsitype(ch, ISEP))
return 1;
break;
case PP_IFSSPACE:
- /* TODO */
- if (iwsep(ch))
+ /* must be ASCII space character */
+ if (ch < 128 && iwsep((int)ch))
return 1;
break;
case PP_WORD:
- if (wcsiword(ch))
+ if (wcsitype(ch, IWORD))
return 1;
break;
case PP_RANGE:
@@ -3031,7 +3045,7 @@ patmatchrange(char *range, wchar_t ch)
}
/**/
-#else
+#endif
/**/
static int
@@ -3142,9 +3156,6 @@ patmatchrange(char *range, int ch)
return 0;
}
-/**/
-#endif
-
/*
* Repeatedly match something simple and say how many times.
* charstart is an array parallel to that starting at patinput
@@ -3180,20 +3191,26 @@ static int patrepeat(Upat p, char *charstart)
}
break;
case P_ANYOF:
- while (scan < patinend &&
- patmatchrange(opnd, CHARREF(scan, patinend))) {
- charstart[scan-patinput] = 1;
- count++;
- CHARINC(scan, patinend);
- }
- break;
case P_ANYBUT:
- while (scan < patinend &&
- !patmatchrange(opnd, CHARREF(scan, patinend))) {
+ while (scan < patinend) {
+#ifdef MULTIBYTE_SUPPORT
+ wchar_t cr = CHARREF(scan, patinend);
+ if (patglobflags & GF_MULTIBYTE) {
+ if (mb_patmatchrange(opnd, cr) ^
+ (P_OP(p) == P_ANYOF))
+ break;
+ } else if (patmatchrange(opnd, (int)cr) ^
+ (P_OP(p) == P_ANYOF))
+ break;
+#else
+ if (patmatchrange(opnd, CHARREF(scan, patinend)) ^
+ P_OP(p) == P_ANYOF)
+ break;
+#endif
charstart[scan-patinput] = 1;
count++;
CHARINC(scan, patinend);
- }
+ }
break;
#ifdef DEBUG
default:
diff --git a/Src/subst.c b/Src/subst.c
index 821c1c79a..9f2703326 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -316,9 +316,14 @@ multsub(char **s, int split, char ***a, int *isarr, char *sep)
local_list1(foo);
if (split) {
- for ( ; *x; x += l+1) {
+ /*
+ * This doesn't handle multibyte characters, but we're
+ * looking for whitespace separators which must be ASCII.
+ */
+ for ( ; *x; x += l) {
char c = (l = *x == Meta) ? x[1] ^ 32 : *x;
- if (!iwsep(c))
+ l++;
+ if (!iwsep(STOUC(c)))
break;
}
}
@@ -328,20 +333,35 @@ multsub(char **s, int split, char ***a, int *isarr, char *sep)
if (split) {
LinkNode n = firstnode(&foo);
int inq = 0, inp = 0;
- for ( ; *x; x += l+1) {
- char c = (l = *x == Meta) ? x[1] ^ 32 : *x;
- if (!inq && !inp && isep(c)) {
- *x = '\0';
- for (x += l+1; *x; x += l+1) {
- c = (l = *x == Meta) ? x[1] ^ 32 : *x;
- if (!isep(c))
+ MB_METACHARINIT();
+ for ( ; *x; x += l) {
+ int rawc = -1;
+ convchar_t c;
+ if (itok(STOUC(*x))) {
+ /* token, can't be separator, must be single byte */
+ rawc = *x;
+ l = 1;
+ } else {
+ l = MB_METACHARLENCONV(x, &c);
+ if (!inq && !inp && MB_ZISTYPE(c, ISEP)) {
+ *x = '\0';
+ for (x += l; *x; x += l) {
+ if (itok(STOUC(*x))) {
+ /* as above */
+ rawc = *x;
+ l = 1;
+ break;
+ }
+ l = MB_METACHARLENCONV(x, &c);
+ if (!MB_ZISTYPE(c, ISEP))
+ break;
+ }
+ if (!*x)
break;
+ insertlinknode(&foo, n, (void *)x), incnode(n);
}
- if (!*x)
- break;
- insertlinknode(&foo, n, (void *)x), incnode(n);
}
- switch (c) {
+ switch (rawc) {
case Dnull: /* " */
case Snull: /* ' */
case Tick: /* ` (note: no Qtick!) */
@@ -357,8 +377,8 @@ multsub(char **s, int split, char ***a, int *isarr, char *sep)
case Bnull: /* \ */
case Bnullkeep:
/* The parser verified the following char's existence. */
- x += l+1;
- l = *x == Meta;
+ x += l;
+ l = MB_METACHARLEN(x);
break;
}
}
@@ -685,12 +705,14 @@ invinstrpcmp(const void *a, const void *b)
static char *
dopadding(char *str, int prenum, int postnum, char *preone, char *postone, char *premul, char *postmul)
{
- char def[3], *ret, *t, *r;
+ char *def, *ret, *t, *r;
int ls, ls2, lpreone, lpostone, lpremul, lpostmul, lr, f, m, c, cc;
- def[0] = *ifs ? *ifs : ' ';
- def[1] = *ifs == Meta ? ifs[1] ^ 32 : '\0';
- def[2] = '\0';
+ MB_METACHARINIT();
+ if (*ifs)
+ def = dupstrpfx(ifs, MB_METACHARLEN(ifs));
+ else
+ def = "";
if (preone && !*preone)
preone = def;
if (postone && !*postone)
diff --git a/Src/utils.c b/Src/utils.c
index 0d6cd8866..6ea254a4d 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -35,16 +35,65 @@
/**/
char *scriptname;
-/**/
#ifdef MULTIBYTE_SUPPORT
+/*
+ * A counted array of wide characters.  set_widearray() below fills it:
+ * chars is allocated with exactly len elements and is NOT null-terminated,
+ * so it must be searched with a length-bounded call.  chars is NULL and
+ * len is 0 when nothing has been stored.
+ */
+struct widechar_array {
+ wchar_t *chars;
+ size_t len;
+};
+typedef struct widechar_array *Widechar_array;
+
/*
* The wordchars variable turned into a wide character array.
* This is much more convenient for testing.
*/
+struct widechar_array wordchars_wide;
-/**/
-mod_export wchar_t *wordchars_wide;
-/**/
+/*
+ * The same for the separators (IFS) array.
+ */
+struct widechar_array ifs_wide;
+
+/*
+ * Set one of the above wide character arrays from a metafied
+ * multibyte string.
+ *
+ * mb_array is the metafied multibyte source string (may be NULL).
+ * wca is the array to fill; any previous contents are freed first.
+ *
+ * The array is left empty (chars NULL, len 0) if MULTIBYTE is not set,
+ * if mb_array is NULL, or if any character of mb_array cannot be
+ * converted to a wide character.
+ */
+
+static void
+set_widearray(char *mb_array, Widechar_array wca)
+{
+    if (wca->chars) {
+        free(wca->chars);
+        wca->chars = NULL;
+    }
+    wca->len = 0;
+
+    if (!isset(MULTIBYTE))
+        return;
+
+    if (mb_array) {
+        /* Can't have more wide characters than there are bytes */
+        VARARR(wchar_t, tmpwcs, strlen(mb_array));
+        wchar_t *wcptr = tmpwcs;
+        wint_t wci;
+
+        mb_metacharinit();
+        while (*mb_array) {
+            int mblen = mb_metacharlenconv(mb_array, &wci);
+
+            if (!mblen)
+                break;
+            /*
+             * No good unless all characters are convertible.
+             * Test the character just converted (wci), not the
+             * as yet unwritten slot in the temporary array.
+             */
+            if (wci == WEOF)
+                return;
+            *wcptr++ = (wchar_t)wci;
+#ifdef DEBUG
+            if (wcptr[-1] < 0)
+                fprintf(stderr, "BUG: Bad cast to wchar_t\n");
+#endif
+            mb_array += mblen;
+        }
+
+        wca->len = wcptr - tmpwcs;
+        wca->chars = (wchar_t *)zalloc(wca->len * sizeof(wchar_t));
+        wmemcpy(wca->chars, tmpwcs, wca->len);
+    }
+}
#endif
@@ -1853,9 +1902,34 @@ getquery(char *valid_chars, int purge)
if (c != '\n')
while ((d = read1char()) >= 0 && d != '\n');
} else {
- settyinfo(&shttyinfo);
- if (c != '\n' && !valid_chars)
+ if (c != '\n' && !valid_chars) {
+#ifdef MULTIBYTE_SUPPORT
+ if (isset(MULTIBYTE) && c >= 0) {
+ /*
+ * No waiting for a valid character, and no draining;
+ * we should ensure we haven't stopped in the middle
+ * of a multibyte character.
+ */
+ mbstate_t mbs;
+ char cc = (char)c;
+ memset(&mbs, 0, sizeof(mbs));
+ for (;;) {
+ size_t ret = mbrlen(&cc, 1, &mbs);
+
+ if (ret != MB_INCOMPLETE)
+ break;
+ c = read1char();
+ if (c < 0)
+ break;
+ cc = (char)c;
+ }
+ }
+#endif
+ settyinfo(&shttyinfo);
write(SHTTY, "\n", 1);
+ }
+ else
+ settyinfo(&shttyinfo);
}
return c;
}
@@ -2253,6 +2327,10 @@ skipwsep(char **s)
char *t = *s;
int i = 0;
+ /*
+ * Don't need to handle multibyte characters, they can't
+ * be IWSEP. Do need to check for metafication.
+ */
while (*t && iwsep(*t == Meta ? t[1] ^ 32 : *t)) {
if (*t == Meta)
t++;
@@ -2293,19 +2371,23 @@ spacesplit(char *s, int allownull, int heap, int quote)
t = s;
skipwsep(&s);
- if (*s && isep(*s == Meta ? s[1] ^ 32 : *s))
+ MB_METACHARINIT();
+ if (*s && itype_end(s, ISEP, 1) != s)
*ptr++ = dup(allownull ? "" : nulstring);
else if (!allownull && t != s)
*ptr++ = dup("");
while (*s) {
- if (isep(*s == Meta ? s[1] ^ 32 : *s) || (quote && *s == '\\')) {
- if (*s == Meta)
- s++;
+ char *iend = itype_end(s, ISEP, 1);
+ if (iend != s) {
+ s = iend;
+ skipwsep(&s);
+ }
+ else if (quote && *s == '\\') {
s++;
skipwsep(&s);
}
t = s;
- findsep(&s, NULL, quote);
+ (void)findsep(&s, NULL, quote);
if (s > t || allownull) {
*ptr = (heap ? (char *) hcalloc((s - t) + 1) :
(char *) zshcalloc((s - t) + 1));
@@ -2321,68 +2403,87 @@ spacesplit(char *s, int allownull, int heap, int quote)
return ret;
}
+/*
+ * Find a separator. Return 0 if already at separator, 1 if separator
+ * found later, else -1. (Historical note: used to return length into
+ * string but this is all that is necessary and is less ambiguous with
+ * multibyte characters around.)
+ *
+ * *s is the string we are looking along, which will be updated
+ * to the point we have got to.
+ *
+ * sep is a possibly multicharacter separator to look for. If NULL,
+ * use normal separator characters. If sep is an empty string, split on individual
+ * characters.
+ *
+ * quote is a flag that '\<sep>' should not be treated as a separator.
+ * in this case we need to be able to strip the backslash directly
+ * in the string, so the calling function must have sent us something
+ * modifiable. currently this only works for sep == NULL. also
+ * in this case only, we need to turn \\ into \.
+ */
+
/**/
static int
findsep(char **s, char *sep, int quote)
{
/*
- * *s is the string we are looking along, which will be updated
- * to the point we have got to.
- *
- * sep is a possibly multicharacter separator to look for. If NULL,
- * use normal separator characters.
- *
- * quote is a flag that '\<sep>' should not be treated as a separator.
- * in this case we need to be able to strip the backslash directly
- * in the string, so the calling function must have sent us something
- * modifiable. currently this only works for sep == NULL. also in
- * in this case only, we need to turn \\ into \.
*/
- int i;
+ int i, ilen;
char *t, *tt;
+ convchar_t c;
+ MB_METACHARINIT();
if (!sep) {
- for (t = *s; *t; t++) {
- if (quote && *t == '\\' &&
- (isep(t[1] == Meta ? (t[2] ^ 32) : t[1]) || t[1] == '\\')) {
- chuck(t);
- if (*t == Meta)
- t++;
- continue;
- }
- if (*t == Meta) {
- if (isep(t[1] ^ 32))
+ for (t = *s; *t; t += ilen) {
+ if (quote && *t == '\\') {
+ if (t[1] == '\\') {
+ chuck(t);
+ ilen = 1;
+ continue;
+ } else {
+ ilen = MB_METACHARLENCONV(t+1, &c);
+ if (MB_ZISTYPE(c, ISEP)) {
+ chuck(t);
+ /* then advance over new character, length ilen */
+ } else {
+ /* treat *t (backslash) as normal byte */
+ if (isep(*t))
+ break;
+ ilen = 1;
+ }
+ }
+ } else {
+ ilen = MB_METACHARLENCONV(t, &c);
+ if (MB_ZISTYPE(c, ISEP))
break;
- t++;
- } else if (isep(*t))
- break;
+ }
}
- i = t - *s;
+ i = (t > *s);
*s = t;
return i;
}
if (!sep[0]) {
+ /*
+ * Empty separator string just means advance past first character,
+ * if any.
+ */
if (**s) {
- if (**s == Meta)
- *s += 2;
- else
- ++*s;
+ *s += MB_METACHARLEN(*s);
return 1;
}
return -1;
}
for (i = 0; **s; i++) {
+ /*
+ * The following works for multibyte characters by virtue of
+ * the fact that sep may be a string (and we don't care how
+ * it divides up, we need to match all of it).
+ */
for (t = sep, tt = *s; *t && *tt && *t == *tt; t++, tt++);
if (!*t)
- return i;
- if (*(*s)++ == Meta) {
-#ifdef DEBUG
- if (! *(*s)++)
- fprintf(stderr, "BUG: unexpected end of string in findsep()\n");
-#else
- (*s)++;
-#endif
- }
+ return (i > 0);
+ *s += MB_METACHARLEN(*s);
}
return -1;
}
@@ -2405,16 +2506,15 @@ findword(char **s, char *sep)
}
return r;
}
- for (t = *s; *t; t++) {
- if (*t == Meta) {
- if (! isep(t[1] ^ 32))
- break;
- t++;
- } else if (! isep(*t))
+ MB_METACHARINIT();
+ for (t = *s; *t; t += sl) {
+ convchar_t c;
+ sl = MB_METACHARLENCONV(t, &c);
+ if (!MB_ZISTYPE(c, ISEP))
break;
}
*s = t;
- findsep(s, sep, 0);
+ (void)findsep(s, sep, 0);
return t;
}
@@ -2436,18 +2536,17 @@ wordcount(char *s, char *sep, int mul)
r = 0;
if (mul <= 0)
skipwsep(&s);
- if ((*s && isep(*s == Meta ? s[1] ^ 32 : *s)) ||
+ if ((*s && itype_end(s, ISEP, 1) != s) ||
(mul < 0 && t != s))
r++;
for (; *s; r++) {
- if (isep(*s == Meta ? s[1] ^ 32 : *s)) {
- if (*s == Meta)
- s++;
- s++;
+ char *ie = itype_end(s, ISEP, 1);
+ if (ie != s) {
+ s = ie;
if (mul <= 0)
skipwsep(&s);
}
- findsep(&s, NULL, 0);
+ (void)findsep(&s, NULL, 0);
t = s;
if (mul <= 0)
skipwsep(&s);
@@ -2464,19 +2563,20 @@ sepjoin(char **s, char *sep, int heap)
{
char *r, *p, **t;
int l, sl;
- char sepbuf[3];
+ char sepbuf[2];
if (!*s)
return heap ? "" : ztrdup("");
if (!sep) {
- p = sep = sepbuf;
- if (ifs) {
- *p++ = *ifs;
- *p++ = *ifs == Meta ? ifs[1] ^ 32 : '\0';
+ /* optimise common case that ifs[0] is space */
+ if (ifs && *ifs != ' ') {
+ MB_METACHARINIT();
+ sep = dupstrpfx(ifs, MB_METACHARLEN(ifs));
} else {
+ p = sep = sepbuf;
*p++ = ' ';
+ *p = '\0';
}
- *p = '\0';
}
sl = strlen(sep);
for (t = s, l = 1 - sl; *t; l += strlen(*t) + sl, t++);
@@ -2508,7 +2608,7 @@ sepsplit(char *s, char *sep, int allownull, int heap)
for (t = s; n--;) {
tt = t;
- findsep(&t, sep, 0);
+ (void)findsep(&t, sep, 0);
*p = (heap ? (char *) hcalloc(t - tt + 1) :
(char *) zshcalloc(t - tt + 1));
strncpy(*p, tt, t - tt);
@@ -2637,39 +2737,21 @@ inittyptab(void)
for (t0 = (int)STOUC(Snull); t0 <= (int)STOUC(Nularg); t0++)
typtab[t0] |= ITOK | IMETA | INULL;
for (s = ifs ? ifs : DEFAULT_IFS; *s; s++) {
- if (inblank(*s)) {
- if (s[1] == *s)
+ int c = STOUC(*s == Meta ? *++s ^ 32 : *s);
+#ifdef MULTIBYTE_SUPPORT
+ if (!isascii(c)) {
+ /* see comment for wordchars below */
+ continue;
+ }
+#endif
+ if (inblank(c)) {
+ if (s[1] == c)
s++;
else
- typtab[STOUC(*s)] |= IWSEP;
- }
- typtab[STOUC(*s == Meta ? *++s ^ 32 : *s)] |= ISEP;
- }
-#ifdef MULTIBYTE_SUPPORT
- if (wordchars) {
- char *wordchars_unmeta;
- const char *wordchars_ptr;
- mbstate_t mbs;
- size_t nchars;
- int unmetalen;
-
- wordchars_unmeta = dupstring(wordchars);
- wordchars_ptr = unmetafy(wordchars_unmeta, &unmetalen);
-
- memset(&mbs, 0, sizeof(mbs));
- wordchars_wide = (wchar_t *)
- zrealloc(wordchars_wide, (unmetalen+1)*sizeof(wchar_t));
- nchars = mbsrtowcs(wordchars_wide, &wordchars_ptr, unmetalen, &mbs);
- if (nchars == MB_INVALID || nchars == MB_INCOMPLETE) {
- /* Conversion state is undefined: better just set to null */
- nchars = 0;
+ typtab[c] |= IWSEP;
}
- wordchars_wide[nchars] = L'\0';
- } else {
- wordchars_wide = zrealloc(wordchars_wide, sizeof(wchar_t));
- *wordchars_wide = L'\0';
+ typtab[c] |= ISEP;
}
-#endif
for (s = wordchars ? wordchars : DEFAULT_WORDCHARS; *s; s++) {
int c = STOUC(*s == Meta ? *++s ^ 32 : *s);
#ifdef MULTIBYTE_SUPPORT
@@ -2686,6 +2768,10 @@ inittyptab(void)
#endif
typtab[c] |= IWORD;
}
+#ifdef MULTIBYTE_SUPPORT
+ set_widearray(wordchars, &wordchars_wide);
+ set_widearray(ifs, &ifs_wide);
+#endif
for (s = SPECCHARS; *s; s++)
typtab[STOUC(*s)] |= ISPECIAL;
if (specialcomma)
@@ -2718,62 +2804,60 @@ wcsiblank(wint_t wc)
}
/*
- * iword() macro extended to support wide characters.
+ * zistype macro extended to support wide characters.
+ * Works for IIDENT, IWORD, IALNUM, ISEP.
+ * We don't need this for IWSEP because that only applies to
+ * a fixed set of ASCII characters.
+ * For IIDENT, non-ASCII alphanumerics count as identifier characters
+ * unless POSIXIDENTIFIERS is set (the behaviour of the old wcsiident).
+ * Note here that use of multibyte mode is not tested:
+ * that's because for ZLE this is unconditional,
+ * not dependent on the option. The caller must decide.
*/
/**/
mod_export int
-wcsiword(wchar_t c)
+wcsitype(wchar_t c, int itype)
{
int len;
VARARR(char, outstr, MB_CUR_MAX);
+
+ if (!isset(MULTIBYTE))
+ return zistype(c, itype);
+
/*
* Strategy: the shell requires that the multibyte representation
* be an extension of ASCII. So see if converting the character
- * produces an ASCII character. If it does, use iword on that.
- * If it doesn't, use iswalnum on the original character. This
- * is pretty good most of the time.
+ * produces an ASCII character. If it does, use zistype on that.
+ * If it doesn't, use iswalnum on the original character.
+ * If that fails, resort to the appropriate wide character array.
*/
len = wctomb(outstr, c);
if (len == 0) {
/* NULL is special */
- return iword(0);
+ return zistype(0, itype);
} else if (len == 1 && iascii(*outstr)) {
- return iword(*outstr);
+ return zistype(*outstr, itype);
} else {
- return iswalnum(c) || wcschr(wordchars_wide, c);
- }
-}
-
-/*
- * iident() macro extended to support wide characters.
- *
- * The macro is intended to test if a character is allowed in an
- * internal zsh identifier. We allow all alphanumerics outside
- * the ASCII range unless POSIXIDENTIFIERS is set.
- *
- * Otherwise similar to wcsiword.
- */
+ switch (itype) {
+ case IIDENT:
+ /* POSIX identifiers are ASCII only: reject everything here */
+ if (isset(POSIXIDENTIFIERS))
+ return 0;
+ return iswalnum(c);
-/**/
-mod_export int
-wcsiident(wchar_t c)
-{
- int len;
- VARARR(char, outstr, MB_CUR_MAX);
+ case IWORD:
+ if (iswalnum(c))
+ return 1;
+ return !!wmemchr(wordchars_wide.chars, c, wordchars_wide.len);
- len = wctomb(outstr, c);
+ case ISEP:
+ return !!wmemchr(ifs_wide.chars, c, ifs_wide.len);
- if (len == 0) {
- /* NULL is special */
- return 0;
- } else if (len == 1 && iascii(*outstr)) {
- return iident(*outstr);
- } else {
- return !isset(POSIXIDENTIFIERS) && iswalnum(c);
+ default:
+ return iswalnum(c);
+ }
}
}
+
/**/
#endif
@@ -2789,7 +2873,7 @@ wcsiident(wchar_t c)
* If "once" is set, just test the first character, i.e. (outptr !=
* inptr) tests whether the first character is valid in an identifier.
*
- * Currently this is only called with itype IIDENT or IUSER.
+ * Currently this is only called with itype IIDENT, IUSER or ISEP.
*/
/**/
@@ -2819,12 +2903,25 @@ itype_end(const char *ptr, int itype, int once)
break;
} else {
/*
- * Valid non-ASCII character. Allow all alphanumerics;
- * if testing for words, allow all wordchars.
+ * Valid non-ASCII character.
*/
- if (!(iswalnum(wc) ||
- (itype == IWORD && wcschr(wordchars_wide, wc))))
+ switch (itype) {
+ case IWORD:
+ if (!iswalnum(wc) &&
+ !wmemchr(wordchars_wide.chars, wc,
+ wordchars_wide.len))
+ return (char *)ptr;
break;
+
+ case ISEP:
+ if (!wmemchr(ifs_wide.chars, wc, ifs_wide.len))
+ return (char *)ptr;
+ break;
+
+ default:
+ if (!iswalnum(wc))
+ return (char *)ptr;
+ }
}
ptr += len;
@@ -3791,16 +3888,22 @@ mb_metacharlenconv(const char *s, wint_t *wcp)
wchar_t wc;
if (!isset(MULTIBYTE)) {
+ /* treat as single byte, possibly metafied */
if (wcp)
- *wcp = WEOF;
+ *wcp = (wint_t)(*s == Meta ? s[1] ^ 32 : *s);
return 1 + (*s == Meta);
}
ret = MB_INVALID;
for (ptr = s; *ptr; ) {
- if (*ptr == Meta)
+ if (*ptr == Meta) {
inchar = *++ptr ^ 32;
- else
+#ifdef DEBUG
+ if (!*ptr)
+ fprintf(stderr,
+ "BUG: unexpected end of string in mb_metacharlen()\n");
+#endif
+ } else
inchar = *ptr;
ptr++;
ret = mbrtowc(&wc, &inchar, 1, &mb_shiftstate);
@@ -3874,6 +3977,23 @@ mb_metastrlen(char *ptr)
}
/**/
+#else
+
+/*
+ * Simple replacement for mb_metacharlenconv, used when
+ * MULTIBYTE_SUPPORT is not compiled in.
+ *
+ * Returns the number of bytes taken up by the possibly metafied
+ * character at x: 2 if it begins with Meta, otherwise 1. If c is
+ * non-NULL the unmetafied character is stored there.
+ */
+int
+metacharlenconv(char *x, int *c)
+{
+ if (*x == Meta) {
+ if (c)
+ *c = STOUC(x[1] ^ 32);
+ return 2;
+ }
+ if (c)
+ *c = STOUC(*x);
+ return 1;
+}
+
+/**/
#endif /* MULTIBYTE_SUPPORT */
/* check for special characters in the string */
diff --git a/Src/zsh.h b/Src/zsh.h
index b5f675db5..25399b9d9 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -1925,6 +1925,8 @@ typedef char *(*ZleGetLineFn) _((int *, int *));
#ifdef MULTIBYTE_SUPPORT
#define nicezputs(str, outs) (void)mb_niceformat((str), (outs), NULL, 0)
#define MB_METACHARINIT() mb_metacharinit()
+typedef wint_t convchar_t;
+#define MB_METACHARLENCONV(str, cp) mb_metacharlenconv((str), (cp))
#define MB_METACHARLEN(str) mb_metacharlenconv(str, NULL)
#define MB_METASTRLEN(str) mb_metastrlen(str)
@@ -1948,6 +1950,8 @@ typedef char *(*ZleGetLineFn) _((int *, int *));
#else
#define MB_METACHARINIT()
+typedef int convchar_t;
+#define MB_METACHARLENCONV(str, cp) metacharlenconv((str), (cp))
#define MB_METACHARLEN(str) (*(str) == Meta ? 2 : 1)
#define MB_METASTRLEN(str) ztrlen(str)
diff --git a/Src/ztype.h b/Src/ztype.h
index 7b7973602..7aa56b073 100644
--- a/Src/ztype.h
+++ b/Src/ztype.h
@@ -59,6 +59,12 @@
#define iwsep(X) zistype(X,IWSEP)
#define inull(X) zistype(X,INULL)
+#ifdef MULTIBYTE_SUPPORT
+#define MB_ZISTYPE(X,Y) wcsitype((X),(Y))
+#else
+#define MB_ZISTYPE(X,Y) zistype((X),(Y))
+#endif
+
#define iascii(X) isascii(STOUC(X))
#define ilower(X) islower(STOUC(X))
#define iprint(X) isprint(STOUC(X))
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index 73e87d0f8..6e97d7450 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -725,6 +725,29 @@
>7
>8
+# Tests a long-standing bug with joining on metafied characters in IFS
+ (array=(one two three)
+ IFS=$'\0'
+ foo="$array"
+ for (( i = 1; i <= ${#foo}; i++ )); do
+ char=${foo[i]}
+ print $(( #char ))
+ done)
+0:Joining with NULL character from IFS
+>111
+>110
+>101
+>0
+>116
+>119
+>111
+>0
+>116
+>104
+>114
+>101
+>101
+
unset SHLVL
(( SHLVL++ ))
print $SHLVL
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index 20c967540..683e8350e 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -174,3 +174,57 @@
1:POSIX_IDENTIFIERS option
>3
?(eval):1: command not found: hähä=3
+
+ foo="Ølaf«Ødd«øpénëd«ån«àpple"
+ print -l ${(s.«.)foo}
+ ioh="Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος."
+ print -l ${=ioh}
+ print ${(w)#ioh}
+0:Splitting with multibyte characters
+>Ølaf
+>Ødd
+>øpénëd
+>ån
+>àpple
+>Ἐν
+>ἀρχῇ
+>ἦν
+>ὁ
+>λόγος,
+>καὶ
+>ὁ
+>λόγος
+>ἦν
+>πρὸς
+>τὸν
+>θεόν,
+>καὶ
+>θεὸς
+>ἦν
+>ὁ
+>λόγος.
+>17
+
+ read -d £ one
+ read -d £ two
+ print $one
+ print $two
+0:read with multibyte delimiter
+<first£second£
+>first
+>second
+
+ (IFS=«
+ read -d » -A array
+ print -l $array)
+0:read -A with multibyte IFS
+<dominus«illuminatio«mea»ignored
+>dominus
+>illuminatio
+>mea
+
+ read -k2 -u0 twochars
+ print $twochars
+0:read multibyte characters
+<«»ignored
+>«»