summary refs log tree commit diff
diff options
context:
space:
mode:
author Oliver Kiddle <opk@zsh.org> 2022-12-17 00:09:37 +0100
committer Oliver Kiddle <opk@zsh.org> 2022-12-17 00:37:19 +0100
commit 35a2f155c3b92e67957325e1f49e409546378e3e (patch)
tree 7c4593cf459ae5067627043c7fba15866c48c8c6
parent 2701ab161df1f259b8292a650a4ea5cebd668d81 (diff)
download zsh-35a2f155c3b92e67957325e1f49e409546378e3e.tar.gz
zsh-35a2f155c3b92e67957325e1f49e409546378e3e.zip
51214: handle read -d and a delimiter that can't be decoded into a character
Terminate input at the raw byte value of the delimiter. Also document and test the use of an empty string as a way to specify NUL as the delimiter.
-rw-r--r-- ChangeLog 4
-rw-r--r-- Doc/Zsh/builtins.yo 3
-rw-r--r-- Src/builtin.c 7
-rw-r--r-- Test/B04read.ztst 4
-rw-r--r-- Test/D07multibyte.ztst 14
5 files changed, 29 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index 5b0af2135..130bec319 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
2022-12-16 Oliver Kiddle <opk@zsh.org>
+ * 51214: Doc/Zsh/builtins.yo, Src/builtin.c, Test/B04read.ztst,
+ Test/D07multibyte.ztst: with read -d and a delimiter that can't be
+ decoded into a character terminate input at the raw byte value
+
* Jun T.: 51207: Src/builtin.c, Test/B04read.ztst:
fix for read -d when the delimiter is a byte >= 0x80
diff --git a/Doc/Zsh/builtins.yo b/Doc/Zsh/builtins.yo
index b6217f66d..56428a714 100644
--- a/Doc/Zsh/builtins.yo
+++ b/Doc/Zsh/builtins.yo
@@ -1589,7 +1589,8 @@ Input is read from the coprocess.
)
item(tt(-d) var(delim))(
Input is terminated by the first character of var(delim) instead of
-by newline.
+by newline. For compatibility with other shells, if var(delim) is an
+empty string, input is terminated at the first NUL.
)
item(tt(-t) [ var(num) ])(
Test if input is available before attempting to read. If var(num)
diff --git a/Src/builtin.c b/Src/builtin.c
index 951970138..70a950666 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -6282,6 +6282,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
long izle_timeout = 0;
#ifdef MULTIBYTE_SUPPORT
wchar_t delim = L'\n', wc;
+ int rawbyte = 0;
mbstate_t mbs;
char *laststart;
size_t ret;
@@ -6412,9 +6413,11 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
wi = WEOF;
if (wi != WEOF)
delim = (wchar_t)wi;
- else
+ else {
delim = (wchar_t) (unsigned char) ((delimstr[0] == Meta) ?
delimstr[1] ^ 32 : delimstr[0]);
+ rawbyte = 1;
+ }
#else
delim = (unsigned char) ((delimstr[0] == Meta) ?
delimstr[1] ^ 32 : delimstr[0]);
@@ -6842,7 +6845,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
break;
}
*bptr = (char)c;
- if (isset(MULTIBYTE)) {
+ if (isset(MULTIBYTE) && !rawbyte) {
ret = mbrtowc(&wc, bptr, 1, &mbs);
if (!ret) /* NULL */
ret = 1;
diff --git a/Test/B04read.ztst b/Test/B04read.ztst
index 96adf51c7..14bdaeef5 100644
--- a/Test/B04read.ztst
+++ b/Test/B04read.ztst
@@ -82,6 +82,10 @@
>Testing the
>null hypothesis
+ read -ed '' <<<$'one\0two'
+0:empty delimiter terminates at nulls
+>one
+
print -n $'first line\x80second line\x80' |
while read -d $'\x80' line; do print $line; done
0:read with a delimiter >= 0x80
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index 6909346cb..413c4fe73 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -212,6 +212,20 @@
>first
>second
+ read -ed £
+0:read with multibyte delimiter where bytes of delimiter also occur in input
+<one¤twoãthree£four
+>one¤twoãthree
+
+ read -ed $'\xa0' <<<$'first\xa0second'
+0:read delimited by a byte that isn't a valid multibyte character
+>first
+
+ read -ed $'\xc2'
+0:read delimited by a single byte terminates if the byte is part of a multibyte character
+<one£two
+>one
+
(IFS=«
read -d » -A array
print -l $array)