summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  ChangeLog               5
-rw-r--r--  Src/Zle/zle_utils.c     16
-rw-r--r--  Src/builtin.c           10
-rw-r--r--  Test/D07multibyte.ztst  21
4 files changed, 43 insertions, 9 deletions
diff --git a/ChangeLog b/ChangeLog
index a74daa4d6..11d8293fd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
2007-12-17 Peter Stephenson <pws@csr.com>
+ * 24275: Src/builtin.c, Src/Zle/zle_utils.c,
+ Test/D07multibyte.ztst: Solaris returns the full character
+ length from mbrlen() etc. even if the call started in the
+ middle; bad characters are silently converted to a question mark.
+
* unposted: Config/version.mk: 4.3.4-dev-5.
* unposted: Src/lex.c: minor typo
diff --git a/Src/Zle/zle_utils.c b/Src/Zle/zle_utils.c
index 6583ef503..a146b67c0 100644
--- a/Src/Zle/zle_utils.c
+++ b/Src/Zle/zle_utils.c
@@ -294,6 +294,16 @@ stringaszleline(char *instr, int incs, int *outll, int *outsz, int *outcs)
* (certainly true for Unicode and unlikely to be false
* in any non-pathological multibyte representation). */
cnt = 1;
+ } else if (cnt > ll) {
+ /*
+ * Some multibyte implementations return the
+ * full length of a previous incomplete character
+ * instead of the remaining length.
+ * This is paranoia: it only applies if we start
+ * midway through a multibyte character, which
+ * presumably can't happen.
+ */
+ cnt = ll;
}
if (outcs) {
@@ -843,6 +853,12 @@ showmsg(char const *msg)
cnt = 1;
/* FALL THROUGH */
default:
+ /*
+ * Paranoia: only needed if we start in the middle
+ * of a multibyte string and only in some implementations.
+ */
+ if (cnt > ulen)
+ cnt = ulen;
n = wcs_nicechar(c, &width, NULL);
break;
}
diff --git a/Src/builtin.c b/Src/builtin.c
index 7bd4c6d83..8ded1c131 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -4927,7 +4927,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
break;
}
*bptr = (char) val;
-#ifdef MULTIBYTE_SUPPORT
+#ifdef MULTIBYTE_SUPPORT
if (isset(MULTIBYTE)) {
ret = mbrlen(bptr++, 1, &mbs);
if (ret == MB_INVALID)
@@ -4954,8 +4954,8 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
eof = 1;
break;
}
-
-#ifdef MULTIBYTE_SUPPORT
+
+#ifdef MULTIBYTE_SUPPORT
if (isset(MULTIBYTE)) {
while (val > 0) {
ret = mbrlen(bptr, val, &mbs);
@@ -4970,6 +4970,10 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
}
else if (ret == 0) /* handle null as normal char */
ret = 1;
+ else if (ret > val) {
+ /* Some mbrlen()s return the full char len */
+ ret = val;
+ }
nchars--;
val -= ret;
bptr += ret;
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index 993e85b9b..2ebea2e10 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -388,9 +388,18 @@
# This also isn't strictly multibyte and is here to reduce the
# likelihood of a "can't do character set conversion" error.
testfn() { (LC_ALL=C; print $'\u00e9') }
- repeat 4 testfn
-1:error handling in Unicode quoting
-?testfn: character not in range
-?testfn: character not in range
-?testfn: character not in range
-?testfn: character not in range
+ repeat 4 testfn 2>&1 | while read line; do
+ if [[ $line = *"character not in range"* ]]; then
+ print OK
+ elif [[ $line = "?" ]]; then
+ print OK
+ else
+ print Failed: no error message and no question mark
+ fi
+ done
+ true
+0:error handling in Unicode quoting
+>OK
+>OK
+>OK
+>OK