summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog8
-rw-r--r--Src/pattern.c28
-rw-r--r--Test/D07multibyte.ztst17
3 files changed, 47 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index f9822b9c8..860e09df4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2015-09-04 Peter Stephenson <p.stephenson@samsung.com>
+
+ * 36415: Src/pattern.c, Test/D07multibyte.ztst: remap bytes from
+ invalid multibyte characters to 0xDC00 + index which is invalid
+ in Unicode. Strictly this only works if wchar_t is
+ ISO-10646-compliant, however it ought to be at least as good as
+ the current fudge in any case.
+
2015-09-03 Peter Stephenson <p.stephenson@samsung.com>
* 36416: Src/Zle/zle_refresh.c, Src/Zle/zle_utils.c: If
diff --git a/Src/pattern.c b/Src/pattern.c
index 7d38988a0..7457cbd23 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -224,6 +224,22 @@ typedef zlong zrange_t;
typedef unsigned long zrange_t;
#endif
+#ifdef MULTIBYTE_SUPPORT
+/*
+ * Map a byte that's not part of a valid character to 0xDC00 + byte.
+ *
+ * That lands in the Unicode low-surrogate range (assuming STOUC()
+ * yields 0..255), which never denotes a valid character by itself.
+ *
+ * Note that this strictly only works if wchar_t represents
+ * Unicode code points, which isn't necessarily true; however,
+ * converting an invalid character into an unknown format is
+ * a bit tricky...
+ */
+#define WCHAR_INVALID(ch) \
+ ((wchar_t) (0xDC00 + STOUC(ch)))
+#endif /* MULTIBYTE_SUPPORT */
+
/*
* Array of characters corresponding to zpc_chars enum, which it must match.
*/
@@ -353,10 +369,10 @@ metacharinc(char **x)
return wc;
}
- /* Error. Treat as single byte. */
+ /* Error. */
/* Reset the shift state for next time. */
memset(&shiftstate, 0, sizeof(shiftstate));
- return (wchar_t) STOUC(*(*x)++);
+ return WCHAR_INVALID(*(*x)++);
}
#else
@@ -1867,10 +1883,10 @@ charref(char *x, char *y)
ret = mbrtowc(&wc, x, y-x, &shiftstate);
if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
- /* Error. Treat as single byte. */
+ /* Error. */
/* Reset the shift state for next time. */
memset(&shiftstate, 0, sizeof(shiftstate));
- return (wchar_t) STOUC(*x);
+ return WCHAR_INVALID(*x);
}
return wc;
@@ -1913,7 +1929,8 @@ charrefinc(char **x, char *y, int *z)
size_t ret;
+ /* Not an error path: a plain single byte is returned unmapped. */
if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(**x) & 0x80))
return (wchar_t) STOUC(*(*x)++);
ret = mbrtowc(&wc, *x, y-*x, &shiftstate);
@@ -1922,7 +1938,7 @@ charrefinc(char **x, char *y, int *z)
*z = 1;
/* Reset the shift state for next time. */
memset(&shiftstate, 0, sizeof(shiftstate));
- return (wchar_t) STOUC(*(*x)++);
+ return WCHAR_INVALID(*(*x)++);
}
/* Nulls here are normal characters */
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index 0e3e98d38..3fadd8066 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -508,3 +508,20 @@
cd ..
}
0:cd with special characters
+
+ test_array=(
+ '[[ \xcc = \xcc ]]'
+ '[[ \xcc != \xcd ]]'
+ '[[ \xcc != \ucc ]]'
+ '[[ \ucc = \ucc ]]'
+ '[[ \ucc = [\ucc] ]]'
+ '[[ \xcc != [\ucc] ]]'
+ # Not clear how useful the following is...
+ '[[ \xcc = [\xcc] ]]'
+ )
+ for test in $test_array; do
+ if ! eval ${(g::)test} ; then
+ print -rl "Test $test failed" >&2
+ fi
+ done
+0:Invalid characters in pattern matching