summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog9
-rw-r--r--Doc/Zsh/zle.yo14
-rw-r--r--Src/Zle/zle.h14
-rw-r--r--Src/Zle/zle_refresh.c12
-rw-r--r--Src/Zle/zle_utils.c47
5 files changed, 82 insertions, 14 deletions
diff --git a/ChangeLog b/ChangeLog
index 222789bc2..a3f194dee 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2010-03-22 Peter Stephenson <p.w.stephenson@ntlworld.com>
+
+ * 27812: Doc/Zsh/zle.yo, Src/Zle/zle.h, Src/Zle/zle_refresh.c,
+ Src/Zle/zle_utils.c: when wchar_t contains Unicode code points,
+ use private area to put bytes that don't form characters for
+ special display.
+
2010-03-22 Peter Stephenson <pws@csr.com>
* 27822: Src/hist.c, Src/lex.c, Src/zle_params.c,
@@ -12949,5 +12956,5 @@
*****************************************************
* This is used by the shell to define $ZSH_PATCHLEVEL
-* $Revision: 1.4941 $
+* $Revision: 1.4942 $
*****************************************************
diff --git a/Doc/Zsh/zle.yo b/Doc/Zsh/zle.yo
index 91c13a563..0e2fea5bd 100644
--- a/Doc/Zsh/zle.yo
+++ b/Doc/Zsh/zle.yo
@@ -2286,6 +2286,20 @@ angle brackets. The number is the code point of the character in the wide
character set; this may or may not be Unicode, depending on the operating
system.
)
+item(Invalid multibyte characters)(
+If the tt(MULTIBYTE) option is in effect, any sequence of one or more
+bytes that does not form a valid character in the current character
+set is treated as a series of bytes each shown as a special character.
+This case can be distinguished from other unprintable characters
+as the bytes are represented as two hexadecimal digits between angle
+brackets, as distinct from the four or eight digits that are used for
+unprintable characters that are nonetheless valid in the current
+character set.
+
+Not all systems support this: for it to work, the system's representation of
+wide characters must be code values from the Universal Character Set,
+as defined by ISO 10646 (also known as Unicode).
+)
enditem()
If tt(zle_highlight) is not set or no value applies to a particular
diff --git a/Src/Zle/zle.h b/Src/Zle/zle.h
index 577a4442f..32f3e59f6 100644
--- a/Src/Zle/zle.h
+++ b/Src/Zle/zle.h
@@ -419,6 +419,20 @@ typedef struct {
typedef REFRESH_ELEMENT *REFRESH_STRING;
+#if defined(MULTIBYTE_SUPPORT) && defined(__STDC_ISO_10646__)
+#define ZSH_INVALID_WCHAR_BASE (0xe000U)
+#define ZSH_INVALID_WCHAR_TEST(x) \
+ ((unsigned)(x) >= ZSH_INVALID_WCHAR_BASE && \
+ (unsigned)(x) <= (ZSH_INVALID_WCHAR_BASE + 255u))
+#define ZSH_INVALID_WCHAR_TO_CHAR(x) \
+ ((char)((unsigned)(x) - ZSH_INVALID_WCHAR_BASE))
+#define ZSH_INVALID_WCHAR_TO_INT(x) \
+ ((int)((unsigned)(x) - ZSH_INVALID_WCHAR_BASE))
+#define ZSH_CHAR_TO_INVALID_WCHAR(x) \
+ ((wchar_t)(STOUC(x) + ZSH_INVALID_WCHAR_BASE))
+#endif
+
+
#ifdef DEBUG
#define METACHECK() \
DPUTS(zlemetaline == NULL, "line not metafied")
diff --git a/Src/Zle/zle_refresh.c b/Src/Zle/zle_refresh.c
index 8604317f3..352dcf0d6 100644
--- a/Src/Zle/zle_refresh.c
+++ b/Src/Zle/zle_refresh.c
@@ -1263,7 +1263,11 @@ zrefresh(void)
}
}
#ifdef MULTIBYTE_SUPPORT
- else if (iswprint(*t) && (width = WCWIDTH(*t)) > 0) {
+ else if (
+#ifdef __STDC_ISO_10646__
+ !ZSH_INVALID_WCHAR_TEST(*t) &&
+#endif
+ iswprint(*t) && (width = WCWIDTH(*t)) > 0) {
int ichars;
if (width > rpms.sen - rpms.s) {
int started = 0;
@@ -1367,6 +1371,12 @@ zrefresh(void)
wchar_t wc;
int started = 0;
+#ifdef __STDC_ISO_10646__
+ if (ZSH_INVALID_WCHAR_TEST(*t)) {
+ int c = ZSH_INVALID_WCHAR_TO_INT(*t);
+ sprintf(dispchars, "<%.02x>", c);
+ } else
+#endif
if ((unsigned)*t > 0xffffU) {
sprintf(dispchars, "<%.08x>", (unsigned)*t);
} else {
diff --git a/Src/Zle/zle_utils.c b/Src/Zle/zle_utils.c
index 2b2da7dcd..cc84eb8bb 100644
--- a/Src/Zle/zle_utils.c
+++ b/Src/Zle/zle_utils.c
@@ -120,11 +120,19 @@ zlecharasstring(ZLE_CHAR_T inchar, char *buf)
size_t ret;
char *ptr;
- ret = wctomb(buf, inchar);
- if (ret <= 0) {
- /* Ick. */
- buf[0] = '?';
- return 1;
+#ifdef __STDC_ISO_10646__
+ if (ZSH_INVALID_WCHAR_TEST(inchar)) {
+ buf[0] = ZSH_INVALID_WCHAR_TO_CHAR(inchar);
+ ret = 1;
+ } else
+#endif
+ {
+ ret = wctomb(buf, inchar);
+ if (ret <= 0) {
+ /* Ick. */
+ buf[0] = '?';
+ return 1;
+ }
}
ptr = buf + ret - 1;
for (;;) {
@@ -196,13 +204,20 @@ zlelineasstring(ZLE_STRING_T instr, int inll, int incs, int *outllp,
for (i=0; i < inll; i++, incs--) {
if (incs == 0)
outcs = mb_len;
- j = wcrtomb(s + mb_len, instr[i], &mbs);
- if (j == -1) {
- /* invalid char; what to do? */
- s[mb_len++] = ZWC('?');
- memset(&mbs, 0, sizeof(mbs));
- } else {
- mb_len += j;
+#ifdef __STDC_ISO_10646__
+ if (ZSH_INVALID_WCHAR_TEST(instr[i])) {
+ s[mb_len++] = ZSH_INVALID_WCHAR_TO_CHAR(instr[i]);
+ } else
+#endif
+ {
+ j = wcrtomb(s + mb_len, instr[i], &mbs);
+ if (j == -1) {
+ /* invalid char */
+ s[mb_len++] = ZWC('?');
+ memset(&mbs, 0, sizeof(mbs));
+ } else {
+ mb_len += j;
+ }
}
}
if (incs == 0)
@@ -332,6 +347,13 @@ stringaszleline(char *instr, int incs, int *outll, int *outsz, int *outcs)
while (ll > 0) {
size_t cnt = mbrtowc(outptr, inptr, ll, &mbs);
+#ifdef __STDC_ISO_10646__
+ if (cnt == MB_INCOMPLETE || cnt == MB_INVALID) {
+ /* Use private encoding for invalid single byte */
+ *outptr = ZSH_CHAR_TO_INVALID_WCHAR(*inptr);
+ cnt = 1;
+ }
+#else
/*
* At this point we don't handle either incomplete (-2) or
* invalid (-1) multibyte sequences. Use the current length
@@ -339,6 +361,7 @@ stringaszleline(char *instr, int incs, int *outll, int *outsz, int *outcs)
*/
if (cnt == MB_INCOMPLETE || cnt == MB_INVALID)
break;
+#endif
if (cnt == 0) {
/* Converting '\0' returns 0, but a '\0' is a real