summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 7
-rw-r--r-- Src/Zle/zle.h 7
-rw-r--r-- Src/Zle/zle_main.c 19
-rw-r--r-- Src/init.c 3
-rw-r--r-- Src/params.c 1
-rw-r--r-- Src/pattern.c 4
-rw-r--r-- Src/utils.c 98
7 files changed, 104 insertions, 35 deletions
diff --git a/ChangeLog b/ChangeLog
index 06560d2fc..1052e53ee 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2005-09-20 Peter Stephenson <pws@csr.com>
+
+ * 21736: Src/init.c, Src/params.c, Src/pattern.c, Src/utils.c,
+ Src/Zle/zle.h, Src/Zle/zle_main.c: Fix WORDCHARS to use multibyte
+ characters; rationalise test for identifiers only to use ASCII
+ characters; remove existing hack for 8-bit characters.
+
2005-09-19 Peter Stephenson <pws@csr.com>
* unposted, c.f. 21735: Doc/Zsh/grammar.yo: document
diff --git a/Src/Zle/zle.h b/Src/Zle/zle.h
index 4b3f3f75a..fbfc02265 100644
--- a/Src/Zle/zle.h
+++ b/Src/Zle/zle.h
@@ -66,12 +66,7 @@ typedef wint_t ZLE_INT_T;
#define ZC_iblank iswspace
#define ZC_icntrl iswcntrl
-/*
- * TODO: doesn't work on arguments with side effects.
- * Also YUK. Not even sure this is guaranteed to work.
- * Should be easy to do along the lines of wcsiword.
- */
-#define ZC_iident(x) (x < 256 && iident((int)x))
+#define ZC_iident wcsiident
#define ZC_tolower towlower
#define ZC_toupper towupper
diff --git a/Src/Zle/zle_main.c b/Src/Zle/zle_main.c
index 1b62ff027..923145710 100644
--- a/Src/Zle/zle_main.c
+++ b/Src/Zle/zle_main.c
@@ -106,11 +106,6 @@ mod_export ZLE_INT_T lastchar_wide;
/**/
mod_export int
lastchar_wide_valid;
-
-/**/
-mod_export ZLE_STRING_T zle_wordchars;
-#else
-# define zle_wordchars wordchars;
#endif
/* the bindings for the previous and for this key */
@@ -1558,17 +1553,6 @@ trashzle(void)
kungetct = 0;
}
-/**/
-mod_export void
-wordcharstrigger(void)
-{
-#ifdef ZLE_UNICODE_SUPPORT
- zrealloc(zle_wordchars, strlen(wordchars)*MB_CUR_MAX);
- mbsrtowcs(zle_wordchars, (const char **)&wordchars,
- strlen(wordchars), NULL);
- /* TODO: error handling here */
-#endif
-}
/* Hook functions. Used to allow access to zle parameters if zle is
* active. */
@@ -1636,8 +1620,6 @@ setup_(UNUSED(Module m))
kungetbuf = (char *) zalloc(kungetsz = 32);
comprecursive = 0;
rdstrs = NULL;
- wordcharstriggerptr = wordcharstrigger;
- wordcharstrigger();
/* initialise the keymap system */
init_keymaps();
@@ -1712,7 +1694,6 @@ finish_(UNUSED(Module m))
zlegetlineptr = NULL;
zlereadptr = fallback_zleread;
zlesetkeymapptr= noop_function_int;
- wordcharstriggerptr = noop_function;
getkeyptr = NULL;
diff --git a/Src/init.c b/Src/init.c
index de6d4efcb..716898e28 100644
--- a/Src/init.c
+++ b/Src/init.c
@@ -1180,9 +1180,6 @@ mod_export ZleVoidIntFn zlesetkeymapptr = noop_function_int;
#endif /* !LINKED_XMOD_zshQszle */
/**/
-mod_export ZleVoidFn wordcharstriggerptr = noop_function;
-
-/**/
unsigned char *
autoload_zleread(char **lp, char **rp, int ha, int con)
{
diff --git a/Src/params.c b/Src/params.c
index 89d25afee..218744000 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -3346,7 +3346,6 @@ wordcharssetfn(UNUSED(Param pm), char *x)
zsfree(wordchars);
wordchars = x;
inittyptab();
- wordcharstriggerptr();
}
/* Function to get value for special parameter `_' */
diff --git a/Src/pattern.c b/Src/pattern.c
index 393d9bf41..36578226c 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -2749,6 +2749,10 @@ patmatchrange(char *range, int ch)
return 1;
break;
case PP_WORD:
+ /*
+ * HERE: when we support multibyte characters,
+ * this test needs to be wcsiword().
+ */
if (iword(ch))
return 1;
break;
diff --git a/Src/utils.c b/Src/utils.c
index 71af531c3..dce10beee 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -35,6 +35,16 @@
/**/
char *scriptname;
+#ifdef ZLE_UNICODE_SUPPORT
+/*
+ * The wordchars variable turned into a wide character array.
+ * This is much more convenient for testing.
+ */
+
+/**/
+mod_export wchar_t *wordchars_wide;
+#endif
+
/* Print an error */
/**/
@@ -2456,8 +2466,18 @@ inittyptab(void)
typtab[t0] = IDIGIT | IALNUM | IWORD | IIDENT | IUSER;
for (t0 = 'a'; t0 <= 'z'; t0++)
typtab[t0] = typtab[t0 - 'a' + 'A'] = IALPHA | IALNUM | IIDENT | IUSER | IWORD;
+#ifndef ZLE_UNICODE_SUPPORT
+ /*
+ * This really doesn't seem to me the right thing to do when
+ * we have multibyte character support... it was a hack to assume
+ * eight bit characters `worked' for some values of work before
+ * we could test for them properly. I'm not 100% convinced
+ * having IIDENT here is a good idea at all, but this code
+ * should disappear into history...
+ */
for (t0 = 0240; t0 != 0400; t0++)
typtab[t0] = IALPHA | IALNUM | IIDENT | IUSER | IWORD;
+#endif
typtab['_'] = IIDENT | IUSER;
typtab['-'] = IUSER;
typtab[' '] |= IBLANK | INBLANK;
@@ -2477,8 +2497,44 @@ inittyptab(void)
}
typtab[STOUC(*s == Meta ? *++s ^ 32 : *s)] |= ISEP;
}
- for (s = wordchars ? wordchars : DEFAULT_WORDCHARS; *s; s++)
- typtab[STOUC(*s == Meta ? *++s ^ 32 : *s)] |= IWORD;
+#ifdef ZLE_UNICODE_SUPPORT
+ if (wordchars) {
+ const char *wordchars_ptr = wordchars;
+ mbstate_t mbs;
+ int nchars;
+
+ memset(&mbs, 0, sizeof(mbs));
+ wordchars_wide = (wchar_t *)
+ zrealloc(wordchars_wide, (strlen(wordchars)+1)*sizeof(wchar_t));
+ nchars = mbsrtowcs(wordchars_wide, &wordchars_ptr, strlen(wordchars),
+ &mbs);
+ if (nchars == -1) {
+ /* Conversion state is undefined: better just set to null */
+ *wordchars_wide = L'\0';
+ } else {
+ wordchars_wide[nchars] = L'\0';
+ }
+ } else {
+ wordchars_wide = zrealloc(wordchars_wide, sizeof(wchar_t));
+ *wordchars_wide = L'\0';
+ }
+#endif
+ for (s = wordchars ? wordchars : DEFAULT_WORDCHARS; *s; s++) {
+ int c = STOUC(*s == Meta ? *++s ^ 32 : *s);
+#ifdef ZLE_UNICODE_SUPPORT
+ if (!isascii(c)) {
+ /*
+ * If we have support for multibyte characters, we don't
+ * handle non-ASCII characters here; instead, we turn
+ * wordchars into a wide character array.
+ * (We may actually have a single-byte 8-bit character set,
+ * but it works the same way.)
+ */
+ continue;
+ }
+#endif
+ typtab[c] |= IWORD;
+ }
for (s = SPECCHARS; *s; s++)
typtab[STOUC(*s)] |= ISPECIAL;
if (isset(BANGHIST) && bangchar && interact && isset(SHINSTDIN))
@@ -2503,9 +2559,6 @@ wcsiword(wchar_t c)
* produces an ASCII character. If it does, use iword on that.
* If it doesn't, use iswalnum on the original character. This
* is pretty good most of the time.
- *
- * TODO: extend WORDCHARS to handle multibyte chars by some kind
- * of hierarchical list or hash table.
*/
len = wctomb(outstr, c);
@@ -2515,7 +2568,40 @@ wcsiword(wchar_t c)
} else if (len == 1 && isascii(*outstr)) {
return iword(*outstr);
} else {
- return iswalnum(c);
+ return iswalnum(c) || wcschr(wordchars_wide, c);
+ }
+}
+
+/*
+ * iident() macro extended to support wide characters.
+ *
+ * Tests whether the wide character c is allowed in an internal zsh
+ * identifier.  Until the main shell handles multibyte characters it's
+ * not a good idea to allow characters other than ASCII characters; it
+ * would cause zle to allow characters that the main shell would reject.
+ * Eventually we should be able to allow all alphanumerics.
+ *
+ * Returns the result of iident() if c converts to a single ASCII byte,
+ * and 0 for every other character, including one that cannot be
+ * converted to a multibyte sequence at all.
+ *
+ * Structured like wcsiword(), except that the ASCII test must be
+ * iident() rather than iword(): characters that are only word
+ * characters because they appear in WORDCHARS are not valid in
+ * identifiers.
+ */
+
+/**/
+mod_export int
+wcsiident(wchar_t c)
+{
+    int len;
+    VARARR(char, outstr, MB_CUR_MAX);
+
+    len = wctomb(outstr, c);
+
+    if (len == 1 && isascii(*outstr)) {
+	/* single-byte ASCII: defer to the ordinary identifier test */
+	return iident(*outstr);
+    }
+
+    /*
+     * Anything else is a multibyte character or a failed conversion
+     * (wctomb() returned -1): not currently allowed, see above.
+     */
+    return 0;
+}
#endif