summary refs log tree commit diff
diff options
context:
space:
mode:
author Oliver Kiddle <opk@users.sourceforge.net> 2003-03-14 13:36:07 +0000
committer Oliver Kiddle <opk@users.sourceforge.net> 2003-03-14 13:36:07 +0000
commit d8fac9fb2315c3edf5f51d0df81c8d99b7018662 (patch)
tree 24c54569809eea638bfdf2466541b6fd760f3322
parent 586de1938f5739b199b5595b6abc8c81258ab822 (diff)
download zsh-d8fac9fb2315c3edf5f51d0df81c8d99b7018662.tar.gz
zsh-d8fac9fb2315c3edf5f51d0df81c8d99b7018662.zip
18343, 18348: handle \u and \U escapes for specifying unicode characters
-rw-r--r-- ChangeLog 5
-rw-r--r-- Doc/Zsh/builtins.yo 2
-rw-r--r-- Src/utils.c 92
-rw-r--r-- zshconfig.ac 7
4 files changed, 102 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index dd1b20142..e4e024b18 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2003-03-14 Oliver Kiddle <opk@zsh.org>
+
+ * 18343, 18348: zshconfig.ac, Doc/Zsh/builtins.yo, Src/utils.c:
+ handle \u and \U escapes for specifying unicode characters
+
2003-03-15 Doug Kearns <djkea2@mugca.its.monash.edu.au>
* 18351: Completion/Unix/Command/_antiword: new completion for
diff --git a/Doc/Zsh/builtins.yo b/Doc/Zsh/builtins.yo
index 899bc96d8..67d2c11e4 100644
--- a/Doc/Zsh/builtins.yo
+++ b/Doc/Zsh/builtins.yo
@@ -278,6 +278,8 @@ sitem(tt(\v))(vertical tab)
sitem(tt(\\))(backslash)
sitem(tt(\0)var(NNN))(character code in octal)
sitem(tt(\x)var(NN))(character code in hexadecimal)
+sitem(tt(\u)var(NNNN))(unicode character code in hexadecimal)
+sitem(tt(\U)var(NNNNNNNN))(unicode character code in hexadecimal)
endsitem()
pindex(BSD_ECHO, use of)
diff --git a/Src/utils.c b/Src/utils.c
index 64a6a722e..2b0e7faea 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -30,6 +30,15 @@
#include "zsh.mdh"
#include "utils.pro"
+#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB)
+#include <wchar.h>
+# ifndef __STDC_ISO_10646__
+# if defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
+# include <iconv.h>
+# endif
+# endif
+#endif
+
/* name of script being sourced */
/**/
@@ -3274,7 +3283,8 @@ dquotedzputs(char const *s, FILE *stream)
* for no newlines.
* 3: As 1, but don't handle \c.
* 4: Do $'...' quoting. Overwrites the existing string instead of
- * zhalloc'ing
+ * zhalloc'ing. If \uNNNN ever generates multi-byte chars longer
+ * than 6 bytes, will need to adjust this to re-allocate memory.
* 5: As 2, but \- is special. Expects misc to be defined.
* 6: As 2, but parses only one character and returns end-pointer
* and parsed character in *misc
@@ -3288,11 +3298,28 @@ getkeystring(char *s, int *len, int fromwhere, int *misc)
char *t, *u = NULL;
char svchar = '\0';
int meta = 0, control = 0;
+ int i;
+#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB)
+# ifdef __STDC_ISO_10646__
+ wint_t wval;
+# elif defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
+ unsigned int wval;
+ iconv_t cd;
+ char inbuf[4];
+ wchar_t outbuf[1];
+ size_t inbytes, outbytes;
+ char *inptr, *outptr;
+# endif
+ size_t count;
+ size_t buflen = MB_LEN_MAX * (strlen(s) / 6) + (strlen(s) % 6) + 1;
+#else
+ size_t buflen = strlen(s) + 1;
+#endif
if (fromwhere == 6)
t = buf = tmp;
else if (fromwhere != 4)
- t = buf = zhalloc(strlen(s) + 1);
+ t = buf = zhalloc(buflen);
else {
t = buf = s;
s += 2;
@@ -3363,6 +3390,67 @@ getkeystring(char *s, int *len, int fromwhere, int *misc)
*misc = 1;
break;
}
+#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB)
+#if defined(__STDC_ISO_10646__) || defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
+ case 'u':
+ case 'U':
+ wval = 0;
+ for (i=(*s == 'u' ? 4 : 8); i>0; i--) {
+ if (*++s && idigit(*s))
+ wval = wval * 16 + (*s - '0');
+ else if (*s && (*s >= 'a' && *s <= 'f') ||
+ (*s >= 'A' && *s <= 'F'))
+ wval = wval * 16 + (*s & 0x1f) + 9;
+ else {
+ s--;
+ break;
+ }
+ }
+ if (fromwhere == 6) {
+ *misc = wval;
+ return s+1;
+ }
+#ifdef __STDC_ISO_10646__
+ count = wctomb(t, (wchar_t)wval);
+#elif defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
+ inbytes = outbytes = 4;
+ inptr = inbuf;
+ outptr = (char *)outbuf;
+ /* assume big endian convention for UCS-4 */
+ for (i=3;i>=0;i--) {
+ inbuf[i] = wval & 0xff;
+ wval >>= 8;
+ }
+
+ cd = iconv_open("WCHAR_T", "ISO-10646");
+ if (cd == (iconv_t)-1) {
+ zerr("cannot do charset conversion", NULL, 0);
+ if (fromwhere == 4) {
+ for (u = t; (*u++ = *++s););
+ return t;
+ }
+ *t = '\0';
+ *len = t - buf;
+ return buf;
+ }
+ iconv(cd, &inptr, &inbytes, &outptr, &outbytes);
+ iconv_close(cd);
+ count = wctomb(t, *outbuf);
+#endif
+ if (count == (size_t)-1) {
+ zerr("character not in range", NULL, 0);
+ if (fromwhere == 4) {
+ for (u = t; (*u++ = *++s););
+ return t;
+ }
+ *t = '\0';
+ *len = t - buf;
+ return buf;
+ }
+ t += count;
+ continue;
+#endif
+#endif
default:
def:
if ((idigit(*s) && *s < '8') || *s == 'x') {
diff --git a/zshconfig.ac b/zshconfig.ac
index 33ca83f78..fd578c028 100644
--- a/zshconfig.ac
+++ b/zshconfig.ac
@@ -494,7 +494,7 @@ AC_CHECK_HEADERS(sys/time.h sys/times.h sys/select.h termcap.h termio.h \
limits.h fcntl.h libc.h sys/utsname.h sys/resource.h \
locale.h errno.h stdio.h stdlib.h unistd.h sys/capability.h \
utmp.h utmpx.h sys/types.h pwd.h grp.h poll.h sys/mman.h \
- netinet/in_systm.h pcre.h langinfo.h)
+ netinet/in_systm.h pcre.h langinfo.h wchar.h)
if test $dynamic = yes; then
AC_CHECK_HEADERS(dlfcn.h)
AC_CHECK_HEADERS(dl.h)
@@ -663,6 +663,8 @@ AC_CHECK_LIB(cap, cap_get_proc)
AC_CHECK_LIB(socket, socket)
+AC_CHECK_LIB(iconv, iconv)
+
dnl pcre-config should probably be employed here
AC_SEARCH_LIBS(pcre_compile, pcre)
@@ -959,7 +961,8 @@ AC_CHECK_FUNCS(strftime difftime gettimeofday \
tgetent tigetflag tigetnum tigetstr setupterm \
pcre_compile pcre_study pcre_exec \
nl_langinfo \
- erand48 open_memstream)
+ erand48 open_memstream \
+ wctomb iconv)
AC_FUNC_STRCOLL
dnl Check if tgetent accepts NULL (and will allocate its own termcap buffer)