summaryrefslogtreecommitdiff
path: root/Src/utils.c
diff options
context:
space:
mode:
authorOliver Kiddle <opk@users.sourceforge.net>2003-03-14 13:36:07 +0000
committerOliver Kiddle <opk@users.sourceforge.net>2003-03-14 13:36:07 +0000
commitd8fac9fb2315c3edf5f51d0df81c8d99b7018662 (patch)
tree24c54569809eea638bfdf2466541b6fd760f3322 /Src/utils.c
parent586de1938f5739b199b5595b6abc8c81258ab822 (diff)
downloadzsh-d8fac9fb2315c3edf5f51d0df81c8d99b7018662.tar.gz
zsh-d8fac9fb2315c3edf5f51d0df81c8d99b7018662.zip
18343, 18348: handle \u and \U escapes for specifying unicode characters
Diffstat (limited to 'Src/utils.c')
-rw-r--r--Src/utils.c92
1 files changed, 90 insertions, 2 deletions
diff --git a/Src/utils.c b/Src/utils.c
index 64a6a722e..2b0e7faea 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -30,6 +30,15 @@
#include "zsh.mdh"
#include "utils.pro"
+#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB)
+#include <wchar.h>
+# ifndef __STDC_ISO_10646__
+# if defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
+# include <iconv.h>
+# endif
+# endif
+#endif
+
/* name of script being sourced */
/**/
@@ -3274,7 +3283,8 @@ dquotedzputs(char const *s, FILE *stream)
* for no newlines.
* 3: As 1, but don't handle \c.
* 4: Do $'...' quoting. Overwrites the existing string instead of
- * zhalloc'ing
+ * zhalloc'ing. If \uNNNN ever generates multi-byte chars longer
+ * than 6 bytes, will need to adjust this to re-allocate memory.
* 5: As 2, but \- is special. Expects misc to be defined.
* 6: As 2, but parses only one character and returns end-pointer
* and parsed character in *misc
@@ -3288,11 +3298,28 @@ getkeystring(char *s, int *len, int fromwhere, int *misc)
char *t, *u = NULL;
char svchar = '\0';
int meta = 0, control = 0;
+ int i;
+#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB)
+# ifdef __STDC_ISO_10646__
+ wint_t wval;
+# elif defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
+ unsigned int wval;
+ iconv_t cd;
+ char inbuf[4];
+ wchar_t outbuf[1];
+ size_t inbytes, outbytes;
+ char *inptr, *outptr;
+# endif
+ size_t count;
+ size_t buflen = MB_LEN_MAX * (strlen(s) / 6) + (strlen(s) % 6) + 1;
+#else
+ size_t buflen = strlen(s) + 1;
+#endif
if (fromwhere == 6)
t = buf = tmp;
else if (fromwhere != 4)
- t = buf = zhalloc(strlen(s) + 1);
+ t = buf = zhalloc(buflen);
else {
t = buf = s;
s += 2;
@@ -3363,6 +3390,67 @@ getkeystring(char *s, int *len, int fromwhere, int *misc)
*misc = 1;
break;
}
+#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB)
+#if defined(__STDC_ISO_10646__) || defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
+ case 'u':
+ case 'U':
+ wval = 0;
+ for (i=(*s == 'u' ? 4 : 8); i>0; i--) {
+ if (*++s && idigit(*s))
+ wval = wval * 16 + (*s - '0');
+ else if (*s && (*s >= 'a' && *s <= 'f') ||
+ (*s >= 'A' && *s <= 'F'))
+ wval = wval * 16 + (*s & 0x1f) + 9;
+ else {
+ s--;
+ break;
+ }
+ }
+ if (fromwhere == 6) {
+ *misc = wval;
+ return s+1;
+ }
+#ifdef __STDC_ISO_10646__
+ count = wctomb(t, (wchar_t)wval);
+#elif defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
+ inbytes = outbytes = 4;
+ inptr = inbuf;
+ outptr = (char *)outbuf;
+ /* assume big endian convention for UCS-4 */
+ for (i=3;i>=0;i--) {
+ inbuf[i] = wval & 0xff;
+ wval >>= 8;
+ }
+
+ cd = iconv_open("WCHAR_T", "ISO-10646");
+ if (cd == (iconv_t)-1) {
+ zerr("cannot do charset conversion", NULL, 0);
+ if (fromwhere == 4) {
+ for (u = t; (*u++ = *++s););
+ return t;
+ }
+ *t = '\0';
+ *len = t - buf;
+ return buf;
+ }
+ iconv(cd, &inptr, &inbytes, &outptr, &outbytes);
+ iconv_close(cd);
+ count = wctomb(t, *outbuf);
+#endif
+ if (count == (size_t)-1) {
+ zerr("character not in range", NULL, 0);
+ if (fromwhere == 4) {
+ for (u = t; (*u++ = *++s););
+ return t;
+ }
+ *t = '\0';
+ *len = t - buf;
+ return buf;
+ }
+ t += count;
+ continue;
+#endif
+#endif
default:
def:
if ((idigit(*s) && *s < '8') || *s == 'x') {