summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 7
-rw-r--r-- Completion/compinit 5
-rw-r--r-- Src/hist.c 174
-rw-r--r-- Src/jobs.c 2
-rw-r--r-- Src/pattern.c 19
-rw-r--r-- Src/subst.c 67
-rw-r--r-- Src/utils.c 20
-rw-r--r-- Src/zsh.h 13
-rw-r--r-- Test/D07multibyte.ztst 34
9 files changed, 226 insertions, 115 deletions
diff --git a/ChangeLog b/ChangeLog
index 34fc634ba..5d88aff9b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2006-06-28 Peter Stephenson <pws@csr.com>
+
+ * 22525: Completion/compinit, Src/hist.c, Src/jobs.c,
+ Src/pattern.c, Src/subst.c, Src/utils.c, Src/zsh.h,
+ Test/D07multibyte.ztst: lengths and cases of multibyte strings
+ in parameters and history.
+
2006-06-27 Peter Stephenson <pws@csr.com>
* 22524: Src/params.c, Test/D07multibyte.ztst: searchable
diff --git a/Completion/compinit b/Completion/compinit
index 74a512fc3..a78fd1ce2 100644
--- a/Completion/compinit
+++ b/Completion/compinit
@@ -128,11 +128,12 @@ fi
# The standard options set in completion functions.
_comp_options=(
- glob
+ extendedglob
bareglobqual
+ glob
+ multibyte
nullglob
rcexpandparam
- extendedglob
unset
NO_markdirs
NO_globsubst
diff --git a/Src/hist.c b/Src/hist.c
index 0873ccce5..33c4035bf 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -635,10 +635,10 @@ histsubchar(int c)
quotebreak(&sline);
break;
case 'l':
- downcase(&sline);
+ sline = casemodify(sline, CASMOD_LOWER);
break;
case 'u':
- upcase(&sline);
+ sline = casemodify(sline, CASMOD_UPPER);
break;
default:
herrflush();
@@ -1503,42 +1503,130 @@ remlpaths(char **junkptr)
return 0;
}
-/**/
-int
-makeuppercase(char **junkptr)
-{
- char *str = *junkptr;
-
- for (; *str; str++)
- *str = tuupper(*str);
- return 1;
-}
+/*
+ * Return modified version of str from the heap with modification
+ * according to one of the CASMOD_* types defined in zsh.h; CASMOD_NONE
+ * is not handled, for obvious reasons.
+ */
/**/
-int
-makelowercase(char **junkptr)
-{
- char *str = *junkptr;
-
- for (; *str; str++)
- *str = tulower(*str);
- return 1;
-}
+char *
+casemodify(char *str, int how)
+{
+ char *str2 = zhalloc(2 * strlen(str) + 1);
+ char *ptr2 = str2;
+ int nextupper = 1;
+
+#ifdef MULTIBYTE_SUPPORT
+ if (isset(MULTIBYTE)) {
+ VARARR(char, mbstr, MB_CUR_MAX);
+ mbstate_t ps;
+
+ mb_metacharinit();
+ memset(&ps, 0, sizeof(ps));
+ while (*str) {
+ wint_t wc;
+ int len = mb_metacharlenconv(str, &wc), mod = 0, len2;
+ /*
+ * wc is set to WEOF if the start of str couldn't be
+ * converted. Presumably WEOF doesn't match iswlower(), but
+ * better be safe.
+ */
+ if (wc == WEOF) {
+ while (len--)
+ *ptr2++ = *str++;
+ /* not alphanumeric */
+ nextupper = 1;
+ continue;
+ }
+ switch (how) {
+ case CASMOD_LOWER:
+ if (iswupper(wc)) {
+ wc = towlower(wc);
+ mod = 1;
+ }
+ break;
-/**/
-int
-makecapitals(char **junkptr)
-{
- char *str = *junkptr;
+ case CASMOD_UPPER:
+ if (iswlower(wc)) {
+ wc = towupper(wc);
+ mod = 1;
+ }
+ break;
- for (; *str;) {
- for (; *str && !ialnum(*str); str++);
- if (*str)
- *str = tuupper(*str), str++;
- for (; *str && ialnum(*str); str++)
- *str = tulower(*str);
+ case CASMOD_CAPS:
+ default: /* shuts up compiler */
+ if (!iswalnum(wc))
+ nextupper = 1;
+ else if (nextupper) {
+ if (iswlower(wc)) {
+ wc = towupper(wc);
+ mod = 1;
+ }
+ nextupper = 0;
+ } else if (iswupper(wc)) {
+ wc = towlower(wc);
+ mod = 1;
+ }
+ break;
+ }
+ if (mod && (len2 = wcrtomb(mbstr, wc, &ps)) > 0) {
+ char *mbptr;
+
+ for (mbptr = mbstr; mbptr < mbstr + len2; mbptr++) {
+ if (imeta(STOUC(*mbptr))) {
+ *ptr2++ = Meta;
+ *ptr2++ = *mbptr ^ 32;
+ } else
+ *ptr2++ = *mbptr;
+ }
+ str += len;
+ } else {
+ while (len--)
+ *ptr2++ = *str++;
+ }
+ }
}
- return 1;
+ else
+#endif
+ while (*str) {
+ int c;
+ if (*str == Meta) {
+ c = str[1] ^ 32;
+ str += 2;
+ } else
+ c = *str++;
+ switch (how) {
+ case CASMOD_LOWER:
+ if (isupper(c))
+ c = tolower(c);
+ break;
+
+ case CASMOD_UPPER:
+ if (islower(c))
+ c = toupper(c);
+ break;
+
+ case CASMOD_CAPS:
+ default: /* shuts up compiler */
+ if (!ialnum(c))
+ nextupper = 1;
+ else if (nextupper) {
+ if (islower(c))
+ c = toupper(c);
+ nextupper = 0;
+ } else if (isupper(c))
+ c = tolower(c);
+ break;
+ }
+ if (imeta(c)) {
+ *ptr2++ = Meta;
+ *ptr2++ = c ^ 32;
+ } else
+ *ptr2++ = c;
+ }
+ *ptr2 = '\0';
+ return str2;
}
/**/
@@ -1645,26 +1733,6 @@ getargs(Histent elist, int arg1, int arg2)
}
/**/
-void
-upcase(char **x)
-{
- char *pp = *(char **)x;
-
- for (; *pp; pp++)
- *pp = tuupper(*pp);
-}
-
-/**/
-void
-downcase(char **x)
-{
- char *pp = *(char **)x;
-
- for (; *pp; pp++)
- *pp = tulower(*pp);
-}
-
-/**/
int
quote(char **tr)
{
diff --git a/Src/jobs.c b/Src/jobs.c
index cfc733ecf..509b9e843 100644
--- a/Src/jobs.c
+++ b/Src/jobs.c
@@ -2014,7 +2014,7 @@ bin_kill(char *nam, char **argv, UNUSED(Options ops), UNUSED(int func))
return 1;
} else
signame = *argv;
- makeuppercase(&signame);
+ signame = casemodify(signame, CASMOD_UPPER);
if (!strncmp(signame, "SIG", 3))
signame+=3;
diff --git a/Src/pattern.c b/Src/pattern.c
index a39095c37..bc9afbae3 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -1644,17 +1644,12 @@ charrefinc(char **x, char *y)
}
-#ifndef PARAMETER_CODE_HANDLES_MULTIBYTE
/*
- * TODO: We should use the other branch, but currently
- * the parameter code doesn't handle multibyte input,
- * so this would produce the wrong subscripts,
- * so just use a raw byte difference for now.
+ * Count the number of characters between two pointers, smaller first
+ *
+ * This is used when setting values in parameters, so we obey
+ * the MULTIBYTE option (even if it's been overridden locally).
*/
-/* Counter the number of characters between two pointers, smaller first */
-# define CHARSUB(x,y) ((y) - (x))
-#else
-/* Counter the number of characters between two pointers, smaller first */
#define CHARSUB(x,y) charsub(x, y)
static ptrdiff_t
charsub(char *x, char *y)
@@ -1663,6 +1658,9 @@ charsub(char *x, char *y)
size_t ret;
wchar_t wc;
+ if (!isset(MULTIBYTE))
+ return y - x;
+
while (x < y) {
ret = mbrtowc(&wc, x, y-x, &shiftstate);
@@ -1674,13 +1672,12 @@ charsub(char *x, char *y)
/* Treat nulls as normal characters */
if (!ret)
ret = 1;
- res += ret;
+ res++;
x += ret;
}
return res;
}
-#endif
#else /* no MULTIBYTE_SUPPORT */
diff --git a/Src/subst.c b/Src/subst.c
index 803f8d99d..d69f34c4b 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -1019,7 +1019,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
/* (u): straightforward. */
int unique = 0;
/* combination of (L), (U) and (C) flags. */
- int casmod = 0;
+ int casmod = CASMOD_NONE;
/*
* quotemod says we are doing either (q) (positive), (Q) (negative)
* or not (0). quotetype counts the q's for the first case.
@@ -1211,13 +1211,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
break;
case 'L':
- casmod = 2;
+ casmod = CASMOD_LOWER;
break;
case 'U':
- casmod = 1;
+ casmod = CASMOD_UPPER;
break;
case 'C':
- casmod = 3;
+ casmod = CASMOD_CAPS;
break;
case 'o':
@@ -1819,17 +1819,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
break;
}
switch (v->pm->node.flags & (PM_LOWER | PM_UPPER)) {
- char *t;
-
case PM_LOWER:
- t = val;
- for (; (c = *t); t++)
- *t = tulower(c);
+ val = casemodify(val, CASMOD_LOWER);
+ copied = 1;
break;
case PM_UPPER:
- t = val;
- for (; (c = *t); t++)
- *t = tuupper(c);
+ val = casemodify(val, CASMOD_UPPER);
+ copied = 1;
break;
}
}
@@ -2316,14 +2312,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
if (isarr) {
char **ctr;
- int sl = sep ? ztrlen(sep) : 1;
+ int sl = sep ? MB_METASTRLEN(sep) : 1;
if (getlen == 1)
for (ctr = aval; *ctr; ctr++, len++);
else if (getlen == 2) {
if (*aval)
for (len = -sl, ctr = aval;
- len += sl + ztrlen(*ctr), *++ctr;);
+ len += sl + MB_METASTRLEN(*ctr), *++ctr;);
}
else
for (ctr = aval;
@@ -2331,7 +2327,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
len += wordcount(*ctr, spsep, getlen > 3), ctr++);
} else {
if (getlen < 3)
- len = ztrlen(val);
+ len = MB_METASTRLEN(val);
else
len = wordcount(val, spsep, getlen > 3);
}
@@ -2387,33 +2383,19 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
/*
* Perform case modififications.
*/
- if (casmod) {
+ if (casmod != CASMOD_NONE) {
+ copied = 1; /* string is always modified by copy */
if (isarr) {
- char **ap;
+ char **ap, **ap2;
- if (!copied)
- aval = arrdup(aval), copied = 1;
ap = aval;
+ ap2 = aval = (char **) zhalloc(sizeof(char *) * (arrlen(aval)+1));
- if (casmod == 1)
- for (; *ap; ap++)
- makeuppercase(ap);
- else if (casmod == 2)
- for (; *ap; ap++)
- makelowercase(ap);
- else
- for (; *ap; ap++)
- makecapitals(ap);
-
+ while (*ap)
+ *ap2++ = casemodify(*ap++, casmod);
+ *ap2++ = NULL;
} else {
- if (!copied)
- val = dupstring(val), copied = 1;
- if (casmod == 1)
- makeuppercase(&val);
- else if (casmod == 2)
- makelowercase(&val);
- else
- makecapitals(&val);
+ val = casemodify(val, casmod);
}
}
/*
@@ -2975,7 +2957,8 @@ modify(char **str, char **ptr)
for (t = e = *str; (tt = findword(&e, sep));) {
tc = *e;
*e = '\0';
- copy = dupstring(tt);
+ if (c != 'l' && c != 'u')
+ copy = dupstring(tt);
*e = tc;
switch (c) {
case 'h':
@@ -2991,10 +2974,10 @@ modify(char **str, char **ptr)
remlpaths(&copy);
break;
case 'l':
- downcase(&copy);
+ copy = casemodify(tt, CASMOD_LOWER);
break;
case 'u':
- upcase(&copy);
+ copy = casemodify(tt, CASMOD_UPPER);
break;
case 's':
if (hsubl && hsubr)
@@ -3050,10 +3033,10 @@ modify(char **str, char **ptr)
remlpaths(str);
break;
case 'l':
- downcase(str);
+ *str = casemodify(*str, CASMOD_LOWER);
break;
case 'u':
- upcase(str);
+ *str = casemodify(*str, CASMOD_UPPER);
break;
case 's':
if (hsubl && hsubr) {
diff --git a/Src/utils.c b/Src/utils.c
index 4b2f07f19..32f6ae336 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -3687,7 +3687,7 @@ static mbstate_t mb_shiftstate;
/*
* Initialise multibyte state: called before a sequence of
- * mb_metacharlen().
+ * mb_metacharlenconv().
*/
/**/
@@ -3703,18 +3703,24 @@ mb_metacharinit(void)
* but character is not valid (e.g. possibly incomplete at end of string).
* Returned value is guaranteed not to reach beyond the end of the
* string (assuming correct metafication).
+ *
+ * If wcp is not NULL, the converted wide character is stored there.
+ * If no conversion could be done WEOF is used.
*/
/**/
int
-mb_metacharlen(char *s)
+mb_metacharlenconv(char *s, wint_t *wcp)
{
char inchar, *ptr;
size_t ret;
wchar_t wc;
- if (!isset(MULTIBYTE))
+ if (!isset(MULTIBYTE)) {
+ if (wcp)
+ *wcp = WEOF;
return 1 + (*s == Meta);
+ }
ret = MB_INVALID;
for (ptr = s; *ptr; ) {
@@ -3729,14 +3735,18 @@ mb_metacharlen(char *s)
break;
if (ret == MB_INCOMPLETE)
continue;
+ if (wcp)
+ *wcp = wc;
return ptr - s;
}
+ if (wcp)
+ *wcp = WEOF;
/* No valid multibyte sequence */
memset(&mb_shiftstate, 0, sizeof(mb_shiftstate));
- if (ptr > s)
+ if (ptr > s) {
return 1 + (*s == Meta); /* Treat as single byte character */
- else
+ } else
return 0; /* Probably shouldn't happen */
}
diff --git a/Src/zsh.h b/Src/zsh.h
index 31609d3c5..b0962574a 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -1882,6 +1882,17 @@ struct heap {
#define ZSIG_ALIAS (1<<3) /* Trap is stored under an alias */
#define ZSIG_SHIFT 4
+/***********************/
+/* Flags to casemodify */
+/***********************/
+
+enum {
+ CASMOD_NONE, /* dummy for tests */
+ CASMOD_UPPER,
+ CASMOD_LOWER,
+ CASMOD_CAPS
+};
+
/**********************************/
/* Flags to third argument of zle */
/**********************************/
@@ -1927,7 +1938,7 @@ typedef char *(*ZleGetLineFn) _((int *, int *));
#ifdef MULTIBYTE_SUPPORT
#define nicezputs(str, outs) (void)mb_niceformat((str), (outs), NULL, 0)
#define MB_METACHARINIT() mb_metacharinit()
-#define MB_METACHARLEN(str) mb_metacharlen(str)
+#define MB_METACHARLEN(str) mb_metacharlenconv(str, NULL)
#define MB_METASTRLEN(str) mb_metastrlen(str)
#define MB_INCOMPLETE ((size_t)-2)
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index a1937cc68..4d364d879 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -121,3 +121,37 @@
# Starting offsets with (R) seem to be so strange as to be hardly
# worth testing.
+
+ setopt extendedglob
+ [[ $a = (#b)t(én)(éb)reux ]] || print "Failed to match." >&2
+ for i in {1..${#match}}; do
+ print $match[i] $mbegin[i] $mend[i] ${a[$mbegin[i],$mend[i]]}
+ done
+0:Multibyte offsets in pattern tests
+>én 2 3 én
+>éb 4 5 éb
+
+ b=${(U)a}
+ print $b
+ print ${(L)b}
+ desdichado="Je suis le $a, le veuf, l'inconsolé"
+ print ${(C)desdichado}
+ lxiv="l'état c'est moi"
+ print ${(C)lxiv}
+0:Case modification of multibyte strings
+>TÉNÉBREUX
+>ténébreux
+>Je Suis Le Ténébreux, Le Veuf, L'Inconsolé
+>L'État C'Est Moi
+
+ array=(ølaf ødd øpened án encyclopædia)
+ barray=(${(U)array})
+ print $barray
+ print ${(L)barray}
+ print ${(C)array}
+ print ${(C)barray}
+0:Case modification of arrays with multibyte strings
+>ØLAF ØDD ØPENED ÁN ENCYCLOPÆDIA
+>ølaf ødd øpened án encyclopædia
+>Ølaf Ødd Øpened Án Encyclopædia
+>Ølaf Ødd Øpened Án Encyclopædia