summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 5
-rw-r--r-- Src/sort.c 48
-rw-r--r-- Test/B03print.ztst 7
-rw-r--r-- Test/D07multibyte.ztst 11
4 files changed, 67 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 8bcc4a322..a747e8aa2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2007-01-22 Peter Stephenson <pws@csr.com>
+
+ * 23119: Src/sort.c, Test/B03print.ztst, Test/D07multibyte.ztst:
+ do lowering of multibyte character case in sorting properly.
+
2007-01-21 Peter Stephenson <p.w.stephenson@ntlworld.com>
* 23118: Doc/Zsh/expn.yo, Src/builtin.c, Src/glob.c, Src/jobs.c,
diff --git a/Src/sort.c b/Src/sort.c
index 2fdb77931..1b8507342 100644
--- a/Src/sort.c
+++ b/Src/sort.c
@@ -248,7 +248,8 @@ strmetasort(char **array, int sortwhat, int *unmetalenp)
|| *metaptr == Meta) {
char *s, *t, *src = *arrptr, *dst;
int len;
- sortarrptr->cmp = dst = (char *)zhalloc(strlen(src) + 1);
+ sortarrptr->cmp = dst =
+ (char *)zhalloc(((sortwhat & SORTIT_IGNORING_CASE)?2:1)*strlen(src)+1);
if (unmetalenp) {
/* Already unmetafied and we have the length. */
@@ -283,8 +284,49 @@ strmetasort(char **array, int sortwhat, int *unmetalenp)
len = metaptr - src;
}
if (sortwhat & SORTIT_IGNORING_CASE) {
- for (s = src, t = dst; s - src != len; )
- *t++ = tulower(*s++);
+ char *send = src + len;
+#ifdef MULTIBYTE_SUPPORT
+ if (isset(MULTIBYTE)) {
+ /*
+ * Lower the case the hard way. Convert to a wide
+ * character, process that, and convert back. We
+ * don't assume the characters have the same
+ * multibyte length. We can't use casemodify()
+ * because we have unmetafied data, which may have
+ * been passed down to us.
+ */
+ mbstate_t mbsin, mbsout;
+ int clen;
+ wchar_t wc;
+ memset(&mbsin, 0, sizeof(mbstate_t));
+ memset(&mbsout, 0, sizeof(mbstate_t));
+
+ for (s = src, t = dst; s < send; ) {
+ clen = mbrtowc(&wc, s, send-s, &mbsin);
+ if (clen < 0) {
+ /* invalid or unfinished: treat as single bytes */
+ while (s < send)
+ *t++ = tulower(*s++);
+ break;
+ }
+ if (clen == 0) {
+ /* embedded null */
+ *t++ = '\0';
+ s++;
+ continue;
+ }
+ s += clen;
+ wc = towlower(wc);
+ clen = wcrtomb(t, wc, &mbsout);
+ t += clen;
+ DPUTS(clen < 0, "Bad conversion when lowering case");
+ }
+ *t = '\0';
+ len = t - dst;
+ } else
+#endif
+ for (s = src, t = dst; s < send; )
+ *t++ = tulower(*s++);
src = dst;
}
if (sortwhat & SORTIT_IGNORING_BACKSLASHES) {
diff --git a/Test/B03print.ztst b/Test/B03print.ztst
index c3ba42b18..92a24d6b6 100644
--- a/Test/B03print.ztst
+++ b/Test/B03print.ztst
@@ -34,7 +34,12 @@
>baz
>bar
- print -io a B c
+# some locales force case-insensitive sorting
+ (LC_ALL=C; print -o a B c)
+0:case-sensitive argument sorting
+>B a c
+
+ (LC_ALL=C; print -io a B c)
0:case-insensitive argument sorting
>a B c
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index ecac737a1..c3a24c067 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -2,6 +2,8 @@
# Find a UTF-8 locale.
setopt multibyte
+# Don't let LC_* override our choice of locale.
+ unset -m LC_\*
mb_ok=
langs=(en_US.UTF-8 en_GB.UTF-8 en.UTF-8
$(locale -a 2>/dev/null | sed -e 's/utf8/UTF-8/' | grep UTF-8))
@@ -315,3 +317,12 @@
printf "%4.3s\n" főobar
0:Multibyte characters in printf widths
> főo
+
+# We ask for case-insensitive sorting here (and supply upper case
+# characters) so that we exercise the logic in the shell that lowers the
+# case of the string for case-insensitive sorting.
+ print -oi HAH HUH HEH HÉH HÈH
+ (LC_ALL=C; print -oi HAH HUH HEH HÉH HÈH)
+0:Multibyte characters in print sorting
+>HAH HEH HÉH HÈH HUH
+>HAH HEH HUH HÈH HÉH