summaryrefslogtreecommitdiff
path: root/Src/sort.c
diff options
context:
space:
mode:
authorPeter Stephenson <pws@users.sourceforge.net>2007-01-21 22:47:36 +0000
committerPeter Stephenson <pws@users.sourceforge.net>2007-01-21 22:47:36 +0000
commit553e011320798af097e8de95a1e2a1d2ed6a1a3e (patch)
treed2e7cd070f33afddb0ce84129e61b4f3034bceb4 /Src/sort.c
parentf3bf48149a2e86faf35db529d864edd904b52fb4 (diff)
downloadzsh-553e011320798af097e8de95a1e2a1d2ed6a1a3e.tar.gz
zsh-553e011320798af097e8de95a1e2a1d2ed6a1a3e.zip
23118: improve sorting to make it work with locales
Diffstat (limited to 'Src/sort.c')
-rw-r--r--Src/sort.c332
1 files changed, 332 insertions, 0 deletions
diff --git a/Src/sort.c b/Src/sort.c
new file mode 100644
index 000000000..2fdb77931
--- /dev/null
+++ b/Src/sort.c
@@ -0,0 +1,332 @@
+/*
+ * sort.c - comparison and sorting of strings
+ *
+ * This file is part of zsh, the Z shell.
+ *
+ * Copyright (c) 1992-2007 Paul Falstad
+ * All rights reserved.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and to distribute modified versions of this software for any
+ * purpose, provided that the above copyright notice and the following
+ * two paragraphs appear in all copies of this software.
+ *
+ * In no event shall Paul Falstad or the Zsh Development Group be liable
+ * to any party for direct, indirect, special, incidental, or consequential
+ * damages arising out of the use of this software and its documentation,
+ * even if Paul Falstad and the Zsh Development Group have been advised of
+ * the possibility of such damage.
+ *
+ * Paul Falstad and the Zsh Development Group specifically disclaim any
+ * warranties, including, but not limited to, the implied warranties of
+ * merchantability and fitness for a particular purpose. The software
+ * provided hereunder is on an "as is" basis, and Paul Falstad and the
+ * Zsh Development Group have no obligation to provide maintenance,
+ * support, updates, enhancements, or modifications.
+ *
+ */
+
+#include "zsh.mdh"
+#include "sort.pro"
+
+/* Flag for direction of sort: 1 forwards, -1 reverse */
+static int sortdir;
+
+/* Flag that sort is numeric */
+static int sortnumeric;
+
+/**/
+static int
+eltpcmp(const void *a, const void *b)
+{
+ const SortElt ae = *(const SortElt *)a;
+ const SortElt be = *(const SortElt *)b;
+ const char *as = ae->cmp;
+ const char *bs = be->cmp;
+ int cmp;
+
+ if (ae->len != -1 || be->len != -1) {
+ /*
+ * Length recorded. We only do that if there are embedded
+ * nulls we need to treat as regular characters.
+ *
+ * Since we don't know where multibyte characters start,
+ * but do know that a null character can't occur inside
+ * one (we are relying on multibyte characters being extensions
+ * of ASCII), we can compare starting from after the last
+ * null character that occurred in both strings.
+ */
+ const char *cmpa, *cmpb;
+ const char *laststarta = as;
+ int len;
+ if (ae->len != -1) {
+ len = ae->len;
+ if (be->len != -1 && len > be->len)
+ len = be->len;
+ }
+ else
+ len = be->len;
+
+ for (cmpa = as, cmpb = bs; *cmpa == *cmpb && len--; cmpa++, cmpb++)
+ if (!*cmpa)
+ laststarta = cmpa + 1;
+ if (*cmpa == *cmpb && ae->len != be->len) {
+ /*
+ * Special case: one of the strings has finished, but
+ * another one continues after the NULL. The string
+ * that's finished sorts below the other. We need
+ * to handle this here since stroll() or strcmp()
+ * will just compare the strings as equal.
+ */
+ if (ae->len != -1) {
+ if (be->len != -1) {
+ /*
+ * if a is shorter it's earlier, so return -1 and
+ * vice versa
+ */
+ return (ae->len - be->len) * sortdir;
+ } else {
+ /*
+ * a has a length and so continues, hence
+ * b sorts lower.
+ */
+ return sortdir;
+ }
+ } else {
+ /*
+ * b must continue because it has a recorded length,
+ * so a sorts lower.
+ */
+ return - sortdir;
+ }
+ }
+
+ bs += (laststarta - as);
+ as += (laststarta - as);
+ }
+#ifdef HAVE_STRCOLL
+ cmp = strcoll(as, bs);
+#endif
+ if (sortnumeric) {
+ for (; *as == *bs && *as; as++, bs++);
+#ifndef HAVE_STRCOLL
+ cmp = (int)STOUC(*as) - (int)STOUC(*bs);
+#endif
+ if (idigit(*as) || idigit(*bs)) {
+ for (; as > *(char **)a && idigit(as[-1]); as--, bs--);
+ if (idigit(*as) && idigit(*bs)) {
+ while (*as == '0')
+ as++;
+ while (*bs == '0')
+ bs++;
+ for (; idigit(*as) && *as == *bs; as++, bs++);
+ if (idigit(*as) || idigit(*bs)) {
+ cmp = (int)STOUC(*as) - (int)STOUC(*bs);
+ while (idigit(*as) && idigit(*bs))
+ as++, bs++;
+ if (idigit(*as) && !idigit(*bs))
+ return 1;
+ if (idigit(*bs) && !idigit(*as))
+ return -1;
+ }
+ }
+ }
+ }
+#ifndef HAVE_STRCOLL
+ else
+ cmp = strcmp(as, bs);
+#endif
+
+ return sortdir * cmp;
+}
+
+
+/*
+ * Front-end to eltpcmp() to compare strings.
+ * TODO: it would be better to eliminate this altogether by
+ * making the calling function call into the sort code
+ * at a higher level.
+ */
+
+/**/
+mod_export int
+zstrcmp(const char *as, const char *bs, int sortflags)
+{
+ struct sortelt ae, be, *aeptr, *beptr;
+ int oldsortdir = sortdir, oldsortnumeric = sortnumeric, ret;
+
+ ae.cmp = as;
+ be.cmp = bs;
+ ae.len = -1;
+ be.len = -1;
+
+ aeptr = &ae;
+ beptr = &be;
+
+ sortdir = 1;
+ sortnumeric = (sortflags & SORTIT_NUMERICALLY) ? 1 : 0;
+
+ ret = eltpcmp(&aeptr, &beptr);
+
+ /* Paranoia: I don't think we ever need to restore these. */
+ sortnumeric = oldsortnumeric;
+ sortdir = oldsortdir;
+
+ return ret;
+}
+
+
+/*
+ * Sort an array of metafied strings. Use an "or" of bit flags
+ * to decide how to sort. See the SORTIT_* flags in zsh.h.
+ *
+ * If unmetalenp is not NULL, the strings in array are already
+ * unmetafied and unmetalenp is an array containing the corresponding
+ * lengths.
+ */
+
+/**/
+void
+strmetasort(char **array, int sortwhat, int *unmetalenp)
+{
+ char **arrptr;
+ /*
+ * Array of elements containing stuff to sort. Note sortptrarr
+ * is an array of pointers, since that's more efficient
+ * for qsort() to manipulate. sortarr is the array of
+ * structures.
+ */
+ SortElt *sortptrarr, *sortptrarrptr;
+ SortElt sortarr, sortarrptr;
+ int oldsortdir, oldsortnumeric, nsort;
+
+ nsort = arrlen(array);
+ if (nsort < 2)
+ return;
+
+ pushheap();
+
+ sortptrarr = (SortElt *) zhalloc(nsort * sizeof(SortElt));
+ sortarr = (SortElt) zhalloc(nsort * sizeof(struct sortelt));
+ for (arrptr = array, sortptrarrptr = sortptrarr, sortarrptr = sortarr;
+ *arrptr; arrptr++, sortptrarrptr++, sortarrptr++) {
+ char *metaptr;
+ int needlen, needalloc;
+ *sortptrarrptr = sortarrptr;
+ sortarrptr->orig = *arrptr;
+
+ if (unmetalenp) {
+ /*
+ * Already unmetafied. We just need to check for
+ * embededded nulls.
+ */
+ int count = unmetalenp[arrptr-array];
+ /* Remember this length for sorted array */
+ sortarrptr->origlen = count;
+ for (metaptr = *arrptr; *metaptr != '\0' && count--; metaptr++)
+ ;
+ /* *metaptr must now be \0, even if we reached the end */
+ needlen = (count != 0);
+ } else {
+ /*
+ * Not yet unmetafied. See if it needs unmetafying.
+ * If it doesn't, there can't be any embedded nulls,
+ * since these are metafied.
+ */
+ needlen = 0;
+ for (metaptr = *arrptr; *metaptr && *metaptr != Meta;
+ metaptr++);
+ }
+ /*
+ * See if we need to do some special checking.
+ * Either we're going to need to copy it to transform it,
+ * or we need to unmetafy it.
+ */
+ if ((needalloc = (sortwhat &
+ (SORTIT_IGNORING_CASE|SORTIT_IGNORING_BACKSLASHES)))
+ || *metaptr == Meta) {
+ char *s, *t, *src = *arrptr, *dst;
+ int len;
+ sortarrptr->cmp = dst = (char *)zhalloc(strlen(src) + 1);
+
+ if (unmetalenp) {
+ /* Already unmetafied and we have the length. */
+ len = unmetalenp[arrptr-array];
+ } else if (*metaptr != '\0') {
+ /*
+ * Needs unmetafying. We need to check for
+ * embedded nulls while we do this.
+ */
+ char *t = dst + (metaptr - src);
+
+ if (metaptr != src)
+ memcpy(dst, src, metaptr - src);
+ while ((*t = *metaptr++)) {
+ if (*t++ == Meta) {
+ if ((t[-1] = *metaptr++ ^ 32) == '\0')
+ needlen = 1;
+ }
+ }
+ len = t - dst;
+ src = dst;
+ } else {
+ /*
+ * Doesn't need unmetafying.
+ * This means metaptr is the NULL at the
+ * end of the string, so we have the length, and
+ * there are no embedded nulls, so we don't
+ * need the length later.
+ * We're only copying the string to transform it
+ * below.
+ */
+ len = metaptr - src;
+ }
+ if (sortwhat & SORTIT_IGNORING_CASE) {
+ for (s = src, t = dst; s - src != len; )
+ *t++ = tulower(*s++);
+ src = dst;
+ }
+ if (sortwhat & SORTIT_IGNORING_BACKSLASHES) {
+ /* copy null byte, so increment length */
+ for (s = src, t = dst; s - src != len+1; ) {
+ if (*s == '\\') {
+ s++;
+ len--;
+ }
+ *t++ = *s++;
+ }
+ }
+ /* Do we need to store the length (embedded null)? */
+ sortarrptr->len = needlen ? len : -1;
+ } else {
+ /*
+ * We can use the string as is, although it's possible
+ * we still need to take account of an embedded null.
+ */
+ sortarrptr->cmp = *arrptr;
+ sortarrptr->len = needlen ? unmetalenp[arrptr-array] : -1;
+ }
+ }
+ /*
+ * We need to restore sortdir so that calls to
+ * [n]strcmp work when
+ */
+ oldsortdir = sortdir;
+ oldsortnumeric = sortnumeric;
+
+ sortdir = (sortwhat & SORTIT_BACKWARDS) ? -1 : 1;
+ sortnumeric = (sortwhat & SORTIT_NUMERICALLY) ? 1 : 0;
+
+ qsort(sortptrarr, nsort, sizeof(SortElt *), eltpcmp);
+
+ sortnumeric = oldsortnumeric;
+ sortdir = oldsortdir;
+ for (arrptr = array, sortptrarrptr = sortptrarr; nsort--; ) {
+ if (unmetalenp)
+ unmetalenp[arrptr-array] = (*sortptrarrptr)->origlen;
+ *arrptr++ = (*sortptrarrptr++)->orig;
+ }
+
+ popheap();
+}