summaryrefslogtreecommitdiff
path: root/Src/pattern.c
diff options
context:
space:
mode:
Diffstat (limited to 'Src/pattern.c')
-rw-r--r--Src/pattern.c1116
1 files changed, 656 insertions, 460 deletions
diff --git a/Src/pattern.c b/Src/pattern.c
index 048e3d3ec..914479847 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -1,5 +1,5 @@
/*
- * glob.c - filename generation
+ * pattern.c - pattern matching
*
* This file is part of zsh, the Z shell.
*
@@ -50,13 +50,28 @@
*/
#include "zsh.mdh"
-#include "pattern.pro"
/*
- * Globbing flags: lower 8 bits gives approx count
+ * The following union is used mostly for alignment purposes.
+ * Normal nodes are longs, while certain nodes take a char * as an argument;
+ * here we make sure that they both work out to the same length.
+ * The compiled regexp we construct consists of upats stuck together;
+ * anything else to be added (strings, numbers) is stuck after and
+ * then aligned to a whole number of upat units.
+ *
+ * Note also that offsets are in terms of the sizes of these things.
*/
-#define C_LCMATCHUC 0x0100
-#define C_IGNCASE 0x0200
+union upat {
+ long l;
+ unsigned char *p;
+};
+
+typedef union upat *Upat;
+
+#include "pattern.pro"
+
+/* Number of active parenthesised expressions allowed in backreferencing */
+#define NSUBEXP 9
/* definition number opnd? meaning */
#define P_END 0x00 /* no End of program. */
@@ -154,36 +169,17 @@
#define PP_UNKWN 13
#define PP_RANGE 14
-/* Align everything to the pointer type. */
-typedef char *zalign_t;
-
-#define P_OP(p) (*(long *)(p) & 0xff)
-#define P_NEXT(p) (*(long *)(p) >> 8)
-#define P_OPERAND(p) ((p) + sizeof(zalign_t))
-#define P_ISBRANCH(p) (*(long *)(p) & 0x20)
-#define P_ISEXCLUDE(p) ((*(long *)(p) & 0x30) == 0x30)
-#define P_NOTDOT(p) (*(long *)(p) & 0x40)
+#define P_OP(p) ((p)->l & 0xff)
+#define P_NEXT(p) ((p)->l >> 8)
+#define P_OPERAND(p) ((p) + 1)
+#define P_ISBRANCH(p) ((p)->l & 0x20)
+#define P_ISEXCLUDE(p) (((p)->l & 0x30) == 0x30)
+#define P_NOTDOT(p) ((p)->l & 0x40)
#define P_SIMPLE 0x01 /* Simple enough to be #/## operand. */
#define P_HSTART 0x02 /* Starts with # or ##'d pattern. */
#define P_PURESTR 0x04 /* Can be matched with a strcmp */
-/*
- * pointer is end string to be parsed?
- * a bit dire because of extendedglob possibilities:
- * we need to make sure a ~ at the end of a string isn't mistaken
- * for an excluder or lots of emacs users get very cross.
- */
-#define ISENDCHAR(X) (!*(X) || ((patflags & PAT_FILE) && *(X) == '/') || \
- *(X) == Bar || *(X) == Outpar || \
- (isset(EXTENDEDGLOB) && \
- (*(X) == Hat || \
- (*(X) == Tilde && \
- !(!(X)[1] || ((patflags & PAT_FILE) && \
- (X)[1] == '/') || \
- (X)[1] == Bar || (X)[1] == Outpar || \
- (X)[1] == Tilde)))))
-
/* Next character after one which may be a Meta (x is any char *) */
#define METANEXT(x) (*(x) == Meta ? (x)+2 : (x)+1)
/*
@@ -202,6 +198,36 @@ typedef zlong zrange_t;
typedef unsigned long zrange_t;
#endif
+/*
+ * Characters which terminate a pattern segment. We actually use
+ * a pointer patendseg which skips the first character if we are not
+ * parsing a file pattern.
+ * Note that the size of this and the next array are hard-wired
+ * via the definitions.
+ */
+
+static char endseg[] = {
+ '/', /* file only */
+ '\0', Bar, Outpar, /* all patterns */
+ Tilde /* extended glob only */
+};
+
+#define PATENDSEGLEN_NORM 4
+#define PATENDSEGLEN_EXT 5
+
+/* Characters which terminate a simple string */
+
+static char endstr[] = {
+ '/', /* file only */
+ '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang,
+ /* all patterns */
+ Tilde, Hat, Pound /* extended glob only */
+};
+
+#define PATENDSTRLEN_NORM 9
+#define PATENDSTRLEN_EXT 12
+
+
/* Default size for pattern buffer */
#define P_DEF_ALLOC 256
@@ -212,6 +238,10 @@ static char *patcode; /* point of code emission */
static long patsize; /* size of code */
static char *patout; /* start of code emission string */
static long patalloc; /* size allocated for same */
+static char *patendseg; /* characters ending segment */
+static int patendseglen; /* length of same */
+static char *patendstr; /* characters ending plain string */
+static int patendstrlen; /* length of sameo */
/* Flags used in both compilation and execution */
static int patflags; /* flags passed down to patcompile */
@@ -226,8 +256,8 @@ patadd(char *add, int ch, long n, int noalgn)
/* Make sure everything gets aligned unless we get noalgn. */
long newpatsize = patsize + n;
if (!noalgn)
- newpatsize = (newpatsize + sizeof(zalign_t) - 1) &
- ~(sizeof(zalign_t) - 1);
+ newpatsize = (newpatsize + sizeof(union upat) - 1) &
+ ~(sizeof(union upat) - 1);
if (patalloc < newpatsize) {
long newpatalloc =
2*(newpatsize > patalloc ? newpatsize : patalloc);
@@ -245,7 +275,7 @@ patadd(char *add, int ch, long n, int noalgn)
}
static long rn_offs;
-/* operates on poiners, returns a pointer */
+/* operates on pointers to union upat, returns a pointer */
#define PATNEXT(p) ((rn_offs = P_NEXT(p)) ? \
(P_OP(p) == P_BACK) ? \
((p)-rn_offs) : ((p)+rn_offs) : NULL)
@@ -262,24 +292,18 @@ patcompstart(void)
/* Top level pattern compilation subroutine */
/**/
-Patprog
+mod_export Patprog
patcompile(char *exp, int inflags, char **endexp)
{
- int flags, len;
+ int flags = 0, len = 0;
long startoff;
- char *pscan, *lng;
+ Upat pscan;
+ char *lng, *strp = NULL;
Patprog p;
- DPUTS(sizeof(long) > sizeof(zalign_t), "BUG: patprog alignment too small");
-
-#ifdef BACKREFERENCES
- startoff = (inflags & PAT_BACKR) ? sizeof(struct patprog) :
- sizeof(struct patprog_short);
-#else
startoff = sizeof(struct patprog);
-#endif
/* Ensure alignment of start of program string */
- startoff = (startoff + sizeof(zalign_t) - 1) & ~(sizeof(zalign_t) - 1);
+ startoff = (startoff + sizeof(union upat) - 1) & ~(sizeof(union upat) - 1);
/* Allocate reasonable sized chunk if none, reduce size if too big */
if (patalloc != P_DEF_ALLOC)
@@ -287,10 +311,23 @@ patcompile(char *exp, int inflags, char **endexp)
patcode = patout + startoff;
patsize = patcode - patout;
patstart = patparse = exp;
+ /*
+ * Note global patnpar numbers parentheses 1..9, while patnpar
+ * in struct is actual count of parentheses.
+ */
patnpar = 1;
- patflags = inflags;
+ patflags = inflags & ~PAT_PURES;
+
+ patendseg = endseg;
+ patendseglen = isset(EXTENDEDGLOB) ? PATENDSEGLEN_EXT : PATENDSEGLEN_NORM;
+ patendstr = endstr;
+ patendstrlen = isset(EXTENDEDGLOB) ? PATENDSTRLEN_EXT : PATENDSTRLEN_NORM;
if (!(patflags & PAT_FILE)) {
+ patendseg++;
+ patendstr++;
+ patendseglen--;
+ patendstrlen--;
remnulargs(exp);
patglobflags = 0;
}
@@ -299,65 +336,88 @@ patcompile(char *exp, int inflags, char **endexp)
*/
((Patprog)patout)->globflags = patglobflags;
- if (patflags & PAT_ANY)
- flags = 0;
- else if (patcompswitch(0, &flags) == 0)
- return NULL;
+ if (!(patflags & PAT_ANY)) {
+ /* Look for a really pure string, with no tokens at all. */
+ if (!patglobflags)
+ for (strp = exp; *strp &&
+ (!(patflags & PAT_FILE) || *strp != '/') && !itok(*strp);
+ strp++)
+ ;
+ if (!strp || (*strp && *strp != '/')) {
+ /* No, do normal compilation. */
+ strp = NULL;
+ if (patcompswitch(0, &flags) == 0)
+ return NULL;
+ } else {
+ /* Yes, copy the string and skip compilation altogether */
+ patparse = strp;
+ len = strp - exp;
+ patadd(exp, 0, len + 1, 0);
+ patout[startoff + len] = '\0';
+ patflags |= PAT_PURES;
+ }
+ }
/* end of compilation: safe to use pointers */
p = (Patprog)patout;
p->startoff = startoff;
p->patstartch = '\0';
p->globend = patglobflags;
- p->flags = (patflags & ~PAT_PURES);
+ p->flags = patflags;
p->mustoff = 0;
p->size = patsize;
- p->patmlen = 0;
- pscan = patout + startoff;
+ p->patmlen = len;
+ p->patnpar = patnpar-1;
- if (!(patflags & PAT_ANY) && P_OP(PATNEXT(pscan)) == P_END) {
- /* only one top level choice */
- pscan = P_OPERAND(pscan);
+ if (!strp) {
+ pscan = (Upat)(patout + startoff);
- if (flags & P_PURESTR) {
- /*
- * The pattern can be matched with a simple strncmp/strcmp.
- * Careful in case we've overwritten the node for the next ptr.
- */
- char *dst = patout + startoff, *next;
- p->flags |= PAT_PURES;
- for (; pscan; pscan = next) {
- next = PATNEXT(pscan);
- if (P_OP(pscan) == P_EXACTLY) {
- char *opnd = P_OPERAND(pscan);
- while ((*dst = *opnd++))
- dst++;
+ if (!(patflags & PAT_ANY) && P_OP(PATNEXT(pscan)) == P_END) {
+ /* only one top level choice */
+ pscan = P_OPERAND(pscan);
+
+ if (flags & P_PURESTR) {
+ /*
+ * The pattern can be matched with a simple strncmp/strcmp.
+ * Careful in case we've overwritten the node for the next ptr.
+ */
+ char *dst = patout + startoff;
+ Upat next;
+ p->flags |= PAT_PURES;
+ for (; pscan; pscan = next) {
+ next = PATNEXT(pscan);
+ if (P_OP(pscan) == P_EXACTLY) {
+ char *opnd = (char *)P_OPERAND(pscan);
+ while ((*dst = *opnd++))
+ dst++;
+ }
}
- }
- *dst++ = '\0';
- p->size = dst - patout;
- /* patmlen is reall strlen, don't include null byte */
- p->patmlen = p->size - startoff - 1;
- } else {
- /* starting point info */
- if (P_OP(pscan) == P_EXACTLY && !p->globflags)
- p->patstartch = *P_OPERAND(pscan);
- /* Find the longest literal string in something expensive.
- * This is itself not all that cheap if we have case-insensitive
- * matching or approximation, so don't.
- */
- if ((flags & P_HSTART) && !p->globflags) {
- lng = NULL;
- len = 0;
- for (; pscan; pscan = PATNEXT(pscan))
- if (P_OP(pscan) == P_EXACTLY &&
- strlen((char *)P_OPERAND(pscan)) >= len) {
- lng = P_OPERAND(pscan);
- len = strlen((char *)P_OPERAND(pscan));
+ *dst++ = '\0';
+ p->size = dst - patout;
+ /* patmlen is really strlen, don't include null byte */
+ p->patmlen = p->size - startoff - 1;
+ } else {
+ /* starting point info */
+ if (P_OP(pscan) == P_EXACTLY && !p->globflags)
+ p->patstartch = *(char *)P_OPERAND(pscan);
+ /*
+ * Find the longest literal string in something expensive.
+ * This is itself not all that cheap if we have
+ * case-insensitive matching or approximation, so don't.
+ */
+ if ((flags & P_HSTART) && !p->globflags) {
+ lng = NULL;
+ len = 0;
+ for (; pscan; pscan = PATNEXT(pscan))
+ if (P_OP(pscan) == P_EXACTLY &&
+ strlen((char *)P_OPERAND(pscan)) >= len) {
+ lng = (char *)P_OPERAND(pscan);
+ len = strlen(lng);
+ }
+ if (lng) {
+ p->mustoff = lng - patout;
+ p->patmlen = len;
}
- if (lng) {
- p->mustoff = lng - patout;
- p->patmlen = len;
}
}
}
@@ -367,8 +427,13 @@ patcompile(char *exp, int inflags, char **endexp)
* The pattern was compiled in a fixed buffer: unless told otherwise,
* we stick the compiled pattern on the heap. This is necessary
* for files where we will often be compiling multiple segments at once.
+ * But if we get the ZDUP flag w always put it in zalloc()ed memory.
*/
- if (!(patflags & PAT_STATIC)) {
+ if (patflags & PAT_ZDUP) {
+ Patprog newp = (Patprog)zalloc(patsize);
+ memcpy((char *)newp, (char *)p, patsize);
+ p = newp;
+ } else if (!(patflags & PAT_STATIC)) {
Patprog newp = (Patprog)zhalloc(patsize);
memcpy((char *)newp, (char *)p, patsize);
p = newp;
@@ -389,26 +454,22 @@ static long
patcompswitch(int paren, int *flagp)
{
long starter, br, ender, excsync = 0;
-#ifdef BACKREFERENCES
int parno = 0;
-#endif
- int flags, gfchanged = 0, savflags = patflags, savglobflags = patglobflags;
- char *ptr;
+ int flags, gfchanged = 0, savglobflags = patglobflags;
+ Upat ptr;
*flagp = 0;
-#ifdef BACKREFERENCES
- if (paren && (patflags & PAT_BACKR)) {
+ if (paren && (patglobflags & GF_BACKREF) && patnpar <= NSUBEXP) {
/*
* parenthesized: make an open node.
* We can only refer to the first nine parentheses.
* For any others, we just use P_OPEN on its own; there's
* no gain in arbitrarily limiting the number of parentheses.
*/
- parno = patnpar >= NSUBEXP ? 0 : patnpar++;
+ parno = patnpar++;
starter = patnode(P_OPEN + parno);
} else
-#endif
starter = 0;
br = patnode(P_BRANCH);
@@ -424,17 +485,16 @@ patcompswitch(int paren, int *flagp)
*flagp |= flags & (P_HSTART|P_PURESTR);
while (*patparse == Bar ||
- (isset(EXTENDEDGLOB) &&
- *patparse == Tilde && patparse[1] && patparse[1] != Bar &&
- patparse[1] != Outpar && patparse[1] != Tilde &&
- !((patflags & PAT_FILE) && patparse[1] == '/'))) {
+ (isset(EXTENDEDGLOB) && *patparse == Tilde &&
+ (patparse[1] == '/' ||
+ !memchr(patendseg, patparse[1], patendseglen)))) {
int tilde = *patparse++ == Tilde;
long gfnode = 0, newbr;
*flagp &= ~P_PURESTR;
if (tilde) {
- unsigned char *unull = NULL;
+ union upat up;
/* excsync remembers the P_EXCSYNC node before a chain of
* exclusions: all point back to this. only the
* original (non-excluded) branch gets a trailing P_EXCSYNC.
@@ -455,10 +515,16 @@ patcompswitch(int paren, int *flagp)
patglobflags &= ~0xff;
br = patnode(!(patflags & PAT_FILET) || paren ?
P_EXCLUDE : P_EXCLUDP);
- patadd((char *)&unull, 0, sizeof(unull), 0);
+ up.p = NULL;
+ patadd((char *)&up, 0, sizeof(up), 0);
/* / is not treated as special if we are at top level */
- if (!paren)
- patflags &= ~PAT_FILE;
+ if (!paren && *patendseg == '/') {
+ tilde++;
+ patendseg++;
+ patendseglen--;
+ patendstr++;
+ patendstrlen--;
+ }
} else {
excsync = 0;
br = patnode(P_BRANCH);
@@ -485,16 +551,23 @@ patcompswitch(int paren, int *flagp)
* No gfchanged, as nothing to follow branch at top
* level.
*/
+ union upat up;
gfnode = patnode(P_GFLAGS);
- patadd((char *)&patglobflags, 0, sizeof(long),
- 0);
+ up.l = patglobflags;
+ patadd((char *)&up, 0, sizeof(union upat), 0);
}
} else {
patglobflags = savglobflags;
}
}
newbr = patcompbranch(&flags);
- patflags = savflags;
+ if (tilde == 2) {
+ /* restore special treatment of / */
+ patendseg--;
+ patendseglen++;
+ patendstr--;
+ patendstrlen++;
+ }
if (!newbr)
return 0;
if (gfnode)
@@ -513,21 +586,16 @@ patcompswitch(int paren, int *flagp)
* branch at that point would indicate the current choices continue,
* which they don't.
*/
-#ifdef BACKREFERENCES
- ender = patnode(paren ? (patflags & PAT_BACKR) ? P_CLOSE+parno
- : P_NOTHING : P_END);
-#else
- ender = patnode(paren ? P_NOTHING : P_END);
-#endif
+ ender = patnode(paren ? parno ? P_CLOSE+parno : P_NOTHING : P_END);
pattail(starter, ender);
/*
* Hook the tails of the branches to the closing node,
* except for exclusions which terminate where they are.
*/
- for (ptr = patout + starter; ptr; ptr = PATNEXT(ptr))
+ for (ptr = (Upat)patout + starter; ptr; ptr = PATNEXT(ptr))
if (!P_ISEXCLUDE(ptr))
- patoptail(ptr-patout, ender);
+ patoptail(ptr-(Upat)patout, ender);
/* check for proper termination */
if ((paren && *patparse++ != Outpar) ||
@@ -566,12 +634,9 @@ patcompbranch(int *flagp)
*flagp = P_PURESTR;
starter = chain = 0;
- while (*patparse && !((patflags & PAT_FILE) && *patparse == '/') &&
- *patparse != Bar && *patparse != Outpar &&
- (!isset(EXTENDEDGLOB) || *patparse != Tilde ||
- !patparse[1] || patparse[1] == Bar || patparse[1] == Outpar
- || patparse[1] == Tilde ||
- ((patflags & PAT_FILE) && patparse[1] == '/'))) {
+ while (!memchr(patendseg, *patparse, patendseglen) ||
+ (*patparse == Tilde && patparse[1] != '/' &&
+ memchr(patendseg, patparse[1], patendseglen))) {
if (isset(EXTENDEDGLOB) &&
((!isset(SHGLOB) &&
(*patparse == Inpar && patparse[1] == Pound)) ||
@@ -601,8 +666,10 @@ patcompbranch(int *flagp)
*/
if (oldglobflags != patglobflags) {
/* Flags changed */
+ union upat up;
latest = patnode(P_GFLAGS);
- patadd((char *)&patglobflags, 0, sizeof(int), 0);
+ up.l = patglobflags;
+ patadd((char *)&up, 0, sizeof(union upat), 0);
} else {
/* No effect. */
continue;
@@ -663,17 +730,37 @@ patgetglobflags(char **strp)
case 'l':
/* Lowercase in pattern matches lower or upper in target */
- patglobflags = (patglobflags & ~C_IGNCASE) | C_LCMATCHUC;
+ patglobflags = (patglobflags & ~GF_IGNCASE) | GF_LCMATCHUC;
break;
case 'i':
/* Fully case insensitive */
- patglobflags = (patglobflags & ~C_LCMATCHUC) | C_IGNCASE;
+ patglobflags = (patglobflags & ~GF_LCMATCHUC) | GF_IGNCASE;
break;
case 'I':
/* Restore case sensitivity */
- patglobflags &= ~(C_LCMATCHUC|C_IGNCASE);
+ patglobflags &= ~(GF_LCMATCHUC|GF_IGNCASE);
+ break;
+
+ case 'b':
+ /* Make backreferences */
+ patglobflags |= GF_BACKREF;
+ break;
+
+ case 'B':
+ /* Don't make backreferences */
+ patglobflags &= ~GF_BACKREF;
+ break;
+
+ case 'm':
+ /* Make references to complete match */
+ patglobflags |= GF_MATCHREF;
+ break;
+
+ case 'M':
+ /* Don't */
+ patglobflags &= ~GF_MATCHREF;
break;
default:
@@ -696,156 +783,105 @@ static long
patcomppiece(int *flagp)
{
long starter, next, pound, op;
- int flags, kshchar;
- unsigned char *ptr;
-
- starter = patcompatom(&flags, &kshchar);
- if (!starter)
- return 0;
-
- if (!(pound = (*patparse == Pound && isset(EXTENDEDGLOB))) &&
- (kshchar <= 0 || kshchar == '@' || kshchar == '!')) {
- *flagp = flags;
- return starter;
- }
-
- /* too much at once doesn't currently work */
- if (kshchar && pound)
- return 0;
-
- if (kshchar == '*') {
- op = P_ONEHASH;
- *flagp = P_HSTART;
- } else if (kshchar == '+') {
- op = P_TWOHASH;
- *flagp = P_HSTART;
- } else if (kshchar == '?') {
- op = 0;
- *flagp = 0;
- } else if (*++patparse == Pound) {
- op = P_TWOHASH;
- patparse++;
- *flagp = P_HSTART;
- } else {
- op = P_ONEHASH;
- *flagp = P_HSTART;
- }
-
- /*
- * Note optimizations with pointers into P_NOTHING branches: some
- * should logically point to next node after current piece.
- *
- * Backtracking is also encoded in a slightly obscure way: the
- * code emitted ensures we test the non-empty branch of complex
- * patterns before the empty branch on each repetition. Hence
- * each time we fail on a non-empty branch, we try the empty branch,
- * which is equivalent to backtracking.
- */
- if ((flags & P_SIMPLE) && op == P_ONEHASH &&
- P_OP(patout+starter) == P_ANY) {
- /* Optimize ?# to *. Silly thing to do, since who would use
- * use ?# ? But it makes the later code shorter.
- */
- long *lptr = (long *)(patout + starter);
- *lptr = (*lptr & ~0xff) | P_STAR;
- } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) {
- /* Don't simplify if we need to look for approximations. */
- patinsert(op, starter, NULL, 0);
- } else if (op == P_ONEHASH) {
- /* Emit x# as (x&|), where & means "self". */
- ptr = NULL;
- patinsert(P_WBRANCH, starter, (char *)&ptr, sizeof(ptr));
- /* Either x */
- patoptail(starter, patnode(P_BACK)); /* and loop */
- patoptail(starter, starter); /* back */
- pattail(starter, patnode(P_BRANCH)); /* or */
- pattail(starter, patnode(P_NOTHING)); /* null. */
- } else if (op == P_TWOHASH) {
- /* Emit x## as x(&|) where & means "self". */
- next = patnode(P_WBRANCH); /* Either */
- ptr = NULL;
- patadd((char *)&ptr , 0, sizeof(ptr), 0);
- pattail(starter, next);
- pattail(patnode(P_BACK), starter); /* loop back */
- pattail(next, patnode(P_BRANCH)); /* or */
- pattail(starter, patnode(P_NOTHING)); /* null. */
- } else if (kshchar == '?') {
- /* Emit ?(x) as (x|) */
- patinsert(P_BRANCH, starter, NULL, 0); /* Either x */
- pattail(starter, patnode(P_BRANCH)); /* or */
- next = patnode(P_NOTHING); /* null */
- pattail(starter, next);
- patoptail(starter, next);
- }
- if (*patparse == Pound)
- return 0;
-
- return starter;
-}
-
-/*
- * Parse lowest level pattern. If doing ordinary characters, we
- * gobble a complete string as far as we can get.
- * kshcharp returns a character found before an Inpar, for handling
- * as a closure.
- */
-
-/**/
-static long
-patcompatom(int *flagp, int *kshcharp)
-{
- long starter;
- int patch, flags, len, ch;
+ int flags, flags2, kshchar, len, ch, patch;
+ union upat up;
char *nptr, *str0, cbuf[2];
zrange_t from, to;
- *flagp = 0;
+ flags = 0;
str0 = patparse;
for (;;) {
- /* check kshglob here */
- *kshcharp = '\0';
+ /*
+ * Check if we have a string. First, we need to make sure
+ * the string doesn't introduce a ksh-like parenthesised expression.
+ */
+ kshchar = '\0';
if (isset(KSHGLOB) && *patparse && patparse[1] == Inpar) {
- if (strchr("?*+!@", (char)*patparse))
- *kshcharp = STOUC(*patparse);
+ if (strchr("?*+!@", *patparse))
+ kshchar = STOUC(*patparse);
else if (*patparse == Star || *patparse == Quest)
- *kshcharp = STOUC(ztokens[*patparse - Pound]);
+ kshchar = STOUC(ztokens[*patparse - Pound]);
}
- if (patparse > str0) {
- /*
- * This is up here instead of at the end to simplify the
- * kshglob bracket testing. Note patparse doesn't
- * get incremented till afterwards.
- */
- if (ISENDCHAR(patparse) || *kshcharp || *patparse == Quest ||
- *patparse == Star || *patparse == Inbrack ||
- (*patparse == Inpar && !isset(SHGLOB)) ||
- *patparse == Inang ||
- (isset(EXTENDEDGLOB) && *patparse == Pound))
- break;
- else {
- METAINC(patparse);
- continue;
- }
- }
+ /*
+ * End of string (or no string at all) if ksh-type parentheses,
+ * or special character, unless that character is a tilde and
+ * the character following is an end-of-segment character. Thus
+ * tildes are not special if there is nothing following to
+ * be excluded.
+ */
+ if (kshchar || (memchr(patendstr, *patparse, patendstrlen) &&
+ (*patparse != Tilde ||
+ patparse[1] == '/' ||
+ !memchr(patendseg, patparse[1], patendseglen))))
+ break;
+
+ METAINC(patparse);
+ }
- if (*kshcharp)
+ if (patparse > str0) {
+ /* Ordinary string: cancel kshchar lookahead */
+ kshchar = '\0';
+ /*
+ * Assume it matches a simple string until we find otherwise.
+ */
+ flags |= P_PURESTR;
+ DPUTS(patparse == str0, "BUG: matched nothing in patcomppiece.");
+ /* more than one character matched? */
+ len = str0 + (*str0 == Meta ? 2 : 1) < patparse;
+ /*
+ * If we have more than one character, a following hash only
+ * applies to the last, so decrement.
+ */
+ if (isset(EXTENDEDGLOB) && *patparse == Pound && len)
+ patparse -= (patparse > str0 + 1 && patparse[-2] == Meta) ? 2 : 1;
+ /*
+ * If len is 1, we can't have an active # following, so doesn't
+ * matter that we don't make X in `XX#' simple.
+ */
+ if (!len)
+ flags |= P_SIMPLE;
+ starter = patnode(P_EXACTLY);
+ /* add enough space including null byte */
+ len = patparse - str0;
+ patadd(str0, 0, len + 1, 0);
+ nptr = (char *)P_OPERAND((Upat)patout + starter);
+ nptr[len] = '\0';
+ /*
+ * It's much simpler to turn off pure string mode for
+ * any case-insensitive or approximate matching; usually,
+ * that is correct, or they wouldn't have been turned on.
+ * However, we need to make sure we match a "." or ".."
+ * in a file name as a pure string. There's a minor bug
+ * that this will also apply to something like
+ * ..(#a1).. (i.e. the (#a1) has no effect), but if you're
+ * going to write funny patterns, you get no sympathy from me.
+ */
+ if (patglobflags &&
+ (!(patflags & PAT_FILE) || (strcmp(nptr, ".") &&
+ strcmp(nptr, ".."))))
+ flags &= ~P_PURESTR;
+ for (; *nptr; METAINC(nptr))
+ if (itok(*nptr))
+ *nptr = ztokens[*nptr - Pound];
+ } else {
+ if (kshchar)
patparse++;
patch = *patparse;
METAINC(patparse);
switch(patch) {
case Quest:
- *flagp |= P_SIMPLE;
- return patnode(P_ANY);
+ flags |= P_SIMPLE;
+ starter = patnode(P_ANY);
break;
case Star:
/* kshchar is used as a sign that we can't have #'s. */
- *kshcharp = -1;
- return patnode(P_STAR);
+ kshchar = -1;
+ starter = patnode(P_STAR);
break;
case Inbrack:
- *flagp |= P_SIMPLE;
+ flags |= P_SIMPLE;
if (*patparse == Hat || *patparse == '^' || *patparse == '!') {
patparse++;
starter = patnode(P_ANYBUT);
@@ -919,15 +955,14 @@ patcompatom(int *flagp, int *kshcharp)
patparse++;
/* terminate null string and fix alignment */
patadd(NULL, 0, 1, 0);
- return starter;
break;
case Inpar:
/* is this how to treat parentheses in SHGLOB? */
- if (isset(SHGLOB) && !*kshcharp)
+ if (isset(SHGLOB) && !kshchar)
return 0;
- if (*kshcharp == '!') {
+ if (kshchar == '!') {
/* This is nasty, we should really either handle all
- * kshglobbing upstairs or down here. But most of the
+ * kshglobbing below or here. But most of the
* others look like non-ksh patterns, while this one
* doesn't, so we handle it here and leave the rest.
* We treat it like an extendedglob ^, except that
@@ -939,12 +974,11 @@ patcompatom(int *flagp, int *kshcharp)
* the expense of allowing the user to do things
* they shouldn't.
*/
- if (!(starter = patcompnot(1, &flags)))
+ if (!(starter = patcompnot(1, &flags2)))
return 0;
- } else if (!(starter = patcompswitch(1, &flags)))
+ } else if (!(starter = patcompswitch(1, &flags2)))
return 0;
- *flagp |= flags & P_HSTART;
- return starter;
+ flags |= flags2 & P_HSTART;
break;
case Inang:
/* Numeric glob */
@@ -990,68 +1024,101 @@ patcompatom(int *flagp, int *kshcharp)
* Mention in manual that matching digits with [...]
* is more efficient.
*/
- return starter;
break;
case Pound:
- if (!isset(EXTENDEDGLOB))
- break;
+ DPUTS(!isset(EXTENDEDGLOB), "BUG: # not treated as string");
+ /*
+ * A hash here is an error; it should follow something
+ * repeatable.
+ */
return 0;
break;
#ifdef DEBUG
- case Bar:
- case Outpar:
- case '\0':
- dputs("BUG: wrong character in patcompatom.");
+ default:
+ dputs("BUG: character not handled in patcomppiece");
return 0;
break;
#endif
}
}
- /* Simple string: cancel kshchar lookahead */
- *kshcharp = '\0';
- /*
- * Assume it matches a simple string until we find otherwise.
- */
- *flagp |= P_PURESTR;
- DPUTS(patparse == str0, "BUG: matched nothing in patcompatom.");
- /* more than one character matched? */
- len = str0 + (*str0 == Meta ? 2 : 1) < patparse;
+ if (!(pound = (*patparse == Pound && isset(EXTENDEDGLOB))) &&
+ (kshchar <= 0 || kshchar == '@' || kshchar == '!')) {
+ *flagp = flags;
+ return starter;
+ }
+
+ /* too much at once doesn't currently work */
+ if (kshchar && pound)
+ return 0;
+
+ if (kshchar == '*') {
+ op = P_ONEHASH;
+ *flagp = P_HSTART;
+ } else if (kshchar == '+') {
+ op = P_TWOHASH;
+ *flagp = P_HSTART;
+ } else if (kshchar == '?') {
+ op = 0;
+ *flagp = 0;
+ } else if (*++patparse == Pound) {
+ op = P_TWOHASH;
+ patparse++;
+ *flagp = P_HSTART;
+ } else {
+ op = P_ONEHASH;
+ *flagp = P_HSTART;
+ }
+
/*
- * Ordinary string of characters.
- * If we have more than one character, a following hash only
- * applies to the last, so decrement.
- */
- if (isset(EXTENDEDGLOB) && *patparse == Pound && len)
- patparse -= (patparse > str0 + 1 && patparse[-2] == Meta) ? 2 : 1;
- /* if len is 1, we can't have an active # following, so doesn't
- * matter that we don't make X in `XX#' simple.
+ * Note optimizations with pointers into P_NOTHING branches: some
+ * should logically point to next node after current piece.
+ *
+ * Backtracking is also encoded in a slightly obscure way: the
+ * code emitted ensures we test the non-empty branch of complex
+ * patterns before the empty branch on each repetition. Hence
+ * each time we fail on a non-empty branch, we try the empty branch,
+ * which is equivalent to backtracking.
*/
- if (!len)
- *flagp |= P_SIMPLE;
- starter = patnode(P_EXACTLY);
- while (str0 < patparse) {
- if (*str0 == Meta) {
- cbuf[0] = *str0++;
- cbuf[1] = *str0++;
- } else {
- cbuf[0] = itok(*str0) ? ztokens[*str0 - Pound] : *str0;
- str0++;
- }
- ch = UNMETA(cbuf);
- /*
- * HACK: this cause a string consisting of any number of
- * dots in files to be matched exactly, even with approximation.
- * We just want to limit it to the first two.
+ if ((flags & P_SIMPLE) && op == P_ONEHASH &&
+ P_OP((Upat)patout+starter) == P_ANY) {
+ /* Optimize ?# to *. Silly thing to do, since who would use
+ * use ?# ? But it makes the later code shorter.
*/
- if (((patglobflags & C_IGNCASE) && (islower(ch) || isupper(ch))) ||
- ((patglobflags & C_LCMATCHUC) && islower(ch)) ||
- ((patglobflags & 0xff) && !((patflags & PAT_FILE) && ch == '.')))
- *flagp &= ~P_PURESTR;
- patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, 1);
+ Upat uptr = (Upat)patout + starter;
+ uptr->l = (uptr->l & ~0xff) | P_STAR;
+ } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) {
+ /* Don't simplify if we need to look for approximations. */
+ patinsert(op, starter, NULL, 0);
+ } else if (op == P_ONEHASH) {
+ /* Emit x# as (x&|), where & means "self". */
+ up.p = NULL;
+ patinsert(P_WBRANCH, starter, (char *)&up, sizeof(up));
+ /* Either x */
+ patoptail(starter, patnode(P_BACK)); /* and loop */
+ patoptail(starter, starter); /* back */
+ pattail(starter, patnode(P_BRANCH)); /* or */
+ pattail(starter, patnode(P_NOTHING)); /* null. */
+ } else if (op == P_TWOHASH) {
+ /* Emit x## as x(&|) where & means "self". */
+ next = patnode(P_WBRANCH); /* Either */
+ up.p = NULL;
+ patadd((char *)&up, 0, sizeof(up), 0);
+ pattail(starter, next);
+ pattail(patnode(P_BACK), starter); /* loop back */
+ pattail(next, patnode(P_BRANCH)); /* or */
+ pattail(starter, patnode(P_NOTHING)); /* null. */
+ } else if (kshchar == '?') {
+ /* Emit ?(x) as (x|) */
+ patinsert(P_BRANCH, starter, NULL, 0); /* Either x */
+ pattail(starter, patnode(P_BRANCH)); /* or */
+ next = patnode(P_NOTHING); /* null */
+ pattail(starter, next);
+ patoptail(starter, next);
}
- /* null terminate and fix alignment */
- patadd(NULL, 0, 1, 0);
+ if (*patparse == Pound)
+ return 0;
+
return starter;
}
@@ -1064,7 +1131,7 @@ patcompatom(int *flagp, int *kshcharp)
static long
patcompnot(int paren, int *flagsp)
{
- unsigned char *unull = NULL;
+ union upat up;
long excsync, br, excl, n, starter;
int dummy;
@@ -1076,7 +1143,8 @@ patcompnot(int paren, int *flagsp)
excsync = patnode(P_EXCSYNC);
pattail(br, excsync);
pattail(starter, excl = patnode(P_EXCLUDE));
- patadd((char *)&unull, 0, sizeof(unull), 0);
+ up.p = NULL;
+ patadd((char *)&up, 0, sizeof(up), 0);
if (!(br = (paren ? patcompswitch(1, &dummy) : patcompbranch(&dummy))))
return 0;
pattail(br, patnode(P_EXCEND));
@@ -1093,9 +1161,11 @@ patcompnot(int paren, int *flagsp)
static long
patnode(long op)
{
- long starter = patcode - patout;
+ long starter = (Upat)patcode - (Upat)patout;
+ union upat up;
- patadd((char *)&op, 0, sizeof(long), 0);
+ up.l = op;
+ patadd((char *)&up, 0, sizeof(union upat), 0);
return starter;
}
@@ -1109,22 +1179,22 @@ static void
patinsert(long op, int opnd, char *xtra, int sz)
{
char *src, *dst, *opdst;
- long buf, *lptr;
+ union upat buf, *lptr;
- buf = 0;
- patadd((char *)&buf, 0, sizeof(long), 0);
+ buf.l = 0;
+ patadd((char *)&buf, 0, sizeof(buf), 0);
if (sz)
patadd(xtra, 0, sz, 0);
- src = patcode - sizeof(long) - sz;
+ src = patcode - sizeof(union upat) - sz;
dst = patcode;
- opdst = patout + opnd;
+ opdst = patout + opnd * sizeof(union upat);
while (src > opdst)
*--dst = *--src;
/* A cast can't be an lvalue */
- lptr = (long *)opdst;
- *lptr = op;
- opdst += sizeof(long);
+ lptr = (Upat)opdst;
+ lptr->l = op;
+ opdst += sizeof(union upat);
while (sz--)
*opdst++ = *xtra++;
}
@@ -1135,10 +1205,10 @@ patinsert(long op, int opnd, char *xtra, int sz)
static void
pattail(long p, long val)
{
- char *scan, *temp;
- long offset, *lptr;
+ Upat scan, temp;
+ long offset;
- scan = patout + p;
+ scan = (Upat)patout + p;
for (;;) {
if (!(temp = PATNEXT(scan)))
break;
@@ -1146,10 +1216,9 @@ pattail(long p, long val)
}
offset = (P_OP(scan) == P_BACK)
- ? (scan - patout) - val : val - (scan - patout);
+ ? (scan - (Upat)patout) - val : val - (scan - (Upat)patout);
- lptr = (long *)scan;
- *lptr |= offset << 8;
+ scan->l |= offset << 8;
}
/* do pattail, but on operand of first argument; nop if operandless */
@@ -1157,14 +1226,14 @@ pattail(long p, long val)
/**/
static void patoptail(long p, long val)
{
- char *ptr = patout + p;
+ Upat ptr = (Upat)patout + p;
int op = P_OP(ptr);
if (!p || !P_ISBRANCH(ptr))
return;
if (op == P_BRANCH)
pattail(P_OPERAND(p), val);
else
- pattail(P_OPERAND(p) + sizeof(char *), val);
+ pattail(P_OPERAND(p) + 1, val);
}
@@ -1178,11 +1247,20 @@ char *patinput; /* String input pointer */
/* Length of input string, plus null byte, if needed */
static int patinlen;
-#ifdef BACKREFERENCES
-static char **patstartp; /* Pointer to backref starts */
-static char **patendp; /* Pointer to backref ends */
-static int parsfound; /* parentheses found */
-#endif
+
+/*
+ * Offset of string at which we are trying to match.
+ * This is added in to the positions recorded in patbeginp and patendp
+ * when we are looking for substrings. Currently this only happens
+ * in the parameter substitution code.
+ */
+/**/
+int patoffset;
+
+static char *patbeginp[NSUBEXP]; /* Pointer to backref beginnings */
+static char *patendp[NSUBEXP]; /* Pointer to backref ends */
+static int parsfound; /* parentheses (with backrefs) found */
+
static int globdots; /* Glob initial dots? */
/*
@@ -1204,15 +1282,28 @@ pattrystart(void)
}
/**/
-int
+mod_export int
pattry(Patprog prog, char *string)
{
-#ifdef BACKREFERENCES
- int i;
+ return pattryrefs(prog, string, NULL, NULL, NULL);
+}
+
+/* The last three arguments are used to report the positions for the
+ * backreferences. On entry, *nump should contain the maximum number
+ * positions to report. */
+
+/**/
+mod_export int
+pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
+{
+ int i, maxnpos = 0;
char **sp, **ep;
-#endif
char *progstr = (char *)prog + prog->startoff;
+ if (nump) {
+ maxnpos = *nump;
+ *nump = 0;
+ }
/* inherited from domatch, but why, exactly? */
if (*string == Nularg)
string++;
@@ -1248,40 +1339,111 @@ pattry(Patprog prog, char *string)
errsfound = 0;
}
globdots = !(patflags & PAT_NOGLD);
-#ifdef BACKREFERENCES
parsfound = 0;
- if (patflags & PAT_BACKR) {
- patstartp = prog->ppStartp;
- patendp = prog->ppEndp;
- } else {
- patstartp = patendp = NULL;
- }
-#endif
- if (patmatch(progstr)) {
-#ifdef BACKREFERENCES
- if (patflags & PAT_BACKR) {
- prog->ppStartp[0] = string;
- prog->ppEndp[0] = patinput;
-
- sp = patstartp+1;
- ep = patendp + 1;
- for (i = 1; i < NSUBEXP; i++) {
- if (!(parsfound & (1 << (i - 1))))
- *sp = 0;
- if (!(parsfound & (1 << (i + 15))))
- *ep = 0;
- sp++;
- ep++;
- }
-
- }
-#endif
+ if (patmatch((Upat)progstr)) {
/*
* we were lazy and didn't save the globflags if an exclusion
* failed, so set it now
*/
patglobflags = prog->globend;
+ /*
+ * Should we clear backreferences and matches on a failed
+ * match?
+ */
+ if ((patglobflags & GF_MATCHREF) && !(patflags & PAT_FILE)) {
+ /*
+ * m flag: for global match. This carries no overhead
+ * in the pattern matching part.
+ */
+ char *str;
+ int mlen = ztrsub(patinput, patinstart);
+
+ str = ztrduppfx(patinstart, patinput - patinstart);
+ setsparam("MATCH", str);
+ setiparam("MBEGIN", (zlong)(patoffset + !isset(KSHARRAYS)));
+ setiparam("MEND",
+ (zlong)(mlen + patoffset + !isset(KSHARRAYS) - 1));
+ }
+ if (prog->patnpar && nump) {
+ /*
+ * b flag: for backreferences using parentheses. Reported
+ * directly.
+ */
+ *nump = prog->patnpar;
+
+ sp = patbeginp;
+ ep = patendp;
+
+ for (i = 0; i < prog->patnpar && i < maxnpos; i++) {
+ if (parsfound & (1 << i)) {
+ if (begp)
+ *begp++ = ztrsub(*sp, patinstart) + patoffset;
+ if (endp)
+ *endp++ = ztrsub(*ep, patinstart) + patoffset - 1;
+ } else {
+ if (begp)
+ *begp++ = -1;
+ if (endp)
+ *endp++ = -1;
+ }
+
+ sp++;
+ ep++;
+ }
+ } else if (prog->patnpar && !(patflags & PAT_FILE)) {
+ /*
+ * b flag: for backreferences using parentheses.
+ */
+ int palen = prog->patnpar+1;
+ char **matcharr, **mbeginarr, **mendarr;
+ char numbuf[DIGBUFSIZE];
+
+ matcharr = zcalloc(palen*sizeof(char *));
+ mbeginarr = zcalloc(palen*sizeof(char *));
+ mendarr = zcalloc(palen*sizeof(char *));
+
+ sp = patbeginp;
+ ep = patendp;
+
+ for (i = 0; i < prog->patnpar; i++) {
+ if (parsfound & (1 << i)) {
+ matcharr[i] = ztrduppfx(*sp, *ep - *sp);
+ /*
+ * mbegin and mend give indexes into the string
+ * in the standard notation, i.e. respecting
+ * KSHARRAYS, and with the end index giving
+ * the last character, not one beyond.
+ * For example, foo=foo; [[ $foo = (f)oo ]] gives
+ * (without KSHARRAYS) indexes 1 and 1, which
+ * corresponds to indexing as ${foo[1,1]}.
+ */
+ sprintf(numbuf, "%ld",
+ (long)(ztrsub(*sp, patinstart) +
+ patoffset +
+ !isset(KSHARRAYS)));
+ mbeginarr[i] = ztrdup(numbuf);
+ sprintf(numbuf, "%ld",
+ (long)(ztrsub(*ep, patinstart) +
+ patoffset +
+ !isset(KSHARRAYS) - 1));
+ mendarr[i] = ztrdup(numbuf);
+ } else {
+ /* Pattern wasn't set: either it was in an
+ * unmatched branch, or a hashed parenthesis
+ * that didn't match at all.
+ */
+ matcharr[i] = ztrdup("");
+ mbeginarr[i] = ztrdup("-1");
+ mendarr[i] = ztrdup("-1");
+ }
+ sp++;
+ ep++;
+ }
+ setaparam("match", matcharr);
+ setaparam("mbegin", mbeginarr);
+ setaparam("mend", mendarr);
+ }
return 1;
} else
return 0;
@@ -1293,10 +1455,10 @@ pattry(Patprog prog, char *string)
* comes from the input string, the second the current pattern.
*/
#define CHARMATCH(chin, chpa) (chin == chpa || \
- ((patglobflags & C_IGNCASE) ? \
+ ((patglobflags & GF_IGNCASE) ? \
((isupper(chin) ? tolower(chin) : chin) == \
(isupper(chpa) ? tolower(chpa) : chpa)) : \
- (patglobflags & C_LCMATCHUC) ? \
+ (patglobflags & GF_LCMATCHUC) ? \
(islower(chpa) && toupper(chpa) == chin) : 0))
/*
@@ -1314,11 +1476,12 @@ static char *exactpos;
*/
/**/
-int
-patmatch(char *prog)
+static int
+patmatch(Upat prog)
{
/* Current and next nodes */
- char *scan = prog, *next, *opnd, *save, *start;
+ Upat scan = prog, next, opnd;
+ char *start, *save, *chrop;
int savglobflags, op, no, min, nextch, fail = 0, saverrsfound;
zrange_t from, to, comp;
@@ -1338,36 +1501,38 @@ patmatch(char *prog)
break;
case P_EXACTLY:
/*
- * acts as nothing if *opnd is null: this is used by
+ * acts as nothing if *chrop is null: this is used by
* approx code.
*/
- opnd = exactpos ? exactpos : P_OPERAND(scan);
+ chrop = exactpos ? exactpos : (char *)P_OPERAND(scan);
exactpos = NULL;
- while (*opnd && *patinput) {
+ while (*chrop && *patinput) {
int chin = STOUC(UNMETA(patinput));
- int chpa = STOUC(UNMETA(opnd));
+ int chpa = STOUC(UNMETA(chrop));
if (!CHARMATCH(chin, chpa)) {
fail = 1;
break;
}
- METAINC(opnd);
+ METAINC(chrop);
METAINC(patinput);
}
- if (*opnd) {
- exactpos = opnd;
+ if (*chrop) {
+ exactpos = chrop;
fail = 1;
}
break;
case P_ANYOF:
if (!*patinput ||
- !patmatchrange(P_OPERAND(scan), STOUC(UNMETA(patinput))))
+ !patmatchrange((char *)P_OPERAND(scan),
+ STOUC(UNMETA(patinput))))
fail = 1;
else
METAINC(patinput);
break;
case P_ANYBUT:
if (!*patinput ||
- patmatchrange(P_OPERAND(scan), STOUC(UNMETA(patinput))))
+ patmatchrange((char *)P_OPERAND(scan),
+ STOUC(UNMETA(patinput))))
fail = 1;
else
METAINC(patinput);
@@ -1384,12 +1549,12 @@ patmatch(char *prog)
* the first attempt.
*/
op = P_OP(scan);
- start = P_OPERAND(scan);
+ start = (char *)P_OPERAND(scan);
from = to = 0;
if (op != P_NUMTO) {
#ifdef ZSH_64_BIT_TYPE
/* We can't rely on pointer alignment being good enough. */
- memcpy((char *)&from, (char *)start, sizeof(zrange_t));
+ memcpy((char *)&from, start, sizeof(zrange_t));
#else
from = *((zrange_t *) start);
#endif
@@ -1414,7 +1579,8 @@ patmatch(char *prog)
return 1;
}
if (!no && P_OP(next) == P_EXACTLY &&
- !idigit(STOUC(*P_OPERAND(next))) && !(patglobflags & 0xff))
+ !idigit(STOUC(*(char *)P_OPERAND(next))) &&
+ !(patglobflags & 0xff))
return 0;
patinput = --save;
no++;
@@ -1434,7 +1600,8 @@ patmatch(char *prog)
if (patmatch(next))
return 1;
if (!no && P_OP(next) == P_EXACTLY &&
- !idigit(STOUC(*P_OPERAND(next))) && !(patglobflags & 0xff))
+ !idigit(STOUC(*(char *)P_OPERAND(next))) &&
+ !(patglobflags & 0xff))
return 0;
patinput = --save;
no++;
@@ -1447,9 +1614,8 @@ patmatch(char *prog)
case P_BACK:
break;
case P_GFLAGS:
- patglobflags = *(int *)P_OPERAND(scan);
+ patglobflags = P_OPERAND(scan)->l;
break;
-#ifdef BACKREFERENCES
case P_OPEN:
case P_OPEN+1:
case P_OPEN+2:
@@ -1464,13 +1630,12 @@ patmatch(char *prog)
save = patinput;
if (patmatch(next)) {
- DPUTS(!patstartp, "patstartp not set for backreferencing");
/*
- * Don't set ppStartp if some later invocation of
+ * Don't set patbeginp if some later invocation of
* the same parentheses already has.
*/
if (no && !(parsfound & (1 << (no - 1)))) {
- patstartp[no] = save;
+ patbeginp[no-1] = save;
parsfound |= 1 << (no - 1);
}
return 1;
@@ -1493,25 +1658,24 @@ patmatch(char *prog)
if (patmatch(next)) {
DPUTS(!patendp, "patendp not set for backreferencing");
if (no && !(parsfound & (1 << (no + 15)))) {
- patendp[no] = save;
+ patendp[no-1] = save;
parsfound |= 1 << (no + 15);
}
return 1;
} else
return 0;
break;
-#endif
case P_EXCSYNC:
- /* See the P_EXCLUDE code below for where syncstrp comes from */
+ /* See the P_EXCLUDE code below for where syncptr comes from */
{
- unsigned char **syncstrp, *syncptr;
- char *after;
+ unsigned char *syncptr;
+ Upat after;
after = P_OPERAND(scan);
DPUTS(!P_ISEXCLUDE(after),
"BUG: EXCSYNC not followed by EXCLUDE.");
- syncstrp = (unsigned char **)P_OPERAND(after);
- DPUTS(!*syncstrp, "BUG: EXCSYNC not handled by EXCLUDE");
- syncptr = *syncstrp + (patinput - patinstart);
+ DPUTS(!P_OPERAND(after)->p,
+ "BUG: EXCSYNC not handled by EXCLUDE");
+ syncptr = P_OPERAND(after)->p + (patinput - patinstart);
/*
* If we already matched from here, this time we fail.
* See WBRANCH code for story about error count.
@@ -1570,15 +1734,14 @@ patmatch(char *prog)
* The pointer also tells us where the asserted
* pattern matched for use by the exclusion.
*/
- unsigned char **syncstrp, *oldsyncstr;
+ Upat syncstrp;
+ unsigned char *oldsyncstr;
char *matchpt = NULL;
int ret, savglobdots, matchederrs = 0;
-#ifdef BACKREFERENCES
int savparsfound = parsfound;
-#endif
DPUTS(P_OP(scan) == P_WBRANCH,
"BUG: excluded WBRANCH");
- syncstrp = (unsigned char **)P_OPERAND(next);
+ syncstrp = P_OPERAND(next);
/*
* Unlike WBRANCH, each test at the same exclude
* sync point (due to an external loop) is separate,
@@ -1586,14 +1749,15 @@ patmatch(char *prog)
* (foo~bar)(foo~bar)... from the exclusion point
* of view, so we use a different sync string.
*/
- oldsyncstr = *syncstrp;
+ oldsyncstr = syncstrp->p;
if (!patinlen)
patinlen = strlen(patinstart)+1;
- *syncstrp = (unsigned char *)zcalloc(patinlen);
+ syncstrp->p = (unsigned char *)zcalloc(patinlen);
while ((ret = patmatch(P_OPERAND(scan)))) {
unsigned char *syncpt;
char savchar, *testptr;
- int savforce = forceerrs;
+ char *savpatinstart = patinstart;
+ int savforce = forceerrs, savpatinlen = patinlen;
forceerrs = -1;
savglobdots = globdots;
matchederrs = errsfound;
@@ -1607,9 +1771,9 @@ patmatch(char *prog)
* possibilities for approximation have been
* checked.)
*/
- for (syncpt = *syncstrp; !*syncpt; syncpt++)
+ for (syncpt = syncstrp->p; !*syncpt; syncpt++)
;
- testptr = patinstart + (syncpt - *syncstrp);
+ testptr = patinstart + (syncpt - syncstrp->p);
DPUTS(testptr > matchpt, "BUG: EXCSYNC failed");
savchar = *testptr;
*testptr = '\0';
@@ -1625,7 +1789,7 @@ patmatch(char *prog)
*/
patglobflags &= ~0xff;
errsfound = 0;
- opnd = P_OPERAND(next) + sizeof(char *);
+ opnd = P_OPERAND(next) + 1;
if (P_OP(next) == P_EXCLUDP && pathpos) {
/*
* top level exclusion with a file,
@@ -1638,21 +1802,23 @@ patmatch(char *prog)
zalloc(pathpos + patinlen);
strcpy(buf, pathbuf);
strcpy(buf + pathpos, patinput);
- patinput = buf;
+ patinput = patinstart = buf;
+ patinlen += pathpos;
}
if (patmatch(opnd)) {
ret = 0;
-#ifdef BACKREFERENCES
/*
* Another subtlety: if we exclude the
* match, any parentheses just found
* become invalidated.
*/
parsfound = savparsfound;
-#endif
}
- if (buf)
+ if (buf) {
+ patinstart = savpatinstart;
+ patinlen = savpatinlen;
zfree(buf, pathpos + patinlen);
+ }
if (!ret)
break;
next = PATNEXT(next);
@@ -1666,8 +1832,8 @@ patmatch(char *prog)
patglobflags = savglobflags;
errsfound = saverrsfound;
}
- zfree((char *)*syncstrp, patinlen);
- *syncstrp = oldsyncstr;
+ zfree((char *)syncstrp->p, patinlen);
+ syncstrp->p = oldsyncstr;
if (ret) {
patinput = matchpt;
errsfound = matchederrs;
@@ -1678,7 +1844,8 @@ patmatch(char *prog)
;
} else {
int ret = 1, pfree = 0;
- unsigned char **ptrp = NULL, *ptr;
+ Upat ptrp = NULL;
+ unsigned char *ptr;
if (P_OP(scan) == P_WBRANCH) {
/*
* This is where we make sure that we are not
@@ -1696,15 +1863,14 @@ patmatch(char *prog)
* there is already a 1, then the test fails.
*/
opnd = P_OPERAND(scan);
- ptrp = (unsigned char **)opnd;
- opnd += sizeof(unsigned char *);
- if (!*ptrp) {
+ ptrp = opnd++;
+ if (!ptrp->p) {
if (!patinlen)
patinlen = strlen((char *)patinstart)+1;
- *ptrp = (unsigned char *)zcalloc(patinlen);
+ ptrp->p = (unsigned char *)zcalloc(patinlen);
pfree = 1;
}
- ptr = *ptrp + (patinput - patinstart);
+ ptr = ptrp->p + (patinput - patinstart);
/*
* Without approximation, this is just a
@@ -1712,7 +1878,7 @@ patmatch(char *prog)
* need to know how many errors there were
* last time we made the test. If errsfound
* is now smaller than it was, hence we can
- * maker more approximations in the remaining
+ * make more approximations in the remaining
* code, we continue with the test.
* (This is why the max number of errors is
* 254, not 255.)
@@ -1725,8 +1891,8 @@ patmatch(char *prog)
if (ret)
ret = patmatch(opnd);
if (pfree) {
- zfree((char *)*ptrp, patinlen);
- *ptrp = NULL;
+ zfree((char *)ptrp->p, patinlen);
+ ptrp->p = NULL;
}
if (ret)
return 1;
@@ -1769,12 +1935,38 @@ patmatch(char *prog)
return 0;
no = patrepeat(P_OPERAND(scan));
}
- /* Lookahead to avoid useless matches */
- if (P_OP(next) == P_EXACTLY && !(patglobflags & 0xff))
- nextch = STOUC(UNMETA(P_OPERAND(next)));
- else
- nextch = -1;
min = (op == P_TWOHASH) ? 1 : 0;
+ /*
+ * Lookahead to avoid useless matches. This is not possible
+ * with approximation.
+ */
+ if (P_OP(next) == P_EXACTLY && !(patglobflags & 0xff)) {
+ char *nextop = (char *)P_OPERAND(next);
+ /*
+ * If that P_EXACTLY is last (common in simple patterns,
+ * such as *.c), then it can be only be matched at one
+ * point in the test string, so record that.
+ */
+ if (P_OP(PATNEXT(next)) == P_END &&
+ !(patflags & PAT_NOANCH)) {
+ int ptlen = strlen(patinput);
+ int oplen = strlen(nextop);
+ /* Are we in the right range? */
+ if (oplen > strlen(min ? METANEXT(start) : start) ||
+ oplen < ptlen)
+ return 0;
+ /* Yes, just position appropriately and test. */
+ patinput += ptlen - oplen;
+ if (patinput > start && patinput[-1] == Meta) {
+ /* doesn't align properly, no go */
+ return 0;
+ }
+ /* Continue loop with P_EXACTLY test. */
+ break;
+ }
+ nextch = STOUC(UNMETA(nextop));
+ } else
+ nextch = -1;
save = patinput;
savglobflags = patglobflags;
saverrsfound = errsfound;
@@ -2010,13 +2202,13 @@ patmatchrange(char *range, int ch)
/* repeatedly match something simple and say how many times */
/**/
-static int patrepeat(char *p)
+static int patrepeat(Upat p)
{
int count = 0, tch, inch;
char *scan, *opnd;
scan = patinput;
- opnd = P_OPERAND(p);
+ opnd = (char *)P_OPERAND(p);
switch(P_OP(p)) {
#ifdef DEBUG
@@ -2056,6 +2248,16 @@ static int patrepeat(char *p)
return count;
}
+/* Free a patprog. */
+
+/**/
+mod_export void
+freepatprog(Patprog prog)
+{
+ if (prog && prog != dummy_patprog1 && prog != dummy_patprog2)
+ zfree(prog, prog->size);
+}
+
/**/
#ifdef ZSH_PAT_DEBUG
@@ -2067,7 +2269,8 @@ static int patrepeat(char *p)
static void
patdump(Patprog r)
{
- char *s, *next, *codestart, *base, op = P_EXACTLY;
+ char *s, *base, op = P_EXACTLY;
+ Upat up, codestart, next;
base = (char *)r;
s = base + r->startoff;
@@ -2075,13 +2278,14 @@ patdump(Patprog r)
if (r->flags & PAT_PURES) {
printf("STRING:%s\n", (char *)s);
} else {
- codestart = s;
+ codestart = (Upat)s;
while (op != P_END) {
- op = P_OP(s);
- printf("%2d%s", s-codestart, patprop(s));
- next = PATNEXT(s);
- printf("(%d)", next ? (s-codestart)+(next-s) : 0);
- s += sizeof(zalign_t);
+ up = (Upat)s;
+ op = P_OP(up);
+ printf("%2d%s", up-codestart, patprop(up));
+ next = PATNEXT(up);
+ printf("(%d)", next ? next-codestart : 0);
+ s += sizeof(union upat);
if (op == P_ANYOF || op == P_ANYBUT || op == P_EXACTLY) {
while (*s != '\0') {
if (itok(*s)) {
@@ -2107,16 +2311,15 @@ patdump(Patprog r)
s += sizeof(zrange_t);
}
} else if (op == P_GFLAGS) {
- long *lptr = (long *)s;
- printf("%ld, %ld", *lptr & ~0xff, *lptr & 0xff);
- s += sizeof(long);
+ printf("%ld, %ld", (++up)->l & ~0xff, (++up)->l & 0xff);
+ s += sizeof(union upat);
} else if (op == P_WBRANCH || op == P_EXCLUDE ||
op == P_EXCLUDP) {
- s += sizeof(char *);
+ s += sizeof(union upat);
}
putchar('\n');
- s = base + (((s - base) + sizeof(zalign_t) - 1) &
- ~(sizeof(zalign_t) - 1));
+ s = base + (((s - base) + sizeof(union upat) - 1) &
+ ~(sizeof(union upat) - 1));
}
}
@@ -2125,18 +2328,16 @@ patdump(Patprog r)
printf("start `%c' ", r->patstartch);
if (!(r->flags & PAT_NOANCH))
printf("EOL-anchor ");
-#ifdef BACKREFERENCES
- if (r->flags & PAT_BACKR)
- printf("backreferences ");
-#endif
+ if (r->patnpar)
+ printf("%d active backreferences ", r->patnpar);
if (r->mustoff)
printf("must have \"%s\"", (char *)r + r->mustoff);
printf("\n");
if (r->globflags) {
printf("Globbing flags: ");
- if (r->globflags & C_LCMATCHUC)
+ if (r->globflags & GF_LCMATCHUC)
printf("LC matches UC ");
- if (r->globflags & C_IGNCASE)
+ if (r->globflags & GF_IGNCASE)
printf("Ignore case");
printf("\n");
if (r->globflags & 0xff)
@@ -2146,7 +2347,7 @@ patdump(Patprog r)
/**/
static char *
-patprop(char *op)
+patprop(Upat op)
{
char *p = NULL;
static char buf[50];
@@ -2258,16 +2459,11 @@ int
bin_patdebug(char *name, char **args, char *ops, int func)
{
Patprog prog;
- int ret = 0, flags;
+ int ret = 0;
tokenize(*args);
-#ifdef BACKREFERENCES
- flags = ops['b'] ? PAT_BACKR : 0;
-#else
- flags = 0;
-#endif
- if (!(prog = patcompile((char *)*args, flags, 0)))
+ if (!(prog = patcompile((char *)*args, 0, 0)))
return 1;
if (ops['p'] || !args[1]) {
patdump(prog);