From df0d86b847fbf5bd1ad383531cd52b261dc07717 Mon Sep 17 00:00:00 2001
From: Peter Stephenson
Date: Sat, 19 Sep 2015 23:08:46 +0100
Subject: 36559: test earlier for overflow in pattern range
---
Src/pattern.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
(limited to 'Src/pattern.c')
diff --git a/Src/pattern.c b/Src/pattern.c
index 3b55ccf1c..af56bd9cc 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -220,8 +220,10 @@ typedef union upat *Upat;
#if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT)
typedef zlong zrange_t;
#define ZRANGE_T_IS_SIGNED (1)
+#define ZRANGE_MAX ZLONG_MAX
#else
typedef unsigned long zrange_t;
+#define ZRANGE_MAX ULONG_MAX
#endif
#ifdef MULTIBYTE_SUPPORT
@@ -2641,19 +2643,30 @@ patmatch(Upat prog)
start = compend = patinput;
comp = 0;
while (patinput < patinend && idigit(*patinput)) {
- if (comp)
- comp *= 10;
- comp += *patinput - '0';
+ int out_of_range = 0;
+ int digit = *patinput - '0';
+ if (comp > ZRANGE_MAX / (zlong)10) {
+ out_of_range = 1;
+ } else {
+ zrange_t c10 = comp ? comp * 10 : 0;
+ if (ZRANGE_MAX - c10 < digit) {
+ out_of_range = 1;
+ } else {
+ comp = c10;
+ comp += digit;
+ }
+ }
patinput++;
compend++;
- if (comp & ((zrange_t)1 << (sizeof(comp)*8 -
+ if (out_of_range ||
+ (comp & ((zrange_t)1 << (sizeof(comp)*8 -
#ifdef ZRANGE_T_IS_SIGNED
2
#else
1
#endif
- ))) {
+ )))) {
/*
* Out of range (allowing for signedness, which
* we need if we are using zlongs).
--
cgit v1.2.3
From f9d7651c2554bb5db0373f63185ff358f795ab3c Mon Sep 17 00:00:00 2001
From: Peter Stephenson
Date: Mon, 28 Sep 2015 20:31:51 +0100
Subject: 36682: expand pattern interface to optimise unmetafication
---
ChangeLog | 6 +
Src/Zle/complist.c | 5 +-
Src/Zle/zle_hist.c | 4 +-
Src/glob.c | 56 +++++----
Src/pattern.c | 337 +++++++++++++++++++++++++++++++++++++----------------
Src/zsh.h | 10 ++
6 files changed, 288 insertions(+), 130 deletions(-)
(limited to 'Src/pattern.c')
diff --git a/ChangeLog b/ChangeLog
index 546620d6b..38e0e4627 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2015-09-28 Peter Stephenson
+
+ * 36682: Src/glob.c, Src/pattern.c, Src/zsh.h,
+ Src/Zle/complist.c, Src/Zle/zle_hist.c: expand pattern interface
+ to allow unmetafying trial string once for reuse.
+
2015-09-28 Daniel Shahaf
* unposted: Test/D04parameter.ztst: Test for 36669
diff --git a/Src/Zle/complist.c b/Src/Zle/complist.c
index 433701514..986ad31ea 100644
--- a/Src/Zle/complist.c
+++ b/Src/Zle/complist.c
@@ -868,7 +868,7 @@ putmatchcol(char *group, char *n)
nrefs = MAX_POS - 1;
if ((!pc->prog || !group || pattry(pc->prog, group)) &&
- pattryrefs(pc->pat, n, -1, -1, 0, &nrefs, begpos, endpos)) {
+ pattryrefs(pc->pat, n, -1, -1, NULL, 0, &nrefs, begpos, endpos)) {
if (pc->cols[1]) {
patcols = pc->cols;
@@ -900,7 +900,8 @@ putfilecol(char *group, char *filename, mode_t m, int special)
nrefs = MAX_POS - 1;
if ((!pc->prog || !group || pattry(pc->prog, group)) &&
- pattryrefs(pc->pat, filename, -1, -1, 0, &nrefs, begpos, endpos)) {
+ pattryrefs(pc->pat, filename, -1, -1, NULL,
+ 0, &nrefs, begpos, endpos)) {
if (pc->cols[1]) {
patcols = pc->cols;
diff --git a/Src/Zle/zle_hist.c b/Src/Zle/zle_hist.c
index 0cff0391a..95d96c95c 100644
--- a/Src/Zle/zle_hist.c
+++ b/Src/Zle/zle_hist.c
@@ -1306,8 +1306,8 @@ doisearch(char **args, int dir, int pattern)
* this mode.
*/
if (!skip_pos &&
- pattryrefs(patprog, zt, -1, -1, 0, NULL, NULL,
- &end_pos))
+ pattryrefs(patprog, zt, -1, -1, NULL, 0,
+ NULL, NULL, &end_pos))
t = zt;
} else {
if (!matchlist && !skip_pos) {
diff --git a/Src/glob.c b/Src/glob.c
index fa3ce25f4..8bf73520f 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -2780,7 +2780,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
if (fl & SUB_ALL) {
- int i = matched && pattry(p, s);
+ int i = matched && pattrylen(p, s, -1, -1, NULL, 0);
*sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL);
if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
return 0;
@@ -2809,7 +2809,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* Largest/smallest possible match at head of string.
* First get the longest match...
*/
- if (pattry(p, s)) {
+ if (pattrylen(p, s, -1, -1, NULL, 0)) {
/* patmatchlen returns metafied length, as we need */
int mlen = patmatchlen();
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
@@ -2820,7 +2820,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
mb_charinit();
for (t = s, umlen = 0; t < s + mlen; ) {
set_pat_end(p, *t);
- if (pattrylen(p, s, t - s, umlen, 0)) {
+ if (pattrylen(p, s, t - s, umlen, NULL, 0)) {
mlen = patmatchlen();
break;
}
@@ -2847,7 +2847,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
tmatch = NULL;
for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, ioff))
+ if (pattrylen(p, t, s + l - t, umlen, NULL, ioff))
tmatch = t;
if (fl & SUB_START)
break;
@@ -2857,7 +2857,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*sp = get_match_ret(*sp, tmatch - s, l, fl, replstr, NULL);
return 1;
}
- if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) {
+ if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, NULL, ioff)) {
*sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
return 1;
}
@@ -2870,7 +2870,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
mb_charinit();
for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, ioff)) {
+ if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
*sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL);
return 1;
}
@@ -2878,7 +2878,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
break;
umlen -= iincchar(&t);
}
- if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) {
+ if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, NULL, ioff)) {
*sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
return 1;
}
@@ -2887,7 +2887,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
case SUB_SUBSTR:
/* Smallest at start, but matching substrings. */
set_pat_start(p, l);
- if (!(fl & SUB_GLOBAL) && pattry(p, s + l) && !--n) {
+ if (!(fl & SUB_GLOBAL) && pattrylen(p, s + l, -1, -1, NULL, 0) &&
+ !--n) {
*sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL);
return 1;
} /* fall through */
@@ -2908,7 +2909,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
for (; t < s + l; ioff++) {
/* Find the longest match from this position. */
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, ioff)) {
+ if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
char *mpos = t + patmatchlen();
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
char *ptr;
@@ -2922,7 +2923,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
for (ptr = t, umlen2 = 0; ptr < mpos;) {
set_pat_end(p, *ptr);
- if (pattrylen(p, t, ptr - t, umlen2, ioff)) {
+ if (pattrylen(p, t, ptr - t, umlen2,
+ NULL, ioff)) {
mpos = t + patmatchlen();
break;
}
@@ -2970,7 +2972,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
set_pat_start(p, l);
if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
- pattry(p, s + l) && !--n) {
+ pattrylen(p, s + l, -1, -1, NULL, 0) && !--n) {
*sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist);
return 1;
}
@@ -2981,7 +2983,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
/* Longest/shortest at end, matching substrings. */
if (!(fl & SUB_LONG)) {
set_pat_start(p, l);
- if (pattrylen(p, s + l, 0, 0, umltot) && !--n) {
+ if (pattrylen(p, s + l, 0, 0, NULL, umltot) && !--n) {
*sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
return 1;
}
@@ -3001,7 +3003,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
mb_charinit();
for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, ioff)) {
+ if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
nmatches++;
tmatch = t;
}
@@ -3017,7 +3019,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
mb_charinit();
for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, ioff) &&
+ if (pattrylen(p, t, s + l - t, umlen, NULL, ioff) &&
!n--) {
tmatch = t;
break;
@@ -3030,7 +3032,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
for (t = tmatch, umlen = 0; t < mpos; ) {
set_pat_end(p, *t);
- if (pattrylen(p, tmatch, t - tmatch, umlen, ioff)) {
+ if (pattrylen(p, tmatch, t - tmatch, umlen,
+ NULL, ioff)) {
mpos = tmatch + patmatchlen();
break;
}
@@ -3042,7 +3045,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
return 1;
}
set_pat_start(p, l);
- if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, umltot) && !--n) {
+ if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, NULL, umltot) &&
+ !--n) {
*sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
return 1;
}
@@ -3167,7 +3171,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
for (t = s, umlen = 0; t < s + mlen; METAINC(t), umlen++) {
set_pat_end(p, *t);
- if (pattrylen(p, s, t - s, umlen, 0)) {
+ if (pattrylen(p, s, t - s, umlen, NULL, 0)) {
mlen = patmatchlen();
break;
}
@@ -3187,7 +3191,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
if (t > s && t[-1] == Meta)
t--;
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, ioff)) {
+ if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
*sp = get_match_ret(*sp, t - s, l, fl, replstr, NULL);
return 1;
}
@@ -3203,7 +3207,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
for (ioff = 0, t = s, umlen = uml; t < s + l;
ioff++, METAINC(t), umlen--) {
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, ioff)) {
+ if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
*sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL);
return 1;
}
@@ -3235,7 +3239,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
for (; t < s + l; METAINC(t), ioff++, umlen--) {
/* Find the longest match from this position. */
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, ioff)) {
+ if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
char *mpos = t + patmatchlen();
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
char *ptr;
@@ -3243,7 +3247,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
for (ptr = t, umlen2 = 0; ptr < mpos;
METAINC(ptr), umlen2++) {
set_pat_end(p, *ptr);
- if (pattrylen(p, t, ptr - t, umlen2, ioff)) {
+ if (pattrylen(p, t, ptr - t, umlen2,
+ NULL, ioff)) {
mpos = t + patmatchlen();
break;
}
@@ -3300,7 +3305,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
/* Longest/shortest at end, matching substrings. */
if (!(fl & SUB_LONG)) {
set_pat_start(p, l);
- if (pattrylen(p, s + l, 0, 0, uml) && !--n) {
+ if (pattrylen(p, s + l, 0, 0, NULL, uml) && !--n) {
*sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
return 1;
}
@@ -3310,7 +3315,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
if (t > s && t[-1] == Meta)
t--;
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, ioff) && !--n) {
+ if (pattrylen(p, t, s + l - t, umlen, NULL, ioff) && !--n) {
/* Found the longest match */
char *mpos = t + patmatchlen();
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
@@ -3319,7 +3324,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
for (ptr = t, umlen2 = 0; ptr < mpos;
METAINC(ptr), umlen2++) {
set_pat_end(p, *ptr);
- if (pattrylen(p, t, ptr - t, umlen2, ioff)) {
+ if (pattrylen(p, t, ptr - t, umlen2, NULL, ioff)) {
mpos = t + patmatchlen();
break;
}
@@ -3331,7 +3336,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
}
}
set_pat_start(p, l);
- if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, uml) && !--n) {
+ if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, NULL, uml) &&
+ !--n) {
*sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
return 1;
}
diff --git a/Src/pattern.c b/Src/pattern.c
index af56bd9cc..03ba37d92 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -2022,6 +2022,131 @@ pattrystart(void)
errsfound = 0;
}
+/*
+ * Allocate memory for pattern match. Note this is specific to use
+ * of pattern *and* trial string.
+ *
+ * Unmetafy a trial string for use in pattern matching, if needed.
+ *
+ * If it is needed, returns a zalloc()'d string; if not needed, returns
+ * NULL.
+ *
+ * prog is the pattern to be executed.
+ * string is the metafied trial string.
+ * stringlen is its length; the caller must supply it (pattryrefs()
+ * calls strlen() itself when its own stringlen argument is negative).
+ * unmetalen is the unmetafied length of the string, may be -1.
+ * force is 1 if we always unmetafy: this is useful if we are going
+ * to try again with different versions of the string. If this is
+ * called from pattryrefs() we don't force unmetafication as it won't
+ * be optimal.
+ * In patstralloc (supplied by caller, must last until last pattry is done)
+ * unmetalen is the unmetafied length of the string; it will be
+ * calculated if the input value is negative.
+ * unmetalenp is the unmetafied length of a path segment preceding
+ * the trial string needed for file management; it is calculated as
+ * needed so does not need to be initialised.
+ * alloced is the memory allocated --- same as return value from
+ * function.
+ */
+/**/
+mod_export
+char *patallocstr(Patprog prog, char *string, int stringlen, int unmetalen,
+ int force, Patstralloc patstralloc)
+{
+ int needfullpath;
+
+ /*
+ * For a top-level ~-exclusion, we will need the full
+ * path to exclude, so copy the path so far and append the
+ * current test string.
+ */
+ needfullpath = (prog->flags & PAT_HAS_EXCLUDP) && pathpos;
+
+ /* Get the length of the full string when unmetafied. */
+ if (unmetalen < 0)
+ patstralloc->unmetalen = ztrsub(string + stringlen, string);
+ else
+ patstralloc->unmetalen = unmetalen;
+ if (needfullpath) {
+ patstralloc->unmetalenp = ztrsub(pathbuf + pathpos, pathbuf);
+ if (!patstralloc->unmetalenp)
+ needfullpath = 0;
+ } else
+ patstralloc->unmetalenp = 0;
+ /* Initialise cache area */
+ patstralloc->progstrunmeta = NULL;
+ patstralloc->progstrunmetalen = 0;
+
+ DPUTS(needfullpath && (prog->flags & (PAT_PURES|PAT_ANY)),
+ "rum sort of file exclusion");
+ /*
+ * Partly for efficiency, and partly for the convenience of
+ * globbing, we don't unmetafy pure string patterns, and
+ * there's no reason to if the pattern is just a *.
+ */
+ if (force ||
+ (!(prog->flags & (PAT_PURES|PAT_ANY))
+ && (needfullpath || patstralloc->unmetalen != stringlen))) {
+ /*
+ * We need to copy if we need to prepend the path so far
+ * (in which case we copy both chunks), or if we have
+ * Meta characters.
+ */
+ char *dst, *ptr;
+ int i, icopy, ncopy;
+
+ dst = patstralloc->alloced =
+ zalloc(patstralloc->unmetalen + patstralloc->unmetalenp);
+
+ if (needfullpath) {
+ /* loop twice, copy path buffer first time */
+ ptr = pathbuf;
+ ncopy = patstralloc->unmetalenp;
+ } else {
+ /* just loop once, copy string with unmetafication */
+ ptr = string;
+ ncopy = patstralloc->unmetalen;
+ }
+ for (icopy = 0; icopy < 2; icopy++) {
+ for (i = 0; i < ncopy; i++) {
+ if (*ptr == Meta) {
+ ptr++;
+ *dst++ = *ptr++ ^ 32;
+ } else {
+ *dst++ = *ptr++;
+ }
+ }
+ if (!needfullpath)
+ break;
+ /* next time append test string to path so far */
+ ptr = string;
+ ncopy = patstralloc->unmetalen;
+ }
+ }
+ else
+ {
+ patstralloc->alloced = NULL;
+ }
+
+ return patstralloc->alloced;
+}
+
+
+/*
+ * Free memory allocated by patallocstr().
+ */
+
+/**/
+mod_export
+void patfreestr(Patstralloc patstralloc)
+{
+ if (patstralloc->alloced)
+ zfree(patstralloc->alloced,
+ patstralloc->unmetalen + patstralloc->unmetalenp);
+}
+
+
/*
* Test prog against null-terminated, metafied string.
*/
@@ -2030,7 +2155,7 @@ pattrystart(void)
mod_export int
pattry(Patprog prog, char *string)
{
- return pattryrefs(prog, string, -1, -1, 0, NULL, NULL, NULL);
+ return pattryrefs(prog, string, -1, -1, NULL, 0, NULL, NULL, NULL);
}
/*
@@ -2041,9 +2166,11 @@ pattry(Patprog prog, char *string)
/**/
mod_export int
-pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset)
+pattrylen(Patprog prog, char *string, int len, int unmetalen,
+ Patstralloc patstralloc, int offset)
{
- return pattryrefs(prog, string, len, unmetalen, offset, NULL, NULL, NULL);
+ return pattryrefs(prog, string, len, unmetalen, patstralloc, offset,
+ NULL, NULL, NULL);
}
/*
@@ -2055,14 +2182,32 @@ pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset)
* there may be a severe penalty for this if a lot of matching is done
* on one string.
*
- * offset is the position in the original string (not seen by
+ * If patstralloc is not NULL it is used to optimise unmetafication
+ * of a trial string that may be passed (or any substring may be passed) to
+ * pattryrefs multiple times for the same pattern (N.B. so patstralloc
+ * depends on both prog *and* the trial string). This should only be
+ * done if there is no path prefix (pathpos == 0) as otherwise the path
+ * buffer and unmetafied string may not match. To do this,
+ * patallocstr() is called (use force = 1 to ensure it is always
+ * unmetafied); patstralloc points to existing storage. When all
+ * pattern matching is done, patfreestr() is called.
+ * patstralloc->alloced and patstralloc->unmetalen contain the
+ * unmetafied string and its length. In that case, the rules for the
+ * earlier arguments change:
+ * - string is an unmetafied string
+ * - stringlen is its unmetafied (i.e. actual) length
+ * - unmetalenin is not used.
+ * string and stringlen may refer to arbitrary substrings of
+ * patstralloc->alloced without any internal modification to patstralloc.
+ *
+ * patoffset is the position in the original string (not seen by
* the pattern module) at which we are trying to match.
* This is added in to the positions recorded in patbeginp and patendp
* when we are looking for substrings. Currently this only happens
* in the parameter substitution code.
*
- * Note this is a character offset, i.e. a metafied character
- * counts as 1.
+ * Note this is a character offset, i.e. a single possibly metafied and
+ * possibly multibyte character counts as 1.
*
* The last three arguments are used to report the positions for the
* backreferences. On entry, *nump should contain the maximum number
@@ -2075,14 +2220,15 @@ pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset)
/**/
mod_export int
-pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
- int patoffset,
+pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
+ Patstralloc patstralloc, int patoffset,
int *nump, int *begp, int *endp)
{
- int i, maxnpos = 0, ret, needfullpath, unmetalenp;
+ int i, maxnpos = 0, ret;
int origlen;
- char **sp, **ep, *tryalloced, *ptr;
+ char **sp, **ep, *ptr;
char *progstr = (char *)prog + prog->startoff;
+ struct patstralloc patstralloc_struct;
if (nump) {
maxnpos = *nump;
@@ -2091,86 +2237,38 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
/* inherited from domatch, but why, exactly? */
if (*string == Nularg) {
string++;
- unmetalen--;
+ if (unmetalenin > 0)
+ unmetalenin--;
+ if (stringlen > 0)
+ stringlen--;
}
if (stringlen < 0)
stringlen = strlen(string);
origlen = stringlen;
- patflags = prog->flags;
- /*
- * For a top-level ~-exclusion, we will need the full
- * path to exclude, so copy the path so far and append the
- * current test string.
- */
- needfullpath = (patflags & PAT_HAS_EXCLUDP) && pathpos;
-
- /* Get the length of the full string when unmetafied. */
- if (unmetalen < 0)
- unmetalen = ztrsub(string + stringlen, string);
- if (needfullpath)
- unmetalenp = ztrsub(pathbuf + pathpos, pathbuf);
- else
- unmetalenp = 0;
-
- DPUTS(needfullpath && (patflags & (PAT_PURES|PAT_ANY)),
- "rum sort of file exclusion");
- /*
- * Partly for efficiency, and partly for the convenience of
- * globbing, we don't unmetafy pure string patterns, and
- * there's no reason to if the pattern is just a *.
- */
- if (!(patflags & (PAT_PURES|PAT_ANY))
- && (needfullpath || unmetalen != stringlen)) {
- /*
- * We need to copy if we need to prepend the path so far
- * (in which case we copy both chunks), or if we have
- * Meta characters.
- */
- char *dst;
- int icopy, ncopy;
-
- dst = tryalloced = zalloc(unmetalen + unmetalenp);
-
- if (needfullpath) {
- /* loop twice, copy path buffer first time */
- ptr = pathbuf;
- ncopy = unmetalenp;
- } else {
- /* just loop once, copy string with unmetafication */
- ptr = string;
- ncopy = unmetalen;
- }
- for (icopy = 0; icopy < 2; icopy++) {
- for (i = 0; i < ncopy; i++) {
- if (*ptr == Meta) {
- ptr++;
- *dst++ = *ptr++ ^ 32;
- } else {
- *dst++ = *ptr++;
- }
- }
- if (!needfullpath)
- break;
- /* next time append test string to path so far */
- ptr = string;
- ncopy = unmetalen;
- }
-
- if (needfullpath) {
- patinstart = tryalloced + unmetalenp;
- patinpath = tryalloced;
- } else {
- patinstart = tryalloced;
- patinpath = NULL;
- }
- stringlen = unmetalen;
- } else {
+ if (patstralloc) {
+ DPUTS(!patstralloc->alloced,
+ "External unmetafy didn't actually unmetafy.");
+ DPUTS(patstralloc->unmetalenp,
+ "Ooh-err: pathpos with external unmetafy. I have bad vibes.");
+ patinpath = NULL;
patinstart = string;
- tryalloced = patinpath = NULL;
+ /* stringlen is unmetafied length; unmetalenin is ignored */
+ } else {
+ patstralloc = &patstralloc_struct;
+ if (patallocstr(prog, string, stringlen, unmetalenin, 0, patstralloc)) {
+ patinstart = patstralloc->alloced + patstralloc->unmetalenp;
+ stringlen = patstralloc->unmetalen;
+ } else
+ patinstart = string;
+ if (patstralloc->unmetalenp)
+ patinpath = patstralloc->alloced;
+ else
+ patinpath = NULL;
}
+ patflags = prog->flags;
patinend = patinstart + stringlen;
/*
* From now on we do not require NULL termination of
@@ -2183,7 +2281,30 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
* Either we are testing against a pure string,
* or we can match anything at all.
*/
- int ret;
+ int ret, pstrlen;
+ char *pstr;
+ if (patstralloc->alloced)
+ {
+ /*
+ * Unmetafied; we need pattern string that's also unmetafied.
+ * We'll cache it in the patstralloc structure.
+ * Note it's on the heap.
+ */
+ if (!patstralloc->progstrunmeta)
+ {
+ patstralloc->progstrunmeta = dupstring(progstr);
+ unmetafy(patstralloc->progstrunmeta,
+ &patstralloc->progstrunmetalen);
+ }
+ pstr = patstralloc->progstrunmeta;
+ pstrlen = patstralloc->progstrunmetalen;
+ }
+ else
+ {
+ /* Metafied. */
+ pstr = progstr;
+ pstrlen = (int)prog->patmlen;
+ }
if (prog->flags & PAT_ANY) {
/*
* Optimisation for a single "*": always matches
@@ -2195,11 +2316,11 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
* Testing a pure string. See if initial
* components match.
*/
- int lendiff = stringlen - prog->patmlen;
+ int lendiff = stringlen - pstrlen;
if (lendiff < 0) {
/* No, the pattern string is too long. */
ret = 0;
- } else if (!memcmp(progstr, patinstart, prog->patmlen)) {
+ } else if (!memcmp(pstr, patinstart, pstrlen)) {
/*
* Initial component matches. Matches either
* if lengths are the same or we are not anchored
@@ -2221,7 +2342,9 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
} else {
/*
* Remember the length in case used for ${..#..} etc.
- * In this case, we didn't unmetafy the string.
+ * In this case, we didn't unmetafy the pattern string
+ * in the original structure, but it might be unmetafied
+ * for use with an unmetafied test string.
*/
patinlen = (int)prog->patmlen;
/* if matching files, must update globbing flags */
@@ -2229,16 +2352,26 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
if ((patglobflags & GF_MATCHREF) &&
!(patflags & PAT_FILE)) {
- char *str = ztrduppfx(patinstart, patinlen);
+ char *str;
int mlen;
- /*
- * Count the characters. We're not using CHARSUB()
- * because the string is still metafied.
- */
- MB_METACHARINIT();
- mlen = MB_METASTRLEN2END(patinstart, 0,
- patinstart + patinlen);
+ if (patstralloc->alloced) {
+ /*
+ * Unmetafied: pstrlen contains unmetafied
+ * length in bytes.
+ */
+ str = metafy(patinstart, pstrlen, META_ALLOC);
+ mlen = CHARSUB(patinstart, patinstart + pstrlen);
+ } else {
+ str = ztrduppfx(patinstart, patinlen);
+ /*
+ * Count the characters. We're not using CHARSUB()
+ * because the string is still metafied.
+ */
+ MB_METACHARINIT();
+ mlen = MB_METASTRLEN2END(patinstart, 0,
+ patinstart + patinlen);
+ }
setsparam("MATCH", str);
setiparam("MBEGIN",
@@ -2250,9 +2383,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
}
}
- if (tryalloced)
- zfree(tryalloced, unmetalen + unmetalenp);
-
+ if (patstralloc == &patstralloc_struct)
+ patfreestr(patstralloc);
return ret;
} else {
int q = queue_signal_level();
@@ -2289,8 +2421,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
}
}
if (!ret) {
- if (tryalloced)
- zfree(tryalloced, unmetalen + unmetalenp);
+ if (patstralloc == &patstralloc_struct)
+ patfreestr(patstralloc);
return 0;
}
@@ -2322,8 +2454,11 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
/*
* Optimization: if we didn't find any Meta characters
* to begin with, we don't need to look for them now.
+ * Only do this if we did the unmetafication internally,
+ * since otherwise it's too hard to work out.
*/
- if (unmetalen != origlen) {
+ if (patstralloc == &patstralloc_struct &&
+ patstralloc->unmetalen != origlen) {
for (ptr = patinstart; ptr < patinput; ptr++)
if (imeta(*ptr))
patinlen++;
@@ -2444,8 +2579,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
restore_queue_signals(q);
- if (tryalloced)
- zfree(tryalloced, unmetalen + unmetalenp);
+ if (patstralloc == &patstralloc_struct)
+ patfreestr(patstralloc);
return ret;
}
diff --git a/Src/zsh.h b/Src/zsh.h
index dd0596116..32f2e0cb2 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -491,6 +491,7 @@ typedef struct options *Options;
typedef struct optname *Optname;
typedef struct param *Param;
typedef struct paramdef *Paramdef;
+typedef struct patstralloc *Patstralloc;
typedef struct patprog *Patprog;
typedef struct prepromptfn *Prepromptfn;
typedef struct process *Process;
@@ -1470,6 +1471,15 @@ struct patprog {
char patstartch;
};
+struct patstralloc {
+ int unmetalen; /* Unmetafied length of trial string */
+ int unmetalenp; /* Unmetafied length of path prefix.
+ If 0, no path prefix. */
+ char *alloced; /* Allocated string, may be NULL */
+ char *progstrunmeta; /* Unmetafied pure string in pattern, cached */
+ int progstrunmetalen; /* Length of the foregoing */
+};
+
/* Flags used in pattern matchers (Patprog) and passed down to patcompile */
#define PAT_FILE 0x0001 /* Pattern is a file name */
--
cgit v1.2.3
From d07783628e935daab518509db123141ceb535a28 Mon Sep 17 00:00:00 2001
From: Peter Stephenson
Date: Tue, 29 Sep 2015 19:06:43 +0100
Subject: 36700: unmetafy early for parameter match
---
Src/glob.c | 436 ++++++++++++++++++++++++++++++++++------------------------
Src/pattern.c | 20 ++-
Src/zsh.h | 26 ++++
3 files changed, 294 insertions(+), 188 deletions(-)
(limited to 'Src/pattern.c')
diff --git a/Src/glob.c b/Src/glob.c
index 8bf73520f..0594f0a82 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -2450,29 +2450,46 @@ matchpat(char *a, char *b)
/* please do not laugh at this code. */
/* Having found a match in getmatch, decide what part of string
- * to return. The matched part starts b characters into string s
- * and finishes e characters in: 0 <= b <= e <= strlen(s)
+ * to return. The matched part starts b characters into string imd->ustr
+ * and finishes e characters in: 0 <= b <= e <= imd->ulen on input
* (yes, empty matches should work).
- * fl is a set of the SUB_* matches defined in zsh.h from SUB_MATCH onwards;
- * the lower parts are ignored.
- * replstr is the replacement string for a substitution
+ *
+ * imd->flags is a set of the SUB_* matches defined in zsh.h from
+ * SUB_MATCH onwards; the lower parts are ignored.
+ *
+ * imd->replstr is the replacement string for a substitution
+ *
+ * imd->replstr is metafied and the values put in imd->repllist are metafied.
*/
/**/
static char *
-get_match_ret(char *s, int b, int e, int fl, char *replstr,
- LinkList repllist)
+get_match_ret(Imatchdata imd, int b, int e)
{
- char buf[80], *r, *p, *rr;
- int ll = 0, l = strlen(s), bl = 0, t = 0, i;
-
+ char buf[80], *r, *p, *rr, *replstr = imd->replstr;
+ int ll = 0, bl = 0, t = 0, add = 0, fl = imd->flags, i;
+
+ /* Account for b and e referring to unmetafied string */
+ for (p = imd->ustr; p < imd->ustr + b; p++)
+ if (imeta(*p))
+ add++;
+ b += add;
+ for (; p < imd->ustr + e; p++)
+ if (imeta(*p))
+ add++;
+ e += add;
+ for (; p < imd->ustr + imd->ulen; p++)
+ if (imeta(*p))
+ add++;
+
+ /* Everything now refers to metafied lengths. */
if (replstr || (fl & SUB_LIST)) {
if (fl & SUB_DOSUBST) {
replstr = dupstring(replstr);
singsub(&replstr);
untokenize(replstr);
}
- if ((fl & (SUB_GLOBAL|SUB_LIST)) && repllist) {
+ if ((fl & (SUB_GLOBAL|SUB_LIST)) && imd->repllist) {
/* We are replacing the chunk, just add this to the list */
Repldata rd = (Repldata)
((fl & SUB_LIST) ? zalloc(sizeof(*rd)) : zhalloc(sizeof(*rd)));
@@ -2480,30 +2497,32 @@ get_match_ret(char *s, int b, int e, int fl, char *replstr,
rd->e = e;
rd->replstr = replstr;
if (fl & SUB_LIST)
- zaddlinknode(repllist, rd);
+ zaddlinknode(imd->repllist, rd);
else
- addlinknode(repllist, rd);
- return s;
+ addlinknode(imd->repllist, rd);
+ return imd->mstr;
}
ll += strlen(replstr);
}
if (fl & SUB_MATCH) /* matched portion */
ll += 1 + (e - b);
if (fl & SUB_REST) /* unmatched portion */
- ll += 1 + (l - (e - b));
+ ll += 1 + (imd->mlen - (e - b));
if (fl & SUB_BIND) {
/* position of start of matched portion */
- sprintf(buf, "%d ", MB_METASTRLEN2END(s, 0, s+b) + 1);
+ sprintf(buf, "%d ", MB_METASTRLEN2END(imd->mstr, 0, imd->mstr+b) + 1);
ll += (bl = strlen(buf));
}
if (fl & SUB_EIND) {
/* position of end of matched portion */
- sprintf(buf + bl, "%d ", MB_METASTRLEN2END(s, 0, s+e) + 1);
+ sprintf(buf + bl, "%d ",
+ MB_METASTRLEN2END(imd->mstr, 0, imd->mstr+e) + 1);
ll += (bl = strlen(buf));
}
if (fl & SUB_LEN) {
/* length of matched portion */
- sprintf(buf + bl, "%d ", MB_METASTRLEN2END(s+b, 0, s+e));
+ sprintf(buf + bl, "%d ", MB_METASTRLEN2END(imd->mstr+b, 0,
+ imd->mstr+e));
ll += (bl = strlen(buf));
}
if (bl)
@@ -2513,7 +2532,7 @@ get_match_ret(char *s, int b, int e, int fl, char *replstr,
if (fl & SUB_MATCH) {
/* copy matched portion to new buffer */
- for (i = b, p = s + b; i < e; i++)
+ for (i = b, p = imd->mstr + b; i < e; i++)
*rr++ = *p++;
t = 1;
}
@@ -2523,12 +2542,12 @@ get_match_ret(char *s, int b, int e, int fl, char *replstr,
if (t)
*rr++ = ' ';
/* there may be unmatched bits at both beginning and end of string */
- for (i = 0, p = s; i < b; i++)
+ for (i = 0, p = imd->mstr; i < b; i++)
*rr++ = *p++;
if (replstr)
for (p = replstr; *p; )
*rr++ = *p++;
- for (i = e, p = s + e; i < l; i++)
+ for (i = e, p = imd->mstr + e; i < imd->mlen; i++)
*rr++ = *p++;
t = 1;
}
@@ -2710,26 +2729,18 @@ set_pat_end(Patprog p, char null_me)
/*
* Increment *tp over character which may be multibyte.
- * Return number of bytes that remain in the character after unmetafication.
+ * Return number of bytes.
+ * All unmetafied here.
*/
/**/
-static int iincchar(char **tp)
+static int iincchar(char **tp, int left)
{
char *t = *tp;
- int mbclen = mb_metacharlenconv(t, NULL);
- int umlen = 0;
-
- while (mbclen--) {
- umlen++;
- if (*t++ == Meta) {
- t++;
- mbclen--;
- }
- }
- *tp = t;
+ int mbclen = mb_charlenconv(t, left, NULL);
+ *tp = t + mbclen;
- return umlen;
+ return mbclen;
}
/**/
@@ -2737,7 +2748,7 @@ static int
igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
LinkList *repllistp)
{
- char *s = *sp, *t, *tmatch;
+ char *s = *sp, *t, *tmatch, *send;
/*
* Note that ioff counts (possibly multibyte) characters in the
* character set (Meta's are not included), while l counts characters in
@@ -2752,36 +2763,52 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
int ioff, l = strlen(*sp), matched = 1, umltot = ztrlen(*sp);
int umlen, nmatches;
- /*
- * List of bits of matches to concatenate with replacement string.
- * The data is a struct repldata. It is not used in cases like
- * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match
- * is anchored. It goes on the heap.
- */
- LinkList repllist = NULL;
+ struct patstralloc patstralloc;
+ struct imatchdata imd;
+
+ (void)patallocstr(p, s, l, umltot, 1, &patstralloc);
+ s = patstralloc.alloced;
+ DPUTS(!s, "forced patallocstr failed");
+ send = s + umltot;
+
+ imd.mstr = *sp;
+ imd.mlen = l;
+ imd.ustr = s;
+ imd.ulen = umltot;
+ imd.flags = fl;
+ imd.replstr = replstr;
+ imd.repllist = NULL;
/* perform must-match test for complex closures */
if (p->mustoff)
{
- /*
- * Yuk. Probably we should rewrite this whole function to
- * use an unmetafied test string.
- *
- * Use META_HEAPDUP because we need a terminating NULL.
- */
- char *muststr = metafy((char *)p + p->mustoff,
- p->patmlen, META_HEAPDUP);
+ char *muststr = (char *)p + p->mustoff;
- if (!strstr(s, muststr))
- matched = 0;
+ matched = 0;
+ if (p->patmlen <= umltot)
+ {
+ for (t = s; t <= send - p->patmlen; t++)
+ {
+ if (!memcmp(muststr, t, p->patmlen)) {
+ matched = 1;
+ break;
+ }
+ }
+ }
}
/* in case we used the prog before... */
p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
if (fl & SUB_ALL) {
- int i = matched && pattrylen(p, s, -1, -1, NULL, 0);
- *sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL);
+ int i = matched && pattrylen(p, s, umltot, 0, &patstralloc, 0);
+ if (!i) {
+ /* Perform under no-match conditions */
+ umltot = 0;
+ imd.replstr = NULL;
+ }
+ *sp = get_match_ret(&imd, 0, umltot);
+ patfreestr(&patstralloc);
if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
return 0;
return 1;
@@ -2809,25 +2836,27 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* Largest/smallest possible match at head of string.
* First get the longest match...
*/
- if (pattrylen(p, s, -1, -1, NULL, 0)) {
- /* patmatchlen returns metafied length, as we need */
+ if (pattrylen(p, s, umltot, 0, &patstralloc, 0)) {
+ /* patmatchlen returns unmetafied length in this case */
int mlen = patmatchlen();
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
+ send = s + mlen;
/*
* ... now we know whether it's worth looking for the
* shortest, which we do by brute force.
*/
mb_charinit();
- for (t = s, umlen = 0; t < s + mlen; ) {
+ for (t = s, umlen = 0; t < send; ) {
set_pat_end(p, *t);
- if (pattrylen(p, s, t - s, umlen, NULL, 0)) {
+ if (pattrylen(p, s, umlen, 0, &patstralloc, 0)) {
mlen = patmatchlen();
break;
}
- umlen += iincchar(&t);
+ umlen += iincchar(&t, send - t);
}
}
- *sp = get_match_ret(*sp, 0, mlen, fl, replstr, NULL);
+ *sp = get_match_ret(&imd, 0, mlen);
+ patfreestr(&patstralloc);
return 1;
}
break;
@@ -2845,20 +2874,23 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
mb_charinit();
tmatch = NULL;
- for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
+ for (ioff = 0, t = s, umlen = umltot; t < send; ioff++) {
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff))
+ if (pattrylen(p, t, umlen, 0, &patstralloc, ioff))
tmatch = t;
if (fl & SUB_START)
break;
- umlen -= iincchar(&t);
+ umlen -= iincchar(&t, send - t);
}
if (tmatch) {
- *sp = get_match_ret(*sp, tmatch - s, l, fl, replstr, NULL);
+ *sp = get_match_ret(&imd, tmatch - s, umltot);
+ patfreestr(&patstralloc);
return 1;
}
- if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, NULL, ioff)) {
- *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+ if (!(fl & SUB_START) && pattrylen(p, s + umltot, 0, 0,
+ &patstralloc, ioff)) {
+ *sp = get_match_ret(&imd, umltot, umltot);
+ patfreestr(&patstralloc);
return 1;
}
break;
@@ -2868,18 +2900,21 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* move forward along string until we get a match. *
* Again there's no optimisation. */
mb_charinit();
- for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
+ for (ioff = 0, t = s, umlen = umltot; t < send ; ioff++) {
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
- *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL);
+ if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
+ *sp = get_match_ret(&imd, t-s, umltot);
+ patfreestr(&patstralloc);
return 1;
}
if (fl & SUB_START)
break;
- umlen -= iincchar(&t);
+ umlen -= iincchar(&t, send - t);
}
- if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, NULL, ioff)) {
- *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+ if (!(fl & SUB_START) && pattrylen(p, send, 0, 0,
+ &patstralloc, ioff)) {
+ *sp = get_match_ret(&imd, umltot, umltot);
+ patfreestr(&patstralloc);
return 1;
}
break;
@@ -2887,18 +2922,20 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
case SUB_SUBSTR:
/* Smallest at start, but matching substrings. */
set_pat_start(p, l);
- if (!(fl & SUB_GLOBAL) && pattrylen(p, s + l, -1, -1, NULL, 0) &&
+ if (!(fl & SUB_GLOBAL) &&
+ pattrylen(p, send, 0, 0, &patstralloc, 0) &&
!--n) {
- *sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL);
+ *sp = get_match_ret(&imd, 0, 0);
+ patfreestr(&patstralloc);
return 1;
} /* fall through */
case (SUB_SUBSTR|SUB_LONG):
/* longest or smallest at start with substrings */
t = s;
if (fl & SUB_GLOBAL) {
- repllist = (fl & SUB_LIST) ? znewlinklist() : newlinklist();
+ imd.repllist = (fl & SUB_LIST) ? znewlinklist() : newlinklist();
if (repllistp)
- *repllistp = repllist;
+ *repllistp = imd.repllist;
}
ioff = 0; /* offset into string */
umlen = umltot;
@@ -2906,10 +2943,10 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
do {
/* loop over all matches for global substitution */
matched = 0;
- for (; t < s + l; ioff++) {
+ for (; t < send; ioff++) {
/* Find the longest match from this position. */
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
+ if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
char *mpos = t + patmatchlen();
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
char *ptr;
@@ -2923,19 +2960,18 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
for (ptr = t, umlen2 = 0; ptr < mpos;) {
set_pat_end(p, *ptr);
- if (pattrylen(p, t, ptr - t, umlen2,
- NULL, ioff)) {
+ if (pattrylen(p, t, umlen2, 0,
+ &patstralloc, ioff)) {
mpos = t + patmatchlen();
break;
}
- umlen2 += iincchar(&ptr);
+ umlen2 += iincchar(&ptr, mpos - ptr);
}
}
if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) {
- *sp = get_match_ret(*sp, t-s, mpos-s, fl,
- replstr, repllist);
+ *sp = get_match_ret(&imd, t-s, mpos-s);
if (mpos == t)
- mpos += mb_metacharlenconv(mpos, NULL);
+ mpos += mb_charlenconv(mpos, send - mpos, NULL);
}
if (!(fl & SUB_GLOBAL)) {
if (n) {
@@ -2945,9 +2981,10 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* the next character, even if it overlaps
* with what we just found.
*/
- umlen -= iincchar(&t);
+ umlen -= iincchar(&t, send - t);
continue;
} else {
+ patfreestr(&patstralloc);
return 1;
}
}
@@ -2958,11 +2995,11 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
matched = 1;
while (t < mpos) {
ioff++;
- umlen -= iincchar(&t);
+ umlen -= iincchar(&t, send - t);
}
break;
}
- umlen -= iincchar(&t);
+ umlen -= iincchar(&t, send - t);
}
} while (matched);
/*
@@ -2972,8 +3009,9 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
set_pat_start(p, l);
if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
- pattrylen(p, s + l, -1, -1, NULL, 0) && !--n) {
- *sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist);
+ pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
+ *sp = get_match_ret(&imd, 0, 0);
+ patfreestr(&patstralloc);
return 1;
}
break;
@@ -2983,8 +3021,10 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
/* Longest/shortest at end, matching substrings. */
if (!(fl & SUB_LONG)) {
set_pat_start(p, l);
- if (pattrylen(p, s + l, 0, 0, NULL, umltot) && !--n) {
- *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+ if (pattrylen(p, send, 0, 0, &patstralloc, umltot) &&
+ !--n) {
+ *sp = get_match_ret(&imd, umltot, umltot);
+ patfreestr(&patstralloc);
return 1;
}
}
@@ -3001,13 +3041,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
nmatches = 0;
tmatch = NULL;
mb_charinit();
- for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
+ for (ioff = 0, t = s, umlen = umltot; t < send; ioff++) {
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
+ if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
nmatches++;
tmatch = t;
}
- umlen -= iincchar(&t);
+ umlen -= iincchar(&t, send - t);
}
if (nmatches) {
char *mpos;
@@ -3017,14 +3057,14 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
n = nmatches - n;
mb_charinit();
- for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
+ for (ioff = 0, t = s, umlen = umltot; t < send; ioff++) {
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff) &&
+ if (pattrylen(p, t, umlen, 0, &patstralloc, ioff) &&
!n--) {
tmatch = t;
break;
}
- umlen -= iincchar(&t);
+ umlen -= iincchar(&t, send - t);
}
}
mpos = tmatch + patmatchlen();
@@ -3032,29 +3072,31 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
for (t = tmatch, umlen = 0; t < mpos; ) {
set_pat_end(p, *t);
- if (pattrylen(p, tmatch, t - tmatch, umlen,
- NULL, ioff)) {
+ if (pattrylen(p, tmatch, umlen, 0,
+ &patstralloc, ioff)) {
mpos = tmatch + patmatchlen();
break;
}
- umlen += iincchar(&t);
+ umlen += iincchar(&t, mpos - t);
}
}
- *sp = get_match_ret(*sp, tmatch-s, mpos-s, fl,
- replstr, NULL);
+ *sp = get_match_ret(&imd, tmatch-s, mpos-s);
+ patfreestr(&patstralloc);
return 1;
}
set_pat_start(p, l);
- if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, NULL, umltot) &&
+ if ((fl & SUB_LONG) && pattrylen(p, send, 0, 0,
+ &patstralloc, umltot) &&
!--n) {
- *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+ *sp = get_match_ret(&imd, umltot, umltot);
+ patfreestr(&patstralloc);
return 1;
}
break;
}
}
- if (repllist && nonempty(repllist)) {
+ if (imd.repllist && nonempty(imd.repllist)) {
/* Put all the bits of a global search and replace together. */
LinkNode nd;
Repldata rd;
@@ -3062,10 +3104,15 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
char *ptr, *start;
int i;
+ /*
+ * Use metafied string again.
+ * Results from get_match_ret in repllist are all metafied.
+ */
+ s = *sp;
if (!(fl & SUB_LIST)) {
lleft = 0; /* size of returned string */
- i = 0; /* start of last chunk we got from *sp */
- for (nd = firstnode(repllist); nd; incnode(nd)) {
+ i = 0; /* start of last chunk we got from *sp */
+ for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
rd = (Repldata) getdata(nd);
lleft += rd->b - i; /* previous chunk of *sp */
lleft += strlen(rd->replstr); /* the replaced bit */
@@ -3074,7 +3121,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
lleft += l - i; /* final chunk from *sp */
start = t = zhalloc(lleft+1);
i = 0;
- for (nd = firstnode(repllist); nd; incnode(nd)) {
+ for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
rd = (Repldata) getdata(nd);
memcpy(t, s + i, rd->b - i);
t += rd->b - i;
@@ -3087,13 +3134,19 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
start[lleft] = '\0';
*sp = (char *)start;
}
+ patfreestr(&patstralloc);
return 1;
}
- if (fl & SUB_LIST) /* safety: don't think this can happen */
+ if (fl & SUB_LIST) { /* safety: don't think this can happen */
+ patfreestr(&patstralloc);
return 0;
+ }
/* munge the whole string: no match, so no replstr */
- *sp = get_match_ret(*sp, 0, 0, fl, 0, 0);
+ imd.replstr = NULL;
+ imd.repllist = NULL;
+ *sp = get_match_ret(&imd, 0, 0);
+ patfreestr(&patstralloc);
return (fl & SUB_RETFAIL) ? 0 : 1;
}
@@ -3111,7 +3164,7 @@ static int
igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
LinkList *repllistp)
{
- char *s = *sp, *t;
+ char *s = *sp, *t, *send;
/*
* Note that ioff and uml count characters in the character
* set (Meta's are not included), while l counts characters in the
@@ -3119,36 +3172,48 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* lengths.
*/
int ioff, l = strlen(*sp), uml = ztrlen(*sp), matched = 1, umlen;
- /*
- * List of bits of matches to concatenate with replacement string.
- * The data is a struct repldata. It is not used in cases like
- * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match
- * is anchored. It goes on the heap.
- */
- LinkList repllist = NULL;
+ struct patstralloc patstralloc;
+ struct imatchdata imd;
+
+ (void)patallocstr(p, s, l, uml, 1, &patstralloc);
+ s = patstralloc.alloced;
+ DPUTS(!s, "forced patallocstr failed");
+ send = s + uml;
+
+ imd.mstr = *sp;
+ imd.mlen = l;
+ imd.ustr = s;
+ imd.ulen = uml;
+ imd.flags = fl;
+ imd.replstr = replstr;
+ imd.repllist = NULL;
/* perform must-match test for complex closures */
if (p->mustoff)
{
- /*
- * Yuk. Probably we should rewrite this whole function to
- * use an unmetafied test string.
- *
- * Use META_HEAPDUP because we need a terminating NULL.
- */
- char *muststr = metafy((char *)p + p->mustoff,
- p->patmlen, META_HEAPDUP);
+ char *muststr = (char *)p + p->mustoff;
- if (!strstr(s, muststr))
- matched = 0;
+ matched = 0;
+ if (p->patmlen <= uml)
+ {
+ for (t = s; t <= send - p->patmlen; t++)
+ {
+ if (!memcmp(muststr, t, p->patmlen)) {
+ matched = 1;
+ break;
+ }
+ }
+ }
}
/* in case we used the prog before... */
p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
if (fl & SUB_ALL) {
- int i = matched && pattry(p, s);
- *sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL);
+ int i = matched && pattrylen(p, s, uml, 0, &patstralloc, 0);
+ if (!i)
+ imd.replstr = NULL;
+ *sp = get_match_ret(&imd, 0, i ? l : 0);
if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
return 0;
return 1;
@@ -3161,23 +3226,25 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* Largest/smallest possible match at head of string.
* First get the longest match...
*/
- if (pattry(p, s)) {
+ if (pattrylen(p, s, uml, 0, &patstralloc, 0)) {
/* patmatchlen returns metafied length, as we need */
int mlen = patmatchlen();
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
+ send = s + mlen;
/*
* ... now we know whether it's worth looking for the
* shortest, which we do by brute force.
*/
for (t = s, umlen = 0; t < s + mlen; METAINC(t), umlen++) {
set_pat_end(p, *t);
- if (pattrylen(p, s, t - s, umlen, NULL, 0)) {
+ if (pattrylen(p, s, umlen, 0, &patstralloc, 0)) {
mlen = patmatchlen();
break;
}
}
}
- *sp = get_match_ret(*sp, 0, mlen, fl, replstr, NULL);
+ *sp = get_match_ret(&imd, 0, mlen);
+ patfreestr(&patstralloc);
return 1;
}
break;
@@ -3186,17 +3253,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
/* Smallest possible match at tail of string: *
* move back down string until we get a match. *
* There's no optimization here. */
- for (ioff = uml, t = s + l, umlen = 0; t >= s;
+ for (ioff = uml, t = send, umlen = 0; t >= s;
t--, ioff--, umlen++) {
- if (t > s && t[-1] == Meta)
- t--;
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
- *sp = get_match_ret(*sp, t - s, l, fl, replstr, NULL);
+ if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
+ *sp = get_match_ret(&imd, t - s, uml);
return 1;
}
- if (t > s+1 && t[-2] == Meta)
- t--;
}
break;
@@ -3204,61 +3267,59 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
/* Largest possible match at tail of string: *
* move forward along string until we get a match. *
* Again there's no optimisation. */
- for (ioff = 0, t = s, umlen = uml; t < s + l;
- ioff++, METAINC(t), umlen--) {
+ for (ioff = 0, t = s, umlen = uml; t < send;
+ ioff++, t++, umlen--) {
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
- *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL);
+ if (pattrylen(p, t, send - t, umlen, &patstralloc, ioff)) {
+ *sp = get_match_ret(&imd, t-s, uml);
return 1;
}
- if (*t == Meta)
- t++;
}
break;
case SUB_SUBSTR:
/* Smallest at start, but matching substrings. */
set_pat_start(p, l);
- if (!(fl & SUB_GLOBAL) && pattry(p, s + l) && !--n) {
- *sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL);
+ if (!(fl & SUB_GLOBAL) &&
+ pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
+ *sp = get_match_ret(&imd, 0, 0);
return 1;
} /* fall through */
case (SUB_SUBSTR|SUB_LONG):
/* longest or smallest at start with substrings */
t = s;
if (fl & SUB_GLOBAL) {
- repllist = newlinklist();
+ imd.repllist = newlinklist();
if (repllistp)
- *repllistp = repllist;
+ *repllistp = imd.repllist;
}
ioff = 0; /* offset into string */
umlen = uml;
do {
/* loop over all matches for global substitution */
matched = 0;
- for (; t < s + l; METAINC(t), ioff++, umlen--) {
+ for (; t < send; t++, ioff++, umlen--) {
/* Find the longest match from this position. */
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
+ if (pattrylen(p, t, send - t, umlen, &patstralloc, ioff)) {
char *mpos = t + patmatchlen();
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
char *ptr;
int umlen2;
for (ptr = t, umlen2 = 0; ptr < mpos;
- METAINC(ptr), umlen2++) {
+ ptr++, umlen2++) {
set_pat_end(p, *ptr);
if (pattrylen(p, t, ptr - t, umlen2,
- NULL, ioff)) {
+ &patstralloc, ioff)) {
mpos = t + patmatchlen();
break;
}
}
}
if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) {
- *sp = get_match_ret(*sp, t-s, mpos-s, fl,
- replstr, repllist);
+ *sp = get_match_ret(&imd, t-s, mpos-s);
if (mpos == t)
- METAINC(mpos);
+ mpos++;
}
if (!(fl & SUB_GLOBAL)) {
if (n) {
@@ -3278,13 +3339,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* which is already marked for replacement.
*/
matched = 1;
- for ( ; t < mpos; t++, ioff++, umlen--)
- if (*t == Meta)
- t++;
+ while (t < mpos) {
+ ioff++;
+ umlen--;
+ t++;
+ }
break;
}
- if (*t == Meta)
- t++;
}
} while (matched);
/*
@@ -3294,8 +3355,9 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
set_pat_start(p, l);
if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
- pattry(p, s + l) && !--n) {
- *sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist);
+ pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
+ *sp = get_match_ret(&imd, 0, 0);
+ patfreestr(&patstralloc);
return 1;
}
break;
@@ -3305,47 +3367,50 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
/* Longest/shortest at end, matching substrings. */
if (!(fl & SUB_LONG)) {
set_pat_start(p, l);
- if (pattrylen(p, s + l, 0, 0, NULL, uml) && !--n) {
- *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+ if (pattrylen(p, send, 0, 0, &patstralloc, uml) && !--n) {
+ *sp = get_match_ret(&imd, uml, uml);
+ patfreestr(&patstralloc);
return 1;
}
}
- for (ioff = uml - 1, t = s + l - 1, umlen = 1; t >= s;
+ for (ioff = uml - 1, t = send - 1, umlen = 1; t >= s;
t--, ioff--, umlen++) {
- if (t > s && t[-1] == Meta)
- t--;
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff) && !--n) {
+ if (pattrylen(p, t, send - t, umlen, &patstralloc, ioff) &&
+ !--n) {
/* Found the longest match */
char *mpos = t + patmatchlen();
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
char *ptr;
int umlen2;
for (ptr = t, umlen2 = 0; ptr < mpos;
- METAINC(ptr), umlen2++) {
+ ptr++, umlen2++) {
set_pat_end(p, *ptr);
- if (pattrylen(p, t, ptr - t, umlen2, NULL, ioff)) {
+ if (pattrylen(p, t, umlen2, 0, &patstralloc,
+ ioff)) {
mpos = t + patmatchlen();
break;
}
}
}
- *sp = get_match_ret(*sp, t-s, mpos-s, fl,
- replstr, NULL);
+ *sp = get_match_ret(&imd, t-s, mpos-s);
+ patfreestr(&patstralloc);
return 1;
}
}
set_pat_start(p, l);
- if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, NULL, uml) &&
+ if ((fl & SUB_LONG) && pattrylen(p, send, 0, 0,
+ &patstralloc, uml) &&
!--n) {
- *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+ *sp = get_match_ret(&imd, uml, uml);
+ patfreestr(&patstralloc);
return 1;
}
break;
}
}
- if (repllist && nonempty(repllist)) {
+ if (imd.repllist && nonempty(imd.repllist)) {
/* Put all the bits of a global search and replace together. */
LinkNode nd;
Repldata rd;
@@ -3353,8 +3418,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
char *ptr, *start;
int i;
+ /*
+ * Use metafied string again.
+ * Results from get_match_ret in repllist are all metafied.
+ */
+ s = *sp;
i = 0; /* start of last chunk we got from *sp */
- for (nd = firstnode(repllist); nd; incnode(nd)) {
+ for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
rd = (Repldata) getdata(nd);
lleft += rd->b - i; /* previous chunk of *sp */
lleft += strlen(rd->replstr); /* the replaced bit */
@@ -3363,7 +3433,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
lleft += l - i; /* final chunk from *sp */
start = t = zhalloc(lleft+1);
i = 0;
- for (nd = firstnode(repllist); nd; incnode(nd)) {
+ for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
rd = (Repldata) getdata(nd);
memcpy(t, s + i, rd->b - i);
t += rd->b - i;
@@ -3375,11 +3445,15 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
memcpy(t, s + i, l - i);
start[lleft] = '\0';
*sp = (char *)start;
+ patfreestr(&patstralloc);
return 1;
}
/* munge the whole string: no match, so no replstr */
- *sp = get_match_ret(*sp, 0, 0, fl, 0, 0);
+ imd.replstr = NULL;
+ imd.repllist = NULL;
+ *sp = get_match_ret(&imd, 0, 0);
+ patfreestr(&patstralloc);
return 1;
}
diff --git a/Src/pattern.c b/Src/pattern.c
index 03ba37d92..8de372c9e 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -2204,7 +2204,10 @@ pattrylen(Patprog prog, char *string, int len, int unmetalen,
* the pattern module) at which we are trying to match.
* This is added in to the positions recorded in patbeginp and patendp
* when we are looking for substrings. Currently this only happens
- * in the parameter substitution code.
+ * in the parameter substitution code. It refers to a real character
+ * offset, i.e. is already in the form ready for presentation to the
+ * general public --- this is necessary as we don't have the
+ * information to convert it down here.
*
* Note this is a character offset, i.e. a single possibly metafied and
* possibly multibyte character counts as 1.
@@ -2292,7 +2295,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
*/
if (!patstralloc->progstrunmeta)
{
- patstralloc->progstrunmeta = dupstring(progstr);
+ patstralloc->progstrunmeta =
+ dupstrpfx(progstr, (int)prog->patmlen);
unmetafy(patstralloc->progstrunmeta,
&patstralloc->progstrunmetalen);
}
@@ -2346,7 +2350,7 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
* In the orignal structure, but it might be unmetafied
* for use with an unmetafied test string.
*/
- patinlen = (int)prog->patmlen;
+ patinlen = pstrlen;
/* if matching files, must update globbing flags */
patglobflags = prog->globend;
@@ -2360,7 +2364,7 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
* Unmetafied: pstrlen contains unmetafied
* length in bytes.
*/
- str = metafy(patinstart, pstrlen, META_ALLOC);
+ str = metafy(patinstart, pstrlen, META_DUP);
mlen = CHARSUB(patinstart, patinstart + pstrlen);
} else {
str = ztrduppfx(patinstart, patinlen);
@@ -2454,8 +2458,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
/*
* Optimization: if we didn't find any Meta characters
* to begin with, we don't need to look for them now.
- * Only do this if we did the unmetfication internally,
- * since otherwise it's too hard to work out.
+ *
+ * For patstralloc passed in, we want the unmetafied length.
*/
if (patstralloc == &patstralloc_struct &&
patstralloc->unmetalen != origlen) {
@@ -2588,7 +2592,9 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
/*
* Return length of previous succesful match. This is
- * in metafied bytes, i.e. includes a count of Meta characters.
+ * in metafied bytes, i.e. includes a count of Meta characters,
+ * unless the match was done on an unmetafied string using
+ * a patstralloc struct, in which case it, too, is unmetafied.
* Unusual and futile attempt at modular encapsulation.
*/
diff --git a/Src/zsh.h b/Src/zsh.h
index 32f2e0cb2..15fa5e417 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -480,6 +480,7 @@ typedef struct heap *Heap;
typedef struct heapstack *Heapstack;
typedef struct histent *Histent;
typedef struct hookdef *Hookdef;
+typedef struct imatchdata *Imatchdata;
typedef struct jobfile *Jobfile;
typedef struct job *Job;
typedef struct linkedmod *Linkedmod;
@@ -1593,6 +1594,31 @@ typedef struct zpc_disables_save *Zpc_disables_save;
/* Range: token followed by the (possibly multibyte) start and end */
#define PP_RANGE 21
+/*
+ * Argument to get_match_ret() in glob.c
+ */
+struct imatchdata {
+ /* Metafied trial string */
+ char *mstr;
+ /* Its length */
+ int mlen;
+ /* Unmetafied string */
+ char *ustr;
+ /* Its length */
+ int ulen;
+ /* Flags (SUB_*) */
+ int flags;
+ /* Replacement string (metafied) */
+ char *replstr;
+ /*
+ * List of bits of matches to concatenate with replacement string.
+ * The data is a struct repldata. It is not used in cases like
+ * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match
+ * is anchored. It goes on the heap.
+ */
+ LinkList repllist;
+};
+
/* Globbing flags: lower 8 bits gives approx count */
#define GF_LCMATCHUC 0x0100
#define GF_IGNCASE 0x0200
--
cgit v1.2.3
From 533658730745ae6261f9552aba01dc5346405d30 Mon Sep 17 00:00:00 2001
From: Peter Stephenson
Date: Wed, 30 Sep 2015 10:19:16 +0100
Subject: 36711: Allocate unmetafied pattern trial string on the heap
---
ChangeLog | 5 +++++
Src/glob.c | 25 -------------------------
Src/pattern.c | 34 +++++++---------------------------
3 files changed, 12 insertions(+), 52 deletions(-)
(limited to 'Src/pattern.c')
diff --git a/ChangeLog b/ChangeLog
index 38e0e4627..56e725cf0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2015-09-30 Peter Stephenson
+
+ * 36711: Src/glob.c, Src/pattern.c: Memory for early unmetafied
+ pattern trial string is on the heap.
+
2015-09-28 Peter Stephenson
* 36682: Src/glob.c, Src/pattern.c, Src/zsh.h,
diff --git a/Src/glob.c b/Src/glob.c
index d9986634a..24e60d0c5 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -2478,9 +2478,6 @@ get_match_ret(Imatchdata imd, int b, int e)
if (imeta(*p))
add++;
e += add;
- for (; p < imd->ustr + imd->ulen; p++)
- if (imeta(*p))
- add++;
/* Everything now refers to metafied lengths. */
if (replstr || (fl & SUB_LIST)) {
@@ -2808,7 +2805,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
imd.replstr = NULL;
}
*sp = get_match_ret(&imd, 0, umltot);
- patfreestr(&patstralloc);
if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
return 0;
return 1;
@@ -2856,7 +2852,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
}
}
*sp = get_match_ret(&imd, 0, mlen);
- patfreestr(&patstralloc);
return 1;
}
break;
@@ -2884,13 +2879,11 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
}
if (tmatch) {
*sp = get_match_ret(&imd, tmatch - s, umltot);
- patfreestr(&patstralloc);
return 1;
}
if (!(fl & SUB_START) && pattrylen(p, s + umltot, 0, 0,
&patstralloc, ioff)) {
*sp = get_match_ret(&imd, umltot, umltot);
- patfreestr(&patstralloc);
return 1;
}
break;
@@ -2904,7 +2897,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
set_pat_start(p, t-s);
if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
*sp = get_match_ret(&imd, t-s, umltot);
- patfreestr(&patstralloc);
return 1;
}
if (fl & SUB_START)
@@ -2914,7 +2906,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
if (!(fl & SUB_START) && pattrylen(p, send, 0, 0,
&patstralloc, ioff)) {
*sp = get_match_ret(&imd, umltot, umltot);
- patfreestr(&patstralloc);
return 1;
}
break;
@@ -2926,7 +2917,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
pattrylen(p, send, 0, 0, &patstralloc, 0) &&
!--n) {
*sp = get_match_ret(&imd, 0, 0);
- patfreestr(&patstralloc);
return 1;
} /* fall through */
case (SUB_SUBSTR|SUB_LONG):
@@ -2984,7 +2974,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
umlen -= iincchar(&t, send - t);
continue;
} else {
- patfreestr(&patstralloc);
return 1;
}
}
@@ -3011,7 +3000,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
*sp = get_match_ret(&imd, 0, 0);
- patfreestr(&patstralloc);
return 1;
}
break;
@@ -3024,7 +3012,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
if (pattrylen(p, send, 0, 0, &patstralloc, umltot) &&
!--n) {
*sp = get_match_ret(&imd, umltot, umltot);
- patfreestr(&patstralloc);
return 1;
}
}
@@ -3081,7 +3068,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
}
}
*sp = get_match_ret(&imd, tmatch-s, mpos-s);
- patfreestr(&patstralloc);
return 1;
}
set_pat_start(p, l);
@@ -3089,7 +3075,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
&patstralloc, umltot) &&
!--n) {
*sp = get_match_ret(&imd, umltot, umltot);
- patfreestr(&patstralloc);
return 1;
}
break;
@@ -3134,11 +3119,9 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
start[lleft] = '\0';
*sp = (char *)start;
}
- patfreestr(&patstralloc);
return 1;
}
if (fl & SUB_LIST) { /* safety: don't think this can happen */
- patfreestr(&patstralloc);
return 0;
}
@@ -3146,7 +3129,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
imd.replstr = NULL;
imd.repllist = NULL;
*sp = get_match_ret(&imd, 0, 0);
- patfreestr(&patstralloc);
return (fl & SUB_RETFAIL) ? 0 : 1;
}
@@ -3244,7 +3226,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
}
}
*sp = get_match_ret(&imd, 0, mlen);
- patfreestr(&patstralloc);
return 1;
}
break;
@@ -3357,7 +3338,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
*sp = get_match_ret(&imd, 0, 0);
- patfreestr(&patstralloc);
return 1;
}
break;
@@ -3369,7 +3349,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
set_pat_start(p, l);
if (pattrylen(p, send, 0, 0, &patstralloc, uml) && !--n) {
*sp = get_match_ret(&imd, uml, uml);
- patfreestr(&patstralloc);
return 1;
}
}
@@ -3394,7 +3373,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
}
}
*sp = get_match_ret(&imd, t-s, mpos-s);
- patfreestr(&patstralloc);
return 1;
}
}
@@ -3403,7 +3381,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
&patstralloc, uml) &&
!--n) {
*sp = get_match_ret(&imd, uml, uml);
- patfreestr(&patstralloc);
return 1;
}
break;
@@ -3445,7 +3422,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
memcpy(t, s + i, l - i);
start[lleft] = '\0';
*sp = (char *)start;
- patfreestr(&patstralloc);
return 1;
}
@@ -3453,7 +3429,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
imd.replstr = NULL;
imd.repllist = NULL;
*sp = get_match_ret(&imd, 0, 0);
- patfreestr(&patstralloc);
return 1;
}
diff --git a/Src/pattern.c b/Src/pattern.c
index 8de372c9e..68a340919 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -2028,8 +2028,8 @@ pattrystart(void)
*
* Unmetafy a trial string for use in pattern matching, if needed.
*
- * If it is needed, returns a zalloc()'d string; if not needed, returns
- * NULL.
+ * If it is needed, returns a heap allocated string; if not needed,
+ * returns NULL.
*
* prog is the pattern to be executed.
* string is the metafied trial string.
@@ -2046,7 +2046,7 @@ pattrystart(void)
* unmetalenp is the umetafied length of a path segment preceeding
* the trial string needed for file mananagement; it is calculated as
* needed so does not need to be initialised.
- * alloced is the memory allocated --- same as return value from
+ * alloced is the memory allocated on the heap --- same as return value from
* function.
*/
/**/
@@ -2097,7 +2097,7 @@ char *patallocstr(Patprog prog, char *string, int stringlen, int unmetalen,
int i, icopy, ncopy;
dst = patstralloc->alloced =
- zalloc(patstralloc->unmetalen + patstralloc->unmetalenp);
+ zhalloc(patstralloc->unmetalen + patstralloc->unmetalenp);
if (needfullpath) {
/* loop twice, copy path buffer first time */
@@ -2133,20 +2133,6 @@ char *patallocstr(Patprog prog, char *string, int stringlen, int unmetalen,
}
-/*
- * Free memory allocated by patallocstr().
- */
-
-/**/
-mod_export
-void patfreestr(Patstralloc patstralloc)
-{
- if (patstralloc->alloced)
- zfree(patstralloc->alloced,
- patstralloc->unmetalen + patstralloc->unmetalenp);
-}
-
-
/*
* Test prog against null-terminated, metafied string.
*/
@@ -2189,8 +2175,9 @@ pattrylen(Patprog prog, char *string, int len, int unmetalen,
* done if there is no path prefix (pathpos == 0) as otherwise the path
* buffer and unmetafied string may not match. To do this,
* patallocstr() is callled (use force = 1 to ensure it is alway
- * unmetafied); paststralloc points to existing storage. When all
- * pattern matching is done, patfreestr() is called.
+ * unmetafied); patstralloc points to existing storage. Memory is
+ * on the heap.
+ *
* patstralloc->alloced and patstralloc->unmetalen contain the
* unmetafied string and its length. In that case, the rules for the
* earlier arguments change:
@@ -2387,8 +2374,6 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
}
}
- if (patstralloc == &patstralloc_struct)
- patfreestr(patstralloc);
return ret;
} else {
int q = queue_signal_level();
@@ -2425,8 +2410,6 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
}
}
if (!ret) {
- if (patstralloc == &patstralloc_struct)
- patfreestr(patstralloc);
return 0;
}
@@ -2583,9 +2566,6 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
restore_queue_signals(q);
- if (patstralloc == &patstralloc_struct)
- patfreestr(patstralloc);
-
return ret;
}
}
--
cgit v1.2.3
From 807a8338a3be8127dd23c69971668b7b0c6b79a2 Mon Sep 17 00:00:00 2001
From: Peter Stephenson
Date: Thu, 1 Oct 2015 16:21:18 +0100
Subject: 36737: Ensure we don't dereference unterminated zero-length string
---
ChangeLog | 3 +++
Src/pattern.c | 10 +++++++---
2 files changed, 10 insertions(+), 3 deletions(-)
(limited to 'Src/pattern.c')
diff --git a/ChangeLog b/ChangeLog
index a51dc4795..14749d9ff 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
2015-10-01 Peter Stephenson
+ * 36737: Src/pattern.c: Ensure we are not dereferencing
+ zero-length unterminated string.
+
* 36735: Doc/Zsh/contrib.yo, Functions/Misc/zcalc: ~/.zcalcrc.
* Andrew Janke: 36729: MACHINES, NEWS, README: fix some typos.
diff --git a/Src/pattern.c b/Src/pattern.c
index 68a340919..04d3e3dfb 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -2224,8 +2224,10 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
maxnpos = *nump;
*nump = 0;
}
- /* inherited from domatch, but why, exactly? */
- if (*string == Nularg) {
+ /*
+ * Special signalling of empty tokenised string.
+ */
+ if ((!patstralloc || stringlen > 0) && *string == Nularg) {
string++;
if (unmetalenin > 0)
unmetalenin--;
@@ -2233,8 +2235,10 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
stringlen--;
}
- if (stringlen < 0)
+ if (stringlen < 0) {
+ DPUTS(patstralloc != NULL, "length needed with patstralloc");
stringlen = strlen(string);
+ }
origlen = stringlen;
if (patstralloc) {
--
cgit v1.2.3
From faeb9555d3c853b019aa30ee1ca62bec3971ce9f Mon Sep 17 00:00:00 2001
From: Peter Stephenson
Date: Sat, 3 Oct 2015 20:25:57 +0100
Subject: 36760: more care with already unmetafied pattern trial strings
---
ChangeLog | 5 +++++
Src/pattern.c | 55 ++++++++++++++++++++++++++++++++++++++++---------------
2 files changed, 45 insertions(+), 15 deletions(-)
(limited to 'Src/pattern.c')
diff --git a/ChangeLog b/ChangeLog
index 1061d1d94..e45273e68 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2015-10-03 Peter Stephenson
+
+ * 36760: Src/pattern.c: ensure we don't do anything untoward
+ with an already unmetafied pattern trial match.
+
2015-10-03 Mikael Magnusson
* 36754: Functions/TCP/tcp_open, Functions/TCP/tcp_read,
diff --git a/Src/pattern.c b/Src/pattern.c
index 04d3e3dfb..8b07cca92 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -2022,6 +2022,39 @@ pattrystart(void)
errsfound = 0;
}
+/*
+ * Fix up string length stuff.
+ *
+ * If we call patallocstr() with "force" to set things up early, it's
+ * done there, else it's done in pattryrefs(). The reason for the
+ * difference is in the latter case we may not be relying on
+ * patallocstr() having an effect.
+ */
+
+/**/
+static void
+patmungestring(char **string, int *stringlen, int *unmetalenin)
+{
+ /*
+ * Special signalling of empty tokenised string.
+ */
+ if (*stringlen > 0 && **string == Nularg) {
+ (*string)++;
+ /*
+ * If we don't have an unmetafied length
+ * and need it (we may not) we'll get it later.
+ */
+ if (*unmetalenin > 0)
+ (*unmetalenin)--;
+ if (*stringlen > 0)
+ (*stringlen)--;
+ }
+
+ /* Ensure we have a metafied length */
+ if (*stringlen < 0)
+ *stringlen = strlen(*string);
+}
+
/*
* Allocate memeory for pattern match. Note this is specific to use
* of pattern *and* trial string.
@@ -2039,7 +2072,8 @@ pattrystart(void)
* force is 1 if we always unmetafy: this is useful if we are going
* to try again with different versions of the string. If this is
* called from pattryrefs() we don't force unmetafication as it won't
- * be optimal.
+ * be optimal. This option should be used if the resulting
+ * patstralloc is going to be passed to pattrylen() / pattryrefs().
* In patstralloc (supplied by caller, must last until last pattry is done)
* unmetalen is the unmetafied length of the string; it will be
* calculated if the input value is negative.
@@ -2056,6 +2090,9 @@ char *patallocstr(Patprog prog, char *string, int stringlen, int unmetalen,
{
int needfullpath;
+ if (force)
+ patmungestring(&string, &stringlen, &unmetalen);
+
/*
* For a top-level ~-exclusion, we will need the full
* path to exclude, so copy the path so far and append the
@@ -2224,21 +2261,9 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
maxnpos = *nump;
*nump = 0;
}
- /*
- * Special signalling of empty tokenised string.
- */
- if ((!patstralloc || stringlen > 0) && *string == Nularg) {
- string++;
- if (unmetalenin > 0)
- unmetalenin--;
- if (stringlen > 0)
- stringlen--;
- }
- if (stringlen < 0) {
- DPUTS(patstralloc != NULL, "length needed with patstralloc");
- stringlen = strlen(string);
- }
+ if (!patstralloc)
+ patmungestring(&string, &stringlen, &unmetalenin);
origlen = stringlen;
if (patstralloc) {
--
cgit v1.2.3
From b498bd7ce5b8f126f420f8f9fd4946912f8334da Mon Sep 17 00:00:00 2001
From: Peter Stephenson
Date: Tue, 27 Oct 2015 11:54:19 +0000
Subject: 36982: Fix bug with (#cN) patterns and remove redundant description.
We need to restore the current count of matches when returning to
match at the point where we previously matched.
---
ChangeLog | 4 ++++
Doc/Zsh/expn.yo | 10 ----------
Src/pattern.c | 1 +
Test/D02glob.ztst | 8 ++++++++
Test/D04parameter.ztst | 9 +++++++++
5 files changed, 22 insertions(+), 10 deletions(-)
(limited to 'Src/pattern.c')
diff --git a/ChangeLog b/ChangeLog
index b64f785f7..cd8359281 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
2015-10-27 Peter Stephenson
+ * 36982: Doc/Zsh/expn.yo, Src/pattern.c, Test/D02glob.ztst,
+ Test/D04parameter.ztst: fix actual bug with (#cN) and back off
+ previous change.
+
* 36977: Doc/Zsh/expn.yo: note about complications of pattern
repetitions (#cN).
diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index 49a0f0d53..5ea8610f2 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -2192,16 +2192,6 @@ inclusive. The form tt(LPAR()#c)var(N)tt(RPAR()) requires exactly tt(N)
matches; tt(LPAR()#c,)var(M)tt(RPAR()) is equivalent to specifying var(N)
as 0; tt(LPAR()#c)var(N)tt(,RPAR()) specifies that there is no maximum
limit on the number of matches.
-
-Note that if the previous group of characters contains wildcards,
-results can be unpredictable to the point of being logically incorrect.
-It is recommended that the pattern be trimmed to match the minimum
-possible. For example, to match a string of the form `tt(1_2_3_)', use
-a pattern of the form `tt(LPAR()[[:digit:]]##_+RPAR()LPAR()#c3+RPAR())', not
-`tt(LPAR()*_+RPAR()LPAR()#c3+RPAR())'. This arises from the
-complicated interaction between attempts to match a number of
-repetitions of the whole pattern and attempts to match the wildcard
-`tt(*)'.
)
vindex(MATCH)
vindex(MBEGIN)
diff --git a/Src/pattern.c b/Src/pattern.c
index 8b07cca92..9e8a80ae1 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -3376,6 +3376,7 @@ patmatch(Upat prog)
scan[P_CT_CURRENT].l = cur + 1;
if (patmatch(scan + P_CT_OPERAND))
return 1;
+ scan[P_CT_CURRENT].l = cur;
patinput = patinput_thistime;
}
if (cur < min)
diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst
index 3e2095a0c..f944a4fbd 100644
--- a/Test/D02glob.ztst
+++ b/Test/D02glob.ztst
@@ -574,3 +574,11 @@
0:Optimisation to squeeze multiple *'s used as ordinary glob wildcards.
>glob.tmp/ra=1.0_et=3.5
>glob.tmp/ra=1.0_et=3.5
+
+ [[ 1_2_ = (*_)(#c1) ]] && print 1 OK # because * matches 1_2
+ [[ 1_2_ = (*_)(#c2) ]] && print 2 OK
+ [[ 1_2_ = (*_)(#c3) ]] || print 3 OK
+0:Some more complicated backtracking with match counts.
+>1 OK
+>2 OK
+>3 OK
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index f1cc23e7c..cb7079c98 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -1735,3 +1735,12 @@
0:History modifier works the same for scalar and array substitution
>ddd bdb cdc
>ddd bdb cdc
+
+ a=1_2_3_4_5_6
+ print ${a#(*_)(#c2)}
+ print ${a#(*_)(#c5)}
+ print ${a#(*_)(#c7)}
+0:Complicated backtracking with match counts
+>3_4_5_6
+>6
+>1_2_3_4_5_6
--
cgit v1.2.3